Backport FRR patch tp FRR-7.2 to handle pthread race in peer notify message handling (#10324)

What I did:

Backport FRR patch FRRouting/frr#8220 on FRR 7.2. Fixes the Issue FRRouting/frr#8213

Why I did:-

Because of this race-condition we saw GR getting triggered even though BGP shut is given on peer device.

How I verify:

After patching this fix GR is not triggered on doing BGP shut on peer.
This commit is contained in:
abdosi 2022-03-23 08:58:10 -07:00 committed by GitHub
parent 218a310eeb
commit 74305f8a56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 169 additions and 0 deletions

View File

@ -0,0 +1,168 @@
From 3bafb068b9e54690c263ddf8e8c8c15e2efb5fe1 Mon Sep 17 00:00:00 2001
From: Abhishek Dosi <abdosi@microsoft.com>
Date: Tue, 22 Mar 2022 17:35:34 +0000
Subject: [PATCH] Backport PR: https://github.com/FRRouting/frr/pull/8220 to
FRR 7.2
Signed-off-by: Abhishek Dosi <abdosi@microsoft.com>
---
bgpd/bgp_io.c | 40 +++++++++++++++++++---------------------
bgpd/bgp_packet.c | 32 ++++++++++++++++++++++++++++++++
bgpd/bgp_packet.h | 4 ++++
3 files changed, 55 insertions(+), 21 deletions(-)
diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c
index c2d06a4b5..8d99d3067 100644
--- a/bgpd/bgp_io.c
+++ b/bgpd/bgp_io.c
@@ -43,7 +43,7 @@
/* forward declarations */
static uint16_t bgp_write(struct peer *);
-static uint16_t bgp_read(struct peer *);
+static uint16_t bgp_read(struct peer *peer, int *code_p);
static int bgp_process_writes(struct thread *);
static int bgp_process_reads(struct thread *);
static bool validate_header(struct peer *);
@@ -174,6 +174,7 @@ static int bgp_process_reads(struct thread *thread)
bool fatal = false; // whether fatal error occurred
bool added_pkt = false; // whether we pushed onto ->ibuf
/* clang-format on */
+ int code;
peer = THREAD_ARG(thread);
@@ -183,7 +184,7 @@ static int bgp_process_reads(struct thread *thread)
struct frr_pthread *fpt = bgp_pth_io;
frr_with_mutex(&peer->io_mtx) {
- status = bgp_read(peer);
+ status = bgp_read(peer, &code);
}
/* error checking phase */
@@ -196,6 +197,12 @@ static int bgp_process_reads(struct thread *thread)
/* problem; tear down session */
more = false;
fatal = true;
+
+ /* Handle the error in the main pthread, include the
+ * specific state change from 'bgp_read'.
+ */
+ thread_add_event(bm->master, bgp_packet_process_error,
+ peer, code, NULL);
}
while (more) {
@@ -381,7 +388,7 @@ done : {
*
* @return status flag (see top-of-file)
*/
-static uint16_t bgp_read(struct peer *peer)
+static uint16_t bgp_read(struct peer *peer, int *code_p)
{
size_t readsize; // how many bytes we want to read
ssize_t nbytes; // how many bytes we actually read
@@ -394,37 +401,28 @@ static uint16_t bgp_read(struct peer *peer)
/* EAGAIN or EWOULDBLOCK; come back later */
if (nbytes < 0 && ERRNO_IO_RETRY(errno)) {
SET_FLAG(status, BGP_IO_TRANS_ERR);
- /* Fatal error; tear down session */
} else if (nbytes < 0) {
+ /* Fatal error; tear down session */
flog_err(EC_BGP_UPDATE_RCV,
"%s [Error] bgp_read_packet error: %s", peer->host,
safe_strerror(errno));
- if (peer->status == Established) {
- if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
- peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
- SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
- } else
- peer->last_reset = PEER_DOWN_CLOSE_SESSION;
- }
+ /* Handle the error in the main pthread. */
+ if (code_p)
+ *code_p = TCP_fatal_error;
- BGP_EVENT_ADD(peer, TCP_fatal_error);
SET_FLAG(status, BGP_IO_FATAL_ERR);
- /* Received EOF / TCP session closed */
+
} else if (nbytes == 0) {
+ /* Received EOF / TCP session closed */
if (bgp_debug_neighbor_events(peer))
zlog_debug("%s [Event] BGP connection closed fd %d",
peer->host, peer->fd);
- if (peer->status == Established) {
- if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
- peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
- SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
- } else
- peer->last_reset = PEER_DOWN_CLOSE_SESSION;
- }
+ /* Handle the error in the main pthread. */
+ if (code_p)
+ *code_p = TCP_connection_closed;
- BGP_EVENT_ADD(peer, TCP_connection_closed);
SET_FLAG(status, BGP_IO_FATAL_ERR);
} else {
assert(ringbuf_put(peer->ibuf_work, ibw, nbytes)
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index c7c1780c2..1eb9fc5c4 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -2366,3 +2366,35 @@ int bgp_process_packet(struct thread *thread)
return 0;
}
+
+/*
+ * Task callback to handle socket error encountered in the io pthread. We avoid
+ * having the io pthread try to enqueue fsm events or mess with the peer
+ * struct.
+ */
+int bgp_packet_process_error(struct thread *thread)
+{
+ struct peer *peer;
+ int code;
+
+ peer = THREAD_ARG(thread);
+ code = THREAD_VAL(thread);
+
+ if (bgp_debug_neighbor_events(peer))
+ zlog_debug("%s [Event] BGP error %d on fd %d",
+ peer->host, peer->fd, code);
+
+ /* Closed connection or error on the socket */
+
+ if (peer->status == Established) {
+ if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
+ peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
+ SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
+ } else
+ peer->last_reset = PEER_DOWN_CLOSE_SESSION;
+ }
+
+ bgp_event_update(peer, code);
+
+ return 0;
+}
diff --git a/bgpd/bgp_packet.h b/bgpd/bgp_packet.h
index fc6fc66a4..d0a3ff99f 100644
--- a/bgpd/bgp_packet.h
+++ b/bgpd/bgp_packet.h
@@ -73,4 +73,8 @@ extern int bgp_packet_set_size(struct stream *s);
extern int bgp_generate_updgrp_packets(struct thread *);
extern int bgp_process_packet(struct thread *);
+
+/* Task callback to handle socket error encountered in the io pthread */
+int bgp_packet_process_error(struct thread *thread);
+
#endif /* _QUAGGA_BGP_PACKET_H */
--
2.17.1

View File

@ -5,3 +5,4 @@
0005-nexthops-compare-vrf-only-if-ip-type.patch
0006-changes-for-making-snmp-socket-non-blocking.patch
0007-frr-remove-frr-log-outchannel-to-var-log-frr.log.patch
0008-Backport-PR-https-github.com-FRRouting-frr-pull-8220.patch