From 5d9bd9c1ad31987d10ac2ad127580b157107ea70 Mon Sep 17 00:00:00 2001 From: sudhanshukumar22 <51457531+sudhanshukumar22@users.noreply.github.com> Date: Thu, 17 Oct 2019 12:50:36 +0530 Subject: [PATCH] Port a fix from FRR community (#3614) Author: sudhanshukumar22 Date: Tue Oct 16 12:33:20 2019 -0700 https://github.com/donaldsharp/frr/commit/39c93f379a5b57c56739a339ad75ec06e30daef3 If we have a case where have created a fd for i/o and we have removed the handling thread but still have the fd in the poll data structure, there existed a case where we would get the handle this fd return from poll but we would immediately do nothing with it because we didn't have a thread to hand the event to. This leads to an infinite loop. Prevent the infinite loop from happening and log the problem. Signed-off-by: Preetham Singh (preetham.singh@broadcom.com) --- ...-dead-fd-poll-data-port-fix-from-frr.patch | 107 ++++++++++++++++++ src/sonic-frr/patch/series | 1 + 2 files changed, 108 insertions(+) create mode 100644 src/sonic-frr/patch/0007-prevent-dead-fd-poll-data-port-fix-from-frr.patch diff --git a/src/sonic-frr/patch/0007-prevent-dead-fd-poll-data-port-fix-from-frr.patch b/src/sonic-frr/patch/0007-prevent-dead-fd-poll-data-port-fix-from-frr.patch new file mode 100644 index 0000000000..5954e5c7c5 --- /dev/null +++ b/src/sonic-frr/patch/0007-prevent-dead-fd-poll-data-port-fix-from-frr.patch @@ -0,0 +1,107 @@ +From e4225086634fb7ae7fa762f3a45af6ad39e78641 Mon Sep 17 00:00:00 2001 +From: sudhanshukumar22 +Date: Tue, 15 Oct 2019 02:17:00 -0700 +Subject: [PATCH] Port a fix from FRR community + https://github.com/donaldsharp/frr/commit/39c93f379a5b57c56739a339ad75ec06e30daef3 + If we have a case where have created a fd for i/o and we have removed the + handling thread but still have the fd in the poll data structure, there + existed a case where we would get the handle this fd return from poll but we + would immediately do nothing with it because we didn't have a thread to hand + the event to. + +This leads to an infinite loop. Prevent the infinite loop +from happening and log the problem. +--- + lib/lib_errors.c | 6 ++++++ + lib/lib_errors.h | 1 + + lib/thread.c | 25 ++++++++++++++++++------- + 3 files changed, 25 insertions(+), 7 deletions(-) + +diff --git a/lib/lib_errors.c b/lib/lib_errors.c +index b6c764d87..033f27e58 100644 +--- a/lib/lib_errors.c ++++ b/lib/lib_errors.c +@@ -50,6 +50,12 @@ static struct log_ref ferr_lib_warn[] = { + .description = "The Event subsystem has detected a slow process, this typically indicates that FRR is having trouble completing work in a timely manner. This can be either a misconfiguration, bug, or some combination therof.", + .suggestion = "Gather log data and open an Issue", + }, ++ { ++ .code = EC_LIB_NO_THREAD, ++ .title = "The Event subsystem has detected an internal FD problem", ++ .description = "The Event subsystem has detected a file descriptor read/write event without an associated handling function. This is a bug, please collect log data and open an issue.", ++ .suggestion = "Gather log data and open an Issue", ++ }, + { + .code = EC_LIB_RMAP_RECURSION_LIMIT, + .title = "Reached the Route-Map Recursion Limit", +diff --git a/lib/lib_errors.h b/lib/lib_errors.h +index 39b39fb06..996a16ba9 100644 +--- a/lib/lib_errors.h ++++ b/lib/lib_errors.h +@@ -45,6 +45,7 @@ enum lib_log_refs { + EC_LIB_STREAM, + EC_LIB_LINUX_NS, + EC_LIB_SLOW_THREAD, ++ EC_LIB_NO_THREAD, + EC_LIB_RMAP_RECURSION_LIMIT, + EC_LIB_BACKUP_CONFIG, + EC_LIB_VRF_LENGTH, +diff --git a/lib/thread.c b/lib/thread.c +index 5ca859a74..82708557d 100644 +--- a/lib/thread.c ++++ b/lib/thread.c +@@ -1235,12 +1235,26 @@ static struct thread *thread_run(struct thread_master *m, struct thread *thread, + } + + static int thread_process_io_helper(struct thread_master *m, +- struct thread *thread, short state, int pos) ++ struct thread *thread, short state, short actual_state, int pos) + { + struct thread **thread_array; + +- if (!thread) ++ /* ++ * If another pthread scheduled this file descriptor for this event ++ * we're responding to, no problem, we're getting to it now. ++ * Additionally if !thread if we don't clear this now we'll ++ * infinaloop( which sucks ) ++ */ ++ m->handler.pfds[pos].events &= ~(state); ++ ++ if (!thread) { ++ if ((actual_state & (POLLHUP | POLLIN)) != POLLHUP) ++ flog_err( EC_LIB_NO_THREAD, ++ "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!\n", ++ m->handler.pfds[pos].fd, actual_state); + return 0; ++ } ++ + + if (thread->type == THREAD_READ) + thread_array = m->read; +@@ -1250,9 +1264,6 @@ static int thread_process_io_helper(struct thread_master *m, + thread_array[thread->u.fd] = NULL; + thread_list_add_tail(&m->ready, thread); + thread->type = THREAD_READY; +- /* if another pthread scheduled this file descriptor for the event we're +- * responding to, no problem; we're getting to it now */ +- thread->master->handler.pfds[pos].events &= ~(state); + return 1; + } + +@@ -1290,10 +1301,10 @@ static void thread_process_io(struct thread_master *m, unsigned int num) + * should still be a valid index into the master's pfds. */ + if (pfds[i].revents & (POLLIN | POLLHUP)) + thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN, +- i); ++ pfds[i].revents,i); + if (pfds[i].revents & POLLOUT) + thread_process_io_helper(m, m->write[pfds[i].fd], +- POLLOUT, i); ++ POLLOUT, pfds[i].revents, i); + + /* if one of our file descriptors is garbage, remove the same + * from +-- +2.18.0 + diff --git a/src/sonic-frr/patch/series b/src/sonic-frr/patch/series index 1acf6d94f4..edeb30ce1e 100644 --- a/src/sonic-frr/patch/series +++ b/src/sonic-frr/patch/series @@ -3,3 +3,4 @@ 0004-Allow-BGP-attr-NEXT_HOP-to-be-0.0.0.0-due-to-allevia.patch 0005-Support-VRF.patch 0006-changes-for-making-snmp-socket-non-blocking.patch +0007-prevent-dead-fd-poll-data-port-fix-from-frr.patch