From 71f2a6a3a96333abb474dfe6e27ad5fef12ab296 Mon Sep 17 00:00:00 2001 From: Lior Avramov <73036155+liorghub@users.noreply.github.com> Date: Sat, 8 Apr 2023 00:15:19 +0300 Subject: [PATCH] Add teamd patches to solve traffic loss issue when removing port from LAG (#14002) #### Why I did it When removing port from LAG while traffic is running thorough LAG there is traffic disruption of 60 seconds. Fix issue https://github.com/sonic-net/sonic-buildimage/issues/14381 #### How I did it The patch I added introduces "port_removing" op and call it right before Kernel is asked to remove the port. Implement the op in LACP runner to disable the port which leads to proper LACPDU send. #### How to verify it Set LAG between 2 switches. Set LAGs to be router port and set ip address. In switch A send ping to ip address of LAG in switch B. In switch B, while ping is running remove port from LAG. Verify ping is not stopping. --- ...ort-to-disabled-state-during-removal.patch | 108 ++++++++++++++++++ ...om-disabled-when-admin-state-is-down.patch | 32 ++++++ src/libteam/patch/series | 2 + 3 files changed, 142 insertions(+) create mode 100644 src/libteam/patch/0013-set-port-to-disabled-state-during-removal.patch create mode 100644 src/libteam/patch/0014-dont-move-the-port-state-from-disabled-when-admin-state-is-down.patch diff --git a/src/libteam/patch/0013-set-port-to-disabled-state-during-removal.patch b/src/libteam/patch/0013-set-port-to-disabled-state-during-removal.patch new file mode 100644 index 0000000000..8341a53572 --- /dev/null +++ b/src/libteam/patch/0013-set-port-to-disabled-state-during-removal.patch @@ -0,0 +1,108 @@ +From fd26b370d85d63cca0736d7e666736bb15c395aa Mon Sep 17 00:00:00 2001 +From: Jiri Pirko +Date: Wed, 7 Dec 2022 10:44:35 +0100 +Subject: [PATCH] teamd: lacp: set port to disabled state during removal + +Currently, the disabled state is set only after port is removed from +team master in kernel. Team driver puts the port netdevice down right +away. In some cases, there is nice to send LACPDU to the partner with +flags set accordingly for the disabled port. + +Introduce "port_removing" op and call it right before kernel +is asked to remove the port. Implement the op in LACP runner +to disable the port which leads to proper LACPDU send. + +Signed-off-by: Jiri Pirko +--- + teamd/teamd.h | 4 ++++ + teamd/teamd_events.c | 12 ++++++++++++ + teamd/teamd_per_port.c | 1 + + teamd/teamd_runner_lacp.c | 12 ++++++++++++ + 4 files changed, 29 insertions(+) +diff --git a/teamd/teamd.h b/teamd/teamd.h +index 701a6a4..d1d0f7f 100644 +--- a/teamd/teamd.h ++++ b/teamd/teamd.h +@@ -183,6 +183,8 @@ struct teamd_event_watch_ops { + int (*admin_state_changed)(struct teamd_context *ctx, void *priv); + int (*port_added)(struct teamd_context *ctx, + struct teamd_port *tdport, void *priv); ++ void (*port_removing)(struct teamd_context *ctx, ++ struct teamd_port *tdport, void *priv); + void (*port_removed)(struct teamd_context *ctx, + struct teamd_port *tdport, void *priv); + int (*port_changed)(struct teamd_context *ctx, +@@ -209,6 +211,8 @@ void teamd_refresh_ports(struct teamd_context *ctx); + void teamd_ports_flush_data(struct teamd_context *ctx); + int teamd_event_port_added(struct teamd_context *ctx, + struct teamd_port *tdport); ++void teamd_event_port_removing(struct teamd_context *ctx, ++ struct teamd_port *tdport); + void teamd_event_port_removed(struct teamd_context *ctx, + struct teamd_port *tdport); + int teamd_event_port_changed(struct teamd_context *ctx, +diff --git a/teamd/teamd_events.c b/teamd/teamd_events.c +index bd4dcc1..ff39990 100644 +--- a/teamd/teamd_events.c ++++ b/teamd/teamd_events.c +@@ -76,6 +76,18 @@ int teamd_event_port_added(struct teamd_context *ctx, + return 0; + } + ++void teamd_event_port_removing(struct teamd_context *ctx, ++ struct teamd_port *tdport) ++{ ++ struct event_watch_item *watch; ++ ++ list_for_each_node_entry(watch, &ctx->event_watch_list, list) { ++ if (!watch->ops->port_removing) ++ continue; ++ watch->ops->port_removing(ctx, tdport, watch->priv); ++ } ++} ++ + void teamd_event_port_removed(struct teamd_context *ctx, + struct teamd_port *tdport) + { +diff --git a/teamd/teamd_per_port.c b/teamd/teamd_per_port.c +index cefd6c2..68fc553 100644 +--- a/teamd/teamd_per_port.c ++++ b/teamd/teamd_per_port.c +@@ -358,6 +358,7 @@ static int teamd_port_remove(struct teamd_context *ctx, + + teamd_log_dbg(ctx, "%s: Removing port (found ifindex \"%d\").", + tdport->ifname, tdport->ifindex); ++ teamd_event_port_removing(ctx, tdport); + err = team_port_remove(ctx->th, tdport->ifindex); + if (err) + teamd_log_err("%s: Failed to remove port.", tdport->ifname); +diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c +index 82b8b86..89f462a 100644 +--- a/teamd/teamd_runner_lacp.c ++++ b/teamd/teamd_runner_lacp.c +@@ -1779,6 +1779,17 @@ static int lacp_event_watch_port_added(struct teamd_context *ctx, + return teamd_balancer_port_added(lacp->tb, tdport); + } + ++static void lacp_event_watch_port_removing(struct teamd_context *ctx, ++ struct teamd_port *tdport, void *priv) ++{ ++ struct lacp *lacp = priv; ++ struct lacp_port *lacp_port = lacp_port_get(lacp, tdport); ++ ++ /* Ensure that no incoming LACPDU is going to be processed. */ ++ teamd_loop_callback_disable(ctx, LACP_SOCKET_CB_NAME, lacp_port); ++ lacp_port_set_state(lacp_port, PORT_STATE_DISABLED); ++} ++ + static void lacp_event_watch_port_removed(struct teamd_context *ctx, + struct teamd_port *tdport, void *priv) + { +@@ -1845,6 +1856,7 @@ static const struct teamd_event_watch_ops lacp_event_watch_ops = { + .hwaddr_changed = lacp_event_watch_hwaddr_changed, + .port_hwaddr_changed = lacp_event_watch_port_hwaddr_changed, + .port_added = lacp_event_watch_port_added, ++ .port_removing = lacp_event_watch_port_removing, + .port_removed = lacp_event_watch_port_removed, + .port_changed = lacp_event_watch_port_changed, + .admin_state_changed = lacp_event_watch_admin_state_changed, diff --git a/src/libteam/patch/0014-dont-move-the-port-state-from-disabled-when-admin-state-is-down.patch b/src/libteam/patch/0014-dont-move-the-port-state-from-disabled-when-admin-state-is-down.patch new file mode 100644 index 0000000000..82637529fc --- /dev/null +++ b/src/libteam/patch/0014-dont-move-the-port-state-from-disabled-when-admin-state-is-down.patch @@ -0,0 +1,32 @@ +From 23ab49c4df0c06eb629ce2e3bb4c4dd7c527975a Mon Sep 17 00:00:00 2001 +From: Jiri Pirko +Date: Thu, 2 Feb 2023 17:00:51 +0100 +Subject: [PATCH] teamd: lacp: don't move the port state from disabled when + admin state is down + +When the team admin state is down, the port should stay in disabled +state, no matter what's happening. So check the admin state and bail out +in that case. + +Signed-off-by: Jiri Pirko +--- + teamd/teamd_runner_lacp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c +index 51c7714..a76c372 100644 +--- a/teamd/teamd_runner_lacp.c ++++ b/teamd/teamd_runner_lacp.c +@@ -956,9 +956,11 @@ static int lacpdu_send(struct lacp_port *lacp_port); + static int lacp_port_set_state(struct lacp_port *lacp_port, + enum lacp_port_state new_state) + { ++ bool admin_state = team_get_ifinfo_admin_state(lacp_port->ctx->ifinfo); + int err; + +- if (new_state == lacp_port->state) ++ if (new_state == lacp_port->state || ++ (!admin_state && new_state != PORT_STATE_DISABLED)) + return 0; + if (new_state == PORT_STATE_DISABLED) + lacp_port_periodic_off(lacp_port); diff --git a/src/libteam/patch/series b/src/libteam/patch/series index cd7522918f..f47ee9c06a 100644 --- a/src/libteam/patch/series +++ b/src/libteam/patch/series @@ -10,3 +10,5 @@ 0010-When-read-of-timerfd-returned-0-don-t-consider-this-.patch 0011-Remove-extensive-debug-output.patch 0012-Increase-min_ports-upper-limit-to-1024.patch +0013-set-port-to-disabled-state-during-removal.patch +0014-dont-move-the-port-state-from-disabled-when-admin-state-is-down.patch