Add teamd patches to solve traffic loss issue when removing port from LAG (#14002)

#### Why I did it
When removing port from LAG while traffic is running thorough LAG there is traffic disruption of 60 seconds.
Fix issue https://github.com/sonic-net/sonic-buildimage/issues/14381

#### How I did it
The patch I added introduces "port_removing" op and call it right before Kernel is asked to remove the port. 
Implement the op in LACP runner to disable the port which leads to proper LACPDU send.

#### How to verify it
Set LAG between 2 switches.
Set LAGs to be router port and set ip address.
In switch A send ping to ip address of LAG in switch B.
In switch B, while ping is running remove port from LAG.
Verify ping is not stopping.
This commit is contained in:
Lior Avramov 2023-04-08 00:15:19 +03:00 committed by GitHub
parent 3b5871f7f8
commit 71f2a6a3a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 142 additions and 0 deletions

View File

@ -0,0 +1,108 @@
From fd26b370d85d63cca0736d7e666736bb15c395aa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 7 Dec 2022 10:44:35 +0100
Subject: [PATCH] teamd: lacp: set port to disabled state during removal
Currently, the disabled state is set only after port is removed from
team master in kernel. Team driver puts the port netdevice down right
away. In some cases, there is nice to send LACPDU to the partner with
flags set accordingly for the disabled port.
Introduce "port_removing" op and call it right before kernel
is asked to remove the port. Implement the op in LACP runner
to disable the port which leads to proper LACPDU send.
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
teamd/teamd.h | 4 ++++
teamd/teamd_events.c | 12 ++++++++++++
teamd/teamd_per_port.c | 1 +
teamd/teamd_runner_lacp.c | 12 ++++++++++++
4 files changed, 29 insertions(+)
diff --git a/teamd/teamd.h b/teamd/teamd.h
index 701a6a4..d1d0f7f 100644
--- a/teamd/teamd.h
+++ b/teamd/teamd.h
@@ -183,6 +183,8 @@ struct teamd_event_watch_ops {
int (*admin_state_changed)(struct teamd_context *ctx, void *priv);
int (*port_added)(struct teamd_context *ctx,
struct teamd_port *tdport, void *priv);
+ void (*port_removing)(struct teamd_context *ctx,
+ struct teamd_port *tdport, void *priv);
void (*port_removed)(struct teamd_context *ctx,
struct teamd_port *tdport, void *priv);
int (*port_changed)(struct teamd_context *ctx,
@@ -209,6 +211,8 @@ void teamd_refresh_ports(struct teamd_context *ctx);
void teamd_ports_flush_data(struct teamd_context *ctx);
int teamd_event_port_added(struct teamd_context *ctx,
struct teamd_port *tdport);
+void teamd_event_port_removing(struct teamd_context *ctx,
+ struct teamd_port *tdport);
void teamd_event_port_removed(struct teamd_context *ctx,
struct teamd_port *tdport);
int teamd_event_port_changed(struct teamd_context *ctx,
diff --git a/teamd/teamd_events.c b/teamd/teamd_events.c
index bd4dcc1..ff39990 100644
--- a/teamd/teamd_events.c
+++ b/teamd/teamd_events.c
@@ -76,6 +76,18 @@ int teamd_event_port_added(struct teamd_context *ctx,
return 0;
}
+void teamd_event_port_removing(struct teamd_context *ctx,
+ struct teamd_port *tdport)
+{
+ struct event_watch_item *watch;
+
+ list_for_each_node_entry(watch, &ctx->event_watch_list, list) {
+ if (!watch->ops->port_removing)
+ continue;
+ watch->ops->port_removing(ctx, tdport, watch->priv);
+ }
+}
+
void teamd_event_port_removed(struct teamd_context *ctx,
struct teamd_port *tdport)
{
diff --git a/teamd/teamd_per_port.c b/teamd/teamd_per_port.c
index cefd6c2..68fc553 100644
--- a/teamd/teamd_per_port.c
+++ b/teamd/teamd_per_port.c
@@ -358,6 +358,7 @@ static int teamd_port_remove(struct teamd_context *ctx,
teamd_log_dbg(ctx, "%s: Removing port (found ifindex \"%d\").",
tdport->ifname, tdport->ifindex);
+ teamd_event_port_removing(ctx, tdport);
err = team_port_remove(ctx->th, tdport->ifindex);
if (err)
teamd_log_err("%s: Failed to remove port.", tdport->ifname);
diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c
index 82b8b86..89f462a 100644
--- a/teamd/teamd_runner_lacp.c
+++ b/teamd/teamd_runner_lacp.c
@@ -1779,6 +1779,17 @@ static int lacp_event_watch_port_added(struct teamd_context *ctx,
return teamd_balancer_port_added(lacp->tb, tdport);
}
+static void lacp_event_watch_port_removing(struct teamd_context *ctx,
+ struct teamd_port *tdport, void *priv)
+{
+ struct lacp *lacp = priv;
+ struct lacp_port *lacp_port = lacp_port_get(lacp, tdport);
+
+ /* Ensure that no incoming LACPDU is going to be processed. */
+ teamd_loop_callback_disable(ctx, LACP_SOCKET_CB_NAME, lacp_port);
+ lacp_port_set_state(lacp_port, PORT_STATE_DISABLED);
+}
+
static void lacp_event_watch_port_removed(struct teamd_context *ctx,
struct teamd_port *tdport, void *priv)
{
@@ -1845,6 +1856,7 @@ static const struct teamd_event_watch_ops lacp_event_watch_ops = {
.hwaddr_changed = lacp_event_watch_hwaddr_changed,
.port_hwaddr_changed = lacp_event_watch_port_hwaddr_changed,
.port_added = lacp_event_watch_port_added,
+ .port_removing = lacp_event_watch_port_removing,
.port_removed = lacp_event_watch_port_removed,
.port_changed = lacp_event_watch_port_changed,
.admin_state_changed = lacp_event_watch_admin_state_changed,

View File

@ -0,0 +1,32 @@
From 23ab49c4df0c06eb629ce2e3bb4c4dd7c527975a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Thu, 2 Feb 2023 17:00:51 +0100
Subject: [PATCH] teamd: lacp: don't move the port state from disabled when
admin state is down
When the team admin state is down, the port should stay in disabled
state, no matter what's happening. So check the admin state and bail out
in that case.
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
teamd/teamd_runner_lacp.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/teamd/teamd_runner_lacp.c b/teamd/teamd_runner_lacp.c
index 51c7714..a76c372 100644
--- a/teamd/teamd_runner_lacp.c
+++ b/teamd/teamd_runner_lacp.c
@@ -956,9 +956,11 @@ static int lacpdu_send(struct lacp_port *lacp_port);
static int lacp_port_set_state(struct lacp_port *lacp_port,
enum lacp_port_state new_state)
{
+ bool admin_state = team_get_ifinfo_admin_state(lacp_port->ctx->ifinfo);
int err;
- if (new_state == lacp_port->state)
+ if (new_state == lacp_port->state ||
+ (!admin_state && new_state != PORT_STATE_DISABLED))
return 0;
if (new_state == PORT_STATE_DISABLED)
lacp_port_periodic_off(lacp_port);

View File

@ -10,3 +10,5 @@
0010-When-read-of-timerfd-returned-0-don-t-consider-this-.patch
0011-Remove-extensive-debug-output.patch
0012-Increase-min_ports-upper-limit-to-1024.patch
0013-set-port-to-disabled-state-during-removal.patch
0014-dont-move-the-port-state-from-disabled-when-admin-state-is-down.patch