Downport the netlink patch to libteam1.26. Increase netlink buffers (#2822)
This commit is contained in:
parent
5004d2b4fe
commit
525ee59165
@ -0,0 +1,302 @@
|
||||
commit 046fb6ba0aec8246075b18d787daec43201566fa
|
||||
Author: Antti Tiainen <atiainen@forcepoint.com>
|
||||
Date: Mon Feb 6 15:41:05 2017 +0200
|
||||
|
||||
libteam: resynchronize ifinfo after lost RTNLGRP_LINK notifications
|
||||
|
||||
When there's a large number of interfaces (e.g. vlans), teamd loses
|
||||
link notifications as it cannot read them as fast as kernel is
|
||||
broadcasting them. This often prevents teamd starting properly if
|
||||
started concurrently when other links are being set up. It can also
|
||||
fail when it's up and running, especially in the cases where the team
|
||||
device itself has a lot of vlans under it.
|
||||
|
||||
This can easily be reproduces by simple example (in SMP system) by
|
||||
manually adding team device with a bunch of vlans, putting it up,
|
||||
and starting teamd with --take-over option:
|
||||
|
||||
root@debian:~# ip link add name team0 type team
|
||||
root@debian:~# for i in `seq 100 150` ; do
|
||||
> ip link add link team0 name team0.$i type vlan id $i ; done
|
||||
root@debian:~# ip link set team0 up
|
||||
root@debian:~# cat teamd.conf
|
||||
{
|
||||
"device": "team0",
|
||||
"runner": {
|
||||
"name": "activebackup"
|
||||
},
|
||||
"ports": {
|
||||
"eth1": {},
|
||||
"eth2": {}
|
||||
}
|
||||
}
|
||||
root@debian:~# teamd -o -N -f teamd.conf
|
||||
|
||||
At this point, teamd will not give any error messages or other
|
||||
indication that something is wrong. But state will not look healthy:
|
||||
|
||||
root@debian:~# teamdctl team0 state
|
||||
setup:
|
||||
runner: activebackup
|
||||
ports:
|
||||
eth1
|
||||
link watches:
|
||||
link summary: up
|
||||
instance[link_watch_0]:
|
||||
name: ethtool
|
||||
link: up
|
||||
down count: 0
|
||||
Failed to parse JSON port dump.
|
||||
command call failed (Invalid argument)
|
||||
|
||||
If checking state dump, it will show that port eth2 is missing info.
|
||||
Running strace to teamd will reveal that there's one recvmsgs() that
|
||||
returned -1 with errno ENOBUFS. What happened in this example was
|
||||
that when teamd started, all vlans got carrier up, and kernel flooded
|
||||
notifications faster than teamd could read them. It then lost events
|
||||
related to port eth2 getting enslaved and up.
|
||||
|
||||
The socket that joins RTNLGRP_LINK notifications uses default libnl
|
||||
32k buffer size. Netlink messages are large (over 1k), and this buffer
|
||||
gets easily full. Kernel neither knows nor cares were notification
|
||||
broadcasts delivered. This cannot be fixed by simply increasing the
|
||||
buffer size, as there's no size that is guaranteed to work in every
|
||||
use case, and this can require several megabytes of buffer (a way over
|
||||
normal rmem_max limit) if there are hunderds of vlans.
|
||||
|
||||
Only way to recover from this is to refresh all ifinfo list, as it's
|
||||
invalidated at this point. It cannot easily work around of this by
|
||||
just refreshing team device and its ports, because library side might
|
||||
not have ports linked due to events missed, and it doesn't know about
|
||||
teamd configuration.
|
||||
|
||||
Checks now return value of nl_recvmsgs_default() for event socket. In
|
||||
case of ENOBUFS (which libnl nicely changes to ENOMEM), refreshes
|
||||
all ifinfo list. get_ifinfo_list() also checks now for removed interfaces
|
||||
in case of missed dellink event. Currently all TEAM_IFINFO_CHANGE
|
||||
handlers processed events one by one, so it had to be changed to support
|
||||
multiple ifinfo changes. For this, ifinfo changed flags are cleared
|
||||
and removed entries destroyed only after all handlers have been called.
|
||||
|
||||
Also, increased nl_cli.sock_event receive buffers to 96k like all other
|
||||
sockets. Added possibility to change this via environment variable.
|
||||
|
||||
Signed-off-by: Antti Tiainen <atiainen@forcepoint.com>
|
||||
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
|
||||
|
||||
diff --git a/libteam/ifinfo.c b/libteam/ifinfo.c
|
||||
index 72155ae..5c32a9c 100644
|
||||
--- a/libteam/ifinfo.c
|
||||
+++ b/libteam/ifinfo.c
|
||||
@@ -72,6 +72,10 @@ struct team_ifinfo {
|
||||
#define CHANGED_PHYS_PORT_ID (1 << 5)
|
||||
#define CHANGED_PHYS_PORT_ID_LEN (1 << 6)
|
||||
#define CHANGED_ADMIN_STATE (1 << 7)
|
||||
+/* This is only used when tagging interfaces for finding
|
||||
+ * removed, and thus not included to CHANGED_ANY.
|
||||
+ */
|
||||
+#define CHANGED_REFRESHING (1 << 8)
|
||||
#define CHANGED_ANY (CHANGED_REMOVED | CHANGED_HWADDR | \
|
||||
CHANGED_HWADDR_LEN | CHANGED_IFNAME | \
|
||||
CHANGED_MASTER_IFINDEX | CHANGED_PHYS_PORT_ID | \
|
||||
@@ -202,7 +206,7 @@ static struct team_ifinfo *ifinfo_find(struct team_handle *th, uint32_t ifindex)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-static void clear_last_changed(struct team_handle *th)
|
||||
+void ifinfo_clear_changed(struct team_handle *th)
|
||||
{
|
||||
struct team_ifinfo *ifinfo;
|
||||
|
||||
@@ -236,7 +240,7 @@ static void ifinfo_destroy(struct team_ifinfo *ifinfo)
|
||||
free(ifinfo);
|
||||
}
|
||||
|
||||
-static void ifinfo_destroy_removed(struct team_handle *th)
|
||||
+void ifinfo_destroy_removed(struct team_handle *th)
|
||||
{
|
||||
struct team_ifinfo *ifinfo, *tmp;
|
||||
|
||||
@@ -254,8 +258,6 @@ static void obj_input_newlink(struct nl_object *obj, void *arg, bool event)
|
||||
uint32_t ifindex;
|
||||
int err;
|
||||
|
||||
- ifinfo_destroy_removed(th);
|
||||
-
|
||||
link = (struct rtnl_link *) obj;
|
||||
|
||||
ifindex = rtnl_link_get_ifindex(link);
|
||||
@@ -269,7 +271,7 @@ static void obj_input_newlink(struct nl_object *obj, void *arg, bool event)
|
||||
return;
|
||||
}
|
||||
|
||||
- clear_last_changed(th);
|
||||
+ clear_changed(ifinfo);
|
||||
ifinfo_update(ifinfo, link);
|
||||
|
||||
if (event)
|
||||
@@ -292,8 +294,6 @@ static void event_handler_obj_input_dellink(struct nl_object *obj, void *arg)
|
||||
uint32_t ifindex;
|
||||
int err;
|
||||
|
||||
- ifinfo_destroy_removed(th);
|
||||
-
|
||||
link = (struct rtnl_link *) obj;
|
||||
|
||||
ifindex = rtnl_link_get_ifindex(link);
|
||||
@@ -311,7 +311,7 @@ static void event_handler_obj_input_dellink(struct nl_object *obj, void *arg)
|
||||
return;
|
||||
}
|
||||
|
||||
- clear_last_changed(th);
|
||||
+ clear_changed(ifinfo);
|
||||
set_changed(ifinfo, CHANGED_REMOVED);
|
||||
set_call_change_handlers(th, TEAM_IFINFO_CHANGE);
|
||||
}
|
||||
@@ -367,6 +367,14 @@ int get_ifinfo_list(struct team_handle *th)
|
||||
};
|
||||
int ret;
|
||||
int retry = 1;
|
||||
+ struct team_ifinfo *ifinfo;
|
||||
+
|
||||
+ /* Tag all ifinfo, this is cleared in newlink handler.
|
||||
+ * Any interface that has this after dump is processed
|
||||
+ * has been removed.
|
||||
+ */
|
||||
+ list_for_each_node_entry(ifinfo, &th->ifinfo_list, list)
|
||||
+ set_changed(ifinfo, CHANGED_REFRESHING);
|
||||
|
||||
while (retry) {
|
||||
retry = 0;
|
||||
@@ -395,6 +403,15 @@ int get_ifinfo_list(struct team_handle *th)
|
||||
retry = 1;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ list_for_each_node_entry(ifinfo, &th->ifinfo_list, list) {
|
||||
+ if (is_changed(ifinfo, CHANGED_REFRESHING)) {
|
||||
+ clear_changed(ifinfo);
|
||||
+ set_changed(ifinfo, CHANGED_REMOVED);
|
||||
+ set_call_change_handlers(th, TEAM_IFINFO_CHANGE);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
ret = check_call_change_handlers(th, TEAM_IFINFO_CHANGE);
|
||||
if (ret < 0)
|
||||
err(th, "get_ifinfo_list: check_call_change_handers failed");
|
||||
diff --git a/libteam/libteam.c b/libteam/libteam.c
|
||||
index ac187aa..d5f22cd 100644
|
||||
--- a/libteam/libteam.c
|
||||
+++ b/libteam/libteam.c
|
||||
@@ -236,6 +236,10 @@ int check_call_change_handlers(struct team_handle *th,
|
||||
break;
|
||||
}
|
||||
}
|
||||
+ if (call_type_mask & TEAM_IFINFO_CHANGE) {
|
||||
+ ifinfo_destroy_removed(th);
|
||||
+ ifinfo_clear_changed(th);
|
||||
+ }
|
||||
th->change_handler.pending_type_mask &= ~call_type_mask;
|
||||
return err;
|
||||
}
|
||||
@@ -546,6 +550,11 @@ int team_destroy(struct team_handle *th)
|
||||
#endif
|
||||
/* \endcond */
|
||||
|
||||
+/* libnl uses default 32k socket receive buffer size,
|
||||
+ * whicn can get too small. Use 96k for all sockets.
|
||||
+ */
|
||||
+#define NETLINK_RCVBUF 983040
|
||||
+
|
||||
/**
|
||||
* @param th libteam library context
|
||||
* @param ifindex team device interface index
|
||||
@@ -561,6 +570,8 @@ int team_init(struct team_handle *th, uint32_t ifindex)
|
||||
int err;
|
||||
int grp_id;
|
||||
int val;
|
||||
+ int eventbufsize;
|
||||
+ const char *env;
|
||||
|
||||
if (!ifindex) {
|
||||
err(th, "Passed interface index %d is not valid.", ifindex);
|
||||
@@ -589,12 +600,12 @@ int team_init(struct team_handle *th, uint32_t ifindex)
|
||||
return -errno;
|
||||
}
|
||||
|
||||
- err = nl_socket_set_buffer_size(th->nl_sock, 98304, 0);
|
||||
+ err = nl_socket_set_buffer_size(th->nl_sock, NETLINK_RCVBUF, 0);
|
||||
if (err) {
|
||||
err(th, "Failed to set buffer size of netlink sock.");
|
||||
return -nl2syserr(err);
|
||||
}
|
||||
- err = nl_socket_set_buffer_size(th->nl_sock_event, 98304, 0);
|
||||
+ err = nl_socket_set_buffer_size(th->nl_sock_event, NETLINK_RCVBUF, 0);
|
||||
if (err) {
|
||||
err(th, "Failed to set buffer size of netlink event sock.");
|
||||
return -nl2syserr(err);
|
||||
@@ -627,6 +638,25 @@ int team_init(struct team_handle *th, uint32_t ifindex)
|
||||
nl_socket_modify_cb(th->nl_cli.sock_event, NL_CB_VALID,
|
||||
NL_CB_CUSTOM, cli_event_handler, th);
|
||||
nl_cli_connect(th->nl_cli.sock_event, NETLINK_ROUTE);
|
||||
+
|
||||
+ env = getenv("TEAM_EVENT_BUFSIZE");
|
||||
+ if (env) {
|
||||
+ eventbufsize = strtol(env, NULL, 10);
|
||||
+ /* ignore other errors, libnl forces minimum 32k and
|
||||
+ * too large values are truncated to system rmem_max
|
||||
+ */
|
||||
+ if (eventbufsize < 0)
|
||||
+ eventbufsize = 0;
|
||||
+ } else {
|
||||
+ eventbufsize = NETLINK_RCVBUF;
|
||||
+ }
|
||||
+
|
||||
+ err = nl_socket_set_buffer_size(th->nl_cli.sock_event, eventbufsize, 0);
|
||||
+ if (err) {
|
||||
+ err(th, "Failed to set cli event socket buffer size.");
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
err = nl_socket_add_membership(th->nl_cli.sock_event, RTNLGRP_LINK);
|
||||
if (err < 0) {
|
||||
err(th, "Failed to add netlink membership.");
|
||||
@@ -767,7 +797,23 @@ static int get_cli_sock_event_fd(struct team_handle *th)
|
||||
|
||||
static int cli_sock_event_handler(struct team_handle *th)
|
||||
{
|
||||
- nl_recvmsgs_default(th->nl_cli.sock_event);
|
||||
+ int err;
|
||||
+
|
||||
+ err = nl_recvmsgs_default(th->nl_cli.sock_event);
|
||||
+ err = -nl2syserr(err);
|
||||
+
|
||||
+ /* libnl thinks ENOBUFS and ENOMEM are same. Hope it was ENOBUFS. */
|
||||
+ if (err == -ENOMEM) {
|
||||
+ warn(th, "Lost link notifications from kernel.");
|
||||
+ /* There's no way to know what events were lost and no
|
||||
+ * way to get them again. Refresh all.
|
||||
+ */
|
||||
+ err = get_ifinfo_list(th);
|
||||
+ }
|
||||
+
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
return check_call_change_handlers(th, TEAM_IFINFO_CHANGE);
|
||||
}
|
||||
|
||||
diff --git a/libteam/team_private.h b/libteam/team_private.h
|
||||
index a07632f..a5eb0be 100644
|
||||
--- a/libteam/team_private.h
|
||||
+++ b/libteam/team_private.h
|
||||
@@ -115,6 +115,9 @@ int ifinfo_link_with_port(struct team_handle *th, uint32_t ifindex,
|
||||
int ifinfo_link(struct team_handle *th, uint32_t ifindex,
|
||||
struct team_ifinfo **p_ifinfo);
|
||||
void ifinfo_unlink(struct team_ifinfo *ifinfo);
|
||||
+void ifinfo_clear_changed(struct team_handle *th);
|
||||
+void ifinfo_destroy_removed(struct team_handle *th);
|
||||
+int get_ifinfo_list(struct team_handle *th);
|
||||
int get_options_handler(struct nl_msg *msg, void *arg);
|
||||
int option_list_alloc(struct team_handle *th);
|
||||
int option_list_init(struct team_handle *th);
|
@ -7,4 +7,5 @@
|
||||
0007-Skip-setting-the-same-hwaddr-to-lag-port-to-avoid-di.patch
|
||||
0008-teamd-register-change-handler-for-TEAM_IFINFO_CHANGE.patch
|
||||
0009-teamd-prevent-private-change-handler-reentrance.patch
|
||||
0010-teamd-lacp-update-port-state-according-to-partners-sy.patch
|
||||
0010-teamd-lacp-update-port-state-according-to-partners-sy.patch
|
||||
0011-libteam-resynchronize-ifinfo-after-lost-RTNLGRP_LINK-.patch
|
Loading…
Reference in New Issue
Block a user