diff --git a/src/libteam/0011-libteam-resynchronize-ifinfo-after-lost-RTNLGRP_LINK-.patch b/src/libteam/0011-libteam-resynchronize-ifinfo-after-lost-RTNLGRP_LINK-.patch new file mode 100644 index 0000000000..fcda983011 --- /dev/null +++ b/src/libteam/0011-libteam-resynchronize-ifinfo-after-lost-RTNLGRP_LINK-.patch @@ -0,0 +1,302 @@ +commit 046fb6ba0aec8246075b18d787daec43201566fa +Author: Antti Tiainen +Date: Mon Feb 6 15:41:05 2017 +0200 + + libteam: resynchronize ifinfo after lost RTNLGRP_LINK notifications + + When there's a large number of interfaces (e.g. vlans), teamd loses + link notifications as it cannot read them as fast as kernel is + broadcasting them. This often prevents teamd starting properly if + started concurrently when other links are being set up. It can also + fail when it's up and running, especially in the cases where the team + device itself has a lot of vlans under it. + + This can easily be reproduces by simple example (in SMP system) by + manually adding team device with a bunch of vlans, putting it up, + and starting teamd with --take-over option: + + root@debian:~# ip link add name team0 type team + root@debian:~# for i in `seq 100 150` ; do + > ip link add link team0 name team0.$i type vlan id $i ; done + root@debian:~# ip link set team0 up + root@debian:~# cat teamd.conf + { + "device": "team0", + "runner": { + "name": "activebackup" + }, + "ports": { + "eth1": {}, + "eth2": {} + } + } + root@debian:~# teamd -o -N -f teamd.conf + + At this point, teamd will not give any error messages or other + indication that something is wrong. But state will not look healthy: + + root@debian:~# teamdctl team0 state + setup: + runner: activebackup + ports: + eth1 + link watches: + link summary: up + instance[link_watch_0]: + name: ethtool + link: up + down count: 0 + Failed to parse JSON port dump. + command call failed (Invalid argument) + + If checking state dump, it will show that port eth2 is missing info. + Running strace to teamd will reveal that there's one recvmsgs() that + returned -1 with errno ENOBUFS. What happened in this example was + that when teamd started, all vlans got carrier up, and kernel flooded + notifications faster than teamd could read them. It then lost events + related to port eth2 getting enslaved and up. + + The socket that joins RTNLGRP_LINK notifications uses default libnl + 32k buffer size. Netlink messages are large (over 1k), and this buffer + gets easily full. Kernel neither knows nor cares were notification + broadcasts delivered. This cannot be fixed by simply increasing the + buffer size, as there's no size that is guaranteed to work in every + use case, and this can require several megabytes of buffer (a way over + normal rmem_max limit) if there are hunderds of vlans. + + Only way to recover from this is to refresh all ifinfo list, as it's + invalidated at this point. It cannot easily work around of this by + just refreshing team device and its ports, because library side might + not have ports linked due to events missed, and it doesn't know about + teamd configuration. + + Checks now return value of nl_recvmsgs_default() for event socket. In + case of ENOBUFS (which libnl nicely changes to ENOMEM), refreshes + all ifinfo list. get_ifinfo_list() also checks now for removed interfaces + in case of missed dellink event. Currently all TEAM_IFINFO_CHANGE + handlers processed events one by one, so it had to be changed to support + multiple ifinfo changes. For this, ifinfo changed flags are cleared + and removed entries destroyed only after all handlers have been called. + + Also, increased nl_cli.sock_event receive buffers to 96k like all other + sockets. Added possibility to change this via environment variable. + + Signed-off-by: Antti Tiainen + Signed-off-by: Jiri Pirko + +diff --git a/libteam/ifinfo.c b/libteam/ifinfo.c +index 72155ae..5c32a9c 100644 +--- a/libteam/ifinfo.c ++++ b/libteam/ifinfo.c +@@ -72,6 +72,10 @@ struct team_ifinfo { + #define CHANGED_PHYS_PORT_ID (1 << 5) + #define CHANGED_PHYS_PORT_ID_LEN (1 << 6) + #define CHANGED_ADMIN_STATE (1 << 7) ++/* This is only used when tagging interfaces for finding ++ * removed, and thus not included to CHANGED_ANY. ++ */ ++#define CHANGED_REFRESHING (1 << 8) + #define CHANGED_ANY (CHANGED_REMOVED | CHANGED_HWADDR | \ + CHANGED_HWADDR_LEN | CHANGED_IFNAME | \ + CHANGED_MASTER_IFINDEX | CHANGED_PHYS_PORT_ID | \ +@@ -202,7 +206,7 @@ static struct team_ifinfo *ifinfo_find(struct team_handle *th, uint32_t ifindex) + return NULL; + } + +-static void clear_last_changed(struct team_handle *th) ++void ifinfo_clear_changed(struct team_handle *th) + { + struct team_ifinfo *ifinfo; + +@@ -236,7 +240,7 @@ static void ifinfo_destroy(struct team_ifinfo *ifinfo) + free(ifinfo); + } + +-static void ifinfo_destroy_removed(struct team_handle *th) ++void ifinfo_destroy_removed(struct team_handle *th) + { + struct team_ifinfo *ifinfo, *tmp; + +@@ -254,8 +258,6 @@ static void obj_input_newlink(struct nl_object *obj, void *arg, bool event) + uint32_t ifindex; + int err; + +- ifinfo_destroy_removed(th); +- + link = (struct rtnl_link *) obj; + + ifindex = rtnl_link_get_ifindex(link); +@@ -269,7 +271,7 @@ static void obj_input_newlink(struct nl_object *obj, void *arg, bool event) + return; + } + +- clear_last_changed(th); ++ clear_changed(ifinfo); + ifinfo_update(ifinfo, link); + + if (event) +@@ -292,8 +294,6 @@ static void event_handler_obj_input_dellink(struct nl_object *obj, void *arg) + uint32_t ifindex; + int err; + +- ifinfo_destroy_removed(th); +- + link = (struct rtnl_link *) obj; + + ifindex = rtnl_link_get_ifindex(link); +@@ -311,7 +311,7 @@ static void event_handler_obj_input_dellink(struct nl_object *obj, void *arg) + return; + } + +- clear_last_changed(th); ++ clear_changed(ifinfo); + set_changed(ifinfo, CHANGED_REMOVED); + set_call_change_handlers(th, TEAM_IFINFO_CHANGE); + } +@@ -367,6 +367,14 @@ int get_ifinfo_list(struct team_handle *th) + }; + int ret; + int retry = 1; ++ struct team_ifinfo *ifinfo; ++ ++ /* Tag all ifinfo, this is cleared in newlink handler. ++ * Any interface that has this after dump is processed ++ * has been removed. ++ */ ++ list_for_each_node_entry(ifinfo, &th->ifinfo_list, list) ++ set_changed(ifinfo, CHANGED_REFRESHING); + + while (retry) { + retry = 0; +@@ -395,6 +403,15 @@ int get_ifinfo_list(struct team_handle *th) + retry = 1; + } + } ++ ++ list_for_each_node_entry(ifinfo, &th->ifinfo_list, list) { ++ if (is_changed(ifinfo, CHANGED_REFRESHING)) { ++ clear_changed(ifinfo); ++ set_changed(ifinfo, CHANGED_REMOVED); ++ set_call_change_handlers(th, TEAM_IFINFO_CHANGE); ++ } ++ } ++ + ret = check_call_change_handlers(th, TEAM_IFINFO_CHANGE); + if (ret < 0) + err(th, "get_ifinfo_list: check_call_change_handers failed"); +diff --git a/libteam/libteam.c b/libteam/libteam.c +index ac187aa..d5f22cd 100644 +--- a/libteam/libteam.c ++++ b/libteam/libteam.c +@@ -236,6 +236,10 @@ int check_call_change_handlers(struct team_handle *th, + break; + } + } ++ if (call_type_mask & TEAM_IFINFO_CHANGE) { ++ ifinfo_destroy_removed(th); ++ ifinfo_clear_changed(th); ++ } + th->change_handler.pending_type_mask &= ~call_type_mask; + return err; + } +@@ -546,6 +550,11 @@ int team_destroy(struct team_handle *th) + #endif + /* \endcond */ + ++/* libnl uses default 32k socket receive buffer size, ++ * whicn can get too small. Use 96k for all sockets. ++ */ ++#define NETLINK_RCVBUF 983040 ++ + /** + * @param th libteam library context + * @param ifindex team device interface index +@@ -561,6 +570,8 @@ int team_init(struct team_handle *th, uint32_t ifindex) + int err; + int grp_id; + int val; ++ int eventbufsize; ++ const char *env; + + if (!ifindex) { + err(th, "Passed interface index %d is not valid.", ifindex); +@@ -589,12 +600,12 @@ int team_init(struct team_handle *th, uint32_t ifindex) + return -errno; + } + +- err = nl_socket_set_buffer_size(th->nl_sock, 98304, 0); ++ err = nl_socket_set_buffer_size(th->nl_sock, NETLINK_RCVBUF, 0); + if (err) { + err(th, "Failed to set buffer size of netlink sock."); + return -nl2syserr(err); + } +- err = nl_socket_set_buffer_size(th->nl_sock_event, 98304, 0); ++ err = nl_socket_set_buffer_size(th->nl_sock_event, NETLINK_RCVBUF, 0); + if (err) { + err(th, "Failed to set buffer size of netlink event sock."); + return -nl2syserr(err); +@@ -627,6 +638,25 @@ int team_init(struct team_handle *th, uint32_t ifindex) + nl_socket_modify_cb(th->nl_cli.sock_event, NL_CB_VALID, + NL_CB_CUSTOM, cli_event_handler, th); + nl_cli_connect(th->nl_cli.sock_event, NETLINK_ROUTE); ++ ++ env = getenv("TEAM_EVENT_BUFSIZE"); ++ if (env) { ++ eventbufsize = strtol(env, NULL, 10); ++ /* ignore other errors, libnl forces minimum 32k and ++ * too large values are truncated to system rmem_max ++ */ ++ if (eventbufsize < 0) ++ eventbufsize = 0; ++ } else { ++ eventbufsize = NETLINK_RCVBUF; ++ } ++ ++ err = nl_socket_set_buffer_size(th->nl_cli.sock_event, eventbufsize, 0); ++ if (err) { ++ err(th, "Failed to set cli event socket buffer size."); ++ return err; ++ } ++ + err = nl_socket_add_membership(th->nl_cli.sock_event, RTNLGRP_LINK); + if (err < 0) { + err(th, "Failed to add netlink membership."); +@@ -767,7 +797,23 @@ static int get_cli_sock_event_fd(struct team_handle *th) + + static int cli_sock_event_handler(struct team_handle *th) + { +- nl_recvmsgs_default(th->nl_cli.sock_event); ++ int err; ++ ++ err = nl_recvmsgs_default(th->nl_cli.sock_event); ++ err = -nl2syserr(err); ++ ++ /* libnl thinks ENOBUFS and ENOMEM are same. Hope it was ENOBUFS. */ ++ if (err == -ENOMEM) { ++ warn(th, "Lost link notifications from kernel."); ++ /* There's no way to know what events were lost and no ++ * way to get them again. Refresh all. ++ */ ++ err = get_ifinfo_list(th); ++ } ++ ++ if (err) ++ return err; ++ + return check_call_change_handlers(th, TEAM_IFINFO_CHANGE); + } + +diff --git a/libteam/team_private.h b/libteam/team_private.h +index a07632f..a5eb0be 100644 +--- a/libteam/team_private.h ++++ b/libteam/team_private.h +@@ -115,6 +115,9 @@ int ifinfo_link_with_port(struct team_handle *th, uint32_t ifindex, + int ifinfo_link(struct team_handle *th, uint32_t ifindex, + struct team_ifinfo **p_ifinfo); + void ifinfo_unlink(struct team_ifinfo *ifinfo); ++void ifinfo_clear_changed(struct team_handle *th); ++void ifinfo_destroy_removed(struct team_handle *th); ++int get_ifinfo_list(struct team_handle *th); + int get_options_handler(struct nl_msg *msg, void *arg); + int option_list_alloc(struct team_handle *th); + int option_list_init(struct team_handle *th); diff --git a/src/libteam/series b/src/libteam/series index 415d6f79af..3cdcbd993c 100644 --- a/src/libteam/series +++ b/src/libteam/series @@ -7,4 +7,5 @@ 0007-Skip-setting-the-same-hwaddr-to-lag-port-to-avoid-di.patch 0008-teamd-register-change-handler-for-TEAM_IFINFO_CHANGE.patch 0009-teamd-prevent-private-change-handler-reentrance.patch -0010-teamd-lacp-update-port-state-according-to-partners-sy.patch \ No newline at end of file +0010-teamd-lacp-update-port-state-according-to-partners-sy.patch +0011-libteam-resynchronize-ifinfo-after-lost-RTNLGRP_LINK-.patch \ No newline at end of file