Add "bgp bestpath peer-type multipath-relax" to FRR (#5629)

* Add "bgp bestpath peer-type multipath-relax" to frr This new BGP configuration is akin to "bgp bestpath aspath multipath-relax". When applied, paths learned from different peer types will be eligible to be considered for multipath (ECMP). Paths from all of eBGP, iBGP, and confederation peers may be included in a multipath group if they are otherwise equal cost. When such a multipath group is created, it is not desirable for iBGP nexthops to be discarded from the FIB because they are not directly connected. So when publishing the nexthop group to zebra, bgpd will allow recursive resolution, but only when there are iBGP-learned paths in the group. This change is merged in FRR in this PR FRRouting/frr#8056 Signed-off-by: Joanne Mikkelson <jmmikkel@arista.com>
2021-04-16 11:20:15 -07:00 · 2021-04-16 11:20:15 -07:00 · e1bee859aa
commit e1bee859aa
parent 43342b33b8
2 changed files with 347 additions and 0 deletions
--- a/src/sonic-frr/patch/0009-Add-bgp-bestpath-peer-type-multipath-relax.patch
+++ b/src/sonic-frr/patch/0009-Add-bgp-bestpath-peer-type-multipath-relax.patch
@ -0,0 +1,346 @@
+From a70c630a504e041da1c2441337b2d47e8fdd3d30 Mon Sep 17 00:00:00 2001
+From: Joanne Mikkelson <jmmikkel@arista.com>
+Date: Mon, 29 Jun 2020 13:31:49 -0700
+Subject: [PATCH] Add "bgp bestpath peer-type multipath-relax"
+
+This new BGP configuration is akin to "bgp bestpath aspath
+multipath-relax". When applied, paths learned from different peer types
+will be eligible to be considered for multipath (ECMP). Paths from all
+of eBGP, iBGP, and confederation peers may be included in multipaths
+if they are otherwise equal cost.
+
+This change preserves the existing bestpath behavior of step 10's result
+being returned, not the result from steps 8 and 9, in the case where
+both 8+9 and 10 determine a winner.
+
+When "bgp bestpath peer-type multipath-relax" is enabled, multipaths
+with both eBGP and iBGP learned routes may exist. It is not desirable
+for the iBGP next hops to be discarded from the FIB because they are not
+directly connected. When publishing a nexthop group to zebra, the
+ZEBRA_FLAG_ALLOW_RECURSION flag is normally not set when the best path
+is eBGP; when "bgp bestpath aspath multipath-relax" is configured, the
+flag will now be set if any paths are from iBGP peers. This leaves
+all-eBGP multipaths still requiring nexthops over connected routes.
+---
+ bgpd/bgp_route.c | 76 ++++++++++++++++++++++++++++++++++--------------
+ bgpd/bgp_vty.c   | 43 +++++++++++++++++++++++++++
+ bgpd/bgp_zebra.c | 15 +++++++++-
+ bgpd/bgpd.h      |  1 +
+ doc/user/bgp.rst |  7 +++++
+ 5 files changed, 119 insertions(+), 23 deletions(-)
+
+diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
+index 1c646c03e..73dbf3c5f 100644
+--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
+@@ -540,6 +540,8 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ 	int internal_as_route;
+ 	int confed_as_route;
+ 	int ret = 0;
+	int igp_metric_ret = 0;
+	int peer_sort_ret = -1;
+ 	char new_buf[PATH_ADDPATH_STR_BUFFER];
+ 	char exist_buf[PATH_ADDPATH_STR_BUFFER];
+ 	uint32_t new_mm_seq;
+@@ -940,7 +942,9 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ 			zlog_debug(
+ 				"%s: %s wins over %s due to eBGP peer > iBGP peer",
+ 				pfx_buf, new_buf, exist_buf);
+-		return 1;
+		if (!CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+			return 1;
+		peer_sort_ret = 1;
+ 	}
+ 
+ 	if (exist_sort == BGP_PEER_EBGP
+@@ -950,7 +954,9 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ 			zlog_debug(
+ 				"%s: %s loses to %s due to iBGP peer < eBGP peer",
+ 				pfx_buf, new_buf, exist_buf);
+-		return 0;
+		if (!CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+			return 0;
+		peer_sort_ret = 0;
+ 	}
+ 
+ 	/* 8. IGP metric check. */
+@@ -962,19 +968,19 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ 		existm = exist->extra->igpmetric;
+ 
+ 	if (newm < existm) {
+-		if (debug)
+		if (debug && peer_sort_ret < 0)
+ 			zlog_debug(
+ 				"%s: %s wins over %s due to IGP metric %d < %d",
+ 				pfx_buf, new_buf, exist_buf, newm, existm);
+-		ret = 1;
+		igp_metric_ret = 1;
+ 	}
+ 
+ 	if (newm > existm) {
+-		if (debug)
+		if (debug && peer_sort_ret < 0)
+ 			zlog_debug(
+ 				"%s: %s loses to %s due to IGP metric %d > %d",
+ 				pfx_buf, new_buf, exist_buf, newm, existm);
+-		ret = 0;
+		igp_metric_ret = 0;
+ 	}
+ 
+ 	/* 9. Same IGP metric. Compare the cluster list length as
+@@ -992,21 +998,21 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ 			existm = BGP_CLUSTER_LIST_LENGTH(exist->attr);
+ 
+ 			if (newm < existm) {
+-				if (debug)
+				if (debug && peer_sort_ret < 0)
+ 					zlog_debug(
+ 						"%s: %s wins over %s due to CLUSTER_LIST length %d < %d",
+ 						pfx_buf, new_buf, exist_buf,
+ 						newm, existm);
+-				ret = 1;
+				igp_metric_ret = 1;
+ 			}
+ 
+ 			if (newm > existm) {
+-				if (debug)
+				if (debug && peer_sort_ret < 0)
+ 					zlog_debug(
+ 						"%s: %s loses to %s due to CLUSTER_LIST length %d > %d",
+ 						pfx_buf, new_buf, exist_buf,
+ 						newm, existm);
+-				ret = 0;
+				igp_metric_ret = 0;
+ 			}
+ 		}
+ 	}
+@@ -1020,7 +1026,10 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ 				zlog_debug(
+ 					"%s: %s wins over %s due to confed-external peer > confed-internal peer",
+ 					pfx_buf, new_buf, exist_buf);
+-			return 1;
+			if (!CHECK_FLAG(bgp->flags,
+					BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+				return 1;
+			peer_sort_ret = 1;
+ 		}
+ 
+ 		if (exist_sort == BGP_PEER_CONFED
+@@ -1030,7 +1039,10 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ 				zlog_debug(
+ 					"%s: %s loses to %s due to confed-internal peer < confed-external peer",
+ 					pfx_buf, new_buf, exist_buf);
+-			return 0;
+			if (!CHECK_FLAG(bgp->flags,
+					BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+				return 0;
+			peer_sort_ret = 0;
+ 		}
+ 	}
+ 
+@@ -1091,20 +1103,40 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ 		 * TODO: If unequal cost ibgp multipath is enabled we can
+ 		 * mark the paths as equal here instead of returning
+ 		 */
+-		if (debug) {
+-			if (ret == 1)
+-				zlog_debug(
+-					"%s: %s wins over %s after IGP metric comparison",
+-					pfx_buf, new_buf, exist_buf);
+-			else
+-				zlog_debug(
+-					"%s: %s loses to %s after IGP metric comparison",
+-					pfx_buf, new_buf, exist_buf);
+
+		/* Prior to the addition of BGP_FLAG_PEERTYPE_MULTIPATH_RELAX,
+		 * if either step 7 or 10 (peer type checks) yielded a winner,
+		 * that result was returned immediately. Returning from step 10
+		 * ignored the return value computed in steps 8 and 9 (IGP
+		 * metric checks). In order to preserve that behavior, if
+		 * peer_sort_ret is set, return that rather than igp_metric_ret.
+		 */
+		ret = peer_sort_ret;
+		if (peer_sort_ret < 0) {
+			ret = igp_metric_ret;
+			if (debug) {
+				if (ret == 1)
+					zlog_debug(
+						"%s: %s wins over %s after IGP metric comparison",
+						pfx_buf, new_buf, exist_buf);
+				else
+					zlog_debug(
+						"%s: %s loses to %s after IGP metric comparison",
+						pfx_buf, new_buf, exist_buf);
+			}
+			*reason = bgp_path_selection_igp_metric;
+ 		}
+-		*reason = bgp_path_selection_igp_metric;
+ 		return ret;
+ 	}
+ 
+	/*
+	 * At this point, the decision whether to set *paths_eq = 1 has been
+	 * completed. If we deferred returning because of bestpath peer-type
+	 * relax configuration, return now.
+	 */
+	if (peer_sort_ret >= 0)
+		return peer_sort_ret;
+
+ 	/* 12. If both paths are external, prefer the path that was received
+ 	   first (the oldest one).  This step minimizes route-flap, since a
+ 	   newer path won't displace an older one, even if it was the
+diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
+index bb2f89f9e..4d1ce5a46 100644
+--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
+@@ -3009,6 +3009,37 @@ DEFUN (no_bgp_bestpath_aspath_multipath_relax,
+ 	return CMD_SUCCESS;
+ }
+ 
+/* "bgp bestpath peer-type multipath-relax" configuration. */
+DEFUN(bgp_bestpath_peer_type_multipath_relax,
+      bgp_bestpath_peer_type_multipath_relax_cmd,
+      "bgp bestpath peer-type multipath-relax",
+      BGP_STR
+      "Change the default bestpath selection\n"
+      "Peer type\n"
+      "Allow load sharing across routes learned from different peer types\n")
+{
+	VTY_DECLVAR_CONTEXT(bgp, bgp);
+	SET_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX);
+	bgp_recalculate_all_bestpaths(bgp);
+
+	return CMD_SUCCESS;
+}
+
+DEFUN(no_bgp_bestpath_peer_type_multipath_relax,
+      no_bgp_bestpath_peer_type_multipath_relax_cmd,
+      "no bgp bestpath peer-type multipath-relax",
+      NO_STR BGP_STR
+      "Change the default bestpath selection\n"
+      "Peer type\n"
+      "Allow load sharing across routes learned from different peer types\n")
+{
+	VTY_DECLVAR_CONTEXT(bgp, bgp);
+	UNSET_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX);
+	bgp_recalculate_all_bestpaths(bgp);
+
+	return CMD_SUCCESS;
+}
+
+ /* "bgp log-neighbor-changes" configuration.  */
+ DEFUN (bgp_log_neighbor_changes,
+        bgp_log_neighbor_changes_cmd,
+@@ -8999,6 +9030,9 @@ static void bgp_show_bestpath_json(struct bgp *bgp, json_object *json)
+ 	} else
+ 		json_object_string_add(bestpath, "multiPathRelax", "false");
+ 
+	if (CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+		json_object_boolean_true_add(bestpath, "peerTypeRelax");
+
+ 	if (CHECK_FLAG(bgp->flags, BGP_FLAG_COMPARE_ROUTER_ID))
+ 		json_object_string_add(bestpath, "compareRouterId", "true");
+ 	if (CHECK_FLAG(bgp->flags, BGP_FLAG_MED_CONFED)
+@@ -15750,6 +15784,10 @@ int bgp_config_write(struct vty *vty)
+ 			vty_out(vty, "\n");
+ 		}
+ 
+		if (CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+			vty_out(vty,
+				" bgp bestpath peer-type multipath-relax\n");
+
+ 		/* Link bandwidth handling. */
+ 		if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW)
+ 			vty_out(vty, " bgp bestpath bandwidth ignore\n");
+@@ -16214,6 +16252,11 @@ void bgp_vty_init(void)
+ 	install_element(BGP_NODE, &bgp_bestpath_aspath_multipath_relax_cmd);
+ 	install_element(BGP_NODE, &no_bgp_bestpath_aspath_multipath_relax_cmd);
+ 
+	/* "bgp bestpath peer-type multipath-relax" commands */
+	install_element(BGP_NODE, &bgp_bestpath_peer_type_multipath_relax_cmd);
+	install_element(BGP_NODE,
+			&no_bgp_bestpath_peer_type_multipath_relax_cmd);
+
+ 	/* "bgp log-neighbor-changes" commands */
+ 	install_element(BGP_NODE, &bgp_log_neighbor_changes_cmd);
+ 	install_element(BGP_NODE, &no_bgp_log_neighbor_changes_cmd);
+diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
+index b20323852..4f7cfeaa2 100644
+--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
+@@ -1184,6 +1184,7 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p,
+ 	int nh_family;
+ 	unsigned int valid_nh_count = 0;
+ 	int has_valid_label = 0;
+	bool allow_recursion = false;
+ 	uint8_t distance;
+ 	struct peer *peer;
+ 	struct bgp_path_info *mpinfo;
+@@ -1259,7 +1260,7 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p,
+ 	    || CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)
+ 	    || CHECK_FLAG(bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
+ 
+-		SET_FLAG(api.flags, ZEBRA_FLAG_ALLOW_RECURSION);
+		allow_recursion = true;
+ 
+ 	if (info->attr->rmap_table_id) {
+ 		SET_FLAG(api.message, ZAPI_MESSAGE_TABLEID);
+@@ -1397,6 +1398,15 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p,
+ 		if (!nh_updated)
+ 			continue;
+ 
+		/* Allow recursion if it is a multipath group with both
+		 * eBGP and iBGP paths.
+		 */
+		if (!allow_recursion
+		    && CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX)
+		    && (mpinfo->peer->sort == BGP_PEER_IBGP
+			|| mpinfo->peer->sort == BGP_PEER_CONFED))
+			allow_recursion = true;
+
+ 		if (mpinfo->extra
+ 		    && bgp_is_valid_label(&mpinfo->extra->label[0])
+ 		    && !CHECK_FLAG(api.flags, ZEBRA_FLAG_EVPN_ROUTE)) {
+@@ -1415,6 +1425,9 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p,
+ 		valid_nh_count++;
+ 	}
+ 
+	if (allow_recursion)
+		SET_FLAG(api.flags, ZEBRA_FLAG_ALLOW_RECURSION);
+
+ 	/*
+ 	 * When we create an aggregate route we must also
+ 	 * install a Null0 route in the RIB, so overwrite
+diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
+index 2aa069002..e268d5b1e 100644
+--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
+@@ -456,6 +456,7 @@ struct bgp {
+ #define BGP_FLAG_GR_DISABLE_EOR           (1 << 24)
+ #define BGP_FLAG_EBGP_REQUIRES_POLICY (1 << 25)
+ #define BGP_FLAG_SHOW_NEXTHOP_HOSTNAME (1 << 26)
+#define BGP_FLAG_PEERTYPE_MULTIPATH_RELAX (1 << 31)
+ 
+ /* This flag is set if the instance is in administrative shutdown */
+ #define BGP_FLAG_SHUTDOWN (1 << 27)
+diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst
+index b58030212..e248ffceb 100644
+--- a/doc/user/bgp.rst
+++ b/doc/user/bgp.rst
+@@ -386,6 +386,13 @@ Route Selection
+    other measures were taken to avoid these. The exact behaviour will be
+    sensitive to the iBGP and reflection topology.
+ 
+.. clicmd:: bgp bestpath peer-type multipath-relax
+
+   This command specifies that BGP decision process should consider paths
+   from all peers for multipath computation. If this option is enabled,
+   paths learned from any of eBGP, iBGP, or confederation neighbors will
+   be multipath if they are otherwise considered equal cost.
+
+ .. _bgp-distance:
+ 
+ Administrative Distance Metrics
+-- 
+2.29.2
+
--- a/src/sonic-frr/patch/series
+++ b/src/sonic-frr/patch/series
@ -5,3 +5,4 @@
 0005-nexthops-compare-vrf-only-if-ip-type.patch
 0007-frr-remove-frr-log-outchannel-to-var-log-frr.log.patch
 0008-Add-support-of-bgp-l3vni-evpn.patch
+0009-Add-bgp-bestpath-peer-type-multipath-relax.patch