Add "bgp bestpath peer-type multipath-relax" to FRR (#5629)

* Add "bgp bestpath peer-type multipath-relax" to frr

This new BGP configuration is akin to "bgp bestpath aspath multipath-relax".
When applied, paths learned from different peer types will be eligible
to be considered for multipath (ECMP). Paths from all of eBGP, iBGP, and
confederation peers may be included in a multipath group if they are
otherwise equal cost.

When such a multipath group is created, it is not desirable for
iBGP nexthops to be discarded from the FIB because they are not directly
connected. So when publishing the nexthop group to zebra, bgpd will allow
recursive resolution, but only when there are iBGP-learned paths in the
group.

This change is merged in FRR in this PR FRRouting/frr#8056

Signed-off-by: Joanne Mikkelson <jmmikkel@arista.com>
This commit is contained in:
jmmikkel 2021-04-16 11:20:15 -07:00 committed by GitHub
parent 43342b33b8
commit e1bee859aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 347 additions and 0 deletions

View File

@ -0,0 +1,346 @@
From a70c630a504e041da1c2441337b2d47e8fdd3d30 Mon Sep 17 00:00:00 2001
From: Joanne Mikkelson <jmmikkel@arista.com>
Date: Mon, 29 Jun 2020 13:31:49 -0700
Subject: [PATCH] Add "bgp bestpath peer-type multipath-relax"
This new BGP configuration is akin to "bgp bestpath aspath
multipath-relax". When applied, paths learned from different peer types
will be eligible to be considered for multipath (ECMP). Paths from all
of eBGP, iBGP, and confederation peers may be included in multipaths
if they are otherwise equal cost.
This change preserves the existing bestpath behavior of step 10's result
being returned, not the result from steps 8 and 9, in the case where
both 8+9 and 10 determine a winner.
When "bgp bestpath peer-type multipath-relax" is enabled, multipaths
with both eBGP and iBGP learned routes may exist. It is not desirable
for the iBGP next hops to be discarded from the FIB because they are not
directly connected. When publishing a nexthop group to zebra, the
ZEBRA_FLAG_ALLOW_RECURSION flag is normally not set when the best path
is eBGP; when "bgp bestpath aspath multipath-relax" is configured, the
flag will now be set if any paths are from iBGP peers. This leaves
all-eBGP multipaths still requiring nexthops over connected routes.
---
bgpd/bgp_route.c | 76 ++++++++++++++++++++++++++++++++++--------------
bgpd/bgp_vty.c | 43 +++++++++++++++++++++++++++
bgpd/bgp_zebra.c | 15 +++++++++-
bgpd/bgpd.h | 1 +
doc/user/bgp.rst | 7 +++++
5 files changed, 119 insertions(+), 23 deletions(-)
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index 1c646c03e..73dbf3c5f 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -540,6 +540,8 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
int internal_as_route;
int confed_as_route;
int ret = 0;
+ int igp_metric_ret = 0;
+ int peer_sort_ret = -1;
char new_buf[PATH_ADDPATH_STR_BUFFER];
char exist_buf[PATH_ADDPATH_STR_BUFFER];
uint32_t new_mm_seq;
@@ -940,7 +942,9 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
zlog_debug(
"%s: %s wins over %s due to eBGP peer > iBGP peer",
pfx_buf, new_buf, exist_buf);
- return 1;
+ if (!CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+ return 1;
+ peer_sort_ret = 1;
}
if (exist_sort == BGP_PEER_EBGP
@@ -950,7 +954,9 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
zlog_debug(
"%s: %s loses to %s due to iBGP peer < eBGP peer",
pfx_buf, new_buf, exist_buf);
- return 0;
+ if (!CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+ return 0;
+ peer_sort_ret = 0;
}
/* 8. IGP metric check. */
@@ -962,19 +968,19 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
existm = exist->extra->igpmetric;
if (newm < existm) {
- if (debug)
+ if (debug && peer_sort_ret < 0)
zlog_debug(
"%s: %s wins over %s due to IGP metric %d < %d",
pfx_buf, new_buf, exist_buf, newm, existm);
- ret = 1;
+ igp_metric_ret = 1;
}
if (newm > existm) {
- if (debug)
+ if (debug && peer_sort_ret < 0)
zlog_debug(
"%s: %s loses to %s due to IGP metric %d > %d",
pfx_buf, new_buf, exist_buf, newm, existm);
- ret = 0;
+ igp_metric_ret = 0;
}
/* 9. Same IGP metric. Compare the cluster list length as
@@ -992,21 +998,21 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
existm = BGP_CLUSTER_LIST_LENGTH(exist->attr);
if (newm < existm) {
- if (debug)
+ if (debug && peer_sort_ret < 0)
zlog_debug(
"%s: %s wins over %s due to CLUSTER_LIST length %d < %d",
pfx_buf, new_buf, exist_buf,
newm, existm);
- ret = 1;
+ igp_metric_ret = 1;
}
if (newm > existm) {
- if (debug)
+ if (debug && peer_sort_ret < 0)
zlog_debug(
"%s: %s loses to %s due to CLUSTER_LIST length %d > %d",
pfx_buf, new_buf, exist_buf,
newm, existm);
- ret = 0;
+ igp_metric_ret = 0;
}
}
}
@@ -1020,7 +1026,10 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
zlog_debug(
"%s: %s wins over %s due to confed-external peer > confed-internal peer",
pfx_buf, new_buf, exist_buf);
- return 1;
+ if (!CHECK_FLAG(bgp->flags,
+ BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+ return 1;
+ peer_sort_ret = 1;
}
if (exist_sort == BGP_PEER_CONFED
@@ -1030,7 +1039,10 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
zlog_debug(
"%s: %s loses to %s due to confed-internal peer < confed-external peer",
pfx_buf, new_buf, exist_buf);
- return 0;
+ if (!CHECK_FLAG(bgp->flags,
+ BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+ return 0;
+ peer_sort_ret = 0;
}
}
@@ -1091,20 +1103,40 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
* TODO: If unequal cost ibgp multipath is enabled we can
* mark the paths as equal here instead of returning
*/
- if (debug) {
- if (ret == 1)
- zlog_debug(
- "%s: %s wins over %s after IGP metric comparison",
- pfx_buf, new_buf, exist_buf);
- else
- zlog_debug(
- "%s: %s loses to %s after IGP metric comparison",
- pfx_buf, new_buf, exist_buf);
+
+ /* Prior to the addition of BGP_FLAG_PEERTYPE_MULTIPATH_RELAX,
+ * if either step 7 or 10 (peer type checks) yielded a winner,
+ * that result was returned immediately. Returning from step 10
+ * ignored the return value computed in steps 8 and 9 (IGP
+ * metric checks). In order to preserve that behavior, if
+ * peer_sort_ret is set, return that rather than igp_metric_ret.
+ */
+ ret = peer_sort_ret;
+ if (peer_sort_ret < 0) {
+ ret = igp_metric_ret;
+ if (debug) {
+ if (ret == 1)
+ zlog_debug(
+ "%s: %s wins over %s after IGP metric comparison",
+ pfx_buf, new_buf, exist_buf);
+ else
+ zlog_debug(
+ "%s: %s loses to %s after IGP metric comparison",
+ pfx_buf, new_buf, exist_buf);
+ }
+ *reason = bgp_path_selection_igp_metric;
}
- *reason = bgp_path_selection_igp_metric;
return ret;
}
+ /*
+ * At this point, the decision whether to set *paths_eq = 1 has been
+ * completed. If we deferred returning because of bestpath peer-type
+ * relax configuration, return now.
+ */
+ if (peer_sort_ret >= 0)
+ return peer_sort_ret;
+
/* 12. If both paths are external, prefer the path that was received
first (the oldest one). This step minimizes route-flap, since a
newer path won't displace an older one, even if it was the
diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
index bb2f89f9e..4d1ce5a46 100644
--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
@@ -3009,6 +3009,37 @@ DEFUN (no_bgp_bestpath_aspath_multipath_relax,
return CMD_SUCCESS;
}
+/* "bgp bestpath peer-type multipath-relax" configuration. */
+DEFUN(bgp_bestpath_peer_type_multipath_relax,
+ bgp_bestpath_peer_type_multipath_relax_cmd,
+ "bgp bestpath peer-type multipath-relax",
+ BGP_STR
+ "Change the default bestpath selection\n"
+ "Peer type\n"
+ "Allow load sharing across routes learned from different peer types\n")
+{
+ VTY_DECLVAR_CONTEXT(bgp, bgp);
+ SET_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX);
+ bgp_recalculate_all_bestpaths(bgp);
+
+ return CMD_SUCCESS;
+}
+
+DEFUN(no_bgp_bestpath_peer_type_multipath_relax,
+ no_bgp_bestpath_peer_type_multipath_relax_cmd,
+ "no bgp bestpath peer-type multipath-relax",
+ NO_STR BGP_STR
+ "Change the default bestpath selection\n"
+ "Peer type\n"
+ "Allow load sharing across routes learned from different peer types\n")
+{
+ VTY_DECLVAR_CONTEXT(bgp, bgp);
+ UNSET_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX);
+ bgp_recalculate_all_bestpaths(bgp);
+
+ return CMD_SUCCESS;
+}
+
/* "bgp log-neighbor-changes" configuration. */
DEFUN (bgp_log_neighbor_changes,
bgp_log_neighbor_changes_cmd,
@@ -8999,6 +9030,9 @@ static void bgp_show_bestpath_json(struct bgp *bgp, json_object *json)
} else
json_object_string_add(bestpath, "multiPathRelax", "false");
+ if (CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+ json_object_boolean_true_add(bestpath, "peerTypeRelax");
+
if (CHECK_FLAG(bgp->flags, BGP_FLAG_COMPARE_ROUTER_ID))
json_object_string_add(bestpath, "compareRouterId", "true");
if (CHECK_FLAG(bgp->flags, BGP_FLAG_MED_CONFED)
@@ -15750,6 +15784,10 @@ int bgp_config_write(struct vty *vty)
vty_out(vty, "\n");
}
+ if (CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX))
+ vty_out(vty,
+ " bgp bestpath peer-type multipath-relax\n");
+
/* Link bandwidth handling. */
if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW)
vty_out(vty, " bgp bestpath bandwidth ignore\n");
@@ -16214,6 +16252,11 @@ void bgp_vty_init(void)
install_element(BGP_NODE, &bgp_bestpath_aspath_multipath_relax_cmd);
install_element(BGP_NODE, &no_bgp_bestpath_aspath_multipath_relax_cmd);
+ /* "bgp bestpath peer-type multipath-relax" commands */
+ install_element(BGP_NODE, &bgp_bestpath_peer_type_multipath_relax_cmd);
+ install_element(BGP_NODE,
+ &no_bgp_bestpath_peer_type_multipath_relax_cmd);
+
/* "bgp log-neighbor-changes" commands */
install_element(BGP_NODE, &bgp_log_neighbor_changes_cmd);
install_element(BGP_NODE, &no_bgp_log_neighbor_changes_cmd);
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index b20323852..4f7cfeaa2 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -1184,6 +1184,7 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p,
int nh_family;
unsigned int valid_nh_count = 0;
int has_valid_label = 0;
+ bool allow_recursion = false;
uint8_t distance;
struct peer *peer;
struct bgp_path_info *mpinfo;
@@ -1259,7 +1260,7 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p,
|| CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)
|| CHECK_FLAG(bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
- SET_FLAG(api.flags, ZEBRA_FLAG_ALLOW_RECURSION);
+ allow_recursion = true;
if (info->attr->rmap_table_id) {
SET_FLAG(api.message, ZAPI_MESSAGE_TABLEID);
@@ -1397,6 +1398,15 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p,
if (!nh_updated)
continue;
+ /* Allow recursion if it is a multipath group with both
+ * eBGP and iBGP paths.
+ */
+ if (!allow_recursion
+ && CHECK_FLAG(bgp->flags, BGP_FLAG_PEERTYPE_MULTIPATH_RELAX)
+ && (mpinfo->peer->sort == BGP_PEER_IBGP
+ || mpinfo->peer->sort == BGP_PEER_CONFED))
+ allow_recursion = true;
+
if (mpinfo->extra
&& bgp_is_valid_label(&mpinfo->extra->label[0])
&& !CHECK_FLAG(api.flags, ZEBRA_FLAG_EVPN_ROUTE)) {
@@ -1415,6 +1425,9 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p,
valid_nh_count++;
}
+ if (allow_recursion)
+ SET_FLAG(api.flags, ZEBRA_FLAG_ALLOW_RECURSION);
+
/*
* When we create an aggregate route we must also
* install a Null0 route in the RIB, so overwrite
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 2aa069002..e268d5b1e 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -456,6 +456,7 @@ struct bgp {
#define BGP_FLAG_GR_DISABLE_EOR (1 << 24)
#define BGP_FLAG_EBGP_REQUIRES_POLICY (1 << 25)
#define BGP_FLAG_SHOW_NEXTHOP_HOSTNAME (1 << 26)
+#define BGP_FLAG_PEERTYPE_MULTIPATH_RELAX (1 << 31)
/* This flag is set if the instance is in administrative shutdown */
#define BGP_FLAG_SHUTDOWN (1 << 27)
diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst
index b58030212..e248ffceb 100644
--- a/doc/user/bgp.rst
+++ b/doc/user/bgp.rst
@@ -386,6 +386,13 @@ Route Selection
other measures were taken to avoid these. The exact behaviour will be
sensitive to the iBGP and reflection topology.
+.. clicmd:: bgp bestpath peer-type multipath-relax
+
+ This command specifies that BGP decision process should consider paths
+ from all peers for multipath computation. If this option is enabled,
+ paths learned from any of eBGP, iBGP, or confederation neighbors will
+ be multipath if they are otherwise considered equal cost.
+
.. _bgp-distance:
Administrative Distance Metrics
--
2.29.2

View File

@ -5,3 +5,4 @@
0005-nexthops-compare-vrf-only-if-ip-type.patch
0007-frr-remove-frr-log-outchannel-to-var-log-frr.log.patch
0008-Add-support-of-bgp-l3vni-evpn.patch
0009-Add-bgp-bestpath-peer-type-multipath-relax.patch