2017-05-15 19:06:19 -05:00
|
|
|
#!/bin/bash
|
|
|
|
#
|
|
|
|
# usage:
|
Fix potential blackholing/looping traffic when link-local was used and refresh ipv6 neighbor to avoid CPU hit (#1904)
* Fix potential blackholing/looping traffic and refresh ipv6 neighbor to avoid CPU hit
In case ipv6 global addresses were configured on L3 interfaces and used for peering,
and routing protocol was using link-local addresses on the same interfaces as prefered nexthops,
the link-local addresses could be aged out after a while due to no activities towards the link-local
addresses themselves. And when we receive new routes with the link-local nexthops, SONiC won't insert
them to the HW, and thus cause looping or blackholing traffic.
Global ipv6 addresses on L3 interfaces between switches are refreshed by BGP keeplive and other messages.
On server facing side, traffic may hit fowarding plane only, and no refresh for the ipv6 neighbor entries regularly.
This could age-out the linux kernel ipv6 neighbor entries, and HW neighbor table entries could be removed,
and thus traffic going to those neighbors would hit CPU, and cause traffic drop and temperary CPU high load.
Also, if link-local addresses were not learned, we may not get them at all later.
It is intended to fix all above issues.
Changes:
Add ndisc6 package in swss docker and use it for ipv6 ndp ping to update the neighbors' state on Vlan interfaces
Change the default ipv6 neighbor reachable timer to 30mins
Add periodical ipv6 multicast ping to ff02::11 to get/refresh link-local neighbor info.
* Fix review comments:
Add PORTCHANNEL_INTERFACE interface for ipv6 multicast ping
format issue
* Combine regular L3 interface and portchannel interface for looping
* Add ndisc6 package to vs docker
2018-08-12 05:14:55 -05:00
|
|
|
# arp_update:
|
|
|
|
# Send ipv6 multicast pings to all "UP" L3 interfaces including vlan interfaces to
|
|
|
|
# refresh link-local addresses from neighbors.
|
|
|
|
# Send gratuitous ARP/NDP requests to VLAN member neighbors to refresh
|
|
|
|
# the ipv4/ipv6 neighbors state.
|
2017-05-15 19:06:19 -05:00
|
|
|
|
2020-09-18 20:44:23 -05:00
|
|
|
ARP_UPDATE_VARS_FILE="/usr/share/sonic/templates/arp_update_vars.j2"
|
|
|
|
|
2018-07-18 15:04:57 -05:00
|
|
|
while /bin/true; do
|
Fix potential blackholing/looping traffic when link-local was used and refresh ipv6 neighbor to avoid CPU hit (#1904)
* Fix potential blackholing/looping traffic and refresh ipv6 neighbor to avoid CPU hit
In case ipv6 global addresses were configured on L3 interfaces and used for peering,
and routing protocol was using link-local addresses on the same interfaces as prefered nexthops,
the link-local addresses could be aged out after a while due to no activities towards the link-local
addresses themselves. And when we receive new routes with the link-local nexthops, SONiC won't insert
them to the HW, and thus cause looping or blackholing traffic.
Global ipv6 addresses on L3 interfaces between switches are refreshed by BGP keeplive and other messages.
On server facing side, traffic may hit fowarding plane only, and no refresh for the ipv6 neighbor entries regularly.
This could age-out the linux kernel ipv6 neighbor entries, and HW neighbor table entries could be removed,
and thus traffic going to those neighbors would hit CPU, and cause traffic drop and temperary CPU high load.
Also, if link-local addresses were not learned, we may not get them at all later.
It is intended to fix all above issues.
Changes:
Add ndisc6 package in swss docker and use it for ipv6 ndp ping to update the neighbors' state on Vlan interfaces
Change the default ipv6 neighbor reachable timer to 30mins
Add periodical ipv6 multicast ping to ff02::11 to get/refresh link-local neighbor info.
* Fix review comments:
Add PORTCHANNEL_INTERFACE interface for ipv6 multicast ping
format issue
* Combine regular L3 interface and portchannel interface for looping
* Add ndisc6 package to vs docker
2018-08-12 05:14:55 -05:00
|
|
|
# find L3 interfaces which are UP, send ipv6 multicast pings
|
2020-09-18 20:44:23 -05:00
|
|
|
ARP_UPDATE_VARS=$(sonic-cfggen -d -t ${ARP_UPDATE_VARS_FILE})
|
2022-07-26 18:50:16 -05:00
|
|
|
SWITCH_TYPE=$(echo $ARP_UPDATE_VARS | jq -r '.switch_type')
|
|
|
|
if [[ "$SWITCH_TYPE" == "chassis-packet" ]]; then
|
|
|
|
STATIC_ROUTE_NEXTHOPS=$(echo $ARP_UPDATE_VARS | jq -r '.static_route_nexthops')
|
|
|
|
# on supervisor/rp exit the script gracefully
|
|
|
|
if [[ -z "$STATIC_ROUTE_NEXTHOPS" ]]; then
|
|
|
|
logger "arp_update: exiting as no static route in packet based chassis"
|
|
|
|
exit 0
|
|
|
|
fi
|
|
|
|
for nexthop in $STATIC_ROUTE_NEXTHOPS; do
|
|
|
|
if [[ $nexthop == *"."* ]]; then
|
|
|
|
neigh_state=( $(ip -4 neigh show | grep $nexthop | tr -s ' ' | cut -d ' ' -f 3,4) )
|
|
|
|
ping_prefix=ping
|
|
|
|
elif [[ $nexthop == *":"* ]] ; then
|
|
|
|
neigh_state=( $(ip -6 neigh show | grep $nexthop | tr -s ' ' | cut -d ' ' -f 3,4) )
|
|
|
|
ping_prefix=ping6
|
|
|
|
fi
|
|
|
|
|
|
|
|
if [[ "${neigh_state[1]}" == "INCOMPLETE" ]] || [[ "${neigh_state[1]}" == "FAILED" ]]; then
|
|
|
|
pingcmd="timeout 0.2 $ping_prefix -I ${neigh_state[0]} -n -q -i 0 -c 1 -W 1 $nexthop >/dev/null"
|
|
|
|
eval $pingcmd
|
|
|
|
logger "arp_update: sttaic route nexthop not resolved, pinging $nexthop on ${neigh_state[0]}"
|
|
|
|
fi
|
|
|
|
done
|
|
|
|
|
|
|
|
sleep 300
|
|
|
|
continue
|
|
|
|
fi
|
|
|
|
# find L3 interfaces which are UP, send ipv6 multicast pings
|
2020-09-18 20:44:23 -05:00
|
|
|
INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.interface')
|
|
|
|
PC_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.pc_interface')
|
2021-08-06 23:14:18 -05:00
|
|
|
VLAN_SUB_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.vlan_sub_interface')
|
Fix potential blackholing/looping traffic when link-local was used and refresh ipv6 neighbor to avoid CPU hit (#1904)
* Fix potential blackholing/looping traffic and refresh ipv6 neighbor to avoid CPU hit
In case ipv6 global addresses were configured on L3 interfaces and used for peering,
and routing protocol was using link-local addresses on the same interfaces as prefered nexthops,
the link-local addresses could be aged out after a while due to no activities towards the link-local
addresses themselves. And when we receive new routes with the link-local nexthops, SONiC won't insert
them to the HW, and thus cause looping or blackholing traffic.
Global ipv6 addresses on L3 interfaces between switches are refreshed by BGP keeplive and other messages.
On server facing side, traffic may hit fowarding plane only, and no refresh for the ipv6 neighbor entries regularly.
This could age-out the linux kernel ipv6 neighbor entries, and HW neighbor table entries could be removed,
and thus traffic going to those neighbors would hit CPU, and cause traffic drop and temperary CPU high load.
Also, if link-local addresses were not learned, we may not get them at all later.
It is intended to fix all above issues.
Changes:
Add ndisc6 package in swss docker and use it for ipv6 ndp ping to update the neighbors' state on Vlan interfaces
Change the default ipv6 neighbor reachable timer to 30mins
Add periodical ipv6 multicast ping to ff02::11 to get/refresh link-local neighbor info.
* Fix review comments:
Add PORTCHANNEL_INTERFACE interface for ipv6 multicast ping
format issue
* Combine regular L3 interface and portchannel interface for looping
* Add ndisc6 package to vs docker
2018-08-12 05:14:55 -05:00
|
|
|
|
2021-08-06 23:14:18 -05:00
|
|
|
ALL_INTERFACE="$INTERFACE $PC_INTERFACE $VLAN_SUB_INTERFACE"
|
Fix potential blackholing/looping traffic when link-local was used and refresh ipv6 neighbor to avoid CPU hit (#1904)
* Fix potential blackholing/looping traffic and refresh ipv6 neighbor to avoid CPU hit
In case ipv6 global addresses were configured on L3 interfaces and used for peering,
and routing protocol was using link-local addresses on the same interfaces as prefered nexthops,
the link-local addresses could be aged out after a while due to no activities towards the link-local
addresses themselves. And when we receive new routes with the link-local nexthops, SONiC won't insert
them to the HW, and thus cause looping or blackholing traffic.
Global ipv6 addresses on L3 interfaces between switches are refreshed by BGP keeplive and other messages.
On server facing side, traffic may hit fowarding plane only, and no refresh for the ipv6 neighbor entries regularly.
This could age-out the linux kernel ipv6 neighbor entries, and HW neighbor table entries could be removed,
and thus traffic going to those neighbors would hit CPU, and cause traffic drop and temperary CPU high load.
Also, if link-local addresses were not learned, we may not get them at all later.
It is intended to fix all above issues.
Changes:
Add ndisc6 package in swss docker and use it for ipv6 ndp ping to update the neighbors' state on Vlan interfaces
Change the default ipv6 neighbor reachable timer to 30mins
Add periodical ipv6 multicast ping to ff02::11 to get/refresh link-local neighbor info.
* Fix review comments:
Add PORTCHANNEL_INTERFACE interface for ipv6 multicast ping
format issue
* Combine regular L3 interface and portchannel interface for looping
* Add ndisc6 package to vs docker
2018-08-12 05:14:55 -05:00
|
|
|
for intf in $ALL_INTERFACE; do
|
|
|
|
ping6cmd="ping6 -I $intf -n -q -i 0 -c 1 -W 0 ff02::1 >/dev/null"
|
|
|
|
intf_up=$(ip link show $intf | grep "state UP")
|
|
|
|
if [[ -n "$intf_up" ]]; then
|
|
|
|
eval $ping6cmd
|
|
|
|
fi
|
|
|
|
done
|
|
|
|
|
2020-09-18 20:44:23 -05:00
|
|
|
VLAN=$(echo $ARP_UPDATE_VARS | jq -r '.vlan')
|
2022-08-09 18:19:42 -05:00
|
|
|
SUBTYPE=$(sonic-db-cli CONFIG_DB hget 'DEVICE_METADATA|localhost' 'subtype' | tr '[:upper:]' '[:lower:]')
|
2018-07-18 15:04:57 -05:00
|
|
|
for vlan in $VLAN; do
|
|
|
|
# generate a list of arping commands:
|
|
|
|
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 1>;
|
|
|
|
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 2>;
|
|
|
|
# ...
|
|
|
|
arpingcmd="sed -e 's/ / -i /' -e 's/^/arping -q -w 0 -c 1 /' -e 's/$/;/'"
|
|
|
|
ipcmd="ip -4 neigh show | grep $vlan | cut -d ' ' -f 1,3 | $arpingcmd"
|
2017-05-15 19:06:19 -05:00
|
|
|
|
2018-07-18 15:04:57 -05:00
|
|
|
eval `eval $ipcmd`
|
Fix potential blackholing/looping traffic when link-local was used and refresh ipv6 neighbor to avoid CPU hit (#1904)
* Fix potential blackholing/looping traffic and refresh ipv6 neighbor to avoid CPU hit
In case ipv6 global addresses were configured on L3 interfaces and used for peering,
and routing protocol was using link-local addresses on the same interfaces as prefered nexthops,
the link-local addresses could be aged out after a while due to no activities towards the link-local
addresses themselves. And when we receive new routes with the link-local nexthops, SONiC won't insert
them to the HW, and thus cause looping or blackholing traffic.
Global ipv6 addresses on L3 interfaces between switches are refreshed by BGP keeplive and other messages.
On server facing side, traffic may hit fowarding plane only, and no refresh for the ipv6 neighbor entries regularly.
This could age-out the linux kernel ipv6 neighbor entries, and HW neighbor table entries could be removed,
and thus traffic going to those neighbors would hit CPU, and cause traffic drop and temperary CPU high load.
Also, if link-local addresses were not learned, we may not get them at all later.
It is intended to fix all above issues.
Changes:
Add ndisc6 package in swss docker and use it for ipv6 ndp ping to update the neighbors' state on Vlan interfaces
Change the default ipv6 neighbor reachable timer to 30mins
Add periodical ipv6 multicast ping to ff02::11 to get/refresh link-local neighbor info.
* Fix review comments:
Add PORTCHANNEL_INTERFACE interface for ipv6 multicast ping
format issue
* Combine regular L3 interface and portchannel interface for looping
* Add ndisc6 package to vs docker
2018-08-12 05:14:55 -05:00
|
|
|
|
|
|
|
# send ipv6 multicast pings to Vlan interfaces to get/refresh link-local addrs
|
2021-08-13 01:25:28 -05:00
|
|
|
ping6cmd="timeout 1 ping6 -I $vlan -n -q -i 0 -c 1 -W 0 ff02::1 >/dev/null"
|
Fix potential blackholing/looping traffic when link-local was used and refresh ipv6 neighbor to avoid CPU hit (#1904)
* Fix potential blackholing/looping traffic and refresh ipv6 neighbor to avoid CPU hit
In case ipv6 global addresses were configured on L3 interfaces and used for peering,
and routing protocol was using link-local addresses on the same interfaces as prefered nexthops,
the link-local addresses could be aged out after a while due to no activities towards the link-local
addresses themselves. And when we receive new routes with the link-local nexthops, SONiC won't insert
them to the HW, and thus cause looping or blackholing traffic.
Global ipv6 addresses on L3 interfaces between switches are refreshed by BGP keeplive and other messages.
On server facing side, traffic may hit fowarding plane only, and no refresh for the ipv6 neighbor entries regularly.
This could age-out the linux kernel ipv6 neighbor entries, and HW neighbor table entries could be removed,
and thus traffic going to those neighbors would hit CPU, and cause traffic drop and temperary CPU high load.
Also, if link-local addresses were not learned, we may not get them at all later.
It is intended to fix all above issues.
Changes:
Add ndisc6 package in swss docker and use it for ipv6 ndp ping to update the neighbors' state on Vlan interfaces
Change the default ipv6 neighbor reachable timer to 30mins
Add periodical ipv6 multicast ping to ff02::11 to get/refresh link-local neighbor info.
* Fix review comments:
Add PORTCHANNEL_INTERFACE interface for ipv6 multicast ping
format issue
* Combine regular L3 interface and portchannel interface for looping
* Add ndisc6 package to vs docker
2018-08-12 05:14:55 -05:00
|
|
|
eval $ping6cmd
|
|
|
|
|
|
|
|
# generate a list of ndisc6 commands (exclude link-local addrs since it is done above):
|
|
|
|
# ndisc6 -q -w 0 -1 <IP 1> <VLAN interface>;
|
|
|
|
# ndisc6 -q -w 0 -1 <IP 2> <VLAN interface>;
|
|
|
|
# ...
|
|
|
|
ndisc6cmd="sed -e 's/^/ndisc6 -q -w 0 -1 /' -e 's/$/;/'"
|
|
|
|
ip6cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | cut -d ' ' -f 1,3 | $ndisc6cmd"
|
|
|
|
eval `eval $ip6cmd`
|
2022-08-09 18:19:42 -05:00
|
|
|
|
|
|
|
if [[ $SUBTYPE == "dualtor" ]]; then
|
2022-09-02 15:40:40 -05:00
|
|
|
# manually set any remaining FAILED/INCOMPLETE entries to permanently INCOMPLETE
|
|
|
|
# this prevents any remaining INCOMPLETE entries from automatically transitioning to FAILED
|
|
|
|
# once these entries are incomplete, any subsequent neighbor advertisement messages
|
|
|
|
# are able to resolve the entry
|
|
|
|
|
|
|
|
# generates the following command for each failed or incomplete IPv6 neighbor
|
|
|
|
# ip neigh replace <neighbor IPv6> dev <VLAN name> nud incomplete
|
|
|
|
neigh_replace_template="sed -e 's/^/ip neigh replace /' -e 's/,/ dev /' -e 's/$/ nud incomplete;/'"
|
|
|
|
ip_neigh_replace_cmd="ip -6 neigh show | grep -v fe80 | grep Vlan1000 | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $neigh_replace_template"
|
|
|
|
eval `eval $ip_neigh_replace_cmd`
|
|
|
|
|
2022-08-09 18:19:42 -05:00
|
|
|
# on dual ToR devices, try to resolve failed neighbor entries since
|
|
|
|
# these entries will have tunnel routes installed, preventing normal
|
|
|
|
# neighbor resolution (SWSS PR #2137)
|
|
|
|
|
|
|
|
# since ndisc6 is a userland process, the above ndisc6 commands are
|
|
|
|
# insufficient to update the kernel neighbor table for failed entries
|
|
|
|
|
|
|
|
# we don't need to do this for ipv4 neighbors since arping is able to
|
|
|
|
# update the kernel neighbor table
|
|
|
|
|
|
|
|
# generates the following command for each failed or incomplete IPv6 neighbor
|
|
|
|
# timeout 0.2 ping <neighbor IPv6> -n -q -i 0 -c 1 -W 1 -I <VLAN name> >/dev/null
|
|
|
|
ping6_template="sed -e 's/^/timeout 0.2 ping /' -e 's/,/ -n -q -i 0 -c 1 -W 1 -I /' -e 's/$/ >\/dev\/null;/'"
|
|
|
|
failed_ip6_neigh_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $ping6_template"
|
|
|
|
eval `eval $failed_ip6_neigh_cmd`
|
|
|
|
fi
|
2018-07-18 15:04:57 -05:00
|
|
|
done
|
2022-08-09 18:19:42 -05:00
|
|
|
|
2020-02-13 12:27:37 -06:00
|
|
|
|
2020-02-20 14:54:39 -06:00
|
|
|
# sleep here before handling the mismatch as it is not required during startup
|
|
|
|
sleep 300
|
|
|
|
|
2020-02-13 12:27:37 -06:00
|
|
|
# refresh neighbor entries from APP_DB in case of mismatch with kernel
|
|
|
|
DBNEIGH=$(sonic-db-cli APPL_DB keys NEIGH_TABLE*)
|
|
|
|
KERNEIGH4=$(ip -4 neigh show | grep Vlan | cut -d ' ' -f 1,3 --output-delimiter=',')
|
|
|
|
KERNEIGH6=$(ip -6 neigh show | grep -v fe80 | grep Vlan | cut -d ' ' -f 1,3 --output-delimiter=',')
|
|
|
|
for neigh in $DBNEIGH; do
|
|
|
|
intf="$( cut -d ':' -f 2 <<< "$neigh" )"
|
|
|
|
ip="$( cut -d ':' -f 3- <<< "$neigh" )"
|
|
|
|
if [[ $intf == *"Vlan"* ]]; then
|
|
|
|
if [[ $ip == *"."* ]] && [[ ! $KERNEIGH4 =~ "${ip},${intf}" ]]; then
|
|
|
|
pingcmd="timeout 0.2 ping -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
|
|
|
|
eval $pingcmd
|
|
|
|
logger "arp_update: mismatch arp entry, pinging ${ip} on ${intf}"
|
|
|
|
elif [[ $ip == *":"* ]] && [[ ! $KERNEIGH6 =~ "${ip},${intf}" ]]; then
|
|
|
|
ping6cmd="timeout 0.2 ping6 -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
|
|
|
|
eval $ping6cmd
|
|
|
|
logger "arp_update: mismatch v6 nbr entry, pinging ${ip} on ${intf}"
|
|
|
|
fi
|
|
|
|
fi
|
|
|
|
done
|
|
|
|
|
2017-05-15 19:06:19 -05:00
|
|
|
done
|