[arp_update]: Resolve failed neighbors on dualtor (#11615)

In arp_update, check for FAILED or INCOMPLETE kernel neighbor entries and manually ping them to try and resolve the neighbor

Signed-off-by: Lawrence Lee <lawlee@microsoft.com>
This commit is contained in:
Lawrence Lee 2022-08-09 16:19:42 -07:00 committed by GitHub
parent a3e3530d1d
commit 889741c9bc
No account linked to committer's email address

View File

@ -54,6 +54,7 @@ while /bin/true; do
done
VLAN=$(echo $ARP_UPDATE_VARS | jq -r '.vlan')
SUBTYPE=$(sonic-db-cli CONFIG_DB hget 'DEVICE_METADATA|localhost' 'subtype' | tr '[:upper:]' '[:lower:]')
for vlan in $VLAN; do
# generate a list of arping commands:
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 1>;
@ -75,7 +76,26 @@ while /bin/true; do
ndisc6cmd="sed -e 's/^/ndisc6 -q -w 0 -1 /' -e 's/$/;/'"
ip6cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | cut -d ' ' -f 1,3 | $ndisc6cmd"
eval `eval $ip6cmd`
if [[ $SUBTYPE == "dualtor" ]]; then
# on dual ToR devices, try to resolve failed neighbor entries since
# these entries will have tunnel routes installed, preventing normal
# neighbor resolution (SWSS PR #2137)
# since ndisc6 is a userland process, the above ndisc6 commands are
# insufficient to update the kernel neighbor table for failed entries
# we don't need to do this for ipv4 neighbors since arping is able to
# update the kernel neighbor table
# generates the following command for each failed or incomplete IPv6 neighbor
# timeout 0.2 ping <neighbor IPv6> -n -q -i 0 -c 1 -W 1 -I <VLAN name> >/dev/null
ping6_template="sed -e 's/^/timeout 0.2 ping /' -e 's/,/ -n -q -i 0 -c 1 -W 1 -I /' -e 's/$/ >\/dev\/null;/'"
failed_ip6_neigh_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $ping6_template"
eval `eval $failed_ip6_neigh_cmd`
fi
done
# sleep here before handling the mismatch as it is not required during startup
sleep 300