sonic-buildimage/files/scripts/arp_update
Vladyslav Morokhovych 80e0627acc [swss] Fix arp_update script (#8412)
Fix #7968

Issue is detected on SONiC.20201231.11

In test_static_route.py::test_static_route_ecmp static routes are configured, but neighbors are not resolved after config reload even after 10 minutes.
It looks like the arp_update script is starting to ping when Vlan1000 is not fully configured.
When issue is reproduced, stuck ping6 process is observed in swss container :

USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root         180  0.1  0.0   6296  1272 pts/0    S    17:03   0:03 ping6 -I Vlan1000 -n -q -i 0 -c 1 -W 0 ff02::1
And when arp_update script successfully resolves neighbors, we observe sleep 300 instead of ping process
2021-08-12 23:29:22 -07:00

76 lines
3.1 KiB
Bash
Executable File

#!/bin/bash
#
# usage:
# arp_update:
# Send ipv6 multicast pings to all "UP" L3 interfaces including vlan interfaces to
# refresh link-local addresses from neighbors.
# Send gratuitous ARP/NDP requests to VLAN member neighbors to refresh
# the ipv4/ipv6 neighbors state.
ARP_UPDATE_VARS_FILE="/usr/share/sonic/templates/arp_update_vars.j2"
while /bin/true; do
# find L3 interfaces which are UP, send ipv6 multicast pings
ARP_UPDATE_VARS=$(sonic-cfggen -d -t ${ARP_UPDATE_VARS_FILE})
INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.interface')
PC_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.pc_interface')
VLAN_SUB_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.vlan_sub_interface')
ALL_INTERFACE="$INTERFACE $PC_INTERFACE $VLAN_SUB_INTERFACE"
for intf in $ALL_INTERFACE; do
ping6cmd="ping6 -I $intf -n -q -i 0 -c 1 -W 0 ff02::1 >/dev/null"
intf_up=$(ip link show $intf | grep "state UP")
if [[ -n "$intf_up" ]]; then
eval $ping6cmd
fi
done
VLAN=$(echo $ARP_UPDATE_VARS | jq -r '.vlan')
for vlan in $VLAN; do
# generate a list of arping commands:
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 1>;
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 2>;
# ...
arpingcmd="sed -e 's/ / -i /' -e 's/^/arping -q -w 0 -c 1 /' -e 's/$/;/'"
ipcmd="ip -4 neigh show | grep $vlan | cut -d ' ' -f 1,3 | $arpingcmd"
eval `eval $ipcmd`
# send ipv6 multicast pings to Vlan interfaces to get/refresh link-local addrs
ping6cmd="timeout 1 ping6 -I $vlan -n -q -i 0 -c 1 -W 0 ff02::1 >/dev/null"
eval $ping6cmd
# generate a list of ndisc6 commands (exclude link-local addrs since it is done above):
# ndisc6 -q -w 0 -1 <IP 1> <VLAN interface>;
# ndisc6 -q -w 0 -1 <IP 2> <VLAN interface>;
# ...
ndisc6cmd="sed -e 's/^/ndisc6 -q -w 0 -1 /' -e 's/$/;/'"
ip6cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | cut -d ' ' -f 1,3 | $ndisc6cmd"
eval `eval $ip6cmd`
done
# sleep here before handling the mismatch as it is not required during startup
sleep 300
# refresh neighbor entries from APP_DB in case of mismatch with kernel
DBNEIGH=$(sonic-db-cli APPL_DB keys NEIGH_TABLE*)
KERNEIGH4=$(ip -4 neigh show | grep Vlan | cut -d ' ' -f 1,3 --output-delimiter=',')
KERNEIGH6=$(ip -6 neigh show | grep -v fe80 | grep Vlan | cut -d ' ' -f 1,3 --output-delimiter=',')
for neigh in $DBNEIGH; do
intf="$( cut -d ':' -f 2 <<< "$neigh" )"
ip="$( cut -d ':' -f 3- <<< "$neigh" )"
if [[ $intf == *"Vlan"* ]]; then
if [[ $ip == *"."* ]] && [[ ! $KERNEIGH4 =~ "${ip},${intf}" ]]; then
pingcmd="timeout 0.2 ping -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
eval $pingcmd
logger "arp_update: mismatch arp entry, pinging ${ip} on ${intf}"
elif [[ $ip == *":"* ]] && [[ ! $KERNEIGH6 =~ "${ip},${intf}" ]]; then
ping6cmd="timeout 0.2 ping6 -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
eval $ping6cmd
logger "arp_update: mismatch v6 nbr entry, pinging ${ip} on ${intf}"
fi
fi
done
done