sonic-buildimage/files/scripts/arp_update
abdosi bd348c5264
[chassis-packet] fix the issue of internal ip arp not getting resolved. (#12127)
Fix the issue where arp_update will not ping some of the ip's even
though they are in failed state since grep of that ip on ip neigh show
command does not do exact word match and can return multiple match.
2022-11-14 10:15:17 -08:00

135 lines
6.3 KiB
Bash
Executable File

#!/bin/bash
#
# usage:
# arp_update:
# Send ipv6 multicast pings to all "UP" L3 interfaces including vlan interfaces to
# refresh link-local addresses from neighbors.
# Send gratuitous ARP/NDP requests to VLAN member neighbors to refresh
# the ipv4/ipv6 neighbors state.
ARP_UPDATE_VARS_FILE="/usr/share/sonic/templates/arp_update_vars.j2"
while /bin/true; do
# find L3 interfaces which are UP, send ipv6 multicast pings
ARP_UPDATE_VARS=$(sonic-cfggen -d -t ${ARP_UPDATE_VARS_FILE})
SWITCH_TYPE=$(echo $ARP_UPDATE_VARS | jq -r '.switch_type')
if [[ "$SWITCH_TYPE" == "chassis-packet" ]]; then
STATIC_ROUTE_NEXTHOPS=$(echo $ARP_UPDATE_VARS | jq -r '.static_route_nexthops')
# on supervisor/rp exit the script gracefully
if [[ -z "$STATIC_ROUTE_NEXTHOPS" ]]; then
logger "arp_update: exiting as no static route in packet based chassis"
exit 0
fi
for nexthop in $STATIC_ROUTE_NEXTHOPS; do
if [[ $nexthop == *"."* ]]; then
neigh_state=( $(ip -4 neigh show | grep -w $nexthop | tr -s ' ' | cut -d ' ' -f 3,4) )
ping_prefix=ping
elif [[ $nexthop == *":"* ]] ; then
neigh_state=( $(ip -6 neigh show | grep -w $nexthop | tr -s ' ' | cut -d ' ' -f 3,4) )
ping_prefix=ping6
fi
if [[ "${neigh_state[1]}" == "INCOMPLETE" ]] || [[ "${neigh_state[1]}" == "FAILED" ]]; then
pingcmd="timeout 0.2 $ping_prefix -I ${neigh_state[0]} -n -q -i 0 -c 1 -W 1 $nexthop >/dev/null"
eval $pingcmd
logger "arp_update: sttaic route nexthop not resolved, pinging $nexthop on ${neigh_state[0]}"
fi
done
sleep 300
continue
fi
# find L3 interfaces which are UP, send ipv6 multicast pings
INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.interface')
PC_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.pc_interface')
VLAN_SUB_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.vlan_sub_interface')
ALL_INTERFACE="$INTERFACE $PC_INTERFACE $VLAN_SUB_INTERFACE"
for intf in $ALL_INTERFACE; do
ping6cmd="ping6 -I $intf -n -q -i 0 -c 1 -W 0 ff02::1 >/dev/null"
intf_up=$(ip link show $intf | grep "state UP")
if [[ -n "$intf_up" ]]; then
eval $ping6cmd
fi
done
VLAN=$(echo $ARP_UPDATE_VARS | jq -r '.vlan')
SUBTYPE=$(sonic-db-cli CONFIG_DB hget 'DEVICE_METADATA|localhost' 'subtype' | tr '[:upper:]' '[:lower:]')
for vlan in $VLAN; do
# generate a list of arping commands:
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 1>;
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 2>;
# ...
arpingcmd="sed -e 's/ / -i /' -e 's/^/arping -q -w 0 -c 1 /' -e 's/$/;/'"
ipcmd="ip -4 neigh show | grep $vlan | cut -d ' ' -f 1,3 | $arpingcmd"
eval `eval $ipcmd`
# send ipv6 multicast pings to Vlan interfaces to get/refresh link-local addrs
ping6cmd="timeout 1 ping6 -I $vlan -n -q -i 0 -c 1 -W 0 ff02::1 >/dev/null"
eval $ping6cmd
# generate a list of ndisc6 commands (exclude link-local addrs since it is done above):
# ndisc6 -q -w 0 -1 <IP 1> <VLAN interface>;
# ndisc6 -q -w 0 -1 <IP 2> <VLAN interface>;
# ...
ndisc6cmd="sed -e 's/^/ndisc6 -q -w 0 -1 /' -e 's/$/;/'"
ip6cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | cut -d ' ' -f 1,3 | $ndisc6cmd"
eval `eval $ip6cmd`
if [[ $SUBTYPE == "dualtor" ]]; then
# manually set any remaining FAILED/INCOMPLETE entries to permanently INCOMPLETE
# this prevents any remaining INCOMPLETE entries from automatically transitioning to FAILED
# once these entries are incomplete, any subsequent neighbor advertisement messages
# are able to resolve the entry
# generates the following command for each failed or incomplete IPv6 neighbor
# ip neigh replace <neighbor IPv6> dev <VLAN name> nud incomplete
neigh_replace_template="sed -e 's/^/ip neigh replace /' -e 's/,/ dev /' -e 's/$/ nud incomplete;/'"
ip_neigh_replace_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $neigh_replace_template"
eval `eval $ip_neigh_replace_cmd`
# on dual ToR devices, try to resolve failed neighbor entries since
# these entries will have tunnel routes installed, preventing normal
# neighbor resolution (SWSS PR #2137)
# since ndisc6 is a userland process, the above ndisc6 commands are
# insufficient to update the kernel neighbor table for failed entries
# we don't need to do this for ipv4 neighbors since arping is able to
# update the kernel neighbor table
# generates the following command for each failed or incomplete IPv6 neighbor
# timeout 0.2 ping <neighbor IPv6> -n -q -i 0 -c 1 -W 1 -I <VLAN name> >/dev/null
ping6_template="sed -e 's/^/timeout 0.2 ping /' -e 's/,/ -n -q -i 0 -c 1 -W 1 -I /' -e 's/$/ >\/dev\/null;/'"
failed_ip6_neigh_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $ping6_template"
eval `eval $failed_ip6_neigh_cmd`
fi
done
# sleep here before handling the mismatch as it is not required during startup
sleep 300
# refresh neighbor entries from APP_DB in case of mismatch with kernel
DBNEIGH=$(sonic-db-cli APPL_DB keys NEIGH_TABLE*)
KERNEIGH4=$(ip -4 neigh show | grep Vlan | cut -d ' ' -f 1,3 --output-delimiter=',')
KERNEIGH6=$(ip -6 neigh show | grep -v fe80 | grep Vlan | cut -d ' ' -f 1,3 --output-delimiter=',')
for neigh in $DBNEIGH; do
intf="$( cut -d ':' -f 2 <<< "$neigh" )"
ip="$( cut -d ':' -f 3- <<< "$neigh" )"
if [[ $intf == *"Vlan"* ]]; then
if [[ $ip == *"."* ]] && [[ ! $KERNEIGH4 =~ "${ip},${intf}" ]]; then
pingcmd="timeout 0.2 ping -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
eval $pingcmd
logger "arp_update: mismatch arp entry, pinging ${ip} on ${intf}"
elif [[ $ip == *":"* ]] && [[ ! $KERNEIGH6 =~ "${ip},${intf}" ]]; then
ping6cmd="timeout 0.2 ping6 -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
eval $ping6cmd
logger "arp_update: mismatch v6 nbr entry, pinging ${ip} on ${intf}"
fi
fi
done
done