889741c9bc
In arp_update, check for FAILED or INCOMPLETE kernel neighbor entries and manually ping them to try and resolve the neighbor Signed-off-by: Lawrence Lee <lawlee@microsoft.com>
124 lines
5.5 KiB
Bash
Executable File
124 lines
5.5 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# usage:
|
|
# arp_update:
|
|
# Send ipv6 multicast pings to all "UP" L3 interfaces including vlan interfaces to
|
|
# refresh link-local addresses from neighbors.
|
|
# Send gratuitous ARP/NDP requests to VLAN member neighbors to refresh
|
|
# the ipv4/ipv6 neighbors state.
|
|
|
|
ARP_UPDATE_VARS_FILE="/usr/share/sonic/templates/arp_update_vars.j2"
|
|
|
|
while /bin/true; do
|
|
# find L3 interfaces which are UP, send ipv6 multicast pings
|
|
ARP_UPDATE_VARS=$(sonic-cfggen -d -t ${ARP_UPDATE_VARS_FILE})
|
|
SWITCH_TYPE=$(echo $ARP_UPDATE_VARS | jq -r '.switch_type')
|
|
if [[ "$SWITCH_TYPE" == "chassis-packet" ]]; then
|
|
STATIC_ROUTE_NEXTHOPS=$(echo $ARP_UPDATE_VARS | jq -r '.static_route_nexthops')
|
|
# on supervisor/rp exit the script gracefully
|
|
if [[ -z "$STATIC_ROUTE_NEXTHOPS" ]]; then
|
|
logger "arp_update: exiting as no static route in packet based chassis"
|
|
exit 0
|
|
fi
|
|
for nexthop in $STATIC_ROUTE_NEXTHOPS; do
|
|
if [[ $nexthop == *"."* ]]; then
|
|
neigh_state=( $(ip -4 neigh show | grep $nexthop | tr -s ' ' | cut -d ' ' -f 3,4) )
|
|
ping_prefix=ping
|
|
elif [[ $nexthop == *":"* ]] ; then
|
|
neigh_state=( $(ip -6 neigh show | grep $nexthop | tr -s ' ' | cut -d ' ' -f 3,4) )
|
|
ping_prefix=ping6
|
|
fi
|
|
|
|
if [[ "${neigh_state[1]}" == "INCOMPLETE" ]] || [[ "${neigh_state[1]}" == "FAILED" ]]; then
|
|
pingcmd="timeout 0.2 $ping_prefix -I ${neigh_state[0]} -n -q -i 0 -c 1 -W 1 $nexthop >/dev/null"
|
|
eval $pingcmd
|
|
logger "arp_update: sttaic route nexthop not resolved, pinging $nexthop on ${neigh_state[0]}"
|
|
fi
|
|
done
|
|
|
|
sleep 300
|
|
continue
|
|
fi
|
|
# find L3 interfaces which are UP, send ipv6 multicast pings
|
|
INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.interface')
|
|
PC_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.pc_interface')
|
|
VLAN_SUB_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.vlan_sub_interface')
|
|
|
|
ALL_INTERFACE="$INTERFACE $PC_INTERFACE $VLAN_SUB_INTERFACE"
|
|
for intf in $ALL_INTERFACE; do
|
|
ping6cmd="ping6 -I $intf -n -q -i 0 -c 1 -W 0 ff02::1 >/dev/null"
|
|
intf_up=$(ip link show $intf | grep "state UP")
|
|
if [[ -n "$intf_up" ]]; then
|
|
eval $ping6cmd
|
|
fi
|
|
done
|
|
|
|
VLAN=$(echo $ARP_UPDATE_VARS | jq -r '.vlan')
|
|
SUBTYPE=$(sonic-db-cli CONFIG_DB hget 'DEVICE_METADATA|localhost' 'subtype' | tr '[:upper:]' '[:lower:]')
|
|
for vlan in $VLAN; do
|
|
# generate a list of arping commands:
|
|
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 1>;
|
|
# arping -q -w 0 -c 1 -i <VLAN interface> <IP 2>;
|
|
# ...
|
|
arpingcmd="sed -e 's/ / -i /' -e 's/^/arping -q -w 0 -c 1 /' -e 's/$/;/'"
|
|
ipcmd="ip -4 neigh show | grep $vlan | cut -d ' ' -f 1,3 | $arpingcmd"
|
|
|
|
eval `eval $ipcmd`
|
|
|
|
# send ipv6 multicast pings to Vlan interfaces to get/refresh link-local addrs
|
|
ping6cmd="timeout 1 ping6 -I $vlan -n -q -i 0 -c 1 -W 0 ff02::1 >/dev/null"
|
|
eval $ping6cmd
|
|
|
|
# generate a list of ndisc6 commands (exclude link-local addrs since it is done above):
|
|
# ndisc6 -q -w 0 -1 <IP 1> <VLAN interface>;
|
|
# ndisc6 -q -w 0 -1 <IP 2> <VLAN interface>;
|
|
# ...
|
|
ndisc6cmd="sed -e 's/^/ndisc6 -q -w 0 -1 /' -e 's/$/;/'"
|
|
ip6cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | cut -d ' ' -f 1,3 | $ndisc6cmd"
|
|
eval `eval $ip6cmd`
|
|
|
|
if [[ $SUBTYPE == "dualtor" ]]; then
|
|
# on dual ToR devices, try to resolve failed neighbor entries since
|
|
# these entries will have tunnel routes installed, preventing normal
|
|
# neighbor resolution (SWSS PR #2137)
|
|
|
|
# since ndisc6 is a userland process, the above ndisc6 commands are
|
|
# insufficient to update the kernel neighbor table for failed entries
|
|
|
|
# we don't need to do this for ipv4 neighbors since arping is able to
|
|
# update the kernel neighbor table
|
|
|
|
# generates the following command for each failed or incomplete IPv6 neighbor
|
|
# timeout 0.2 ping <neighbor IPv6> -n -q -i 0 -c 1 -W 1 -I <VLAN name> >/dev/null
|
|
ping6_template="sed -e 's/^/timeout 0.2 ping /' -e 's/,/ -n -q -i 0 -c 1 -W 1 -I /' -e 's/$/ >\/dev\/null;/'"
|
|
failed_ip6_neigh_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $ping6_template"
|
|
eval `eval $failed_ip6_neigh_cmd`
|
|
fi
|
|
done
|
|
|
|
|
|
# sleep here before handling the mismatch as it is not required during startup
|
|
sleep 300
|
|
|
|
# refresh neighbor entries from APP_DB in case of mismatch with kernel
|
|
DBNEIGH=$(sonic-db-cli APPL_DB keys NEIGH_TABLE*)
|
|
KERNEIGH4=$(ip -4 neigh show | grep Vlan | cut -d ' ' -f 1,3 --output-delimiter=',')
|
|
KERNEIGH6=$(ip -6 neigh show | grep -v fe80 | grep Vlan | cut -d ' ' -f 1,3 --output-delimiter=',')
|
|
for neigh in $DBNEIGH; do
|
|
intf="$( cut -d ':' -f 2 <<< "$neigh" )"
|
|
ip="$( cut -d ':' -f 3- <<< "$neigh" )"
|
|
if [[ $intf == *"Vlan"* ]]; then
|
|
if [[ $ip == *"."* ]] && [[ ! $KERNEIGH4 =~ "${ip},${intf}" ]]; then
|
|
pingcmd="timeout 0.2 ping -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
|
|
eval $pingcmd
|
|
logger "arp_update: mismatch arp entry, pinging ${ip} on ${intf}"
|
|
elif [[ $ip == *":"* ]] && [[ ! $KERNEIGH6 =~ "${ip},${intf}" ]]; then
|
|
ping6cmd="timeout 0.2 ping6 -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
|
|
eval $ping6cmd
|
|
logger "arp_update: mismatch v6 nbr entry, pinging ${ip} on ${intf}"
|
|
fi
|
|
fi
|
|
done
|
|
|
|
done
|