[dualtor][active-active]Killing radv instead of stopping on active-active dualtor if config knob is on (#13408)

How I did it
radv sends a good-bye packet when the service is stopped, which causes an IPv6 route update on the SoC side. This update makes an interface bounce and causes traffic disruption, even though the ToR device might already be isolated.

This PR mitigates the traffic disruption during planned maintenance by killing radv instead of stopping it, so that the cease packet is not sent.

How to verify it
Verified on dev clusters:

Traffic disruption was no longer reproducible.
radv took the killing path
if knob was off, radv would take the stopping path

sign-off: Jing Zhang zhangjing@microsoft.com
This commit is contained in:
Jing Zhang 2023-01-20 15:34:34 -08:00 committed by GitHub
parent 439d4eab98
commit 260a2ec3e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -26,6 +26,26 @@ function check_fast_boot ()
fi
}
# Work out whether this device should take the "kill" path instead of the
# "stop" path, and publish the answer in the global ACTIVE_ACTIVE.
#
# ACTIVE_ACTIVE ends up "true" only when all of the following hold:
#   * CONFIG_DB "DEVICE_METADATA|localhost" subtype is "DualToR",
#   * the output of `show muxcable config` contains "active-active",
#   * CONFIG_DB "MUX_LINKMGR|SERVICE_MGMT" kill_radv is exactly "True".
# In every other case it is "false".
#
# Globals read:    SONIC_DB_CLI
# Globals written: DEVICE_SUBTYPE, MUX_CONFIG (DualToR only), ACTIVE_ACTIVE,
#                  CONFIG_KNOB
function check_redundant_type()
{
    DEVICE_SUBTYPE=$($SONIC_DB_CLI CONFIG_DB hget "DEVICE_METADATA|localhost" subtype)

    # Start from "false"; only a DualToR whose mux config mentions
    # active-active can flip it to "true".
    ACTIVE_ACTIVE="false"
    if [[ x"$DEVICE_SUBTYPE" == x"DualToR" ]]; then
        MUX_CONFIG=$(show muxcable config)
        if [[ $MUX_CONFIG =~ .*active-active.* ]]; then
            ACTIVE_ACTIVE="true"
        fi
    fi

    # The config knob has the final say: anything other than "True"
    # (including an empty/missing value) forces the result back to "false".
    CONFIG_KNOB=$($SONIC_DB_CLI CONFIG_DB hget "MUX_LINKMGR|SERVICE_MGMT" kill_radv)
    if [[ x"$CONFIG_KNOB" != x"True" ]]; then
        ACTIVE_ACTIVE='false'
    fi

    debug "DEVICE_SUBTYPE: ${DEVICE_SUBTYPE}, CONFIG_KNOB: ${CONFIG_KNOB}"
}
start() {
debug "Starting ${SERVICE}$DEV service..."
@ -43,13 +63,19 @@ stop() {
check_warm_boot
check_fast_boot
check_redundant_type
debug "Warm boot flag: ${SERVICE}$DEV ${WARM_BOOT}."
debug "Fast boot flag: ${SERVICE}$DEV ${FAST_BOOT}."
# For WARM/FAST boot do not perform service stop
if [[ x"$WARM_BOOT" != x"true" ]] && [[ x"$FAST_BOOT" != x"true" ]]; then
/usr/bin/${SERVICE}.sh stop $DEV
debug "Stopped ${SERVICE}$DEV service..."
if [[ x"$SERVICE" == x"radv" ]] && [[ x"$ACTIVE_ACTIVE" == x"true" ]]; then
debug "Killing Docker ${SERVICE}${DEV} for active-active dualtor device..."
/usr/bin/${SERVICE}.sh kill $DEV
else
/usr/bin/${SERVICE}.sh stop $DEV
debug "Stopped ${SERVICE}$DEV service..."
fi
else
debug "Killing Docker ${SERVICE}${DEV}..."
/usr/bin/${SERVICE}.sh kill $DEV