From 260a2ec3e7c671b716529b0faf9040fc924ecddc Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 20 Jan 2023 15:34:34 -0800 Subject: [PATCH] [dualtor][active-active]Killing radv instead of stopping on `active-active` dualtor if config knob is on (#13408) How I did it radv sends a good-bye packet when the service is stopped, which causes a IPv6 route update on SoC side. And this update leads to an interface bouncing and causes traffic disruption even though the ToR device might already be isolated. This PR is to mitigate the traffic disruption issue during planned maintenance, by killing radv instead of stopping. So the cease packet won't be sent. How to verify it Verified on dev clusters: Traffic disruption was no longer reproducible. radv took the killing path if knob was off, radv would take the stopping path sign-off: Jing Zhang zhangjing@microsoft.com --- files/scripts/service_mgmt.sh | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/files/scripts/service_mgmt.sh b/files/scripts/service_mgmt.sh index d400c84722..c1916aefa0 100755 --- a/files/scripts/service_mgmt.sh +++ b/files/scripts/service_mgmt.sh @@ -26,6 +26,26 @@ function check_fast_boot () fi } +function check_redundant_type() +{ + DEVICE_SUBTYPE=`$SONIC_DB_CLI CONFIG_DB hget "DEVICE_METADATA|localhost" subtype` + if [[ x"$DEVICE_SUBTYPE" == x"DualToR" ]]; then + MUX_CONFIG=`show muxcable config` + if [[ $MUX_CONFIG =~ .*active-active.* ]]; then + ACTIVE_ACTIVE="true" + else + ACTIVE_ACTIVE="false" + fi + else + ACTIVE_ACTIVE="false" + fi + CONFIG_KNOB=`$SONIC_DB_CLI CONFIG_DB hget "MUX_LINKMGR|SERVICE_MGMT" kill_radv` + if [[ x"$CONFIG_KNOB" != x"True" ]]; then + ACTIVE_ACTIVE='false' + fi + debug "DEVICE_SUBTYPE: ${DEVICE_SUBTYPE}, CONFIG_KNOB: ${CONFIG_KNOB}" +} + start() { debug "Starting ${SERVICE}$DEV service..." @@ -43,13 +63,19 @@ stop() { check_warm_boot check_fast_boot + check_redundant_type debug "Warm boot flag: ${SERVICE}$DEV ${WARM_BOOT}." debug "Fast boot flag: ${SERVICE}$DEV ${FAST_BOOT}." # For WARM/FAST boot do not perform service stop if [[ x"$WARM_BOOT" != x"true" ]] && [[ x"$FAST_BOOT" != x"true" ]]; then - /usr/bin/${SERVICE}.sh stop $DEV - debug "Stopped ${SERVICE}$DEV service..." + if [[ x"$SERVICE" == x"radv" ]] && [[ x"$ACTIVE_ACTIVE" == x"true" ]]; then + debug "Killing Docker ${SERVICE}${DEV} for active-active dualtor device..." + /usr/bin/${SERVICE}.sh kill $DEV + else + /usr/bin/${SERVICE}.sh stop $DEV + debug "Stopped ${SERVICE}$DEV service..." + fi else debug "Killing Docker ${SERVICE}${DEV}..." /usr/bin/${SERVICE}.sh kill $DEV