2020-11-13 15:34:18 -06:00
|
|
|
#!/bin/bash
|
|
|
|
|
[systemd] ASIC status based service bringup on VOQ chassis (#7477)
Changes to allow starting per-ASIC services like swss and syncd only after the platform vendor code has detected the ASIC and published a notification. The systemd service ordering we want is database->database@->pmon->swss@->syncd@->teamd@->lldp@
There is also a requirement that management, telemetry, snmp dockers can start even if all asic services are not up.
Why I did it
For VOQ chassis, the fabric cards will have 1-N asics. Also, there could be multiple removable fabric cards. On the supervisor, swss and syncd containers need to be started only if the fabric-card is in Online state and respective asics are detected by the kernel. Using systemd, the dependent services can be in inactive state.
How I did it
Introduce a mechanism where every ASIC-dependent service waits for its ASIC's state to be published via PMON to Redis. Once the subscription notification is received, the service proceeds to create its respective dockers.
For fixed platforms, systemd is unchanged i.e. the service bring up and docker creation happens in the start()/ExecStartPre routine of the .sh scripts.
For VOQ chassis platform on supervisor, the service bringup skips docker creation in the start() routine, but does it in the wait()/ExecStart routine of the .sh scripts.
Management dockers are decoupled from ASIC docker creation.
2021-07-28 01:02:49 -05:00
|
|
|
. /usr/local/bin/asic_status.sh
|
|
|
|
|
2020-11-13 15:34:18 -06:00
|
|
|
# Send a message to syslog and append a timestamped copy to the debug log.
# Globals:
#   DEBUG_LOG (read) - path of the file the timestamped message is appended to
# Arguments:
#   $1 - message text
function debug()
{
    # Quote the message so it reaches logger as one argument (no word
    # splitting or globbing); "--" keeps a leading "-" from being parsed
    # as a logger option.
    /usr/bin/logger -- "$1"
    # Quote the redirect target so a log path containing spaces still works.
    /bin/echo "$(date)" "- $1" >> "${DEBUG_LOG}"
}
|
|
|
|
|
|
|
|
# Decide whether a warm boot is in progress, system-wide or for this service.
# Globals:
#   SONIC_DB_CLI (read)        - database CLI command line
#   SERVICE (read)             - service name used in the per-service key
#   SYSTEM_WARM_START (written)  - "enable" field of WARM_RESTART_ENABLE_TABLE|system
#   SERVICE_WARM_START (written) - "enable" field of WARM_RESTART_ENABLE_TABLE|<SERVICE>
#   WARM_BOOT (written)        - "true" if either field equals "true", else "false"
function check_warm_boot()
{
    # SONIC_DB_CLI is deliberately left unquoted: it is a command line that
    # may carry options (e.g. "-n <namespace>") and must be word-split.
    SYSTEM_WARM_START=$($SONIC_DB_CLI STATE_DB hget "WARM_RESTART_ENABLE_TABLE|system" enable)
    SERVICE_WARM_START=$($SONIC_DB_CLI STATE_DB hget "WARM_RESTART_ENABLE_TABLE|${SERVICE}" enable)

    if [[ "$SYSTEM_WARM_START" == "true" || "$SERVICE_WARM_START" == "true" ]]; then
        WARM_BOOT="true"
    else
        WARM_BOOT="false"
    fi
}
|
|
|
|
|
|
|
|
# Downgrade a warm boot to a normal boot when the service has no restore count.
# Globals:
#   WARM_BOOT (read/written)  - only examined when "true"; reset to "false"
#                               if the restore count is empty
#   SONIC_DB_CLI (read)       - database CLI command line
#   SERVICE (read)            - service name used in the WARM_RESTART_TABLE key
#   RESTORE_COUNT (written)   - "restore_count" field of WARM_RESTART_TABLE|<SERVICE>
function validate_restore_count()
{
    if [[ "$WARM_BOOT" == "true" ]]; then
        # SONIC_DB_CLI is deliberately unquoted so embedded options are split.
        RESTORE_COUNT=$($SONIC_DB_CLI STATE_DB hget "WARM_RESTART_TABLE|${SERVICE}" restore_count)
        # We have to make sure db data has not been flushed.
        if [[ -z "$RESTORE_COUNT" ]]; then
            WARM_BOOT="false"
        fi
    fi
}
|
|
|
|
|
|
|
|
# Decide whether a fast reboot is in progress.
# Globals:
#   SONIC_DB_CLI (read) - database CLI command line
#   FAST_BOOT (written) - "true" when STATE_DB key "FAST_REBOOT|system" reads
#                         exactly "1", otherwise "false"
function check_fast_boot ()
{
    # SONIC_DB_CLI is deliberately unquoted so embedded options are split.
    if [[ $($SONIC_DB_CLI STATE_DB GET "FAST_REBOOT|system") == "1" ]]; then
        FAST_BOOT="true"
    else
        FAST_BOOT="false"
    fi
}
|
|
|
|
|
|
|
|
# Start the service: evaluate warm/fast boot state, log it, and (unless this
# is a chassis supervisor) launch the service docker via /usr/bin/<SERVICE>.sh.
# Globals:
#   SERVICE, DEV (read)          - service name and optional instance suffix
#   WARM_BOOT, FAST_BOOT (read)  - set by the check_* helpers called here
# NOTE(review): is_chassis_supervisor is not defined in this file; presumably
# it comes from the sourced /usr/local/bin/asic_status.sh - confirm.
start() {
    debug "Starting ${SERVICE}$DEV service..."

    check_warm_boot
    validate_restore_count

    check_fast_boot

    debug "Warm boot flag: ${SERVICE}$DEV ${WARM_BOOT}."
    # Variable names are case-sensitive: the flag set by check_fast_boot is
    # FAST_BOOT (logging "Fast_BOOT" would always print an empty value).
    debug "Fast boot flag: ${SERVICE}$DEV ${FAST_BOOT}."

    # On supervisor card, skip starting asic related services here. In wait(),
    # wait until the asic is detected by pmon and published via database.
    if ! is_chassis_supervisor; then
        # start service docker
        # ${DEV:+"$DEV"} passes DEV as one quoted argument when set and no
        # argument at all when empty, matching the original call shape.
        "/usr/bin/${SERVICE}.sh" start ${DEV:+"$DEV"}
        debug "Started ${SERVICE}$DEV service..."
    fi
}
|
|
|
|
|
|
|
|
# Wait on the service docker. On a chassis supervisor, first start the docker
# if check_asic_status reports success (exit status 0).
# Globals:
#   SERVICE, DEV (read)    - service name and optional instance suffix
#   ASIC_STATUS (written)  - exit status of check_asic_status (supervisor only)
# Returns: the exit status of "/usr/bin/<SERVICE>.sh wait".
# NOTE: this function intentionally shadows the shell builtin "wait" because
# the script dispatches on the literal sub-command name.
# NOTE(review): is_chassis_supervisor and check_asic_status are not defined in
# this file; presumably they come from the sourced asic_status.sh - confirm.
wait() {
    # On supervisor card, wait for asic to be online before starting the docker.
    if is_chassis_supervisor; then
        check_asic_status
        ASIC_STATUS=$?

        # start service docker
        if [[ "$ASIC_STATUS" -eq 0 ]]; then
            # ${DEV:+"$DEV"}: one quoted argument when set, none when empty.
            "/usr/bin/${SERVICE}.sh" start ${DEV:+"$DEV"}
            debug "Started ${SERVICE}$DEV service..."
        fi
    fi

    "/usr/bin/${SERVICE}.sh" wait ${DEV:+"$DEV"}
}
|
|
|
|
|
|
|
|
# Stop the service. For warm or fast boot, signal the teamd processes inside
# the container, wait for them to exit, then kill the container; otherwise
# delegate to "/usr/bin/<SERVICE>.sh stop".
# Globals:
#   SERVICE, DEV (read)          - container name is "<SERVICE><DEV>"
#   TEAMD_CMD (read)             - pattern matched by pkill/pgrep -f
#   WARM_BOOT, FAST_BOOT (read)  - set by the check_* helpers called here
stop() {
    debug "Stopping ${SERVICE}$DEV service..."

    check_warm_boot
    check_fast_boot
    debug "Warm boot flag: ${SERVICE}$DEV ${WARM_BOOT}."
    debug "Fast boot flag: ${SERVICE}$DEV ${FAST_BOOT}."

    if [[ "$WARM_BOOT" == "true" ]]; then
        # Send USR1 signal to all teamd instances to stop them
        # It will prepare teamd for warm-reboot
        # Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port
        # Exit status 1 (pkill: no process matched) is accepted as success.
        docker exec -i "${SERVICE}${DEV}" pkill -USR1 -f "${TEAMD_CMD}" > /dev/null || [[ $? -eq 1 ]]
    elif [[ "$FAST_BOOT" == "true" ]]; then
        # Kill teamd processes inside of teamd container with SIGUSR2 to allow them to send last LACP frames
        # We call `docker kill teamd` to ensure the container stops as quickly as possible.
        # Note: teamd must be killed before syncd, because it will send the last packet through CPU port
        # Exit status 1 (pkill: no process matched) is accepted as success.
        docker exec -i "${SERVICE}${DEV}" pkill -USR2 -f "${TEAMD_CMD}" || [[ $? -eq 1 ]]
    fi

    if [[ "$WARM_BOOT" == "true" || "$FAST_BOOT" == "true" ]]; then
        # Poll until no teamd process is left in the container (the loop also
        # ends if "docker exec" itself fails, e.g. container not running).
        while docker exec -i "${SERVICE}${DEV}" pgrep -f "${TEAMD_CMD}" > /dev/null; do
            sleep 0.05
        done
        docker kill "${SERVICE}${DEV}" &> /dev/null || debug "Docker ${SERVICE}$DEV is not running ($?) ..."
    else
        # ${DEV:+"$DEV"}: one quoted argument when set, none when empty.
        "/usr/bin/${SERVICE}.sh" stop ${DEV:+"$DEV"}
    fi

    debug "Stopped ${SERVICE}$DEV service..."
}
|
|
|
|
|
|
|
|
# Script configuration and sub-command dispatch.
# Arguments:
#   $1 - sub-command: start | wait | stop
#   $2 - optional instance suffix (DEV); when set, database access is
#        directed at the network namespace "asic<DEV>"
DEV=$2

SERVICE="teamd"
TEAMD_CMD="/usr/bin/teamd"
DEBUG_LOG="/tmp/teamd-debug$DEV.log"
NAMESPACE_PREFIX="asic"
if [[ -n "$DEV" ]]; then
    NET_NS="$NAMESPACE_PREFIX$DEV" #name of the network namespace
    # Kept as a single string; callers expand it unquoted so that the
    # "-n <namespace>" option is word-split into separate arguments.
    SONIC_DB_CLI="sonic-db-cli -n $NET_NS"
else
    SONIC_DB_CLI="sonic-db-cli"
fi

case "$1" in
    start|wait|stop)
        # The sub-command name is also the name of the function to run.
        "$1"
        ;;
    *)
        echo "Usage: $0 {start|wait|stop}"
        exit 1
        ;;
esac
|