#!/bin/bash

# Space-separated services that are started after, and stopped before, the
# peer service. They are always addressed by their plain unit name.
DEPENDENT="radv dhcp_relay"

# Space-separated services handled like DEPENDENT, except that they are
# addressed as "<name>@$DEV" whenever a device index ($DEV) was supplied.
MULTI_INST_DEPENDENT="teamd"
|
2018-09-24 18:35:01 -05:00
|
|
|
|
|
|
|
# Sends a message to syslog and appends a timestamped copy to the debug log.
# Globals:   DEBUGLOG (read) - path of the file the message is appended to
# Arguments: $1 - message text
function debug()
{
    # Quote the message so it reaches logger as one argument (no globbing or
    # word splitting); "--" stops a leading "-" being parsed as an option.
    /usr/bin/logger -- "$1"
    # $(date) is deliberately left unquoted so the timestamp words are joined
    # by single spaces, exactly as before. The log path is quoted so that a
    # path containing whitespace does not cause an ambiguous redirect.
    /bin/echo $(date) "- $1" >> "${DEBUGLOG}"
}
|
|
|
|
|
|
|
|
# Takes an exclusive flock on ${LOCKFILE} and arranges for it to be released
# when the script exits or receives INT/QUIT/TERM.
# Globals:   LOCKFILE (read), SERVICE (read), DEV (read),
#            LOCKFD (written) - descriptor number holding the lock
function lock_service_state_change()
{
    debug "Locking ${LOCKFILE} from ${SERVICE}$DEV service"

    # Bash allocates a free descriptor, opens the lock file on it and stores
    # the descriptor number in LOCKFD.
    exec {LOCKFD}>"${LOCKFILE}"

    /usr/bin/flock -x "${LOCKFD}"

    # Double quotes on purpose: LOCKFD is expanded now, so the trap command
    # carries the literal descriptor number. Signals: 0 (EXIT), 2 (INT),
    # 3 (QUIT), 15 (TERM).
    # NOTE(review): for INT/QUIT/TERM the handler only unlocks; it does not
    # exit the script. Confirm that is intended.
    trap "/usr/bin/flock -u ${LOCKFD}" EXIT INT QUIT TERM

    debug "Locked ${LOCKFILE} (${LOCKFD}) from ${SERVICE}$DEV service"
}
|
|
|
|
|
|
|
|
# Releases the flock held on the descriptor stored in LOCKFD.
# Globals:   LOCKFILE (read), LOCKFD (read), SERVICE (read), DEV (read)
function unlock_service_state_change()
{
    debug "Unlocking ${LOCKFILE} (${LOCKFD}) from ${SERVICE}$DEV service"
    /usr/bin/flock -u "${LOCKFD}"
}
|
|
|
|
|
|
|
|
# Sets WARM_BOOT to "true" when warm restart is enabled either system-wide or
# for ${SERVICE} in STATE_DB, otherwise to "false".
# Globals:   SONIC_DB_CLI (read), SERVICE (read),
#            SYSTEM_WARM_START, SERVICE_WARM_START, WARM_BOOT (written)
function check_warm_boot()
{
    # SONIC_DB_CLI may hold a command plus arguments, so it must stay
    # unquoted to be split into words.
    SYSTEM_WARM_START=$($SONIC_DB_CLI STATE_DB hget "WARM_RESTART_ENABLE_TABLE|system" enable)
    SERVICE_WARM_START=$($SONIC_DB_CLI STATE_DB hget "WARM_RESTART_ENABLE_TABLE|${SERVICE}" enable)

    if [[ "$SYSTEM_WARM_START" == "true" || "$SERVICE_WARM_START" == "true" ]]; then
        WARM_BOOT="true"
    else
        WARM_BOOT="false"
    fi
}
|
|
|
|
|
2018-10-02 08:08:26 -05:00
|
|
|
# Downgrades WARM_BOOT to "false" when the orchagent restore_count is missing
# from STATE_DB. Does nothing unless WARM_BOOT is currently "true".
# Globals:   SONIC_DB_CLI (read), WARM_BOOT (read/written),
#            RESTORE_COUNT (written)
function validate_restore_count()
{
    if [[ "$WARM_BOOT" == "true" ]]; then
        RESTORE_COUNT=$($SONIC_DB_CLI STATE_DB hget "WARM_RESTART_TABLE|orchagent" restore_count)
        # We have to make sure db data has not been flushed.
        if [[ -z "$RESTORE_COUNT" ]]; then
            WARM_BOOT="false"
        fi
    fi
}
|
|
|
|
|
|
|
|
# Blocks until the database answers PING with PONG and CONFIG_DB reports a
# non-empty CONFIG_DB_INITIALIZED value, polling once per second.
# Globals:   SONIC_DB_CLI (read)
function wait_for_database_service()
{
    # Wait for redis server start before database clean
    until $SONIC_DB_CLI PING | grep -q PONG; do
        sleep 1
    done

    # Wait for configDB initialization
    until [[ -n $($SONIC_DB_CLI CONFIG_DB GET "CONFIG_DB_INITIALIZED") ]]; do
        sleep 1
    done
}
|
2018-08-25 03:39:09 -05:00
|
|
|
|
2018-11-03 14:32:46 -05:00
|
|
|
# Deletes every key matching the given patterns from one database by running
# a Lua script through EVAL.
# Globals:   SONIC_DB_CLI (read)
# Arguments: $1 - the database to clean, passed straight to $SONIC_DB_CLI
#            $2 - comma-separated list of single-quoted key patterns, spliced
#                 verbatim into a Lua table literal (e.g. "'FOO*', 'BAR*'")
function clean_up_tables()
{
    # The trailing 0 tells EVAL that no key arguments follow the script.
    $SONIC_DB_CLI "$1" EVAL "
    local tables = {$2}
    for i = 1, table.getn(tables) do
        local matches = redis.call('KEYS', tables[i])
        for j,name in ipairs(matches) do
            redis.call('DEL', name)
        end
    end" 0
}
|
|
|
|
|
2019-08-08 17:45:17 -05:00
|
|
|
# Starts the peer service and then every dependent service, unless a warm
# boot is in progress.
# Globals:   PEER, DEV, DEPENDENT, MULTI_INST_DEPENDENT (read),
#            WARM_BOOT (refreshed through check_warm_boot)
start_peer_and_dependent_services() {
    check_warm_boot

    if [[ "$WARM_BOOT" == "true" ]]; then
        return 0
    fi

    local dep

    # ${DEV:+@${DEV}} expands to "@<dev>" only when DEV is non-empty, so one
    # command covers both the per-device and the plain unit name.
    /bin/systemctl start "${PEER}${DEV:+@${DEV}}"

    # The lists are space-separated strings; leave them unquoted to split.
    for dep in ${DEPENDENT}; do
        /bin/systemctl start "${dep}"
    done

    for dep in ${MULTI_INST_DEPENDENT}; do
        /bin/systemctl start "${dep}${DEV:+@${DEV}}"
    done
}
|
|
|
|
|
|
|
|
# Stops the dependent services and then the peer service, unless a warm boot
# is in progress. Relies on WARM_BOOT having been set by the caller.
# Globals:   WARM_BOOT, PEER, DEV, DEPENDENT, MULTI_INST_DEPENDENT (read)
stop_peer_and_dependent_services() {
    # if warm start enabled, don't stop peer service docker
    if [[ "$WARM_BOOT" == "true" ]]; then
        return 0
    fi

    local dep

    # ${DEV:+@${DEV}} appends "@<dev>" only when a device index was given.
    for dep in ${MULTI_INST_DEPENDENT}; do
        /bin/systemctl stop "${dep}${DEV:+@${DEV}}"
    done

    for dep in ${DEPENDENT}; do
        /bin/systemctl stop "${dep}"
    done

    /bin/systemctl stop "${PEER}${DEV:+@${DEV}}"
}
|
|
|
|
|
2018-09-24 18:35:01 -05:00
|
|
|
# Starts the ${SERVICE} container. On a cold start the volatile databases and
# selected STATE_DB tables are flushed first; on a warm boot they are kept.
# The whole sequence runs under the service state-change lock.
# Globals:   SERVICE, DEV, SONIC_DB_CLI (read), WARM_BOOT (written via helpers)
start() {
    debug "Starting ${SERVICE}$DEV service..."

    lock_service_state_change

    wait_for_database_service
    check_warm_boot
    validate_restore_count

    debug "Warm boot flag: ${SERVICE}$DEV ${WARM_BOOT}."

    # Don't flush DB during warm boot
    if [[ "$WARM_BOOT" != "true" ]]; then
        # NOTE(review): the message names CONFIG, but the commands below flush
        # APPL, ASIC, COUNTERS, FLEX_COUNTER and RESTAPI - not CONFIG_DB.
        debug "Flushing APP, ASIC, COUNTER, CONFIG, and partial STATE databases ..."
        $SONIC_DB_CLI APPL_DB FLUSHDB
        $SONIC_DB_CLI ASIC_DB FLUSHDB
        $SONIC_DB_CLI COUNTERS_DB FLUSHDB
        $SONIC_DB_CLI FLEX_COUNTER_DB FLUSHDB
        $SONIC_DB_CLI RESTAPI_DB FLUSHDB
        clean_up_tables STATE_DB "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'LAG_TABLE*', 'LAG_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*', 'VRF_TABLE*', 'FDB_TABLE*'"
    fi

    # start service docker
    # ${DEV:+"${DEV}"} passes the device index only when it is non-empty, so
    # no empty argument is handed to the service script.
    "/usr/bin/${SERVICE}.sh" start ${DEV:+"${DEV}"}
    debug "Started ${SERVICE}$DEV service..."

    # Unlock has to happen before reaching out to peer service
    unlock_service_state_change
}
|
|
|
|
|
2019-03-08 12:59:41 -06:00
|
|
|
# Starts the peer and dependent services, gives their containers up to 60
# seconds to report as running, then blocks in docker-wait-any.
# NOTE: this function shadows the bash builtin "wait" for the rest of the
# script; the name is kept because it is the command-line verb.
# Globals:   SERVICE, PEER, DEV, MULTI_INST_DEPENDENT (read)
wait() {
    start_peer_and_dependent_services

    # NOTE: This assumes Docker containers share the same names as their
    # corresponding services
    # With an empty DEV, "${PEER}${DEV}" is simply the peer name, so a single
    # expression covers both the per-device and the plain case.
    local peer_container="${PEER}${DEV}"
    local -a dep_containers=()
    local dep
    for dep in ${MULTI_INST_DEPENDENT}; do
        dep_containers+=("${dep}${DEV}")
    done

    # Allow some time for peer container to start
    local secs container peer_running dep_running all_deps_running
    for secs in {1..60}; do
        peer_running=$(docker inspect -f '{{.State.Running}}' "${peer_container}")

        all_deps_running=true
        for container in "${dep_containers[@]}"; do
            dep_running=$(docker inspect -f '{{.State.Running}}' "${container}")
            if [[ "$dep_running" != "true" ]]; then
                all_deps_running=false
                break
            fi
        done

        if [[ "$peer_running" == "true" && "$all_deps_running" == "true" ]]; then
            break
        fi
        sleep 1
    done

    # -s is given this service's container and -d the peer's; the
    # multi-instance dependents follow as further arguments.
    /usr/bin/docker-wait-any -s "${SERVICE}${DEV}" -d "${peer_container}" "${dep_containers[@]}"
}
|
|
|
|
|
|
|
|
# Stops the ${SERVICE} container under the service state-change lock, clears
# the FAST_REBOOT entries from STATE_DB, then stops the peer and dependent
# services (skipped on warm boot).
# Globals:   SERVICE, DEV, LOCKFILE (read), WARM_BOOT (written via helper)
stop() {
    debug "Stopping ${SERVICE}$DEV service..."

    [[ -f "${LOCKFILE}" ]] || /usr/bin/touch "${LOCKFILE}"

    lock_service_state_change
    check_warm_boot
    debug "Warm boot flag: ${SERVICE}$DEV ${WARM_BOOT}."

    # ${DEV:+"${DEV}"} passes the device index only when it is non-empty.
    "/usr/bin/${SERVICE}.sh" stop ${DEV:+"${DEV}"}
    debug "Stopped ${SERVICE}$DEV service..."

    # Flush FAST_REBOOT table when swss needs to stop. The only
    # time when this would take effect is when fast-reboot
    # encountered error, e.g. syncd crashed. And swss needs to
    # be restarted.
    debug "Clearing FAST_REBOOT flag..."
    clean_up_tables STATE_DB "'FAST_REBOOT*'"

    # Unlock has to happen before reaching out to peer service
    unlock_service_state_change

    stop_peer_and_dependent_services
}
|
|
|
|
|
2020-03-31 12:06:19 -05:00
|
|
|
# Optional device index (second command-line argument); empty when the script
# manages a single instance.
DEV=$2

SERVICE="swss"
PEER="syncd"
# Debug log and lock file paths carry the device index as a suffix.
DEBUGLOG="/tmp/swss-syncd-debug$DEV.log"
LOCKFILE="/tmp/swss-syncd-lock$DEV"
NAMESPACE_PREFIX="asic"

# SONIC_DB_CLI holds a command line (command plus optional "-n <namespace>");
# callers expand it unquoted so that it splits into words.
if [[ -n "$DEV" ]]; then
    NET_NS="$NAMESPACE_PREFIX$DEV" #name of the network namespace
    SONIC_DB_CLI="sonic-db-cli -n $NET_NS"
else
    NET_NS=""
    SONIC_DB_CLI="sonic-db-cli"
fi
|
|
|
|
|
2018-08-22 15:02:32 -05:00
|
|
|
# Dispatch on the first command-line argument: each accepted verb is the name
# of a function defined in this script and is invoked directly.
case "$1" in
    start|wait|stop)
        "$1"
        ;;
    *)
        echo "Usage: $0 {start|wait|stop}"
        exit 1
        ;;
esac
|