sonic-buildimage/files/image_config/warmboot-finalizer/finalize-warmboot.sh
Vaibhav Hemant Dixit 0b83639068
Fix CONFIG_DB_INITIALIZED flag check logic and set/reset flag for warmboot (#15685) (#16217)
Cherry-pick of #15685

MSFT ADO: 24274591

Why I did it
Two changes:

1 Fix a day-1 issue where the check that waits until CONFIG_DB_INITIALIZED is set is incorrect.
There are multiple places where the same incorrect logic is used.

Current logic (until [[ $($SONIC_DB_CLI CONFIG_DB GET "CONFIG_DB_INITIALIZED") ]];) will always result in a pass, irrespective of whether the GET operation returns 0 or 1, because [[ string ]] only tests that the string is non-empty.

root@str2-7060cx-32s-29:~# sonic-db-cli CONFIG_DB GET "CONFIG_DB_INITIALIZED"
1
root@str2-7060cx-32s-29:~# until [[ $(sonic-db-cli CONFIG_DB GET "CONFIG_DB_INITIALIZED") ]]; do echo "entered here"; done
root@str2-7060cx-32s-29:~# 

root@str2-7060cx-32s-29:~# 
root@str2-7060cx-32s-29:~# sonic-db-cli CONFIG_DB GET "CONFIG_DB_INITIALIZED"                                             
0
root@str2-7060cx-32s-29:~# until [[ $(sonic-db-cli CONFIG_DB GET "CONFIG_DB_INITIALIZED") ]]; do echo "entered here"; done
root@str2-7060cx-32s-29:~# 
Fix this logic by checking that the value of the flag is "1".

root@str2-7060cx-32s-29:~# until [[ $(sonic-db-cli CONFIG_DB GET "CONFIG_DB_INITIALIZED") -eq 1 ]]; do echo "entered here"; done
entered here
entered here
entered here
This gap in logic was highlighted when another fix was merged: #14933
The issue being fixed here caused warmboot-finalizer to not wait until config-db is initialized.

2 Set and unset CONFIG_DB_INITIALIZED for warm-reboot case
Currently, during warm shutdown CONFIG_DB_INITIALIZED's value is stored in redis db backup. This is restored back when the dump is loaded during warm-recovery.
So the value of CONFIG_DB_INITIALIZED does not depend on config db's state; instead, it remains what it was before the reboot.

Fix this by setting CONFIG_DB_INITIALIZED to 0 when the DB is loaded, and setting it to 1 after db_migrator is done.

Work item tracking
Microsoft ADO (number only):
How I did it
How to verify it
2023-08-24 16:58:24 +08:00

189 lines
4.5 KiB
Bash
Executable File

#! /bin/bash

# When set to "yes", debug() also echoes its messages to stdout.
VERBOSE=no

# Define components that need to reconcile during warm boot:
#   The key is the name of the service that the components belong to.
#   The value is the list of component names that will reconcile.
declare -A RECONCILE_COMPONENTS=(
    ["swss"]="orchagent neighsyncd"
    ["bgp"]="bgp"
    ["nat"]="natsyncd"
    ["mux"]="linkmgrd"
)

# Add (or override) entries from every regular file under /etc/sonic/ whose
# name ends in "_reconcile": the file name minus that suffix becomes the key
# and the file content becomes the component list.
# Paths are read NUL-delimited so that names containing whitespace or glob
# characters are handled as a single path instead of being split.
while IFS= read -r -d '' reconcile_file; do
    file_basename=${reconcile_file##*/}
    docker_container_name=${file_basename%_reconcile}
    # A file named exactly "_reconcile" would give an empty key, which is not
    # a valid associative-array subscript.
    [[ -n "${docker_container_name}" ]] || continue
    RECONCILE_COMPONENTS["${docker_container_name}"]=$(cat -- "${reconcile_file}")
done < <(find /etc/sonic/ -iname '*_reconcile' -type f -print0)

# State value a component must report to count as reconciled.
EXP_STATE="reconciled"
# Helper that is invoked with "-m reset" once reconciliation is over.
ASSISTANT_SCRIPT="/usr/local/bin/neighbor_advertiser"
# Send a message to syslog via /usr/bin/logger, prefixed with
# "WARMBOOT_FINALIZER : ". When VERBOSE is "yes" the message is also printed
# to stdout, preceded by the current date.
# Globals:   VERBOSE (read)
# Arguments: $1 - message text
debug() {
    /usr/bin/logger "WARMBOOT_FINALIZER : $1"

    [[ "${VERBOSE}" == "yes" ]] || return 0

    # $(date) is intentionally left unquoted so its output is re-joined with
    # single spaces, exactly as before.
    echo $(date) "- $1"
}
# Collect the components that are expected to reconcile.
# A service contributes its components only when the "state" field of its
# "FEATURE|<service>" entry in CONFIG_DB is "enabled" or "always_enabled".
# Globals:
#   RECONCILE_COMPONENTS (read)    - service name -> space-separated components
#   SVC_LIST             (written) - space-separated names of all services
#   COMPONENT_LIST       (written) - components of the enabled services; every
#                                    group is preceded by a single space
# Arguments: none
get_component_list() {
    # Keep the loop variables out of the global namespace.
    local service components status

    SVC_LIST=${!RECONCILE_COMPONENTS[@]}
    COMPONENT_LIST=""

    # Iterate over the keys directly so a key is never re-split or globbed.
    for service in "${!RECONCILE_COMPONENTS[@]}"; do
        components=${RECONCILE_COMPONENTS["${service}"]}
        status=$(sonic-db-cli CONFIG_DB HGET "FEATURE|${service}" state)
        if [[ "${status}" == "enabled" || "${status}" == "always_enabled" ]]; then
            COMPONENT_LIST="${COMPONENT_LIST} ${components}"
        fi
    done
}
# Read the "enable" field of "WARM_RESTART_ENABLE_TABLE|system" from STATE_DB.
# Globals:   WARM_BOOT (written) - the value printed by sonic-db-cli
# Arguments: none
check_warm_boot() {
    WARM_BOOT=$(sonic-db-cli STATE_DB hget "WARM_RESTART_ENABLE_TABLE|system" enable)
}
# Read the "enable" field of "FAST_RESTART_ENABLE_TABLE|system" from STATE_DB
# and log whether fast-reboot is enabled.
# Globals:   FAST_REBOOT (written) - the value printed by sonic-db-cli
# Arguments: none
check_fast_reboot() {
    debug "Checking if fast-reboot is enabled..."

    FAST_REBOOT=$(sonic-db-cli STATE_DB hget "FAST_RESTART_ENABLE_TABLE|system" enable)

    # Only the exact string "true" counts as enabled.
    case "${FAST_REBOOT}" in
        true)
            debug "Fast-reboot is enabled..."
            ;;
        *)
            debug "Fast-reboot is disabled..."
            ;;
    esac
}
# Block until the database can be used:
#   1. poll once per second until "sonic-db-cli PING" output contains PONG;
#   2. poll once per second until CONFIG_DB key CONFIG_DB_INITIALIZED
#      evaluates to 1.
# There is no timeout; the function returns only when both conditions hold.
# Arguments: none
wait_for_database_service() {
    local pong_count db_initialized

    debug "Wait for database to become ready..."

    # Wait for redis server start before database clean
    while true; do
        pong_count=$(sonic-db-cli PING | grep -c PONG)
        if [[ ${pong_count} -gt 0 ]]; then
            break
        fi
        sleep 1
    done

    # Wait for configDB initialization
    while true; do
        db_initialized=$(sonic-db-cli CONFIG_DB GET "CONFIG_DB_INITIALIZED")
        # Numeric comparison: an empty reply counts as 0, i.e. not initialized.
        if [[ ${db_initialized} -eq 1 ]]; then
            break
        fi
        sleep 1
    done

    debug "Database is ready..."
}
# Print the "state" field of "WARM_RESTART_TABLE|<component>" from STATE_DB.
# Arguments: $1 - component name
# Outputs:   whatever sonic-db-cli prints for that field
get_component_state() {
    local table_key="WARM_RESTART_TABLE|$1"

    sonic-db-cli STATE_DB hget "${table_key}" state
}
# Print the subset of the given components whose state (as reported by
# get_component_state) differs from EXP_STATE.
# Globals:
#   EXP_STATE (read)    - state a component must report to be dropped
#   RET_LIST  (written) - same names, each preceded by a single space
# Arguments: component names, one per argument
# Outputs:   the remaining names on one line separated by single spaces;
#            an empty line when no component remains
check_list() {
    local comp state

    RET_LIST=''
    # "$@" keeps every argument intact: no re-splitting and no glob expansion.
    for comp in "$@"; do
        state=$(get_component_state "${comp}")
        if [[ "${state}" != "${EXP_STATE}" ]]; then
            RET_LIST="${RET_LIST} ${comp}"
        fi
    done

    # Drop the leading separator. printf is used so that a name is never
    # interpreted as an echo option or expanded as a glob pattern.
    printf '%s\n' "${RET_LIST# }"
}
# Log the finalization step and run "config warm_restart disable" through sudo.
# Arguments: none
# Returns:   exit status of the sudo command
finalize_warm_boot() {
    debug "Finalizing warmboot..."

    sudo config warm_restart disable
}
# Finish a fast-reboot:
#   - set field "enable" of STATE_DB "FAST_RESTART_ENABLE_TABLE|system" to "false";
#   - delete key "WARM_RESTART|teamd" from CONFIG_DB.
# All output of both sonic-db-cli calls is discarded.
# Arguments: none
# Returns:   exit status of the CONFIG_DB DEL call
finalize_fast_reboot() {
    debug "Finalizing fast-reboot..."

    sonic-db-cli STATE_DB hset "FAST_RESTART_ENABLE_TABLE|system" "enable" "false" >/dev/null 2>&1
    sonic-db-cli CONFIG_DB DEL "WARM_RESTART|teamd" >/dev/null 2>&1
}
# Run "${ASSISTANT_SCRIPT} -m reset" when that file exists and is executable;
# otherwise do nothing and succeed.
# Globals:   ASSISTANT_SCRIPT (read) - path of the helper script
# Arguments: none
# Returns:   0 when the helper is not executable, else the helper's exit status
stop_control_plane_assistant() {
    [[ -x "${ASSISTANT_SCRIPT}" ]] || return 0

    debug "Tearing down control plane assistant ..."
    # Quoted so the path that is executed is exactly the path tested above,
    # even when it contains whitespace or glob characters.
    "${ASSISTANT_SCRIPT}" -m reset
}
# If the directory /host/counters exists, move it to /tmp/cache and
# recursively change the owner of /tmp/cache to admin:admin.
# Globals:   cache_counters_folder (written) - "/host/counters"
# Arguments: none
# Returns:   0 when the directory is absent, else the exit status of chown
restore_counters_folder() {
    debug "Restoring counters folder after warmboot..."

    cache_counters_folder="/host/counters"

    # Nothing to restore when the directory is absent.
    [[ -d $cache_counters_folder ]] || return 0

    mv $cache_counters_folder /tmp/cache
    chown -R admin:admin /tmp/cache
}
# ---------------------------------------------------------------------------
# Main flow
# ---------------------------------------------------------------------------

wait_for_database_service

# Read the fast-reboot and warm-reboot flags.
check_fast_reboot
check_warm_boot

# Exit early when neither warm-reboot nor fast-reboot is enabled.
if [[ "${WARM_BOOT}" != "true" ]]; then
    debug "warmboot is not enabled ..."
    if [[ "${FAST_REBOOT}" != "true" ]]; then
        debug "fastboot is not enabled ..."
        exit 0
    fi
fi

# Only for warm-reboot without fast-reboot.
if [[ "${WARM_BOOT}" == "true" && "${FAST_REBOOT}" != "true" ]]; then
    restore_counters_folder
fi

get_component_list

debug "Waiting for components: '${COMPONENT_LIST}' to reconcile ..."

list=${COMPONENT_LIST}

# Wait up to 5 minutes: 60 polls, 5 seconds apart.
for ((i = 1; i <= 60; i++)); do
    # ${list} is deliberately unquoted so that every component name is passed
    # to check_list as a separate argument.
    list=$(check_list ${list})
    [[ -n "${list}" ]] || break
    sleep 5
done

# Only for warm-reboot without fast-reboot.
if [[ "${WARM_BOOT}" == "true" && "${FAST_REBOOT}" != "true" ]]; then
    stop_control_plane_assistant
fi

# Whatever is still in the list did not reach the expected state in time.
if [[ -n "${list}" ]]; then
    debug "Some components didn't finish reconcile: ${list} ..."
fi

if [[ "${FAST_REBOOT}" == "true" ]]; then
    finalize_fast_reboot
fi

if [[ "${WARM_BOOT}" == "true" ]]; then
    finalize_warm_boot
fi

# Save DB after stopped control plane assistant to avoid extra entries
debug "Save in-memory database after warm/fast reboot ..."
config save -y