diff --git a/files/image_config/misc/docker-wait-any b/files/image_config/misc/docker-wait-any index 3988a9fbdf..6c764fb786 100755 --- a/files/image_config/misc/docker-wait-any +++ b/files/image_config/misc/docker-wait-any @@ -3,50 +3,96 @@ """ docker-wait-any This script takes one or more Docker container names as arguments, - and it will block indefinitely while all of the specified containers - are running. If any of the specified containers stop, the script will + [-s] argument is for the service which invokes this script + [-d] argument is to list the dependent services for the above service. + It will block indefinitely while all of the specified containers + are running. If any of the specified containers stop, the script will exit. + This script was created because the 'docker wait' command is lacking this functionality. It will block until ALL specified containers have stopped running. Here, we spawn multiple threads and wait on one container per thread. If any of the threads exit, the entire - application will exit. - NOTE: This script is written against docker-py version 1.6.0. Newer - versions of docker-py have a different API. -""" + application will exit, unless we are in a scenario where the following + conditions are met. + (i) the container is a dependent service + (ii) warm restart is enabled at system level or for that container OR + fast reboot is enabled at system level + In this scenario, the g_thread_exit_event won't be propagated to the parent, + instead the thread will continue to do docker_client.wait again. This helps + cases where we need the dependent container to be warm-restarted without + affecting other services (e.g. warm restart of teamd service) + NOTE: This script is written against docker Python package 4.1.0. Newer + versions of docker may have a different API. 
+""" +import argparse import sys import threading from docker import Client +import time + +from docker import APIClient +from sonic_py_common import logger, device_info + +SYSLOG_IDENTIFIER = 'docker-wait-any' + +# Global logger instance +log = logger.Logger(SYSLOG_IDENTIFIER) # Instantiate a global event to share among our threads g_thread_exit_event = threading.Event() - - -def usage(): - print("Usage: {} [ ...]".format(sys.argv[0])) - sys.exit(1) - +g_service = [] +g_dep_services = [] def wait_for_container(docker_client, container_name): - docker_client.wait(container_name) + while True: + while docker_client.inspect_container(container_name)['State']['Status'] != "running": + time.sleep(1) - print("No longer waiting on container '{}'".format(container_name)) + docker_client.wait(container_name) - # Signal the main thread to exit - g_thread_exit_event.set() + log.log_info("No longer waiting on container '{}'".format(container_name)) + # If this is a dependent service and warm restart is enabled for the system/container, + # OR if the system is going through a fast-reboot, DON'T signal main thread to exit + if (container_name in g_dep_services and + (device_info.is_warm_restart_enabled(container_name) or device_info.is_fast_reboot_enabled())): + continue + + # Signal the main thread to exit + g_thread_exit_event.set() def main(): thread_list = [] docker_client = Client(base_url='unix://var/run/docker.sock') - # Ensure we were passed at least one argument - if len(sys.argv) < 2: - usage() + parser = argparse.ArgumentParser(description='Wait for dependent docker services', + version='1.0.0', + formatter_class=argparse.RawTextHelpFormatter, + epilog=""" +Examples: + docker-wait-any -s swss -d syncd teamd +""") - container_names = sys.argv[1:] + parser.add_argument('-s','--service', nargs='+', default=None, help='name of the service') + parser.add_argument('-d','--dependent', nargs='*', default=None, help='other dependent services') + args = parser.parse_args() + + 
global g_service + global g_dep_services + + if args.service is not None: + g_service = args.service + if args.dependent is not None: + g_dep_services = args.dependent + + container_names = g_service + g_dep_services + + # If the service and dependents passed as args is empty, then exit + if container_names == []: + sys.exit(0) for container_name in container_names: t = threading.Thread(target=wait_for_container, args=[docker_client, container_name]) diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index 74828c740f..9053243872 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -161,7 +161,20 @@ wait() { else RUNNING=$(docker inspect -f '{{.State.Running}}' ${PEER}) fi - if [[ x"$RUNNING" == x"true" ]]; then + ALL_DEPS_RUNNING=true + for dep in ${MULTI_INST_DEPENDENT}; do + if [[ ! -z $DEV ]]; then + DEP_RUNNING=$(docker inspect -f '{{.State.Running}}' ${dep}$DEV) + else + DEP_RUNNING=$(docker inspect -f '{{.State.Running}}' ${dep}) + fi + if [[ x"$DEP_RUNNING" != x"true" ]]; then + ALL_DEPS_RUNNING=false + break + fi + done + + if [[ x"$RUNNING" == x"true" && x"$ALL_DEPS_RUNNING" == x"true" ]]; then break else sleep 1 @@ -170,10 +183,18 @@ wait() { # NOTE: This assumes Docker containers share the same names as their # corresponding services + for dep in ${MULTI_INST_DEPENDENT}; do + if [[ ! -z $DEV ]]; then + ALL_DEPS="$ALL_DEPS ${dep}$DEV" + else + ALL_DEPS="$ALL_DEPS ${dep}" + fi + done + if [[ ! 
-z $DEV ]]; then - /usr/bin/docker-wait-any ${SERVICE}$DEV ${PEER}$DEV + /usr/bin/docker-wait-any -s ${SERVICE}$DEV -d ${PEER}$DEV ${ALL_DEPS} else - /usr/bin/docker-wait-any ${SERVICE} ${PEER} + /usr/bin/docker-wait-any -s ${SERVICE} -d ${PEER} ${ALL_DEPS} fi } diff --git a/src/sonic-py-common/sonic_py_common/device_info.py b/src/sonic-py-common/sonic_py_common/device_info.py index 1393e19589..7f75ea6cdc 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -9,7 +9,7 @@ import subprocess from natsort import natsorted # TODD: Replace with swsscommon -from swsssdk import ConfigDBConnector, SonicDBConfig +from swsssdk import ConfigDBConnector, SonicDBConfig, SonicV2Connector USR_SHARE_SONIC_PATH = "/usr/share/sonic" HOST_DEVICE_PATH = USR_SHARE_SONIC_PATH + "/device" @@ -363,3 +363,38 @@ def get_system_routing_stack(): raise OSError("Cannot detect routing stack") return result + +# Check if System warm reboot or Container warm restart is enabled. +def is_warm_restart_enabled(container_name): + state_db = SonicV2Connector(host='127.0.0.1') + state_db.connect(state_db.STATE_DB, False) + + TABLE_NAME_SEPARATOR = '|' + prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR + + # Get the system warm reboot enable state + _hash = '{}{}'.format(prefix, 'system') + wr_system_state = state_db.get(state_db.STATE_DB, _hash, "enable") + wr_enable_state = True if wr_system_state == "true" else False + + # Get the container warm reboot enable state + _hash = '{}{}'.format(prefix, container_name) + wr_container_state = state_db.get(state_db.STATE_DB, _hash, "enable") + wr_enable_state |= True if wr_container_state == "true" else False + + state_db.close(state_db.STATE_DB) + return wr_enable_state + +# Check if System fast reboot is enabled. 
+def is_fast_reboot_enabled(): + fb_system_state = 0 + cmd = 'sonic-db-cli STATE_DB get "FAST_REBOOT|system"' + proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + (stdout, stderr) = proc.communicate() + + if proc.returncode != 0: + log.log_error("Error running command '{}'".format(cmd)) + elif stdout: + fb_system_state = stdout.rstrip('\n') + + return fb_system_state