diff --git a/dockers/docker-config-engine-bullseye/Dockerfile.j2 b/dockers/docker-config-engine-bullseye/Dockerfile.j2
index 57631df95c..f6804de903 100644
--- a/dockers/docker-config-engine-bullseye/Dockerfile.j2
+++ b/dockers/docker-config-engine-bullseye/Dockerfile.j2
@@ -44,6 +44,7 @@ RUN pip3 install redis==4.5.4
 
 # Copy files
 COPY ["files/swss_vars.j2", "/usr/share/sonic/templates/"]
+COPY ["files/readiness_probe.sh", "/usr/bin/"]
 COPY ["files/container_startup.py", "/usr/share/sonic/scripts/"]
 
 ## Clean up
diff --git a/dockers/docker-config-engine-buster/Dockerfile.j2 b/dockers/docker-config-engine-buster/Dockerfile.j2
index ccadb6e439..084956eed5 100644
--- a/dockers/docker-config-engine-buster/Dockerfile.j2
+++ b/dockers/docker-config-engine-buster/Dockerfile.j2
@@ -44,6 +44,7 @@ RUN pip3 install redis==4.5.4
 
 # Copy files
 COPY ["files/swss_vars.j2", "/usr/share/sonic/templates/"]
+COPY ["files/readiness_probe.sh", "/usr/bin/"]
 COPY ["files/container_startup.py", "/usr/share/sonic/scripts/"]
 
 ## Clean up
diff --git a/rules/docker-config-engine-bullseye.mk b/rules/docker-config-engine-bullseye.mk
index 808905496e..084b4b82dc 100644
--- a/rules/docker-config-engine-bullseye.mk
+++ b/rules/docker-config-engine-bullseye.mk
@@ -19,6 +19,7 @@ $(DOCKER_CONFIG_ENGINE_BULLSEYE)_LOAD_DOCKERS += $(DOCKER_BASE_BULLSEYE)
 $(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $(SWSS_VARS_TEMPLATE)
 $(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $(RSYSLOG_PLUGIN_CONF_J2)
 $(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $($(SONIC_CTRMGRD)_CONTAINER_SCRIPT)
+$(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $($(SONIC_CTRMGRD)_HEALTH_PROBE)
 $(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $($(SONIC_CTRMGRD)_STARTUP_SCRIPT)
 
 $(DOCKER_CONFIG_ENGINE_BULLSEYE)_DBG_DEPENDS = $($(DOCKER_BASE_BULLSEYE)_DBG_DEPENDS) \
diff --git a/rules/docker-config-engine-buster.mk b/rules/docker-config-engine-buster.mk
index 474617bbbd..4d1e7e0a4b 100644
--- a/rules/docker-config-engine-buster.mk
+++ b/rules/docker-config-engine-buster.mk
@@ -18,6 +18,7 @@ $(DOCKER_CONFIG_ENGINE_BUSTER)_LOAD_DOCKERS += $(DOCKER_BASE_BUSTER)
 $(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $(SWSS_VARS_TEMPLATE)
 $(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $(RSYSLOG_PLUGIN_CONF_J2)
 $(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $($(SONIC_CTRMGRD)_CONTAINER_SCRIPT)
+$(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $($(SONIC_CTRMGRD)_HEALTH_PROBE)
 $(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $($(SONIC_CTRMGRD)_STARTUP_SCRIPT)
 
 $(DOCKER_CONFIG_ENGINE_BUSTER)_DBG_DEPENDS = $($(DOCKER_BASE_BUSTER)_DBG_DEPENDS) \
diff --git a/rules/sonic-ctrmgrd.mk b/rules/sonic-ctrmgrd.mk
index 659a2cf4ac..167d78c43c 100644
--- a/rules/sonic-ctrmgrd.mk
+++ b/rules/sonic-ctrmgrd.mk
@@ -20,12 +20,16 @@ $($(SONIC_CTRMGRD)_CFG_JSON)_PATH = $($(SONIC_CTRMGRD)_FILES_PATH)
 $(SONIC_CTRMGRD)_SERVICE = ctrmgrd.service
 $($(SONIC_CTRMGRD)_SERVICE)_PATH = $($(SONIC_CTRMGRD)_FILES_PATH)
 
+$(SONIC_CTRMGRD)_HEALTH_PROBE = readiness_probe.sh
+$($(SONIC_CTRMGRD)_HEALTH_PROBE)_PATH = $($(SONIC_CTRMGRD)_FILES_PATH)
+
 SONIC_PYTHON_WHEELS += $(SONIC_CTRMGRD)
 
 $(SONIC_CTRMGRD)_FILES = $($(SONIC_CTRMGRD)_CONTAINER_SCRIPT)
 $(SONIC_CTRMGRD)_FILES += $($(SONIC_CTRMGRD)_STARTUP_SCRIPT)
 $(SONIC_CTRMGRD)_FILES += $($(SONIC_CTRMGRD)_CFG_JSON)
 $(SONIC_CTRMGRD)_FILES += $($(SONIC_CTRMGRD)_SERVICE)
+$(SONIC_CTRMGRD)_FILES += $($(SONIC_CTRMGRD)_HEALTH_PROBE)
 
 SONIC_COPY_FILES += $($(SONIC_CTRMGRD)_FILES)
 
diff --git a/src/sonic-ctrmgrd/ctrmgr/readiness_probe.sh b/src/sonic-ctrmgrd/ctrmgr/readiness_probe.sh
new file mode 100755
index 0000000000..9e796ca038
--- /dev/null
+++ b/src/sonic-ctrmgrd/ctrmgr/readiness_probe.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# This script is used by k8s to check the readiness of containers.
+# Exit code 0 means the container is ready; any other exit code means it is not ready.
+
+#### Exit code contract: k8s only cares about zero vs. non-zero, but distinct non-zero codes are used to indicate different errors
+# 0: ready
+# 1: if the hook script is python code, the default crash exit code is 1
+# 2: the supervisor start service has not exited normally
+# other exit codes: returned by post_check_script and defined there; should not include 1 or 2
+
+# Check whether the start service exists.
+# If the start service doesn't exist, do nothing.
+# If the start service exists, it must be in the EXITED state;
+# otherwise exit with code 2.
+pre_check_service_name="start"
+no_process_string="ERROR (no such process)"
+service_status=$(supervisorctl status "$pre_check_service_name")
+# The state is the second whitespace-separated field of the status output.
+read -r _ service_state _ <<< "$service_status"
+if [[ $service_status != *"$no_process_string"* && $service_state != 'EXITED' ]]; then
+    exit 2
+fi
+
+# Feature owners can add their own readiness check script.
+# If post_check_script exists and is executable, run it;
+# if it exits with a non-zero code, exit with that same code.
+post_check_script="/usr/bin/readiness_probe_hook"
+if [ -x "$post_check_script" ]; then
+    "$post_check_script"
+    post_check_result=$?
+    if [ "$post_check_result" -ne 0 ]; then
+        exit "$post_check_result"
+    fi
+fi
+
+exit 0