[container_checker] Use Feature table to get running containers (#7474)

Why I did it
Finding running containers through "docker ps" breaks when Kubernetes deploys a container, because the container names are mangled.

How I did it
The data is available from the FEATURE table, which also covers Kubernetes deployments.

How to verify it
Deploy a feature via Kubernetes and verify that container_checker does not report an error.
This commit is contained in:
Renuka Manavalan 2021-05-07 08:42:15 -07:00 committed by GitHub
parent 3dddbf22fa
commit 7a575b3d00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -16,50 +16,31 @@ check program container_checker with path "/usr/bin/container_checker"
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
"""
import subprocess
import docker
import sys
import swsssdk
from sonic_py_common import multi_asic
def get_command_result(command):
    """
    @summary: This function will execute the command through the shell and return
              the resulting output. The process exits with status 1 if the command
              returns a non-zero code, and with status 2 if the command could not
              be started.
    @param command: A string holding the command line to execute. It is passed to
                    the shell unmodified, so callers must only supply trusted text.
    @return: A list of strings obtained by stripping trailing whitespace from the
             command's standard output and splitting it on newlines (a list with
             one empty string if the command printed nothing).
    """
    # Keep the try body limited to the calls that can actually raise
    # OSError/ValueError (spawning the process and talking to it).
    try:
        proc_instance = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                         shell=True, universal_newlines=True)
        command_stdout, command_stderr = proc_instance.communicate()
    except (OSError, ValueError) as err:
        print("Failed to execute the command '{}'. Error: '{}'".format(command, err))
        sys.exit(2)

    if proc_instance.returncode != 0:
        print("Failed to execute the command '{}'. Return code: '{}'".format(
            command, proc_instance.returncode))
        # stderr is captured above; report it so that a failing command can be
        # diagnosed instead of silently discarding its error output.
        error_text = command_stderr.strip()
        if error_text:
            print("Command stderr: '{}'".format(error_text))
        sys.exit(1)

    return command_stdout.rstrip().split("\n")
from swsscommon import swsscommon
def get_expected_running_containers():
"""
@summary: This function will get the expected running containers by following the rule:
@summary: This function will get the expected running & always-enabled containers by following the rule:
The 'state' field of container in 'FEATURE' table should not be 'disabled' or 'always_disabled'.
If the device has Multi-ASIC, this function will get container list by determining the
value of field 'has_global_scope', the number of ASICs and the value of field
'has_per_asic_scope'.
If the device has single ASIC, the container name was put into the list.
@return: A set which contains the expected running containers.
@return: A set which contains the expected running containers and a set that has
containers marked as "always_enabled".
"""
config_db = swsssdk.ConfigDBConnector()
config_db.connect()
feature_table = config_db.get_table("FEATURE")
expected_running_containers = set()
always_running_containers = set()
for container_name in feature_table.keys():
if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]:
@ -70,37 +51,95 @@ def get_expected_running_containers():
num_asics = multi_asic.get_num_asics()
for asic_id in range(num_asics):
expected_running_containers.add(container_name + str(asic_id))
elif feature_table[container_name]["state"] == 'always_enabled':
always_running_containers.add(container_name)
else:
expected_running_containers.add(container_name)
return expected_running_containers
return expected_running_containers, always_running_containers
def get_current_running_containers():
def get_current_running_from_DB(always_running_containers):
    """
    @summary: This function will get the current running container list
              from FEATURE table @ STATE_DB, if this table is available.
              A feature whose entry carries a non-empty 'container_id' field is
              treated as running. Each name in `always_running_containers` is
              additionally looked up through the Docker API and added when its
              reported status is 'running'.
    @param always_running_containers: An iterable of container names to be
              checked directly against Docker.
    @return: a tuple
             First: Return value indicating if info can be obtained from
                    DB or not (False when the FEATURE table has no keys).
             Second: A set which contains the current running containers,
                     if this info is available in DB; None otherwise.
    """
    state_db = swsscommon.DBConnector("STATE_DB", 0)
    tbl = swsscommon.Table(state_db, "FEATURE")

    # Read the key list once: it is used both for the availability check and
    # for the iteration, so a single read keeps the two consistent.
    feature_names = tbl.getKeys()
    if not feature_names:
        return False, None

    running_containers = set()
    for name in feature_names:
        # tbl.get() returns a pair; its second element holds the field/value
        # data of the entry, converted here into a dict for lookup.
        data = dict(tbl.get(name)[1])
        if data.get('container_id'):
            running_containers.add(name)

    docker_client = docker.DockerClient(base_url='unix://var/run/docker.sock')
    for name in always_running_containers:
        try:
            container = docker_client.containers.get(name)
        except (docker.errors.NotFound, docker.errors.APIError) as err:
            # Best effort: report the failure and leave this container out of
            # the running set so that the caller flags it as not running.
            print("Failed to get container '{}'. Error: '{}'".format(name, err))
            continue

        container_state = container.attrs.get('State', {})
        if container_state.get('Status', "") == 'running':
            running_containers.add(name)

    return True, running_containers
def get_current_running_from_dockers():
    """
    @summary: This function asks Docker (through the local unix socket) for the
              containers whose status is "running" and collects their names.
              If the Docker API raises an error, the error is printed and the
              names gathered so far are returned.
    @return: A set which contains the names of the containers that are
             in running state.
    """
    client = docker.DockerClient(base_url='unix://var/run/docker.sock')
    container_names = set()
    try:
        container_names.update(
            ctr.name for ctr in client.containers.list(filters={"status": "running"}))
    except docker.errors.APIError as err:
        print("Failed to retrieve the running container list. Error: '{}'".format(err))
    return container_names
def get_current_running_containers(always_running_containers):
    """
    @summary: This function returns the currently running containers. The
              DB-based lookup is tried first; the list of running dockers is
              used only when that lookup reports that no info is available.
    @param always_running_containers: Container names forwarded to the
              DB-based lookup.
    @return: A set of currently running containers.
    """
    db_available, containers_from_db = get_current_running_from_DB(always_running_containers)
    if db_available:
        return containers_from_db
    return get_current_running_from_dockers()
def main():
"""
@summary: This function will compare the difference between the current running containers
and the containers which were expected to run. If containers which were expected
to run were not running, then an alerting message will be written into syslog.
"""
expected_running_containers = get_expected_running_containers()
current_running_containers = get_current_running_containers()
expected_running_containers, always_running_containers = get_expected_running_containers()
current_running_containers = get_current_running_containers(always_running_containers)
expected_running_containers |= always_running_containers
not_running_containers = expected_running_containers.difference(current_running_containers)
if not_running_containers:
print("Expected containers not running: " + ", ".join(not_running_containers))
@ -114,3 +153,4 @@ def main():
if __name__ == "__main__":
    # Script entry point: run the check, then exit with status 0 once
    # main() has returned.
    main()
    sys.exit(0)