[container_checker] Use Feature table to get running containers (#7474)
Why I did it: Finding running containers through "docker ps" breaks when Kubernetes deploys a container, as the container names are mangled. How I did it: The data is available from the FEATURE table, which takes care of Kubernetes deployments too. How to verify it: Deploy a feature via Kubernetes and expect no error from container_checker.
This commit is contained in:
parent
3dddbf22fa
commit
7a575b3d00
@ -16,50 +16,31 @@ check program container_checker with path "/usr/bin/container_checker"
|
||||
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import docker
|
||||
import sys
|
||||
|
||||
import swsssdk
|
||||
from sonic_py_common import multi_asic
|
||||
|
||||
|
||||
def get_command_result(command):
    """
    @summary: This function will execute the command and return the resulting output.
    @param command: A string holding the command line; it is executed through the shell.
    @return: A list of strings holding the lines of the command's standard output
             (trailing whitespace of the output is stripped first, so an empty
             output yields [""]).
    @note: The process exits with code 1 if the command returns a non-zero code and
           with code 2 if the command could not be launched.
    """
    try:
        # NOTE: shell=True is kept so that callers can keep passing a full command
        # line as one string; only fixed, trusted command strings should be passed.
        proc_instance = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                         shell=True, universal_newlines=True)
        # stderr is captured only to keep it off the console; its content is not used.
        command_stdout, _ = proc_instance.communicate()
    except (OSError, ValueError) as err:
        print("Failed to execute the command '{}'. Error: '{}'".format(command, err))
        sys.exit(2)

    # Checked outside the try block: only launching/communicating can raise the
    # exceptions handled above.
    if proc_instance.returncode != 0:
        print("Failed to execute the command '{}'. Return code: '{}'".format(
            command, proc_instance.returncode))
        sys.exit(1)

    return command_stdout.rstrip().split("\n")
|
||||
from swsscommon import swsscommon
|
||||
|
||||
|
||||
def get_expected_running_containers():
|
||||
"""
|
||||
@summary: This function will get the expected running containers by following the rule:
|
||||
@summary: This function will get the expected running & always-enabled containers by following the rule:
|
||||
The 'state' field of container in 'FEATURE' table should not be 'disabled' or 'always_disabled'.
|
||||
If the device has Multi-ASIC, this function will get container list by determining the
|
||||
value of field 'has_global_scope', the number of ASICs and the value of field
|
||||
'has_per_asic_scope'.
|
||||
If the device has single ASIC, the container name was put into the list.
|
||||
@return: A set which contains the expected running containers.
|
||||
@return: A set which contains the expected running containers and a set that has
|
||||
containers marked as "always_enabled".
|
||||
"""
|
||||
config_db = swsssdk.ConfigDBConnector()
|
||||
config_db.connect()
|
||||
feature_table = config_db.get_table("FEATURE")
|
||||
|
||||
expected_running_containers = set()
|
||||
always_running_containers = set()
|
||||
|
||||
for container_name in feature_table.keys():
|
||||
if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]:
|
||||
@ -70,37 +51,95 @@ def get_expected_running_containers():
|
||||
num_asics = multi_asic.get_num_asics()
|
||||
for asic_id in range(num_asics):
|
||||
expected_running_containers.add(container_name + str(asic_id))
|
||||
elif feature_table[container_name]["state"] == 'always_enabled':
|
||||
always_running_containers.add(container_name)
|
||||
else:
|
||||
expected_running_containers.add(container_name)
|
||||
|
||||
return expected_running_containers
|
||||
return expected_running_containers, always_running_containers
|
||||
|
||||
|
||||
def get_current_running_containers():
|
||||
def get_current_running_from_DB(always_running_containers):
    """
    @summary: This function will get the current running container list
              from FEATURE table @ STATE_DB, if this table is available.
    @param always_running_containers: Collection of container names whose state is
              queried from the docker daemon rather than from STATE_DB.
    @return: a tuple
             First: Return value indicating if info can be obtained from
                    DB or not.
             Second: A set which contains the current running containers,
                     if this info is available in DB; None otherwise.
    """
    state_db = swsscommon.DBConnector("STATE_DB", 0)
    tbl = swsscommon.Table(state_db, "FEATURE")

    # Read the keys once so that the emptiness check and the loop below work on
    # the same snapshot of the table.
    feature_names = tbl.getKeys()
    if not feature_names:
        return False, None

    running_containers = set()
    for name in feature_names:
        # The second item returned by tbl.get() carries the entry's field/value pairs.
        data = dict(tbl.get(name)[1])
        # An entry with a non-empty 'container_id' field is counted as running.
        if data.get('container_id'):
            running_containers.add(name)

    # Only talk to the docker daemon when there is something to look up.
    if always_running_containers:
        docker_client = docker.DockerClient(base_url='unix://var/run/docker.sock')
        for name in always_running_containers:
            try:
                container = docker_client.containers.get(name)
                container_state = container.attrs.get('State', {})
                if container_state.get('Status', "") == 'running':
                    running_containers.add(name)
            except (docker.errors.NotFound, docker.errors.APIError) as err:
                # Best effort: a container that cannot be queried is simply not
                # reported as running.
                print("Failed to get container '{}'. Error: '{}'".format(name, err))

    return True, running_containers
|
||||
|
||||
|
||||
def get_current_running_from_dockers():
    """
    @summary: This function asks the docker daemon for the containers whose
              status is "running".
    @return: A set holding the names of the containers reported as running;
             the set is empty if the docker API call fails.
    """
    client = docker.DockerClient(base_url='unix://var/run/docker.sock')
    container_api = client.containers
    try:
        return {ctr.name for ctr in container_api.list(filters={"status": "running"})}
    except docker.errors.APIError as err:
        print("Failed to retrieve the running container list. Error: '{}'".format(err))
    return set()
|
||||
|
||||
|
||||
def get_current_running_containers(always_running_containers):
    """
    @summary: This function returns the currently running containers. The list
              kept in STATE-DB is preferred; when it is not available the list
              of running dockers is used instead.
    @param always_running_containers: Passed through to the STATE-DB based lookup.
    @return: A set of currently running containers.
    """
    available_in_db, running_from_db = get_current_running_from_DB(always_running_containers)
    if available_in_db:
        return running_from_db
    # Nothing usable in STATE-DB: fall back to the docker daemon.
    return get_current_running_from_dockers()
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
@summary: This function will compare the difference between the current running containers
|
||||
and the containers which were expected to run. If containers which were exepcted
|
||||
to run were not running, then an alerting message will be written into syslog.
|
||||
"""
|
||||
expected_running_containers = get_expected_running_containers()
|
||||
current_running_containers = get_current_running_containers()
|
||||
expected_running_containers, always_running_containers = get_expected_running_containers()
|
||||
current_running_containers = get_current_running_containers(always_running_containers)
|
||||
|
||||
expected_running_containers |= always_running_containers
|
||||
not_running_containers = expected_running_containers.difference(current_running_containers)
|
||||
if not_running_containers:
|
||||
print("Expected containers not running: " + ", ".join(not_running_containers))
|
||||
@ -114,3 +153,4 @@ def main():
|
||||
|
||||
if __name__ == "__main__":
    # Runs only when executed as a script. Reaching sys.exit(0) means main()
    # returned without terminating the process itself.
    main()
    sys.exit(0)
|
||||
|
Reference in New Issue
Block a user