117 lines
4.5 KiB
Plaintext
117 lines
4.5 KiB
Plaintext
|
#!/usr/bin/env python3
|
||
|
|
||
|
"""
|
||
|
container_checker
|
||
|
|
||
|
This script is intended to be run by Monit. It writes an alerting message into
syslog if it finds containers which are expected to run but are not running.
Likewise, if some containers are running although they are not expected to run, it
also writes an alerting syslog message. Note that whenever a print(...) statement in
this script is executed, the string passed to it is appended to the Monit syslog message.
|
||
|
|
||
|
The following is an example in Monit configuration file to show how Monit will run
|
||
|
this script:
|
||
|
|
||
|
check program container_checker with path "/usr/bin/container_checker"
|
||
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||
|
"""
|
||
|
|
||
|
import subprocess
|
||
|
import sys
|
||
|
|
||
|
import swsssdk
|
||
|
from sonic_py_common import multi_asic
|
||
|
|
||
|
|
||
|
def get_command_result(command):
    """
    @summary: This function will execute the command in a shell and return its
              standard output split into lines.
    @param command: A string which contains the shell command line to be executed.
    @return: A list of strings, one per line of the command's standard output after
             trailing whitespace has been removed. An empty list is returned if the
             command printed nothing.
    @note: The script exits with status 1 if the command finished with a non-zero
           return code and with status 2 if the command could not be launched.
    """
    try:
        proc_instance = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                         shell=True, universal_newlines=True)
        # stderr is drained together with stdout, but its content is not used.
        command_stdout, _ = proc_instance.communicate()
    except (OSError, ValueError) as err:
        print("Failed to execute the command '{}'. Error: '{}'".format(command, err))
        sys.exit(2)

    if proc_instance.returncode != 0:
        print("Failed to execute the command '{}'. Return code: '{}'".format(
            command, proc_instance.returncode))
        sys.exit(1)

    command_stdout = command_stdout.rstrip()
    # "".split("\n") yields [""], so empty output is reported as no lines at all.
    return command_stdout.split("\n") if command_stdout else []
|
||
|
|
||
|
|
||
|
def get_expected_running_containers():
    """
    @summary: This function will get the expected running containers by following the rule:
              The 'state' field of container in 'FEATURE' table should not be 'disabled'. Then
              if the device has Multi-ASIC, the container name is added when the field
              'has_global_scope' is 'True', and one name per ASIC (the container name
              followed by the ASIC index) is added when the field 'has_per_asic_scope' is
              'True'. If the device has single ASIC, the container name is added as it is.
    @return: A set which contains the expected running containers.
    """
    config_db = swsssdk.ConfigDBConnector()
    config_db.connect()
    feature_table = config_db.get_table("FEATURE")

    # Queried once up front instead of once per feature
    # (assumes the values do not change during a single run of this script).
    is_multi_asic = multi_asic.is_multi_asic()
    num_asics = multi_asic.get_num_asics() if is_multi_asic else 0

    expected_running_containers = set()

    for container_name, feature in feature_table.items():
        if feature["state"] == "disabled":
            continue

        if not is_multi_asic:
            expected_running_containers.add(container_name)
            continue

        # On Multi-ASIC devices a feature may have a global instance, per-ASIC
        # instances, or both; the two fields are checked independently.
        if feature["has_global_scope"] == "True":
            expected_running_containers.add(container_name)
        if feature["has_per_asic_scope"] == "True":
            for asic_id in range(num_asics):
                expected_running_containers.add(container_name + str(asic_id))

    return expected_running_containers
|
||
|
|
||
|
|
||
|
def get_current_running_containers():
    """
    @summary: This function will get the current running containers by analyzing the
              output of command `docker ps`. The first output line is skipped and the
              last whitespace-separated field of every remaining line is taken as the
              container name.
    @return: A set which contains the current running containers.
    """
    running_containers = set()

    command = "docker ps"
    command_stdout = get_command_result(command)
    # The first line is treated as the column header and is not parsed.
    for line in command_stdout[1:]:
        fields = line.split()
        # Skip blank lines instead of failing with IndexError on them.
        if fields:
            running_containers.add(fields[-1])

    return running_containers
|
||
|
|
||
|
|
||
|
def main():
    """
    @summary: This function will compare the difference between the current running containers
              and the containers which were expected to run. If containers which were expected
              to run were not running, a message listing them is printed and the script exits
              with status 3. Otherwise, if containers were running which were not expected to
              run, a message listing them is printed and the script exits with status 4.
    @note: The unexpected running containers are only checked when no expected container
           is missing, since the script exits right after reporting the missing ones.
    """
    expected_running_containers = get_expected_running_containers()
    current_running_containers = get_current_running_containers()

    not_running_containers = expected_running_containers.difference(current_running_containers)
    if not_running_containers:
        # Names are sorted so that the same set always produces the same message.
        print("Expected containers not running: " + ", ".join(sorted(not_running_containers)))
        sys.exit(3)

    unexpected_running_containers = current_running_containers.difference(expected_running_containers)
    if unexpected_running_containers:
        print("Unexpected running containers: " + ", ".join(sorted(unexpected_running_containers)))
        sys.exit(4)
|
||
|
|
||
|
|
||
|
# Run the check only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|