[multi-asic] Enhancing monit process checker for multi-asic. (#6100)

Added support for the monit process checker to work on multi-asic platforms.
This commit is contained in:
abdosi 2020-12-04 10:39:43 -08:00 committed by Abhishek Dosi
parent bf0ce16ebd
commit 3a24e7f31f
2 changed files with 50 additions and 11 deletions

View File

@ -1,9 +1,11 @@
#!/usr/bin/python #!/usr/bin/python
import argparse import argparse
import ast
import sys import sys
import syslog import syslog
import psutil import psutil
from sonic_py_common import multi_asic
import swsssdk import swsssdk
@ -26,20 +28,55 @@ def check_process_existence(container_name, process_cmdline):
# We leveraged the psutil library to help us check whether the process is running or not. # We leveraged the psutil library to help us check whether the process is running or not.
# If the process entity is found in process tree and it is also in the 'running' or 'sleeping' # If the process entity is found in process tree and it is also in the 'running' or 'sleeping'
# state, then it will be marked as 'running'. # state, then it will be marked as 'running'.
is_running = False
for process in psutil.process_iter(["cmdline", "status"]): # For given feature we get the host and network namespace instances it's processes should be running
# based on its scope and add them to the expected set.
# From psutil we get the running instances of the process and add their namespaces to the actual set.
# The difference between the expected and actual sets gives the instances where the process is not running,
# and these will be logged in the syslog message by monit.
process_namespace_expected_set = set()
process_namespace_found_set = set()
has_global_scope = ast.literal_eval(feature_table[container_name].get('has_global_scope', 'True'))
has_per_asic_scope = ast.literal_eval(feature_table[container_name].get('has_per_asic_scope', 'False'))
if has_global_scope:
process_namespace_expected_set.add(multi_asic.DEFAULT_NAMESPACE)
if has_per_asic_scope:
process_namespace_expected_set.update(multi_asic.get_namespace_list())
for process in psutil.process_iter(["cmdline", "status", "pid"]):
try: try:
if ((' '.join(process.cmdline())).startswith(process_cmdline) and process.status() in ["running", "sleeping"]): if ((' '.join(process.cmdline())).startswith(process_cmdline) and process.status() in ["running", "sleeping"]):
is_running = True process_namespace_found_set.add(multi_asic.get_current_namespace(process.info['pid']))
break
except psutil.NoSuchProcess: except psutil.NoSuchProcess:
pass pass
if not is_running: process_namespace_diff_set = process_namespace_expected_set.difference(process_namespace_found_set)
# If this script is run by Monit, then the following output will be appended to
# Monit's syslog message. if process_namespace_diff_set:
print("'{}' is not running.".format(process_cmdline)) host_display_str = ""
sys.exit(1) namespace_display_str = ""
for ns in process_namespace_diff_set:
if ns == multi_asic.DEFAULT_NAMESPACE:
host_display_str = " in host"
else:
if not namespace_display_str:
namespace_display_str = " in namespace " + ns
else:
namespace_display_str += ", " + ns
join_str = " and" if host_display_str and namespace_display_str else ""
# If this script is run by Monit, then the following output will be appended to
# Monit's syslog message.
print("'{}' is not running{}{}{}".format(process_cmdline, host_display_str, join_str, namespace_display_str))
sys.exit(1)
else: else:
syslog.syslog(syslog.LOG_ERR, "container '{}' is not included in SONiC image or the given container name is invalid!" syslog.syslog(syslog.LOG_ERR, "container '{}' is not included in SONiC image or the given container name is invalid!"
.format(container_name)) .format(container_name))

View File

@ -166,14 +166,14 @@ def get_asic_device_id(asic_id):
return None return None
def get_current_namespace(): def get_current_namespace(pid=None):
""" """
This API returns the network namespace in which it is This API returns the network namespace in which it is
invoked. In case of global namespace the API returns None invoked. In case of global namespace the API returns None
""" """
net_namespace = None net_namespace = None
command = ["/bin/ip netns identify", str(os.getpid())] command = ["sudo /bin/ip netns identify {}".format(os.getpid() if not pid else pid)]
proc = subprocess.Popen(command, proc = subprocess.Popen(command,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
shell=True, shell=True,
@ -186,6 +186,8 @@ def get_current_namespace():
) )
if stdout.rstrip('\n') != "": if stdout.rstrip('\n') != "":
net_namespace = stdout.rstrip('\n') net_namespace = stdout.rstrip('\n')
else:
net_namespace = DEFAULT_NAMESPACE
except OSError as e: except OSError as e:
raise OSError("Error running command {}".format(command)) raise OSError("Error running command {}".format(command))