From f37dd770cd28607337aaaadfc4277a8522587141 Mon Sep 17 00:00:00 2001 From: Senthil Kumar Guruswamy <75792349+sg893052@users.noreply.github.com> Date: Sat, 21 May 2022 01:55:11 +0530 Subject: [PATCH] System Ready (#10479) Why I did it At present, there is no mechanism in an event driven model to know that the system is up with all the essential sonic services and also, all the docker apps are ready along with port ready status to start the network traffic. With the asynchronous architecture of SONiC, we will not be able to verify if the config has been applied all the way down to the HW. But we can get the closest up status of each app and arrive at the system readiness. How I did it A new python based system monitor tool is introduced under system-health framework to monitor all the essential system host services including docker wrapper services on an event based model and declare the system is ready. This framework gives provision for docker apps to notify its closest up status. CLIs are provided to fetch the current system status and also service running status and its app ready status along with failure reason if any. 
How to verify it "show system-health sysready-status" click CLI Syslogs for system ready --- files/build_templates/init_cfg.json.j2 | 6 + .../build_templates/sonic_debian_extension.j2 | 3 + ...rvices-data.determine-reboot-cause.service | 1 + .../yang-models/sonic-feature.yang | 7 + .../health_checker/sysmonitor.py | 436 ++++++++++++++++++ src/system-health/scripts/healthd | 5 + src/system-health/tests/mock_connector.py | 9 + src/system-health/tests/test_system_health.py | 215 +++++++++ 8 files changed, 682 insertions(+) create mode 100755 src/system-health/health_checker/sysmonitor.py diff --git a/files/build_templates/init_cfg.json.j2 b/files/build_templates/init_cfg.json.j2 index b51aaf2f0c..5d44b0a64b 100644 --- a/files/build_templates/init_cfg.json.j2 +++ b/files/build_templates/init_cfg.json.j2 @@ -57,6 +57,12 @@ "has_global_scope": {% if feature + '.service' in installer_services.split(' ') %}true{% else %}false{% endif %}, "has_per_asic_scope": {% if feature + '@.service' in installer_services.split(' ') %}true{% else %}false{% endif %}, "auto_restart": "{{autorestart}}", +{# Set check_up_status to true here when app readiness will be marked in state db #} +{# For now, to support the infrastructure, setting the check_up_status to false for bgp,swss,pmon #} +{# Once apps like bgp,syncd support app readiness, then bgp,syncd can set check_up_status to true #} +{%- if feature in ["bgp", "swss", "pmon"] %} + "check_up_status" : "false", +{%- endif %} {%- if include_kubernetes == "y" %} {%- if feature in ["lldp", "pmon", "radv", "snmp", "telemetry"] %} "set_owner": "kube", {% else %} diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index aebd6b2fcd..185184fb91 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -890,3 +890,6 @@ sudo cp $BUILD_SCRIPTS_DIR/mask_disabled_services.py $FILESYSTEM_ROOT/tmp/ sudo chmod a+x
$FILESYSTEM_ROOT/tmp/mask_disabled_services.py sudo LANG=C chroot $FILESYSTEM_ROOT /tmp/mask_disabled_services.py sudo rm -rf $FILESYSTEM_ROOT/tmp/mask_disabled_services.py + + +sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install python3-dbus diff --git a/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service b/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service index f0d9e91fe9..50d79b3e76 100644 --- a/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service +++ b/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service @@ -5,6 +5,7 @@ After=rc-local.service database.service [Service] Type=simple +RemainAfterExit=yes ExecStart=/usr/local/bin/determine-reboot-cause [Install] diff --git a/src/sonic-yang-models/yang-models/sonic-feature.yang b/src/sonic-yang-models/yang-models/sonic-feature.yang index be46bef198..54133ef106 100644 --- a/src/sonic-yang-models/yang-models/sonic-feature.yang +++ b/src/sonic-yang-models/yang-models/sonic-feature.yang @@ -86,6 +86,13 @@ module sonic-feature{ type feature-owner; default "local"; } + + leaf check_up_status { + description "This configuration controls the system ready tool to check + the app ready/up status"; + type boolean; + default false; + } } } } diff --git a/src/system-health/health_checker/sysmonitor.py b/src/system-health/health_checker/sysmonitor.py new file mode 100755 index 0000000000..a4058f8c09 --- /dev/null +++ b/src/system-health/health_checker/sysmonitor.py @@ -0,0 +1,436 @@ +#!/usr/bin/python3 + +import os +import sys +import glob +import multiprocessing +from datetime import datetime +from swsscommon import swsscommon +from sonic_py_common.logger import Logger +from . 
import utils +from sonic_py_common.task_base import ProcessTaskBase +from .config import Config + +SYSLOG_IDENTIFIER = "system#monitor" +REDIS_TIMEOUT_MS = 0 +system_allsrv_state = "DOWN" +spl_srv_list = ['database-chassis', 'gbsyncd'] +SELECT_TIMEOUT_MSECS = 1000 +QUEUE_TIMEOUT = 15 +TASK_STOP_TIMEOUT = 10 +mpmgr = multiprocessing.Manager() +logger = Logger(log_identifier=SYSLOG_IDENTIFIER) + + +#Subprocess which subscribes to STATE_DB FEATURE table for any update +#and push service events to main process via queue +class MonitorStateDbTask(ProcessTaskBase): + + def __init__(self,myQ): + ProcessTaskBase.__init__(self) + self.task_queue = myQ + + def subscribe_statedb(self): + state_db = swsscommon.DBConnector("STATE_DB", REDIS_TIMEOUT_MS, True) + sel = swsscommon.Select() + cst = swsscommon.SubscriberStateTable(state_db, "FEATURE") + sel.addSelectable(cst) + + while not self.task_stopping_event.is_set(): + (state, c) = sel.select(SELECT_TIMEOUT_MSECS) + if state == swsscommon.Select.TIMEOUT: + continue + if state != swsscommon.Select.OBJECT: + logger.log_warning("sel.select() did not return swsscommon.Select.OBJECT") + continue + (key, op, cfvs) = cst.pop() + key_ext = key + ".service" + timestamp = "{}".format(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")) + msg={"unit": key_ext, "evt_src":"feature", "time":timestamp} + self.task_notify(msg) + + + def task_worker(self): + if self.task_stopping_event.is_set(): + return + try: + self.subscribe_statedb() + except Exception as e: + logger.log_error("subscribe_statedb exited- {}".format(str(e))) + + def task_notify(self, msg): + if self.task_stopping_event.is_set(): + return + self.task_queue.put(msg) + + +#Subprocess which subscribes to system dbus to listen for systemd events +#and push service events to main process via queue +class MonitorSystemBusTask(ProcessTaskBase): + + def __init__(self,myQ): + ProcessTaskBase.__init__(self) + self.task_queue = myQ + + def on_job_removed(self, id, job, unit, result): + if 
result == "done": + timestamp = "{}".format(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")) + msg = {"unit": unit, "evt_src":"sysbus", "time":timestamp} + self.task_notify(msg) + return + + #Function for listening the systemd event on dbus + def subscribe_sysbus(self): + import dbus + from gi.repository import GLib + from dbus.mainloop.glib import DBusGMainLoop + + DBusGMainLoop(set_as_default=True) + bus = dbus.SystemBus() + systemd = bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1') + manager = dbus.Interface(systemd, 'org.freedesktop.systemd1.Manager') + manager.Subscribe() + manager.connect_to_signal('JobRemoved', self.on_job_removed) + + loop = GLib.MainLoop() + loop.run() + + def task_worker(self): + if self.task_stopping_event.is_set(): + return + logger.log_info("Start Listening to systemd bus (pid {0})".format(os.getpid())) + self.subscribe_sysbus() + + def task_notify(self, msg): + if self.task_stopping_event.is_set(): + return + self.task_queue.put(msg) + +#Mainprocess which launches 2 subtasks - systembus task and statedb task +#and on receiving events, checks and updates the system ready status to state db +class Sysmonitor(ProcessTaskBase): + + def __init__(self): + ProcessTaskBase.__init__(self) + self._stop_timeout_secs = TASK_STOP_TIMEOUT + self.dnsrvs_name = set() + self.state_db = None + self.config_db = None + self.config = Config() + + #Sets system ready status to state db + def post_system_status(self, state): + try: + if not self.state_db: + self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1') + self.state_db.connect(self.state_db.STATE_DB) + + self.state_db.set(self.state_db.STATE_DB, "SYSTEM_READY|SYSTEM_STATE", "Status", state) + logger.log_info("Posting system ready status {} to statedb".format(state)) + + except Exception as e: + logger.log_error("Unable to post system ready status: {}".format(str(e))) + + #Forms the service list to be monitored + def get_all_service_list(self): + + if not self.config_db: 
+ self.config_db = swsscommon.ConfigDBConnector() + self.config_db.connect() + + dir_list = [] + #add the services from the below targets + targets= ["/etc/systemd/system/multi-user.target.wants", "/etc/systemd/system/sonic.target.wants"] + for path in targets: + dir_list += [os.path.basename(i) for i in glob.glob('{}/*.service'.format(path))] + + #add the enabled docker services from config db feature table + feature_table = self.config_db.get_table("FEATURE") + for srv in feature_table.keys(): + if feature_table[srv]["state"] not in ["disabled", "always_disabled"]: + srvext = srv + ".service" + if srvext not in dir_list: + dir_list.append(srvext) + + self.config.load_config() + if self.config and self.config.ignore_services: + for srv in self.config.ignore_services: + if srv in dir_list: + dir_list.remove(srv) + + dir_list.sort() + return dir_list + + + #Checks FEATURE table from config db for the service' check_up_status flag + #if marked to true, then read the service up_status from FEATURE table of state db. 
+ #else, just return Up + def get_app_ready_status(self, service): + if not self.state_db: + self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1') + self.state_db.connect(self.state_db.STATE_DB) + if not self.config_db: + self.config_db = swsscommon.ConfigDBConnector() + self.config_db.connect() + + fail_reason = "" + check_app_up_status = "" + up_status_flag = "" + configdb_feature_table = self.config_db.get_table('FEATURE') + update_time = "-" + + if service not in configdb_feature_table.keys(): + pstate = "Up" + else: + check_app_up_status = configdb_feature_table[service].get('check_up_status') + if check_app_up_status is not None and (check_app_up_status.lower()) == "true": + up_status_flag = self.state_db.get(self.state_db.STATE_DB, 'FEATURE|{}'.format(service), 'up_status') + if up_status_flag is not None and (up_status_flag.lower()) == "true": + pstate = "Up" + else: + fail_reason = self.state_db.get(self.state_db.STATE_DB, 'FEATURE|{}'.format(service), 'fail_reason') + if fail_reason is None: + fail_reason = "NA" + pstate = "Down" + + update_time = self.state_db.get(self.state_db.STATE_DB, 'FEATURE|{}'.format(service), 'update_time') + if update_time is None: + update_time = "-" + else: + #Either check_up_status marked False or entry does not exist + pstate = "Up" + + return pstate,fail_reason,update_time + + #Gets the service properties + def run_systemctl_show(self, service): + command = ('systemctl show {} --property=Id,LoadState,UnitFileState,Type,ActiveState,SubState,Result'.format(service)) + output = utils.run_command(command) + srv_properties = output.split('\n') + prop_dict = {} + for prop in srv_properties: + kv = prop.split("=", 1) + if len(kv) == 2: + prop_dict[kv[0]] = kv[1] + + return prop_dict + + #Sets the service status to state db + def post_unit_status(self, srv_name, srv_status, app_status, fail_reason, update_time): + if not self.state_db: + self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1') + 
self.state_db.connect(self.state_db.STATE_DB) + + key = 'ALL_SERVICE_STATUS|{}'.format(srv_name) + statusvalue = {} + statusvalue['service_status'] = srv_status + statusvalue['app_ready_status'] = app_status + statusvalue['fail_reason'] = fail_reason + statusvalue['update_time'] = update_time + self.state_db.hmset(self.state_db.STATE_DB, key, statusvalue) + + #Reads the current status of the service and posts it to state db + def get_unit_status(self, event): + """ Get a unit status""" + global spl_srv_list + unit_status = "NOT OK" + update_time = "-" + + try: + service_status = "Down" + service_up_status = "Down" + service_name,last_name = event.split('.') + + sysctl_show = self.run_systemctl_show(event) + + load_state = sysctl_show['LoadState'] + if load_state == "loaded": + status = sysctl_show['UnitFileState'] + fail_reason = sysctl_show['Result'] + active_state = sysctl_show['ActiveState'] + sub_state = sysctl_show['SubState'] + srv_type = sysctl_show['Type'] + + #Raise syslog for service state change + logger.log_info("{} service state changed to [{}/{}]".format(event, active_state, sub_state)) + + if status == "enabled" or status == "enabled-runtime" or status == "static": + if fail_reason == "success": + fail_reason = "-" + if (active_state == "active" and sub_state == "exited"): + service_status = "OK" + service_up_status = "OK" + unit_status = "OK" + elif active_state == "active" and sub_state == "running": + service_status = "OK" + init_state,app_fail_reason,update_time = self.get_app_ready_status(service_name) + if init_state == "Up": + service_up_status = "OK" + unit_status = "OK" + else: + fail_reason = app_fail_reason + unit_status = "NOT OK" + if fail_reason == "docker start": + service_up_status = "Starting" + fail_reason = "-" + elif active_state == "activating": + service_status = "Starting" + service_up_status = "Starting" + elif active_state == "deactivating": + service_status = "Stopping" + service_up_status = "Stopping" + elif active_state == 
"inactive": + if srv_type == "oneshot" or service_name in spl_srv_list: + service_status = "OK" + service_up_status = "OK" + unit_status = "OK" + else: + unit_status = "NOT OK" + if fail_reason == "-": + fail_reason = "Inactive" + else: + unit_status = "NOT OK" + + self.post_unit_status(service_name, service_status, service_up_status, fail_reason, update_time) + + return unit_status + + except Exception as e: + logger.log_error("Get unit status {}-{}".format(service_name, str(e))) + + + #Gets status of all the services from service list + def get_all_system_status(self): + """ Shows the system ready status""" + #global dnsrvs_name + scan_srv_list = [] + + scan_srv_list = self.get_all_service_list() + for service in scan_srv_list: + ustate = self.get_unit_status(service) + if ustate == "NOT OK": + if service not in self.dnsrvs_name: + self.dnsrvs_name.add(service) + + if len(self.dnsrvs_name) == 0: + return "UP" + else: + return "DOWN" + + #Displays the system ready status message on console + def print_console_message(self, message): + with open('/dev/console', 'w') as console: + console.write("\n{} {}\n".format(datetime.now().strftime("%b %d %H:%M:%S.%f"), message)) + + #Publish the system ready status message on logger,console and state db + def publish_system_status(self, astate): + global system_allsrv_state + if system_allsrv_state != astate: + system_allsrv_state = astate + if astate == "DOWN": + msg = "System is not ready - one or more services are not up" + elif astate == "UP": + msg = "System is ready" + logger.log_notice(msg) + self.print_console_message(msg) + self.post_system_status(astate) + + #Checks all the services and updates the current system status + def update_system_status(self): + try: + astate = self.get_all_system_status() + self.publish_system_status(astate) + + except Exception as e: + logger.log_error("update system status exception:{}".format(str(e))) + + #Checks a service status and updates the system status + def 
check_unit_status(self, event): + #global dnsrvs_name + if not self.state_db: + self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1') + self.state_db.connect(self.state_db.STATE_DB) + astate = "DOWN" + + full_srv_list = self.get_all_service_list() + if event in full_srv_list: + ustate = self.get_unit_status(event) + if ustate == "OK" and system_allsrv_state == "UP": + astate = "UP" + elif ustate == "OK" and system_allsrv_state == "DOWN": + if event in self.dnsrvs_name: + self.dnsrvs_name.remove(event) + if len(self.dnsrvs_name) == 0: + astate = "UP" + else: + astate = "DOWN" + else: + if event not in self.dnsrvs_name: + self.dnsrvs_name.add(event) + astate = "DOWN" + + self.publish_system_status(astate) + else: + #if received event is not in current full service list but exists in STATE_DB & set, + #then it should be removed from STATE_DB & set + if event in self.dnsrvs_name: + self.dnsrvs_name.remove(event) + + srv_name,last = event.split('.') + key = 'ALL_SERVICE_STATUS|{}'.format(srv_name) + key_exists = self.state_db.exists(self.state_db.STATE_DB, key) + if key_exists == 1: + self.state_db.delete(self.state_db.STATE_DB, key) + + return 0 + + def system_service(self): + if not self.state_db: + self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1') + self.state_db.connect(self.state_db.STATE_DB) + + myQ = mpmgr.Queue() + try: + monitor_system_bus = MonitorSystemBusTask(myQ) + monitor_system_bus.task_run() + + monitor_statedb_table = MonitorStateDbTask(myQ) + monitor_statedb_table.task_run() + + except Exception as e: + logger.log_error("SubProcess-{}".format(str(e))) + sys.exit(1) + + + self.update_system_status() + + from queue import Empty + # Queue to receive the STATEDB and Systemd state change event + while not self.task_stopping_event.is_set(): + try: + msg = myQ.get(timeout=QUEUE_TIMEOUT) + event = msg["unit"] + event_src = msg["evt_src"] + event_time = msg["time"] + logger.log_debug("Main process- received event:{} from source:{} 
time:{}".format(event,event_src,event_time)) + logger.log_info("check_unit_status for [ "+event+" ] ") + self.check_unit_status(event) + except Empty: + pass + except Exception as e: + logger.log_error("system_service"+str(e)) + + #cleanup tables "'ALL_SERVICE_STATUS*', 'SYSTEM_READY*'" from statedb + self.state_db.delete_all_by_pattern(self.state_db.STATE_DB, "ALL_SERVICE_STATUS|*") + self.state_db.delete_all_by_pattern(self.state_db.STATE_DB, "SYSTEM_READY|*") + + monitor_system_bus.task_stop() + monitor_statedb_table.task_stop() + + def task_worker(self): + if self.task_stopping_event.is_set(): + return + self.system_service() + + + diff --git a/src/system-health/scripts/healthd b/src/system-health/scripts/healthd index dd276df2fa..df52969d3a 100644 --- a/src/system-health/scripts/healthd +++ b/src/system-health/scripts/healthd @@ -12,6 +12,8 @@ from sonic_py_common.daemon_base import DaemonBase from swsscommon.swsscommon import SonicV2Connector from health_checker.manager import HealthCheckerManager +from health_checker.sysmonitor import Sysmonitor + SYSLOG_IDENTIFIER = 'healthd' @@ -75,6 +77,8 @@ class HealthDaemon(DaemonBase): if not manager.config.config_file_exists(): self.log_warning("System health configuration file not found, exit...") return + sysmon = Sysmonitor() + sysmon.task_run() while 1: stat = manager.check(chassis) self._process_stat(chassis, manager.config, stat) @@ -85,6 +89,7 @@ class HealthDaemon(DaemonBase): self.log_warning("sonic_platform package not installed. 
Cannot start system-health daemon") self.deinit() + sysmon.task_stop() def _process_stat(self, chassis, config, stat): from health_checker.health_checker import HealthChecker diff --git a/src/system-health/tests/mock_connector.py b/src/system-health/tests/mock_connector.py index d32017ff84..d602c8eaf1 100644 --- a/src/system-health/tests/mock_connector.py +++ b/src/system-health/tests/mock_connector.py @@ -22,3 +22,12 @@ class MockConnector(object): def get_all(self, db_id, key): return MockConnector.data[key] + + def set(self, db_id, key, field, value): + self.data[key] = {} + self.data[key][field] = value + + def hmset(self, db_id, key, fieldsvalues): + self.data[key] = {} + for field,value in fieldsvalues.items(): + self.data[key][field] = value diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index 14d58c0f44..76f3ceea5d 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -3,6 +3,7 @@ 1. test_user_defined_checker mocks the output of a user defined checker and verify class UserDefinedChecker 2. test_service_checker mocks the output of monit service and verify class ServiceChecker 3. test_hardware_checker mocks the hardware status data in db and verify class HardwareChecker + 4. Mocks and tests the system ready status and verify class Sysmonitor And there are class that are not covered by unit test. These class will be covered by sonic-mgmt regression test. 1. HealthDaemon 2. 
HealthCheckerManager @@ -30,6 +31,9 @@ from health_checker.health_checker import HealthChecker from health_checker.manager import HealthCheckerManager from health_checker.service_checker import ServiceChecker from health_checker.user_defined_checker import UserDefinedChecker +from health_checker.sysmonitor import Sysmonitor +from health_checker.sysmonitor import MonitorStateDbTask +from health_checker.sysmonitor import MonitorSystemBusTask mock_supervisorctl_output = """ snmpd RUNNING pid 67, uptime 1:03:56 @@ -505,3 +509,214 @@ def test_utils(): output = utils.run_command('ls') assert output + + +@patch('swsscommon.swsscommon.ConfigDBConnector.connect', MagicMock()) +@patch('sonic_py_common.multi_asic.is_multi_asic', MagicMock(return_value=False)) +@patch('docker.DockerClient') +@patch('health_checker.utils.run_command') +@patch('swsscommon.swsscommon.ConfigDBConnector') +def test_get_all_service_list(mock_config_db, mock_run, mock_docker_client): + mock_db_data = MagicMock() + mock_get_table = MagicMock() + mock_db_data.get_table = mock_get_table + mock_config_db.return_value = mock_db_data + mock_get_table.return_value = { + 'radv': { + 'state': 'enabled', + 'has_global_scope': 'True', + 'has_per_asic_scope': 'False', + }, + 'bgp': { + 'state': 'enabled', + 'has_global_scope': 'True', + 'has_per_asic_scope': 'False', + }, + 'pmon': { + 'state': 'disabled', + 'has_global_scope': 'True', + 'has_per_asic_scope': 'False', + } + } + sysmon = Sysmonitor() + print("mock get table:{}".format(mock_get_table.return_value)) + result = sysmon.get_all_service_list() + print("result get all service list:{}".format(result)) + assert 'radv.service' in result + assert 'pmon.service' not in result + + +@patch('swsscommon.swsscommon.ConfigDBConnector.connect', MagicMock()) +@patch('sonic_py_common.multi_asic.is_multi_asic', MagicMock(return_value=False)) +@patch('docker.DockerClient') +@patch('health_checker.utils.run_command') +@patch('swsscommon.swsscommon.ConfigDBConnector') 
+def test_get_app_ready_status(mock_config_db, mock_run, mock_docker_client): + mock_db_data = MagicMock() + mock_get_table = MagicMock() + mock_db_data.get_table = mock_get_table + mock_config_db.return_value = mock_db_data + mock_get_table.return_value = { + 'radv': { + 'state': 'enabled', + 'has_global_scope': 'True', + 'has_per_asic_scope': 'False', + 'check_up_status': 'True' + }, + 'bgp': { + 'state': 'enabled', + 'has_global_scope': 'True', + 'has_per_asic_scope': 'False', + 'check_up_status': 'True' + }, + 'snmp': { + 'state': 'enabled', + 'has_global_scope': 'True', + 'has_per_asic_scope': 'False', + 'check_up_status': 'False' + } + } + + MockConnector.data.update({ + 'FEATURE|radv': { + 'up_status': 'True', + 'fail_reason': '-', + 'update_time': '-' + }, + 'FEATURE|bgp': { + 'up_status': 'False', + 'fail_reason': 'some error', + 'update_time': '-' + }}) + + sysmon = Sysmonitor() + result = sysmon.get_app_ready_status('radv') + print(result) + assert 'Up' in result + result = sysmon.get_app_ready_status('bgp') + print(result) + assert 'Down' in result + result = sysmon.get_app_ready_status('snmp') + print(result) + assert 'Up' in result + + +mock_srv_props={ +'mock_radv.service':{'Type': 'simple', 'Result': 'success', 'Id': 'mock_radv.service', 'LoadState': 'loaded', 'ActiveState': 'active', 'SubState': 'running', 'UnitFileState': 'enabled'}, +'mock_bgp.service':{'Type': 'simple', 'Result': 'success', 'Id': 'mock_bgp.service', 'LoadState': 'loaded', 'ActiveState': 'inactive', 'SubState': 'dead', 'UnitFileState': 'enabled'} +} + +@patch('health_checker.sysmonitor.Sysmonitor.get_all_service_list', MagicMock(return_value=['mock_snmp.service', 'mock_bgp.service', 'mock_ns.service'])) +@patch('health_checker.sysmonitor.Sysmonitor.run_systemctl_show', MagicMock(return_value=mock_srv_props['mock_bgp.service'])) +@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value=('Down','-','-'))) 
+@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock()) +def test_check_unit_status(): + sysmon = Sysmonitor() + sysmon.check_unit_status('mock_bgp.service') + assert 'mock_bgp.service' in sysmon.dnsrvs_name + + + +@patch('health_checker.sysmonitor.Sysmonitor.run_systemctl_show', MagicMock(return_value=mock_srv_props['mock_radv.service'])) +@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value=('Up','-','-'))) +@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock()) +def test_get_unit_status_ok(): + sysmon = Sysmonitor() + result = sysmon.get_unit_status('mock_radv.service') + print("get_unit_status:{}".format(result)) + assert result == 'OK' + + +@patch('health_checker.sysmonitor.Sysmonitor.run_systemctl_show', MagicMock(return_value=mock_srv_props['mock_bgp.service'])) +@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value=('Up','-','-'))) +@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock()) +def test_get_unit_status_not_ok(): + sysmon = Sysmonitor() + result = sysmon.get_unit_status('mock_bgp.service') + print("get_unit_status:{}".format(result)) + assert result == 'NOT OK' + + +@patch('health_checker.sysmonitor.Sysmonitor.get_all_service_list', MagicMock(return_value=['mock_snmp.service', 'mock_ns.service'])) +@patch('health_checker.sysmonitor.Sysmonitor.get_unit_status', MagicMock(return_value= 'OK')) +@patch('health_checker.sysmonitor.Sysmonitor.publish_system_status', MagicMock()) +@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock()) +@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value='Up')) +def test_get_all_system_status_ok(): + sysmon = Sysmonitor() + result = sysmon.get_all_system_status() + print("result:{}".format(result)) + assert result == 'UP' + + +@patch('health_checker.sysmonitor.Sysmonitor.get_all_service_list', 
MagicMock(return_value=['mock_snmp.service', 'mock_ns.service'])) +@patch('health_checker.sysmonitor.Sysmonitor.get_unit_status', MagicMock(return_value= 'NOT OK')) +@patch('health_checker.sysmonitor.Sysmonitor.publish_system_status', MagicMock()) +@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock()) +@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value='Up')) +def test_get_all_system_status_not_ok(): + sysmon = Sysmonitor() + result = sysmon.get_all_system_status() + print("result:{}".format(result)) + assert result == 'DOWN' + +def test_post_unit_status(): + sysmon = Sysmonitor() + sysmon.post_unit_status("mock_bgp", 'OK', 'Down', 'mock reason', '-') + result = swsscommon.SonicV2Connector.get_all(MockConnector, 0, 'ALL_SERVICE_STATUS|mock_bgp') + print(result) + assert result['service_status'] == 'OK' + assert result['app_ready_status'] == 'Down' + assert result['fail_reason'] == 'mock reason' + +def test_post_system_status(): + sysmon = Sysmonitor() + sysmon.post_system_status("UP") + result = swsscommon.SonicV2Connector.get(MockConnector, 0, "SYSTEM_READY|SYSTEM_STATE", 'Status') + print("post system status result:{}".format(result)) + assert result == "UP" + +@patch('health_checker.sysmonitor.Sysmonitor.publish_system_status', MagicMock()) +@patch('health_checker.sysmonitor.Sysmonitor.post_system_status', test_post_system_status()) +@patch('health_checker.sysmonitor.Sysmonitor.print_console_message', MagicMock()) +def test_publish_system_status(): + sysmon = Sysmonitor() + sysmon.publish_system_status('UP') + result = swsscommon.SonicV2Connector.get(MockConnector, 0, "SYSTEM_READY|SYSTEM_STATE", 'Status') + assert result == "UP" + +@patch('health_checker.sysmonitor.Sysmonitor.get_all_system_status', test_get_all_system_status_ok()) +@patch('health_checker.sysmonitor.Sysmonitor.publish_system_status', test_publish_system_status()) +def test_update_system_status(): + sysmon = Sysmonitor() + 
sysmon.update_system_status() + result = swsscommon.SonicV2Connector.get(MockConnector, 0, "SYSTEM_READY|SYSTEM_STATE", 'Status') + assert result == "UP" + +from sonic_py_common.task_base import ProcessTaskBase +import multiprocessing +mpmgr = multiprocessing.Manager() + +myQ = mpmgr.Queue() +def test_monitor_statedb_task(): + sysmon = MonitorStateDbTask(myQ) + sysmon.SubscriberStateTable = MagicMock() + sysmon.task_run() + assert sysmon._task_process is not None + sysmon.task_stop() + +@patch('health_checker.sysmonitor.MonitorSystemBusTask.subscribe_sysbus', MagicMock()) +def test_monitor_sysbus_task(): + sysmon = MonitorSystemBusTask(myQ) + sysmon.SubscriberStateTable = MagicMock() + sysmon.task_run() + assert sysmon._task_process is not None + sysmon.task_stop() + +@patch('health_checker.sysmonitor.MonitorSystemBusTask.subscribe_sysbus', MagicMock()) +@patch('health_checker.sysmonitor.MonitorStateDbTask.subscribe_statedb', MagicMock()) +def test_system_service(): + sysmon = Sysmonitor() + sysmon.task_run() + assert sysmon._task_process is not None + sysmon.task_stop()