System Ready (#10479)

Why I did it
At present, the event-driven model has no mechanism to tell when the system is up, i.e. when all the essential SONiC services are running, all the docker apps are ready, and the ports are ready to carry network traffic. Because SONiC's architecture is asynchronous, we cannot verify that the configuration has been applied all the way down to the hardware. We can, however, collect the closest "up" status of each app and derive the system readiness from it.

How I did it
A new Python-based system monitor tool is introduced under the system-health framework. It monitors all the essential system host services, including the docker wrapper services, on an event-based model and declares when the system is ready. The framework lets docker apps report their closest "up" status. CLIs are provided to fetch the current system status as well as each service's running status and app-ready status, along with the failure reason, if any.

How to verify it
"show system-health sysready-status" click CLI
Syslogs for system ready
This commit is contained in:
Senthil Kumar Guruswamy 2022-05-21 01:55:11 +05:30 committed by GitHub
parent f6927606b3
commit f37dd770cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 682 additions and 0 deletions

View File

@ -57,6 +57,12 @@
"has_global_scope": {% if feature + '.service' in installer_services.split(' ') %}true{% else %}false{% endif %}, "has_global_scope": {% if feature + '.service' in installer_services.split(' ') %}true{% else %}false{% endif %},
"has_per_asic_scope": {% if feature + '@.service' in installer_services.split(' ') %}true{% else %}false{% endif %}, "has_per_asic_scope": {% if feature + '@.service' in installer_services.split(' ') %}true{% else %}false{% endif %},
"auto_restart": "{{autorestart}}", "auto_restart": "{{autorestart}}",
{# Set check_up_status to true here when app readiness will be marked in state db #}
{# For now, to support the infrastructure, check_up_status is set to false for bgp, swss and pmon #}
{# Once apps like bgp and syncd support app readiness, they can set check_up_status to true #}
{%- if feature in ["bgp", "swss", "pmon"] %}
"check_up_status" : "false",
{%- endif %}
{%- if include_kubernetes == "y" %} {%- if include_kubernetes == "y" %}
{%- if feature in ["lldp", "pmon", "radv", "snmp", "telemetry"] %} {%- if feature in ["lldp", "pmon", "radv", "snmp", "telemetry"] %}
"set_owner": "kube", {% else %} "set_owner": "kube", {% else %}

View File

@ -890,3 +890,6 @@ sudo cp $BUILD_SCRIPTS_DIR/mask_disabled_services.py $FILESYSTEM_ROOT/tmp/
sudo chmod a+x $FILESYSTEM_ROOT/tmp/mask_disabled_services.py sudo chmod a+x $FILESYSTEM_ROOT/tmp/mask_disabled_services.py
sudo LANG=C chroot $FILESYSTEM_ROOT /tmp/mask_disabled_services.py sudo LANG=C chroot $FILESYSTEM_ROOT /tmp/mask_disabled_services.py
sudo rm -rf $FILESYSTEM_ROOT/tmp/mask_disabled_services.py sudo rm -rf $FILESYSTEM_ROOT/tmp/mask_disabled_services.py
sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install python3-dbus

View File

@ -5,6 +5,7 @@ After=rc-local.service database.service
[Service] [Service]
Type=simple Type=simple
RemainAfterExit=yes
ExecStart=/usr/local/bin/determine-reboot-cause ExecStart=/usr/local/bin/determine-reboot-cause
[Install] [Install]

View File

@ -86,6 +86,13 @@ module sonic-feature{
type feature-owner; type feature-owner;
default "local"; default "local";
} }
leaf check_up_status {
description "This configuration controls the system ready tool to check
the app ready/up status";
type boolean;
default false;
}
} }
} }
} }

View File

@ -0,0 +1,436 @@
#!/usr/bin/python3
import os
import sys
import glob
import multiprocessing
from datetime import datetime
from swsscommon import swsscommon
from sonic_py_common.logger import Logger
from . import utils
from sonic_py_common.task_base import ProcessTaskBase
from .config import Config
# Identifier handed to the Logger for every message emitted by this module.
SYSLOG_IDENTIFIER = "system#monitor"
# Timeout passed to the STATE_DB DBConnector in MonitorStateDbTask.
# NOTE(review): 0 presumably means "no timeout" - confirm against swsscommon.
REDIS_TIMEOUT_MS = 0
# Last system-wide readiness state that was published ("UP" or "DOWN").
system_allsrv_state = "DOWN"
# Services reported as OK even when systemd shows them as inactive.
spl_srv_list = ['database-chassis', 'gbsyncd']
# Timeout (milliseconds) of each Select.select() call in the STATE_DB listener.
SELECT_TIMEOUT_MSECS = 1000
# Timeout (seconds) of each blocking get() on the event queue in the main loop.
QUEUE_TIMEOUT = 15
# Value assigned to Sysmonitor._stop_timeout_secs.
TASK_STOP_TIMEOUT = 10
# Manager providing the queue shared with the monitor subprocesses.
# It is created as a side effect of importing this module.
mpmgr = multiprocessing.Manager()
logger = Logger(log_identifier=SYSLOG_IDENTIFIER)
class MonitorStateDbTask(ProcessTaskBase):
    """Subprocess task that listens to the STATE_DB FEATURE table.

    Each table update popped from the subscription is turned into a service
    event and pushed to the main process through the supplied queue.
    """

    def __init__(self,myQ):
        """Remember the queue (*myQ*) used to hand events to the main process."""
        ProcessTaskBase.__init__(self)
        self.task_queue = myQ

    def subscribe_statedb(self):
        """Forward FEATURE table notifications until the task is asked to stop."""
        db = swsscommon.DBConnector("STATE_DB", REDIS_TIMEOUT_MS, True)
        selector = swsscommon.Select()
        feature_tbl = swsscommon.SubscriberStateTable(db, "FEATURE")
        selector.addSelectable(feature_tbl)

        while not self.task_stopping_event.is_set():
            state, _ = selector.select(SELECT_TIMEOUT_MSECS)
            if state == swsscommon.Select.TIMEOUT:
                # Nothing arrived within the timeout; re-check the stop flag.
                continue
            if state != swsscommon.Select.OBJECT:
                logger.log_warning("sel.select() did not return swsscommon.Select.OBJECT")
                continue

            # Only the key (the feature name) is needed to build the event.
            key, _op, _fvs = feature_tbl.pop()
            unit_name = key + ".service"
            stamp = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
            self.task_notify({"unit": unit_name, "evt_src": "feature", "time": stamp})

    def task_worker(self):
        """Task entry point: run the subscription loop and log any failure."""
        if not self.task_stopping_event.is_set():
            try:
                self.subscribe_statedb()
            except Exception as e:
                logger.log_error("subscribe_statedb exited- {}".format(str(e)))

    def task_notify(self, msg):
        """Queue *msg* for the main process unless the task is stopping."""
        if not self.task_stopping_event.is_set():
            self.task_queue.put(msg)
class MonitorSystemBusTask(ProcessTaskBase):
    """Subprocess task that listens for systemd job events on the system D-Bus.

    Every finished job reported through the ``JobRemoved`` signal is turned
    into a service event and pushed to the main process through the queue.
    """

    def __init__(self,myQ):
        """Remember the queue (*myQ*) used to hand events to the main process."""
        ProcessTaskBase.__init__(self)
        self.task_queue = myQ

    def on_job_removed(self, id, job, unit, result):
        """Handle a ``JobRemoved`` signal.

        Only jobs whose *result* is ``"done"`` are forwarded; *id* and *job*
        are part of the signal signature and are not used.
        """
        if result == "done":
            timestamp = "{}".format(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
            msg = {"unit": unit, "evt_src":"sysbus", "time":timestamp}
            self.task_notify(msg)

    def subscribe_sysbus(self):
        """Subscribe to systemd on the system bus and run the GLib main loop."""
        # Imported here so the D-Bus/GLib bindings are only needed in the
        # subprocess that actually listens on the bus.
        import dbus
        from gi.repository import GLib
        from dbus.mainloop.glib import DBusGMainLoop

        DBusGMainLoop(set_as_default=True)
        bus = dbus.SystemBus()
        systemd = bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1')
        manager = dbus.Interface(systemd, 'org.freedesktop.systemd1.Manager')
        manager.Subscribe()
        manager.connect_to_signal('JobRemoved', self.on_job_removed)

        # NOTE(review): loop.run() blocks and never looks at
        # task_stopping_event, so stopping relies on how ProcessTaskBase
        # terminates the subprocess - confirm.
        loop = GLib.MainLoop()
        loop.run()

    def task_worker(self):
        """Task entry point: start listening and log any failure.

        Failures (missing bindings, bus errors) are logged instead of escaping
        the subprocess, matching MonitorStateDbTask.task_worker.
        """
        if self.task_stopping_event.is_set():
            return
        logger.log_info("Start Listening to systemd bus (pid {0})".format(os.getpid()))
        try:
            self.subscribe_sysbus()
        except Exception as e:
            logger.log_error("subscribe_sysbus exited- {}".format(str(e)))

    def task_notify(self, msg):
        """Queue *msg* for the main process unless the task is stopping."""
        if self.task_stopping_event.is_set():
            return
        self.task_queue.put(msg)
#Mainprocess which launches 2 subtasks - systembus task and statedb task
#and on receiving events, checks and updates the system ready status to state db
class Sysmonitor(ProcessTaskBase):
def __init__(self):
    """Set up the monitor with no database connections and no down services."""
    ProcessTaskBase.__init__(self)
    self._stop_timeout_secs = TASK_STOP_TIMEOUT
    # Connections are opened on first use by the methods that need them.
    self.state_db = None
    self.config_db = None
    # Names of the services currently considered not ready.
    self.dnsrvs_name = set()
    self.config = Config()
def post_system_status(self, state):
    """Write the system ready *state* to STATE_DB.

    The value is stored in field "Status" of key "SYSTEM_READY|SYSTEM_STATE".
    Any failure is logged and swallowed.
    """
    try:
        if not self.state_db:
            self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
            self.state_db.connect(self.state_db.STATE_DB)
        db = self.state_db
        db.set(db.STATE_DB, "SYSTEM_READY|SYSTEM_STATE", "Status", state)
        logger.log_info("Posting system ready status {} to statedb".format(state))
    except Exception as e:
        logger.log_error("Unable to post system ready status: {}".format(str(e)))
def get_all_service_list(self):
    """Build the sorted list of systemd service units to monitor.

    The list is the union of the ``*.service`` entries found in the
    multi-user and sonic target "wants" directories and of the FEATURE
    entries in CONFIG_DB whose state is neither "disabled" nor
    "always_disabled", minus the services ignored by the health config.

    Returns:
        A sorted list of unique unit names, e.g. ``["bgp.service", ...]``.
    """
    if not self.config_db:
        self.config_db = swsscommon.ConfigDBConnector()
        self.config_db.connect()

    # A set keeps each unit once even when it is linked from both targets;
    # otherwise removing an ignored service would drop only one of its copies.
    services = set()

    #add the services from the below targets
    targets= ["/etc/systemd/system/multi-user.target.wants", "/etc/systemd/system/sonic.target.wants"]
    for path in targets:
        services.update(os.path.basename(i) for i in glob.glob('{}/*.service'.format(path)))

    #add the enabled docker services from config db feature table
    feature_table = self.config_db.get_table("FEATURE")
    for srv, attrs in feature_table.items():
        if attrs["state"] not in ["disabled", "always_disabled"]:
            services.add(srv + ".service")

    self.config.load_config()
    if self.config and self.config.ignore_services:
        services.difference_update(self.config.ignore_services)

    return sorted(services)
def get_app_ready_status(self, service):
    """Return the application readiness of *service*.

    A service that is missing from the CONFIG_DB FEATURE table, or whose
    ``check_up_status`` flag is not "true", is reported as "Up" without
    looking at STATE_DB. Otherwise ``up_status``, ``fail_reason`` and
    ``update_time`` are read from ``FEATURE|<service>`` in STATE_DB.

    Returns:
        A ``(state, fail_reason, update_time)`` tuple; state is "Up" or "Down".
    """
    if not self.state_db:
        self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
        self.state_db.connect(self.state_db.STATE_DB)
    if not self.config_db:
        self.config_db = swsscommon.ConfigDBConnector()
        self.config_db.connect()

    feature_cfg = self.config_db.get_table('FEATURE')
    if service not in feature_cfg.keys():
        return "Up", "", "-"

    wants_check = feature_cfg[service].get('check_up_status')
    if wants_check is None or wants_check.lower() != "true":
        #Either check_up_status marked False or entry does not exist
        return "Up", "", "-"

    db = self.state_db
    state_key = 'FEATURE|{}'.format(service)
    reason = ""

    up_flag = db.get(db.STATE_DB, state_key, 'up_status')
    if up_flag is not None and up_flag.lower() == "true":
        app_state = "Up"
    else:
        reason = db.get(db.STATE_DB, state_key, 'fail_reason')
        if reason is None:
            reason = "NA"
        app_state = "Down"

    stamp = db.get(db.STATE_DB, state_key, 'update_time')
    if stamp is None:
        stamp = "-"

    return app_state, reason, stamp
def run_systemctl_show(self, service):
    """Query systemd for the properties of *service*.

    Runs ``systemctl show`` for a fixed set of properties and parses each
    ``Name=Value`` output line.

    Returns:
        A dict mapping property name to value; lines without "=" are skipped.
    """
    command = ('systemctl show {} --property=Id,LoadState,UnitFileState,Type,ActiveState,SubState,Result'.format(service))
    output = utils.run_command(command)
    # Split on the first "=" only, so values may themselves contain "=".
    pairs = (line.split("=", 1) for line in output.split('\n'))
    return {kv[0]: kv[1] for kv in pairs if len(kv) == 2}
def post_unit_status(self, srv_name, srv_status, app_status, fail_reason, update_time):
    """Publish the status of one service to STATE_DB.

    The four values are written as a hash under ``ALL_SERVICE_STATUS|<srv_name>``.
    """
    if not self.state_db:
        self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
        self.state_db.connect(self.state_db.STATE_DB)

    entry = {
        'service_status': srv_status,
        'app_ready_status': app_status,
        'fail_reason': fail_reason,
        'update_time': update_time,
    }
    db = self.state_db
    db.hmset(db.STATE_DB, 'ALL_SERVICE_STATUS|{}'.format(srv_name), entry)
def get_unit_status(self, event):
    """Read the current status of one systemd unit and post it to STATE_DB.

    Args:
        event: Full unit name, e.g. ``"bgp.service"``.

    Returns:
        "OK" or "NOT OK" for a loaded unit whose unit-file state is enabled,
        enabled-runtime or static. ``None`` when the unit is not loaded, is
        in any other unit-file state, or when an error occurred (the error
        is logged). Nothing is posted to STATE_DB in the ``None`` cases.
    """
    unit_status = "NOT OK"
    update_time = "-"
    # Bound before the try block so the error handler can always name the
    # unit, even when the very first statement fails.
    service_name = event

    try:
        service_status = "Down"
        service_up_status = "Down"
        # Strip only the trailing unit-type suffix: unit names may contain
        # further dots (e.g. "dbus-org.freedesktop.hostname1.service").
        service_name = event.rsplit('.', 1)[0]

        sysctl_show = self.run_systemctl_show(event)

        load_state = sysctl_show['LoadState']
        if load_state == "loaded":
            status = sysctl_show['UnitFileState']
            fail_reason = sysctl_show['Result']
            active_state = sysctl_show['ActiveState']
            sub_state = sysctl_show['SubState']
            srv_type = sysctl_show['Type']

            #Raise syslog for service state change
            logger.log_info("{} service state changed to [{}/{}]".format(event, active_state, sub_state))

            if status in ("enabled", "enabled-runtime", "static"):
                if fail_reason == "success":
                    fail_reason = "-"

                if active_state == "active" and sub_state == "exited":
                    service_status = "OK"
                    service_up_status = "OK"
                    unit_status = "OK"
                elif active_state == "active" and sub_state == "running":
                    service_status = "OK"
                    # A running unit is ready only once its app reports Up.
                    init_state, app_fail_reason, update_time = self.get_app_ready_status(service_name)
                    if init_state == "Up":
                        service_up_status = "OK"
                        unit_status = "OK"
                    else:
                        fail_reason = app_fail_reason
                        unit_status = "NOT OK"
                        if fail_reason == "docker start":
                            service_up_status = "Starting"
                            fail_reason = "-"
                elif active_state == "activating":
                    service_status = "Starting"
                    service_up_status = "Starting"
                elif active_state == "deactivating":
                    service_status = "Stopping"
                    service_up_status = "Stopping"
                elif active_state == "inactive":
                    # One-shot units and the special-cased services are
                    # treated as healthy while inactive.
                    if srv_type == "oneshot" or service_name in spl_srv_list:
                        service_status = "OK"
                        service_up_status = "OK"
                        unit_status = "OK"
                    else:
                        unit_status = "NOT OK"
                        if fail_reason == "-":
                            fail_reason = "Inactive"
                else:
                    unit_status = "NOT OK"

                self.post_unit_status(service_name, service_status, service_up_status, fail_reason, update_time)

                return unit_status

    except Exception as e:
        logger.log_error("Get unit status {}-{}".format(service_name, str(e)))

    return None
def get_all_system_status(self):
    """Scan every monitored service and derive the system status.

    Services whose unit status is "NOT OK" are recorded in ``dnsrvs_name``.

    Returns:
        "UP" when no service is recorded as down, otherwise "DOWN".
    """
    down_services = self.dnsrvs_name
    for unit in self.get_all_service_list():
        if self.get_unit_status(unit) == "NOT OK":
            # Adding to a set is a no-op when the name is already present.
            down_services.add(unit)

    return "DOWN" if down_services else "UP"
def print_console_message(self, message):
    """Write *message*, prefixed with the local time, to /dev/console."""
    stamp = datetime.now().strftime("%b %d %H:%M:%S.%f")
    line = "\n{} {}\n".format(stamp, message)
    with open('/dev/console', 'w') as console:
        console.write(line)
def publish_system_status(self, astate):
    """Publish a change of the system ready state.

    When *astate* differs from the last published state, the new state is
    remembered, logged, printed on the console and written to STATE_DB.
    Nothing happens when the state is unchanged.

    Args:
        astate: New system state, normally "UP" or "DOWN".
    """
    global system_allsrv_state
    if system_allsrv_state == astate:
        return

    system_allsrv_state = astate
    if astate == "DOWN":
        msg = "System is not ready - one or more services are not up"
    elif astate == "UP":
        msg = "System is ready"
    else:
        # Keep the message defined for any unexpected state value.
        msg = "System state changed to {}".format(astate)

    logger.log_notice(msg)
    # The console is best-effort: a write failure must not prevent the
    # state from reaching STATE_DB, because the cached state above has
    # already changed and the update would not be retried.
    try:
        self.print_console_message(msg)
    except OSError as e:
        logger.log_warning("Unable to write system status to console: {}".format(str(e)))
    self.post_system_status(astate)
def update_system_status(self):
    """Re-scan all services and publish the resulting system status.

    Any exception raised while scanning or publishing is logged and swallowed.
    """
    try:
        self.publish_system_status(self.get_all_system_status())
    except Exception as e:
        logger.log_error("update system status exception:{}".format(str(e)))
def check_unit_status(self, event):
    """Process one service event and update the system status.

    For a monitored unit, its status is re-read and the system state is
    recomputed and published. For any other unit, leftovers are cleaned up:
    the unit is dropped from the down set and its ``ALL_SERVICE_STATUS``
    entry is removed from STATE_DB.

    Args:
        event: Full unit name, e.g. ``"bgp.service"``.

    Returns:
        Always 0.
    """
    if not self.state_db:
        self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
        self.state_db.connect(self.state_db.STATE_DB)

    full_srv_list = self.get_all_service_list()
    if event in full_srv_list:
        ustate = self.get_unit_status(event)
        if ustate == "OK" and system_allsrv_state == "UP":
            astate = "UP"
        elif ustate == "OK" and system_allsrv_state == "DOWN":
            # The unit recovered; the system is up once nothing is left down.
            self.dnsrvs_name.discard(event)
            astate = "DOWN" if self.dnsrvs_name else "UP"
        else:
            self.dnsrvs_name.add(event)
            astate = "DOWN"

        self.publish_system_status(astate)
    else:
        #if received event is not in current full service list but exists in STATE_DB & set,
        #then it should be removed from STATE_DB & set
        self.dnsrvs_name.discard(event)

        # Strip only the trailing unit-type suffix: systemd reports jobs for
        # units whose names contain further dots, and a plain split('.')
        # unpack raises on those.
        srv_name = event.rsplit('.', 1)[0]
        key = 'ALL_SERVICE_STATUS|{}'.format(srv_name)
        key_exists = self.state_db.exists(self.state_db.STATE_DB, key)
        if key_exists == 1:
            self.state_db.delete(self.state_db.STATE_DB, key)

    return 0
def system_service(self):
    """Run the monitor: start both listeners and process their events.

    After an initial full scan, events are taken from the shared queue until
    the task is asked to stop. On exit the published status tables are
    removed from STATE_DB and both listener tasks are stopped.
    """
    if not self.state_db:
        self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1')
        self.state_db.connect(self.state_db.STATE_DB)

    event_queue = mpmgr.Queue()
    try:
        monitor_system_bus = MonitorSystemBusTask(event_queue)
        monitor_system_bus.task_run()

        monitor_statedb_table = MonitorStateDbTask(event_queue)
        monitor_statedb_table.task_run()
    except Exception as e:
        logger.log_error("SubProcess-{}".format(str(e)))
        sys.exit(1)

    self.update_system_status()

    from queue import Empty
    # Queue to receive the STATEDB and Systemd state change event
    while not self.task_stopping_event.is_set():
        try:
            msg = event_queue.get(timeout=QUEUE_TIMEOUT)
            event, event_src, event_time = msg["unit"], msg["evt_src"], msg["time"]
            logger.log_debug("Main process- received event:{} from source:{} time:{}".format(event,event_src,event_time))
            logger.log_info("check_unit_status for [ "+event+" ] ")
            self.check_unit_status(event)
        except Empty:
            # No event within the timeout; loop to re-check the stop flag.
            pass
        except Exception as e:
            logger.log_error("system_service"+str(e))

    #cleanup tables "'ALL_SERVICE_STATUS*', 'SYSTEM_READY*'" from statedb
    db = self.state_db
    for pattern in ("ALL_SERVICE_STATUS|*", "SYSTEM_READY|*"):
        db.delete_all_by_pattern(db.STATE_DB, pattern)

    monitor_system_bus.task_stop()
    monitor_statedb_table.task_stop()
def task_worker(self):
    """Task entry point: run the monitor unless a stop was already requested."""
    if not self.task_stopping_event.is_set():
        self.system_service()

View File

@ -12,6 +12,8 @@ from sonic_py_common.daemon_base import DaemonBase
from swsscommon.swsscommon import SonicV2Connector from swsscommon.swsscommon import SonicV2Connector
from health_checker.manager import HealthCheckerManager from health_checker.manager import HealthCheckerManager
from health_checker.sysmonitor import Sysmonitor
SYSLOG_IDENTIFIER = 'healthd' SYSLOG_IDENTIFIER = 'healthd'
@ -75,6 +77,8 @@ class HealthDaemon(DaemonBase):
if not manager.config.config_file_exists(): if not manager.config.config_file_exists():
self.log_warning("System health configuration file not found, exit...") self.log_warning("System health configuration file not found, exit...")
return return
sysmon = Sysmonitor()
sysmon.task_run()
while 1: while 1:
stat = manager.check(chassis) stat = manager.check(chassis)
self._process_stat(chassis, manager.config, stat) self._process_stat(chassis, manager.config, stat)
@ -85,6 +89,7 @@ class HealthDaemon(DaemonBase):
self.log_warning("sonic_platform package not installed. Cannot start system-health daemon") self.log_warning("sonic_platform package not installed. Cannot start system-health daemon")
self.deinit() self.deinit()
sysmon.task_stop()
def _process_stat(self, chassis, config, stat): def _process_stat(self, chassis, config, stat):
from health_checker.health_checker import HealthChecker from health_checker.health_checker import HealthChecker

View File

@ -22,3 +22,12 @@ class MockConnector(object):
def get_all(self, db_id, key): def get_all(self, db_id, key):
return MockConnector.data[key] return MockConnector.data[key]
def set(self, db_id, key, field, value):
    """Store *value* in *field* of the hash at *key*.

    Other fields already stored under *key* are kept, as with a Redis hash
    field write; *db_id* is ignored by the mock.
    """
    self.data.setdefault(key, {})[field] = value
def hmset(self, db_id, key, fieldsvalues):
    """Store every field/value pair of *fieldsvalues* in the hash at *key*.

    Fields already stored under *key* and not named in *fieldsvalues* are
    kept, as with a Redis multi-field hash write; *db_id* is ignored.
    """
    self.data.setdefault(key, {}).update(fieldsvalues)

View File

@ -3,6 +3,7 @@
1. test_user_defined_checker mocks the output of a user defined checker and verify class UserDefinedChecker 1. test_user_defined_checker mocks the output of a user defined checker and verify class UserDefinedChecker
2. test_service_checker mocks the output of monit service and verify class ServiceChecker 2. test_service_checker mocks the output of monit service and verify class ServiceChecker
3. test_hardware_checker mocks the hardware status data in db and verify class HardwareChecker 3. test_hardware_checker mocks the hardware status data in db and verify class HardwareChecker
4. Mocks and tests the system ready status and verify class Sysmonitor
And there are class that are not covered by unit test. These class will be covered by sonic-mgmt regression test. And there are class that are not covered by unit test. These class will be covered by sonic-mgmt regression test.
1. HealthDaemon 1. HealthDaemon
2. HealthCheckerManager 2. HealthCheckerManager
@ -30,6 +31,9 @@ from health_checker.health_checker import HealthChecker
from health_checker.manager import HealthCheckerManager from health_checker.manager import HealthCheckerManager
from health_checker.service_checker import ServiceChecker from health_checker.service_checker import ServiceChecker
from health_checker.user_defined_checker import UserDefinedChecker from health_checker.user_defined_checker import UserDefinedChecker
from health_checker.sysmonitor import Sysmonitor
from health_checker.sysmonitor import MonitorStateDbTask
from health_checker.sysmonitor import MonitorSystemBusTask
mock_supervisorctl_output = """ mock_supervisorctl_output = """
snmpd RUNNING pid 67, uptime 1:03:56 snmpd RUNNING pid 67, uptime 1:03:56
@ -505,3 +509,214 @@ def test_utils():
output = utils.run_command('ls') output = utils.run_command('ls')
assert output assert output
@patch('swsscommon.swsscommon.ConfigDBConnector.connect', MagicMock())
@patch('sonic_py_common.multi_asic.is_multi_asic', MagicMock(return_value=False))
@patch('docker.DockerClient')
@patch('health_checker.utils.run_command')
@patch('swsscommon.swsscommon.ConfigDBConnector')
def test_get_all_service_list(mock_config_db, mock_run, mock_docker_client):
    """Enabled FEATURE entries are listed as services, disabled ones are not."""
    feature_table = {
        name: {
            'state': state,
            'has_global_scope': 'True',
            'has_per_asic_scope': 'False',
        }
        for name, state in (('radv', 'enabled'), ('bgp', 'enabled'), ('pmon', 'disabled'))
    }
    config_db = MagicMock()
    config_db.get_table = MagicMock(return_value=feature_table)
    mock_config_db.return_value = config_db

    sysmon = Sysmonitor()
    print("mock get table:{}".format(config_db.get_table.return_value))
    services = sysmon.get_all_service_list()
    print("result get all service list:{}".format(services))

    assert 'radv.service' in services
    assert 'pmon.service' not in services
@patch('swsscommon.swsscommon.ConfigDBConnector.connect', MagicMock())
@patch('sonic_py_common.multi_asic.is_multi_asic', MagicMock(return_value=False))
@patch('docker.DockerClient')
@patch('health_checker.utils.run_command')
@patch('swsscommon.swsscommon.ConfigDBConnector')
def test_get_app_ready_status(mock_config_db, mock_run, mock_docker_client):
    """App readiness follows up_status only when check_up_status is set."""
    def feature(check_up_status):
        # One enabled FEATURE entry with the given check_up_status flag.
        return {
            'state': 'enabled',
            'has_global_scope': 'True',
            'has_per_asic_scope': 'False',
            'check_up_status': check_up_status
        }

    config_db = MagicMock()
    config_db.get_table = MagicMock(return_value={
        'radv': feature('True'),
        'bgp': feature('True'),
        'snmp': feature('False'),
    })
    mock_config_db.return_value = config_db

    MockConnector.data.update({
        'FEATURE|radv': {
            'up_status': 'True',
            'fail_reason': '-',
            'update_time': '-'
        },
        'FEATURE|bgp': {
            'up_status': 'False',
            'fail_reason': 'some error',
            'update_time': '-'
        }})

    sysmon = Sysmonitor()
    for service, expected in (('radv', 'Up'), ('bgp', 'Down'), ('snmp', 'Up')):
        result = sysmon.get_app_ready_status(service)
        print(result)
        assert expected in result
# Canned results for the patched Sysmonitor.run_systemctl_show: an enabled
# unit that is active/running (mock_radv) and one that is inactive/dead (mock_bgp).
mock_srv_props={
'mock_radv.service':{'Type': 'simple', 'Result': 'success', 'Id': 'mock_radv.service', 'LoadState': 'loaded', 'ActiveState': 'active', 'SubState': 'running', 'UnitFileState': 'enabled'},
'mock_bgp.service':{'Type': 'simple', 'Result': 'success', 'Id': 'mock_bgp.service', 'LoadState': 'loaded', 'ActiveState': 'inactive', 'SubState': 'dead', 'UnitFileState': 'enabled'}
}
@patch('health_checker.sysmonitor.Sysmonitor.get_all_service_list', MagicMock(return_value=['mock_snmp.service', 'mock_bgp.service', 'mock_ns.service']))
@patch('health_checker.sysmonitor.Sysmonitor.run_systemctl_show', MagicMock(return_value=mock_srv_props['mock_bgp.service']))
@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value=('Down','-','-')))
@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock())
def test_check_unit_status():
    """An event for an inactive monitored unit records it as down."""
    unit = 'mock_bgp.service'
    monitor = Sysmonitor()
    monitor.check_unit_status(unit)
    assert unit in monitor.dnsrvs_name
@patch('health_checker.sysmonitor.Sysmonitor.run_systemctl_show', MagicMock(return_value=mock_srv_props['mock_radv.service']))
@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value=('Up','-','-')))
@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock())
def test_get_unit_status_ok():
    """A running unit whose app reports Up yields unit status OK."""
    monitor = Sysmonitor()
    status = monitor.get_unit_status('mock_radv.service')
    print("get_unit_status:{}".format(status))
    assert status == 'OK'
@patch('health_checker.sysmonitor.Sysmonitor.run_systemctl_show', MagicMock(return_value=mock_srv_props['mock_bgp.service']))
@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value=('Up','-','-')))
@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock())
def test_get_unit_status_not_ok():
    """An inactive unit of type simple yields unit status NOT OK."""
    monitor = Sysmonitor()
    status = monitor.get_unit_status('mock_bgp.service')
    print("get_unit_status:{}".format(status))
    assert status == 'NOT OK'
@patch('health_checker.sysmonitor.Sysmonitor.get_all_service_list', MagicMock(return_value=['mock_snmp.service', 'mock_ns.service']))
@patch('health_checker.sysmonitor.Sysmonitor.get_unit_status', MagicMock(return_value= 'OK'))
@patch('health_checker.sysmonitor.Sysmonitor.publish_system_status', MagicMock())
@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock())
@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value='Up'))
def test_get_all_system_status_ok():
    """The system is UP when every monitored unit reports OK."""
    monitor = Sysmonitor()
    state = monitor.get_all_system_status()
    print("result:{}".format(state))
    assert state == 'UP'
@patch('health_checker.sysmonitor.Sysmonitor.get_all_service_list', MagicMock(return_value=['mock_snmp.service', 'mock_ns.service']))
@patch('health_checker.sysmonitor.Sysmonitor.get_unit_status', MagicMock(return_value= 'NOT OK'))
@patch('health_checker.sysmonitor.Sysmonitor.publish_system_status', MagicMock())
@patch('health_checker.sysmonitor.Sysmonitor.post_unit_status', MagicMock())
@patch('health_checker.sysmonitor.Sysmonitor.get_app_ready_status', MagicMock(return_value='Up'))
def test_get_all_system_status_not_ok():
    """The system is DOWN when the monitored units report NOT OK."""
    monitor = Sysmonitor()
    state = monitor.get_all_system_status()
    print("result:{}".format(state))
    assert state == 'DOWN'
def test_post_unit_status():
    """post_unit_status stores the service fields under ALL_SERVICE_STATUS."""
    monitor = Sysmonitor()
    monitor.post_unit_status("mock_bgp", 'OK', 'Down', 'mock reason', '-')
    stored = swsscommon.SonicV2Connector.get_all(MockConnector, 0, 'ALL_SERVICE_STATUS|mock_bgp')
    print(stored)
    expected = (
        ('service_status', 'OK'),
        ('app_ready_status', 'Down'),
        ('fail_reason', 'mock reason'),
    )
    for field, value in expected:
        assert stored[field] == value
def test_post_system_status():
    """post_system_status stores the state under SYSTEM_READY|SYSTEM_STATE."""
    monitor = Sysmonitor()
    monitor.post_system_status("UP")
    status = swsscommon.SonicV2Connector.get(MockConnector, 0, "SYSTEM_READY|SYSTEM_STATE", 'Status')
    print("post system status result:{}".format(status))
    assert status == "UP"
@patch('health_checker.sysmonitor.Sysmonitor.print_console_message', MagicMock())
@patch('health_checker.sysmonitor.system_allsrv_state', 'DOWN')
def test_publish_system_status():
    """publish_system_status posts a changed state to the state DB.

    The method under test is left unpatched; only the console write is
    mocked. The cached module state is pinned to DOWN so that publishing UP
    counts as a change, and the stored entry is cleared first so the
    assertion cannot pass on data left behind by another test.
    """
    MockConnector.data.pop("SYSTEM_READY|SYSTEM_STATE", None)
    sysmon = Sysmonitor()
    sysmon.publish_system_status('UP')
    result = swsscommon.SonicV2Connector.get(MockConnector, 0, "SYSTEM_READY|SYSTEM_STATE", 'Status')
    assert result == "UP"
@patch('health_checker.sysmonitor.Sysmonitor.get_all_system_status', MagicMock(return_value='UP'))
@patch('health_checker.sysmonitor.Sysmonitor.print_console_message', MagicMock())
@patch('health_checker.sysmonitor.system_allsrv_state', 'DOWN')
def test_update_system_status():
    """update_system_status publishes the scanned state to the state DB.

    The scan is replaced by a mock returning UP (instead of the return value
    of calling another test), the console write is mocked, and the cached
    module state is pinned to DOWN so the update counts as a change. The
    stored entry is cleared first so the assertion reflects this call only.
    """
    MockConnector.data.pop("SYSTEM_READY|SYSTEM_STATE", None)
    sysmon = Sysmonitor()
    sysmon.update_system_status()
    result = swsscommon.SonicV2Connector.get(MockConnector, 0, "SYSTEM_READY|SYSTEM_STATE", 'Status')
    assert result == "UP"
from sonic_py_common.task_base import ProcessTaskBase
import multiprocessing
# Manager-backed queue shared by the monitor task tests below; both objects
# are created when this test module is imported.
mpmgr = multiprocessing.Manager()
myQ = mpmgr.Queue()
def test_monitor_statedb_task():
    """The STATE_DB monitor task starts a process and can be stopped."""
    task = MonitorStateDbTask(myQ)
    task.SubscriberStateTable = MagicMock()
    task.task_run()
    assert task._task_process is not None
    task.task_stop()
@patch('health_checker.sysmonitor.MonitorSystemBusTask.subscribe_sysbus', MagicMock())
def test_monitor_sysbus_task():
    """The system bus monitor task starts a process and can be stopped."""
    task = MonitorSystemBusTask(myQ)
    task.SubscriberStateTable = MagicMock()
    task.task_run()
    assert task._task_process is not None
    task.task_stop()
@patch('health_checker.sysmonitor.MonitorSystemBusTask.subscribe_sysbus', MagicMock())
@patch('health_checker.sysmonitor.MonitorStateDbTask.subscribe_statedb', MagicMock())
def test_system_service():
    """The Sysmonitor task starts a process and can be stopped."""
    monitor = Sysmonitor()
    monitor.task_run()
    assert monitor._task_process is not None
    monitor.task_stop()