From 5f11eb320e49ee9410557f5178a186409daaea26 Mon Sep 17 00:00:00 2001 From: Guohan Lu Date: Wed, 10 Nov 2021 15:36:20 -0800 Subject: [PATCH] Revert "sysready (#8889)" This reverts commit d7e5372e54f7416b0d435d06a5c8892e74d32b77. --- files/build_templates/init_cfg.json.j2 | 22 +- .../build_templates/sonic_debian_extension.j2 | 8 - files/image_config/sysmonitor/sysmonitor.py | 691 ------------------ .../sysmonitor/sysmonitor.service | 17 - ...rvices-data.determine-reboot-cause.service | 1 - .../yang-models/sonic-feature.yang | 33 +- 6 files changed, 2 insertions(+), 770 deletions(-) delete mode 100755 files/image_config/sysmonitor/sysmonitor.py delete mode 100644 files/image_config/sysmonitor/sysmonitor.service diff --git a/files/build_templates/init_cfg.json.j2 b/files/build_templates/init_cfg.json.j2 index de348c6d22..2ef6682d97 100644 --- a/files/build_templates/init_cfg.json.j2 +++ b/files/build_templates/init_cfg.json.j2 @@ -51,27 +51,7 @@ {%- if feature in ["lldp", "pmon", "radv", "snmp", "telemetry"] %} "set_owner": "kube", {% else %} "set_owner": "local", {% endif %} {% endif %} - "high_mem_alert": "disabled", -{%- if feature in ["bgp", "swss", "pmon", "nat", "teamd", "dhcp_relay", "sflow", "l2mcd", "udld", "stp", "snmp", "lldp", "radv", "iccpd", "syncd", "vrrp", "mgmt-framework", "tam"] %} - "check_up_status" : "false" -{%- else %} - "check_up_status" : "false" -{%- endif %} - }{% if not loop.last %},{% endif -%} -{% endfor %} - }, -{%- set host_features = [("caclmgrd", false), - ("cron", false), - ("docker", false), - ("hostcfgd", false)] %} - "HOST_FEATURE": { -{%- for host_feature, check_up_status in host_features %} - "{{host_feature}}": { -{%- if host_feature in ["caclmgrd", "hostcfgd"] %} - "check_up_status" : "false" -{%- else %} - "check_up_status" : "false" -{%- endif %} + "high_mem_alert": "disabled" }{% if not loop.last %},{% endif -%} {% endfor %} } diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 1f49f751ed..3c0f18e8ac 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -346,14 +346,6 @@ echo "ntp.service" | sudo tee -a $GENERATED_SERVICE_FILE sudo LANG=C cp $IMAGE_CONFIGS/warmboot-finalizer/finalize-warmboot.sh $FILESYSTEM_ROOT/usr/local/bin/finalize-warmboot.sh sudo LANG=C cp $IMAGE_CONFIGS/warmboot-finalizer/warmboot-finalizer.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM echo "warmboot-finalizer.service" | sudo tee -a $GENERATED_SERVICE_FILE -sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable warmboot-finalizer.service - -# Copy sysmonitor files -sudo LANG=C cp $IMAGE_CONFIGS/sysmonitor/sysmonitor.py $FILESYSTEM_ROOT/usr/local/bin/sysmonitor.py -sudo LANG=C cp $IMAGE_CONFIGS/sysmonitor/sysmonitor.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM -echo "sysmonitor.service" | sudo tee -a $GENERATED_SERVICE_FILE -sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable sysmonitor.service - # Copy watchdog-control files sudo LANG=C cp $IMAGE_CONFIGS/watchdog-control/watchdog-control.sh $FILESYSTEM_ROOT/usr/local/bin/watchdog-control.sh diff --git a/files/image_config/sysmonitor/sysmonitor.py b/files/image_config/sysmonitor/sysmonitor.py deleted file mode 100755 index 9dadab056f..0000000000 --- a/files/image_config/sysmonitor/sysmonitor.py +++ /dev/null @@ -1,691 +0,0 @@ -#!/usr/bin/python3 - -from datetime import datetime -import time -import datetime -import os -import subprocess -import sys -import threading -import syslog -import argparse -import multiprocessing as mp -from swsssdk import ConfigDBConnector -from swsssdk import SonicV2Connector -import socket -import json -import fcntl -import stat - -SYSLOG_IDENTIFIER="system#monitor" -STATE_FEATURE_TABLE_NAME = "FEATURE" -REDIS_TIMEOUT_MS = 0 -SYSTEM_STATE="DOWN" -logger = None -SYSTEM_CORESRV_STATE="DOWN" -SYSTEM_ALLSRV_STATE="DOWN" -SYSREADY_LOCKFILE="/var/run/sysready.lock" -core_dnsrvs_name_list=[] -dnsrvs_name_list=[] -allsrvs_dict={} -coresrvs_dict={} -allsrvs_status="DOWN" -coresrvs_status="DOWN" -spl_srv_list= ['database-chassis', 'gbsyncd'] -core_srv_list = [ - 'swss.service', - 'bgp.service', - 'teamd.service', - 'pmon.service', - 'syncd.service', - 'database.service', - 'mgmt-framework.service', -] - -class FileLock: - def __init__(self, lock_file): - self.f = open(lock_file, 'w') - - def lock(self): - fcntl.flock(self.f, fcntl.LOCK_EX) - - def unlock(self): - fcntl.flock(self.f, fcntl.LOCK_UN) - -sysready_lock = FileLock(SYSREADY_LOCKFILE) - - -class Logger(object): - def __init__(self, syslog_identifier): - syslog.openlog(ident=syslog_identifier, logoption=syslog.LOG_NDELAY, facility=syslog.LOG_DAEMON) - - #def __del__(self): - #syslog.closelog() - - def log_emerg(self, msg, also_print_to_console=False): - syslog.syslog(syslog.LOG_EMERG, msg) - - if also_print_to_console: - print(msg) - - def log_crit(self, msg, also_print_to_console=False): - syslog.syslog(syslog.LOG_CRIT, msg) - - if also_print_to_console: - print(msg) - - def log_alert(self, msg, also_print_to_console=False): - syslog.syslog(syslog.LOG_ALERT, msg) - - if also_print_to_console: - print(msg) - - - def log_error(self, msg, also_print_to_console=False): - syslog.syslog(syslog.LOG_ERR, msg) - - if also_print_to_console: - print(msg) - - def log_warning(self, msg, also_print_to_console=False): - syslog.syslog(syslog.LOG_WARNING, msg) - - if also_print_to_console: - print(msg) - - def log_notice(self, msg, also_print_to_console=False): - syslog.syslog(syslog.LOG_NOTICE, msg) - - if also_print_to_console: - print(msg) - - def log_info(self, msg, also_print_to_console=False): - syslog.syslog(syslog.LOG_INFO, msg) - - if also_print_to_console: - print(msg) - - def log_debug(self, msg, also_print_to_console=False): - syslog.syslog(syslog.LOG_DEBUG, msg) - - if also_print_to_console: - print(msg) - -#Initalise the syslog infrastructure -logger = Logger(SYSLOG_IDENTIFIER) - -class Dict2Obj(object): - """dict to dict2obj - d: data""" - - def __init__(self, d): - for a, b in list(d.items()): - if isinstance(b, (list, tuple)): - setattr(self, a, [Dict2Obj(x) if isinstance( - x, dict) else x for x in b]) - else: - setattr(self, a, Dict2Obj(b) if isinstance(b, dict) else b) - - -def print_console_message(message): - with open('/dev/console', 'w') as console: - console.write("\n{} {} \n ".format(datetime.datetime.now().strftime("%b %d %H:%M:%S.%f"), message)) - -def post_system_status_core(state, st_db): - if st_db: - st_db.set(st_db.STATE_DB, "SYSTEM_READY|SYSTEM_STATE_CORE", "status", state) - -def post_system_status_all(state, st_db): - if st_db: - st_db.set(st_db.STATE_DB, "SYSTEM_READY|SYSTEM_STATE_ALL", "status", state) - -def run_systemctl_show(service): - a = subprocess.check_output(["systemctl", "show", service, "--property=Id,LoadState,UnitFileState,Type,ActiveState,SubState,Result"], universal_newlines=True).split('\n') - json_dict = {} - for e in a: - kv = e.split("=", 1) - if len(kv) == 2: - json_dict[kv[0]] = kv[1] - result = Dict2Obj(json_dict) - return result - -def get_all_service_list(config_db): - dir_list=[] - - #add the services from the below targets - path= ["/etc/systemd/system/multi-user.target.wants", "/etc/systemd/system/sonic.target.wants"] - for p in path: - if os.path.exists(p): - dir_list+= os.listdir(p) - - #add the enabled docker services from config db feature table - feature_table = config_db.get_table("FEATURE") - for srv in feature_table.keys(): - if feature_table[srv]["state"] not in ["disabled", "always_disabled"]: - srvext=srv+".service" - if srvext not in dir_list: - dir_list.append(srvext) - - #Keep ZTP in exclusion list - exclude_list= ['aaastatsd.service', 'aaastatsd.timer' , 'rasdaemon.service', 'ztp.service', 'sonic.target', 'sonic-delayed.target'] - for l in exclude_list: - if l in dir_list: - dir_list.remove(l) - - #sort it - dir_list.sort() - - return dir_list - - -def get_app_ready_status(service, ap_db, st_db, config_db): - #check FEATURE table from config db for the service' check_up_status flag - #if marked to true, then read the service up_status field from FEATURE table of state db. - #else, just return true (or) Up - fail_reason="" - configdb_feature_table = config_db.get_table('FEATURE') - configdb_host_feature_table = config_db.get_table('HOST_FEATURE') - service_name = service - - if service_name not in configdb_feature_table.keys() and service_name not in configdb_host_feature_table.keys(): - pstate = "Up" - else: - if service_name in configdb_feature_table.keys(): - check_app_up_status = configdb_feature_table[service_name].get('check_up_status') - elif service_name in configdb_host_feature_table.keys(): - check_app_up_status = configdb_host_feature_table[service_name].get('check_up_status') - - if check_app_up_status == "true": - up_status_flag = st_db.get(st_db.STATE_DB, 'FEATURE|{}'.format(service_name), 'up_status') - if up_status_flag == "true": - pstate = "Up" - else: - fail_reason = st_db.get(st_db.STATE_DB, 'FEATURE|{}'.format(service_name), 'fail_reason') - if fail_reason is None: - fail_reason = "NA" - pstate = "Down" - else: - #Either check_up_status marked false or entry does not exist - pstate = "Up" - - return pstate,fail_reason - - -def get_unit_status(event, ap_db, st_db, config_db): - """ Get a unit status""" - global coresrvs_dict - global core_srv_list - global allsrvs_dict - global spl_srv_list - unit_status = "NOTOK" - fail_reason="Unknown" - try: - service_status = "Not OK" - service_up_status = "Not OK" - service_name,last_name = event.split('.') - sysctl_show = run_systemctl_show(event) - load_state = sysctl_show.LoadState - if load_state == "loaded": - status = sysctl_show.UnitFileState - fail_reason = sysctl_show.Result - active_state = sysctl_show.ActiveState - sub_state = sysctl_show.SubState - srv_type = sysctl_show.Type - - #Raise syslog for service state change - logger.log_info("{} service state changed to [{}/{}]".format(event, active_state, sub_state)) - - if status == "enabled" or status == "enabled-runtime" or status == "static": - if fail_reason == "success": - fail_reason = "-" - if (active_state == "active" and sub_state == "exited"): - service_status = "OK" - service_up_status = "OK" - unit_status = "OK" - elif active_state == "active" and sub_state == "running": - service_status = "OK" - init_state,app_fail_reason = get_app_ready_status(service_name, ap_db, st_db, config_db) - if init_state == "Up": - service_up_status = "OK" - unit_status = "OK" - else: - fail_reason = app_fail_reason - unit_status = "NOTOK" - if fail_reason == "docker start": - service_up_status = "Starting" - fail_reason = "-" - elif active_state == "activating": - service_status = "Starting" - service_up_status = "Starting" - elif active_state == "deactivating": - service_status = "Stopping" - service_up_status = "Stopping" - elif active_state == "inactive": - if srv_type == "oneshot" or service_name in spl_srv_list: - service_status = "OK" - service_up_status = "OK" - unit_status = "OK" - else: - unit_status = "NOTOK" - if fail_reason == "-": - fail_reason = "Inactive" - else: - unit_status = "NOTOK" - - if event in core_srv_list: - coresrvs_dict[service_name] = {"service_status":service_status, "service_up_status":service_up_status, "fail_reason":fail_reason} - - allsrvs_dict[service_name] = {"service_status":service_status, "service_up_status":service_up_status, "fail_reason":fail_reason} - - return unit_status - - except Exception as e: - logger.log_error("Get unit status {}-{}".format(service_name, str(e))) - - -def get_all_system_status(ap_db, st_db, config_db): - """ Shows the system ready status""" - global dnsrvs_name_list - global allsrvs_status - scan_srv_list=[] - overall_ok_flag = 1 - - scan_srv_list=get_all_service_list(config_db) - #logger.log_info("scan_srv_list:[{}]".format(scan_srv_list)) - - for service in scan_srv_list: - ustate = get_unit_status(service,ap_db,st_db,config_db) - if ustate == "NOTOK": - overall_ok_flag &= 0 - dnsrvs_name_list.append(service) - - if overall_ok_flag == 1: - allsrvs_status = "UP" - return ("UP", "System is ready with all the services") - else: - allsrvs_status = "DOWN" - return ("DOWN", "System is not ready - one or more services are not up") - - -def get_core_system_status(ap_db, st_db,config_db): - """ Shows the core system ready status""" - global core_srv_list - global core_dnsrvs_name_list - global coresrvs_status - core_ok_flag = 1 - - for service in core_srv_list: - ustate = get_unit_status(service,ap_db,st_db,config_db) - if ustate == "NOTOK": - core_ok_flag &= 0 - core_dnsrvs_name_list.append(service) - - if core_ok_flag == 1: - coresrvs_status = "UP" - return ("UP", "System is ready") - else: - coresrvs_status = "DOWN" - return ("DOWN", "System is not ready - core services are not ok") - - -#Checks current system status -def check_system_status(event, st_db, ap_db, config_db): - global SYSTEM_STATE - (cstate, msg) = get_core_system_status(ap_db, st_db,config_db) - if SYSTEM_STATE != cstate: - SYSTEM_STATE=cstate - logger.log_notice(msg) - print_console_message(msg) - post_system_status_core(cstate, st_db) - - - global SYSTEM_ALLSRV_STATE - (astate, msg) = get_all_system_status(ap_db, st_db, config_db) - if SYSTEM_ALLSRV_STATE != astate: - SYSTEM_ALLSRV_STATE=astate - logger.log_info(msg) - print_console_message(msg) - post_system_status_all(astate, st_db) - -#Checks the unit status and updates the system status -def check_unit_status(event, st_db, ap_db, config_db): - global SYSTEM_STATE - global SYSTEM_ALLSRV_STATE - global core_dnsrvs_name_list - global dnsrvs_name_list - global core_srv_list - global coresrvs_status - global allsrvs_status - global allsrvs_dict - - #astate="DOWN" - #cstate="DOWN" - #msg="" - - #check for core status - if event in core_srv_list: - ustate = get_unit_status(event,ap_db,st_db,config_db) - if ustate == "OK" and SYSTEM_STATE == "UP": - cstate = "UP" - elif ustate == "OK" and SYSTEM_STATE == "DOWN": - if event in core_dnsrvs_name_list: - core_dnsrvs_name_list.remove(event) - #need to check if need to set cstate to UP if this was the only down service before, which became UP now. - if len(core_dnsrvs_name_list) == 0: - cstate = "UP" - else: - cstate = "DOWN" - else: - if event not in core_dnsrvs_name_list: - core_dnsrvs_name_list.append(event) - cstate = "DOWN" - - if cstate == "DOWN": - msg = "System is not ready - core services are not ok" - coresrvs_status = "DOWN" - else: - msg = "System is ready with core services" - coresrvs_status = "UP" - - #logger.log_info("core - event:{} ustate:{} cstate:{} dnsrv:{}".format(event,ustate,cstate,core_dnsrvs_name_list)) - - if SYSTEM_STATE != cstate: - SYSTEM_STATE=cstate - logger.log_notice(msg) - print_console_message(msg) - post_system_status_core(cstate, st_db) - - #check for all status - full_srv_list=get_all_service_list(config_db) - #logger.log_info("full srv list:{}".format(full_srv_list)) - if event in full_srv_list: - ustate = get_unit_status(event,ap_db,st_db,config_db) - if ustate == "OK" and SYSTEM_ALLSRV_STATE == "UP": - astate = "UP" - elif ustate == "OK" and SYSTEM_ALLSRV_STATE == "DOWN": - if event in dnsrvs_name_list: - dnsrvs_name_list.remove(event) - #need to check if need to set cstate to UP if this was the only down service before, which became UP now. - if len(dnsrvs_name_list) == 0: - astate = "UP" - else: - astate = "DOWN" - else: - if event not in dnsrvs_name_list: - dnsrvs_name_list.append(event) - astate = "DOWN" - - if astate == "DOWN": - msg = "System is not ready - one or more services are not ok" - allsrvs_status = "DOWN" - else: - msg = "System is ready with all the services" - allsrvs_status = "UP" - - #logger.log_info("all - event:{} ustate:{} astate:{} dnsrvs:{}".format(event,ustate,astate,dnsrvs_name_list)) - - if SYSTEM_ALLSRV_STATE != astate: - SYSTEM_ALLSRV_STATE=astate - logger.log_info(msg) - print_console_message(msg) - post_system_status_all(astate, st_db) - - else: - #if received event is not in current full service list but exists in global dictionary & list, then it should be removed from dictionary & list - srv_name,last_name = event.split('.') - if allsrvs_dict.__contains__(srv_name): - allsrvs_dict.pop(srv_name) - - #also remove from dnsrvslist - if event in dnsrvs_name_list: - dnsrvs_name_list.remove(event) - - - - -############################################################## -# Listen for STATEDB state event # -############################################################## - -def subscribe_statedb(queue): - from swsscommon import swsscommon - - while True: - try: - logger.log_info( "Listening for StateDB event, Pid:{}".format(os.getpid())) - SELECT_TIMEOUT_MS = 1000 * 2 - - db = swsscommon.DBConnector("STATE_DB", REDIS_TIMEOUT_MS, True) - sel = swsscommon.Select() - cst = swsscommon.SubscriberStateTable(db, STATE_FEATURE_TABLE_NAME) - sel.addSelectable(cst) - - while True: - (state, c) = sel.select(SELECT_TIMEOUT_MS) - if state == swsscommon.Select.OBJECT: - (key, op, cfvs) = cst.pop() - #logger.log_info(key+"featureevent") - key_ext = key+".service" - queue.put(key_ext) - except Exception as e: - logger.log_error( str(e)) - - time.sleep(2) - - -def subscribe_statedb_event_thread(queue): - while True: - try: - process_statedb_event = mp.Process(target=subscribe_statedb, args=(queue,) ) - process_statedb_event.start() - process_statedb_event.join() - except Exception as e: - logger.log_error( str(e)) - - time.sleep(1) - -############################################################## -# Listening for System service event # -############################################################## - -QUEUE=None -def OnJobRemoved(id, job, unit, result): - - global QUEUE - - #logger.log_debug('{}: Job Removed: {}, {}, {} '.format( id, job, unit, result)) - if result == "done": - QUEUE.put(unit) - return - - -#Sub process for listening the systemd event on dbus -def subscribe_service_event(queue): - import dbus - from gi.repository import GObject - from dbus.mainloop.glib import DBusGMainLoop - - #logger.log_info( "Listening for systemd service event, Pid:{}".format(os.getpid())) - DBusGMainLoop(set_as_default=True) - - bus = dbus.SystemBus() - systemd = bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1') - manager = dbus.Interface(systemd, 'org.freedesktop.systemd1.Manager') - - manager.Subscribe() - manager.connect_to_signal('JobRemoved', OnJobRemoved) - - loop = GObject.MainLoop() - loop.run() - - -#Start the subprocess to listen the systemd service state change event -def subscribe_service_event_thread(queue): - retry_count=0 - while True: - try: - process_service_event = mp.Process(target=subscribe_service_event, args=(queue,) ) - process_service_event.start() - process_service_event.join() - except Exception as e: - logger.log_error( str(e)) - - time.sleep(60) - retry_count+=1 - if retry_count > 10: - logger.log_error("dbus subscription for systemd1 failed multiple times, exiting the subscription") - break - - -def status_core(req): - """shows the system status core""" - global coresrvs_dict - global coresrvs_status - coresrvs="" - - sysready_lock.lock() - if coresrvs_status == "UP": - msg = "System is ready with core services" - else: - msg = "System is not ready - core services are not ok" - - coresrvs+="{:30s} {:20s} {:20s} {:20s}\n".format("Service-Name","Service-Status","App-Ready-Status", "Fail-Reason") - for srv in coresrvs_dict.keys(): - coresrvs+="{:30s} {:20s} {:20s} {:20s}\n".format(srv, coresrvs_dict[srv]['service_status'], - coresrvs_dict[srv]['service_up_status'], - coresrvs_dict[srv]['fail_reason']) - sysready_lock.unlock() - - return {"status":msg, "coresrvs":coresrvs} - - -def status_all(req): - """shows the system status all""" - global allsrvs_dict - global allsrvs_status - global dnsrvs_name_list - str1=" " - allsrvs="" - dnsrvs_name="" - - sysready_lock.lock() - if allsrvs_status == "UP": - msg = "System is ready with all the services" - else: - msg = "System is not ready - one or more services are not ok" - - allsrvs+="{:30s} {:20s} {:20s} {:20s}\n".format("Service-Name","Service-Status","App-Ready-Status", "Fail-Reason") - for srv in allsrvs_dict.keys(): - allsrvs+="{:30s} {:20s} {:20s} {:20s}\n".format(srv, allsrvs_dict[srv]['service_status'], - allsrvs_dict[srv]['service_up_status'], - allsrvs_dict[srv]['fail_reason']) - - dnsrvs_name=str1.join(dnsrvs_name_list) - sysready_lock.unlock() - - return {"status":msg, "allsrvs":allsrvs, "dnsrvs_name":dnsrvs_name} - - -def sysready_listen(): - SERVER_ADDRESS = '/var/run/sysready.socket' - - if os.path.exists(SERVER_ADDRESS): - os.remove(SERVER_ADDRESS) - - sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - sock.bind(SERVER_ADDRESS) - os.chmod(SERVER_ADDRESS, stat.S_IRWXU |stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH | stat.S_IWOTH) - sock.listen(1) - fail_res={"status":False, "msg":None} - while True: - connection, client_address = sock.accept() - try: - request = connection.recv(10240) - #logger.log_info("sysready [ REQ ] {}".format(request)) - if request is None: - continue - - req=Dict2Obj(json.loads(request.decode('utf-8'))) - - response = globals()[req.command](req) - res=json.dumps(response) - #logger.log_info("sysready [ RES ] {}".format(res)) - connection.sendall(res.encode('utf-8')) - except Exception as e: - logger.log_error("sysready {}".format(str(e))) - fail_res['msg']=str(e) - connection.sendall(json.dumps(fail_res).encode('utf-8')) - - connection.close() - - #sock.close() #lgtm [py/unreachable-statement] - - -def db_connect(): - try: - st_db = SonicV2Connector() - st_db.connect(st_db.STATE_DB,True) - ap_db = SonicV2Connector() - ap_db.connect(ap_db.APPL_DB,True) - config_db = ConfigDBConnector() - config_db.connect() - except Exception as e: - logger.log_error("Error: Connection to the DB failed {}".format(str(e))) - sys.exit(1) - - return st_db,ap_db,config_db - - -def system_service(): - - global QUEUE - QUEUE = mp.Queue() - - st_db,ap_db,config_db = db_connect() - - thread_service_event = threading.Thread(target=subscribe_service_event_thread, name='service', args=(QUEUE,)) - thread_service_event.start() - - thread_sysready = threading.Thread(target=sysready_listen, name='sysready', args=()) - thread_sysready.start() - - thread_statedb = threading.Thread(target=subscribe_statedb_event_thread, name='statedb', args=(QUEUE,)) - thread_statedb.start() - - event = 'SERVICE_EVENT' - sysready_lock.lock() - #This is run only once when sysmonitor bootsup - check_system_status(event, st_db, ap_db, config_db) - sysready_lock.unlock() - - # Queue to receive the STATEDB and Systemd state change event - while True: - event = QUEUE.get() - #logger.log_info( "System event [ "+event+" ] is received") - try: - sysready_lock.lock() - check_unit_status(event, st_db, ap_db, config_db) - sysready_lock.unlock() - except Exception as e: - logger.log_error( str(e)) - time.sleep(2) - - -#Main method to lanch the process in background -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - parser.add_argument("--daemon", action='store_true', help="Start with daemon mode") - args = parser.parse_args() - - if args.daemon: - try: - pid = os.fork() - except OSError: - logger.log_error("Could not create a child process\n") - #parent - if pid != 0: - exit() - - system_service() - diff --git a/files/image_config/sysmonitor/sysmonitor.service b/files/image_config/sysmonitor/sysmonitor.service deleted file mode 100644 index 0340694572..0000000000 --- a/files/image_config/sysmonitor/sysmonitor.service +++ /dev/null @@ -1,17 +0,0 @@ -[Unit] -Description=Watchdog service for system services and app readiness -Requires=database.service -After=database.service -StartLimitIntervalSec=60 -StartLimitBurst=3 - - -[Service] -Type=forking -Restart=always -ExecStart=/usr/local/bin/sysmonitor.py --daemon -ExecReload=/bin/kill -HUP $MAINPID - -[Install] -WantedBy=multi-user.target - diff --git a/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service b/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service index 11d3521a5c..f0d9e91fe9 100644 --- a/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service +++ b/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service @@ -6,7 +6,6 @@ After=rc-local.service database.service [Service] Type=simple ExecStart=/usr/local/bin/determine-reboot-cause -RemainAfterExit=yes [Install] WantedBy=multi-user.target diff --git a/src/sonic-yang-models/yang-models/sonic-feature.yang b/src/sonic-yang-models/yang-models/sonic-feature.yang index c13bb95236..0a7b29c6ae 100644 --- a/src/sonic-yang-models/yang-models/sonic-feature.yang +++ b/src/sonic-yang-models/yang-models/sonic-feature.yang @@ -66,38 +66,7 @@ module sonic-feature{ type stypes:feature_state; default "disabled"; } - - leaf check_up_status { - description "This configuration controls the system ready tool to check - the app ready/up status"; - type boolean; - default false; - } - } - } - container HOST_FEATURE { - - description "host feature table in config_db.json"; - - list HOST_FEATURE_LIST { - - key "name"; - - leaf name { - description "host feature name in Host Feature table - Example - caclmgrd, hostcfgd"; - type string { - length 1..32; - } - } - - leaf check_up_status { - description "This configuration controls the system ready tool to check - the app ready/up status"; - type boolean; - default false; - } } } } -} +} \ No newline at end of file