diff --git a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py index 27648dd22d..516db2ef05 100755 --- a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py +++ b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py @@ -6,7 +6,7 @@ import json import os import sys import syslog - +import subprocess from collections import defaultdict from ctrmgr.ctrmgr_iptables import iptable_proxy_rule_upd @@ -141,7 +141,7 @@ def ts_now(): def is_systemd_active(feat): if not UNIT_TESTING: - status = os.system('systemctl is-active --quiet {}'.format(feat)) + status = subprocess.call(['systemctl', 'is-active', '--quiet', str(feat)]) else: status = UNIT_TESTING_ACTIVE log_debug("system status for {}: {}".format(feat, str(status))) @@ -151,7 +151,8 @@ def is_systemd_active(feat): def restart_systemd_service(server, feat, owner): log_debug("Restart service {} to owner:{}".format(feat, owner)) if not UNIT_TESTING: - status = os.system("systemctl restart {}".format(feat)) + subprocess.call(["systemctl", "reset-failed", str(feat)]) + status = subprocess.call(["systemctl", "restart", str(feat)]) else: server.mod_db_entry(STATE_DB_NAME, FEATURE_TABLE, feat, {"restart": "true"}) @@ -551,6 +552,7 @@ class FeatureTransitionHandler: self.st_data[key] = _update_entry(dflt_st_feat, data) remote_state = self.st_data[key][ST_FEAT_REMOTE_STATE] + current_owner = self.st_data[key][ST_FEAT_OWNER] if (remote_state == REMOTE_RUNNING) and (old_remote_state != remote_state): # Tag latest @@ -563,6 +565,13 @@ class FeatureTransitionHandler: log_debug("try to tag latest label after {} seconds @{}".format( remote_ctr_config[TAG_IMAGE_LATEST], start_time)) + + # This is for going back to local without waiting the systemd restart time + # when k8s is down, can't deploy containers to worker and need to go back to local + # if current owner is already local, we don't do restart + if (current_owner != OWNER_LOCAL) and (remote_state == REMOTE_NONE) and (old_remote_state == REMOTE_STOPPED): + restart_systemd_service(self.server, key, OWNER_LOCAL) + return if (not init): if (old_remote_state == remote_state): diff --git a/src/sonic-ctrmgrd/tests/ctrmgrd_test.py b/src/sonic-ctrmgrd/tests/ctrmgrd_test.py index 0304985224..76651309ce 100755 --- a/src/sonic-ctrmgrd/tests/ctrmgrd_test.py +++ b/src/sonic-ctrmgrd/tests/ctrmgrd_test.py @@ -324,6 +324,37 @@ feature_test_data = { } } } + }, + 4: { + common_test.DESCR: "Restart immediately to go back to local when remote_state changes to none from stopped", + common_test.ARGS: "ctrmgrd", + common_test.PRE: { + common_test.STATE_DB_NO: { + common_test.FEATURE_TABLE: { + "snmp": { + "remote_state": "stopped", + } + } + } + }, + common_test.UPD: { + common_test.STATE_DB_NO: { + common_test.FEATURE_TABLE: { + "snmp": { + "remote_state": "none", + } + } + } + }, + common_test.POST: { + common_test.STATE_DB_NO: { + common_test.FEATURE_TABLE: { + "snmp": { + "restart": "true" + } + } + } + } } }