[k8s]: Bypass the systemd service restart limit and restart immediately when changing to local mode (#15432) (#15868)

This commit is contained in:
mssonicbld 2023-07-19 20:04:23 +08:00 committed by GitHub
parent 38e721bc24
commit f4a7e22e4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 0 deletions

View File

@ -151,6 +151,7 @@ def is_systemd_active(feat):
def restart_systemd_service(server, feat, owner):
log_debug("Restart service {} to owner:{}".format(feat, owner))
if not UNIT_TESTING:
subprocess.call(["systemctl", "reset-failed", str(feat)])
status = subprocess.call(["systemctl", "restart", str(feat)])
else:
server.mod_db_entry(STATE_DB_NAME,
@ -551,6 +552,7 @@ class FeatureTransitionHandler:
self.st_data[key] = _update_entry(dflt_st_feat, data)
remote_state = self.st_data[key][ST_FEAT_REMOTE_STATE]
current_owner = self.st_data[key][ST_FEAT_OWNER]
if (remote_state == REMOTE_RUNNING) and (old_remote_state != remote_state):
# Tag latest
@ -564,6 +566,13 @@ class FeatureTransitionHandler:
log_debug("try to tag latest label after {} seconds @{}".format(
remote_ctr_config[TAG_IMAGE_LATEST], start_time))
# Go back to local immediately, without waiting for the systemd restart time.
# When k8s is down it cannot deploy containers to the worker, so the feature needs to go back to local.
# If the current owner is already local, no restart is done.
if (current_owner != OWNER_LOCAL) and (remote_state == REMOTE_NONE) and (old_remote_state == REMOTE_STOPPED):
restart_systemd_service(self.server, key, OWNER_LOCAL)
return
if (not init):
if (old_remote_state == remote_state):
# if no remote state change, do nothing.

View File

@ -324,6 +324,37 @@ feature_test_data = {
}
}
}
},
4: {
common_test.DESCR: "Restart immediately to go back to local when remote_state changes to none from stopped",
common_test.ARGS: "ctrmgrd",
common_test.PRE: {
common_test.STATE_DB_NO: {
common_test.FEATURE_TABLE: {
"snmp": {
"remote_state": "stopped",
}
}
}
},
common_test.UPD: {
common_test.STATE_DB_NO: {
common_test.FEATURE_TABLE: {
"snmp": {
"remote_state": "none",
}
}
}
},
common_test.POST: {
common_test.STATE_DB_NO: {
common_test.FEATURE_TABLE: {
"snmp": {
"restart": "true"
}
}
}
}
}
}