[containerd]Fixing container commands when mode is local and state is disabled (#9986)
Why I did it
During warm-reboot and fast-reboot the below error logs appear
Feb 3 22:05:15.187408 r-lionfish-13 ERR container: docker cmd: kill for nat failed with 404 Client Error for http+docker://localhost/v1.41/containers/nat/json: Not Found ("No such container: nat")
The container command when called for local mode doesn't check if it is enabled before calling docker kill which throws the above errors.
b6ca76b482/scripts/fast-reboot (L699)
How I did it
Checking feature state if local mode and returning error exit code along with valid debug message.
How to verify it
Manually tested with warm-reboot and fast-reboot
Added UT to verify it.
This commit is contained in:
parent
43c32dfa5d
commit
4da20e7ff9
@ -26,6 +26,7 @@ CONTAINER_ID = "container_id"
|
||||
REMOTE_STATE = "remote_state"
|
||||
VERSION = "container_version"
|
||||
SYSTEM_STATE = "system_state"
|
||||
STATE = "state"
|
||||
|
||||
KUBE_LABEL_TABLE = "KUBE_LABELS"
|
||||
KUBE_LABEL_SET_KEY = "SET"
|
||||
@ -38,6 +39,9 @@ SONIC_CTR_CONFIG_PEND_SECS = "revert_to_local_on_wait_seconds"
|
||||
DEFAULT_PEND_SECS = ( 5 * 60 )
|
||||
WAIT_POLL_SECS = 2
|
||||
|
||||
SUCCESS = 0
|
||||
FAILURE = -1
|
||||
|
||||
remote_ctr_enabled = False
|
||||
|
||||
def debug_msg(m):
|
||||
@ -87,10 +91,10 @@ def read_data(is_config, feature, fields):
|
||||
|
||||
def read_config(feature):
|
||||
""" Read requried feature config """
|
||||
set_owner, no_fallback = read_data(True, feature,
|
||||
[(SET_OWNER, "local"), (NO_FALLBACK, False)])
|
||||
set_owner, no_fallback, state = read_data(True, feature,
|
||||
[(SET_OWNER, "local"), (NO_FALLBACK, False), (STATE, "disabled")])
|
||||
|
||||
return (set_owner, not no_fallback)
|
||||
return (set_owner, not no_fallback, state)
|
||||
|
||||
|
||||
def read_state(feature):
|
||||
@ -107,12 +111,12 @@ def docker_action(action, feature, **kwargs):
|
||||
container = client.containers.get(feature)
|
||||
getattr(container, action)(**kwargs)
|
||||
syslog.syslog(syslog.LOG_INFO, "docker cmd: {} for {}".format(action, feature))
|
||||
return 0
|
||||
return SUCCESS
|
||||
|
||||
except (docker.errors.NotFound, docker.errors.APIError) as err:
|
||||
syslog.syslog(syslog.LOG_ERR, "docker cmd: {} for {} failed with {}".
|
||||
format(action, feature, str(err)))
|
||||
return -1
|
||||
return FAILURE
|
||||
|
||||
|
||||
def set_label(feature, create):
|
||||
@ -186,7 +190,7 @@ def container_start(feature, **kwargs):
|
||||
|
||||
init()
|
||||
|
||||
set_owner, fallback = read_config(feature)
|
||||
set_owner, fallback, _ = read_config(feature)
|
||||
_, remote_state, _ = read_state(feature)
|
||||
|
||||
debug_msg("{}: set_owner:{} fallback:{} remote_state:{}".format(
|
||||
@ -244,8 +248,8 @@ def container_stop(feature, **kwargs):
|
||||
debug_msg("BEGIN")
|
||||
|
||||
init()
|
||||
|
||||
set_owner, _ = read_config(feature)
|
||||
ret = SUCCESS
|
||||
set_owner, _ , _ = read_config(feature)
|
||||
current_owner, remote_state, _ = read_state(feature)
|
||||
docker_id = container_id(feature)
|
||||
remove_label = (remote_state != "pending") or (set_owner == "local")
|
||||
@ -257,7 +261,7 @@ def container_stop(feature, **kwargs):
|
||||
set_label(feature, False)
|
||||
|
||||
if docker_id:
|
||||
docker_action("stop", docker_id, **kwargs)
|
||||
ret = docker_action("stop", docker_id, **kwargs)
|
||||
else:
|
||||
syslog.syslog(
|
||||
syslog.LOG_ERR if current_owner != "none" else syslog.LOG_INFO,
|
||||
@ -286,6 +290,7 @@ def container_stop(feature, **kwargs):
|
||||
update_data(feature, data)
|
||||
|
||||
debug_msg("END")
|
||||
return ret
|
||||
|
||||
|
||||
def container_kill(feature, **kwargs):
|
||||
@ -301,19 +306,25 @@ def container_kill(feature, **kwargs):
|
||||
|
||||
init()
|
||||
|
||||
set_owner, _ = read_config(feature)
|
||||
ret = SUCCESS
|
||||
set_owner, _ , state = read_config(feature)
|
||||
current_owner, remote_state, _ = read_state(feature)
|
||||
docker_id = container_id(feature)
|
||||
remove_label = (set_owner != "local") or (current_owner != "local")
|
||||
|
||||
debug_msg("{}: set_owner:{} current_owner:{} remote_state:{} docker_id:{}".format(
|
||||
feature, set_owner, current_owner, remote_state, docker_id))
|
||||
debug_msg("{}: set_owner:{} current_owner:{} remote_state:{} docker_id:{} state:{}".format(
|
||||
feature, set_owner, current_owner, remote_state, docker_id, state))
|
||||
|
||||
if remove_label:
|
||||
set_label(feature, False)
|
||||
|
||||
if set_owner == "local":
|
||||
if state not in ["enabled", "always_enabled"]:
|
||||
debug_msg("{} is not enabled".format(feature))
|
||||
return FAILURE
|
||||
|
||||
if docker_id:
|
||||
docker_action("kill", docker_id, **kwargs)
|
||||
ret = docker_action("kill", docker_id, **kwargs)
|
||||
|
||||
else:
|
||||
syslog.syslog(
|
||||
@ -322,6 +333,7 @@ def container_kill(feature, **kwargs):
|
||||
|
||||
|
||||
debug_msg("END")
|
||||
return ret
|
||||
|
||||
|
||||
def container_wait(feature, **kwargs):
|
||||
@ -341,10 +353,11 @@ def container_wait(feature, **kwargs):
|
||||
|
||||
init()
|
||||
|
||||
set_owner, fallback = read_config(feature)
|
||||
set_owner, fallback, _ = read_config(feature)
|
||||
current_owner, remote_state, _ = read_state(feature)
|
||||
docker_id = container_id(feature)
|
||||
pend_wait_secs = 0
|
||||
ret = SUCCESS
|
||||
|
||||
if not docker_id and fallback:
|
||||
pend_wait_secs = get_config_data(
|
||||
@ -377,8 +390,9 @@ def container_wait(feature, **kwargs):
|
||||
format(feature))
|
||||
else:
|
||||
debug_msg("END -- transitioning to docker wait")
|
||||
docker_action("wait", docker_id, **kwargs)
|
||||
ret = docker_action("wait", docker_id, **kwargs)
|
||||
|
||||
return ret
|
||||
|
||||
def main():
|
||||
parser=argparse.ArgumentParser(description="container commands for start/stop/wait/kill/id")
|
||||
@ -389,24 +403,26 @@ def main():
|
||||
args = parser.parse_args()
|
||||
kwargs = {}
|
||||
|
||||
ret = 0
|
||||
if args.action == "start":
|
||||
container_start(args.name, **kwargs)
|
||||
ret = container_start(args.name, **kwargs)
|
||||
|
||||
elif args.action == "stop":
|
||||
if args.timeout is not None:
|
||||
kwargs['timeout'] = args.timeout
|
||||
container_stop(args.name, **kwargs)
|
||||
ret = container_stop(args.name, **kwargs)
|
||||
|
||||
elif args.action == "kill":
|
||||
container_kill(args.name, **kwargs)
|
||||
ret = container_kill(args.name, **kwargs)
|
||||
|
||||
elif args.action == "wait":
|
||||
container_wait(args.name, **kwargs)
|
||||
ret = container_wait(args.name, **kwargs)
|
||||
|
||||
elif args.action == "id":
|
||||
id = container_id(args.name, **kwargs)
|
||||
print(id)
|
||||
|
||||
return ret
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@ -269,7 +269,8 @@ kill_test_data = {
|
||||
common_test.CONFIG_DB_NO: {
|
||||
common_test.FEATURE_TABLE: {
|
||||
"snmp": {
|
||||
"set_owner": "local"
|
||||
"set_owner": "local",
|
||||
"state": "enabled"
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -348,6 +349,30 @@ kill_test_data = {
|
||||
}
|
||||
}
|
||||
|
||||
# container_kill test cases
|
||||
# test case 0 -- container kill local disabled container
|
||||
# -- no change in state-db
|
||||
# -- no label update
|
||||
#
|
||||
invalid_kill_test_data = {
|
||||
0: {
|
||||
common_test.DESCR: "container kill for local disabled container",
|
||||
common_test.PRE: {
|
||||
common_test.CONFIG_DB_NO: {
|
||||
common_test.FEATURE_TABLE: {
|
||||
"sflow": {
|
||||
"set_owner": "local"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
common_test.POST: {
|
||||
},
|
||||
common_test.ACTIONS: {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# container_wait test cases
|
||||
# test case 0 -- container wait local
|
||||
@ -498,6 +523,24 @@ class TestContainer(object):
|
||||
ret = common_test.check_mock_containers()
|
||||
assert ret == 0
|
||||
|
||||
@patch("container.swsscommon.DBConnector")
|
||||
@patch("container.swsscommon.Table")
|
||||
@patch("container.docker.from_env")
|
||||
def test_invalid_kill(self, mock_docker, mock_table, mock_conn):
|
||||
self.init()
|
||||
common_test.set_mock(mock_table, mock_conn, mock_docker)
|
||||
|
||||
for (i, ct_data) in invalid_kill_test_data.items():
|
||||
common_test.do_start_test("container_test:container_kill", i, ct_data)
|
||||
|
||||
ret = container.container_kill("sflow")
|
||||
assert ret != 0
|
||||
|
||||
ret = common_test.check_tables_returned()
|
||||
assert ret == 0
|
||||
|
||||
ret = common_test.check_mock_containers()
|
||||
assert ret == 0
|
||||
|
||||
@patch("container.swsscommon.DBConnector")
|
||||
@patch("container.swsscommon.Table")
|
||||
|
Loading…
Reference in New Issue
Block a user