Disable systemd auto-restart of dependent services for spineRouters (#17203)

Currently hostcfgd script overrides the systemd service files of the features depending upon auto_restart enable/disable.
I am skipping dependent features(syncd, gbsyncd for now) to have "RESTART=Always"
for them to not start immediately, and instead get started by SWSS through swss.sh script.
The issue of syncd double stop is also applicable to pizza box platforms, however no traffic impact is seen there, whereas on VOQ chassis, we do see traffic impact due to early start of syncd service.
This commit is contained in:
Deepak Singhal 2023-11-20 14:38:20 -08:00 committed by GitHub
parent f59dc50eae
commit 894046e199
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 123 additions and 19 deletions

View File

@ -364,7 +364,20 @@ class FeatureHandler(object):
Returns:
None.
"""
restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no"
# As per the current code(due to various dependencies) SWSS service stop/start also stops/starts the dependent services(syncd, teamd, bgpd etc)
# There is an issue seen of syncd service getting stopped twice upon a critical process crash in syncd service due to above reason.
# Also early start of syncd service has traffic impact on VOQ chassis.
# to fix the above issue, we are disabling the auto restart of syncd service as it will be started by swss service.
# This change can be extended to other dependent services as well in future and also on pizza box platforms.
device_type = self._device_config.get('DEVICE_METADATA', {}).get('localhost', {}).get('type')
is_dependent_service = feature_config.name in ['syncd', 'gbsyncd']
if device_type == 'SpineRouter' and is_dependent_service:
syslog.syslog(syslog.LOG_INFO, "Skipped setting Restart field in systemd for {}".format(feature_config.name))
restart_field_str = "no"
else:
restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no"
feature_systemd_config = "[Service]\nRestart={}\n".format(restart_field_str)
feature_names, feature_suffixes = self.get_multiasic_feature_instances(feature_config)

View File

@ -45,7 +45,7 @@ class TestFeatureHandler(TestCase):
return True if not ddiff else False
def checks_systemd_config_file(self, feature_table, feature_systemd_name_map=None):
def checks_systemd_config_file(self, device_type, feature_table, feature_systemd_name_map=None):
"""Checks whether the systemd configuration file of each feature was created or not
and whether the `Restart=` field in the file is set correctly or not.
@ -62,6 +62,7 @@ class TestFeatureHandler(TestCase):
'auto_restart.conf')
for feature_name in feature_table:
is_dependent_feature = True if feature_name in ['syncd', 'gbsyncd'] else False
auto_restart_status = feature_table[feature_name].get('auto_restart', 'disabled')
if "enabled" in auto_restart_status:
auto_restart_status = "enabled"
@ -77,7 +78,10 @@ class TestFeatureHandler(TestCase):
with open(feature_systemd_config_file_path) as systemd_config_file:
status = systemd_config_file.read().strip()
assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status])
if device_type == 'SpineRouter' and is_dependent_feature:
assert status == '[Service]\nRestart=no'
else:
assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status])
def get_state_db_set_calls(self, feature_table):
"""Returns a Mock call objects which recorded the `set` calls to `FEATURE` table in `STATE_DB`.
@ -134,6 +138,7 @@ class TestFeatureHandler(TestCase):
device_config = {}
device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA']
device_config.update(config_data['device_runtime_metadata'])
device_type = MockConfigDb.CONFIG_DB['DEVICE_METADATA']['localhost']['type']
feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config)
feature_table = MockConfigDb.CONFIG_DB['FEATURE']
@ -158,13 +163,13 @@ class TestFeatureHandler(TestCase):
feature_table_state_db_calls = self.get_state_db_set_calls(feature_table)
self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map)
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)
mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'],
any_order=True)
mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'],
any_order=True)
feature_state_table_mock.set.assert_has_calls(feature_table_state_db_calls)
self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map)
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)
@parameterized.expand(HOSTCFGD_TEST_VECTOR)
@patchfs
@ -196,6 +201,7 @@ class TestFeatureHandler(TestCase):
device_config = {}
device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA']
device_config.update(config_data['device_runtime_metadata'])
device_type = MockConfigDb.CONFIG_DB['DEVICE_METADATA']['localhost']['type']
feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config)
feature_table = MockConfigDb.CONFIG_DB['FEATURE']
@ -207,7 +213,7 @@ class TestFeatureHandler(TestCase):
feature_names, _ = feature_handler.get_multiasic_feature_instances(feature)
feature_systemd_name_map[feature_name] = feature_names
self.checks_systemd_config_file(config_data['config_db']['FEATURE'], feature_systemd_name_map)
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)
mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'],
any_order=True)
mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'],

View File

@ -580,8 +580,14 @@ HOSTCFGD_TEST_VECTOR = [
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
@ -610,12 +616,23 @@ HOSTCFGD_TEST_VECTOR = [
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
call("sudo systemctl stop bgp.service", shell=True),
call("sudo systemctl disable bgp.service", shell=True),
call("sudo systemctl mask bgp.service", shell=True),
call("sudo systemctl start syncd.service", shell=True),
call("sudo systemctl enable syncd.service", shell=True),
call("sudo systemctl unmask syncd.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
@ -675,8 +692,14 @@ HOSTCFGD_TEST_VECTOR = [
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
@ -705,12 +728,23 @@ HOSTCFGD_TEST_VECTOR = [
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
call("sudo systemctl stop bgp.service", shell=True),
call("sudo systemctl disable bgp.service", shell=True),
call("sudo systemctl mask bgp.service", shell=True),
call("sudo systemctl start syncd.service", shell=True),
call("sudo systemctl enable syncd.service", shell=True),
call("sudo systemctl unmask syncd.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
@ -770,8 +804,14 @@ HOSTCFGD_TEST_VECTOR = [
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
@ -800,6 +840,14 @@ HOSTCFGD_TEST_VECTOR = [
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
@ -809,7 +857,9 @@ HOSTCFGD_TEST_VECTOR = [
call("sudo systemctl start teamd.service", shell=True),
call("sudo systemctl enable teamd.service", shell=True),
call("sudo systemctl unmask teamd.service", shell=True),
call("sudo systemctl start syncd.service", shell=True),
call("sudo systemctl enable syncd.service", shell=True),
call("sudo systemctl unmask syncd.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
@ -869,8 +919,14 @@ HOSTCFGD_TEST_VECTOR = [
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
@ -899,6 +955,14 @@ HOSTCFGD_TEST_VECTOR = [
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
@ -908,7 +972,9 @@ HOSTCFGD_TEST_VECTOR = [
call("sudo systemctl start teamd.service", shell=True),
call("sudo systemctl enable teamd.service", shell=True),
call("sudo systemctl unmask teamd.service", shell=True),
call("sudo systemctl start syncd.service", shell=True),
call("sudo systemctl enable syncd.service", shell=True),
call("sudo systemctl unmask syncd.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
@ -969,8 +1035,14 @@ HOSTCFGD_TEST_VECTOR = [
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
"syncd": {
"state": "enabled",
"has_timer": "False",
"has_global_scope": "False",
"has_per_asic_scope": "True",
"auto_restart": "enabled",
"high_mem_alert": "disabled"
},
},
},
"expected_config_db": {
@ -999,6 +1071,14 @@ HOSTCFGD_TEST_VECTOR = [
"high_mem_alert": "disabled",
"state": "enabled"
},
"syncd": {
"auto_restart": "enabled",
"has_global_scope": "False",
"has_per_asic_scope": "True",
"has_timer": "False",
"high_mem_alert": "disabled",
"state": "enabled"
},
},
},
"enable_feature_subprocess_calls": [
@ -1020,7 +1100,12 @@ HOSTCFGD_TEST_VECTOR = [
call("sudo systemctl stop lldp@1.service", shell=True),
call("sudo systemctl disable lldp@1.service", shell=True),
call("sudo systemctl mask lldp@1.service", shell=True),
call("sudo systemctl start syncd@0.service", shell=True),
call("sudo systemctl enable syncd@0.service", shell=True),
call("sudo systemctl unmask syncd@0.service", shell=True),
call("sudo systemctl start syncd@1.service", shell=True),
call("sudo systemctl enable syncd@1.service", shell=True),
call("sudo systemctl unmask syncd@1.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),