[hostcfgd] Initialize Restart= in feature's systemd config by the value of auto_restart in CONFIG_DB (#10915)

Why I did it
Recently the nightly testing pipeline found that the autorestart test case was failed when it was run against master image. The reason is Restart= field in each container's systemd configuration file was set to Restart=no even the value of auto_restart field in FEATURE table of CONFIG_DB is enabled.

This issue introduced by #10168 can be reproduced by the following steps:

Issues the config command to disable the auto-restart feature of a container
Runs command config reload or config reload minigraph to enable auto-restart of the container
Checks Restart= field in the container's systemd config file mentioned in step 1 by running the command
sudo systemctl cat <container_name>.service
Initially this PR (#10168) wants to revert the changes proposed by this: #8861. However, it did not fully revert all the changes.

How I did it
When hostcfgd started or was restarted, the Restart= field in each container's systemd configuration file should be initialized according to the value of auto_restart field in FEATURE table of CONFIG_DB.

How to verify it
I verified this change by running auto-restart test case against newly built master image and also ran the unittest:
This commit is contained in:
yozhao101 2022-06-02 15:59:36 -07:00 committed by GitHub
parent 1cc602c6af
commit 4ef8b38edb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 191 additions and 117 deletions

View File

@ -104,6 +104,7 @@ def obfuscate(data):
else:
return data
def get_pid(procname):
for dirname in os.listdir('/proc'):
if dirname == 'curproc':
@ -117,6 +118,7 @@ def get_pid(procname):
return dirname
return ""
class Feature(object):
""" Represents a feature configuration from CONFIG_DB data. """
@ -182,10 +184,10 @@ class FeatureHandler(object):
self._cached_config = {}
self.is_multi_npu = device_info.is_multi_npu()
def handle(self, feature_name, op, feature_cfg):
def handler(self, feature_name, op, feature_cfg):
if not feature_cfg:
syslog.syslog(syslog.LOG_INFO, "Deregistering feature {}".format(feature_name))
self._cached_config.pop(feature_name)
self._cached_config.pop(feature_name, None)
self._feature_state_table._del(feature_name)
return
@ -197,7 +199,11 @@ class FeatureHandler(object):
# the next called self.update_feature_state will start it again. If it will fail
# again the auto restart will kick-in. Another order may leave it in failed state
# and not auto restart.
self.update_feature_auto_restart(feature, feature_name)
if self._cached_config[feature_name].auto_restart != feature.auto_restart:
syslog.syslog(syslog.LOG_INFO, "Auto-restart status of feature '{}' is changed from '{}' to '{}' ..."
.format(feature_name, self._cached_config[feature_name].auto_restart, feature.auto_restart))
self.update_systemd_config(feature)
self._cached_config[feature_name].auto_restart = feature.auto_restart
# Enable/disable the container service if the feature state was changed from its previous state.
if self._cached_config[feature_name].state != feature.state:
@ -220,7 +226,7 @@ class FeatureHandler(object):
feature = Feature(feature_name, feature_table[feature_name], self._device_config)
self._cached_config.setdefault(feature_name, feature)
self.update_feature_auto_restart(feature, feature_name)
self.update_systemd_config(feature)
self.update_feature_state(feature)
self.resync_feature_state(feature)
@ -265,41 +271,45 @@ class FeatureHandler(object):
return True
def update_feature_auto_restart(self, feature, feature_name):
dir_name = self.SYSTEMD_SERVICE_CONF_DIR.format(feature_name)
auto_restart_conf = os.path.join(dir_name, 'auto_restart.conf')
def update_systemd_config(self, feature_config):
"""Updates `Restart=` field in feature's systemd configuration file
according to the value of `auto_restart` field in `FEATURE` table of `CONFIG_DB`.
write_conf = False
if not os.path.exists(auto_restart_conf): # if the auto_restart_conf file is not found, set it
write_conf = True
Args:
feature: An object represents a feature's configuration in `FEATURE`
table of `CONFIG_DB`.
if self._cached_config[feature_name].auto_restart != feature.auto_restart:
write_conf = True
Returns:
None.
"""
restart_field_str = "always" if "enabled" in feature_config.auto_restart else "no"
feature_systemd_config = "[Service]\nRestart={}\n".format(restart_field_str)
feature_names, feature_suffixes = self.get_multiasic_feature_instances(feature_config)
if not write_conf:
return
# On multi-ASIC device, creates systemd configuration file for each feature instance
# residing in difference namespace.
for feature_name in feature_names:
syslog.syslog(syslog.LOG_INFO, "Updating feature '{}' systemd config file related to auto-restart ..."
.format(feature_name))
feature_systemd_config_dir_path = self.SYSTEMD_SERVICE_CONF_DIR.format(feature_name)
feature_systemd_config_file_path = os.path.join(feature_systemd_config_dir_path, 'auto_restart.conf')
self._cached_config[feature_name].auto_restart = feature.auto_restart # Update Cache
if not os.path.exists(feature_systemd_config_dir_path):
os.mkdir(feature_systemd_config_dir_path)
with open(feature_systemd_config_file_path, 'w') as feature_systemd_config_file_handler:
feature_systemd_config_file_handler.write(feature_systemd_config)
restart_config = "always" if feature.auto_restart == "enabled" else "no"
service_conf = "[Service]\nRestart={}\n".format(restart_config)
feature_names, feature_suffixes = self.get_feature_attribute(feature)
for name in feature_names:
dir_name = self.SYSTEMD_SERVICE_CONF_DIR.format(name)
auto_restart_conf = os.path.join(dir_name, 'auto_restart.conf')
if not os.path.exists(dir_name):
os.mkdir(dir_name)
with open(auto_restart_conf, 'w') as cfgfile:
cfgfile.write(service_conf)
syslog.syslog(syslog.LOG_INFO, "Feautre '{}' systemd config file related to auto-restart is updated!"
.format(feature_name))
try:
syslog.syslog(syslog.LOG_INFO, "Reloading systemd configuration files ...")
run_cmd("sudo systemctl daemon-reload", raise_exception=True)
syslog.syslog(syslog.LOG_INFO, "Systemd configuration files are reloaded!")
except Exception as err:
syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to configure auto_restart".format(feature.name))
return
syslog.syslog(syslog.LOG_ERR, "Failed to reload systemd configuration files!")
def get_feature_attribute(self, feature):
def get_multiasic_feature_instances(self, feature):
# Create feature name suffix depending feature is running in host or namespace or in both
feature_names = (
([feature.name] if feature.has_global_scope or not self.is_multi_npu else []) +
@ -330,7 +340,7 @@ class FeatureHandler(object):
def enable_feature(self, feature):
cmds = []
feature_names, feature_suffixes = self.get_feature_attribute(feature)
feature_names, feature_suffixes = self.get_multiasic_feature_instances(feature)
for feature_name in feature_names:
# Check if it is already enabled, if yes skip the system call
unit_file_state = self.get_systemd_unit_state("{}.{}".format(feature_name, feature_suffixes[-1]))
@ -352,7 +362,7 @@ class FeatureHandler(object):
run_cmd(cmd, raise_exception=True)
except Exception as err:
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
.format(feature.name, feature_suffixes[-1]))
.format(feature.name, feature_suffixes[-1]))
self.set_feature_state(feature, self.FEATURE_STATE_FAILED)
return
@ -360,7 +370,7 @@ class FeatureHandler(object):
def disable_feature(self, feature):
cmds = []
feature_names, feature_suffixes = self.get_feature_attribute(feature)
feature_names, feature_suffixes = self.get_multiasic_feature_instances(feature)
for feature_name in feature_names:
# Check if it is already disabled, if yes skip the system call
unit_file_state = self.get_systemd_unit_state("{}.{}".format(feature_name, feature_suffixes[-1]))
@ -377,7 +387,7 @@ class FeatureHandler(object):
run_cmd(cmd, raise_exception=True)
except Exception as err:
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be stopped and disabled"
.format(feature.name, feature_suffixes[-1]))
.format(feature.name, feature_suffixes[-1]))
self.set_feature_state(feature, self.FEATURE_STATE_FAILED)
return
@ -1212,7 +1222,7 @@ class HostConfigDaemon:
self.config_db.subscribe('KDUMP', make_callback(self.kdump_handler))
# Handle FEATURE updates before other tables
self.config_db.subscribe('FEATURE', make_callback(self.feature_handler.handle))
self.config_db.subscribe('FEATURE', make_callback(self.feature_handler.handler))
# Handle AAA, TACACS and RADIUS related tables
self.config_db.subscribe('AAA', make_callback(self.aaa_handler))
self.config_db.subscribe('TACPLUS', make_callback(self.tacacs_global_handler))

View File

@ -27,112 +27,162 @@ hostcfgd.DBConnector = MockDBConnector
hostcfgd.Table = mock.Mock()
class TestHostcfgd(TestCase):
class TestFeatureHandler(TestCase):
"""Test methods of `FeatureHandler` class.
"""
Test hostcfd daemon - feature
"""
def __verify_table(self, table, feature_state_table, expected_table):
def checks_config_table(self, feature_table, expected_table):
"""Compares `FEATURE` table in `CONFIG_DB` with expected output table.
Args:
feature_table: A dictionary indicates current `FEATURE` table in `CONFIG_DB`.
expected_table A dictionary indicates the expected `FEATURE` table in `CONFIG_DB`.
Returns:
Returns True if `FEATURE` table in `CONFIG_DB` was not modified unexpectedly;
otherwise, returns False.
"""
verify config db tables
ddiff = DeepDiff(feature_table, expected_table, ignore_order=True)
Compares Config DB table (FEATURE) with expected output table.
Verifies that State DB table (FEATURE) is updated.
Args:
table(dict): Current Config Db table
feature_state_table(Mock): Mocked State DB FEATURE table
expected_table(dict): Expected Config Db table
Returns:
None
"""
ddiff = DeepDiff(table, expected_table, ignore_order=True)
print('DIFF:', ddiff)
def get_state(cfg_state):
""" Translates CONFIG DB state field into STATE DB state field """
if cfg_state == 'always_disabled':
return 'disabled'
elif cfg_state == 'always_enabled':
return 'enabled'
else:
return cfg_state
feature_state_table.set.assert_has_calls([
mock.call(feature, [('state', get_state(table[feature]['state']))]) for feature in table
])
return True if not ddiff else False
def __verify_fs(self, table):
"""
verify filesystem changes made by hostcfgd.
def checks_systemd_config_file(self, feature_table):
"""Checks whether the systemd configuration file of each feature was created or not
and whether the `Restart=` field in the file is set correctly or not.
Checks whether systemd override configuration files
were generated and Restart= for systemd unit is set
correctly
Args:
feature_table: A dictionary indicates `Feature` table in `CONFIG_DB`.
Args:
table(dict): Current Config Db table
Returns: Boolean wether test passed.
Returns: Boolean value indicates whether test passed or not.
"""
exp_dict = {
'enabled': 'always',
'disabled': 'no',
}
auto_restart_conf = os.path.join(hostcfgd.FeatureHandler.SYSTEMD_SERVICE_CONF_DIR, 'auto_restart.conf')
truth_table = {'enabled': 'always',
'disabled': 'no'}
for feature in table:
auto_restart = table[feature].get('auto_restart', 'disabled')
with open(auto_restart_conf.format(feature)) as conf:
conf = conf.read().strip()
assert conf == '[Service]\nRestart={}'.format(exp_dict[auto_restart])
systemd_config_file_path = os.path.join(hostcfgd.FeatureHandler.SYSTEMD_SERVICE_CONF_DIR,
'auto_restart.conf')
for feature_name in feature_table:
auto_restart_status = feature_table[feature_name].get('auto_restart', 'disabled')
if "enabled" in auto_restart_status:
auto_restart_status = "enabled"
elif "disabled" in auto_restart_status:
auto_restart_status = "disabled"
feature_systemd_config_file_path = systemd_config_file_path.format(feature_name)
is_config_file_existing = os.path.exists(feature_systemd_config_file_path)
assert is_config_file_existing, "Systemd configuration file of feature '{}' does not exist!".format(feature_name)
with open(feature_systemd_config_file_path) as systemd_config_file:
status = systemd_config_file.read().strip()
assert status == '[Service]\nRestart={}'.format(truth_table[auto_restart_status])
def get_state_db_set_calls(self, feature_table):
"""Returns a Mock call objects which recorded the `set` calls to `FEATURE` table in `STATE_DB`.
Args:
feature_table: A dictionary indicates `FEATURE` table in `CONFIG_DB`.
Returns:
set_call_list: A list indicates Mock call objects.
"""
set_call_list = []
for feature_name in feature_table.keys():
feature_state = ""
if "enabled" in feature_table[feature_name]["state"]:
feature_state = "enabled"
elif "disabled" in feature_table[feature_name]["state"]:
feature_state = "disabled"
else:
feature_state = feature_table[feature_name]["state"]
set_call_list.append(mock.call(feature_name, [("state", feature_state)]))
return set_call_list
@parameterized.expand(HOSTCFGD_TEST_VECTOR)
@patchfs
def test_hostcfgd_feature_handler(self, test_name, test_data, fs):
"""
Test feature config capability in the hostcfd
def test_sync_state_field(self, test_scenario_name, config_data, fs):
"""Tests the method `sync_state_field(...)` of `FeatureHandler` class.
Args:
test_name(str): test name
test_data(dict): test data which contains initial Config Db tables, and expected results
Args:
test_secnario_name: A string indicates different testing scenario.
config_data: A dictionary contains initial `CONFIG_DB` tables and expected results.
Returns:
None
Returns:
Boolean value indicates whether test will pass or not.
"""
fs.add_real_paths(swsscommon_package.__path__) # add real path of swsscommon for database_config.json
# add real path of sesscommon for database_config.json
fs.add_real_paths(swsscommon_package.__path__)
fs.create_dir(hostcfgd.FeatureHandler.SYSTEMD_SYSTEM_DIR)
MockConfigDb.set_config_db(test_data['config_db'])
MockConfigDb.set_config_db(config_data['config_db'])
feature_state_table_mock = mock.Mock()
with mock.patch('hostcfgd.subprocess') as mocked_subprocess:
popen_mock = mock.Mock()
attrs = test_data['popen_attributes']
attrs = config_data['popen_attributes']
popen_mock.configure_mock(**attrs)
mocked_subprocess.Popen.return_value = popen_mock
# Initialize Feature Handler
device_config = {}
device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA']
feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config)
# sync the state field and Handle Feature Updates
features = MockConfigDb.CONFIG_DB['FEATURE']
feature_handler.sync_state_field(features)
for key, fvs in features.items():
feature_handler.handle(key, 'SET', fvs)
feature_table = MockConfigDb.CONFIG_DB['FEATURE']
feature_handler.sync_state_field(feature_table)
# Verify if the updates are properly updated
assert self.__verify_table(
MockConfigDb.get_config_db()['FEATURE'],
feature_state_table_mock,
test_data['expected_config_db']['FEATURE']
), 'Test failed for test data: {0}'.format(test_data)
mocked_subprocess.check_call.assert_has_calls(test_data['expected_subprocess_calls'], any_order=True)
is_any_difference = self.checks_config_table(MockConfigDb.get_config_db()['FEATURE'],
config_data['expected_config_db']['FEATURE'])
assert is_any_difference, "'FEATURE' table in 'CONFIG_DB' is modified unexpectedly!"
self.__verify_fs(test_data['config_db']['FEATURE'])
feature_table_state_db_calls = self.get_state_db_set_calls(feature_table)
self.checks_systemd_config_file(config_data['config_db']['FEATURE'])
mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'],
any_order=True)
mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'],
any_order=True)
feature_state_table_mock.set.assert_has_calls(feature_table_state_db_calls)
self.checks_systemd_config_file(config_data['config_db']['FEATURE'])
@parameterized.expand(HOSTCFGD_TEST_VECTOR)
@patchfs
def test_handler(self, test_scenario_name, config_data, fs):
"""Tests the method `handle(...)` of `FeatureHandler` class.
Args:
test_secnario_name: A string indicates different testing scenario.
config_data: A dictionary contains initial `CONFIG_DB` tables and expected results.
Returns:
Boolean value indicates whether test will pass or not.
"""
# add real path of sesscommon for database_config.json
fs.add_real_paths(swsscommon_package.__path__)
fs.create_dir(hostcfgd.FeatureHandler.SYSTEMD_SYSTEM_DIR)
MockConfigDb.set_config_db(config_data['config_db'])
feature_state_table_mock = mock.Mock()
with mock.patch('hostcfgd.subprocess') as mocked_subprocess:
popen_mock = mock.Mock()
attrs = config_data['popen_attributes']
popen_mock.configure_mock(**attrs)
mocked_subprocess.Popen.return_value = popen_mock
device_config = {}
device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA']
feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config)
feature_table = MockConfigDb.CONFIG_DB['FEATURE']
for feature_name, feature_config in feature_table.items():
feature_handler.handler(feature_name, 'SET', feature_config)
self.checks_systemd_config_file(config_data['config_db']['FEATURE'])
mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'],
any_order=True)
mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'],
any_order=True)
def test_feature_config_parsing(self):
swss_feature = hostcfgd.Feature('swss', {

View File

@ -41,7 +41,7 @@ HOSTCFGD_TEST_VECTOR = [
"state": "{% if 'subtype' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['subtype'] == 'DualToR' %}enabled{% else %}always_disabled{% endif %}"
},
"telemetry": {
"auto_restart": "disabled",
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "True",
@ -73,7 +73,7 @@ HOSTCFGD_TEST_VECTOR = [
"state": "enabled"
},
"telemetry": {
"auto_restart": "disabled",
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "True",
@ -84,7 +84,7 @@ HOSTCFGD_TEST_VECTOR = [
},
},
},
"expected_subprocess_calls": [
"enable_feature_subprocess_calls": [
call("sudo systemctl unmask dhcp_relay.service", shell=True),
call("sudo systemctl enable dhcp_relay.service", shell=True),
call("sudo systemctl start dhcp_relay.service", shell=True),
@ -96,6 +96,9 @@ HOSTCFGD_TEST_VECTOR = [
call("sudo systemctl enable telemetry.timer", shell=True),
call("sudo systemctl start telemetry.timer", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
],
"popen_attributes": {
'communicate.return_value': ('output', 'error')
},
@ -198,7 +201,7 @@ HOSTCFGD_TEST_VECTOR = [
},
},
},
"expected_subprocess_calls": [
"enable_feature_subprocess_calls": [
call("sudo systemctl stop mux.service", shell=True),
call("sudo systemctl disable mux.service", shell=True),
call("sudo systemctl mask mux.service", shell=True),
@ -210,6 +213,9 @@ HOSTCFGD_TEST_VECTOR = [
call("sudo systemctl enable sflow.service", shell=True),
call("sudo systemctl start sflow.service", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
],
"popen_attributes": {
'communicate.return_value': ('output', 'error')
},
@ -294,7 +300,7 @@ HOSTCFGD_TEST_VECTOR = [
},
},
},
"expected_subprocess_calls": [
"enable_feature_subprocess_calls": [
call("sudo systemctl stop mux.service", shell=True),
call("sudo systemctl disable mux.service", shell=True),
call("sudo systemctl mask mux.service", shell=True),
@ -303,6 +309,9 @@ HOSTCFGD_TEST_VECTOR = [
call("sudo systemctl enable telemetry.timer", shell=True),
call("sudo systemctl start telemetry.timer", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
],
"popen_attributes": {
'communicate.return_value': ('output', 'error')
},
@ -387,7 +396,7 @@ HOSTCFGD_TEST_VECTOR = [
},
},
},
"expected_subprocess_calls": [
"enable_feature_subprocess_calls": [
call("sudo systemctl unmask dhcp_relay.service", shell=True),
call("sudo systemctl enable dhcp_relay.service", shell=True),
call("sudo systemctl start dhcp_relay.service", shell=True),
@ -399,6 +408,9 @@ HOSTCFGD_TEST_VECTOR = [
call("sudo systemctl enable telemetry.timer", shell=True),
call("sudo systemctl start telemetry.timer", shell=True),
],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
],
"popen_attributes": {
'communicate.return_value': ('output', 'error')
},
@ -484,7 +496,9 @@ HOSTCFGD_TEST_VECTOR = [
},
},
},
"expected_subprocess_calls": [
"enable_feature_subprocess_calls": [],
"daemon_reload_subprocess_call": [
call("sudo systemctl daemon-reload", shell=True),
],
"popen_attributes": {
'communicate.return_value': ('enabled', 'error')