From 2919b4820f95bd49ceb579986f5c58b4e3532c6e Mon Sep 17 00:00:00 2001 From: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com> Date: Mon, 14 Mar 2022 13:45:27 +0200 Subject: [PATCH] [hostcfgd] record feature state in STATE DB (#9842) - Why I did it To implement blocking feature state change. - How I did it Record the actual feature state in STATE DB from hostcfg. - How to verify it UT + verification by running on the switch and checking STATE DB. Signed-off-by: Stepan Blyschak --- files/image_config/monit/container_checker | 13 +++---- src/sonic-host-services/scripts/hostcfgd | 39 ++++++++++++++----- .../tests/hostcfgd/hostcfgd_radius_test.py | 1 + .../tests/hostcfgd/hostcfgd_tacacs_test.py | 3 +- .../tests/hostcfgd/hostcfgd_test.py | 24 ++++++++++-- 5 files changed, 59 insertions(+), 21 deletions(-) diff --git a/files/image_config/monit/container_checker b/files/image_config/monit/container_checker index 197e4f04a4..a67a96a0c1 100755 --- a/files/image_config/monit/container_checker +++ b/files/image_config/monit/container_checker @@ -32,7 +32,7 @@ def get_expected_running_containers(): value of field 'has_global_scope', the number of ASICs and the value of field 'has_per_asic_scope'. If the device has single ASIC, the container name was put into the list. - @return: A set which contains the expected running containers and a set that has + @return: A set which contains the expected running containers and a set that has containers marked as "always_enabled". """ config_db = swsssdk.ConfigDBConnector() @@ -82,7 +82,7 @@ def get_current_running_from_DB(always_running_containers): state_db = swsscommon.DBConnector("STATE_DB", 0) tbl = swsscommon.Table(state_db, "FEATURE") if not tbl.getKeys(): - return False, None + return running_containers for name in tbl.getKeys(): data = dict(tbl.get(name)[1]) @@ -101,7 +101,7 @@ def get_current_running_from_DB(always_running_containers): print("Failed to get container '{}'. Error: '{}'".format(name, err)) pass - return True, running_containers + return running_containers def get_current_running_from_dockers(): @@ -128,13 +128,12 @@ def get_current_running_containers(always_running_containers): """ @summary: This function will get the list of currently running containers. If available in STATE-DB, get from DB else from list of dockers. - + @return: A set of currently running containers. """ - ret, current_running_containers = get_current_running_from_DB(always_running_containers) - if not ret: - current_running_containers = get_current_running_from_dockers() + current_running_containers = get_current_running_from_DB(always_running_containers) + current_running_containers.update(get_current_running_from_dockers()) return current_running_containers diff --git a/src/sonic-host-services/scripts/hostcfgd b/src/sonic-host-services/scripts/hostcfgd index 9b39fb5eb8..be8317259e 100755 --- a/src/sonic-host-services/scripts/hostcfgd +++ b/src/sonic-host-services/scripts/hostcfgd @@ -12,7 +12,7 @@ import signal import jinja2 from sonic_py_common import device_info from swsscommon.swsscommon import SubscriberStateTable, DBConnector, Select -from swsscommon.swsscommon import ConfigDBConnector, TableConsumable +from swsscommon.swsscommon import ConfigDBConnector, TableConsumable, Table # FILE PAM_AUTH_CONF = "/etc/pam.d/common-auth-sonic" @@ -41,6 +41,7 @@ RADIUS_PAM_AUTH_CONF_DIR = "/etc/pam_radius_auth.d/" # MISC Constants CFG_DB = "CONFIG_DB" +STATE_DB = "STATE_DB" HOSTCFGD_MAX_PRI = 10 # Used to enforce ordering b/w daemons under Hostcfgd DEFAULT_SELECT_TIMEOUT = 1000 @@ -166,16 +167,23 @@ class FeatureHandler(object): SYSTEMD_SYSTEM_DIR = '/etc/systemd/system/' SYSTEMD_SERVICE_CONF_DIR = os.path.join(SYSTEMD_SYSTEM_DIR, '{}.service.d/') - def __init__(self, config_db, device_config): + # Feature state constants + FEATURE_STATE_ENABLED = "enabled" + FEATURE_STATE_DISABLED = "disabled" + FEATURE_STATE_FAILED = "failed" + + def __init__(self, config_db, feature_state_table, device_config): self._config_db = config_db + self._feature_state_table = feature_state_table self._device_config = device_config self._cached_config = {} self.is_multi_npu = device_info.is_multi_npu() def handle(self, feature_name, op, feature_cfg): if not feature_cfg: - self._cached_config.pop(feature_name) syslog.syslog(syslog.LOG_INFO, "Deregistering feature {}".format(feature_name)) + self._cached_config.pop(feature_name) + self._feature_state_table._del(feature_name) return feature = Feature(feature_name, feature_cfg, self._device_config) @@ -253,7 +261,6 @@ class FeatureHandler(object): return True def update_feature_auto_restart(self, feature, feature_name): - dir_name = self.SYSTEMD_SERVICE_CONF_DIR.format(feature_name) auto_restart_conf = os.path.join(dir_name, 'auto_restart.conf') @@ -341,8 +348,11 @@ class FeatureHandler(object): except Exception as err: syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started" .format(feature.name, feature_suffixes[-1])) + self.set_feature_state(feature, self.FEATURE_STATE_FAILED) return + self.set_feature_state(feature, self.FEATURE_STATE_ENABLED) + def disable_feature(self, feature): cmds = [] feature_names, feature_suffixes = self.get_feature_attribute(feature) @@ -363,11 +373,17 @@ class FeatureHandler(object): except Exception as err: syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be stopped and disabled" .format(feature.name, feature_suffixes[-1])) + self.set_feature_state(feature, self.FEATURE_STATE_FAILED) return + self.set_feature_state(feature, self.FEATURE_STATE_DISABLED) + def resync_feature_state(self, feature): self._config_db.mod_entry('FEATURE', feature.name, {'state': feature.state}) + def set_feature_state(self, feature, state): + self._feature_state_table.set(feature.name, [('state', state)]) + class Iptables(object): def __init__(self): @@ -914,14 +930,14 @@ class NtpCfg(object): new_src = data.get('src_intf', '') new_src_set = set(new_src.split(";")) new_vrf = data.get('vrf', '') - + # Update the Local Cache self.ntp_global = data # check if ntp server configured, if not, do nothing if not self.ntp_servers: syslog.syslog(syslog.LOG_INFO, "No ntp server when global config change, do nothing") - return + return if orig_src_set != new_src_set: syslog.syslog(syslog.LOG_INFO, "ntp global update for source intf old {} new {}, restarting ntp-config" @@ -957,6 +973,7 @@ class HostConfigDaemon: self.config_db = ConfigDBConnector() self.config_db.connect(wait_for_init=True, retry_on=True) self.dbconn = DBConnector(CFG_DB, 0) + self.state_db_conn = DBConnector(STATE_DB, 0) self.selector = Select() syslog.syslog(syslog.LOG_INFO, 'ConfigDB connect success') @@ -964,6 +981,8 @@ class HostConfigDaemon: self.callbacks = dict() self.subscriber_map = dict() + feature_state_table = Table(self.state_db_conn, 'FEATURE') + # Load DEVICE metadata configurations self.device_config = {} self.device_config['DEVICE_METADATA'] = self.config_db.get_table('DEVICE_METADATA') @@ -976,7 +995,7 @@ class HostConfigDaemon: self.iptables = Iptables() # Intialize Feature Handler - self.feature_handler = FeatureHandler(self.config_db, self.device_config) + self.feature_handler = FeatureHandler(self.config_db, feature_state_table, self.device_config) self.feature_handler.sync_state_field() # Initialize Ntp Config Handler @@ -987,7 +1006,7 @@ class HostConfigDaemon: # Initialize AAACfg self.hostname_cache="" self.aaacfg = AaaCfg() - + def load(self): aaa = self.config_db.get_table('AAA') @@ -1004,7 +1023,7 @@ class HostConfigDaemon: self.hostname_cache = dev_meta['localhost']['hostname'] except Exception as e: pass - + # Update AAA with the hostname self.aaacfg.hostname_update(self.hostname_cache) @@ -1130,7 +1149,7 @@ class HostConfigDaemon: self.subscribe('VLAN_SUB_INTERFACE', lambda table, key, op, data: self.vlan_sub_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5) self.subscribe('PORTCHANNEL_INTERFACE', lambda table, key, op, data: self.portchannel_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5) self.subscribe('INTERFACE', lambda table, key, op, data: self.phy_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5) - + syslog.syslog(syslog.LOG_INFO, "Waiting for systemctl to finish initialization") self.wait_till_system_init_done() diff --git a/src/sonic-host-services/tests/hostcfgd/hostcfgd_radius_test.py b/src/sonic-host-services/tests/hostcfgd/hostcfgd_radius_test.py index 4e3d186481..9738f16852 100644 --- a/src/sonic-host-services/tests/hostcfgd/hostcfgd_radius_test.py +++ b/src/sonic-host-services/tests/hostcfgd/hostcfgd_radius_test.py @@ -36,6 +36,7 @@ hostcfgd.ConfigDBConnector = MockConfigDb hostcfgd.SubscriberStateTable = MockSubscriberStateTable hostcfgd.Select = MockSelect hostcfgd.DBConnector = MockDBConnector +hostcfgd.Table = mock.Mock() class TestHostcfgdRADIUS(TestCase): diff --git a/src/sonic-host-services/tests/hostcfgd/hostcfgd_tacacs_test.py b/src/sonic-host-services/tests/hostcfgd/hostcfgd_tacacs_test.py index 3cc3504d60..18bf5c17e6 100644 --- a/src/sonic-host-services/tests/hostcfgd/hostcfgd_tacacs_test.py +++ b/src/sonic-host-services/tests/hostcfgd/hostcfgd_tacacs_test.py @@ -35,6 +35,7 @@ hostcfgd.ConfigDBConnector = MockConfigDb hostcfgd.SubscriberStateTable = MockSubscriberStateTable hostcfgd.Select = MockSelect hostcfgd.DBConnector = MockDBConnector +hostcfgd.Table = mock.Mock() class TestHostcfgdTACACS(TestCase): """ @@ -44,7 +45,7 @@ class TestHostcfgdTACACS(TestCase): return subprocess.check_output('diff -uR {} {} || true'.format(file1, file2), shell=True) """ - Check different config + Check different config """ def check_config(self, test_name, test_data, config_name): t_path = templates_path diff --git a/src/sonic-host-services/tests/hostcfgd/hostcfgd_test.py b/src/sonic-host-services/tests/hostcfgd/hostcfgd_test.py index bbce866e23..db9a35075a 100644 --- a/src/sonic-host-services/tests/hostcfgd/hostcfgd_test.py +++ b/src/sonic-host-services/tests/hostcfgd/hostcfgd_test.py @@ -27,20 +27,23 @@ hostcfgd.ConfigDBConnector = MockConfigDb hostcfgd.SubscriberStateTable = MockSubscriberStateTable hostcfgd.Select = MockSelect hostcfgd.DBConnector = MockDBConnector +hostcfgd.Table = mock.Mock() class TestHostcfgd(TestCase): """ Test hostcfd daemon - feature """ - def __verify_table(self, table, expected_table): + def __verify_table(self, table, feature_state_table, expected_table): """ verify config db tables - Compares Config DB table (FEATURE) with expected output table + Compares Config DB table (FEATURE) with expected output table. + Verifies that State DB table (FEATURE) is updated. Args: table(dict): Current Config Db table + feature_state_table(Mock): Mocked State DB FEATURE table expected_table(dict): Expected Config Db table Returns: @@ -48,6 +51,19 @@ class TestHostcfgd(TestCase): """ ddiff = DeepDiff(table, expected_table, ignore_order=True) print('DIFF:', ddiff) + + def get_state(cfg_state): + """ Translates CONFIG DB state field into STATE DB state field """ + if cfg_state == 'always_disabled': + return 'disabled' + elif cfg_state == 'always_enabled': + return 'enabled' + else: + return cfg_state + + feature_state_table.set.assert_has_calls([ + mock.call(feature, [('state', get_state(table[feature]['state']))]) for feature in table + ]) return True if not ddiff else False def __verify_fs(self, table): @@ -93,6 +109,7 @@ class TestHostcfgd(TestCase): fs.add_real_paths(swsscommon_package.__path__) # add real path of swsscommon for database_config.json fs.create_dir(hostcfgd.FeatureHandler.SYSTEMD_SYSTEM_DIR) MockConfigDb.set_config_db(test_data['config_db']) + feature_state_table_mock = mock.Mock() with mock.patch('hostcfgd.subprocess') as mocked_subprocess: popen_mock = mock.Mock() attrs = test_data['popen_attributes'] @@ -102,7 +119,7 @@ class TestHostcfgd(TestCase): # Initialize Feature Handler device_config = {} device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA'] - feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), device_config) + feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config) # sync the state field and Handle Feature Updates feature_handler.sync_state_field() @@ -113,6 +130,7 @@ class TestHostcfgd(TestCase): # Verify if the updates are properly updated assert self.__verify_table( MockConfigDb.get_config_db()['FEATURE'], + feature_state_table_mock, test_data['expected_config_db']['FEATURE'] ), 'Test failed for test data: {0}'.format(test_data) mocked_subprocess.check_call.assert_has_calls(test_data['expected_subprocess_calls'], any_order=True)