[hostcfgd] record feature state in STATE DB (#9842)

- Why I did it
To implement blocking feature state change.

- How I did it
Record the actual feature state in STATE DB from hostcfgd.

- How to verify it
Unit tests, plus verification by running on the switch and checking STATE DB.

Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
This commit is contained in:
Stepan Blyshchak 2022-03-14 13:45:27 +02:00 committed by GitHub
parent 3fa627f290
commit 2919b4820f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 59 additions and 21 deletions

View File

@ -32,7 +32,7 @@ def get_expected_running_containers():
value of field 'has_global_scope', the number of ASICs and the value of field value of field 'has_global_scope', the number of ASICs and the value of field
'has_per_asic_scope'. 'has_per_asic_scope'.
If the device has single ASIC, the container name was put into the list. If the device has single ASIC, the container name was put into the list.
@return: A set which contains the expected running containers and a set that has @return: A set which contains the expected running containers and a set that has
containers marked as "always_enabled". containers marked as "always_enabled".
""" """
config_db = swsssdk.ConfigDBConnector() config_db = swsssdk.ConfigDBConnector()
@ -82,7 +82,7 @@ def get_current_running_from_DB(always_running_containers):
state_db = swsscommon.DBConnector("STATE_DB", 0) state_db = swsscommon.DBConnector("STATE_DB", 0)
tbl = swsscommon.Table(state_db, "FEATURE") tbl = swsscommon.Table(state_db, "FEATURE")
if not tbl.getKeys(): if not tbl.getKeys():
return False, None return running_containers
for name in tbl.getKeys(): for name in tbl.getKeys():
data = dict(tbl.get(name)[1]) data = dict(tbl.get(name)[1])
@ -101,7 +101,7 @@ def get_current_running_from_DB(always_running_containers):
print("Failed to get container '{}'. Error: '{}'".format(name, err)) print("Failed to get container '{}'. Error: '{}'".format(name, err))
pass pass
return True, running_containers return running_containers
def get_current_running_from_dockers(): def get_current_running_from_dockers():
@ -128,13 +128,12 @@ def get_current_running_containers(always_running_containers):
""" """
@summary: This function will get the list of currently running containers. @summary: This function will get the list of currently running containers.
If available in STATE-DB, get from DB else from list of dockers. If available in STATE-DB, get from DB else from list of dockers.
@return: A set of currently running containers. @return: A set of currently running containers.
""" """
ret, current_running_containers = get_current_running_from_DB(always_running_containers) current_running_containers = get_current_running_from_DB(always_running_containers)
if not ret: current_running_containers.update(get_current_running_from_dockers())
current_running_containers = get_current_running_from_dockers()
return current_running_containers return current_running_containers

View File

@ -12,7 +12,7 @@ import signal
import jinja2 import jinja2
from sonic_py_common import device_info from sonic_py_common import device_info
from swsscommon.swsscommon import SubscriberStateTable, DBConnector, Select from swsscommon.swsscommon import SubscriberStateTable, DBConnector, Select
from swsscommon.swsscommon import ConfigDBConnector, TableConsumable from swsscommon.swsscommon import ConfigDBConnector, TableConsumable, Table
# FILE # FILE
PAM_AUTH_CONF = "/etc/pam.d/common-auth-sonic" PAM_AUTH_CONF = "/etc/pam.d/common-auth-sonic"
@ -41,6 +41,7 @@ RADIUS_PAM_AUTH_CONF_DIR = "/etc/pam_radius_auth.d/"
# MISC Constants # MISC Constants
CFG_DB = "CONFIG_DB" CFG_DB = "CONFIG_DB"
STATE_DB = "STATE_DB"
HOSTCFGD_MAX_PRI = 10 # Used to enforce ordering b/w daemons under Hostcfgd HOSTCFGD_MAX_PRI = 10 # Used to enforce ordering b/w daemons under Hostcfgd
DEFAULT_SELECT_TIMEOUT = 1000 DEFAULT_SELECT_TIMEOUT = 1000
@ -166,16 +167,23 @@ class FeatureHandler(object):
SYSTEMD_SYSTEM_DIR = '/etc/systemd/system/' SYSTEMD_SYSTEM_DIR = '/etc/systemd/system/'
SYSTEMD_SERVICE_CONF_DIR = os.path.join(SYSTEMD_SYSTEM_DIR, '{}.service.d/') SYSTEMD_SERVICE_CONF_DIR = os.path.join(SYSTEMD_SYSTEM_DIR, '{}.service.d/')
def __init__(self, config_db, device_config): # Feature state constants
FEATURE_STATE_ENABLED = "enabled"
FEATURE_STATE_DISABLED = "disabled"
FEATURE_STATE_FAILED = "failed"
def __init__(self, config_db, feature_state_table, device_config):
self._config_db = config_db self._config_db = config_db
self._feature_state_table = feature_state_table
self._device_config = device_config self._device_config = device_config
self._cached_config = {} self._cached_config = {}
self.is_multi_npu = device_info.is_multi_npu() self.is_multi_npu = device_info.is_multi_npu()
def handle(self, feature_name, op, feature_cfg): def handle(self, feature_name, op, feature_cfg):
if not feature_cfg: if not feature_cfg:
self._cached_config.pop(feature_name)
syslog.syslog(syslog.LOG_INFO, "Deregistering feature {}".format(feature_name)) syslog.syslog(syslog.LOG_INFO, "Deregistering feature {}".format(feature_name))
self._cached_config.pop(feature_name)
self._feature_state_table._del(feature_name)
return return
feature = Feature(feature_name, feature_cfg, self._device_config) feature = Feature(feature_name, feature_cfg, self._device_config)
@ -253,7 +261,6 @@ class FeatureHandler(object):
return True return True
def update_feature_auto_restart(self, feature, feature_name): def update_feature_auto_restart(self, feature, feature_name):
dir_name = self.SYSTEMD_SERVICE_CONF_DIR.format(feature_name) dir_name = self.SYSTEMD_SERVICE_CONF_DIR.format(feature_name)
auto_restart_conf = os.path.join(dir_name, 'auto_restart.conf') auto_restart_conf = os.path.join(dir_name, 'auto_restart.conf')
@ -341,8 +348,11 @@ class FeatureHandler(object):
except Exception as err: except Exception as err:
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started" syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
.format(feature.name, feature_suffixes[-1])) .format(feature.name, feature_suffixes[-1]))
self.set_feature_state(feature, self.FEATURE_STATE_FAILED)
return return
self.set_feature_state(feature, self.FEATURE_STATE_ENABLED)
def disable_feature(self, feature): def disable_feature(self, feature):
cmds = [] cmds = []
feature_names, feature_suffixes = self.get_feature_attribute(feature) feature_names, feature_suffixes = self.get_feature_attribute(feature)
@ -363,11 +373,17 @@ class FeatureHandler(object):
except Exception as err: except Exception as err:
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be stopped and disabled" syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be stopped and disabled"
.format(feature.name, feature_suffixes[-1])) .format(feature.name, feature_suffixes[-1]))
self.set_feature_state(feature, self.FEATURE_STATE_FAILED)
return return
self.set_feature_state(feature, self.FEATURE_STATE_DISABLED)
def resync_feature_state(self, feature): def resync_feature_state(self, feature):
self._config_db.mod_entry('FEATURE', feature.name, {'state': feature.state}) self._config_db.mod_entry('FEATURE', feature.name, {'state': feature.state})
def set_feature_state(self, feature, state):
self._feature_state_table.set(feature.name, [('state', state)])
class Iptables(object): class Iptables(object):
def __init__(self): def __init__(self):
@ -914,14 +930,14 @@ class NtpCfg(object):
new_src = data.get('src_intf', '') new_src = data.get('src_intf', '')
new_src_set = set(new_src.split(";")) new_src_set = set(new_src.split(";"))
new_vrf = data.get('vrf', '') new_vrf = data.get('vrf', '')
# Update the Local Cache # Update the Local Cache
self.ntp_global = data self.ntp_global = data
# check if ntp server configured, if not, do nothing # check if ntp server configured, if not, do nothing
if not self.ntp_servers: if not self.ntp_servers:
syslog.syslog(syslog.LOG_INFO, "No ntp server when global config change, do nothing") syslog.syslog(syslog.LOG_INFO, "No ntp server when global config change, do nothing")
return return
if orig_src_set != new_src_set: if orig_src_set != new_src_set:
syslog.syslog(syslog.LOG_INFO, "ntp global update for source intf old {} new {}, restarting ntp-config" syslog.syslog(syslog.LOG_INFO, "ntp global update for source intf old {} new {}, restarting ntp-config"
@ -957,6 +973,7 @@ class HostConfigDaemon:
self.config_db = ConfigDBConnector() self.config_db = ConfigDBConnector()
self.config_db.connect(wait_for_init=True, retry_on=True) self.config_db.connect(wait_for_init=True, retry_on=True)
self.dbconn = DBConnector(CFG_DB, 0) self.dbconn = DBConnector(CFG_DB, 0)
self.state_db_conn = DBConnector(STATE_DB, 0)
self.selector = Select() self.selector = Select()
syslog.syslog(syslog.LOG_INFO, 'ConfigDB connect success') syslog.syslog(syslog.LOG_INFO, 'ConfigDB connect success')
@ -964,6 +981,8 @@ class HostConfigDaemon:
self.callbacks = dict() self.callbacks = dict()
self.subscriber_map = dict() self.subscriber_map = dict()
feature_state_table = Table(self.state_db_conn, 'FEATURE')
# Load DEVICE metadata configurations # Load DEVICE metadata configurations
self.device_config = {} self.device_config = {}
self.device_config['DEVICE_METADATA'] = self.config_db.get_table('DEVICE_METADATA') self.device_config['DEVICE_METADATA'] = self.config_db.get_table('DEVICE_METADATA')
@ -976,7 +995,7 @@ class HostConfigDaemon:
self.iptables = Iptables() self.iptables = Iptables()
# Intialize Feature Handler # Intialize Feature Handler
self.feature_handler = FeatureHandler(self.config_db, self.device_config) self.feature_handler = FeatureHandler(self.config_db, feature_state_table, self.device_config)
self.feature_handler.sync_state_field() self.feature_handler.sync_state_field()
# Initialize Ntp Config Handler # Initialize Ntp Config Handler
@ -987,7 +1006,7 @@ class HostConfigDaemon:
# Initialize AAACfg # Initialize AAACfg
self.hostname_cache="" self.hostname_cache=""
self.aaacfg = AaaCfg() self.aaacfg = AaaCfg()
def load(self): def load(self):
aaa = self.config_db.get_table('AAA') aaa = self.config_db.get_table('AAA')
@ -1004,7 +1023,7 @@ class HostConfigDaemon:
self.hostname_cache = dev_meta['localhost']['hostname'] self.hostname_cache = dev_meta['localhost']['hostname']
except Exception as e: except Exception as e:
pass pass
# Update AAA with the hostname # Update AAA with the hostname
self.aaacfg.hostname_update(self.hostname_cache) self.aaacfg.hostname_update(self.hostname_cache)
@ -1130,7 +1149,7 @@ class HostConfigDaemon:
self.subscribe('VLAN_SUB_INTERFACE', lambda table, key, op, data: self.vlan_sub_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5) self.subscribe('VLAN_SUB_INTERFACE', lambda table, key, op, data: self.vlan_sub_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5)
self.subscribe('PORTCHANNEL_INTERFACE', lambda table, key, op, data: self.portchannel_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5) self.subscribe('PORTCHANNEL_INTERFACE', lambda table, key, op, data: self.portchannel_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5)
self.subscribe('INTERFACE', lambda table, key, op, data: self.phy_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5) self.subscribe('INTERFACE', lambda table, key, op, data: self.phy_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5)
syslog.syslog(syslog.LOG_INFO, syslog.syslog(syslog.LOG_INFO,
"Waiting for systemctl to finish initialization") "Waiting for systemctl to finish initialization")
self.wait_till_system_init_done() self.wait_till_system_init_done()

View File

@ -36,6 +36,7 @@ hostcfgd.ConfigDBConnector = MockConfigDb
hostcfgd.SubscriberStateTable = MockSubscriberStateTable hostcfgd.SubscriberStateTable = MockSubscriberStateTable
hostcfgd.Select = MockSelect hostcfgd.Select = MockSelect
hostcfgd.DBConnector = MockDBConnector hostcfgd.DBConnector = MockDBConnector
hostcfgd.Table = mock.Mock()
class TestHostcfgdRADIUS(TestCase): class TestHostcfgdRADIUS(TestCase):

View File

@ -35,6 +35,7 @@ hostcfgd.ConfigDBConnector = MockConfigDb
hostcfgd.SubscriberStateTable = MockSubscriberStateTable hostcfgd.SubscriberStateTable = MockSubscriberStateTable
hostcfgd.Select = MockSelect hostcfgd.Select = MockSelect
hostcfgd.DBConnector = MockDBConnector hostcfgd.DBConnector = MockDBConnector
hostcfgd.Table = mock.Mock()
class TestHostcfgdTACACS(TestCase): class TestHostcfgdTACACS(TestCase):
""" """
@ -44,7 +45,7 @@ class TestHostcfgdTACACS(TestCase):
return subprocess.check_output('diff -uR {} {} || true'.format(file1, file2), shell=True) return subprocess.check_output('diff -uR {} {} || true'.format(file1, file2), shell=True)
""" """
Check different config Check different config
""" """
def check_config(self, test_name, test_data, config_name): def check_config(self, test_name, test_data, config_name):
t_path = templates_path t_path = templates_path

View File

@ -27,20 +27,23 @@ hostcfgd.ConfigDBConnector = MockConfigDb
hostcfgd.SubscriberStateTable = MockSubscriberStateTable hostcfgd.SubscriberStateTable = MockSubscriberStateTable
hostcfgd.Select = MockSelect hostcfgd.Select = MockSelect
hostcfgd.DBConnector = MockDBConnector hostcfgd.DBConnector = MockDBConnector
hostcfgd.Table = mock.Mock()
class TestHostcfgd(TestCase): class TestHostcfgd(TestCase):
""" """
Test hostcfd daemon - feature Test hostcfd daemon - feature
""" """
def __verify_table(self, table, expected_table): def __verify_table(self, table, feature_state_table, expected_table):
""" """
verify config db tables verify config db tables
Compares Config DB table (FEATURE) with expected output table Compares Config DB table (FEATURE) with expected output table.
Verifies that State DB table (FEATURE) is updated.
Args: Args:
table(dict): Current Config Db table table(dict): Current Config Db table
feature_state_table(Mock): Mocked State DB FEATURE table
expected_table(dict): Expected Config Db table expected_table(dict): Expected Config Db table
Returns: Returns:
@ -48,6 +51,19 @@ class TestHostcfgd(TestCase):
""" """
ddiff = DeepDiff(table, expected_table, ignore_order=True) ddiff = DeepDiff(table, expected_table, ignore_order=True)
print('DIFF:', ddiff) print('DIFF:', ddiff)
def get_state(cfg_state):
""" Translates CONFIG DB state field into STATE DB state field """
if cfg_state == 'always_disabled':
return 'disabled'
elif cfg_state == 'always_enabled':
return 'enabled'
else:
return cfg_state
feature_state_table.set.assert_has_calls([
mock.call(feature, [('state', get_state(table[feature]['state']))]) for feature in table
])
return True if not ddiff else False return True if not ddiff else False
def __verify_fs(self, table): def __verify_fs(self, table):
@ -93,6 +109,7 @@ class TestHostcfgd(TestCase):
fs.add_real_paths(swsscommon_package.__path__) # add real path of swsscommon for database_config.json fs.add_real_paths(swsscommon_package.__path__) # add real path of swsscommon for database_config.json
fs.create_dir(hostcfgd.FeatureHandler.SYSTEMD_SYSTEM_DIR) fs.create_dir(hostcfgd.FeatureHandler.SYSTEMD_SYSTEM_DIR)
MockConfigDb.set_config_db(test_data['config_db']) MockConfigDb.set_config_db(test_data['config_db'])
feature_state_table_mock = mock.Mock()
with mock.patch('hostcfgd.subprocess') as mocked_subprocess: with mock.patch('hostcfgd.subprocess') as mocked_subprocess:
popen_mock = mock.Mock() popen_mock = mock.Mock()
attrs = test_data['popen_attributes'] attrs = test_data['popen_attributes']
@ -102,7 +119,7 @@ class TestHostcfgd(TestCase):
# Initialize Feature Handler # Initialize Feature Handler
device_config = {} device_config = {}
device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA'] device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA']
feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), device_config) feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config)
# sync the state field and Handle Feature Updates # sync the state field and Handle Feature Updates
feature_handler.sync_state_field() feature_handler.sync_state_field()
@ -113,6 +130,7 @@ class TestHostcfgd(TestCase):
# Verify if the updates are properly updated # Verify if the updates are properly updated
assert self.__verify_table( assert self.__verify_table(
MockConfigDb.get_config_db()['FEATURE'], MockConfigDb.get_config_db()['FEATURE'],
feature_state_table_mock,
test_data['expected_config_db']['FEATURE'] test_data['expected_config_db']['FEATURE']
), 'Test failed for test data: {0}'.format(test_data) ), 'Test failed for test data: {0}'.format(test_data)
mocked_subprocess.check_call.assert_has_calls(test_data['expected_subprocess_calls'], any_order=True) mocked_subprocess.check_call.assert_has_calls(test_data['expected_subprocess_calls'], any_order=True)