[hostcfgd] record feature state in STATE DB (#9842)

- Why I did it
To implement blocking feature state change.

- How I did it
Record the actual feature state in STATE DB from hostcfg.

- How to verify it
UT + verification by running on the switch and checking STATE DB.

Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
This commit is contained in:
Stepan Blyshchak 2022-03-14 13:45:27 +02:00 committed by Judy Joseph
parent 7b84abc4d0
commit cec403eec2
5 changed files with 59 additions and 21 deletions

View File

@ -32,7 +32,7 @@ def get_expected_running_containers():
value of field 'has_global_scope', the number of ASICs and the value of field
'has_per_asic_scope'.
If the device has single ASIC, the container name was put into the list.
@return: A set which contains the expected running containers and a set that has
@return: A set which contains the expected running containers and a set that has
containers marked as "always_enabled".
"""
config_db = swsssdk.ConfigDBConnector()
@ -82,7 +82,7 @@ def get_current_running_from_DB(always_running_containers):
state_db = swsscommon.DBConnector("STATE_DB", 0)
tbl = swsscommon.Table(state_db, "FEATURE")
if not tbl.getKeys():
return False, None
return running_containers
for name in tbl.getKeys():
data = dict(tbl.get(name)[1])
@ -101,7 +101,7 @@ def get_current_running_from_DB(always_running_containers):
print("Failed to get container '{}'. Error: '{}'".format(name, err))
pass
return True, running_containers
return running_containers
def get_current_running_from_dockers():
@ -128,13 +128,12 @@ def get_current_running_containers(always_running_containers):
"""
@summary: This function will get the list of currently running containers.
If available in STATE-DB, get from DB else from list of dockers.
@return: A set of currently running containers.
"""
ret, current_running_containers = get_current_running_from_DB(always_running_containers)
if not ret:
current_running_containers = get_current_running_from_dockers()
current_running_containers = get_current_running_from_DB(always_running_containers)
current_running_containers.update(get_current_running_from_dockers())
return current_running_containers

View File

@ -12,7 +12,7 @@ import signal
import jinja2
from sonic_py_common import device_info
from swsscommon.swsscommon import SubscriberStateTable, DBConnector, Select
from swsscommon.swsscommon import ConfigDBConnector, TableConsumable
from swsscommon.swsscommon import ConfigDBConnector, TableConsumable, Table
# FILE
PAM_AUTH_CONF = "/etc/pam.d/common-auth-sonic"
@ -41,6 +41,7 @@ RADIUS_PAM_AUTH_CONF_DIR = "/etc/pam_radius_auth.d/"
# MISC Constants
CFG_DB = "CONFIG_DB"
STATE_DB = "STATE_DB"
HOSTCFGD_MAX_PRI = 10 # Used to enforce ordering b/w daemons under Hostcfgd
DEFAULT_SELECT_TIMEOUT = 1000
@ -166,16 +167,23 @@ class FeatureHandler(object):
SYSTEMD_SYSTEM_DIR = '/etc/systemd/system/'
SYSTEMD_SERVICE_CONF_DIR = os.path.join(SYSTEMD_SYSTEM_DIR, '{}.service.d/')
def __init__(self, config_db, device_config):
# Feature state constants
FEATURE_STATE_ENABLED = "enabled"
FEATURE_STATE_DISABLED = "disabled"
FEATURE_STATE_FAILED = "failed"
def __init__(self, config_db, feature_state_table, device_config):
self._config_db = config_db
self._feature_state_table = feature_state_table
self._device_config = device_config
self._cached_config = {}
self.is_multi_npu = device_info.is_multi_npu()
def handle(self, feature_name, op, feature_cfg):
if not feature_cfg:
self._cached_config.pop(feature_name)
syslog.syslog(syslog.LOG_INFO, "Deregistering feature {}".format(feature_name))
self._cached_config.pop(feature_name)
self._feature_state_table._del(feature_name)
return
feature = Feature(feature_name, feature_cfg, self._device_config)
@ -253,7 +261,6 @@ class FeatureHandler(object):
return True
def update_feature_auto_restart(self, feature, feature_name):
dir_name = self.SYSTEMD_SERVICE_CONF_DIR.format(feature_name)
auto_restart_conf = os.path.join(dir_name, 'auto_restart.conf')
@ -341,8 +348,11 @@ class FeatureHandler(object):
except Exception as err:
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
.format(feature.name, feature_suffixes[-1]))
self.set_feature_state(feature, self.FEATURE_STATE_FAILED)
return
self.set_feature_state(feature, self.FEATURE_STATE_ENABLED)
def disable_feature(self, feature):
cmds = []
feature_names, feature_suffixes = self.get_feature_attribute(feature)
@ -363,11 +373,17 @@ class FeatureHandler(object):
except Exception as err:
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be stopped and disabled"
.format(feature.name, feature_suffixes[-1]))
self.set_feature_state(feature, self.FEATURE_STATE_FAILED)
return
self.set_feature_state(feature, self.FEATURE_STATE_DISABLED)
def resync_feature_state(self, feature):
self._config_db.mod_entry('FEATURE', feature.name, {'state': feature.state})
def set_feature_state(self, feature, state):
self._feature_state_table.set(feature.name, [('state', state)])
class Iptables(object):
def __init__(self):
@ -914,14 +930,14 @@ class NtpCfg(object):
new_src = data.get('src_intf', '')
new_src_set = set(new_src.split(";"))
new_vrf = data.get('vrf', '')
# Update the Local Cache
self.ntp_global = data
# check if ntp server configured, if not, do nothing
if not self.ntp_servers:
syslog.syslog(syslog.LOG_INFO, "No ntp server when global config change, do nothing")
return
return
if orig_src_set != new_src_set:
syslog.syslog(syslog.LOG_INFO, "ntp global update for source intf old {} new {}, restarting ntp-config"
@ -957,6 +973,7 @@ class HostConfigDaemon:
self.config_db = ConfigDBConnector()
self.config_db.connect(wait_for_init=True, retry_on=True)
self.dbconn = DBConnector(CFG_DB, 0)
self.state_db_conn = DBConnector(STATE_DB, 0)
self.selector = Select()
syslog.syslog(syslog.LOG_INFO, 'ConfigDB connect success')
@ -964,6 +981,8 @@ class HostConfigDaemon:
self.callbacks = dict()
self.subscriber_map = dict()
feature_state_table = Table(self.state_db_conn, 'FEATURE')
# Load DEVICE metadata configurations
self.device_config = {}
self.device_config['DEVICE_METADATA'] = self.config_db.get_table('DEVICE_METADATA')
@ -976,7 +995,7 @@ class HostConfigDaemon:
self.iptables = Iptables()
# Intialize Feature Handler
self.feature_handler = FeatureHandler(self.config_db, self.device_config)
self.feature_handler = FeatureHandler(self.config_db, feature_state_table, self.device_config)
self.feature_handler.sync_state_field()
# Initialize Ntp Config Handler
@ -987,7 +1006,7 @@ class HostConfigDaemon:
# Initialize AAACfg
self.hostname_cache=""
self.aaacfg = AaaCfg()
def load(self):
aaa = self.config_db.get_table('AAA')
@ -1004,7 +1023,7 @@ class HostConfigDaemon:
self.hostname_cache = dev_meta['localhost']['hostname']
except Exception as e:
pass
# Update AAA with the hostname
self.aaacfg.hostname_update(self.hostname_cache)
@ -1130,7 +1149,7 @@ class HostConfigDaemon:
self.subscribe('VLAN_SUB_INTERFACE', lambda table, key, op, data: self.vlan_sub_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5)
self.subscribe('PORTCHANNEL_INTERFACE', lambda table, key, op, data: self.portchannel_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5)
self.subscribe('INTERFACE', lambda table, key, op, data: self.phy_intf_handler(key, op, data), HOSTCFGD_MAX_PRI-5)
syslog.syslog(syslog.LOG_INFO,
"Waiting for systemctl to finish initialization")
self.wait_till_system_init_done()

View File

@ -36,6 +36,7 @@ hostcfgd.ConfigDBConnector = MockConfigDb
hostcfgd.SubscriberStateTable = MockSubscriberStateTable
hostcfgd.Select = MockSelect
hostcfgd.DBConnector = MockDBConnector
hostcfgd.Table = mock.Mock()
class TestHostcfgdRADIUS(TestCase):

View File

@ -35,6 +35,7 @@ hostcfgd.ConfigDBConnector = MockConfigDb
hostcfgd.SubscriberStateTable = MockSubscriberStateTable
hostcfgd.Select = MockSelect
hostcfgd.DBConnector = MockDBConnector
hostcfgd.Table = mock.Mock()
class TestHostcfgdTACACS(TestCase):
"""
@ -44,7 +45,7 @@ class TestHostcfgdTACACS(TestCase):
return subprocess.check_output('diff -uR {} {} || true'.format(file1, file2), shell=True)
"""
Check different config
Check different config
"""
def check_config(self, test_name, test_data, config_name):
t_path = templates_path

View File

@ -27,20 +27,23 @@ hostcfgd.ConfigDBConnector = MockConfigDb
hostcfgd.SubscriberStateTable = MockSubscriberStateTable
hostcfgd.Select = MockSelect
hostcfgd.DBConnector = MockDBConnector
hostcfgd.Table = mock.Mock()
class TestHostcfgd(TestCase):
"""
Test hostcfd daemon - feature
"""
def __verify_table(self, table, expected_table):
def __verify_table(self, table, feature_state_table, expected_table):
"""
verify config db tables
Compares Config DB table (FEATURE) with expected output table
Compares Config DB table (FEATURE) with expected output table.
Verifies that State DB table (FEATURE) is updated.
Args:
table(dict): Current Config Db table
feature_state_table(Mock): Mocked State DB FEATURE table
expected_table(dict): Expected Config Db table
Returns:
@ -48,6 +51,19 @@ class TestHostcfgd(TestCase):
"""
ddiff = DeepDiff(table, expected_table, ignore_order=True)
print('DIFF:', ddiff)
def get_state(cfg_state):
""" Translates CONFIG DB state field into STATE DB state field """
if cfg_state == 'always_disabled':
return 'disabled'
elif cfg_state == 'always_enabled':
return 'enabled'
else:
return cfg_state
feature_state_table.set.assert_has_calls([
mock.call(feature, [('state', get_state(table[feature]['state']))]) for feature in table
])
return True if not ddiff else False
def __verify_fs(self, table):
@ -93,6 +109,7 @@ class TestHostcfgd(TestCase):
fs.add_real_paths(swsscommon_package.__path__) # add real path of swsscommon for database_config.json
fs.create_dir(hostcfgd.FeatureHandler.SYSTEMD_SYSTEM_DIR)
MockConfigDb.set_config_db(test_data['config_db'])
feature_state_table_mock = mock.Mock()
with mock.patch('hostcfgd.subprocess') as mocked_subprocess:
popen_mock = mock.Mock()
attrs = test_data['popen_attributes']
@ -102,7 +119,7 @@ class TestHostcfgd(TestCase):
# Initialize Feature Handler
device_config = {}
device_config['DEVICE_METADATA'] = MockConfigDb.CONFIG_DB['DEVICE_METADATA']
feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), device_config)
feature_handler = hostcfgd.FeatureHandler(MockConfigDb(), feature_state_table_mock, device_config)
# sync the state field and Handle Feature Updates
feature_handler.sync_state_field()
@ -113,6 +130,7 @@ class TestHostcfgd(TestCase):
# Verify if the updates are properly updated
assert self.__verify_table(
MockConfigDb.get_config_db()['FEATURE'],
feature_state_table_mock,
test_data['expected_config_db']['FEATURE']
), 'Test failed for test data: {0}'.format(test_data)
mocked_subprocess.check_call.assert_has_calls(test_data['expected_subprocess_calls'], any_order=True)