[hostcfgd]: Add Ability To Configure Feature During Run-time (#6700)

Features may be enabled/disabled for the same topology based on run-time
configuration. This PR adds the ability to enable/disable feature based
on config db data.

signed-off-by: Tamer Ahmed <tamer.ahmed@microsoft.com>
This commit is contained in:
Tamer Ahmed 2021-03-13 05:56:27 -08:00 committed by GitHub
parent 7f52abc621
commit 51ab39fcb2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 494 additions and 60 deletions

View File

@ -9,5 +9,5 @@ with open(INIT_CFG_FILE_PATH) as init_cfg_file:
init_cfg = json.load(init_cfg_file) init_cfg = json.load(init_cfg_file)
if 'FEATURE' in init_cfg: if 'FEATURE' in init_cfg:
for feature_name, feature_props in init_cfg['FEATURE'].items(): for feature_name, feature_props in init_cfg['FEATURE'].items():
if 'state' in feature_props and feature_props['state'] == 'disabled': if 'state' in feature_props and feature_props['state'] != 'enabled' and feature_props['state'] != 'always_enabled':
subprocess.run(['systemctl', 'mask', '{}.service'.format(feature_name)]) subprocess.run(['systemctl', 'mask', '{}.service'.format(feature_name)])

View File

@ -12,5 +12,6 @@ dist/
# Unit test coverage # Unit test coverage
.coverage .coverage
.pytest_cache/
coverage.xml coverage.xml
htmlcov/ htmlcov/

View File

View File

@ -1,2 +1,2 @@
[pytest] [pytest]
addopts = --cov=scripts --cov-report html --cov-report term --cov-report xml addopts = --cov=scripts --cov-report html --cov-report term --cov-report xml --ignore=tests/hostcfgd/test_vectors.py

View File

@ -394,6 +394,10 @@ class HostConfigDaemon:
self.config_db.connect(wait_for_init=True, retry_on=True) self.config_db.connect(wait_for_init=True, retry_on=True)
syslog.syslog(syslog.LOG_INFO, 'ConfigDB connect success') syslog.syslog(syslog.LOG_INFO, 'ConfigDB connect success')
# Load DEVICE metadata configurations
self.device_config = {}
self.device_config['DEVICE_METADATA'] = self.config_db.get_table('DEVICE_METADATA')
self.aaacfg = AaaCfg() self.aaacfg = AaaCfg()
self.iptables = Iptables() self.iptables = Iptables()
self.ntpcfg = NtpCfg(self.config_db) self.ntpcfg = NtpCfg(self.config_db)
@ -421,75 +425,106 @@ class HostConfigDaemon:
ntp_global = self.config_db.get_table('NTP') ntp_global = self.config_db.get_table('NTP')
self.ntpcfg.load(ntp_global, ntp_server) self.ntpcfg.load(ntp_global, ntp_server)
def update_feature_state(self, feature_name, state, feature_table): def get_target_state(self, feature_name, state):
if self.cached_feature_states[feature_name] == "always_enabled": template = jinja2.Template(state)
if state != "always_enabled": target_state = template.render(self.device_config)
syslog.syslog(syslog.LOG_INFO, "Feature '{}' service is always enabled" entry = self.config_db.get_entry('FEATURE', feature_name)
.format(feature_name)) entry["state"] = target_state
entry = self.config_db.get_entry('FEATURE', feature_name) self.config_db.set_entry("FEATURE", feature_name, entry)
entry['state'] = 'always_enabled'
self.config_db.set_entry('FEATURE', feature_name, entry)
return
self.cached_feature_states[feature_name] = state return target_state
def get_feature_attribute(self, feature_name, feature_table):
has_timer = ast.literal_eval(feature_table[feature_name].get('has_timer', 'False')) has_timer = ast.literal_eval(feature_table[feature_name].get('has_timer', 'False'))
has_global_scope = ast.literal_eval(feature_table[feature_name].get('has_global_scope', 'True')) has_global_scope = ast.literal_eval(feature_table[feature_name].get('has_global_scope', 'True'))
has_per_asic_scope = ast.literal_eval(feature_table[feature_name].get('has_per_asic_scope', 'False')) has_per_asic_scope = ast.literal_eval(feature_table[feature_name].get('has_per_asic_scope', 'False'))
# Create feature name suffix depending feature is running in host or namespace or in both # Create feature name suffix depending feature is running in host or namespace or in both
feature_name_suffix_list = (([feature_name] if has_global_scope or not self.is_multi_npu else []) + feature_names = (
([(feature_name + '@' + str(asic_inst)) for asic_inst in range(device_info.get_num_npus()) ([feature_name] if has_global_scope or not self.is_multi_npu else []) +
if has_per_asic_scope and self.is_multi_npu])) ([(feature_name + '@' + str(asic_inst)) for asic_inst in range(device_info.get_num_npus())
if has_per_asic_scope and self.is_multi_npu])
)
if not feature_name_suffix_list: if not feature_names:
syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available" syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available"
.format(feature_name)) .format(feature_name))
feature_suffixes = ["service"] + (["timer"] if has_timer else []) feature_suffixes = ["service"] + (["timer"] if has_timer else [])
if state == "enabled": return feature_names, feature_suffixes
start_cmds = []
for feature_name_suffix in feature_name_suffix_list:
for suffix in feature_suffixes:
start_cmds.append("sudo systemctl unmask {}.{}".format(feature_name_suffix, suffix))
# If feature has timer associated with it, start/enable corresponding systemd .timer unit
# otherwise, start/enable corresponding systemd .service unit
start_cmds.append("sudo systemctl enable {}.{}".format(feature_name_suffix, feature_suffixes[-1]))
start_cmds.append("sudo systemctl start {}.{}".format(feature_name_suffix, feature_suffixes[-1]))
for cmd in start_cmds:
syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
try:
subprocess.check_call(cmd, shell=True)
except subprocess.CalledProcessError as err:
syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
.format(err.cmd, err.returncode, err.output))
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
.format(feature_name, feature_suffixes[-1]))
return
syslog.syslog(syslog.LOG_INFO, "Feature '{}.{}' is enabled and started"
.format(feature_name, feature_suffixes[-1]))
elif state == "disabled":
stop_cmds = []
for feature_name_suffix in feature_name_suffix_list:
for suffix in reversed(feature_suffixes):
stop_cmds.append("sudo systemctl stop {}.{}".format(feature_name_suffix, suffix))
stop_cmds.append("sudo systemctl disable {}.{}".format(feature_name_suffix, suffix))
stop_cmds.append("sudo systemctl mask {}.{}".format(feature_name_suffix, suffix))
for cmd in stop_cmds:
syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
try:
subprocess.check_call(cmd, shell=True)
except subprocess.CalledProcessError as err:
syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
.format(err.cmd, err.returncode, err.output))
syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to be stopped and disabled".format(feature_name))
return
syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name))
else:
syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature '{}'"
.format(state, feature_name))
def enable_feature(self, feature_names, feature_suffixes):
start_cmds = []
for feature_name in feature_names:
for suffix in feature_suffixes:
start_cmds.append("sudo systemctl unmask {}.{}".format(feature_name, suffix))
# If feature has timer associated with it, start/enable corresponding systemd .timer unit
# otherwise, start/enable corresponding systemd .service unit
start_cmds.append("sudo systemctl enable {}.{}".format(feature_name, feature_suffixes[-1]))
start_cmds.append("sudo systemctl start {}.{}".format(feature_name, feature_suffixes[-1]))
for cmd in start_cmds:
syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
try:
subprocess.check_call(cmd, shell=True)
except subprocess.CalledProcessError as err:
syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
.format(err.cmd, err.returncode, err.output))
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
.format(feature_name, feature_suffixes[-1]))
return
def disable_feature(self, feature_names, feature_suffixes):
stop_cmds = []
for feature_name in feature_names:
for suffix in reversed(feature_suffixes):
stop_cmds.append("sudo systemctl stop {}.{}".format(feature_name, suffix))
stop_cmds.append("sudo systemctl disable {}.{}".format(feature_name, suffix))
stop_cmds.append("sudo systemctl mask {}.{}".format(feature_name, suffix))
for cmd in stop_cmds:
syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
try:
subprocess.check_call(cmd, shell=True)
except subprocess.CalledProcessError as err:
syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
.format(err.cmd, err.returncode, err.output))
syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to be stopped and disabled".format(feature_name))
return
def is_invariant_feature(self, feature_name, state, feature_table):
invariant_feature = self.cached_feature_states[feature_name] == "always_enabled" or \
self.cached_feature_states[feature_name] == "always_disabled"
if invariant_feature:
invariant_state = self.cached_feature_states[feature_name]
if state != invariant_state:
syslog.syslog(syslog.LOG_INFO, "Feature '{}' service is '{}'"
.format(feature_name, invariant_state))
entry = self.config_db.get_entry('FEATURE', feature_name)
entry['state'] = invariant_state
self.config_db.set_entry('FEATURE', feature_name, entry)
if state == "always_disabled":
feature_names, feature_suffixes = self.get_feature_attribute(feature_name, feature_table)
self.disable_feature(feature_names, feature_suffixes)
syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name))
return invariant_feature
def update_feature_state(self, feature_name, state, feature_table):
if not self.is_invariant_feature(feature_name, state, feature_table):
self.cached_feature_states[feature_name] = state
feature_names, feature_suffixes = self.get_feature_attribute(feature_name, feature_table)
if state == "enabled":
self.enable_feature(feature_names, feature_suffixes)
syslog.syslog(syslog.LOG_INFO, "Feature '{}.{}' is enabled and started"
.format(feature_name, feature_suffixes[-1]))
elif state == "disabled":
self.disable_feature(feature_names, feature_suffixes)
syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name))
else:
syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature '{}'"
.format(state, feature_name))
def update_all_feature_states(self): def update_all_feature_states(self):
feature_table = self.config_db.get_table('FEATURE') feature_table = self.config_db.get_table('FEATURE')
@ -500,13 +535,14 @@ class HostConfigDaemon:
state = feature_table[feature_name]['state'] state = feature_table[feature_name]['state']
if not state: if not state:
syslog.syslog(syslog.LOG_WARNING, "Eanble state of feature '{}' is None".format(feature_name)) syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name))
continue continue
target_state = self.get_target_state(feature_name, state)
# Store the initial value of 'state' field in 'FEATURE' table of a specific container # Store the initial value of 'state' field in 'FEATURE' table of a specific container
self.cached_feature_states[feature_name] = state self.cached_feature_states[feature_name] = target_state
self.update_feature_state(feature_name, state, feature_table) self.update_feature_state(feature_name, target_state, feature_table)
def aaa_handler(self, key, data): def aaa_handler(self, key, data):
self.aaacfg.aaa_update(key, data) self.aaacfg.aaa_update(key, data)

View File

@ -0,0 +1,78 @@
import importlib.machinery
import importlib.util
import os
import sys
import swsssdk
from parameterized import parameterized
from unittest import TestCase, mock
from tests.hostcfgd.test_vectors import HOSTCFGD_TEST_VECTOR
from tests.hostcfgd.mock_configdb import MockConfigDb
swsssdk.ConfigDBConnector = MockConfigDb
test_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
modules_path = os.path.dirname(test_path)
scripts_path = os.path.join(modules_path, "scripts")
sys.path.insert(0, modules_path)
# Load the file under test
hostcfgd_path = os.path.join(scripts_path, 'hostcfgd')
loader = importlib.machinery.SourceFileLoader('hostcfgd', hostcfgd_path)
spec = importlib.util.spec_from_loader(loader.name, loader)
hostcfgd = importlib.util.module_from_spec(spec)
loader.exec_module(hostcfgd)
sys.modules['hostcfgd'] = hostcfgd
class TestHostcfgd(TestCase):
"""
Test hostcfd daemon - feature
"""
def __verify_table(self, table, expected_table):
"""
verify config db tables
Compares Config DB table (FEATURE) with expected output table
Args:
table(dict): Current Config Db table
expected_table(dict): Expected Config Db table
Returns:
None
"""
is_equal = len(table) == len(expected_table)
if is_equal:
for key, fields in expected_table.items():
is_equal = is_equal and key in table and len(fields) == len(table[key])
if is_equal:
for field, value in fields.items():
is_equal = is_equal and value == table[key][field]
if not is_equal:
break;
else:
break
return is_equal
@parameterized.expand(HOSTCFGD_TEST_VECTOR)
def test_hostcfgd(self, test_name, test_data):
"""
Test hostcfd daemon initialization
Args:
test_name(str): test name
test_data(dict): test data which contains initial Config Db tables, and expected results
Returns:
None
"""
MockConfigDb.set_config_db(test_data["config_db"])
with mock.patch("hostcfgd.subprocess") as mocked_subprocess:
host_config_daemon = hostcfgd.HostConfigDaemon()
host_config_daemon.update_all_feature_states()
assert self.__verify_table(
MockConfigDb.get_config_db()["FEATURE"],
test_data["expected_config_db"]["FEATURE"]
), "Test failed for test data: {0}".format(test_data)
mocked_subprocess.check_call.assert_has_calls(test_data["expected_subprocess_calls"], any_order=True)

View File

@ -0,0 +1,32 @@
class MockConfigDb(object):
"""
Mock Config DB which responds to data tables requests and store updates to the data table
"""
STATE_DB = None
CONFIG_DB = None
def __init__(self):
pass
@staticmethod
def set_config_db(test_config_db):
MockConfigDb.CONFIG_DB = test_config_db
@staticmethod
def get_config_db():
return MockConfigDb.CONFIG_DB
def connect(self, wait_for_init=True, retry_on=True):
pass
def get(self, db_id, key, field):
return MockConfigDb.CONFIG_DB[key][field]
def get_entry(self, key, field):
return MockConfigDb.CONFIG_DB[key][field]
def set_entry(self, key, field, data):
MockConfigDb.CONFIG_DB[key][field] = data
def get_table(self, table_name):
return MockConfigDb.CONFIG_DB[table_name]

View File

@ -0,0 +1,287 @@
from unittest.mock import call
"""
hostcfgd test vector
"""
HOSTCFGD_TEST_VECTOR = [
[
"DualTorCase",
{
"config_db": {
"DEVICE_METADATA": {
"localhost": {
"subtype": "DualToR",
"type": "ToRRouter",
}
},
"KDUMP": {
"config": {
"enabled": "false",
"num_dumps": "3",
"memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M"
}
},
"FEATURE": {
"dhcp_relay": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled"
},
"mux": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "local",
"state": "{% if 'subtype' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['subtype'] == 'DualToR' %}enabled{% else %}always_disabled{% endif %}"
},
"telemetry": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "True",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled",
"status": "enabled"
},
},
},
"expected_config_db": {
"FEATURE": {
"dhcp_relay": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled"
},
"mux": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "local",
"state": "enabled"
},
"telemetry": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "True",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled",
"status": "enabled"
},
},
},
"expected_subprocess_calls": [
call("sudo systemctl unmask dhcp_relay.service", shell=True),
call("sudo systemctl enable dhcp_relay.service", shell=True),
call("sudo systemctl start dhcp_relay.service", shell=True),
call("sudo systemctl unmask mux.service", shell=True),
call("sudo systemctl enable mux.service", shell=True),
call("sudo systemctl start mux.service", shell=True),
call("sudo systemctl unmask telemetry.service", shell=True),
call("sudo systemctl unmask telemetry.timer", shell=True),
call("sudo systemctl enable telemetry.timer", shell=True),
call("sudo systemctl start telemetry.timer", shell=True),
],
},
],
[
"SingleToRCase",
{
"config_db": {
"DEVICE_METADATA": {
"localhost": {
"type": "ToR",
}
},
"KDUMP": {
"config": {
"enabled": "false",
"num_dumps": "3",
"memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M"
}
},
"FEATURE": {
"dhcp_relay": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled"
},
"mux": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "local",
"state": "{% if 'subtype' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['subtype'] == 'DualToR' %}enabled{% else %}always_disabled{% endif %}"
},
"telemetry": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "True",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled",
"status": "enabled"
},
},
},
"expected_config_db": {
"FEATURE": {
"dhcp_relay": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled"
},
"mux": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "local",
"state": "always_disabled"
},
"telemetry": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "True",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled",
"status": "enabled"
},
},
},
"expected_subprocess_calls": [
call("sudo systemctl unmask dhcp_relay.service", shell=True),
call("sudo systemctl enable dhcp_relay.service", shell=True),
call("sudo systemctl start dhcp_relay.service", shell=True),
call("sudo systemctl stop mux.service", shell=True),
call("sudo systemctl disable mux.service", shell=True),
call("sudo systemctl mask mux.service", shell=True),
call("sudo systemctl unmask telemetry.service", shell=True),
call("sudo systemctl unmask telemetry.timer", shell=True),
call("sudo systemctl enable telemetry.timer", shell=True),
call("sudo systemctl start telemetry.timer", shell=True),
],
},
],
[
"T1Case",
{
"config_db": {
"DEVICE_METADATA": {
"localhost": {
"type": "T1",
}
},
"KDUMP": {
"config": {
"enabled": "false",
"num_dumps": "3",
"memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M"
}
},
"FEATURE": {
"dhcp_relay": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled"
},
"mux": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "local",
"state": "{% if 'subtype' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['subtype'] == 'DualToR' %}enabled{% else %}always_disabled{% endif %}"
},
"telemetry": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "True",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled",
"status": "enabled"
},
},
},
"expected_config_db": {
"FEATURE": {
"dhcp_relay": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled"
},
"mux": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "False",
"high_mem_alert": "disabled",
"set_owner": "local",
"state": "always_disabled"
},
"telemetry": {
"auto_restart": "enabled",
"has_global_scope": "True",
"has_per_asic_scope": "False",
"has_timer": "True",
"high_mem_alert": "disabled",
"set_owner": "kube",
"state": "enabled",
"status": "enabled"
},
},
},
"expected_subprocess_calls": [
call("sudo systemctl unmask dhcp_relay.service", shell=True),
call("sudo systemctl enable dhcp_relay.service", shell=True),
call("sudo systemctl start dhcp_relay.service", shell=True),
call("sudo systemctl stop mux.service", shell=True),
call("sudo systemctl disable mux.service", shell=True),
call("sudo systemctl mask mux.service", shell=True),
call("sudo systemctl unmask telemetry.service", shell=True),
call("sudo systemctl unmask telemetry.timer", shell=True),
call("sudo systemctl enable telemetry.timer", shell=True),
call("sudo systemctl start telemetry.timer", shell=True),
],
},
],
]