[hostcfgd] Configure service auto-restart in hostcfgd. (#5744)

Before this change, a process running inside every SONiC container read the 'auto_restart' field of the FEATURE table and, depending on its value, decided whether the container had to be killed or not.
If the container was killed, the service auto-restart mechanism restarted it.
This change moves that logic from the container to the host daemon - hostcfgd.
The 'auto_restart' handling is kept in supervisor-proc-exit-listener, but it is no longer required for a container that wants to support the auto-restart feature.

hostcfgd refactoring - move feature handling in another class.
override systemd service Restart= setting from hostcfgd.
remove default systemd Restart=always.
Signed-off-by: Stepan Blyshchak stepanb@nvidia.com

- Why I did it

Remove the need to deal with container orchestration logic from the container itself. Leave this logic to the orchestrator - host OS.

- How I did it

hostcfgd configures 'Restart=' value for systemd service.

- How to verify it

root@r-tigon-11:/home/admin# sudo config feature autorestart lldp enabled
root@r-tigon-11:/home/admin# show feature status | grep lldp
lldp            enabled   enabled
root@r-tigon-11:/home/admin# docker exec -it lldp pkill -9 lldpd
root@r-tigon-11:/home/admin# docker ps -a | grep lldp
65058396277c        docker-lldp:latest                   "/usr/bin/docker-lld…"   2 days ago          Exited (0) 20 seconds ago                       lldp
root@r-tigon-11:/home/admin# docker ps -a | grep lldp
65058396277c        docker-lldp:latest                   "/usr/bin/docker-lld…"   2 days ago          Up 5 seconds                            lldp
root@r-tigon-11:/home/admin# sudo config feature autorestart lldp disabled
root@r-tigon-11:/home/admin# docker exec -it lldp pkill -9 lldpd
root@r-tigon-11:/home/admin# docker ps -a | grep lldp
65058396277c        docker-lldp:latest                   "/usr/bin/docker-lld…"   2 days ago          Up 35 seconds                           lldp
root@r-tigon-11:/home/admin# docker ps -a | grep lldp
65058396277c        docker-lldp:latest                   "/usr/bin/docker-lld…"   2 days ago          Exited (0) 3 seconds ago                       lldp
root@r-tigon-11:/home/admin# docker ps -a | grep lldp
65058396277c        docker-lldp:latest                   "/usr/bin/docker-lld…"   2 days ago          Exited (0) 39 seconds ago                       lldp
root@r-tigon-11:/home/admin#
This commit is contained in:
Stepan Blyshchak 2021-06-29 19:06:21 +03:00 committed by GitHub
parent abbf627b8a
commit 9ce7c6d9fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 295 additions and 171 deletions

View File

@ -13,7 +13,6 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start
ExecStart=/usr/bin/{{ docker_container_name }}.sh wait ExecStart=/usr/bin/{{ docker_container_name }}.sh wait
ExecStop=/usr/bin/{{ docker_container_name }}.sh stop ExecStop=/usr/bin/{{ docker_container_name }}.sh stop
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -13,7 +13,6 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop ExecStop=/usr/bin/{{docker_container_name}}.sh stop
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -16,7 +16,6 @@ ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start{% if multi_instan
ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %} ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %}
ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %} ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %}
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -17,7 +17,6 @@ User=root
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %} ExecStartPre=/usr/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %}
ExecStart=/usr/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %} ExecStart=/usr/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %}
ExecStop=/usr/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %} ExecStop=/usr/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %}
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -19,7 +19,6 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %} ExecStartPre=/usr/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %}
ExecStart=/usr/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %} ExecStart=/usr/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %}
ExecStop=/usr/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %} ExecStop=/usr/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %}
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -24,7 +24,6 @@ Environment=sonic_asic_platform={{ sonic_asic_platform }}
ExecStartPre=/usr/local/bin/swss.sh start{% if multi_instance == 'true' %} %i{% endif %} ExecStartPre=/usr/local/bin/swss.sh start{% if multi_instance == 'true' %} %i{% endif %}
ExecStart=/usr/local/bin/swss.sh wait{% if multi_instance == 'true' %} %i{% endif %} ExecStart=/usr/local/bin/swss.sh wait{% if multi_instance == 'true' %} %i{% endif %}
ExecStop=/usr/local/bin/swss.sh stop{% if multi_instance == 'true' %} %i{% endif %} ExecStop=/usr/local/bin/swss.sh stop{% if multi_instance == 'true' %} %i{% endif %}
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -18,7 +18,6 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %} ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %}
ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %} ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %}
ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %} ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %}
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -16,7 +16,6 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop ExecStop=/usr/bin/{{docker_container_name}}.sh stop
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -13,7 +13,6 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/local/bin/{{ docker_container_name }}.sh start ExecStartPre=/usr/local/bin/{{ docker_container_name }}.sh start
ExecStart=/usr/local/bin/{{ docker_container_name }}.sh wait ExecStart=/usr/local/bin/{{ docker_container_name }}.sh wait
ExecStop=/usr/local/bin/{{ docker_container_name }}.sh stop ExecStop=/usr/local/bin/{{ docker_container_name }}.sh stop
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -11,7 +11,6 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop ExecStop=/usr/bin/{{docker_container_name}}.sh stop
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -13,7 +13,6 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop ExecStop=/usr/bin/{{docker_container_name}}.sh stop
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -13,7 +13,6 @@ User=root
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start chassisdb ExecStartPre=/usr/bin/{{docker_container_name}}.sh start chassisdb
ExecStart=/usr/bin/{{docker_container_name}}.sh wait chassisdb ExecStart=/usr/bin/{{docker_container_name}}.sh wait chassisdb
ExecStop=/usr/bin/{{docker_container_name}}.sh stop chassisdb ExecStop=/usr/bin/{{docker_container_name}}.sh stop chassisdb
Restart=always
RestartSec=30 RestartSec=30
[Install] [Install]

View File

@ -13,5 +13,4 @@ StartLimitBurst=3
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop ExecStop=/usr/bin/{{docker_container_name}}.sh stop
Restart=always
RestartSec=30 RestartSec=30

View File

@ -13,5 +13,4 @@ User={{ sonicadmin_user }}
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop ExecStop=/usr/bin/{{docker_container_name}}.sh stop
Restart=always
RestartSec=30 RestartSec=30

View File

@ -36,16 +36,30 @@ RADIUS_SERVER_TIMEOUT_DEFAULT = "5"
RADIUS_SERVER_AUTH_TYPE_DEFAULT = "pap" RADIUS_SERVER_AUTH_TYPE_DEFAULT = "pap"
RADIUS_PAM_AUTH_CONF_DIR = "/etc/pam_radius_auth.d/" RADIUS_PAM_AUTH_CONF_DIR = "/etc/pam_radius_auth.d/"
def run_cmd(cmd, log_err=True, raise_exception=False):
    """ Run a shell command and optionally log and/or propagate a failure.

    Args:
        cmd (str): Command line, executed through the shell.
        log_err (bool): When True, a failure is written to syslog at LOG_ERR.
        raise_exception (bool): When True, the caught exception is re-raised
            to the caller; otherwise the failure is swallowed.

    Returns:
        None
    """
    try:
        subprocess.check_call(cmd, shell=True)
    except Exception as err:
        if log_err:
            # Only CalledProcessError carries cmd/returncode/output. Fall back
            # to sensible values for any other exception (e.g. OSError) so the
            # logging itself cannot raise AttributeError and mask the failure.
            syslog.syslog(syslog.LOG_ERR, "{} - failed: return code - {}, output:\n{}"
                          .format(getattr(err, 'cmd', cmd),
                                  getattr(err, 'returncode', None),
                                  getattr(err, 'output', err)))
        if raise_exception:
            raise
def is_true(val): def is_true(val):
if val == 'True' or val == 'true': if val == 'True' or val == 'true':
return True return True
else: else:
return False return False
def is_vlan_sub_interface(ifname): def is_vlan_sub_interface(ifname):
ifname_split = ifname.split(".") ifname_split = ifname.split(".")
return (len(ifname_split) == 2) return (len(ifname_split) == 2)
def sub(l, start, end): def sub(l, start, end):
return l[start:end] return l[start:end]
@ -57,15 +71,210 @@ def obfuscate(data):
return data return data
def run_cmd(cmd, log_err = True): class Feature(object):
try: """ Represents a feature configuration from CONFIG_DB data. """
subprocess.check_call(cmd, shell = True)
except Exception as err: def __init__(self, feature_name, feature_cfg, device_config=None):
if log_err: """ Initialize Feature object based on CONFIG_DB data.
syslog.syslog(syslog.LOG_ERR, "{} - failed: return code - {}, output:\n{}"
.format(err.cmd, err.returncode, err.output)) Args:
return err.returncode feature_name (str): Feature name string
return 0 feature_cfg (dict): Feature CONFIG_DB configuration
deviec_config (dict): DEVICE_METADATA section of CONFIG_DB
"""
self.name = feature_name
self.state = self._get_target_state(feature_cfg.get('state'), device_config or {})
self.auto_restart = feature_cfg.get('auto_restart', 'disabled')
self.has_timer = ast.literal_eval(feature_cfg.get('has_timer', 'False'))
self.has_global_scope = ast.literal_eval(feature_cfg.get('has_global_scope', 'True'))
self.has_per_asic_scope = ast.literal_eval(feature_cfg.get('has_per_asic_scope', 'False'))
def _get_target_state(self, state_configuration, device_config):
""" Returns the target state for the feature by rendering the state field as J2 template.
Args:
state_configuration (str): State configuration from CONFIG_DB
deviec_config (dict): DEVICE_METADATA section of CONFIG_DB
Returns:
(str): Target feature state
"""
if state_configuration is None:
return None
template = jinja2.Template(state_configuration)
target_state = template.render(device_config)
if target_state not in ('enabled', 'disabled', 'always_enabled', 'always_disabled'):
raise ValueError('Invalid state rendered for feature {}: {}'.format(self.name, target_state))
return target_state
class FeatureHandler(object):
    """ Handles FEATURE table updates.

    Keeps a per-feature cache of the last applied configuration and, on
    change, writes a systemd drop-in that sets 'Restart=' for the feature's
    service and enables/disables the corresponding systemd units.
    """

    SYSTEMD_SYSTEM_DIR = '/etc/systemd/system/'
    # Drop-in directory template; formatted with the systemd unit name.
    SYSTEMD_SERVICE_CONF_DIR = os.path.join(SYSTEMD_SYSTEM_DIR, '{}.service.d/')

    def __init__(self, config_db, device_config):
        """ Initialize the handler.

        Args:
            config_db: CONFIG_DB connector used to read and modify the FEATURE table.
            device_config (dict): Passed to Feature when building feature objects.
        """
        self._config_db = config_db
        self._device_config = device_config
        self._cached_config = {}
        self.is_multi_npu = device_info.is_multi_npu()

    def handle(self, feature_name, feature_cfg):
        """ Handle a single FEATURE table update.

        Args:
            feature_name (str): Key of the updated FEATURE entry.
            feature_cfg (dict): New entry data; empty/None means the entry was removed.
        """
        if not feature_cfg:
            # A delete may arrive for a feature that was never cached;
            # dropping it must not raise KeyError.
            self._cached_config.pop(feature_name, None)
            syslog.syslog(syslog.LOG_INFO, "Deregistering feature {}".format(feature_name))
            return

        feature = Feature(feature_name, feature_cfg, self._device_config)
        self._cached_config.setdefault(feature_name, Feature(feature_name, {}))

        # Change auto-restart configuration first.
        # If service reached failed state before this configuration applies (e.g. on boot)
        # the next called self.update_feature_state will start it again. If it will fail
        # again the auto restart will kick-in. Another order may leave it in failed state
        # and not auto restart.
        if self._cached_config[feature_name].auto_restart != feature.auto_restart:
            self.update_feature_auto_restart(feature)
            self._cached_config[feature_name].auto_restart = feature.auto_restart

        # Enable/disable the container service if the feature state was changed from its previous state.
        if self._cached_config[feature_name].state != feature.state:
            if self.update_feature_state(feature):
                self._cached_config[feature_name].state = feature.state
            else:
                # Transition rejected: write the cached state back to CONFIG_DB.
                self.resync_feature_state(self._cached_config[feature_name])

    def update_all_features_config(self):
        """ Apply auto-restart and state configuration for every FEATURE table entry. """
        feature_table = self._config_db.get_table('FEATURE')
        for feature_name in feature_table.keys():
            if not feature_name:
                syslog.syslog(syslog.LOG_WARNING, "Feature is None")
                continue

            feature = Feature(feature_name, feature_table[feature_name], self._device_config)
            self._cached_config.setdefault(feature_name, feature)

            self.update_feature_auto_restart(feature)
            self.update_feature_state(feature)
            self.resync_feature_state(feature)

    def update_feature_state(self, feature):
        """ Enable or disable the feature according to the allowed state transitions.

        Args:
            feature: Feature object carrying the requested state.

        Returns:
            (bool): True if an enable/disable action was performed,
                False if the transition is not allowed.
        """
        cached_feature = self._cached_config[feature.name]
        enable = False
        disable = False

        # Allowed transitions:
        #  None           -> always_enabled
        #                 -> always_disabled
        #                 -> enabled
        #                 -> disabled
        #  always_enabled -> always_disabled
        #  enabled        -> disabled
        #  disabled       -> enabled
        if cached_feature.state is None:
            enable = feature.state in ("always_enabled", "enabled")
            disable = feature.state in ("always_disabled", "disabled")
        elif cached_feature.state in ("always_enabled", "always_disabled"):
            disable = feature.state == "always_disabled"
        elif cached_feature.state in ("enabled", "disabled"):
            enable = feature.state == "enabled"
            disable = feature.state == "disabled"
        else:
            syslog.syslog(syslog.LOG_INFO, "Feature {} service is {}".format(feature.name, cached_feature.state))
            return False

        if not enable and not disable:
            syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature {}"
                          .format(feature.state, feature.name))
            return False

        if enable:
            self.enable_feature(feature)
            syslog.syslog(syslog.LOG_INFO, "Feature {} is enabled and started".format(feature.name))

        if disable:
            self.disable_feature(feature)
            syslog.syslog(syslog.LOG_INFO, "Feature {} is stopped and disabled".format(feature.name))

        return True

    def update_feature_auto_restart(self, feature):
        """ Write the 'Restart=' drop-in for each unit of the feature and reload systemd.

        Args:
            feature: Feature object; 'auto_restart' == "enabled" maps to
                Restart=always, anything else to Restart=no.
        """
        restart_config = "always" if feature.auto_restart == "enabled" else "no"
        service_conf = "[Service]\nRestart={}\n".format(restart_config)
        feature_names, _ = self.get_feature_attribute(feature)

        for feature_name in feature_names:
            dir_name = self.SYSTEMD_SERVICE_CONF_DIR.format(feature_name)
            # exist_ok avoids the check-then-create race of exists() + mkdir().
            os.makedirs(dir_name, exist_ok=True)

            with open(os.path.join(dir_name, 'auto_restart.conf'), 'w') as cfgfile:
                cfgfile.write(service_conf)

        try:
            run_cmd("sudo systemctl daemon-reload", raise_exception=True)
        except Exception:
            syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to configure auto_restart".format(feature.name))
            return

    def get_feature_attribute(self, feature):
        """ Build the systemd unit names and unit-type suffixes for a feature.

        Args:
            feature: Feature object.

        Returns:
            (tuple): (feature_names, feature_suffixes) where feature_names is a
                list of unit base names (with '@<asic>' instances on multi-NPU
                devices) and feature_suffixes is ["service"] plus "timer" when
                the feature has a timer.
        """
        # Create feature name suffix depending feature is running in host or namespace or in both
        feature_names = (
            ([feature.name] if feature.has_global_scope or not self.is_multi_npu else []) +
            ([(feature.name + '@' + str(asic_inst)) for asic_inst in range(device_info.get_num_npus())
                if feature.has_per_asic_scope and self.is_multi_npu])
        )

        if not feature_names:
            syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available"
                          .format(feature.name))

        feature_suffixes = ["service"] + (["timer"] if feature.has_timer else [])

        return feature_names, feature_suffixes

    def enable_feature(self, feature):
        """ Unmask, enable and start the systemd units of the feature.

        Stops at the first failing command and logs the failure.
        """
        cmds = []
        feature_names, feature_suffixes = self.get_feature_attribute(feature)
        for feature_name in feature_names:
            for suffix in feature_suffixes:
                cmds.append("sudo systemctl unmask {}.{}".format(feature_name, suffix))

            # If feature has timer associated with it, start/enable corresponding systemd .timer unit
            # otherwise, start/enable corresponding systemd .service unit
            cmds.append("sudo systemctl enable {}.{}".format(feature_name, feature_suffixes[-1]))
            cmds.append("sudo systemctl start {}.{}".format(feature_name, feature_suffixes[-1]))

        for cmd in cmds:
            syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
            try:
                run_cmd(cmd, raise_exception=True)
            except Exception:
                syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
                              .format(feature.name, feature_suffixes[-1]))
                return

    def disable_feature(self, feature):
        """ Stop, disable and mask the systemd units of the feature.

        Stops at the first failing command and logs the failure.
        """
        cmds = []
        feature_names, feature_suffixes = self.get_feature_attribute(feature)
        for feature_name in feature_names:
            # Stop in reverse order, i.e. the timer (if any) before the service.
            for suffix in reversed(feature_suffixes):
                cmds.append("sudo systemctl stop {}.{}".format(feature_name, suffix))

            cmds.append("sudo systemctl disable {}.{}".format(feature_name, feature_suffixes[-1]))
            cmds.append("sudo systemctl mask {}.{}".format(feature_name, feature_suffixes[-1]))

        for cmd in cmds:
            syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
            try:
                run_cmd(cmd, raise_exception=True)
            except Exception:
                syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be stopped and disabled"
                              .format(feature.name, feature_suffixes[-1]))
                return

    def resync_feature_state(self, feature):
        """ Write the given feature's state back to its FEATURE entry in CONFIG_DB. """
        self._config_db.mod_entry('FEATURE', feature.name, {'state': feature.state})
class Iptables(object): class Iptables(object):
def __init__(self): def __init__(self):
@ -536,7 +745,7 @@ class NtpCfg(object):
def __init__(self, CfgDb): def __init__(self, CfgDb):
self.config_db = CfgDb self.config_db = CfgDb
self.ntp_global = {} self.ntp_global = {}
self.has_ntp_servers = False self.has_ntp_servers = False
def load(self, ntp_global_conf, ntp_server_conf): def load(self, ntp_global_conf, ntp_server_conf):
syslog.syslog(syslog.LOG_INFO, "NtpCfg load ...") syslog.syslog(syslog.LOG_INFO, "NtpCfg load ...")
@ -617,7 +826,7 @@ class NtpCfg(object):
self.has_ntp_servers = True self.has_ntp_servers = True
else: else:
# for runtime ntp server change, to determine if there is ntp server configured, need to # for runtime ntp server change, to determine if there is ntp server configured, need to
# get from configDB, as delete triggers 2 event handling # get from configDB, as delete triggers 2 event handling
ntp_servers_tbl = self.config_db.get_table('NTP_SERVER') ntp_servers_tbl = self.config_db.get_table('NTP_SERVER')
if ntp_servers_tbl != {}: if ntp_servers_tbl != {}:
self.has_ntp_servers = True self.has_ntp_servers = True
@ -643,9 +852,8 @@ class HostConfigDaemon:
self.hostname_cache="" self.hostname_cache=""
self.aaacfg = AaaCfg() self.aaacfg = AaaCfg()
self.iptables = Iptables() self.iptables = Iptables()
self.feature_handler = FeatureHandler(self.config_db, self.device_config)
self.ntpcfg = NtpCfg(self.config_db) self.ntpcfg = NtpCfg(self.config_db)
# Cache the values of 'state' field in 'FEATURE' table of each container
self.cached_feature_states = {}
self.is_multi_npu = device_info.is_multi_npu() self.is_multi_npu = device_info.is_multi_npu()
@ -680,125 +888,6 @@ class HostConfigDaemon:
# Update AAA with the hostname # Update AAA with the hostname
self.aaacfg.hostname_update(self.hostname_cache) self.aaacfg.hostname_update(self.hostname_cache)
def get_target_state(self, feature_name, state):
template = jinja2.Template(state)
target_state = template.render(self.device_config)
entry = self.config_db.get_entry('FEATURE', feature_name)
entry["state"] = target_state
self.config_db.set_entry("FEATURE", feature_name, entry)
return target_state
def get_feature_attribute(self, feature_name, feature_table):
has_timer = ast.literal_eval(feature_table[feature_name].get('has_timer', 'False'))
has_global_scope = ast.literal_eval(feature_table[feature_name].get('has_global_scope', 'True'))
has_per_asic_scope = ast.literal_eval(feature_table[feature_name].get('has_per_asic_scope', 'False'))
# Create feature name suffix depending feature is running in host or namespace or in both
feature_names = (
([feature_name] if has_global_scope or not self.is_multi_npu else []) +
([(feature_name + '@' + str(asic_inst)) for asic_inst in range(device_info.get_num_npus())
if has_per_asic_scope and self.is_multi_npu])
)
if not feature_names:
syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available"
.format(feature_name))
feature_suffixes = ["service"] + (["timer"] if has_timer else [])
return feature_names, feature_suffixes
def enable_feature(self, feature_names, feature_suffixes):
start_cmds = []
for feature_name in feature_names:
for suffix in feature_suffixes:
start_cmds.append("sudo systemctl unmask {}.{}".format(feature_name, suffix))
# If feature has timer associated with it, start/enable corresponding systemd .timer unit
# otherwise, start/enable corresponding systemd .service unit
start_cmds.append("sudo systemctl enable {}.{}".format(feature_name, feature_suffixes[-1]))
start_cmds.append("sudo systemctl start {}.{}".format(feature_name, feature_suffixes[-1]))
for cmd in start_cmds:
syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
try:
subprocess.check_call(cmd, shell=True)
except subprocess.CalledProcessError as err:
syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
.format(err.cmd, err.returncode, err.output))
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
.format(feature_name, feature_suffixes[-1]))
return
def disable_feature(self, feature_names, feature_suffixes):
stop_cmds = []
for feature_name in feature_names:
for suffix in reversed(feature_suffixes):
stop_cmds.append("sudo systemctl stop {}.{}".format(feature_name, suffix))
stop_cmds.append("sudo systemctl disable {}.{}".format(feature_name, suffix))
stop_cmds.append("sudo systemctl mask {}.{}".format(feature_name, suffix))
for cmd in stop_cmds:
syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
try:
subprocess.check_call(cmd, shell=True)
except subprocess.CalledProcessError as err:
syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
.format(err.cmd, err.returncode, err.output))
syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to be stopped and disabled".format(feature_name))
return
def is_invariant_feature(self, feature_name, state, feature_table):
invariant_feature = self.cached_feature_states[feature_name] == "always_enabled" or \
self.cached_feature_states[feature_name] == "always_disabled"
if invariant_feature:
invariant_state = self.cached_feature_states[feature_name]
if state != invariant_state:
syslog.syslog(syslog.LOG_INFO, "Feature '{}' service is '{}'"
.format(feature_name, invariant_state))
entry = self.config_db.get_entry('FEATURE', feature_name)
entry['state'] = invariant_state
self.config_db.set_entry('FEATURE', feature_name, entry)
if state == "always_disabled":
feature_names, feature_suffixes = self.get_feature_attribute(feature_name, feature_table)
self.disable_feature(feature_names, feature_suffixes)
syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name))
return invariant_feature
def update_feature_state(self, feature_name, state, feature_table):
if not self.is_invariant_feature(feature_name, state, feature_table):
self.cached_feature_states[feature_name] = state
feature_names, feature_suffixes = self.get_feature_attribute(feature_name, feature_table)
if state == "enabled":
self.enable_feature(feature_names, feature_suffixes)
syslog.syslog(syslog.LOG_INFO, "Feature '{}.{}' is enabled and started"
.format(feature_name, feature_suffixes[-1]))
elif state == "disabled":
self.disable_feature(feature_names, feature_suffixes)
syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name))
else:
syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature '{}'"
.format(state, feature_name))
def update_all_feature_states(self):
feature_table = self.config_db.get_table('FEATURE')
for feature_name in feature_table:
if not feature_name:
syslog.syslog(syslog.LOG_WARNING, "Feature is None")
continue
state = feature_table[feature_name]['state']
if not state:
syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name))
continue
target_state = self.get_target_state(feature_name, state)
# Store the initial value of 'state' field in 'FEATURE' table of a specific container
self.cached_feature_states[feature_name] = target_state
self.update_feature_state(feature_name, target_state, feature_table)
def aaa_handler(self, key, data): def aaa_handler(self, key, data):
self.aaacfg.aaa_update(key, data) self.aaacfg.aaa_update(key, data)
@ -863,24 +952,6 @@ class HostConfigDaemon:
key = ConfigDBConnector.deserialize_key(key) key = ConfigDBConnector.deserialize_key(key)
self.aaacfg.handle_radius_source_intf_ip_chg(key) self.aaacfg.handle_radius_source_intf_ip_chg(key)
def feature_state_handler(self, key, data):
feature_name = key
feature_table = self.config_db.get_table('FEATURE')
if feature_name not in feature_table:
syslog.syslog(syslog.LOG_WARNING, "Feature '{}' not in FEATURE table".format(feature_name))
return
state = feature_table[feature_name]['state']
if not state:
syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name))
return
self.cached_feature_states.setdefault(feature_name, 'disabled')
# Enable/disable the container service if the feature state was changed from its previous state.
if self.cached_feature_states[feature_name] != state:
self.update_feature_state(feature_name, state, feature_table)
def ntp_server_handler (self, key, data): def ntp_server_handler (self, key, data):
syslog.syslog(syslog.LOG_INFO, 'NTP server handler...') syslog.syslog(syslog.LOG_INFO, 'NTP server handler...')
ntp_server_db = self.config_db.get_table('NTP_SERVER') ntp_server_db = self.config_db.get_table('NTP_SERVER')
@ -903,7 +974,6 @@ class HostConfigDaemon:
subprocess.call(systemctl_cmd, shell=True) subprocess.call(systemctl_cmd, shell=True)
def start(self): def start(self):
self.config_db.subscribe('AAA', lambda table, key, data: self.aaa_handler(key, data)) self.config_db.subscribe('AAA', lambda table, key, data: self.aaa_handler(key, data))
self.config_db.subscribe('TACPLUS_SERVER', lambda table, key, data: self.tacacs_server_handler(key, data)) self.config_db.subscribe('TACPLUS_SERVER', lambda table, key, data: self.tacacs_server_handler(key, data))
self.config_db.subscribe('TACPLUS', lambda table, key, data: self.tacacs_global_handler(key, data)) self.config_db.subscribe('TACPLUS', lambda table, key, data: self.tacacs_global_handler(key, data))
@ -911,11 +981,11 @@ class HostConfigDaemon:
self.config_db.subscribe('RADIUS', lambda table, key, data: self.radius_global_handler(key, data)) self.config_db.subscribe('RADIUS', lambda table, key, data: self.radius_global_handler(key, data))
self.config_db.subscribe('MGMT_INTERFACE', lambda table, key, data: self.mgmt_intf_handler(key, data)) self.config_db.subscribe('MGMT_INTERFACE', lambda table, key, data: self.mgmt_intf_handler(key, data))
self.config_db.subscribe('LOOPBACK_INTERFACE', lambda table, key, data: self.lpbk_handler(key, data)) self.config_db.subscribe('LOOPBACK_INTERFACE', lambda table, key, data: self.lpbk_handler(key, data))
self.config_db.subscribe('FEATURE', lambda table, key, data: self.feature_handler.handle(key, data))
self.config_db.subscribe('VLAN_INTERFACE', lambda table, key, data: self.vlan_intf_handler(key, data)) self.config_db.subscribe('VLAN_INTERFACE', lambda table, key, data: self.vlan_intf_handler(key, data))
self.config_db.subscribe('VLAN_SUB_INTERFACE', lambda table, key, data: self.vlan_sub_intf_handler(key, data)) self.config_db.subscribe('VLAN_SUB_INTERFACE', lambda table, key, data: self.vlan_sub_intf_handler(key, data))
self.config_db.subscribe('PORTCHANNEL_INTERFACE', lambda table, key, data: self.portchannel_intf_handler(key, data)) self.config_db.subscribe('PORTCHANNEL_INTERFACE', lambda table, key, data: self.portchannel_intf_handler(key, data))
self.config_db.subscribe('INTERFACE', lambda table, key, data: self.phy_intf_handler(key, data)) self.config_db.subscribe('INTERFACE', lambda table, key, data: self.phy_intf_handler(key, data))
self.config_db.subscribe('FEATURE', lambda table, key, data: self.feature_state_handler(key, data))
self.config_db.subscribe('NTP_SERVER', lambda table, key, data: self.ntp_server_handler(key, data)) self.config_db.subscribe('NTP_SERVER', lambda table, key, data: self.ntp_server_handler(key, data))
self.config_db.subscribe('NTP', lambda table, key, data: self.ntp_global_handler(key, data)) self.config_db.subscribe('NTP', lambda table, key, data: self.ntp_global_handler(key, data))
self.config_db.subscribe('KDUMP', lambda table, key, data: self.kdump_handler(key, data)) self.config_db.subscribe('KDUMP', lambda table, key, data: self.kdump_handler(key, data))
@ -927,7 +997,7 @@ class HostConfigDaemon:
"systemctl has finished initialization -- proceeding ...") "systemctl has finished initialization -- proceeding ...")
# Update all feature states once upon starting # Update all feature states once upon starting
self.update_all_feature_states() self.feature_handler.update_all_features_config()
# Defer load until subscribe # Defer load until subscribe
self.load() self.load()

View File

@ -37,6 +37,7 @@ setup(
tests_require = [ tests_require = [
'parameterized', 'parameterized',
'pytest', 'pytest',
'pyfakefs',
'sonic-py-common' 'sonic-py-common'
], ],
classifiers = [ classifiers = [

View File

@ -9,6 +9,8 @@ from unittest import TestCase, mock
from .test_vectors import HOSTCFGD_TEST_VECTOR from .test_vectors import HOSTCFGD_TEST_VECTOR
from .mock_configdb import MockConfigDb from .mock_configdb import MockConfigDb
from pyfakefs.fake_filesystem_unittest import patchfs
swsssdk.ConfigDBConnector = MockConfigDb swsssdk.ConfigDBConnector = MockConfigDb
test_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) test_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@ -51,8 +53,36 @@ class TestHostcfgd(TestCase):
break break
return is_equal return is_equal
def __verify_fs(self, table):
    """
    Check the systemd override files that hostcfgd wrote for each feature.

    For every feature in ``table`` the feature's "auto_restart.conf"
    override file is read and its content is asserted to be a
    ``[Service]`` section whose ``Restart=`` value corresponds to the
    feature's ``auto_restart`` setting ("enabled" -> "always",
    "disabled" -> "no"). A feature without an ``auto_restart`` field is
    treated as "disabled".

    Args:
        table(dict): Current Config Db FEATURE table, keyed by feature name.

    Returns:
        None. A mismatch raises AssertionError.
    """
    restart_by_mode = {
        "enabled": "always",
        "disabled": "no",
    }
    # NOTE(review): the joined path is passed through str.format() with the
    # feature name below, so SYSTEMD_SERVICE_CONF_DIR presumably contains a
    # "{}" placeholder -- confirm against hostcfgd.FeatureHandler.
    conf_path_template = os.path.join(
        hostcfgd.FeatureHandler.SYSTEMD_SERVICE_CONF_DIR,
        "auto_restart.conf",
    )

    for feature_name, feature_cfg in table.items():
        mode = feature_cfg.get("auto_restart", "disabled")
        with open(conf_path_template.format(feature_name)) as conf_file:
            content = conf_file.read().strip()
            expected = "[Service]\nRestart={}".format(restart_by_mode[mode])
            assert content == expected
@parameterized.expand(HOSTCFGD_TEST_VECTOR) @parameterized.expand(HOSTCFGD_TEST_VECTOR)
def test_hostcfgd(self, test_name, test_data): @patchfs
def test_hostcfgd(self, test_name, test_data, fs):
""" """
Test hostcfgd daemon initialization Test hostcfgd daemon initialization
@ -63,12 +93,44 @@ class TestHostcfgd(TestCase):
Returns: Returns:
None None
""" """
fs.add_real_paths(swsssdk.__path__) # add real path of swsssdk for database_config.json
fs.create_dir(hostcfgd.FeatureHandler.SYSTEMD_SYSTEM_DIR)
MockConfigDb.set_config_db(test_data["config_db"]) MockConfigDb.set_config_db(test_data["config_db"])
with mock.patch("hostcfgd.subprocess") as mocked_subprocess: with mock.patch("hostcfgd.subprocess") as mocked_subprocess:
host_config_daemon = hostcfgd.HostConfigDaemon() host_config_daemon = hostcfgd.HostConfigDaemon()
host_config_daemon.update_all_feature_states() host_config_daemon.feature_handler.update_all_features_config()
assert self.__verify_table( assert self.__verify_table(
MockConfigDb.get_config_db()["FEATURE"], MockConfigDb.get_config_db()["FEATURE"],
test_data["expected_config_db"]["FEATURE"] test_data["expected_config_db"]["FEATURE"]
), "Test failed for test data: {0}".format(test_data) ), "Test failed for test data: {0}".format(test_data)
mocked_subprocess.check_call.assert_has_calls(test_data["expected_subprocess_calls"], any_order=True) mocked_subprocess.check_call.assert_has_calls(test_data["expected_subprocess_calls"], any_order=True)
self.__verify_fs(test_data["config_db"]["FEATURE"])
def test_feature_config_parsing(self):
    """
    A Feature built from a fully populated FEATURE entry exposes the
    name, state and auto_restart strings unchanged and reports the
    'True'/'False' string flags as truthy/falsy attributes.
    """
    raw_entry = {
        'state': 'enabled',
        'auto_restart': 'enabled',
        'has_timer': 'True',
        'has_global_scope': 'False',
        'has_per_asic_scope': 'True',
    }
    feature = hostcfgd.Feature('swss', raw_entry)

    # String-valued attributes are passed through as-is.
    assert feature.name == 'swss'
    assert feature.state == 'enabled'
    assert feature.auto_restart == 'enabled'

    # Flag attributes are checked for truthiness only.
    assert feature.has_timer
    assert not feature.has_global_scope
    assert feature.has_per_asic_scope
def test_feature_config_parsing_defaults(self):
    """
    A Feature built from an entry that only specifies 'state' falls back
    to the expected defaults: auto_restart 'disabled', no timer, global
    scope set, per-ASIC scope unset.
    """
    minimal_entry = {'state': 'enabled'}
    feature = hostcfgd.Feature('swss', minimal_entry)

    # Explicitly supplied values.
    assert feature.name == 'swss'
    assert feature.state == 'enabled'

    # Values that must come from defaults.
    assert feature.auto_restart == 'disabled'
    assert not feature.has_timer
    assert feature.has_global_scope
    assert not feature.has_per_asic_scope

View File

@ -25,6 +25,11 @@ class MockConfigDb(object):
def get_entry(self, key, field): def get_entry(self, key, field):
return MockConfigDb.CONFIG_DB[key][field] return MockConfigDb.CONFIG_DB[key][field]
def mod_entry(self, key, field, data):
    """
    Merge ``data`` into an existing entry instead of replacing it.

    The entry is looked up with ``get_entry(key, field)``, updated in
    place with the items from ``data`` (existing fields not present in
    ``data`` are kept), and written back with ``set_entry``.

    Args:
        key: first-level index passed to get_entry/set_entry.
        field: second-level index passed to get_entry/set_entry.
        data: mapping of fields to add or overwrite in the entry.
    """
    entry = self.get_entry(key, field)
    # dict.update mutates the object returned by get_entry.
    entry.update(data)
    self.set_entry(key, field, entry)
def set_entry(self, key, field, data): def set_entry(self, key, field, data):
MockConfigDb.CONFIG_DB[key][field] = data MockConfigDb.CONFIG_DB[key][field] = data

View File

@ -41,7 +41,7 @@ HOSTCFGD_TEST_VECTOR = [
"state": "{% if 'subtype' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['subtype'] == 'DualToR' %}enabled{% else %}always_disabled{% endif %}" "state": "{% if 'subtype' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['subtype'] == 'DualToR' %}enabled{% else %}always_disabled{% endif %}"
}, },
"telemetry": { "telemetry": {
"auto_restart": "enabled", "auto_restart": "disabled",
"has_global_scope": "True", "has_global_scope": "True",
"has_per_asic_scope": "False", "has_per_asic_scope": "False",
"has_timer": "True", "has_timer": "True",
@ -73,7 +73,7 @@ HOSTCFGD_TEST_VECTOR = [
"state": "enabled" "state": "enabled"
}, },
"telemetry": { "telemetry": {
"auto_restart": "enabled", "auto_restart": "disabled",
"has_global_scope": "True", "has_global_scope": "True",
"has_per_asic_scope": "False", "has_per_asic_scope": "False",
"has_timer": "True", "has_timer": "True",