[hostcfgd] Configure service auto-restart in hostcfgd. (#5744)
Before this change, a process running inside every SONiC container read the FEATURE table 'auto_restart' field and, depending on its value, decided whether the container had to be killed. If the container was killed, the service auto-restart mechanism restarted it. This change moves that logic from the container to the host daemon, hostcfgd. The 'auto_restart' handling is kept in supervisor-proc-exit-listener, but it is no longer required for a container that wants to support the auto-restart feature. hostcfgd refactoring: move feature handling into a separate class; override the systemd service Restart= setting from hostcfgd; remove the default systemd Restart=always. Signed-off-by: Stepan Blyshchak stepanb@nvidia.com - Why I did it Remove the need to deal with container orchestration logic from the container itself. Leave this logic to the orchestrator - the host OS. - How I did it hostcfgd configures the 'Restart=' value of the systemd service. - How to verify it root@r-tigon-11:/home/admin# sudo config feature autorestart lldp enabled root@r-tigon-11:/home/admin# show feature status | grep lldp lldp enabled enabled root@r-tigon-11:/home/admin# docker exec -it lldp pkill -9 lldpd root@r-tigon-11:/home/admin# docker ps -a | grep lldp 65058396277c docker-lldp:latest "/usr/bin/docker-lld…" 2 days ago Exited (0) 20 seconds ago lldp root@r-tigon-11:/home/admin# docker ps -a | grep lldp 65058396277c docker-lldp:latest "/usr/bin/docker-lld…" 2 days ago Up 5 seconds lldp root@r-tigon-11:/home/admin# sudo config feature autorestart lldp disabled root@r-tigon-11:/home/admin# docker exec -it lldp pkill -9 lldpd root@r-tigon-11:/home/admin# docker ps -a | grep lldp 65058396277c docker-lldp:latest "/usr/bin/docker-lld…" 2 days ago Up 35 seconds lldp root@r-tigon-11:/home/admin# docker ps -a | grep lldp 65058396277c docker-lldp:latest "/usr/bin/docker-lld…" 2 days ago Exited (0) 3 seconds ago lldp root@r-tigon-11:/home/admin# docker ps -a | grep lldp 65058396277c docker-lldp:latest "/usr/bin/docker-lld…" 2 days ago 
Exited (0) 39 seconds ago lldp root@r-tigon-11:/home/admin#
This commit is contained in:
parent
abbf627b8a
commit
9ce7c6d9fe
@ -13,7 +13,6 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start
|
||||
ExecStart=/usr/bin/{{ docker_container_name }}.sh wait
|
||||
ExecStop=/usr/bin/{{ docker_container_name }}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -13,7 +13,6 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -16,7 +16,6 @@ ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start{% if multi_instan
|
||||
ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %}
|
||||
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -17,7 +17,6 @@ User=root
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %}
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -19,7 +19,6 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %}
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -24,7 +24,6 @@ Environment=sonic_asic_platform={{ sonic_asic_platform }}
|
||||
ExecStartPre=/usr/local/bin/swss.sh start{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStart=/usr/local/bin/swss.sh wait{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStop=/usr/local/bin/swss.sh stop{% if multi_instance == 'true' %} %i{% endif %}
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -18,7 +18,6 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait{% if multi_instance == 'true' %} %i{% endif %}
|
||||
ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop{% if multi_instance == 'true' %} %i{% endif %}
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -16,7 +16,6 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -13,7 +13,6 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/local/bin/{{ docker_container_name }}.sh start
|
||||
ExecStart=/usr/local/bin/{{ docker_container_name }}.sh wait
|
||||
ExecStop=/usr/local/bin/{{ docker_container_name }}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -11,7 +11,6 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -13,7 +13,6 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -13,7 +13,6 @@ User=root
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start chassisdb
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait chassisdb
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop chassisdb
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
|
@ -13,5 +13,4 @@ StartLimitBurst=3
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
@ -13,5 +13,4 @@ User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
|
||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
|
||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
@ -36,16 +36,30 @@ RADIUS_SERVER_TIMEOUT_DEFAULT = "5"
|
||||
RADIUS_SERVER_AUTH_TYPE_DEFAULT = "pap"
|
||||
RADIUS_PAM_AUTH_CONF_DIR = "/etc/pam_radius_auth.d/"
|
||||
|
||||
|
||||
def run_cmd(cmd, log_err=True, raise_exception=False):
    """ Run a shell command, optionally logging and/or propagating failures.

    Args:
        cmd (str): Command line, executed through the shell (shell=True).
            NOTE(review): callers must only pass trusted, internally built
            strings; nothing here escapes shell metacharacters.
        log_err (bool): Log a failure to syslog at LOG_ERR level.
        raise_exception (bool): Re-raise the failure to the caller instead of
            swallowing it.

    Returns:
        None

    Raises:
        subprocess.CalledProcessError: The command exited non-zero and
            raise_exception is True.
        Exception: The command could not be launched at all and
            raise_exception is True.
    """
    try:
        subprocess.check_call(cmd, shell=True)
    except subprocess.CalledProcessError as err:
        if log_err:
            syslog.syslog(syslog.LOG_ERR, "{} - failed: return code - {}, output:\n{}"
                          .format(err.cmd, err.returncode, err.output))
        if raise_exception:
            raise
    except Exception as err:
        # Only CalledProcessError carries cmd/returncode/output. Formatting
        # those attributes for any other failure would itself raise
        # AttributeError and mask the real error, so log it generically.
        if log_err:
            syslog.syslog(syslog.LOG_ERR, "{} - failed: {}".format(cmd, err))
        if raise_exception:
            raise
|
||||
|
||||
|
||||
def is_true(val):
    """ Return True only for the strings 'True' and 'true'; False otherwise. """
    return val in ('True', 'true')
|
||||
|
||||
|
||||
def is_vlan_sub_interface(ifname):
    """ Return True when ifname consists of exactly two dot-separated parts. """
    # Splitting on '.' yields two parts exactly when there is a single dot.
    return ifname.count(".") == 1
|
||||
|
||||
|
||||
def sub(l, start, end):
    """ Return the slice of l from index start (inclusive) to end (exclusive). """
    window = slice(start, end)
    return l[window]
|
||||
|
||||
@ -57,15 +71,210 @@ def obfuscate(data):
|
||||
return data
|
||||
|
||||
|
||||
def run_cmd(cmd, log_err = True):
|
||||
try:
|
||||
subprocess.check_call(cmd, shell = True)
|
||||
except Exception as err:
|
||||
if log_err:
|
||||
syslog.syslog(syslog.LOG_ERR, "{} - failed: return code - {}, output:\n{}"
|
||||
.format(err.cmd, err.returncode, err.output))
|
||||
return err.returncode
|
||||
return 0
|
||||
class Feature(object):
    """ Represents a feature configuration from CONFIG_DB data.

    Attributes:
        name (str): Feature name.
        state (str or None): Target state rendered from the 'state' field,
            or None when the configuration carries no 'state'.
        auto_restart (str): 'auto_restart' field, 'disabled' when absent.
        has_timer (bool): 'has_timer' field, False when absent.
        has_global_scope (bool): 'has_global_scope' field, True when absent.
        has_per_asic_scope (bool): 'has_per_asic_scope' field, False when absent.
    """

    _VALID_STATES = ('enabled', 'disabled', 'always_enabled', 'always_disabled')

    def __init__(self, feature_name, feature_cfg, device_config=None):
        """ Initialize Feature object based on CONFIG_DB data.

        Args:
            feature_name (str): Feature name string
            feature_cfg (dict): Feature CONFIG_DB configuration
            device_config (dict): DEVICE_METADATA section of CONFIG_DB

        Raises:
            ValueError: The rendered state is not a recognized state, or a
                boolean flag cannot be parsed.
        """

        # The name must be set first: _get_target_state uses it in its error message.
        self.name = feature_name
        self.state = self._get_target_state(feature_cfg.get('state'), device_config or {})
        self.auto_restart = feature_cfg.get('auto_restart', 'disabled')
        self.has_timer = self._parse_flag(feature_cfg.get('has_timer', 'False'))
        self.has_global_scope = self._parse_flag(feature_cfg.get('has_global_scope', 'True'))
        self.has_per_asic_scope = self._parse_flag(feature_cfg.get('has_per_asic_scope', 'False'))

    @staticmethod
    def _parse_flag(value):
        """ Convert a flag value into a Python object.

        Strings are parsed as Python literals (e.g. 'True' / 'False'); values
        that are already bool are returned unchanged instead of being rejected
        by ast.literal_eval.
        """

        if isinstance(value, bool):
            return value
        return ast.literal_eval(value)

    def _get_target_state(self, state_configuration, device_config):
        """ Returns the target state for the feature by rendering the state field as J2 template.

        Args:
            state_configuration (str): State configuration from CONFIG_DB
            device_config (dict): DEVICE_METADATA section of CONFIG_DB
        Returns:
            (str): Target feature state, or None when no state is configured
        Raises:
            ValueError: The rendered value is not a recognized state
        """

        if state_configuration is None:
            return None

        template = jinja2.Template(state_configuration)
        target_state = template.render(device_config)
        if target_state not in self._VALID_STATES:
            raise ValueError('Invalid state rendered for feature {}: {}'.format(self.name, target_state))
        return target_state
|
||||
|
||||
|
||||
class FeatureHandler(object):
    """ Handles FEATURE table updates.

    Caches the last applied configuration of every feature and, on each
    update, applies only what changed: the systemd 'Restart=' override that
    implements auto_restart, and the enabled/disabled state of the feature's
    systemd units.
    """

    SYSTEMD_SYSTEM_DIR = '/etc/systemd/system/'
    # Drop-in directory template; formatted with the systemd unit name.
    SYSTEMD_SERVICE_CONF_DIR = os.path.join(SYSTEMD_SYSTEM_DIR, '{}.service.d/')

    def __init__(self, config_db, device_config):
        """ Initialize the handler.

        Args:
            config_db: CONFIG_DB connector; get_table() and mod_entry() are used.
            device_config (dict): Context passed to Feature for rendering the state.
        """
        self._config_db = config_db
        self._device_config = device_config
        self._cached_config = {}
        self.is_multi_npu = device_info.is_multi_npu()

    def handle(self, feature_name, feature_cfg):
        """ Handle a FEATURE table notification for a single feature.

        Args:
            feature_name (str): Feature name (FEATURE table key).
            feature_cfg (dict): New configuration; empty/None means the entry
                was removed.
        """
        if not feature_cfg:
            # A delete may arrive for a feature that was never cached; a
            # missing key must not raise out of the subscription callback.
            self._cached_config.pop(feature_name, None)
            syslog.syslog(syslog.LOG_INFO, "Deregistering feature {}".format(feature_name))
            return

        feature = Feature(feature_name, feature_cfg, self._device_config)
        cached_feature = self._cached_config.setdefault(feature_name, Feature(feature_name, {}))

        # Change auto-restart configuration first.
        # If service reached failed state before this configuration applies (e.g. on boot)
        # the next called self.update_feature_state will start it again. If it will fail
        # again the auto restart will kick-in. Another order may leave it in failed state
        # and not auto restart.
        if cached_feature.auto_restart != feature.auto_restart:
            self.update_feature_auto_restart(feature)
            cached_feature.auto_restart = feature.auto_restart

        # Enable/disable the container service if the feature state was changed from its previous state.
        if cached_feature.state != feature.state:
            if self.update_feature_state(feature):
                cached_feature.state = feature.state
            else:
                # Transition refused: write the cached state back to CONFIG_DB.
                self.resync_feature_state(cached_feature)

    def update_all_features_config(self):
        """ Apply the configuration of every feature found in the FEATURE table. """
        feature_table = self._config_db.get_table('FEATURE')
        for feature_name, feature_cfg in feature_table.items():
            if not feature_name:
                syslog.syslog(syslog.LOG_WARNING, "Feature is None")
                continue

            feature = Feature(feature_name, feature_cfg, self._device_config)
            self._cached_config.setdefault(feature_name, feature)

            self.update_feature_auto_restart(feature)

            self.update_feature_state(feature)
            self.resync_feature_state(feature)

    def update_feature_state(self, feature):
        """ Enable or disable the feature according to its requested state.

        Args:
            feature (Feature): Requested configuration. An entry for
                feature.name must already exist in the cache.

        Returns:
            bool: True when the requested state was acted upon, False when
                the transition is not allowed.
        """
        cached_feature = self._cached_config[feature.name]
        enable = False
        disable = False

        # Allowed transitions:
        #  None           -> always_enabled
        #                 -> always_disabled
        #                 -> enabled
        #                 -> disabled
        #  always_enabled -> always_disabled
        #  enabled        -> disabled
        #  disabled       -> enabled
        if cached_feature.state is None:
            enable = feature.state in ("always_enabled", "enabled")
            disable = feature.state in ("always_disabled", "disabled")
        elif cached_feature.state in ("always_enabled", "always_disabled"):
            disable = feature.state == "always_disabled"
            # update_all_features_config() seeds the cache with the very
            # object it then applies, so at start-up an always_enabled
            # feature arrives here with cached state == requested state.
            # (Re)applying the same invariant state must start the service;
            # always_disabled -> always_enabled stays forbidden.
            enable = (feature.state == "always_enabled"
                      and cached_feature.state == "always_enabled")
        elif cached_feature.state in ("enabled", "disabled"):
            enable = feature.state == "enabled"
            disable = feature.state == "disabled"
        else:
            syslog.syslog(syslog.LOG_INFO, "Feature {} service is {}".format(feature.name, cached_feature.state))
            return False

        if not enable and not disable:
            syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature {}"
                          .format(feature.state, feature.name))
            return False

        if enable:
            self.enable_feature(feature)
            syslog.syslog(syslog.LOG_INFO, "Feature {} is enabled and started".format(feature.name))

        if disable:
            self.disable_feature(feature)
            syslog.syslog(syslog.LOG_INFO, "Feature {} is stopped and disabled".format(feature.name))

        return True

    def update_feature_auto_restart(self, feature):
        """ Write the systemd 'Restart=' drop-in for the feature and reload systemd.

        'Restart=always' is written when auto_restart is 'enabled', otherwise
        'Restart=no'. One auto_restart.conf is written per unit name.
        """
        restart_config = "always" if feature.auto_restart == "enabled" else "no"
        service_conf = "[Service]\nRestart={}\n".format(restart_config)
        feature_names, feature_suffixes = self.get_feature_attribute(feature)

        for feature_name in feature_names:
            dir_name = self.SYSTEMD_SERVICE_CONF_DIR.format(feature_name)
            # exist_ok avoids the check-then-create race of exists() + mkdir().
            os.makedirs(dir_name, exist_ok=True)
            with open(os.path.join(dir_name, 'auto_restart.conf'), 'w') as cfgfile:
                cfgfile.write(service_conf)

        try:
            run_cmd("sudo systemctl daemon-reload", raise_exception=True)
        except Exception:
            syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to configure auto_restart".format(feature.name))
            return

    def get_feature_attribute(self, feature):
        """ Return the systemd unit names and unit suffixes of the feature.

        Returns:
            tuple: (feature_names, feature_suffixes). feature_names holds the
                plain name and/or '<name>@<asic>' instance names;
                feature_suffixes is ['service'] or ['service', 'timer'].
        """
        # Build the unit names depending on whether the feature runs in the host,
        # in the per-ASIC namespaces, or in both.
        feature_names = []
        if feature.has_global_scope or not self.is_multi_npu:
            feature_names.append(feature.name)
        if feature.has_per_asic_scope and self.is_multi_npu:
            # The ASIC count is queried only when instance names are needed.
            feature_names.extend(feature.name + '@' + str(asic_inst)
                                 for asic_inst in range(device_info.get_num_npus()))

        if not feature_names:
            syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available"
                          .format(feature.name))

        feature_suffixes = ["service"] + (["timer"] if feature.has_timer else [])

        return feature_names, feature_suffixes

    def _enable_cmds(self, feature):
        """ Build the systemctl commands that unmask, enable and start the feature. """
        cmds = []
        feature_names, feature_suffixes = self.get_feature_attribute(feature)
        for feature_name in feature_names:
            for suffix in feature_suffixes:
                cmds.append("sudo systemctl unmask {}.{}".format(feature_name, suffix))
            # If feature has timer associated with it, start/enable corresponding systemd .timer unit
            # otherwise, start/enable corresponding systemd .service unit
            cmds.append("sudo systemctl enable {}.{}".format(feature_name, feature_suffixes[-1]))
            cmds.append("sudo systemctl start {}.{}".format(feature_name, feature_suffixes[-1]))
        return cmds

    def _disable_cmds(self, feature):
        """ Build the systemctl commands that stop, disable and mask the feature. """
        cmds = []
        feature_names, feature_suffixes = self.get_feature_attribute(feature)
        for feature_name in feature_names:
            for suffix in reversed(feature_suffixes):
                cmds.append("sudo systemctl stop {}.{}".format(feature_name, suffix))
            # Mirrors _enable_cmds: disable/mask act on the last suffix only,
            # issued once per unit name.
            # NOTE(review): the original nesting of these two commands is not
            # recoverable from the flattened diff; this is presumed equivalent - confirm.
            cmds.append("sudo systemctl disable {}.{}".format(feature_name, feature_suffixes[-1]))
            cmds.append("sudo systemctl mask {}.{}".format(feature_name, feature_suffixes[-1]))
        return cmds

    def _run_feature_cmds(self, cmds, failure_message):
        """ Run cmds in order; log failure_message and stop at the first failure. """
        for cmd in cmds:
            syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
            try:
                run_cmd(cmd, raise_exception=True)
            except Exception:
                syslog.syslog(syslog.LOG_ERR, failure_message)
                return

    def enable_feature(self, feature):
        """ Unmask, enable and start the systemd units of the feature. """
        cmds = self._enable_cmds(feature)
        suffix = "timer" if feature.has_timer else "service"
        self._run_feature_cmds(cmds, "Feature '{}.{}' failed to be enabled and started"
                               .format(feature.name, suffix))

    def disable_feature(self, feature):
        """ Stop, disable and mask the systemd units of the feature. """
        cmds = self._disable_cmds(feature)
        suffix = "timer" if feature.has_timer else "service"
        self._run_feature_cmds(cmds, "Feature '{}.{}' failed to be stopped and disabled"
                               .format(feature.name, suffix))

    def resync_feature_state(self, feature):
        """ Write the state held by the given feature object back to the FEATURE table. """
        self._config_db.mod_entry('FEATURE', feature.name, {'state': feature.state})
|
||||
|
||||
|
||||
class Iptables(object):
|
||||
def __init__(self):
|
||||
@ -536,7 +745,7 @@ class NtpCfg(object):
|
||||
def __init__(self, CfgDb):
|
||||
self.config_db = CfgDb
|
||||
self.ntp_global = {}
|
||||
self.has_ntp_servers = False
|
||||
self.has_ntp_servers = False
|
||||
|
||||
def load(self, ntp_global_conf, ntp_server_conf):
|
||||
syslog.syslog(syslog.LOG_INFO, "NtpCfg load ...")
|
||||
@ -617,7 +826,7 @@ class NtpCfg(object):
|
||||
self.has_ntp_servers = True
|
||||
else:
|
||||
# for runtime ntp server change, to determine if there is ntp server configured, need to
|
||||
# get from configDB, as delete triggers 2 event handling
|
||||
# get from configDB, as delete triggers 2 event handling
|
||||
ntp_servers_tbl = self.config_db.get_table('NTP_SERVER')
|
||||
if ntp_servers_tbl != {}:
|
||||
self.has_ntp_servers = True
|
||||
@ -643,9 +852,8 @@ class HostConfigDaemon:
|
||||
self.hostname_cache=""
|
||||
self.aaacfg = AaaCfg()
|
||||
self.iptables = Iptables()
|
||||
self.feature_handler = FeatureHandler(self.config_db, self.device_config)
|
||||
self.ntpcfg = NtpCfg(self.config_db)
|
||||
# Cache the values of 'state' field in 'FEATURE' table of each container
|
||||
self.cached_feature_states = {}
|
||||
|
||||
self.is_multi_npu = device_info.is_multi_npu()
|
||||
|
||||
@ -680,125 +888,6 @@ class HostConfigDaemon:
|
||||
# Update AAA with the hostname
|
||||
self.aaacfg.hostname_update(self.hostname_cache)
|
||||
|
||||
def get_target_state(self, feature_name, state):
|
||||
template = jinja2.Template(state)
|
||||
target_state = template.render(self.device_config)
|
||||
entry = self.config_db.get_entry('FEATURE', feature_name)
|
||||
entry["state"] = target_state
|
||||
self.config_db.set_entry("FEATURE", feature_name, entry)
|
||||
|
||||
return target_state
|
||||
|
||||
def get_feature_attribute(self, feature_name, feature_table):
|
||||
has_timer = ast.literal_eval(feature_table[feature_name].get('has_timer', 'False'))
|
||||
has_global_scope = ast.literal_eval(feature_table[feature_name].get('has_global_scope', 'True'))
|
||||
has_per_asic_scope = ast.literal_eval(feature_table[feature_name].get('has_per_asic_scope', 'False'))
|
||||
|
||||
# Create feature name suffix depending feature is running in host or namespace or in both
|
||||
feature_names = (
|
||||
([feature_name] if has_global_scope or not self.is_multi_npu else []) +
|
||||
([(feature_name + '@' + str(asic_inst)) for asic_inst in range(device_info.get_num_npus())
|
||||
if has_per_asic_scope and self.is_multi_npu])
|
||||
)
|
||||
|
||||
if not feature_names:
|
||||
syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available"
|
||||
.format(feature_name))
|
||||
|
||||
feature_suffixes = ["service"] + (["timer"] if has_timer else [])
|
||||
|
||||
return feature_names, feature_suffixes
|
||||
|
||||
def enable_feature(self, feature_names, feature_suffixes):
|
||||
start_cmds = []
|
||||
for feature_name in feature_names:
|
||||
for suffix in feature_suffixes:
|
||||
start_cmds.append("sudo systemctl unmask {}.{}".format(feature_name, suffix))
|
||||
# If feature has timer associated with it, start/enable corresponding systemd .timer unit
|
||||
# otherwise, start/enable corresponding systemd .service unit
|
||||
start_cmds.append("sudo systemctl enable {}.{}".format(feature_name, feature_suffixes[-1]))
|
||||
start_cmds.append("sudo systemctl start {}.{}".format(feature_name, feature_suffixes[-1]))
|
||||
for cmd in start_cmds:
|
||||
syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
|
||||
try:
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
except subprocess.CalledProcessError as err:
|
||||
syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
|
||||
.format(err.cmd, err.returncode, err.output))
|
||||
syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
|
||||
.format(feature_name, feature_suffixes[-1]))
|
||||
return
|
||||
|
||||
def disable_feature(self, feature_names, feature_suffixes):
|
||||
stop_cmds = []
|
||||
for feature_name in feature_names:
|
||||
for suffix in reversed(feature_suffixes):
|
||||
stop_cmds.append("sudo systemctl stop {}.{}".format(feature_name, suffix))
|
||||
stop_cmds.append("sudo systemctl disable {}.{}".format(feature_name, suffix))
|
||||
stop_cmds.append("sudo systemctl mask {}.{}".format(feature_name, suffix))
|
||||
for cmd in stop_cmds:
|
||||
syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
|
||||
try:
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
except subprocess.CalledProcessError as err:
|
||||
syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
|
||||
.format(err.cmd, err.returncode, err.output))
|
||||
syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to be stopped and disabled".format(feature_name))
|
||||
return
|
||||
|
||||
def is_invariant_feature(self, feature_name, state, feature_table):
|
||||
invariant_feature = self.cached_feature_states[feature_name] == "always_enabled" or \
|
||||
self.cached_feature_states[feature_name] == "always_disabled"
|
||||
if invariant_feature:
|
||||
invariant_state = self.cached_feature_states[feature_name]
|
||||
if state != invariant_state:
|
||||
syslog.syslog(syslog.LOG_INFO, "Feature '{}' service is '{}'"
|
||||
.format(feature_name, invariant_state))
|
||||
entry = self.config_db.get_entry('FEATURE', feature_name)
|
||||
entry['state'] = invariant_state
|
||||
self.config_db.set_entry('FEATURE', feature_name, entry)
|
||||
|
||||
if state == "always_disabled":
|
||||
feature_names, feature_suffixes = self.get_feature_attribute(feature_name, feature_table)
|
||||
self.disable_feature(feature_names, feature_suffixes)
|
||||
syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name))
|
||||
|
||||
return invariant_feature
|
||||
|
||||
def update_feature_state(self, feature_name, state, feature_table):
|
||||
if not self.is_invariant_feature(feature_name, state, feature_table):
|
||||
self.cached_feature_states[feature_name] = state
|
||||
|
||||
feature_names, feature_suffixes = self.get_feature_attribute(feature_name, feature_table)
|
||||
if state == "enabled":
|
||||
self.enable_feature(feature_names, feature_suffixes)
|
||||
syslog.syslog(syslog.LOG_INFO, "Feature '{}.{}' is enabled and started"
|
||||
.format(feature_name, feature_suffixes[-1]))
|
||||
elif state == "disabled":
|
||||
self.disable_feature(feature_names, feature_suffixes)
|
||||
syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name))
|
||||
else:
|
||||
syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature '{}'"
|
||||
.format(state, feature_name))
|
||||
|
||||
def update_all_feature_states(self):
|
||||
feature_table = self.config_db.get_table('FEATURE')
|
||||
for feature_name in feature_table:
|
||||
if not feature_name:
|
||||
syslog.syslog(syslog.LOG_WARNING, "Feature is None")
|
||||
continue
|
||||
|
||||
state = feature_table[feature_name]['state']
|
||||
if not state:
|
||||
syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name))
|
||||
continue
|
||||
|
||||
target_state = self.get_target_state(feature_name, state)
|
||||
# Store the initial value of 'state' field in 'FEATURE' table of a specific container
|
||||
self.cached_feature_states[feature_name] = target_state
|
||||
|
||||
self.update_feature_state(feature_name, target_state, feature_table)
|
||||
|
||||
def aaa_handler(self, key, data):
|
||||
self.aaacfg.aaa_update(key, data)
|
||||
|
||||
@ -863,24 +952,6 @@ class HostConfigDaemon:
|
||||
key = ConfigDBConnector.deserialize_key(key)
|
||||
self.aaacfg.handle_radius_source_intf_ip_chg(key)
|
||||
|
||||
def feature_state_handler(self, key, data):
|
||||
feature_name = key
|
||||
feature_table = self.config_db.get_table('FEATURE')
|
||||
if feature_name not in feature_table:
|
||||
syslog.syslog(syslog.LOG_WARNING, "Feature '{}' not in FEATURE table".format(feature_name))
|
||||
return
|
||||
|
||||
state = feature_table[feature_name]['state']
|
||||
if not state:
|
||||
syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name))
|
||||
return
|
||||
|
||||
self.cached_feature_states.setdefault(feature_name, 'disabled')
|
||||
|
||||
# Enable/disable the container service if the feature state was changed from its previous state.
|
||||
if self.cached_feature_states[feature_name] != state:
|
||||
self.update_feature_state(feature_name, state, feature_table)
|
||||
|
||||
def ntp_server_handler (self, key, data):
|
||||
syslog.syslog(syslog.LOG_INFO, 'NTP server handler...')
|
||||
ntp_server_db = self.config_db.get_table('NTP_SERVER')
|
||||
@ -903,7 +974,6 @@ class HostConfigDaemon:
|
||||
subprocess.call(systemctl_cmd, shell=True)
|
||||
|
||||
def start(self):
|
||||
|
||||
self.config_db.subscribe('AAA', lambda table, key, data: self.aaa_handler(key, data))
|
||||
self.config_db.subscribe('TACPLUS_SERVER', lambda table, key, data: self.tacacs_server_handler(key, data))
|
||||
self.config_db.subscribe('TACPLUS', lambda table, key, data: self.tacacs_global_handler(key, data))
|
||||
@ -911,11 +981,11 @@ class HostConfigDaemon:
|
||||
self.config_db.subscribe('RADIUS', lambda table, key, data: self.radius_global_handler(key, data))
|
||||
self.config_db.subscribe('MGMT_INTERFACE', lambda table, key, data: self.mgmt_intf_handler(key, data))
|
||||
self.config_db.subscribe('LOOPBACK_INTERFACE', lambda table, key, data: self.lpbk_handler(key, data))
|
||||
self.config_db.subscribe('FEATURE', lambda table, key, data: self.feature_handler.handle(key, data))
|
||||
self.config_db.subscribe('VLAN_INTERFACE', lambda table, key, data: self.vlan_intf_handler(key, data))
|
||||
self.config_db.subscribe('VLAN_SUB_INTERFACE', lambda table, key, data: self.vlan_sub_intf_handler(key, data))
|
||||
self.config_db.subscribe('PORTCHANNEL_INTERFACE', lambda table, key, data: self.portchannel_intf_handler(key, data))
|
||||
self.config_db.subscribe('INTERFACE', lambda table, key, data: self.phy_intf_handler(key, data))
|
||||
self.config_db.subscribe('FEATURE', lambda table, key, data: self.feature_state_handler(key, data))
|
||||
self.config_db.subscribe('NTP_SERVER', lambda table, key, data: self.ntp_server_handler(key, data))
|
||||
self.config_db.subscribe('NTP', lambda table, key, data: self.ntp_global_handler(key, data))
|
||||
self.config_db.subscribe('KDUMP', lambda table, key, data: self.kdump_handler(key, data))
|
||||
@ -927,7 +997,7 @@ class HostConfigDaemon:
|
||||
"systemctl has finished initialization -- proceeding ...")
|
||||
|
||||
# Update all feature states once upon starting
|
||||
self.update_all_feature_states()
|
||||
self.feature_handler.update_all_features_config()
|
||||
|
||||
# Defer load until subscribe
|
||||
self.load()
|
||||
|
@ -37,6 +37,7 @@ setup(
|
||||
tests_require = [
|
||||
'parameterized',
|
||||
'pytest',
|
||||
'pyfakefs',
|
||||
'sonic-py-common'
|
||||
],
|
||||
classifiers = [
|
||||
|
@ -9,6 +9,8 @@ from unittest import TestCase, mock
|
||||
from .test_vectors import HOSTCFGD_TEST_VECTOR
|
||||
from .mock_configdb import MockConfigDb
|
||||
|
||||
from pyfakefs.fake_filesystem_unittest import patchfs
|
||||
|
||||
|
||||
swsssdk.ConfigDBConnector = MockConfigDb
|
||||
test_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
@ -51,8 +53,36 @@ class TestHostcfgd(TestCase):
|
||||
break
|
||||
return is_equal
|
||||
|
||||
def __verify_fs(self, table):
    """
        verify filesystem changes made by hostcfgd.

        Checks whether systemd override configuration files
        were generated and Restart= for systemd unit is set
        correctly

        Args:
            table(dict): Current Config Db table

        Returns:
            None

        Raises:
            AssertionError: an auto_restart.conf does not hold the expected
                Restart= value.
    """

    exp_dict = {
        "enabled": "always",
        "disabled": "no",
    }
    auto_restart_conf = os.path.join(hostcfgd.FeatureHandler.SYSTEMD_SERVICE_CONF_DIR, "auto_restart.conf")

    for feature in table:
        auto_restart = table[feature].get("auto_restart", "disabled")
        # Keep the file handle and its contents under separate names.
        with open(auto_restart_conf.format(feature)) as conf_file:
            content = conf_file.read().strip()
        assert content == "[Service]\nRestart={}".format(exp_dict[auto_restart])
|
||||
|
||||
|
||||
@parameterized.expand(HOSTCFGD_TEST_VECTOR)
|
||||
def test_hostcfgd(self, test_name, test_data):
|
||||
@patchfs
|
||||
def test_hostcfgd(self, test_name, test_data, fs):
|
||||
"""
|
||||
Test hostcfd daemon initialization
|
||||
|
||||
@ -63,12 +93,44 @@ class TestHostcfgd(TestCase):
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
fs.add_real_paths(swsssdk.__path__) # add real path of swsssdk for database_config.json
|
||||
fs.create_dir(hostcfgd.FeatureHandler.SYSTEMD_SYSTEM_DIR)
|
||||
MockConfigDb.set_config_db(test_data["config_db"])
|
||||
with mock.patch("hostcfgd.subprocess") as mocked_subprocess:
|
||||
host_config_daemon = hostcfgd.HostConfigDaemon()
|
||||
host_config_daemon.update_all_feature_states()
|
||||
host_config_daemon.feature_handler.update_all_features_config()
|
||||
assert self.__verify_table(
|
||||
MockConfigDb.get_config_db()["FEATURE"],
|
||||
test_data["expected_config_db"]["FEATURE"]
|
||||
), "Test failed for test data: {0}".format(test_data)
|
||||
mocked_subprocess.check_call.assert_has_calls(test_data["expected_subprocess_calls"], any_order=True)
|
||||
|
||||
self.__verify_fs(test_data["config_db"]["FEATURE"])
|
||||
|
||||
def test_feature_config_parsing(self):
    """ Every explicitly configured field is reflected on the Feature object. """
    feature_cfg = {
        'state': 'enabled',
        'auto_restart': 'enabled',
        'has_timer': 'True',
        'has_global_scope': 'False',
        'has_per_asic_scope': 'True',
    }
    swss_feature = hostcfgd.Feature('swss', feature_cfg)

    assert swss_feature.name == 'swss'
    assert swss_feature.state == 'enabled'
    assert swss_feature.auto_restart == 'enabled'
    assert swss_feature.has_timer
    assert not swss_feature.has_global_scope
    assert swss_feature.has_per_asic_scope
|
||||
|
||||
def test_feature_config_parsing_defaults(self):
    """ Fields missing from the configuration fall back to their defaults. """
    feature_cfg = {
        'state': 'enabled',
    }
    swss_feature = hostcfgd.Feature('swss', feature_cfg)

    assert swss_feature.name == 'swss'
    assert swss_feature.state == 'enabled'
    assert swss_feature.auto_restart == 'disabled'
    assert not swss_feature.has_timer
    assert swss_feature.has_global_scope
    assert not swss_feature.has_per_asic_scope
|
||||
|
@ -25,6 +25,11 @@ class MockConfigDb(object):
|
||||
def get_entry(self, key, field):
    """ Return the mocked CONFIG_DB value stored at CONFIG_DB[key][field]. """
    table = MockConfigDb.CONFIG_DB[key]
    return table[field]
|
||||
|
||||
def mod_entry(self, key, field, data):
    """ Merge data into the existing entry at [key][field] and store the result. """
    entry = self.get_entry(key, field)
    # The stored entry is updated in place and then written back.
    entry.update(data)
    self.set_entry(key, field, entry)
|
||||
|
||||
def set_entry(self, key, field, data):
    """ Store data in the mocked CONFIG_DB at CONFIG_DB[key][field]. """
    table = MockConfigDb.CONFIG_DB[key]
    table[field] = data
|
||||
|
||||
|
@ -41,7 +41,7 @@ HOSTCFGD_TEST_VECTOR = [
|
||||
"state": "{% if 'subtype' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['subtype'] == 'DualToR' %}enabled{% else %}always_disabled{% endif %}"
|
||||
},
|
||||
"telemetry": {
|
||||
"auto_restart": "enabled",
|
||||
"auto_restart": "disabled",
|
||||
"has_global_scope": "True",
|
||||
"has_per_asic_scope": "False",
|
||||
"has_timer": "True",
|
||||
@ -73,7 +73,7 @@ HOSTCFGD_TEST_VECTOR = [
|
||||
"state": "enabled"
|
||||
},
|
||||
"telemetry": {
|
||||
"auto_restart": "enabled",
|
||||
"auto_restart": "disabled",
|
||||
"has_global_scope": "True",
|
||||
"has_per_asic_scope": "False",
|
||||
"has_timer": "True",
|
||||
|
Loading…
Reference in New Issue
Block a user