sonic-buildimage/files/image_config/hostcfgd/hostcfgd
Renuka Manavalan 8cd6714ef4
hostcfgd: Handle missed tacacs updates between load & listen (#8223)
Why I did it
The time gap between the last config load and db-listen seems to have increased.
Any config update that occurs in this gap is missed by db-listen.
As a result, /etc/pam.d/common-auth-sonic may not get updated.

How I did it
Add a one-shot timer just before db-listen. The timer fires after the subscribe is done.
When the timer fires, reload TACACS and AAA.
2021-08-06 10:38:37 -07:00

577 lines
23 KiB
Python
Executable File

#!/usr/bin/python -u
# -*- coding: utf-8 -*-
import ast
import os
import time
import threading
import sys
import subprocess
import syslog
import copy
import jinja2
import ipaddr as ipaddress
from swsssdk import ConfigDBConnector, SonicDBConfig
from sonic_py_common import device_info,multi_asic
# Files written or patched by this daemon, and the Jinja2 templates that the
# generated ones are rendered from.
PAM_AUTH_CONF = "/etc/pam.d/common-auth-sonic"
PAM_AUTH_CONF_TEMPLATE = "/usr/share/sonic/templates/common-auth-sonic.j2"
NSS_TACPLUS_CONF = "/etc/tacplus_nss.conf"
NSS_TACPLUS_CONF_TEMPLATE = "/usr/share/sonic/templates/tacplus_nss.conf.j2"
NSS_CONF = "/etc/nsswitch.conf"
# TACACS+ global defaults; they apply to every server unless the TACPLUS
# "global" entry or the per-server entry overrides them.
TACPLUS_SERVER_PASSKEY_DEFAULT = ""
TACPLUS_SERVER_TIMEOUT_DEFAULT = "5"
TACPLUS_SERVER_AUTH_TYPE_DEFAULT = "pap"
# Lock shared by the main thread and the one-shot timer thread while the
# AAA/TACACS+ files are regenerated. None means no locking is required.
global_lock = None


class lock_mgr:
    """Context manager that holds ``global_lock`` for the duration of a block.

    The lock object is captured when the manager is constructed, so acquire
    and release always act on the same lock even if ``global_lock`` is rebound
    while the block is running. When ``global_lock`` is None at construction
    time, entering and leaving the block does nothing.
    """

    def __init__(self):
        self.lock = global_lock

    def __enter__(self):
        if self.lock is not None:
            self.lock.acquire()
        # Return the manager so ``with lock_mgr() as mgr`` binds a usable object.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so an exception raised inside the block propagates.
        if self.lock is not None:
            self.lock.release()
def is_true(val):
    """Return True only when ``val`` is the string 'True' or 'true'."""
    return val in ('True', 'true')
def sub(l, start, end):
    """Return the slice of ``l`` from index ``start`` up to (not including) ``end``."""
    window = slice(start, end)
    return l[window]
def obfuscate(data):
    """Mask a secret for logging: keep its first character, replace the rest with '*****'.

    Falsy input (empty string, None) is returned unchanged.
    """
    if not data:
        return data
    first_char = data[0]
    return first_char + '*****'
class Iptables(object):
    """Builds and applies iptables/ip6tables mangle-table TCPMSS rules for loopback IPs."""

    def __init__(self):
        '''
        Default MSS to 1460 - (MTU 1500 - 40 (TCP/IP Overhead))
        For IPv6, it would be 1440 - (MTU 1500 - 60 octects)
        '''
        self.tcpmss = 1460
        self.tcp6mss = 1440

    def is_ip_prefix_in_key(self, key):
        '''
        Function to check if IP address is present in the key. If it
        is present, then the key would be a tuple or else, it shall be
        be string
        '''
        return isinstance(key, tuple)

    def load(self, lpbk_table):
        """Install the TCPMSS rules for every row of the loopback table.

        Args:
            lpbk_table: mapping of LOOPBACK_INTERFACE keys to their data.
        """
        for row in lpbk_table:
            self.iptables_tcpmss_handler(row, lpbk_table[row])

    def command(self, chain, ip, ver, op):
        """Return the shell command string for one TCPMSS rule operation.

        Args:
            chain: 'PREROUTING' (matches on destination) or any other chain
                name (matches on source).
            ip: address to match, without prefix length.
            ver: '4' selects iptables and the IPv4 MSS; anything else selects
                ip6tables and the IPv6 MSS.
            op: long-option name of the rule operation, e.g. 'check',
                'append' or 'delete'.
        """
        cmd = 'iptables' if ver == '4' else 'ip6tables'
        cmd += ' -t mangle --{} {} -p tcp --tcp-flags SYN SYN'.format(op, chain)
        cmd += ' -d' if chain == 'PREROUTING' else ' -s'
        mss = self.tcpmss if ver == '4' else self.tcp6mss
        cmd += ' {} -j TCPMSS --set-mss {}'.format(ip, mss)
        return cmd

    def iptables_tcpmss_handler(self, key, data, add=True):
        """Add or delete the PREROUTING/POSTROUTING TCPMSS rules for one key.

        Keys that carry no IP prefix (i.e. are not tuples) are ignored.

        Args:
            key: (interface name, "ip/prefixlen") tuple.
            data: row data; not used.
            add: True to add the rules if missing, False to delete them.
        """
        if not self.is_ip_prefix_in_key(key):
            return

        iface, ip = key
        ip_str = ip.split("/")[0]
        ip_addr = ipaddress.IPAddress(ip_str)
        ver = '6' if isinstance(ip_addr, ipaddress.IPv6Address) else '4'

        # For the add case the commands are emitted as existence checks;
        # mangle_handler turns them into appends only for rules that are missing.
        op = 'check' if add else 'delete'
        iptables_tcpmss_cmds = [self.command(chain, ip_str, ver, op)
                                for chain in ['PREROUTING', 'POSTROUTING']]
        self.mangle_handler(iptables_tcpmss_cmds, add)

    @staticmethod
    def _check_to_append(cmd):
        """Turn a rule-existence check command into the matching append command."""
        if '--check' in cmd:
            # Rewrite only the option itself; a plain substring replace would
            # also mangle any other "check" text in the command, such as a
            # namespace name.
            return cmd.replace('--check', '--append', 1)
        # Fallback for commands that do not spell the option as '--check'.
        return cmd.replace('check', 'append')

    def mangle_handler(self, cmds, add):
        """Run the given iptables commands.

        Args:
            cmds: list of shell command strings. When ``add`` is True each one
                must be a rule-existence check.
            add: True to append only the rules whose check fails; False to run
                ``cmds`` as given.
        """
        iptables_cmds = []
        if add:
            for cmd in cmds:
                # For add case, first check if rule exists. Iptables just appends
                # to the chain as a new rule even if it is the same as an existing
                # one. Check this and do nothing if rule exists
                ret = subprocess.call(cmd, shell=True)
                if ret == 0:
                    syslog.syslog(syslog.LOG_INFO, "{} rule exists".format(cmd))
                else:
                    # Modify command from Check to Append
                    iptables_cmds.append(self._check_to_append(cmd))
        else:
            iptables_cmds = cmds

        for cmd in iptables_cmds:
            syslog.syslog(syslog.LOG_INFO, "Running cmd - {}".format(cmd))
            try:
                subprocess.check_call(cmd, shell=True)
            except subprocess.CalledProcessError as err:
                # A failure is logged and the remaining commands still run.
                syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
                              .format(err.cmd, err.returncode, err.output))
class AaaCfg(object):
    """Regenerates the PAM and NSS configuration from the AAA and TACACS+ ConfigDB tables."""

    def __init__(self, config_db):
        """
        Args:
            config_db: connected ConfigDB client; only ``get_table`` is used here.
        """
        self.auth_default = {
            'login': 'local',
        }
        self.tacplus_global_default = {
            'auth_type': TACPLUS_SERVER_AUTH_TYPE_DEFAULT,
            'timeout': TACPLUS_SERVER_TIMEOUT_DEFAULT,
            'passkey': TACPLUS_SERVER_PASSKEY_DEFAULT
        }
        self.auth = {}
        self.tacplus_global = {}
        self.tacplus_servers = {}
        self.debug = False
        self.config_db = config_db

    # Load conf from ConfigDb
    def load(self):
        """Read the current configuration from ConfigDB and rewrite the files."""
        self.modify_conf_file()

    def aaa_update(self, key):
        """Regenerate the files when the AAA 'authentication' entry changed."""
        if key == 'authentication':
            self.modify_conf_file()

    def tacacs_global_update(self, key):
        """Regenerate the files when the TACPLUS 'global' entry changed."""
        if key == 'global':
            self.modify_conf_file()

    def tacacs_server_update(self, key):
        """Regenerate the files when any TACPLUS_SERVER entry changed."""
        self.modify_conf_file()

    def modify_single_file(self, filename, operations=None):
        """Apply sed expressions to ``filename``, keeping the old content in ``filename``.old.

        Args:
            filename: path of the file to edit.
            operations: list of sed expressions, each already quoted for the
                shell. Nothing is done when this is empty or None.
        """
        if operations:
            cmd = "sed -e {0} {1} > {1}.new; mv -f {1} {1}.old; mv -f {1}.new {1}".format(' -e '.join(operations), filename)
            os.system(cmd)

    def modify_conf_file(self):
        """Reload AAA/TACACS+ settings from ConfigDB and regenerate the files.

        The whole reload runs under ``lock_mgr`` so that the main thread and
        the one-shot timer thread do not rewrite the files concurrently.
        """
        with lock_mgr():
            self.auth = self.config_db.get_table('AAA').get("authentication", {})
            if 'failthrough' in self.auth:
                self.auth['failthrough'] = is_true(self.auth['failthrough'])
            # Recompute on every reload so that removing the 'debug' field
            # turns debugging back off instead of leaving the previous value.
            self.debug = is_true(self.auth.get('debug'))

            self.tacplus_global = self.config_db.get_table('TACPLUS').get(
                "global", {})
            self.tacplus_servers = self.config_db.get_table('TACPLUS_SERVER')
            self._modify_conf_file()

    def _modify_conf_file(self):
        """Render and patch the PAM/NSS files from the values cached on ``self``."""
        auth = self.auth_default.copy()
        auth.update(self.auth)
        tacplus_global = self.tacplus_global_default.copy()
        tacplus_global.update(self.tacplus_global)

        # Per-server settings override the global ones, which override defaults.
        servers_conf = []
        if self.tacplus_servers:
            for addr in self.tacplus_servers:
                server = tacplus_global.copy()
                server['ip'] = addr
                server.update(self.tacplus_servers[addr])
                servers_conf.append(server)
            # Highest priority first.
            # NOTE(review): raises KeyError for a server entry without a
            # 'priority' field, since no default is supplied -- confirm the
            # field is always present.
            servers_conf = sorted(servers_conf, key=lambda t: int(t['priority']), reverse=True)

        template_file = os.path.abspath(PAM_AUTH_CONF_TEMPLATE)
        env = jinja2.Environment(loader=jinja2.FileSystemLoader('/'), trim_blocks=True)
        env.filters['sub'] = sub
        template = env.get_template(template_file)
        pam_conf = template.render(auth=auth, servers=servers_conf)
        with open(PAM_AUTH_CONF, 'w') as f:
            f.write(pam_conf)

        # Modify common-auth include file in /etc/pam.d/login and sshd
        if os.path.isfile(PAM_AUTH_CONF):
            include_op = "'/^@include/s/common-auth$/common-auth-sonic/'"
        else:
            include_op = "'/^@include/s/common-auth-sonic$/common-auth/'"
        for pam_file in ('/etc/pam.d/sshd', '/etc/pam.d/login'):
            self.modify_single_file(pam_file, [include_op])

        # Add tacplus in nsswitch.conf if TACACS+ enable
        if 'tacacs+' in auth['login']:
            if os.path.isfile(NSS_CONF):
                self.modify_single_file(NSS_CONF, [ "'/tacplus/b'", "'/^passwd/s/compat/tacplus &/'"])
        else:
            if os.path.isfile(NSS_CONF):
                self.modify_single_file(NSS_CONF, [ "'/^passwd/s/tacplus //'" ])

        # Set tacacs+ server in nss-tacplus conf
        template_file = os.path.abspath(NSS_TACPLUS_CONF_TEMPLATE)
        template = env.get_template(template_file)
        nss_tacplus_conf = template.render(debug=self.debug, servers=servers_conf)
        with open(NSS_TACPLUS_CONF, 'w') as f:
            f.write(nss_tacplus_conf)

        # tacplus_global is a local copy, so masking the passkey for the log
        # line does not touch the cached configuration.
        if 'passkey' in tacplus_global:
            tacplus_global['passkey'] = obfuscate(tacplus_global['passkey'])
        syslog.syslog(syslog.LOG_INFO, 'pam.d files updated auth={} global={}'.
                      format(auth, tacplus_global))
class MultiAsicBgpMonCfg(object):
    """Copies the host BGP_MONITORS configuration into one backend ASIC
    namespace and installs the routes/rules that steer BGP packets to it."""

    def __init__(self):
        self.ns_for_bgp_mon = 'asic4'
        ip_address_get_command = "ip -n {} -4 -o addr show eth0".format(self.ns_for_bgp_mon) +\
            " | awk '{print $4}' | cut -d'/' -f1 | head -1"
        # First IPv4 address of eth0 in the backend namespace; None when the
        # command fails or prints nothing.
        self.ns_docker_ip_fo_bgp_mon = self.run_ip_commands([ip_address_get_command])
        self.bgp_monitor_table = 'BGP_MONITORS'
        self.config_db = ConfigDBConnector()
        self.config_db.connect()
        SonicDBConfig.load_sonic_global_db_config()
        self.backend_config_db = ConfigDBConnector(namespace=self.ns_for_bgp_mon)
        self.backend_config_db.connect()
        self.frontend_namespace = multi_asic.get_front_end_namespaces()
        self.bgp_listen_port = 179
        self.bgp_fw_mark = 1
        self.iptable_handler = Iptables()

    def run_ip_commands(self, commands):
        """
        Given a list of shell ip commands, run them in order
        Args:
            commands: List of strings, each string is a shell command
        Returns:
            Output (trailing newlines stripped) of the first command that
            printed something to stdout without being reported as an error,
            or None if no command did. Every command is run regardless of
            which one produced the returned output.
        """
        first_output = None
        for cmd in commands:
            # universal_newlines makes the pipes yield text, so the string
            # comparisons below behave the same on Python 2 and Python 3.
            proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE, universal_newlines=True)
            (stdout, stderr) = proc.communicate()
            # "File exists" only means the route/rule is already installed.
            if proc.returncode != 0 and stderr.rstrip('\n') != "RTNETLINK answers: File exists":
                syslog.syslog(syslog.LOG_ERR,"Error running command '{}'".format(cmd))
            elif stdout and first_output is None:
                # Remember the output but keep going: returning here would
                # silently skip the remaining commands.
                first_output = stdout.rstrip('\n')
        return first_output

    def _update_bgp_mon_backend_asic(self, key, data):
        """Write one BGP monitor entry to the backend ASIC config DB.

        ``data`` of None is logged as a delete and passed to ``set_entry``
        as-is (presumably removing the entry -- confirm against ConfigDBConnector).
        """
        op = "added" if data is not None else "deleted"
        syslog.syslog(syslog.LOG_INFO,
                      '{} bgp mon{} on {}'.format(op, key,
                                                  self.ns_for_bgp_mon))
        self.backend_config_db.set_entry(self.bgp_monitor_table, key, data)

    def get_lo0_ipv4(self, lpbk_table):
        """
        Extract Loopback0 ipv4 address from the Directory
        :return: ipv4 address for Loopback0, None if nothing found
        """
        # Plain string keys (interface-only rows) never match: indexing them
        # yields single characters, which cannot start with "Loopback0".
        for loopback in lpbk_table:
            if not loopback[0].startswith("Loopback0"):
                continue
            if loopback[1]:
                addr = loopback[1].split("/")[0]
                if isinstance(ipaddress.IPAddress(addr), ipaddress.IPv4Address):
                    return addr
        return None

    def load(self, lpbk_table):
        '''
        This function loads the bgp mon configuration from the host
        config_db to a backend config_db.
        It also Add the ip rule and default route in host to take BGP packet
        to the ASIC running BGPMON
        '''
        loopback0_ipv4 = self.get_lo0_ipv4(lpbk_table)
        if loopback0_ipv4:
            bgpmon_ip_cmd = []
            bgpmon_iptable_cmd = []
            # Add route in host main routing table for Loopback IP to go to
            # pre-define bgpmon backend asic
            bgpmon_ip_cmd.append("ip route add {}/32 via {} dev docker0".format(loopback0_ipv4, self.ns_docker_ip_fo_bgp_mon))
            for front_ns in self.frontend_namespace:
                # Add ip table rule on front asic to mark the BGP packets for
                # destination as Loopback IP
                bgpmon_iptable_cmd.append("ip netns exec {} iptables -t mangle -d {}/32 -p tcp --sport {} --check PREROUTING -j MARK --set-mark {}"
                                          .format(front_ns, loopback0_ipv4, self.bgp_listen_port, self.bgp_fw_mark))
                bgpmon_iptable_cmd.append("ip netns exec {} iptables -t mangle -d {}/32 -p tcp --dport {} --check PREROUTING -j MARK --set-mark {}"
                                          .format(front_ns, loopback0_ipv4, self.bgp_listen_port, self.bgp_fw_mark))
                # Add ip rule in front asic namespace to match mark packet and
                # lookup default table
                bgpmon_ip_cmd.append("ip -n {} rule add fwmark {} pref 101 lookup default".format(front_ns, self.bgp_fw_mark))
            self.run_ip_commands(bgpmon_ip_cmd)
            self.iptable_handler.mangle_handler(bgpmon_iptable_cmd, True)
        else:
            # Log error as IPv4 Loopback0 Address not present
            syslog.syslog(syslog.LOG_ERR, "Loopback0 IPv4 does not exist")

        host_bgp_mon_table = self.config_db.get_table(self.bgp_monitor_table)
        if host_bgp_mon_table:
            for key in host_bgp_mon_table:
                # The helper logs and performs the set_entry; no second write
                # is needed here.
                self._update_bgp_mon_backend_asic(key, host_bgp_mon_table[key])
        else:
            # the host config_db has no bgp monitor configuration.
            # delete the configuration from the backend asic, if present.
            bkend_bgp_mon_table = self.backend_config_db.get_table(self.bgp_monitor_table)
            if bkend_bgp_mon_table:
                self.backend_config_db.delete_table(self.bgp_monitor_table)

    def cfg_handler(self, key, data):
        """Propagate one BGP_MONITORS change from the host DB to the backend DB.

        A key that is no longer present in the host table is treated as a
        delete, whatever ``data`` was delivered with the notification.
        """
        key = ConfigDBConnector.deserialize_key(key)
        keys = self.config_db.get_keys(self.bgp_monitor_table)
        if key not in keys:
            data = None
        self._update_bgp_mon_backend_asic(key, data)
class HostConfigDaemon:
    """Applies ConfigDB changes (AAA/TACACS+, loopback TCPMSS rules, feature
    services, multi-ASIC BGP monitors) to the host."""

    def __init__(self):
        self.config_db = ConfigDBConnector()
        self.config_db.connect(wait_for_init=True, retry_on=True)
        syslog.syslog(syslog.LOG_INFO, 'ConfigDB connect success')
        self.aaacfg = AaaCfg(self.config_db)
        self.iptables = Iptables()
        # Cache the values of 'state' field in 'FEATURE' table of each container
        self.cached_feature_states = {}
        self.is_multi_npu = device_info.is_multi_npu()
        # Default is None. Gets initialize for multi-npu platforms
        self.masicBgpMonCfg = None
        if self.is_multi_npu:
            self.masicBgpMonCfg = MultiAsicBgpMonCfg()

    def timer_load(self):
        """One-shot timer callback: reload AAA/TACACS+ once, then drop the lock."""
        global global_lock
        syslog.syslog(syslog.LOG_INFO, 'reloading tacacs from timer thread')
        try:
            self.aaacfg.load()
        finally:
            # Remove lock as timer is one shot; done even if the reload fails
            # so later updates do not keep taking a lock nobody contends for.
            global_lock = None

    def load(self):
        """Apply the current ConfigDB contents: AAA files, loopback iptables
        rules and, on multi-NPU platforms, the BGP monitor configuration."""
        self.aaacfg.load()
        lpbk_table = self.config_db.get_table('LOOPBACK_INTERFACE')
        self.iptables.load(lpbk_table)
        if self.masicBgpMonCfg:
            self.masicBgpMonCfg.load(lpbk_table)

    def masic_bgp_mon_cfg_handler(self, key, data):
        """BGP_MONITORS subscription callback; a no-op on single-NPU platforms."""
        if self.masicBgpMonCfg:
            syslog.syslog(syslog.LOG_INFO, 'Multi asic bgp cfg handler...')
            self.masicBgpMonCfg.cfg_handler(key, data)

    def wait_till_system_init_done(self):
        """Poll ``systemctl is-system-running`` once a second until it reports
        something other than "initializing" or "starting"."""
        systemctl_cmd = "sudo systemctl is-system-running"
        while True:
            proc = subprocess.Popen(
                systemctl_cmd,
                shell=True,
                stdout=subprocess.PIPE,
                # Text-mode pipe so the string handling below works on both
                # Python 2 and Python 3.
                universal_newlines=True,
            )
            output = proc.communicate()[0].rstrip('\n')
            if output.lower() in ["initializing", "starting"]:
                time.sleep(1)
                continue
            return

    def _run_cmds(self, cmds):
        """Run shell commands in order; log and stop at the first failure.

        Returns:
            True if every command exited with status 0, False otherwise.
        """
        for cmd in cmds:
            syslog.syslog(syslog.LOG_INFO, "Running cmd: '{}'".format(cmd))
            try:
                subprocess.check_call(cmd, shell=True)
            except subprocess.CalledProcessError as err:
                syslog.syslog(syslog.LOG_ERR, "'{}' failed. RC: {}, output: {}"
                              .format(err.cmd, err.returncode, err.output))
                return False
        return True

    def update_feature_state(self, feature_name, state, feature_table):
        """Enable or disable the systemd units that belong to a feature.

        Args:
            feature_name: key of the feature in ``feature_table``.
            state: 'enabled', 'disabled' or 'always_enabled'; any other value
                is logged as an error.
            feature_table: contents of the FEATURE table.
        """
        if state == "always_enabled":
            syslog.syslog(syslog.LOG_INFO, "Feature '{}' service is always enabled"
                          .format(feature_name))
            return

        feature = feature_table[feature_name]
        has_timer = ast.literal_eval(feature.get('has_timer', 'False'))
        has_global_scope = ast.literal_eval(feature.get('has_global_scope', 'True'))
        has_per_asic_scope = ast.literal_eval(feature.get('has_per_asic_scope', 'False'))

        # Create feature name suffix depending feature is running in host or namespace or in both
        feature_name_suffix_list = []
        if has_global_scope or not self.is_multi_npu:
            feature_name_suffix_list.append(feature_name)
        if has_per_asic_scope and self.is_multi_npu:
            feature_name_suffix_list.extend(
                feature_name + '@' + str(asic_inst)
                for asic_inst in range(device_info.get_num_npus()))

        if not feature_name_suffix_list:
            syslog.syslog(syslog.LOG_ERR, "Feature '{}' service not available"
                          .format(feature_name))
            # Nothing to act on; returning avoids logging a misleading
            # "enabled"/"disabled" success message below.
            return

        feature_suffixes = ["service"] + (["timer"] if has_timer else [])

        if state == "enabled":
            start_cmds = []
            for feature_name_suffix in feature_name_suffix_list:
                for suffix in feature_suffixes:
                    start_cmds.append("sudo systemctl unmask {}.{}".format(feature_name_suffix, suffix))
                # If feature has timer associated with it, start/enable corresponding systemd .timer unit
                # otherwise, start/enable corresponding systemd .service unit
                start_cmds.append("sudo systemctl enable {}.{}".format(feature_name_suffix, feature_suffixes[-1]))
                start_cmds.append("sudo systemctl start {}.{}".format(feature_name_suffix, feature_suffixes[-1]))
            if not self._run_cmds(start_cmds):
                syslog.syslog(syslog.LOG_ERR, "Feature '{}.{}' failed to be enabled and started"
                              .format(feature_name, feature_suffixes[-1]))
                return
            syslog.syslog(syslog.LOG_INFO, "Feature '{}.{}' is enabled and started"
                          .format(feature_name, feature_suffixes[-1]))
        elif state == "disabled":
            stop_cmds = []
            for feature_name_suffix in feature_name_suffix_list:
                # Timer unit (if any) is handled before the service unit.
                for suffix in reversed(feature_suffixes):
                    stop_cmds.append("sudo systemctl stop {}.{}".format(feature_name_suffix, suffix))
                    stop_cmds.append("sudo systemctl disable {}.{}".format(feature_name_suffix, suffix))
                    stop_cmds.append("sudo systemctl mask {}.{}".format(feature_name_suffix, suffix))
            if not self._run_cmds(stop_cmds):
                syslog.syslog(syslog.LOG_ERR, "Feature '{}' failed to be stopped and disabled".format(feature_name))
                return
            syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(feature_name))
        else:
            syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature '{}'"
                          .format(state, feature_name))

    def update_all_feature_states(self):
        """Apply and cache the configured state of every FEATURE table entry."""
        feature_table = self.config_db.get_table('FEATURE')
        for feature_name in feature_table:
            if not feature_name:
                syslog.syslog(syslog.LOG_WARNING, "Feature is None")
                continue

            # A missing 'state' field is reported like an empty one instead of
            # raising KeyError.
            state = feature_table[feature_name].get('state')
            if not state:
                syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name))
                continue

            # Store the initial value of 'state' field in 'FEATURE' table of a specific container
            self.cached_feature_states[feature_name] = state
            self.update_feature_state(feature_name, state, feature_table)

    def aaa_handler(self, key, data):
        """AAA subscription callback."""
        self.aaacfg.aaa_update(key)
        syslog.syslog(syslog.LOG_INFO, 'value of {} changed to {}'.format(key, data))

    def _log_tacacs_change(self, key, data):
        """Log a TACACS+ change with the passkey masked; ``data`` is not modified."""
        log_data = copy.deepcopy(data)
        # data may be empty or None (e.g. for a removed entry).
        if log_data and 'passkey' in log_data:
            log_data['passkey'] = obfuscate(log_data['passkey'])
        syslog.syslog(syslog.LOG_INFO, 'value of {} changed to {}'.format(key, log_data))

    def tacacs_server_handler(self, key, data):
        """TACPLUS_SERVER subscription callback."""
        self.aaacfg.tacacs_server_update(key)
        self._log_tacacs_change(key, data)

    def tacacs_global_handler(self, key, data):
        """TACPLUS subscription callback."""
        self.aaacfg.tacacs_global_update(key)
        self._log_tacacs_change(key, data)

    def lpbk_handler(self, key, data):
        """LOOPBACK_INTERFACE subscription callback: add or remove TCPMSS rules."""
        key = ConfigDBConnector.deserialize_key(key)
        # Check if delete operation by fetch existing keys
        keys = self.config_db.get_keys('LOOPBACK_INTERFACE')
        add = key in keys
        self.iptables.iptables_tcpmss_handler(key, data, add)

    def feature_state_handler(self, key, data):
        """FEATURE subscription callback: act only when the state really changed."""
        feature_name = key
        feature_table = self.config_db.get_table('FEATURE')
        if feature_name not in feature_table:
            syslog.syslog(syslog.LOG_WARNING, "Feature '{}' not in FEATURE table".format(feature_name))
            return

        state = feature_table[feature_name].get('state')
        if not state:
            syslog.syslog(syslog.LOG_WARNING, "Enable state of feature '{}' is None".format(feature_name))
            return

        # A feature seen for the first time is compared against 'disabled'.
        self.cached_feature_states.setdefault(feature_name, 'disabled')

        # Enable/disable the container service if the feature state was changed from its previous state.
        if self.cached_feature_states[feature_name] != state:
            self.cached_feature_states[feature_name] = state
            self.update_feature_state(feature_name, state, feature_table)

    def start(self):
        """Subscribe to the ConfigDB tables, wait for systemd initialization,
        apply the current configuration, arm the one-shot reload timer and
        enter the listen loop."""
        global global_lock

        self.config_db.subscribe('AAA', lambda table, key, data: self.aaa_handler(key, data))
        self.config_db.subscribe('TACPLUS_SERVER', lambda table, key, data: self.tacacs_server_handler(key, data))
        self.config_db.subscribe('TACPLUS', lambda table, key, data: self.tacacs_global_handler(key, data))
        self.config_db.subscribe('LOOPBACK_INTERFACE', lambda table, key, data: self.lpbk_handler(key, data))
        self.config_db.subscribe('FEATURE', lambda table, key, data: self.feature_state_handler(key, data))

        if self.is_multi_npu:
            self.config_db.subscribe('BGP_MONITORS', lambda table, key, data: self.masic_bgp_mon_cfg_handler(key, data))

        syslog.syslog(syslog.LOG_INFO,
                      "Waiting for systemctl to finish initialization")
        self.wait_till_system_init_done()
        syslog.syslog(syslog.LOG_INFO,
                      "systemctl has finished initialization -- proceeding ...")

        # Update all feature states once upon starting
        self.update_all_feature_states()

        # Defer load until subscribe
        self.load()

        # Updates made between the load above and listen() below would be
        # missed, so a one-shot timer reloads AAA/TACACS+ 30 seconds later.
        # The lock keeps that reload from overlapping with one triggered by
        # the listener.
        global_lock = threading.Lock()
        self.tmr_thread = threading.Timer(30, self.timer_load)
        self.tmr_thread.start()
        self.config_db.listen()
def main():
    """Build the host config daemon and hand control to its start() routine."""
    HostConfigDaemon().start()


if __name__ == "__main__":
    main()