4dc61fcbc1
- Why I did it Fixes #14236 When a redis event quickly gets outdated during port breakout, error logs like this are seen Mar 8 01:43:26.011724 r-leopard-56 INFO ConfigMgmt: Write in DB: {'PORT': {'Ethernet64': {'admin_status': 'down'}, 'Ethernet68': {'admin_status': 'down'}}} Mar 8 01:43:26.012565 r-leopard-56 INFO ConfigMgmt: Writing in Config DB Mar 8 01:43:26.013468 r-leopard-56 INFO ConfigMgmt: Write in DB: {'PORT': {'Ethernet64': None, 'Ethernet68': None}, 'INTERFACE': None} Mar 8 01:43:26.018095 r-leopard-56 NOTICE swss#portmgrd: :- doTask: Configure Ethernet64 admin status to down Mar 8 01:43:26.018309 r-leopard-56 NOTICE swss#portmgrd: :- doTask: Delete Port: Ethernet64 Mar 8 01:43:26.018641 r-leopard-56 NOTICE lldp#lldpmgrd[32]: :- pops: Miss table key PORT_TABLE:Ethernet64, possibly outdated Mar 8 01:43:26.018654 r-leopard-56 ERR lldp#lldpmgrd[32]: unknown operation '' - How I did it Only log the error when the op is not empty and not one of ("SET" & "DEL" ) Signed-off-by: Vivek Reddy Karri <vkarri@nvidia.com>
367 lines
15 KiB
Python
Executable File
367 lines
15 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""
|
|
lldpmgrd
|
|
|
|
LLDP manager daemon for SONiC
|
|
|
|
Daemon which listens for changes in the PORT table of the Application DB
|
|
and updates LLDP configuration accordingly for that port by calling
|
|
lldpcli.
|
|
|
|
TODO: Also listen for changes in DEVICE_NEIGHBOR and PORT tables in
|
|
Config DB and update LLDP config upon changes.
|
|
"""
|
|
|
|
|
|
try:
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
from sonic_py_common import daemon_base
|
|
from swsscommon import swsscommon
|
|
from sonic_py_common.interface import inband_prefix, recirc_prefix
|
|
except ImportError as err:
|
|
raise ImportError("%s - required module not found" % str(err))
|
|
|
|
# Version string of this daemon
VERSION = "1.0"

# Identifier handed to the daemon base class when the manager is created
SYSLOG_IDENTIFIER = "lldpmgrd"

# Seconds to wait for port initialization before lldpd is resumed anyway
PORT_INIT_TIMEOUT = 300

# Minimum number of seconds between two attempts of a failed lldpcli command
FAILED_CMD_TIMEOUT = 6

# Number of failures after which a pending lldpcli command is no longer retried
RETRY_LIMIT = 5
|
|
|
|
|
|
class LldpManager(daemon_base.DaemonBase):
    """
    Class which keeps the configuration of lldpd in sync with the Redis
    databases by calling lldpcli.

    It subscribes to notifications of changes in the PORT table of the
    Application database and in the management interface and device metadata
    tables of the Config database (see run()), and updates the per-port LLDP
    configuration, the advertised management IP and the advertised hostname
    accordingly.

    Attributes:
        config_db: Handle to Redis Config database via swsscommon lib
        appl_db: Handle to Redis Application database via swsscommon lib
        state_db: Handle to Redis State database via swsscommon lib
        pending_cmds: Dictionary where key is port name, value is a dict with
                      the pending LLDP configuration command to run ('cmd'),
                      the number of times it failed ('failed_count') and,
                      once it has failed, the last timestamp that this
                      command failed ('failed_timestamp', used for the retry
                      mechanism)
        hostname: Hostname last configured successfully, "None" until then
        mgmt_ip: Management IP last configured successfully, "None" if unset
        port_config_done: True once the "PortConfigDone" key has been seen
        port_init_done: True once the "PortInitDone" key has been seen
    """
    # Timeout value handed to every DBConnector created in __init__
    REDIS_TIMEOUT_MS = 0

    def __init__(self, log_identifier):
        """
        Connect to the Config, Application and State databases and open the
        tables this daemon reads.

        Args:
            log_identifier: Identifier passed on to the daemon base class.
        """
        super(LldpManager, self).__init__(log_identifier)

        # Open a handle to the Config database
        self.config_db = swsscommon.DBConnector("CONFIG_DB",
                                                self.REDIS_TIMEOUT_MS,
                                                False)

        # Open a handle to the Application database
        self.appl_db = swsscommon.DBConnector("APPL_DB",
                                              self.REDIS_TIMEOUT_MS,
                                              False)

        # Open a handle to the State database
        self.state_db = swsscommon.DBConnector("STATE_DB",
                                               self.REDIS_TIMEOUT_MS,
                                               False)

        self.pending_cmds = {}
        # The string "None" (not the None object) marks "nothing configured"
        self.hostname = "None"
        self.mgmt_ip = "None"

        self.device_table = swsscommon.Table(self.config_db, swsscommon.CFG_DEVICE_METADATA_TABLE_NAME)
        self.port_table = swsscommon.Table(self.config_db, swsscommon.CFG_PORT_TABLE_NAME)
        self.mgmt_table = swsscommon.Table(self.config_db, swsscommon.CFG_MGMT_INTERFACE_TABLE_NAME)
        self.app_port_table = swsscommon.Table(self.appl_db, swsscommon.APP_PORT_TABLE_NAME)
        self.state_port_table = swsscommon.Table(self.state_db, swsscommon.STATE_PORT_TABLE_NAME)

        self.port_config_done = False
        self.port_init_done = False

    def update_hostname(self, hostname):
        """
        Configure `hostname` as the system hostname in lldpd.

        self.hostname is only updated when lldpcli succeeds; a failure is
        logged as a warning.
        """
        cmd = ["lldpcli", "configure", "system", "hostname", hostname]
        self.log_debug("Running command: '{}'".format(cmd))

        # Use the shared helper so stderr is text (not bytes) in the log
        rc, stderr = run_cmd(self, cmd)

        if rc != 0:
            self.log_warning("Command failed '{}': {}".format(cmd, stderr))
        else:
            self.hostname = hostname

    def update_mgmt_addr(self, ip):
        """
        Configure `ip` as the management address pattern in lldpd, or remove
        the pattern when `ip` is the string "None".

        self.mgmt_ip is only updated when lldpcli succeeds; a failure is
        logged as a warning.
        """
        if ip == "None":
            cmd = ["lldpcli", "unconfigure", "system", "ip", "management", "pattern"]
            self.log_info("Mgmt IP {0} deleted".format(self.mgmt_ip))
        else:
            cmd = ["lldpcli", "configure", "system", "ip", "management", "pattern", ip]
            self.log_info("Mgmt IP changed old ip {0}, new ip {1}".format(self.mgmt_ip, ip))

        self.log_debug("Running command: '{}'".format(cmd))

        # Use the shared helper so stderr is text (not bytes) in the log
        rc, stderr = run_cmd(self, cmd)

        if rc != 0:
            self.log_warning("Command failed '{}': {}".format(cmd, stderr))
        else:
            self.mgmt_ip = ip

    def is_port_up(self, port_name):
        """
        Determine if a port is up or down by looking into the netdev_oper_status for the port in
        PORT TABLE in the State DB

        Returns:
            True only when the port has an entry whose netdev_oper_status
            field equals "up"; False otherwise.
        """
        # Retrieve all entries for this port from the Port table
        (status, fvp) = self.state_port_table.get(port_name)
        if not status:
            return False

        # Convert list of tuples to a dictionary; a missing field yields None,
        # which compares unequal to "up"
        return dict(fvp).get("netdev_oper_status") == "up"

    def generate_pending_lldp_config_cmd_for_port(self, port_name, port_table_dict):
        """
        For port `port_name`, take the alias and description from
        `port_table_dict`, form the appropriate lldpcli configuration command
        and store it in self.pending_cmds. The command is executed later by
        process_pending_cmds().

        Args:
            port_name: Name of the port to configure.
            port_table_dict: Field/value dictionary of the port's table entry.
        """
        # Skip recirc and inband interface prefixes. These are recycle ports exposed in PORT_TABLE for
        # asic-to-asic communication in VOQ based chassis system. We do not configure LLDP on these.
        if port_name.startswith(inband_prefix()) or port_name.startswith(recirc_prefix()):
            return

        # Get the port alias. If None or empty string, use port name instead
        port_alias = port_table_dict.get("alias")
        if not port_alias:
            self.log_info("Unable to retrieve port alias for port '{}'. Using port name instead.".format(port_name))
            port_alias = port_name

        # Get the port description. If None or empty string, we'll skip this configuration
        port_desc = port_table_dict.get("description")

        lldpcli_cmd = ["lldpcli", "configure", "ports", port_name, "lldp", "portidsubtype", "local", port_alias]

        # if there is a description available, also configure that
        if port_desc:
            lldpcli_cmd += ["description", port_desc]
        else:
            self.log_info("Unable to retrieve description for port '{}'. Not adding port description".format(port_name))

        # Add the command to our dictionary of pending commands, overwriting any
        # previous pending command for this port
        self.pending_cmds[port_name] = {'cmd': lldpcli_cmd, 'failed_count': 0}

    def process_pending_cmds(self):
        """
        Try to run every pending lldpcli command whose port is up.

        Successful commands are removed from self.pending_cmds. A failed
        command stays pending and is retried on a later call, but not before
        FAILED_CMD_TIMEOUT seconds have passed; once it has failed
        RETRY_LIMIT times, the next failure drops it with an error log.
        """
        # List of port names (keys of elements) to delete from self.pending_cmds
        to_delete = []

        for (port_name, port_item) in self.pending_cmds.items():
            cmd = port_item['cmd']

            # check if linux port is up
            if not self.is_port_up(port_name):
                self.log_info("port %s is not up, continue" % port_name)
                continue

            # Back off while the previous failure is still recent
            if 'failed_timestamp' in port_item and time.time() - port_item['failed_timestamp'] < FAILED_CMD_TIMEOUT:
                continue

            self.log_debug("Running command: '{}'".format(cmd))
            rc, stderr = run_cmd(self, cmd)

            # If the command succeeds, add the port name to our to_delete list.
            # We will delete this command from self.pending_cmds below.
            # If the command fails, log a message, but don't delete the command
            # from self.pending_cmds, so that the command will be retried the
            # next time this method is called.
            if rc == 0:
                to_delete.append(port_name)
            elif port_item['failed_count'] >= RETRY_LIMIT:
                self.log_error("Command failed '{}': {} - command was failed {} times, disabling retry".format(cmd, stderr, RETRY_LIMIT+1))
                # not retrying again
                to_delete.append(port_name)
            else:
                # Only values are mutated here, never keys, so updating the
                # entry while iterating over the dictionary is safe
                port_item['failed_count'] += 1
                port_item['failed_timestamp'] = time.time()
                self.log_info("Command failed '{}': {} - cmd failed {} times, retrying again".format(cmd, stderr, port_item['failed_count']))

        # Delete all finished (successful or given-up) commands from self.pending_cmds
        for port_name in to_delete:
            self.pending_cmds.pop(port_name, None)

    @staticmethod
    def _mgmt_ip_from_key(key):
        """
        Extract the address from a key of the form '<name>|<ip>/<prefix>'.

        Returns:
            The text between the first '|' and the following first '/', or
            None when the key does not contain both separators in that order.
        """
        if '|' not in key:
            return None
        addr_with_prefix = key.split('|', 1)[1]
        if '/' not in addr_with_prefix:
            return None
        return addr_with_prefix.split('/', 1)[0]

    def lldp_get_mgmt_ip(self):
        """
        Pick a management IP from the management interface table, ignoring
        the address currently stored in self.mgmt_ip.

        Returns:
            An IPv4 address (one containing '.') when available, otherwise
            any other address found, otherwise the string "None".
        """
        ipv4_addr = "None"
        ipv6_addr = "None"
        for key in self.mgmt_table.getKeys():
            ip = self._mgmt_ip_from_key(key)
            if ip is None or ip == self.mgmt_ip:
                continue
            if '.' in ip:
                ipv4_addr = ip
            else:
                ipv6_addr = ip

        # IPv4 wins; ipv6_addr is still "None" when nothing was found at all
        if ipv4_addr != "None":
            return ipv4_addr
        return ipv6_addr

    def lldp_process_mgmt_info_change(self, op, mgmt_dict, key):
        """
        React to a change of a management interface table entry.

        When the currently advertised address is deleted, a replacement is
        looked up via lldp_get_mgmt_ip(). When a different address is set, it
        replaces the current one if it is IPv4, or if the current one is not
        IPv4.

        Args:
            op: Operation of the event; anything but "SET"/"DEL" is ignored.
            mgmt_dict: Field/value dictionary of the event (only logged).
            key: Table key, expected as '<name>|<ip>/<prefix>'.
        """
        if op not in ("SET", "DEL"):
            return
        self.log_info("Mgmt Config Opcode: {} Dict {} Key {} Curr {}".format(op, mgmt_dict, key, self.mgmt_ip))

        ip = self._mgmt_ip_from_key(key)
        if ip is None:
            return

        if op == "DEL":
            if self.mgmt_ip == ip:
                self.update_mgmt_addr(self.lldp_get_mgmt_ip())
        elif self.mgmt_ip != ip:
            # Never replace a configured IPv4 address with a non-IPv4 one
            if '.' in ip or '.' not in self.mgmt_ip:
                self.update_mgmt_addr(ip)

    def lldp_process_device_table_event(self, op, device_dict, key):
        """
        React to a change of a device metadata table entry by pushing a
        changed hostname to lldpd.

        Args:
            op: Operation of the event; anything but "SET"/"DEL" is ignored.
            device_dict: Field/value dictionary of the event.
            key: Table key of the event (only logged).
        """
        if op not in ("SET", "DEL"):
            return
        self.log_info("Device Config Opcode: {} Dict {} Key {}".format(op, device_dict, key))

        hostname = device_dict.get("hostname")
        if not hostname:
            # Events without a hostname field (e.g. an empty dictionary)
            # must not reach update_hostname(): None inside the lldpcli
            # argument list makes subprocess raise TypeError, which would
            # terminate the daemon loop.
            return

        if self.hostname != hostname:
            self.log_info("Hostname changed old {0}, new {1}".format(self.hostname, hostname))
            self.update_hostname(hostname)

    def lldp_process_port_table_event(self, key, op, fvp):
        """
        React to a change of a PORT table entry.

        The special keys "PortInitDone" and "PortConfigDone" only set the
        corresponding flags. For any other key, a SET whose oper_status
        contains "up" queues an lldpcli command for the port, while a SET
        with another status or a DEL drops the pending command. An empty
        operation is ignored silently; other unknown operations are logged
        as errors.

        Args:
            key: Table key (port name or one of the special keys).
            op: Operation of the event.
            fvp: Field/value pairs of the event.
        """
        if key == "PortInitDone":
            self.port_init_done = True
            return
        if key == "PortConfigDone":
            self.port_config_done = True
            return

        if op == "SET":
            if fvp:
                port_table_dict = dict(fvp)
                if "up" in port_table_dict.get("oper_status", ""):
                    self.generate_pending_lldp_config_cmd_for_port(key, port_table_dict)
                else:
                    self.pending_cmds.pop(key, None)
        elif op == "DEL":
            self.pending_cmds.pop(key, None)
        elif op:
            self.log_error("unknown operation '{}'".format(op))

    def run(self):
        """
        Subscribes to notifications of changes in the PORT table
        of the Application database and in the management interface and
        device metadata tables of the Config database.
        Subscribe to APP_DB - get port oper status
        Subscribe to CONFIG_DB - get notified of mgmt IP / hostname changes
        Update LLDP configuration accordingly.

        This method loops forever. It exits the process with status 1 when
        not run as root or when resuming lldpd fails.
        """
        self.log_info("Starting up...")

        if os.geteuid() != 0:
            self.log_error("Must be root to run this daemon")
            print("Error: Must be root to run this daemon")
            sys.exit(1)

        # Set select timeout to 10 seconds
        SELECT_TIMEOUT_MS = 1000 * 10

        # Daemon is paused on the configuration file to avoid lldp packets with wrong information
        # until all interfaces are well configured on lldpd
        resume_lldp_sent = False
        start_time = time.time()

        sel = swsscommon.Select()

        # Subscribe to PORT table notifications in the App DB
        sst_appdb = swsscommon.SubscriberStateTable(self.appl_db, swsscommon.APP_PORT_TABLE_NAME)
        sel.addSelectable(sst_appdb)

        # Subscribe to MGMT PORT table notifications in the Config DB
        sst_mgmt_ip_confdb = swsscommon.SubscriberStateTable(self.config_db, swsscommon.CFG_MGMT_INTERFACE_TABLE_NAME)
        sel.addSelectable(sst_mgmt_ip_confdb)

        # Subscribe to DEVICE_METADATA table notifications in the Config DB
        sst_device_confdb = swsscommon.SubscriberStateTable(self.config_db, swsscommon.CFG_DEVICE_METADATA_TABLE_NAME)
        sel.addSelectable(sst_device_confdb)

        # Listen for changes to the subscribed tables in the CONFIG_DB and APP_DB
        while True:
            (state, selectableObj) = sel.select(SELECT_TIMEOUT_MS)

            if state == swsscommon.Select.OBJECT:
                # Dispatch on the file descriptor of the table that fired
                if selectableObj.getFd() == sst_mgmt_ip_confdb.getFd():
                    (key, op, fvp) = sst_mgmt_ip_confdb.pop()
                    self.lldp_process_mgmt_info_change(op, dict(fvp), key)
                elif selectableObj.getFd() == sst_device_confdb.getFd():
                    (key, op, fvp) = sst_device_confdb.pop()
                    self.lldp_process_device_table_event(op, dict(fvp), key)
                elif selectableObj.getFd() == sst_appdb.getFd():
                    (key, op, fvp) = sst_appdb.pop()
                    self.lldp_process_port_table_event(key, op, fvp)
                else:
                    self.log_error("Got unexpected selectable object")

            # Process all pending commands (also on select timeout, so that
            # failed commands get retried)
            self.process_pending_cmds()

            # Resume the daemon since all interfaces data updated and configured to the lldpd so no miss leading packets will be sent
            if not resume_lldp_sent:
                # After the timeout, behave as if both "done" keys were seen
                if check_timeout(self, start_time):
                    self.port_init_done = self.port_config_done = True
                if self.port_init_done and self.port_config_done:
                    self.port_init_done = self.port_config_done = False
                    rc, stderr = run_cmd(self, ["lldpcli", "resume"])
                    if rc != 0:
                        self.log_error("Failed to resume lldpd with command: 'lldpcli resume': {}".format(stderr))
                        sys.exit(1)
                    resume_lldp_sent = True
|
|
|
|
# ============================= Functions =============================
|
|
|
|
|
|
def main():
    """Create the LLDP manager daemon and run it with INFO-level logging."""
    manager = LldpManager(SYSLOG_IDENTIFIER)

    # Log all messages from INFO level and higher
    manager.set_min_log_priority_info()

    manager.run()
|
|
|
|
|
|
def run_cmd(self, cmd):
    """
    Execute `cmd`, wait for it to finish and report how it went.

    Args:
        self: Instance of the caller; not used by this function.
        cmd: Command and its arguments as a list.

    Returns:
        Tuple of (exit code of the command, text the command wrote to stderr).
    """
    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    # stdout is captured but discarded; only stderr is reported back
    _, err_text = process.communicate()
    return process.returncode, err_text
|
|
|
|
|
|
def check_timeout(self, start_time):
    """
    Tell whether more than PORT_INIT_TIMEOUT seconds have passed since
    `start_time`, logging an error through `self` when that is the case.

    Args:
        self: Object providing log_error().
        start_time: Reference timestamp as returned by time.time().

    Returns:
        True when the timeout has been exceeded, False otherwise.
    """
    timed_out = time.time() - start_time > PORT_INIT_TIMEOUT
    if timed_out:
        self.log_error("Port init timeout reached ({} seconds), resuming lldpd...".format(PORT_INIT_TIMEOUT))
    return timed_out
|
|
|
|
|
|
# Start the daemon only when this file is executed as a script
if __name__ == "__main__":
    main()
|