cc938e73a3
#### Why I did it When adding and removing ports after the init stage, we saw two issues. First: in several cases, after a port was removed, lldpmgr kept trying to add the port to lldp with the lldpcli command; the command kept failing because the port no longer existed. Second: after adding a port, we sometimes saw this warning message: "Command failed 'lldpcli configure ports Ethernet18 lldp portidsubtype local etp5b': 2021-07-27T14:16:54 [WARN/lldpctl] cannot find port Ethernet18". We added these changes in order to solve both issues. #### How I did it Port create events are taken from the APP DB only. The lldpcli command is executed only when the Linux port is up. When a port delete event is received, we remove the port's command from the pending_cmds dictionary. #### How to verify it Manual tests and running the lldp tests. #### Description for the changelog Dynamic port configuration - solve lldp issues when adding/removing ports
363 lines
14 KiB
Python
Executable File
363 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""
    lldpmgrd

    LLDP manager daemon for SONiC

    Daemon which listens for changes in the PORT table of the Application DB
    and updates LLDP configuration accordingly for that port by calling
    lldpcli. A pending port command is only executed once the State DB
    reports the port's netdev_oper_status as "up".

    The daemon also listens to the management-interface and device-metadata
    tables of the Config DB and pushes the management IP address and the
    hostname to lldpd.

    TODO: Also listen for changes in DEVICE_NEIGHBOR and PORT tables in
          Config DB and update LLDP config upon changes.
"""
|
|
|
|
|
|
try:
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
from sonic_py_common import daemon_base
|
|
from swsscommon import swsscommon
|
|
from sonic_py_common.interface import inband_prefix
|
|
except ImportError as err:
|
|
raise ImportError("%s - required module not found" % str(err))
|
|
|
|
# Version string of this daemon (not referenced elsewhere in this file).
VERSION = "1.0"

# Identifier handed to the LldpManager constructor as its log identifier.
SYSLOG_IDENTIFIER = "lldpmgrd"
# Seconds to wait for PortInitDone/PortConfigDone before "lldpcli resume"
# is sent anyway (see check_timeout()).
PORT_INIT_TIMEOUT = 300
# Minimum number of seconds between two attempts of a pending command that
# has already failed.
FAILED_CMD_TIMEOUT = 6
# Once a pending command has accumulated this many failures, the next
# failure drops it instead of scheduling another retry.
RETRY_LIMIT = 5
|
|
|
|
|
|
class LldpManager(daemon_base.DaemonBase):
    """
    Keeps lldpd's configuration in step with the SONiC Redis databases.

    The manager subscribes to the PORT table of the Application database and
    to the management-interface and device-metadata tables of the Config
    database, and pushes the resulting per-port, hostname and management
    address configuration to lldpd by calling lldpcli.

    Attributes:
        config_db: Handle to Redis Config database via swsscommon lib
        appl_db: Handle to Redis Application database via swsscommon lib
        state_db: Handle to Redis State database via swsscommon lib
        pending_cmds: Dictionary where key is port name and value is a dict
                      holding the pending lldpcli command ('cmd'), the number
                      of failed attempts ('failed_count') and, after the first
                      failure, the time of the last failure
                      ('failed_timestamp', used by the retry mechanism)
        hostname: Last hostname successfully configured in lldpd, or the
                  string "None" when nothing has been configured yet
        mgmt_ip: Last management IP successfully configured in lldpd, or the
                 string "None" when no address is configured
    """
    REDIS_TIMEOUT_MS = 0

    def __init__(self, log_identifier):
        """
        Open the database handles and the tables this daemon reads.

        Args:
            log_identifier: identifier forwarded to the DaemonBase constructor
        """
        super(LldpManager, self).__init__(log_identifier)

        # Open a handle to the Config database
        self.config_db = swsscommon.DBConnector("CONFIG_DB",
                                                self.REDIS_TIMEOUT_MS,
                                                False)

        # Open a handle to the Application database
        self.appl_db = swsscommon.DBConnector("APPL_DB",
                                              self.REDIS_TIMEOUT_MS,
                                              False)

        # Open a handle to the State database
        self.state_db = swsscommon.DBConnector("STATE_DB",
                                               self.REDIS_TIMEOUT_MS,
                                               False)

        self.pending_cmds = {}
        # The string "None" (not the None object) is the "nothing configured"
        # sentinel used throughout this class.
        self.hostname = "None"
        self.mgmt_ip = "None"

        self.device_table = swsscommon.Table(self.config_db, swsscommon.CFG_DEVICE_METADATA_TABLE_NAME)
        self.port_table = swsscommon.Table(self.config_db, swsscommon.CFG_PORT_TABLE_NAME)
        self.mgmt_table = swsscommon.Table(self.config_db, swsscommon.CFG_MGMT_INTERFACE_TABLE_NAME)
        self.app_port_table = swsscommon.Table(self.appl_db, swsscommon.APP_PORT_TABLE_NAME)
        self.state_port_table = swsscommon.Table(self.state_db, swsscommon.STATE_PORT_TABLE_NAME)

    def _run_lldpcli(self, args):
        """
        Run one lldpcli invocation without going through a shell.

        Values coming from the Config database are passed as single argv
        entries, so shell metacharacters in them are never interpreted.

        Args:
            args: complete argument vector, starting with "lldpcli"

        Returns:
            True when the command exited with status 0, False otherwise
            (a warning is logged in that case).
        """
        cmd = " ".join(args)
        self.log_debug("Running command: '{}'".format(cmd))

        try:
            proc = subprocess.Popen(args, universal_newlines=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            (_, stderr) = proc.communicate()
        except OSError as err:
            # Without a shell a missing/unrunnable binary raises instead of
            # returning a non-zero status; keep the best-effort behaviour.
            self.log_warning("Command failed '{}': {}".format(cmd, err))
            return False

        if proc.returncode != 0:
            self.log_warning("Command failed '{}': {}".format(cmd, stderr))
            return False
        return True

    def update_hostname(self, hostname):
        """
        Configure `hostname` as the system name advertised by lldpd.

        self.hostname is only updated when lldpcli succeeds, so a failed
        update is attempted again on the next differing event.
        """
        args = ["lldpcli", "configure", "system", "hostname", "{0}".format(hostname)]
        if self._run_lldpcli(args):
            self.hostname = hostname

    def update_mgmt_addr(self, ip):
        """
        Configure (or, for the string "None", unconfigure) the management
        address pattern advertised by lldpd.

        self.mgmt_ip is only updated when lldpcli succeeds.
        """
        if ip == "None":
            args = ["lldpcli", "unconfigure", "system", "ip", "management", "pattern"]
            self.log_info("Mgmt IP {0} deleted".format(self.mgmt_ip))
        else:
            args = ["lldpcli", "configure", "system", "ip", "management", "pattern", "{0}".format(ip)]
            self.log_info("Mgmt IP changed old ip {0}, new ip {1}".format(self.mgmt_ip, ip))

        if self._run_lldpcli(args):
            self.mgmt_ip = ip

    def is_port_up(self, port_name):
        """
        Determine if a port is up or down by looking into the netdev_oper_status for the port in
        PORT TABLE in the State DB

        Returns:
            True only when the port has an entry whose netdev_oper_status
            field equals "up"; False in every other case.
        """
        # Retrieve all entries for this port from the Port table
        (status, fvp) = self.state_port_table.get(port_name)
        if not status:
            return False

        # Convert list of tuples to a dictionary and check the oper-status
        return dict(fvp).get("netdev_oper_status") == "up"

    def generate_pending_lldp_config_cmd_for_port(self, port_name, port_table_dict):
        """
        For port `port_name`, take the description and alias from
        `port_table_dict`, form the appropriate lldpcli configuration command
        and queue it in self.pending_cmds (it is executed later by
        process_pending_cmds()).

        Args:
            port_name: name of the port, used as key in self.pending_cmds
            port_table_dict: field/value dictionary of the port's table entry
        """
        # Function-scope import: the command is later executed through a
        # shell, so every interpolated value has to be shell-quoted.
        import shlex

        # Skip inband interface prefixes. These are recycle ports exposed in PORT_TABLE for
        # asic-to-asic communication in VOQ based chassis system. We do not configure LLDP on these.
        if port_name.startswith(inband_prefix()):
            return

        # Get the port alias. If None or empty string, use port name instead
        port_alias = port_table_dict.get("alias")
        if not port_alias:
            self.log_info("Unable to retrieve port alias for port '{}'. Using port name instead.".format(port_name))
            port_alias = port_name

        # Get the port description. If None or empty string, we'll skip this configuration
        port_desc = port_table_dict.get("description")

        lldpcli_cmd = "lldpcli configure ports {0} lldp portidsubtype local {1}".format(
            shlex.quote(port_name), shlex.quote(port_alias))

        # if there is a description available, also configure that
        if port_desc:
            # shlex.quote keeps the description a single argument even when
            # it contains quotes, spaces or other shell metacharacters.
            lldpcli_cmd += " description {}".format(shlex.quote(port_desc))
        else:
            self.log_info("Unable to retrieve description for port '{}'. Not adding port description".format(port_name))

        # Add the command to our dictionary of pending commands, overwriting any
        # previous pending command for this port
        self.pending_cmds[port_name] = {'cmd': lldpcli_cmd, 'failed_count': 0}

    def process_pending_cmds(self):
        """
        Try to run every pending port command whose port is currently up.

        A command is removed from self.pending_cmds when it succeeds or when
        it fails after RETRY_LIMIT earlier failures. Otherwise it stays queued
        and is retried no sooner than FAILED_CMD_TIMEOUT seconds after its
        last failure.
        """
        # List of port names (keys of elements) to delete from self.pending_cmds
        to_delete = []

        for (port_name, port_item) in self.pending_cmds.items():
            cmd = port_item['cmd']

            # check if linux port is up
            if not self.is_port_up(port_name):
                self.log_info("port %s is not up, continue"%port_name)
                continue

            # Back off: this command failed too recently to be retried now
            if 'failed_timestamp' in port_item and time.time()-port_item['failed_timestamp']<FAILED_CMD_TIMEOUT:
                continue

            self.log_debug("Running command: '{}'".format(cmd))
            rc, stderr = run_cmd(self, cmd)
            # If the command succeeds, add the port name to our to_delete list.
            # We will delete this command from self.pending_cmds below.
            # If the command fails, log a message, but don't delete the command
            # from self.pending_cmds, so that the command will be retried the
            # next time this method is called.
            if rc == 0:
                to_delete.append(port_name)
            elif port_item['failed_count'] >= RETRY_LIMIT:
                self.log_error("Command failed '{}': {} - command was failed {} times, disabling retry".format(cmd, stderr, RETRY_LIMIT+1))
                # not retrying again
                to_delete.append(port_name)
            else:
                # Only values are mutated here, never the set of keys, so
                # this is safe while iterating over the dictionary.
                port_item['failed_count'] += 1
                port_item['failed_timestamp'] = time.time()
                self.log_info("Command failed '{}': {} - cmd failed {} times, retrying again".format(cmd, stderr, port_item['failed_count']))

        # Delete all finished (successful or given-up) commands from self.pending_cmds
        for port_name in to_delete:
            self.pending_cmds.pop(port_name, None)

    def lldp_get_mgmt_ip(self):
        """
        Pick a replacement management address from the management-interface
        table of the Config DB.

        Keys are expected to look like "<name>|<ip>/<prefix>"; keys of any
        other shape are ignored, as is the currently configured address.

        Returns:
            An IPv4 address if one is found, else a non-IPv4 address if one
            is found, else the string "None".
        """
        mgmt_intf_keys = self.mgmt_table.getKeys()
        ipv4_addr = "None"
        ipv6_addr = "None"
        for key in mgmt_intf_keys:
            if '|' not in key:
                continue
            key = key.split('|', 1)
            if '/' not in key[1]:
                continue
            ip = key[1].split('/', 1)
            if self.mgmt_ip != ip[0]:
                # An address containing a dot is treated as IPv4
                if '.' in ip[0]:
                    ipv4_addr = ip[0]
                else:
                    ipv6_addr = ip[0]

        if ipv4_addr != "None":
            return ipv4_addr
        elif ipv6_addr != "None":
            return ipv6_addr
        else:
            return "None"

    def lldp_process_mgmt_info_change(self, op, mgmt_dict, key):
        """
        React to a SET/DEL event on the management-interface table.

        Args:
            op: operation of the event; anything but "SET"/"DEL" is ignored
            mgmt_dict: field/value dictionary of the event (only logged)
            key: table key, expected as "<name>|<ip>/<prefix>"
        """
        if op not in ["SET", "DEL"]:
            return
        self.log_info("Mgmt Config Opcode: {} Dict {} Key {} Curr {}".format(op, mgmt_dict, key, self.mgmt_ip))
        if '|' not in key:
            return
        key = key.split('|', 1)
        if '/' not in key[1]:
            return
        ip = key[1].split('/', 1)
        if op == "DEL":
            # Only a deletion of the address currently in use matters; fall
            # back to another configured address (or "None").
            if self.mgmt_ip == ip[0]:
                ip_addr = self.lldp_get_mgmt_ip()
                self.update_mgmt_addr(ip_addr)
        elif self.mgmt_ip != ip[0]:
            if '.' in ip[0]:
                # A new IPv4 address always replaces the current one
                self.update_mgmt_addr(ip[0])
            elif '.' not in self.mgmt_ip:
                # A non-IPv4 address never replaces a configured IPv4 one
                self.update_mgmt_addr(ip[0])

    def lldp_process_device_table_event(self, op, device_dict, key):
        """
        React to a SET/DEL event on the device-metadata table by updating
        the hostname advertised by lldpd.

        Args:
            op: operation of the event; anything but "SET"/"DEL" is ignored
            device_dict: field/value dictionary of the event
            key: table key of the event (only logged)
        """
        if op not in ["SET", "DEL"]:
            return
        self.log_info("Device Config Opcode: {} Dict {} Key {}".format(op, device_dict, key))
        hostname = device_dict.get("hostname")
        if not hostname:
            # Events without a hostname field (e.g. DEL) must not push the
            # literal name "None" to lldpd.
            return
        if self.hostname != hostname:
            self.log_info("Hostname changed old {0}, new {1}".format(self.hostname, hostname))
            self.update_hostname(hostname)

    def run(self):
        """
        Main loop of the daemon; does not return.

        Subscribes to the PORT table of the Application DB (port create /
        delete events and oper status) and to the management-interface and
        device-metadata tables of the Config DB, and updates the LLDP
        configuration accordingly.

        lldpd is resumed with "lldpcli resume" once both PortInitDone and
        PortConfigDone have been seen, or once PORT_INIT_TIMEOUT has elapsed.
        Exits the process with status 1 when not run as root or when the
        resume command fails.
        """
        self.log_info("Starting up...")

        if os.geteuid() != 0:
            self.log_error("Must be root to run this daemon")
            print("Error: Must be root to run this daemon")
            sys.exit(1)

        # Set select timeout to 10 seconds
        SELECT_TIMEOUT_MS = 1000 * 10

        # Daemon is paused on the configuration file to avoid lldp packets with wrong information
        # until all interfaces are well configured on lldpd
        port_init_done = False
        port_config_done = False
        resume_lldp_sent = False
        start_time = time.time()

        sel = swsscommon.Select()

        # Subscribe to PORT table notifications in the App DB
        sst_appdb = swsscommon.SubscriberStateTable(self.appl_db, swsscommon.APP_PORT_TABLE_NAME)
        sel.addSelectable(sst_appdb)

        # Subscribe to MGMT PORT table notifications in the Config DB
        sst_mgmt_ip_confdb = swsscommon.SubscriberStateTable(self.config_db, swsscommon.CFG_MGMT_INTERFACE_TABLE_NAME)
        sel.addSelectable(sst_mgmt_ip_confdb)

        # Subscribe to DEVICE_METADATA table notifications in the Config DB
        sst_device_confdb = swsscommon.SubscriberStateTable(self.config_db, swsscommon.CFG_DEVICE_METADATA_TABLE_NAME)
        sel.addSelectable(sst_device_confdb)

        # Listen for changes to the subscribed tables in the CONFIG_DB and APP_DB
        while True:
            (state, c) = sel.select(SELECT_TIMEOUT_MS)

            if state == swsscommon.Select.OBJECT:
                (key, op, fvp) = sst_mgmt_ip_confdb.pop()
                if key:
                    self.lldp_process_mgmt_info_change(op, dict(fvp), key)

                (key, op, fvp) = sst_device_confdb.pop()
                if key:
                    self.lldp_process_device_table_event(op, dict(fvp), key)

                (key, op, fvp) = sst_appdb.pop()
                if not key:
                    # NOTE(review): mirrors the `if key` guards above, which
                    # suggest pop() yields an empty key when this table had
                    # nothing queued - confirm against swsscommon. Without
                    # the guard such wake-ups were logged as
                    # "unknown operation".
                    pass
                elif key == "PortInitDone":
                    port_init_done = True
                elif key == "PortConfigDone":
                    port_config_done = True
                elif op == "SET":
                    if fvp:
                        port_fields = dict(fvp)
                        if "up" in port_fields.get("oper_status", ""):
                            self.generate_pending_lldp_config_cmd_for_port(key, port_fields)
                        else:
                            # Port went down: drop its not-yet-applied command
                            self.pending_cmds.pop(key, None)
                elif op == "DEL":
                    # Port was removed: never retry a command for it
                    self.pending_cmds.pop(key, None)
                else:
                    self.log_error("unknown operation")

            # Process all pending commands (also on select timeout, so that
            # failed commands get retried)
            self.process_pending_cmds()

            # Resume the daemon since all interfaces data updated and configured to the lldpd so no miss leading packets will be sent
            if not resume_lldp_sent:
                if check_timeout(self, start_time):
                    port_init_done = port_config_done = True
                if port_init_done and port_config_done:
                    port_init_done = port_config_done = False
                    rc, stderr = run_cmd(self, "lldpcli resume")
                    if rc != 0:
                        self.log_error("Failed to resume lldpd with command: 'lldpcli resume': {}".format(stderr))
                        sys.exit(1)
                    resume_lldp_sent = True
|
# ============================= Functions =============================
|
|
|
|
|
|
def main():
    """Create the LLDP manager daemon, enable INFO-level logging and run it."""
    daemon = LldpManager(SYSLOG_IDENTIFIER)

    # Messages of INFO priority and above are logged
    daemon.set_min_log_priority_info()

    daemon.run()
|
|
|
|
|
|
def run_cmd(self, cmd):
    """
    Execute `cmd` through the shell and wait for it to finish.

    Args:
        self: unused; kept so callers can pass the daemon instance
        cmd: command line string handed to the shell

    Returns:
        Tuple (exit status, captured stderr as text). stdout is captured
        and discarded.
    """
    completed = subprocess.run(cmd,
                               shell=True,
                               universal_newlines=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    return completed.returncode, completed.stderr
|
|
|
|
|
|
def check_timeout(self, start_time):
    """
    Tell whether more than PORT_INIT_TIMEOUT seconds have passed since
    `start_time`.

    Args:
        self: object providing log_error(); used to report the timeout
        start_time: reference time as returned by time.time()

    Returns:
        True (after logging an error) when the timeout has been exceeded,
        False otherwise.
    """
    timed_out = (time.time() - start_time) > PORT_INIT_TIMEOUT
    if timed_out:
        self.log_error("Port init timeout reached ({} seconds), resuming lldpd...".format(PORT_INIT_TIMEOUT))
    return timed_out
|
|
|
|
|
|
# Start the daemon only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|