[LLDP] Fix for LLDP advertisements being sent with wrong information. (#5493)

* Fix for LLDP advertisements being sent with wrong information.
Since lldpd starts before lldpmgrd, some advertisement packets might be sent with the default value (the MAC address) as the Port ID.
This fix holds the packets from being sent by lldpd until all interfaces are fully configured by lldpmgrd.

Signed-off-by: Shlomi Bitton <shlomibi@nvidia.com>

* Fix comments

* Fix unit-test output that caused a failure during build

* Add 'run_cmd' function and use it

* Resume lldpd even if port init timeout reached
This commit is contained in:
shlomibitton 2020-10-26 19:38:09 +02:00 committed by GitHub
parent c14b41dc30
commit e66d49a57c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 43 additions and 4 deletions

View File

@ -10,3 +10,5 @@ configure ports eth0 lldp portidsubtype local {{ mgmt_port_name }}
configure system ip management pattern {{ ipv4 }} configure system ip management pattern {{ ipv4 }}
{% endif %} {% endif %}
configure system hostname {{ DEVICE_METADATA['localhost']['hostname'] }} configure system hostname {{ DEVICE_METADATA['localhost']['hostname'] }}
{# pause lldpd operations until all interfaces are well configured, resume command will run in lldpmgrd #}
pause

View File

@ -19,6 +19,9 @@ try:
import subprocess import subprocess
import sys import sys
import syslog
import os.path
import time
from sonic_py_common import daemon_base from sonic_py_common import daemon_base
from swsscommon import swsscommon from swsscommon import swsscommon
except ImportError as err: except ImportError as err:
@ -27,6 +30,7 @@ except ImportError as err:
VERSION = "1.0" VERSION = "1.0"
SYSLOG_IDENTIFIER = "lldpmgrd" SYSLOG_IDENTIFIER = "lldpmgrd"
PORT_INIT_TIMEOUT = 300
class LldpManager(daemon_base.DaemonBase): class LldpManager(daemon_base.DaemonBase):
@ -130,16 +134,14 @@ class LldpManager(daemon_base.DaemonBase):
for (port_name, cmd) in self.pending_cmds.iteritems(): for (port_name, cmd) in self.pending_cmds.iteritems():
self.log_debug("Running command: '{}'".format(cmd)) self.log_debug("Running command: '{}'".format(cmd))
proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) rc, stderr = run_cmd(self, cmd)
(stdout, stderr) = proc.communicate()
# If the command succeeds, add the port name to our to_delete list. # If the command succeeds, add the port name to our to_delete list.
# We will delete this command from self.pending_cmds below. # We will delete this command from self.pending_cmds below.
# If the command fails, log a message, but don't delete the command # If the command fails, log a message, but don't delete the command
# from self.pending_cmds, so that the command will be retried the # from self.pending_cmds, so that the command will be retried the
# next time this method is called. # next time this method is called.
if proc.returncode == 0: if rc == 0:
to_delete.append(port_name) to_delete.append(port_name)
else: else:
self.log_warning("Command failed '{}': {}".format(cmd, stderr)) self.log_warning("Command failed '{}': {}".format(cmd, stderr))
@ -166,6 +168,13 @@ class LldpManager(daemon_base.DaemonBase):
# Set select timeout to 10 seconds # Set select timeout to 10 seconds
SELECT_TIMEOUT_MS = 1000 * 10 SELECT_TIMEOUT_MS = 1000 * 10
# lldpd is paused by its configuration file to avoid sending LLDP packets with wrong information
# until all interfaces are fully configured in lldpd
port_init_done = False
port_config_done = False
resume_lldp_sent = False
start_time = time.time()
sel = swsscommon.Select() sel = swsscommon.Select()
# Subscribe to PORT table notifications in the Config DB # Subscribe to PORT table notifications in the Config DB
@ -204,9 +213,25 @@ class LldpManager(daemon_base.DaemonBase):
else: else:
self.pending_cmds.pop(key, None) self.pending_cmds.pop(key, None)
elif key == "PortInitDone":
port_init_done = True
elif key == "PortConfigDone":
port_config_done = True
# Process all pending commands # Process all pending commands
self.process_pending_cmds() self.process_pending_cmds()
# Resume lldpd now that all interface data has been updated and configured in lldpd, so no misleading packets will be sent
if not resume_lldp_sent:
if check_timeout(self, start_time):
port_init_done = port_config_done = True
if port_init_done and port_config_done:
port_init_done = port_config_done = False
rc, stderr = run_cmd(self, "lldpcli resume")
if rc != 0:
self.log_error("Failed to resume lldpd with command: 'lldpcli resume': {}".format(stderr))
sys.exit(1)
resume_lldp_sent = True
# ============================= Functions ============================= # ============================= Functions =============================
@ -219,6 +244,16 @@ def main():
lldpmgr.run() lldpmgr.run()
def run_cmd(self, cmd):
    """Run a shell command and report how it finished.

    Args:
        self: Not used by this function; kept in the signature so the
            existing call sites that pass the daemon instance keep working.
        cmd: Command line string. It is handed to the shell unchanged
            (shell=True), so callers must not embed unsanitised input.

    Returns:
        A (returncode, stderr) tuple: the exit status of the command and
        everything it wrote to standard error.
    """
    # stdout is piped so the child's output is captured instead of being
    # inherited by this process; only stderr is handed back to the caller.
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    _, stderr = proc.communicate()
    return proc.returncode, stderr
def check_timeout(self, start_time):
    """Tell whether the port-initialisation wait has exceeded its limit.

    Args:
        self: Object providing ``log_error``; used to report the timeout.
        start_time: ``time.time()`` reading taken when the wait began.

    Returns:
        True when more than PORT_INIT_TIMEOUT seconds have passed since
        ``start_time`` (an error is logged in that case), False otherwise.
    """
    elapsed = time.time() - start_time
    timed_out = elapsed > PORT_INIT_TIMEOUT
    if timed_out:
        self.log_error("Port init timeout reached ({} seconds), resuming lldpd...".format(PORT_INIT_TIMEOUT))
    return timed_out
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -1,3 +1,4 @@
configure ports eth0 lldp portidsubtype local eth0 configure ports eth0 lldp portidsubtype local eth0
configure system ip management pattern 10.0.0.100 configure system ip management pattern 10.0.0.100
configure system hostname switch-t0 configure system hostname switch-t0
pause

View File

@ -1,3 +1,4 @@
configure ports eth0 lldp portidsubtype local eth0 configure ports eth0 lldp portidsubtype local eth0
configure system ip management pattern 10.0.0.100 configure system ip management pattern 10.0.0.100
configure system hostname switch-t0 configure system hostname switch-t0
pause