From 5efb123ede3fc32ec48922998bebb4e86739bda6 Mon Sep 17 00:00:00 2001
From: Yevhen Fastiuk
Date: Mon, 11 Dec 2023 23:31:35 +0200
Subject: [PATCH 01/33] [NTP] Add NTP extended configuration (#15058)

HLD [#1296](https://github.com/sonic-net/SONiC/pull/1296)
closes [#1254](https://github.com/sonic-net/SONiC/issues/1254)
depends-on [#60](https://github.com/sonic-net/sonic-host-services/pull/60), [#781](https://github.com/sonic-net/sonic-swss-common/pull/781), [#2835](https://github.com/sonic-net/sonic-utilities/pull/2835), [#10749](https://github.com/sonic-net/sonic-mgmt/pull/10749)

#### Why I did it
To cover the following action items:
* Configure NTP global parameters
* Add/remove new NTP servers
* Change the configuration for NTP servers
* Show NTP status
* Show NTP configuration

### How I did it
* Add a YANG model for the new configuration
* Extend configuration templates to support the new knobs

### Description for the changelog
* Add the ability to configure NTP global parameters such as authentication, DHCP, and admin state
* Change the configuration for NTP servers
* Add the ability to show the NTP configuration

#### Link to config_db schema for YANG module changes
[NTP configuration](https://github.com/sonic-net/sonic-buildimage/blob/master/src/sonic-yang-models/doc/Configuration.md#ntp-and-syslog-servers)
---
 files/build_templates/init_cfg.json.j2          |  10 +
 .../build_templates/sonic_debian_extension.j2   |   4 +
 files/image_config/ntp/ntp-config.sh            |   2 +
 files/image_config/ntp/ntp-systemd-wrapper      |  11 +-
 files/image_config/ntp/ntp.conf.j2              | 120 +++++---
 files/image_config/ntp/ntp.keys.j2              |  18 ++
 files/image_config/ntp/ntpsec                   | 100 -------
 src/sonic-config-engine/sonic-cfggen            |  28 ++
 .../tests/data/ntp/ntp_interfaces.json          |  44 ++-
 src/sonic-config-engine/tests/ntp.keys.j2       |   1 +
 .../tests/sample_output/py2/ntp.conf            |  59 +---
 .../tests/sample_output/py2/ntp.keys            |   1 +
 .../tests/sample_output/py3/ntp.conf            |  47 +---
 .../tests/sample_output/py3/ntp.keys            |   8 +
 src/sonic-config-engine/tests/test_j2files.py   |  13 +-
 src/sonic-yang-models/doc/Configuration.md      | 130 ++++++++-
 .../tests/files/sample_config_db.json           |  28 +-
 .../tests/yang_model_tests/tests/ntp.json       |  77 ++++++
 .../yang_model_tests/tests_config/ntp.json      | 260 +++++++++++++++++-
 .../yang-models/sonic-ntp.yang                  | 149 ++++++++++
 .../yang-templates/sonic-types.yang.j2          |  16 ++
 21 files changed, 885 insertions(+), 241 deletions(-)
 create mode 100644 files/image_config/ntp/ntp.keys.j2
 delete mode 100755 files/image_config/ntp/ntpsec
 create mode 120000 src/sonic-config-engine/tests/ntp.keys.j2
 mode change 100644 => 120000 src/sonic-config-engine/tests/sample_output/py2/ntp.conf
 create mode 120000 src/sonic-config-engine/tests/sample_output/py2/ntp.keys
 create mode 100644 src/sonic-config-engine/tests/sample_output/py3/ntp.keys

diff --git a/files/build_templates/init_cfg.json.j2 b/files/build_templates/init_cfg.json.j2
index 7ec71a3589..53091a4a24 100644
--- a/files/build_templates/init_cfg.json.j2
+++ b/files/build_templates/init_cfg.json.j2
@@ -157,5 +157,15 @@
         "memory": "0M-2G:256M,2G-4G:320M,4G-8G:384M,8G-:448M",
         "num_dumps": "3"
     }
+    },
+    "NTP": {
+        "global": {
+            "authentication": "disabled",
+            "dhcp": "enabled",
+            "server_role": "disabled",
+            "src_intf": "eth0",
+            "admin_state": "enabled",
+            "vrf": "default"
+        }
+    }
     }
 }
diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2
index ed52539e2a..4d58d7149d 100644
--- a/files/build_templates/sonic_debian_extension.j2
+++ b/files/build_templates/sonic_debian_extension.j2
@@ -373,10
+373,14 @@ sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/flashrom_*.deb sudo cp -f $IMAGE_CONFIGS/cron.d/* $FILESYSTEM_ROOT/etc/cron.d/ # Copy NTP configuration files and templates +sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT \ + apt-get -y install ntpdate +sudo rm -f $FILESYSTEM_ROOT/etc/network/if-up.d/ntpsec-ntpdate sudo cp $IMAGE_CONFIGS/ntp/ntp-config.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM echo "ntp-config.service" | sudo tee -a $GENERATED_SERVICE_FILE sudo cp $IMAGE_CONFIGS/ntp/ntp-config.sh $FILESYSTEM_ROOT/usr/bin/ sudo cp $IMAGE_CONFIGS/ntp/ntp.conf.j2 $FILESYSTEM_ROOT_USR_SHARE_SONIC_TEMPLATES/ +sudo cp $IMAGE_CONFIGS/ntp/ntp.keys.j2 $FILESYSTEM_ROOT_USR_SHARE_SONIC_TEMPLATES/ sudo cp $IMAGE_CONFIGS/ntp/ntp-systemd-wrapper $FILESYSTEM_ROOT/usr/libexec/ntpsec/ sudo mkdir $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM/ntpsec.service.d sudo cp $IMAGE_CONFIGS/ntp/sonic-target.conf $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM/ntpsec.service.d/ diff --git a/files/image_config/ntp/ntp-config.sh b/files/image_config/ntp/ntp-config.sh index cda88dbf99..13469c5896 100755 --- a/files/image_config/ntp/ntp-config.sh +++ b/files/image_config/ntp/ntp-config.sh @@ -24,6 +24,8 @@ function modify_ntp_default } sonic-cfggen -d -t /usr/share/sonic/templates/ntp.conf.j2 >/etc/ntpsec/ntp.conf +sonic-cfggen -d -t /usr/share/sonic/templates/ntp.keys.j2 >/etc/ntpsec/ntp.keys +chmod o-r /etc/ntp.keys get_database_reboot_type echo "Disabling NTP long jump for reboot type ${reboot_type} ..." diff --git a/files/image_config/ntp/ntp-systemd-wrapper b/files/image_config/ntp/ntp-systemd-wrapper index 87e8f8601b..0704a8e92b 100644 --- a/files/image_config/ntp/ntp-systemd-wrapper +++ b/files/image_config/ntp/ntp-systemd-wrapper @@ -13,7 +13,8 @@ if [ -r /etc/default/ntpsec ]; then . /etc/default/ntpsec fi -if [ "$IGNORE_DHCP" != "yes" ] && [ -e /run/ntpsec/ntp.conf.dhcp ]; then +dhcp=$(/usr/local/bin/sonic-cfggen -d -v 'NTP["global"]["dhcp"]' 2> /dev/null) +if [ "$IGNORE_DHCP" != "yes" ] && [ -e /run/ntpsec/ntp.conf.dhcp ] && [ "$dhcp" = "enabled" ]; then NTPD_OPTS="$NTPD_OPTS -c /run/ntpsec/ntp.conf.dhcp" else # List the default -c first, so if the admin has specified -c in @@ -26,6 +27,14 @@ NTPD_OPTS="$NTPD_OPTS -u ntpsec:ntpsec" # Protect the service startup against concurrent ntpdate ifup hooks ( if flock -w 180 9; then + ntpEnabled=$(/usr/local/bin/sonic-cfggen -d -v 'NTP["global"]["admin_state"]' 2> /dev/null) + if [ "$ntpEnabled" = "disabled" ] + then + echo "Stopping NTP daemon" + kill -9 $(cat $PIDFILE) + exit 0 + fi + # when mgmt vrf is configured, ntp starts in mgmt vrf by default unless user configures otherwise vrfEnabled=$(/usr/local/bin/sonic-cfggen -d -v 'MGMT_VRF_CONFIG["vrf_global"]["mgmtVrfEnabled"]' 2> /dev/null) vrfConfigured=$(/usr/local/bin/sonic-cfggen -d -v 'NTP["global"]["vrf"]' 2> /dev/null) diff --git a/files/image_config/ntp/ntp.conf.j2 b/files/image_config/ntp/ntp.conf.j2 index c0c19db3b1..0563e50444 100644 --- a/files/image_config/ntp/ntp.conf.j2 +++ b/files/image_config/ntp/ntp.conf.j2 @@ -1,9 +1,9 @@ ############################################################################### -# Managed by Ansible -# file: ansible/roles/acs/templates/ntp.conf.j2 +# This file was AUTOMATICALLY GENERATED. DO NOT MODIFY. 
+# Controlled by ntp-config.service ############################################################################### -# /etc/ntpsec/ntp.conf, configuration for ntpd; see ntp.conf(5) for help +# /etc/ntp.conf, configuration for ntpd; see ntp.conf(5) for help # To avoid ntpd from panic and exit if the drift between new time and # current system time is large. @@ -12,35 +12,82 @@ tinker panic 0 driftfile /var/lib/ntpsec/ntp.drift leapfile /usr/share/zoneinfo/leap-seconds.list -# To enable Network Time Security support as a server, obtain a certificate -# (e.g. with Let's Encrypt), configure the paths below, and uncomment: -# nts cert CERT_FILE -# nts key KEY_FILE -# nts enable +{# Getting NTP global configuration -#} +{% set global = (NTP | d({})).get('global', {}) -%} -# You must create /var/log/ntpsec (owned by ntpsec:ntpsec) to enable logging. -#statsdir /var/log/ntpsec/ -#statistics loopstats peerstats clockstats -#filegen loopstats file loopstats type day enable -#filegen peerstats file peerstats type day enable -#filegen clockstats file clockstats type day enable +{# Adding NTP servers. We need to know if we have some pools, to set proper +config -#} +{% set ns = namespace(is_pools=false) %} +{% for server in NTP_SERVER if NTP_SERVER[server].admin_state != 'disabled' and + NTP_SERVER[server].resolve_as and + NTP_SERVER[server].association_type -%} + {% set config = NTP_SERVER[server] -%} + {# Server options -#} + {% set soptions = '' -%} + {# Server access control options -#} + {% set aoptions = '' -%} -# Specify one or more NTP servers. + {# Authentication key -#} + {% if global.authentication == 'enabled' -%} + {% if config.key -%} + {% set soptions = soptions ~ ' key ' ~ config.key -%} + {% endif -%} + {% endif -%} -# Public NTP servers supporting Network Time Security: -# server time.cloudflare.com nts -{% for ntp_server in NTP_SERVER %} -server {{ ntp_server }} iburst + {# Aggressive polling -#} + {% if config.iburst -%} + {% set soptions = soptions ~ ' iburst' -%} + {% endif -%} + + {# Protocol version -#} + {% if config.version -%} + {% set soptions = soptions ~ ' version ' ~ config.version -%} + {% endif -%} + + {# Check if there are any pool configured. BTW it doesn't matter what was + configured as "resolve_as" for pools. If they were configured with FQDN they + must remain like that -#} + {% set config_as = config.resolve_as -%} + {% if config.association_type == 'pool' -%} + {% set ns.is_pools = true -%} + {% set config_as = server -%} + {% else -%} + {% set aoptions = aoptions ~ ' nopeer' -%} + {% endif -%} + +{{ config.association_type }} {{ config_as }}{{ soptions }} +{% if global.server_role == 'disabled' %} +restrict {{ config_as }} kod limited nomodify notrap noquery{{ aoptions }} +{% endif %} + +{% endfor -%} + +{% set trusted_keys_arr = [] -%} +{% for key in NTP_KEY -%} + {% set keydata = NTP_KEY[key] -%} + {% if keydata.trusted == 'yes' -%} + {% set trusted_keys_arr = trusted_keys_arr.append(key) -%} + {% endif -%} {% endfor %} -# pool.ntp.org maps to about 1000 low-stratum NTP servers. Your server will -# pick a different set every time it starts up. 
Please consider joining the -# pool: +{% if global.authentication == 'enabled' %} +keys /etc/ntpsec/ntp.keys +{% if trusted_keys_arr != [] %} +trustedkey {{ trusted_keys_arr|join(' ') }} +{% endif %} +{% endif %} -#listen on source interface if configured, else -#only listen on MGMT_INTERFACE, LOOPBACK_INTERFACE ip when MGMT_INTERFACE is not defined, or eth0 -# if we don't have both of them (default is to listen on all ip addresses) +{# listen on source interface if configured, else only listen on MGMT_INTERFACE, +LOOPBACK_INTERFACE ip when MGMT_INTERFACE is not defined, or eth0 if we don't +have both of them (default is to listen on all ip addresses) -#} interface ignore wildcard + +{# Set interface to listen on: + * Set global variable for configured source interface name. + * Set global boolean to indicate if the ip of the configured source + interface is configured. + * If the source interface is configured but no ip on that + interface, then listen on another interface based on existing logic. -#} {%- macro check_ip_on_interface(interface_name, table_name) %} {%- set ns = namespace(valid_intf = 'false') %} {%- if table_name %} @@ -55,8 +102,8 @@ interface ignore wildcard {% set ns = namespace(source_intf = "") %} {%- set ns = namespace(source_intf_ip = 'false') %} -{%- if (NTP) and (NTP['global']['src_intf']) %} - {%- set ns.source_intf = (NTP['global']['src_intf']) %} +{%- if global.src_intf %} + {%- set ns.source_intf = global.src_intf %} {%- if ns.source_intf != "" %} {%- if ns.source_intf == "eth0" %} {%- set ns.source_intf_ip = 'true' %} @@ -91,16 +138,19 @@ interface listen eth0 {% endif %} interface listen 127.0.0.1 -# Access control configuration; see /usr/share/doc/ntpsec-doc/html/accopt.html -# for details. -# -# Note that "restrict" applies to both servers and clients, so a configuration -# that might be intended to block requests from certain clients could also end -# up blocking replies from your own upstream servers. +{# Access control options -#} +{% set options = '' -%} +{# Disable NTP server functionality. Should stay on when dhcp is enabled -#} +{# {% if global.server_role == 'disabled' and global.dhcp == 'disabled' -%} + {% set options = options ~ ' ignore' -%} +{% endif -%} #} + +# Access control configuration # By default, exchange time with everybody, but don't allow configuration. -# NTPsec doesn't establish peer associations, and so nopeer has no effect, and has been removed from here -restrict default kod nomodify noquery limited +# NTPsec doesn't establish peer associations, and so nopeer has no effect, and +# has been removed from here +restrict default kod nomodify noquery limited{{ options }} # Local users may interrogate the ntp server more closely. restrict 127.0.0.1 diff --git a/files/image_config/ntp/ntp.keys.j2 b/files/image_config/ntp/ntp.keys.j2 new file mode 100644 index 0000000000..961fc75326 --- /dev/null +++ b/files/image_config/ntp/ntp.keys.j2 @@ -0,0 +1,18 @@ +############################################################################### +# This file was AUTOMATICALLY GENERATED. DO NOT MODIFY. +# Controlled by ntp-config.service +############################################################################### + +{# We can connect only to the servers we trust. 
Determine those servers -#} +{% set trusted_arr = [] -%} +{% for server in NTP_SERVER if NTP_SERVER[server].trusted == 'yes' and + NTP_SERVER[server].resolve_as -%} + {% set _ = trusted_arr.append(NTP_SERVER[server].resolve_as) -%} +{% endfor -%} + +{# Define authentication keys inventory -#} +{% set trusted_str = ' ' ~ trusted_arr|join(',') -%} +{% for keyid in NTP_KEY if NTP_KEY[keyid].type and NTP_KEY[keyid].value %} +{% set keyval = NTP_KEY[keyid].value | b64decode %} +{{ keyid }} {{ NTP_KEY[keyid].type }} {{ keyval }}{{trusted_str}} +{% endfor -%} diff --git a/files/image_config/ntp/ntpsec b/files/image_config/ntp/ntpsec deleted file mode 100755 index 2056a0e125..0000000000 --- a/files/image_config/ntp/ntpsec +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/sh - -# This file was originally created automatically as part of default NTP application installation from debian package. -# This is now manually modified for supporting NTP in management VRF. -# When management VRF is enabled, the NTP application should be started using "cgexec -g l3mdev:mgmt". -# Check has been added to verify the management VRF enabled status and use cgexec when it is enabled. -# This file will be copied on top of the etc/init.d/ntpsec file that gets created during build process. - -### BEGIN INIT INFO -# Provides: ntpsec -# Required-Start: $network $remote_fs $syslog -# Required-Stop: $network $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: -# Short-Description: Start NTP daemon -# Description: NTP, the Network Time Protocol, is used to keep computer -# clocks accurate by synchronizing them over the Internet or -# a local network, or by following an accurate hardware -# receiver that interprets GPS, DCF-77, or similar time -# signals. -### END INIT INFO - -PATH=/sbin:/bin:/usr/sbin:/usr/bin - -. /lib/lsb/init-functions - -DAEMON=/usr/sbin/ntpd -PIDFILE=/run/ntpd.pid - -test -x $DAEMON || exit 5 - -if [ -r /etc/default/ntpsec ]; then - . /etc/default/ntpsec -fi - -if [ "$IGNORE_DHCP" != "yes" ] && [ -e /run/ntpsec/ntp.conf.dhcp ]; then - NTPD_OPTS="$NTPD_OPTS -c /run/ntpsec/ntp.conf.dhcp" -else - # List the default -c first, so if the admin has specified -c in - # NTPD_OPTS, it is honored. - NTPD_OPTS="-c /etc/ntpsec/ntp.conf $NTPD_OPTS" -fi - -NTPD_OPTS="$NTPD_OPTS -u ntpsec:ntpsec" - -LOCKFILE=/run/lock/ntpsec-ntpdate - -case $1 in - start) - log_daemon_msg "Starting NTP server" "ntpd" - ( - flock -w 180 9 - - # when mgmt vrf is configured, ntp starts in mgmt vrf by default unless user configures otherwise - vrfEnabled=$(/usr/local/bin/sonic-cfggen -d -v 'MGMT_VRF_CONFIG["vrf_global"]["mgmtVrfEnabled"]' 2> /dev/null) - vrfConfigured=$(/usr/local/bin/sonic-cfggen -d -v 'NTP["global"]["vrf"]' 2> /dev/null) - if [ "$vrfEnabled" = "true" ] - then - if [ "$vrfConfigured" = "default" ] - then - log_daemon_msg "Starting NTP server in default-vrf for default set as NTP vrf" "ntpd" - start-stop-daemon --start --quiet --oknodo --pidfile $PIDFILE --startas $DAEMON -- -p $PIDFILE $NTPD_OPTS - else - log_daemon_msg "Starting NTP server in mgmt-vrf" "ntpd" - cgexec -g l3mdev:mgmt start-stop-daemon --start --quiet --oknodo --pidfile $PIDFILE --startas $DAEMON -- -p $PIDFILE $NTPD_OPTS - fi - else - log_daemon_msg "Starting NTP server in default-vrf" "ntpd" - start-stop-daemon --start --quiet --oknodo --pidfile $PIDFILE --startas $DAEMON -- -p $PIDFILE $NTPD_OPTS - fi - ) 9>$LOCKFILE - log_end_msg $? 
- ;; - stop) - log_daemon_msg "Stopping NTP server" "ntpd" - start-stop-daemon --stop --quiet --oknodo --pidfile $PIDFILE --retry=TERM/30/KILL/5 --exec $DAEMON - log_end_msg $? - rm -f $PIDFILE - ;; - restart|force-reload) - $0 stop && sleep 2 && $0 start - ;; - try-restart) - if $0 status >/dev/null; then - $0 restart - else - exit 0 - fi - ;; - reload) - exit 3 - ;; - status) - status_of_proc $DAEMON "NTP server" - ;; - *) - echo "Usage: $0 {start|stop|restart|try-restart|force-reload|status}" - exit 2 - ;; -esac diff --git a/src/sonic-config-engine/sonic-cfggen b/src/sonic-config-engine/sonic-cfggen index 58bdfaa1f4..215ca3ba5f 100755 --- a/src/sonic-config-engine/sonic-cfggen +++ b/src/sonic-config-engine/sonic-cfggen @@ -26,6 +26,7 @@ import os import sys import yaml import ipaddress +import base64 from collections import OrderedDict from config_samples import generate_sample_config, get_available_config @@ -139,6 +140,29 @@ def ip_network(value): return "Invalid ip address %s" % value return r_v.network +def b64encode(value): + """Base64 encoder + Return: + encoded string or the same value in case of error + """ + try: + ret = base64.b64encode(value.encode()).decode() + except: + return value + return ret + + +def b64decode(value): + """Base64 decoder + Return: + decoded string or the same value in case of error + """ + try: + ret = base64.b64decode(value.encode()).decode() + except: + return value + return ret + def get_primary_addr(value): if not value: return "" @@ -274,6 +298,10 @@ def _get_jinja2_env(paths): for attr in ['ip', 'network', 'prefixlen', 'netmask', 'broadcast']: env.filters[attr] = partial(prefix_attr, attr) + # Base64 encoder/decoder + env.filters['b64encode'] = b64encode + env.filters['b64decode'] = b64decode + return env def main(): diff --git a/src/sonic-config-engine/tests/data/ntp/ntp_interfaces.json b/src/sonic-config-engine/tests/data/ntp/ntp_interfaces.json index 2847583014..a8da7f1331 100644 --- a/src/sonic-config-engine/tests/data/ntp/ntp_interfaces.json +++ b/src/sonic-config-engine/tests/data/ntp/ntp_interfaces.json @@ -1,7 +1,49 @@ { "NTP": { "global": { - "src_intf": "Ethernet0" + "src_intf": "eth0", + "vrf": "default", + "authentication": "enabled", + "dhcp": "disabled", + "server_role": "disabled", + "admin_state": "enabled" + } + }, + "NTP_SERVER": { + "my_ntp_server": { + "association_type": "server", + "iburst": "off", + "admin_state": "disabled", + "version": 3, + "resolve_as": "10.20.30.40" + }, + "server2": { + "association_type": "server", + "iburst": "off", + "admin_state": "enabled", + "version": 3, + "resolve_as": "10.20.30.50", + "key": 42, + "trusted": "no" + }, + "pool.ntp.org": { + "association_type": "pool", + "iburst": "on", + "admin_state": "enabled", + "version": 3, + "resolve_as": "pool.ntp.org" + } + }, + "NTP_KEY": { + "1": { + "type": "md5", + "trusted": "no", + "value": "blabla" + }, + "42": { + "type": "sha1", + "trusted": "yes", + "value": "the_answer" } }, "INTERFACE": { diff --git a/src/sonic-config-engine/tests/ntp.keys.j2 b/src/sonic-config-engine/tests/ntp.keys.j2 new file mode 120000 index 0000000000..a95603db8b --- /dev/null +++ b/src/sonic-config-engine/tests/ntp.keys.j2 @@ -0,0 +1 @@ +../../../files/image_config/ntp/ntp.keys.j2 \ No newline at end of file diff --git a/src/sonic-config-engine/tests/sample_output/py2/ntp.conf b/src/sonic-config-engine/tests/sample_output/py2/ntp.conf deleted file mode 100644 index fc2228e269..0000000000 --- a/src/sonic-config-engine/tests/sample_output/py2/ntp.conf +++ /dev/null @@ 
-1,58 +0,0 @@ -############################################################################### -# Managed by Ansible -# file: ansible/roles/acs/templates/ntp.conf.j2 -############################################################################### - -# /etc/ntpsec/ntp.conf, configuration for ntpd; see ntp.conf(5) for help - -# To avoid ntpd from panic and exit if the drift between new time and -# current system time is large. -tinker panic 0 - -driftfile /var/lib/ntpsec/ntp.drift -leapfile /usr/share/zoneinfo/leap-seconds.list - -# To enable Network Time Security support as a server, obtain a certificate -# (e.g. with Let's Encrypt), configure the paths below, and uncomment: -# nts cert CERT_FILE -# nts key KEY_FILE -# nts enable - -# You must create /var/log/ntpsec (owned by ntpsec:ntpsec) to enable logging. -#statsdir /var/log/ntpsec/ -#statistics loopstats peerstats clockstats -#filegen loopstats file loopstats type day enable -#filegen peerstats file peerstats type day enable -#filegen clockstats file clockstats type day enable - -# Specify one or more NTP servers. - -# Public NTP servers supporting Network Time Security: -# server time.cloudflare.com nts - -# pool.ntp.org maps to about 1000 low-stratum NTP servers. Your server will -# pick a different set every time it starts up. Please consider joining the -# pool: - -#listen on source interface if configured, else -#only listen on MGMT_INTERFACE, LOOPBACK_INTERFACE ip when MGMT_INTERFACE is not defined, or eth0 -# if we don't have both of them (default is to listen on all ip addresses) -interface ignore wildcard - -interface listen Ethernet0 -interface listen 127.0.0.1 - -# Access control configuration; see /usr/share/doc/ntpsec-doc/html/accopt.html -# for details. -# -# Note that "restrict" applies to both servers and clients, so a configuration -# that might be intended to block requests from certain clients could also end -# up blocking replies from your own upstream servers. - -# By default, exchange time with everybody, but don't allow configuration. -# NTPsec doesn't establish peer associations, and so nopeer has no effect, and has been removed from here -restrict default kod nomodify noquery limited - -# Local users may interrogate the ntp server more closely. -restrict 127.0.0.1 -restrict ::1 diff --git a/src/sonic-config-engine/tests/sample_output/py2/ntp.conf b/src/sonic-config-engine/tests/sample_output/py2/ntp.conf new file mode 120000 index 0000000000..5ebe399367 --- /dev/null +++ b/src/sonic-config-engine/tests/sample_output/py2/ntp.conf @@ -0,0 +1 @@ +../py3/ntp.conf \ No newline at end of file diff --git a/src/sonic-config-engine/tests/sample_output/py2/ntp.keys b/src/sonic-config-engine/tests/sample_output/py2/ntp.keys new file mode 120000 index 0000000000..5f1ab315e5 --- /dev/null +++ b/src/sonic-config-engine/tests/sample_output/py2/ntp.keys @@ -0,0 +1 @@ +../py3/ntp.keys \ No newline at end of file diff --git a/src/sonic-config-engine/tests/sample_output/py3/ntp.conf b/src/sonic-config-engine/tests/sample_output/py3/ntp.conf index fc2228e269..4136157394 100644 --- a/src/sonic-config-engine/tests/sample_output/py3/ntp.conf +++ b/src/sonic-config-engine/tests/sample_output/py3/ntp.conf @@ -1,9 +1,9 @@ ############################################################################### -# Managed by Ansible -# file: ansible/roles/acs/templates/ntp.conf.j2 +# This file was AUTOMATICALLY GENERATED. DO NOT MODIFY. 
+# Controlled by ntp-config.service ############################################################################### -# /etc/ntpsec/ntp.conf, configuration for ntpd; see ntp.conf(5) for help +# /etc/ntp.conf, configuration for ntpd; see ntp.conf(5) for help # To avoid ntpd from panic and exit if the drift between new time and # current system time is large. @@ -12,45 +12,28 @@ tinker panic 0 driftfile /var/lib/ntpsec/ntp.drift leapfile /usr/share/zoneinfo/leap-seconds.list -# To enable Network Time Security support as a server, obtain a certificate -# (e.g. with Let's Encrypt), configure the paths below, and uncomment: -# nts cert CERT_FILE -# nts key KEY_FILE -# nts enable +server 10.20.30.50 key 42 iburst version 3 +restrict 10.20.30.50 kod limited nomodify notrap noquery nopeer -# You must create /var/log/ntpsec (owned by ntpsec:ntpsec) to enable logging. -#statsdir /var/log/ntpsec/ -#statistics loopstats peerstats clockstats -#filegen loopstats file loopstats type day enable -#filegen peerstats file peerstats type day enable -#filegen clockstats file clockstats type day enable +pool pool.ntp.org iburst version 3 +restrict pool.ntp.org kod limited nomodify notrap noquery -# Specify one or more NTP servers. -# Public NTP servers supporting Network Time Security: -# server time.cloudflare.com nts +keys /etc/ntpsec/ntp.keys +trustedkey 42 -# pool.ntp.org maps to about 1000 low-stratum NTP servers. Your server will -# pick a different set every time it starts up. Please consider joining the -# pool: - -#listen on source interface if configured, else -#only listen on MGMT_INTERFACE, LOOPBACK_INTERFACE ip when MGMT_INTERFACE is not defined, or eth0 -# if we don't have both of them (default is to listen on all ip addresses) interface ignore wildcard -interface listen Ethernet0 + + +interface listen eth0 interface listen 127.0.0.1 -# Access control configuration; see /usr/share/doc/ntpsec-doc/html/accopt.html -# for details. -# -# Note that "restrict" applies to both servers and clients, so a configuration -# that might be intended to block requests from certain clients could also end -# up blocking replies from your own upstream servers. +# Access control configuration # By default, exchange time with everybody, but don't allow configuration. -# NTPsec doesn't establish peer associations, and so nopeer has no effect, and has been removed from here +# NTPsec doesn't establish peer associations, and so nopeer has no effect, and +# has been removed from here restrict default kod nomodify noquery limited # Local users may interrogate the ntp server more closely. diff --git a/src/sonic-config-engine/tests/sample_output/py3/ntp.keys b/src/sonic-config-engine/tests/sample_output/py3/ntp.keys new file mode 100644 index 0000000000..4a1a37b693 --- /dev/null +++ b/src/sonic-config-engine/tests/sample_output/py3/ntp.keys @@ -0,0 +1,8 @@ +############################################################################### +# This file was AUTOMATICALLY GENERATED. DO NOT MODIFY. 
+# Controlled by ntp-config.service +############################################################################### + +1 md5 blabla +42 sha1 the_answer + diff --git a/src/sonic-config-engine/tests/test_j2files.py b/src/sonic-config-engine/tests/test_j2files.py index a65dea2b04..6a660bec28 100644 --- a/src/sonic-config-engine/tests/test_j2files.py +++ b/src/sonic-config-engine/tests/test_j2files.py @@ -675,10 +675,19 @@ class TestJ2Files(TestCase): def test_ntp_conf(self): conf_template = os.path.join(self.test_dir, "ntp.conf.j2") - ntp_interfaces_json = os.path.join(self.test_dir, "data", "ntp", "ntp_interfaces.json") + config_db_ntp_json = os.path.join(self.test_dir, "data", "ntp", "ntp_interfaces.json") expected = os.path.join(self.test_dir, "sample_output", utils.PYvX_DIR, "ntp.conf") - argument = ['-j', ntp_interfaces_json, '-t', conf_template] + argument = ['-j', config_db_ntp_json, '-t', conf_template] + self.run_script(argument, output_file=self.output_file) + assert utils.cmp(expected, self.output_file), self.run_diff(expected, self.output_file) + + def test_ntp_keys(self): + conf_template = os.path.join(self.test_dir, "ntp.keys.j2") + config_db_ntp_json = os.path.join(self.test_dir, "data", "ntp", "ntp_interfaces.json") + expected = os.path.join(self.test_dir, "sample_output", utils.PYvX_DIR, "ntp.keys") + + argument = ['-j', config_db_ntp_json, '-t', conf_template] self.run_script(argument, output_file=self.output_file) assert utils.cmp(expected, self.output_file), self.run_diff(expected, self.output_file) diff --git a/src/sonic-yang-models/doc/Configuration.md b/src/sonic-yang-models/doc/Configuration.md index a28ffa5c57..5ee894f7aa 100644 --- a/src/sonic-yang-models/doc/Configuration.md +++ b/src/sonic-yang-models/doc/Configuration.md @@ -1538,6 +1538,35 @@ These configuration options are used to modify the way that ntp binds to the ports on the switch and which port it uses to make ntp update requests from. +***NTP Admin state*** + +If this option is set to `enabled` then ntp client will try to sync system time with configured NTP servers. +Otherwise, NTP client feature will be disabled. +``` +{ + "NTP": { + "global": { + "admin_state": "enabled" + } + } +} +``` + +***NTP Server role*** + +This option is used to control NTP server state on the switch. +If this option is set to `enabled` switch will act as NTP server. +By default `server_role` is `disabled`. +``` +{ + "NTP": { + "global": { + "server_role": "enabled" + } + } +} +``` + ***NTP VRF*** If this option is set to `default` then ntp will run within the default vrf @@ -1575,6 +1604,36 @@ for that address. } ``` +***NTP Authentication*** + +If this option is set to `enabled` then ntp will try to verify NTP servers it connects to. +This option **has no effect** if key is not set for NTP server. +By default it is `disabled` +``` +{ + "NTP": { + "global": { + "authentication": "enabled" + } + } +} +``` + +***NTP DHCP leases*** + +If this option is set to `enabled` then ntp client will try to use NTP servers provided by DHCP server. +If this option is set to `disabled` you will be able to use the user-configured NTP servers. +By default it is `enabled` +``` +{ + "NTP": { + "global": { + "dhcp": "enabled" + } + } +} +``` + ### NTP servers These information are configured in individual tables. Domain name or IP @@ -1585,18 +1644,77 @@ attributes in those objects. 
``` { "NTP_SERVER": { - "2.debian.pool.ntp.org": {}, - "1.debian.pool.ntp.org": {}, - "3.debian.pool.ntp.org": {}, - "0.debian.pool.ntp.org": {} + "2.debian.pool.ntp.org": { + "association_type": "pool", + "iburst": "on", + "admin_state": "enabled", + "version": 4 + }, + "1.debian.pool.ntp.org": { + "association_type": "pool", + "iburst": "off", + "admin_state": "enabled", + "version": 3 + }, + "3.debian.pool.ntp.org": { + "association_type": "pool", + "iburst": "on", + "admin_state": "disabled", + "version": 4 + }, + "0.debian.pool.ntp.org": { + "association_type": "pool", + "iburst": "off", + "admin_state": "disabled", + "version": 3 + } }, "NTP_SERVER": { - "23.92.29.245": {}, - "204.2.134.164": {} + "23.92.29.245": { + "association_type": "server", + "iburst": "on", + "admin_state": "enabled", + "version": 4, + "key": 3, + "trusted": "yes" + }, + "204.2.134.164": { + "association_type": "server", + "iburst": "on", + "admin_state": "enabled", + "version": 3 + } } } ``` +* `association_type` - is used to control the type of the server. It can be `server` or `pool`. +* `iburst` - agressive server polling `{on, off}`. +* `version` - NTP protool version to use `[3..4]`. +* `key` - authentication key id `[1..65535]` to use to auth the server. +* `admin_state` - enable or disable specific server. +* `trusted` - trust this server when auth is enabled. + +***NTP keys*** +``` +{ + "NTP_KEY": { + "1": { + "type": "md5", + "value": "bXlwYXNzd29yZA==", + "trusted": "yes" + }, + "42": { + "type": "sha1", + "value": "dGhlYW5zd2Vy", + "trusted": "no" + } + } +} +``` +* `type` - key type to use `{md5, sha1, sha256, sha384, sha512}`. +* `value` - base64 encoded key value. +* `trusted` - trust this NTP key `{yes, no}`. ### Peer Switch diff --git a/src/sonic-yang-models/tests/files/sample_config_db.json b/src/sonic-yang-models/tests/files/sample_config_db.json index 4eba94e72f..b0e3f5f589 100644 --- a/src/sonic-yang-models/tests/files/sample_config_db.json +++ b/src/sonic-yang-models/tests/files/sample_config_db.json @@ -477,14 +477,36 @@ }, "NTP": { "global": { + "authentication": "disabled", + "dhcp": "enabled", + "server_role": "disabled", + "admin_state": "enabled", "vrf": "mgmt", "src_intf": "eth0;Loopback0" } }, "NTP_SERVER": { - "0.debian.pool.ntp.org": {}, - "23.92.29.245": {}, - "2001:aa:aa::aa": {} + "0.debian.pool.ntp.org": { + "association_type": "pool", + "resolve_as": "0.debian.pool.ntp.org" + }, + "time.google.com": { + "association_type": "server", + "resolve_as": "216.239.35.4" + }, + "23.92.29.245": { + "admin_state": "enabled", + "association_type": "server", + "resolve_as": "23.92.29.245", + "iburst": "off", + "trusted": "yes" + }, + "2001:aa:aa::aa": { + "admin_state": "disabled", + "iburst": "on", + "association_type": "server", + "resolve_as": "2001:aa:aa::aa" + } }, "SYSLOG_SERVER" : { "10.13.14.17": { diff --git a/src/sonic-yang-models/tests/yang_model_tests/tests/ntp.json b/src/sonic-yang-models/tests/yang_model_tests/tests/ntp.json index c798de7d83..f380162b83 100644 --- a/src/sonic-yang-models/tests/yang_model_tests/tests/ntp.json +++ b/src/sonic-yang-models/tests/yang_model_tests/tests/ntp.json @@ -58,5 +58,82 @@ "desc": "CONFIGURE NON-EXISTING MGMT INTERFACE AS NTP SOURCE INTERFACE.", "eStrKey": "InvalidValue", "eStr": ["src"] + }, + "NTP_GLOB_VALID1": { + "desc": "NTP global params valid config 1" + }, + "NTP_GLOB_VALID2": { + "desc": "NTP global params valid config 2" + }, + "NTP_AUTH_INVALID1": { + "desc": "NTP authentication state invalid 1", + "eStrKey": "InvalidValue" + 
}, + "NTP_AUTH_INVALID2": { + "desc": "NTP authentication state invalid 2", + "eStrKey": "InvalidValue" + }, + "NTP_DHCP_INVALID1": { + "desc": "NTP DHCP state invalid 1", + "eStrKey": "InvalidValue" + }, + "NTP_DHCP_INVALID2": { + "desc": "NTP DHCP state invalid 2", + "eStrKey": "InvalidValue" + }, + "NTP_SERVER_ROLE_INVALID1": { + "desc": "NTP server role state invalid 1", + "eStrKey": "InvalidValue" + }, + "NTP_SERVER_ROLE_INVALID2": { + "desc": "NTP server role state invalid 2", + "eStrKey": "InvalidValue" + }, + "NTP_STATE_INVALID1": { + "desc": "NTP daemon state invalid 1", + "eStrKey": "InvalidValue" + }, + "NTP_STATE_INVALID2": { + "desc": "NTP daemon state invalid 2", + "eStrKey": "InvalidValue" + }, + "NTP_SERVER_ASSOCIATION_INVALID": { + "desc": "NTP server type invalid", + "eStrKey": "InvalidValue" + }, + "NTP_SERVER_IBURST_INVALID": { + "desc": "NTP server aggressive mode invalid", + "eStrKey": "InvalidValue" + }, + "NTP_SERVER_KEY_INVALID": { + "desc": "NTP server authentication key invalid", + "eStrKey": "InvalidValue" + }, + "NTP_SERVER_STATE_INVALID": { + "desc": "NTP server state invalid", + "eStrKey": "InvalidValue" + }, + "NTP_SERVER_TRUSTED_INVALID": { + "desc": "NTP server trusted mode invalid", + "eStrKey": "InvalidValue" + }, + "NTP_KEY_VALID": { + "desc": "NTP authentication keys inventory" + }, + "NTP_KEY_ID_INVALID": { + "desc": "NTP authentication keys invalid key id", + "eStrKey": "InvalidValue" + }, + "NTP_KEY_TRUSTED_INVALID": { + "desc": "NTP authentication keys invalid trustiness", + "eStrKey": "InvalidValue" + }, + "NTP_KEY_TYPE_INVALID": { + "desc": "NTP authentication keys invalid key type", + "eStrKey": "InvalidValue" + }, + "NTP_KEY_VALUE_INVALID": { + "desc": "NTP authentication keys bad key value", + "eStrKey": "Range" } } diff --git a/src/sonic-yang-models/tests/yang_model_tests/tests_config/ntp.json b/src/sonic-yang-models/tests/yang_model_tests/tests_config/ntp.json index 5b9ab1495a..c886bec3d2 100644 --- a/src/sonic-yang-models/tests/yang_model_tests/tests_config/ntp.json +++ b/src/sonic-yang-models/tests/yang_model_tests/tests_config/ntp.json @@ -4,13 +4,38 @@ "sonic-ntp:NTP_SERVER": { "NTP_SERVER_LIST": [ { - "server_address": "10.11.12.13" + "server_address": "10.11.12.13", + "association_type": "server", + "iburst": "on", + "key": 10, + "admin_state": "enabled", + "trusted": "no" }, { - "server_address": "2001:aa:aa::aa" + "server_address": "2001:aa:aa::aa", + "association_type": "server", + "iburst": "off", + "key": 15, + "admin_state": "disabled", + "trusted": "yes" }, { - "server_address": "pool.ntp.org" + "server_address": "pool.ntp.org", + "association_type": "pool", + "iburst": "on", + "admin_state": "enabled" + } + ] + }, + "sonic-ntp:NTP_KEY": { + "NTP_KEYS_LIST": [ + { + "id": 10, + "value": "bHVtb3M=" + }, + { + "id": 15, + "value": "Ym9tYmFyZGE=" } ] } @@ -237,5 +262,234 @@ ] } } + }, + "NTP_GLOB_VALID1": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "authentication": "enabled", + "dhcp": "enabled", + "server_role": "enabled", + "admin_state": "enabled" + } + } + } + }, + "NTP_GLOB_VALID2": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "authentication": "disabled", + "dhcp": "disabled", + "server_role": "disabled", + "admin_state": "disabled" + } + } + } + }, + "NTP_AUTH_INVALID1": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "authentication": "" + } + } + } + }, + "NTP_AUTH_INVALID2": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + 
"sonic-ntp:global": { + "authentication": "blahblah" + } + } + } + }, + "NTP_DHCP_INVALID1": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "dhcp": "" + } + } + } + }, + "NTP_DHCP_INVALID2": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "dhcp": "abracadabra" + } + } + } + }, + "NTP_SERVER_ROLE_INVALID1": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "server_role": "" + } + } + } + }, + "NTP_SERVER_ROLE_INVALID2": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "server_role": "olololo" + } + } + } + }, + "NTP_STATE_INVALID1": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "admin_state": "" + } + } + } + }, + "NTP_STATE_INVALID2": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP": { + "sonic-ntp:global": { + "admin_state": "azazaza" + } + } + } + }, + "NTP_SERVER_ASSOCIATION_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_SERVER": { + "NTP_SERVER_LIST": [ + { + "server_address": "2001:aa:aa:aa", + "association_type": "puul" + } + ] + } + } + }, + "NTP_SERVER_IBURST_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_SERVER": { + "NTP_SERVER_LIST": [ + { + "server_address": "2001:aa:aa:aa", + "iburst": "of" + } + ] + } + } + }, + "NTP_SERVER_KEY_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_SERVER": { + "NTP_SERVER_LIST": [ + { + "server_address": "2001:aa:aa:aa", + "key": 0 + } + ] + } + } + }, + "NTP_SERVER_STATE_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_SERVER": { + "NTP_SERVER_LIST": [ + { + "server_address": "2001:aa:aa:aa", + "admin_state": "enable" + } + ] + } + } + }, + "NTP_SERVER_TRUSTED_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_SERVER": { + "NTP_SERVER_LIST": [ + { + "server_address": "2001:aa:aa:aa", + "trusted": "not" + } + ] + } + } + }, + "NTP_KEY_VALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_KEY": { + "NTP_KEYS_LIST": [ + { + "id": 20, + "type": "md5", + "value": "anNkZjg4MzIwZnNkMkBANDQ1", + "trusted": "no" + }, + { + "id": 30, + "type": "sha1", + "value": "YWFiYmNjZGRlZWZm", + "trusted": "yes" + }, + { + "id": 42, + "type": "md5", + "value": "dGhlYW5zd2Vy", + "trusted": "yes" + } + ] + } + } + }, + "NTP_KEY_ID_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_KEY": { + "NTP_KEYS_LIST": [ + { + "id": 100000 + } + ] + } + } + }, + "NTP_KEY_TRUSTED_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_KEY": { + "NTP_KEYS_LIST": [ + { + "id": 20, + "trusted": "nope" + } + ] + } + } + }, + "NTP_KEY_TYPE_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_KEY": { + "NTP_KEYS_LIST": [ + { + "id": 20, + "type": "md6" + } + ] + } + } + }, + "NTP_KEY_VALUE_INVALID": { + "sonic-ntp:sonic-ntp": { + "sonic-ntp:NTP_KEY": { + "NTP_KEYS_LIST": [ + { + "id": 20, + "value": "" + } + ] + } + } } } diff --git a/src/sonic-yang-models/yang-models/sonic-ntp.yang b/src/sonic-yang-models/yang-models/sonic-ntp.yang index f28e105209..2591f8c7a5 100644 --- a/src/sonic-yang-models/yang-models/sonic-ntp.yang +++ b/src/sonic-yang-models/yang-models/sonic-ntp.yang @@ -33,6 +33,10 @@ module sonic-ntp { prefix mprt; } + import sonic-types { + prefix stypes; + } + description "NTP yang Module for SONiC OS"; @@ -41,6 +45,39 @@ module sonic-ntp { "First revision"; } + revision 2023-03-20 { + description + "Add extended configuration options"; + } + + typedef association-type { + description "NTP server association type"; + type enumeration { + enum server; + enum pool; + } + } + + typedef key-type { + 
description "NTP key encryption type"; + type enumeration { + enum md5; + enum sha1; + enum sha256; + enum sha384; + enum sha512; + } + } + + typedef key-id { + description "NTP key ID"; + type uint16 { + range 1..65535 { + error-message "Failed NTP key ID"; + } + } + } + container sonic-ntp { container NTP { @@ -68,6 +105,9 @@ module sonic-ntp { type leafref { path /mprt:sonic-mgmt_port/mprt:MGMT_PORT/mprt:MGMT_PORT_LIST/mprt:name; } + type string { + pattern 'eth0'; + } } description @@ -92,6 +132,30 @@ module sonic-ntp { default VRF or Management VRF."; } + leaf authentication { + type stypes:admin_mode; + default disabled; + description "NTP authentication state"; + } + + leaf dhcp { + type stypes:admin_mode; + default enabled; + description "Use NTP servers distributed by DHCP"; + } + + leaf server_role { + type stypes:admin_mode; + default enabled; + description "NTP server functionality state"; + } + + leaf admin_state { + type stypes:admin_mode; + default enabled; + description "NTP feature state"; + } + } /* end of container global */ } /* end of container NTP */ @@ -112,10 +176,95 @@ module sonic-ntp { leaf server_address { type inet:host; } + + leaf association_type { + type association-type; + default server; + description "NTP remote association type: server or pool."; + } + + leaf iburst { + type stypes:on-off; + default on; + description "NTP aggressive polling"; + } + + leaf key { + description "NTP server key ID"; + type leafref { + path /ntp:sonic-ntp/ntp:NTP_KEY/ntp:NTP_KEYS_LIST/ntp:id; + } + } + + leaf resolve_as { + type inet:host; + description "Server resolved IP address"; + } + + leaf admin_state { + type stypes:admin_mode; + default enabled; + description "NTP server state"; + } + + leaf trusted { + type stypes:yes-no; + default no; + description "Trust this server. 
It will force time + synchronization only to this server when + authentication is enabled"; + } + + leaf version { + type uint8 { + range "3..4" { + error-message "Failed NTP version"; + } + } + default 4; + description "NTP proto version to communicate with NTP + server"; + } + } /* end of list NTP_SERVER_LIST */ } /* end of container NTP_SERVER */ + container NTP_KEY { + + description "NTP authentication keys inventory"; + + list NTP_KEYS_LIST { + description "NTP authentication keys inventory"; + key "id"; + + leaf id { + type key-id; + description "NTP key ID"; + } + + leaf trusted { + type stypes:yes-no; + default no; + description "Trust this NTP key"; + } + + leaf value { + type string { + length 1..64; + } + description "NTP encrypted authentication key"; + } + + leaf type { + type key-type; + default md5; + description "NTP authentication key type"; + } + } /* end of list NTP_KEYS_LIST */ + + } /* end of container NTP_KEY */ + } /* end of container sonic-ntp */ } /* end of module sonic-ntp */ diff --git a/src/sonic-yang-models/yang-templates/sonic-types.yang.j2 b/src/sonic-yang-models/yang-templates/sonic-types.yang.j2 index 4007a386b3..909a883a0e 100644 --- a/src/sonic-yang-models/yang-templates/sonic-types.yang.j2 +++ b/src/sonic-yang-models/yang-templates/sonic-types.yang.j2 @@ -360,6 +360,22 @@ module sonic-types { "BCP 175: Procedures for Maintaining the Time Zone Database"; } + typedef yes-no { + description "Yes/No configuration"; + type enumeration { + enum yes; + enum no; + } + } + + typedef on-off { + description "On/Off configuration"; + type enumeration { + enum on; + enum off; + } + } + {% if yang_model_type == "cvl" %} /* Required for CVL */ container operation { From f82980784de91fb9106be133282e894991f07147 Mon Sep 17 00:00:00 2001 From: Zain Budhwani <99770260+zbud-msft@users.noreply.github.com> Date: Tue, 12 Dec 2023 11:35:46 -0800 Subject: [PATCH 02/33] Change leaf value of used_cnt of sonic-events-swss:chk_crm_threshold (#17430) ### Why I did it Current YANG model of sonic-events-swss:chk_crm_threshold has the type uint8 for leaf used_cnt which is too small of a range to hold values of used_cnt which can greatly exceed that. Updating leaf type of used_cnt and free_cnt to match defined definition. 
Changed to uint32 as defined here: https://github.com/sonic-net/sonic-swss/blob/master/orchagent/crmorch.h#L99

##### Work item tracking
- Microsoft ADO **(number only)**: 26091912

#### How I did it
Update the leaf types.

#### How to verify it
UT and the sonic-mgmt PR checker.
---
 .../yang_model_tests/tests_config/sonic-events-swss.json | 6 +++---
 src/sonic-yang-models/yang-models/sonic-events-swss.yang | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/sonic-yang-models/tests/yang_model_tests/tests_config/sonic-events-swss.json b/src/sonic-yang-models/tests/yang_model_tests/tests_config/sonic-events-swss.json
index 6b50ccd55a..e56acc0a47 100644
--- a/src/sonic-yang-models/tests/yang_model_tests/tests_config/sonic-events-swss.json
+++ b/src/sonic-yang-models/tests/yang_model_tests/tests_config/sonic-events-swss.json
@@ -180,9 +180,9 @@
     "SONIC_EVENTS_SWSS_CHK_CRM_THRESHOLD_VALID": {
         "sonic-events-swss:sonic-events-swss": {
             "sonic-events-swss:chk_crm_threshold": {
-                "percent": 0,
-                "used_cnt": 0,
-                "free_cnt": 0,
+                "percent": 80,
+                "used_cnt": 6414,
+                "free_cnt": 65300,
                 "timestamp": "1985-04-12T23:20:50.52Z"
             }
         }
diff --git a/src/sonic-yang-models/yang-models/sonic-events-swss.yang b/src/sonic-yang-models/yang-models/sonic-events-swss.yang
index f7b3ca3ea5..93c617776c 100644
--- a/src/sonic-yang-models/yang-models/sonic-events-swss.yang
+++ b/src/sonic-yang-models/yang-models/sonic-events-swss.yang
@@ -102,11 +102,11 @@
         }

         leaf used_cnt {
-            type uint8;
+            type uint32;
         }

         leaf free_cnt {
-            type uint64;
+            type uint32;
         }

         uses evtcmn:sonic-events-cmn;

From dd39dd0e0393bac906e6fd7efb90303965c2d984 Mon Sep 17 00:00:00 2001
From: Sudharsan Dhamal Gopalarathnam
Date: Wed, 13 Dec 2023 02:48:49 -0800
Subject: [PATCH 03/33] [Mellanox] Update SAI to 2311.26.0.28, SDK/FW to 4.6.2134/2012.2134 (#17481)

- Why I did it
Update SAI version to SAIBuild2311.26.0.28
Fixed issues:
1. Traffic with a unicast destination IP and a multicast destination MAC wasn't properly dropped.
2. When working with the SAI_DEFAULT_SWITCHING_MODE_STORE_FORWARD key/value enabled, trying to add a LAG member to a LAG created after the warm-boot initial configuration phase ended would fail.
3. The optional Port IP counters feature (SAI_PORT_STAT_IP*), enabled by the SAI XML per-port-ip-counter-enabled config node, wasn't initialized properly.
4. Creating a BFD session for a non-default VRF fails (SAI_BFD_SESSION_ATTR_VIRTUAL_ROUTER != SAI_SWITCH_ATTR_DEFAULT_VIRTUAL_ROUTER_ID).
5. The default value for port FEC during switch init on Spectrum3 was initialized as 'auto' and not aligned with the SAI header default 'none'. Note: if a setup has an invalid configuration and previously relied on 'auto', the user may now need to provide an explicit valid value for SAI_PORT_ATTR_FEC_MODE.

Update SDK/FW version to 4.6.2134/2012.2134
Fixed issues:
1. Updated SN3700C to enable limiting to 100G speed.
2. Recovering from low power mode might end with the port down.
- How I did it Updating the versions in makefile - How to verify it Confirm issues fixed and run sonic-mgmt tests --- platform/mellanox/fw.mk | 10 +++++----- platform/mellanox/mlnx-sai.mk | 2 +- platform/mellanox/sdk.mk | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/platform/mellanox/fw.mk b/platform/mellanox/fw.mk index 1c54fec125..1ba74ca5c2 100644 --- a/platform/mellanox/fw.mk +++ b/platform/mellanox/fw.mk @@ -25,29 +25,29 @@ SIMX_VERSION = 24.1-1007 FW_FROM_URL = y -MLNX_FW_ASSETS_RELEASE_TAG = fw-2012.2104 +MLNX_FW_ASSETS_RELEASE_TAG = fw-2012.2134 MLNX_FW_ASSETS_URL = $(MLNX_ASSETS_GITHUB_URL)/releases/download/$(MLNX_FW_ASSETS_RELEASE_TAG) ifeq ($(MLNX_FW_BASE_URL), ) MLNX_FW_BASE_URL = $(MLNX_FW_ASSETS_URL) endif -MLNX_SPC_FW_VERSION = 13.2012.2104 +MLNX_SPC_FW_VERSION = 13.2012.2134 MLNX_SPC_FW_FILE = fw-SPC-rel-$(subst .,_,$(MLNX_SPC_FW_VERSION))-EVB.mfa $(MLNX_SPC_FW_FILE)_PATH = $(MLNX_FW_BASE_PATH) $(MLNX_SPC_FW_FILE)_URL = $(MLNX_FW_BASE_URL)/$(MLNX_SPC_FW_FILE) -MLNX_SPC2_FW_VERSION = 29.2012.2104 +MLNX_SPC2_FW_VERSION = 29.2012.2134 MLNX_SPC2_FW_FILE = fw-SPC2-rel-$(subst .,_,$(MLNX_SPC2_FW_VERSION))-EVB.mfa $(MLNX_SPC2_FW_FILE)_PATH = $(MLNX_FW_BASE_PATH) $(MLNX_SPC2_FW_FILE)_URL = $(MLNX_FW_BASE_URL)/$(MLNX_SPC2_FW_FILE) -MLNX_SPC3_FW_VERSION = 30.2012.2104 +MLNX_SPC3_FW_VERSION = 30.2012.2134 MLNX_SPC3_FW_FILE = fw-SPC3-rel-$(subst .,_,$(MLNX_SPC3_FW_VERSION))-EVB.mfa $(MLNX_SPC3_FW_FILE)_PATH = $(MLNX_FW_BASE_PATH) $(MLNX_SPC3_FW_FILE)_URL = $(MLNX_FW_BASE_URL)/$(MLNX_SPC3_FW_FILE) -MLNX_SPC4_FW_VERSION = 34.2012.2104 +MLNX_SPC4_FW_VERSION = 34.2012.2134 MLNX_SPC4_FW_FILE = fw-SPC4-rel-$(subst .,_,$(MLNX_SPC4_FW_VERSION))-EVB.mfa $(MLNX_SPC4_FW_FILE)_PATH = $(MLNX_FW_BASE_PATH) $(MLNX_SPC4_FW_FILE)_URL = $(MLNX_FW_BASE_URL)/$(MLNX_SPC4_FW_FILE) diff --git a/platform/mellanox/mlnx-sai.mk b/platform/mellanox/mlnx-sai.mk index 733be9d927..39eaeabf7c 100644 --- a/platform/mellanox/mlnx-sai.mk +++ b/platform/mellanox/mlnx-sai.mk @@ -1,6 +1,6 @@ # Mellanox SAI -MLNX_SAI_VERSION = SAIBuild2311.25.0.36 +MLNX_SAI_VERSION = SAIBuild2311.26.0.28 MLNX_SAI_ASSETS_GITHUB_URL = https://github.com/Mellanox/Spectrum-SDK-Drivers-SONiC-Bins MLNX_SAI_ASSETS_RELEASE_TAG = sai-$(MLNX_SAI_VERSION)-$(BLDENV)-$(CONFIGURED_ARCH) MLNX_SAI_ASSETS_URL = $(MLNX_SAI_ASSETS_GITHUB_URL)/releases/download/$(MLNX_SAI_ASSETS_RELEASE_TAG) diff --git a/platform/mellanox/sdk.mk b/platform/mellanox/sdk.mk index b573dfb29d..7188cfe05f 100644 --- a/platform/mellanox/sdk.mk +++ b/platform/mellanox/sdk.mk @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -MLNX_SDK_VERSION = 4.6.2104 +MLNX_SDK_VERSION = 4.6.2134 MLNX_SDK_ISSU_VERSION = 101 MLNX_SDK_DRIVERS_GITHUB_URL = https://github.com/Mellanox/Spectrum-SDK-Drivers From 0d62cf0e925d95313883136e590c04470bbc06dd Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Wed, 13 Dec 2023 20:16:40 +0800 Subject: [PATCH 04/33] [Mellanox] Remove EEPROM write limitation if it is software control (#17030) - Why I did it When module is under software control (CMIS host management enabled), EEPROM should be controlled by software and there should be no limitation for any write operation. 
- How I did it Remove EEPROM write limitation if a module is under software control - How to verify it Manual test UT --- platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py | 7 +++++++ platform/mellanox/mlnx-platform-api/tests/test_sfp.py | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py index e0e11a50ef..9b9e97d189 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py @@ -771,6 +771,13 @@ class SFP(NvidiaSFPCommon): Returns: bool: True if the limited bytes is hit """ + try: + if self.is_sw_control(): + return False + except Exception as e: + logger.log_notice(f'Module is under initialization, cannot write module EEPROM - {e}') + return True + eeprom_path = self._get_eeprom_path() limited_data = limited_eeprom.get(self._get_sfp_type_str(eeprom_path)) if not limited_data: diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py index 4dfcf0f073..6bdc82b5b4 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py @@ -162,8 +162,13 @@ class TestSfp: @mock.patch('sonic_platform.sfp.SFP._get_eeprom_path', mock.MagicMock(return_value = None)) @mock.patch('sonic_platform.sfp.SFP._get_sfp_type_str') - def test_is_write_protected(self, mock_get_type_str): + @mock.patch('sonic_platform.sfp.SFP.is_sw_control') + def test_is_write_protected(self, mock_sw_control, mock_get_type_str): sfp = SFP(0) + mock_sw_control.return_value = True + assert not sfp._is_write_protected(page=0, page_offset=26, num_bytes=1) + + mock_sw_control.return_value = False mock_get_type_str.return_value = 'cmis' assert sfp._is_write_protected(page=0, page_offset=26, num_bytes=1) assert not sfp._is_write_protected(page=0, page_offset=27, num_bytes=1) From 1b84f3daa54ee0d644677e36e0e8d64947147f23 Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Wed, 13 Dec 2023 20:19:44 +0800 Subject: [PATCH 05/33] [Mellanox] update asic and module temperature in a thread for CMIS management (#16955) - Why I did it When module is totally under software control, driver cannot get module temperature/temperature threshold from firmware. In this case, sonic needs to get temperature/temperature threshold from EEPROM. In this PR, a thread thermal updater is created to update module temperature/temperature threshold while software control is enabled. 
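To illustrate the periodic-update idea described above, here is a minimal sketch of an updater thread. The sysfs path, target file, polling interval, and class shape below are assumptions for illustration only; the actual logic lives in the new sonic_platform/thermal_updater.py module added by this patch.

```python
# Minimal sketch of a periodic thermal updater (illustrative only).
# The paths and the 1-second interval are assumptions, not the values
# used by the real thermal_updater.py.
import threading

ASIC_TEMP_SYSFS = '/sys/module/sx_core/asic0/temperature/input'  # assumed source
HW_MGMT_ASIC_FILE = '/var/run/hw-management/thermal/asic'        # assumed target
UPDATE_INTERVAL = 1.0                                            # assumed interval, seconds


class ThermalUpdaterSketch:
    """Periodically copy a temperature reading from sysfs to hw-management."""

    def __init__(self):
        self._timer = None
        self._running = False

    def start(self):
        self._running = True
        self._schedule()

    def stop(self):
        self._running = False
        if self._timer:
            self._timer.cancel()
            self._timer = None

    def _schedule(self):
        # Re-arm a daemon timer so updates keep running in the background
        self._timer = threading.Timer(UPDATE_INTERVAL, self._update)
        self._timer.daemon = True
        self._timer.start()

    def _update(self):
        try:
            with open(ASIC_TEMP_SYSFS) as src:
                value = int(src.read().strip())
            with open(HW_MGMT_ASIC_FILE, 'w') as dst:
                dst.write(str(value))
        except (OSError, ValueError):
            # The sysfs node may be missing or unreadable while the ASIC or a
            # module is still initializing; skip this cycle and try again.
            pass
        finally:
            if self._running:
                self._schedule()
```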
- How I did it Query ASIC temperature from SDK sysfs and update hw-management-tc periodically Query Module temperature from EEPROM and update hw-management-tc periodically - How to verify it Manual test New Unit tests --- .../sonic_platform/chassis.py | 4 + .../mlnx-platform-api/sonic_platform/sfp.py | 79 ++++++- .../sonic_platform/thermal.py | 114 ++++++++-- .../sonic_platform/thermal_manager.py | 27 +++ .../sonic_platform/thermal_updater.py | 213 ++++++++++++++++++ .../mlnx-platform-api/sonic_platform/utils.py | 55 +++++ .../mlnx-platform-api/tests/test_sfp.py | 40 ++++ .../mlnx-platform-api/tests/test_thermal.py | 21 +- .../tests/test_thermal_updater.py | 128 +++++++++++ .../mlnx-platform-api/tests/test_utils.py | 20 ++ 10 files changed, 675 insertions(+), 26 deletions(-) create mode 100644 platform/mellanox/mlnx-platform-api/sonic_platform/thermal_updater.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py index 6efdfb61f4..f0b73de66c 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py @@ -82,6 +82,8 @@ class Chassis(ChassisBase): # System UID LED _led_uid = None + chassis_instance = None + def __init__(self): super(Chassis, self).__init__() @@ -127,6 +129,8 @@ class Chassis(ChassisBase): self._RJ45_port_inited = False self._RJ45_port_list = None + Chassis.chassis_instance = self + self.modules_mgmt_thread = threading.Thread() self.modules_changes_queue = queue.Queue() self.modules_mgmt_task_stopping_event = threading.Event() diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py index 9b9e97d189..c5bcfddaf1 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py @@ -31,6 +31,8 @@ try: from . 
import utils from .device_data import DeviceDataManager from sonic_platform_base.sonic_xcvr.sfp_optoe_base import SfpOptoeBase + from sonic_platform_base.sonic_xcvr.fields import consts + from sonic_platform_base.sonic_xcvr.api.public import sff8636, sff8436 except ImportError as e: raise ImportError (str(e) + "- required module not found") @@ -155,6 +157,10 @@ SFP_TYPE_SFF8636 = 'sff8636' # SFP stderr SFP_EEPROM_NOT_AVAILABLE = 'Input/output error' +SFP_DEFAULT_TEMP_WARNNING_THRESHOLD = 70.0 +SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD = 80.0 +SFP_TEMPERATURE_SCALE = 8.0 + # SFP EEPROM limited bytes limited_eeprom = { SFP_TYPE_CMIS: { @@ -264,7 +270,7 @@ class SFP(NvidiaSFPCommon): if slot_id == 0: # For non-modular chassis from .thermal import initialize_sfp_thermal - self._thermal_list = initialize_sfp_thermal(sfp_index) + self._thermal_list = initialize_sfp_thermal(self) else: # For modular chassis # (slot_id % MAX_LC_CONUNT - 1) * MAX_PORT_COUNT + (sfp_index + 1) * (MAX_PORT_COUNT / LC_PORT_COUNT) max_linecard_count = DeviceDataManager.get_linecard_count() @@ -822,6 +828,77 @@ class SFP(NvidiaSFPCommon): api = self.get_xcvr_api() return [False] * api.NUM_CHANNELS if api else None + def get_temperature(self): + try: + if not self.is_sw_control(): + temp_file = f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/input' + if not os.path.exists(temp_file): + logger.log_error(f'Failed to read from file {temp_file} - not exists') + return None + temperature = utils.read_int_from_file(temp_file, + log_func=None) + return temperature / SFP_TEMPERATURE_SCALE if temperature is not None else None + except: + return 0.0 + + self.reinit() + temperature = super().get_temperature() + return temperature if temperature is not None else None + + def get_temperature_warning_threashold(self): + """Get temperature warning threshold + + Returns: + int: temperature warning threshold + """ + try: + if not self.is_sw_control(): + emergency = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/emergency', + log_func=None, + default=None) + return emergency / SFP_TEMPERATURE_SCALE if emergency is not None else SFP_DEFAULT_TEMP_WARNNING_THRESHOLD + except: + return SFP_DEFAULT_TEMP_WARNNING_THRESHOLD + + thresh = self._get_temperature_threshold() + if thresh and consts.TEMP_HIGH_WARNING_FIELD in thresh: + return thresh[consts.TEMP_HIGH_WARNING_FIELD] + return SFP_DEFAULT_TEMP_WARNNING_THRESHOLD + + def get_temperature_critical_threashold(self): + """Get temperature critical threshold + + Returns: + int: temperature critical threshold + """ + try: + if not self.is_sw_control(): + critical = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/critical', + log_func=None, + default=None) + return critical / SFP_TEMPERATURE_SCALE if critical is not None else SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD + except: + return SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD + + thresh = self._get_temperature_threshold() + if thresh and consts.TEMP_HIGH_ALARM_FIELD in thresh: + return thresh[consts.TEMP_HIGH_ALARM_FIELD] + return SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD + + def _get_temperature_threshold(self): + self.reinit() + api = self.get_xcvr_api() + if not api: + return None + + thresh_support = api.get_transceiver_thresholds_support() + if thresh_support: + if isinstance(api, sff8636.Sff8636Api) or isinstance(api, sff8436.Sff8436Api): + return api.xcvr_eeprom.read(consts.TEMP_THRESHOLDS_FIELD) + return api.xcvr_eeprom.read(consts.THRESHOLDS_FIELD) + else: + return 
None + def get_xcvr_api(self): """ Retrieves the XcvrApi associated with this SFP diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py index 1a6b45da63..eadc822e3d 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py @@ -36,6 +36,8 @@ except ImportError as e: # Global logger class instance logger = Logger() +DEFAULT_TEMP_SCALE = 1000 + """ The most important information for creating a Thermal object is 3 sysfs files: temperature file, high threshold file and high critical threshold file. There is no common naming rule for thermal objects on Nvidia platform. There are two types @@ -72,9 +74,11 @@ THERMAL_NAMING_RULE = { "chassis thermals": [ { "name": "ASIC", - "temperature": "asic", - "high_threshold": "asic_temp_emergency", - "high_critical_threshold": "asic_temp_trip_crit" + "temperature": "input", + "high_threshold_default": 105, + "high_critical_threshold_default": 120, + "sysfs_folder": "/sys/module/sx_core/asic0/temperature", + "scale": 8 }, { "name": "Ambient Port Side Temp", @@ -187,8 +191,8 @@ def initialize_psu_thermal(psu_index, presence_cb): return [create_indexable_thermal(THERMAL_NAMING_RULE['psu thermals'], psu_index, CHASSIS_THERMAL_SYSFS_FOLDER, 1, presence_cb)] -def initialize_sfp_thermal(sfp_index): - return [create_indexable_thermal(THERMAL_NAMING_RULE['sfp thermals'], sfp_index, CHASSIS_THERMAL_SYSFS_FOLDER, 1)] +def initialize_sfp_thermal(sfp): + return [ModuleThermal(sfp)] def initialize_linecard_thermals(lc_name, lc_index): @@ -214,6 +218,7 @@ def initialize_linecard_sfp_thermal(lc_name, lc_index, sfp_index): def create_indexable_thermal(rule, index, sysfs_folder, position, presence_cb=None): index += rule.get('start_index', 1) name = rule['name'].format(index) + sysfs_folder = rule.get('sysfs_folder', sysfs_folder) temp_file = os.path.join(sysfs_folder, rule['temperature'].format(index)) _check_thermal_sysfs_existence(temp_file) if 'high_threshold' in rule: @@ -226,10 +231,13 @@ def create_indexable_thermal(rule, index, sysfs_folder, position, presence_cb=No _check_thermal_sysfs_existence(high_crit_th_file) else: high_crit_th_file = None + high_th_default = rule.get('high_threshold_default') + high_crit_th_default = rule.get('high_critical_threshold_default') + scale = rule.get('scale', DEFAULT_TEMP_SCALE) if not presence_cb: - return Thermal(name, temp_file, high_th_file, high_crit_th_file, position) + return Thermal(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position) else: - return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, position, presence_cb) + return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position, presence_cb) def create_single_thermal(rule, sysfs_folder, position, presence_cb=None): @@ -243,6 +251,7 @@ def create_single_thermal(rule, sysfs_folder, position, presence_cb=None): elif not default_present: return None + sysfs_folder = rule.get('sysfs_folder', sysfs_folder) temp_file = os.path.join(sysfs_folder, temp_file) _check_thermal_sysfs_existence(temp_file) if 'high_threshold' in rule: @@ -255,11 +264,14 @@ def create_single_thermal(rule, sysfs_folder, position, presence_cb=None): _check_thermal_sysfs_existence(high_crit_th_file) else: high_crit_th_file = None + high_th_default = rule.get('high_threshold_default') + 
high_crit_th_default = rule.get('high_critical_threshold_default') + scale = rule.get('scale', DEFAULT_TEMP_SCALE) name = rule['name'] if not presence_cb: - return Thermal(name, temp_file, high_th_file, high_crit_th_file, position) + return Thermal(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position) else: - return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, position, presence_cb) + return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position, presence_cb) def _check_thermal_sysfs_existence(file_path): @@ -268,7 +280,7 @@ def _check_thermal_sysfs_existence(file_path): class Thermal(ThermalBase): - def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position): + def __init__(self, name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position): """ index should be a string for category ambient and int for other categories """ @@ -278,6 +290,9 @@ class Thermal(ThermalBase): self.temperature = temp_file self.high_threshold = high_th_file self.high_critical_threshold = high_crit_th_file + self.high_th_default = high_th_default + self.high_crit_th_default = high_crit_th_default + self.scale = scale def get_name(self): """ @@ -297,7 +312,7 @@ class Thermal(ThermalBase): of one degree Celsius, e.g. 30.125 """ value = utils.read_float_from_file(self.temperature, None, log_func=logger.log_info) - return value / 1000.0 if (value is not None and value != 0) else None + return value / self.scale if (value is not None and value != 0) else None def get_high_threshold(self): """ @@ -308,9 +323,9 @@ class Thermal(ThermalBase): up to nearest thousandth of one degree Celsius, e.g. 30.125 """ if self.high_threshold is None: - return None + return self.high_th_default value = utils.read_float_from_file(self.high_threshold, None, log_func=logger.log_info) - return value / 1000.0 if (value is not None and value != 0) else None + return value / self.scale if (value is not None and value != 0) else self.high_th_default def get_high_critical_threshold(self): """ @@ -321,9 +336,9 @@ class Thermal(ThermalBase): up to nearest thousandth of one degree Celsius, e.g. 
30.125 """ if self.high_critical_threshold is None: - return None + return self.high_crit_th_default value = utils.read_float_from_file(self.high_critical_threshold, None, log_func=logger.log_info) - return value / 1000.0 if (value is not None and value != 0) else None + return value / self.scale if (value is not None and value != 0) else self.high_crit_th_default def get_position_in_parent(self): """ @@ -343,8 +358,8 @@ class Thermal(ThermalBase): class RemovableThermal(Thermal): - def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position, presence_cb): - super(RemovableThermal, self).__init__(name, temp_file, high_th_file, high_crit_th_file, position) + def __init__(self, name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position, presence_cb): + super(RemovableThermal, self).__init__(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position) self.presence_cb = presence_cb def get_temperature(self): @@ -388,3 +403,68 @@ class RemovableThermal(Thermal): logger.log_debug("get_high_critical_threshold for {} failed due to {}".format(self.name, hint)) return None return super(RemovableThermal, self).get_high_critical_threshold() + + +class ModuleThermal(ThermalBase): + def __init__(self, sfp): + """ + index should be a string for category ambient and int for other categories + """ + super(ModuleThermal, self).__init__() + self.name = f'xSFP module {sfp.sdk_index + 1} Temp' + self.sfp = sfp + + def get_name(self): + """ + Retrieves the name of the device + + Returns: + string: The name of the device + """ + return self.name + + def get_temperature(self): + """ + Retrieves current temperature reading from thermal + + Returns: + A float number of current temperature in Celsius up to nearest thousandth + of one degree Celsius, e.g. 30.125 + """ + return self.sfp.get_temperature() + + def get_high_threshold(self): + """ + Retrieves the high threshold temperature of thermal + + Returns: + A float number, the high threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + return self.sfp.get_temperature_warning_threashold() + + def get_high_critical_threshold(self): + """ + Retrieves the high critical threshold temperature of thermal + + Returns: + A float number, the high critical threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + return self.sfp.get_temperature_critical_threashold() + + def get_position_in_parent(self): + """ + Retrieves 1-based relative physical position in parent device + Returns: + integer: The 1-based relative physical position in parent device + """ + return 1 + + def is_replaceable(self): + """ + Indicate whether this device is replaceable. + Returns: + bool: True if it is replaceable. + """ + return False diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py index dd3d794d85..9e1aaded05 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py @@ -15,9 +15,36 @@ # limitations under the License. # from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase +from . 
import thermal_updater +from .device_data import DeviceDataManager class ThermalManager(ThermalManagerBase): + thermal_updater_task = None + @classmethod def run_policy(cls, chassis): pass + + @classmethod + def initialize(cls): + """ + Initialize thermal manager, including register thermal condition types and thermal action types + and any other vendor specific initialization. + :return: + """ + if DeviceDataManager.is_independent_mode(): + from .chassis import Chassis + cls.thermal_updater_task = thermal_updater.ThermalUpdater(Chassis.chassis_instance.get_all_sfps()) + cls.thermal_updater_task.start() + + + @classmethod + def deinitialize(cls): + """ + Destroy thermal manager, including any vendor specific cleanup. The default behavior of this function + is a no-op. + :return: + """ + if DeviceDataManager.is_independent_mode(): + cls.thermal_updater_task.stop() diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_updater.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_updater.py new file mode 100644 index 0000000000..ad0b92ef4e --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_updater.py @@ -0,0 +1,213 @@ +# +# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from . 
import utils +from sonic_py_common import logger + +import sys +import time + +sys.path.append('/run/hw-management/bin') + +try: + import hw_management_independent_mode_update +except ImportError: + # For unit test only + from unittest import mock + hw_management_independent_mode_update = mock.MagicMock() + hw_management_independent_mode_update.module_data_set_module_counter = mock.MagicMock() + hw_management_independent_mode_update.thermal_data_set_asic = mock.MagicMock() + hw_management_independent_mode_update.thermal_data_set_module = mock.MagicMock() + hw_management_independent_mode_update.thermal_data_clean_asic = mock.MagicMock() + hw_management_independent_mode_update.thermal_data_clean_module = mock.MagicMock() + + +SFP_TEMPERATURE_SCALE = 1000 +ASIC_TEMPERATURE_SCALE = 125 +ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD = 105000 +ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD = 120000 + +ERROR_READ_THERMAL_DATA = 254000 + +TC_CONFIG_FILE = '/run/hw-management/config/tc_config.json' +logger = logger.Logger('thermal-updater') + + +class ThermalUpdater: + def __init__(self, sfp_list): + self._sfp_list = sfp_list + self._sfp_status = {} + self._timer = utils.Timer() + + def load_tc_config(self): + asic_poll_interval = 1 + sfp_poll_interval = 10 + data = utils.load_json_file(TC_CONFIG_FILE) + if not data: + logger.log_notice(f'{TC_CONFIG_FILE} does not exist, use default polling interval') + + if data: + dev_parameters = data.get('dev_parameters') + if dev_parameters is not None: + asic_parameter = dev_parameters.get('asic') + if asic_parameter is not None: + asic_poll_interval_config = asic_parameter.get('poll_time') + if asic_poll_interval_config: + asic_poll_interval = int(asic_poll_interval_config) / 2 + module_parameter = dev_parameters.get('module\\d+') + if module_parameter is not None: + sfp_poll_interval_config = module_parameter.get('poll_time') + if sfp_poll_interval_config: + sfp_poll_interval = int(sfp_poll_interval_config) / 2 + + logger.log_notice(f'ASIC polling interval: {asic_poll_interval}') + self._timer.schedule(asic_poll_interval, self.update_asic) + logger.log_notice(f'Module polling interval: {sfp_poll_interval}') + self._timer.schedule(sfp_poll_interval, self.update_module) + + def start(self): + self.clean_thermal_data() + if not self.wait_all_sfp_ready(): + logger.log_error('Failed to wait for all SFP ready, will put hw-management-tc to suspend') + self.control_tc(True) + return + self.control_tc(False) + self.load_tc_config() + self._timer.start() + + def stop(self): + self._timer.stop() + self.control_tc(True) + + def control_tc(self, suspend): + logger.log_notice(f'Set hw-management-tc to {"suspend" if suspend else "resume"}') + utils.write_file('/run/hw-management/config/suspend', 1 if suspend else 0) + + def clean_thermal_data(self): + hw_management_independent_mode_update.module_data_set_module_counter(len(self._sfp_list)) + hw_management_independent_mode_update.thermal_data_clean_asic(0) + for sfp in self._sfp_list: + hw_management_independent_mode_update.thermal_data_clean_module( + 0, + sfp.sdk_index + 1 + ) + + def wait_all_sfp_ready(self): + logger.log_notice('Waiting for all SFP modules ready...') + max_wait_time = 60 + ready_set = set() + while len(ready_set) != len(self._sfp_list): + for sfp in self._sfp_list: + try: + sfp.is_sw_control() + ready_set.add(sfp) + except: + continue + max_wait_time -= 1 + if max_wait_time == 0: + return False + time.sleep(1) + + logger.log_notice('All SFP modules are ready') + return True + + def get_asic_temp(self): + temperature = 
utils.read_int_from_file('/sys/module/sx_core/asic0/temperature/input', default=None) + return temperature * ASIC_TEMPERATURE_SCALE if temperature is not None else None + + def get_asic_temp_warning_threashold(self): + emergency = utils.read_int_from_file('/sys/module/sx_core/asic0/temperature/emergency', default=None, log_func=None) + return emergency * ASIC_TEMPERATURE_SCALE if emergency is not None else ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD + + def get_asic_temp_critical_threashold(self): + critical = utils.read_int_from_file('/sys/module/sx_core/asic0/temperature/critical', default=None, log_func=None) + return critical * ASIC_TEMPERATURE_SCALE if critical is not None else ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD + + def update_single_module(self, sfp): + try: + presence = sfp.get_presence() + pre_presence = self._sfp_status.get(sfp.sdk_index) + if presence: + temperature = sfp.get_temperature() + if temperature == 0: + warning_thresh = 0 + critical_thresh = 0 + fault = 0 + else: + warning_thresh = sfp.get_temperature_warning_threashold() + critical_thresh = sfp.get_temperature_critical_threashold() + fault = ERROR_READ_THERMAL_DATA if (temperature is None or warning_thresh is None or critical_thresh is None) else 0 + temperature = 0 if temperature is None else int(temperature * SFP_TEMPERATURE_SCALE) + warning_thresh = 0 if warning_thresh is None else int(warning_thresh * SFP_TEMPERATURE_SCALE) + critical_thresh = 0 if critical_thresh is None else int(critical_thresh * SFP_TEMPERATURE_SCALE) + + hw_management_independent_mode_update.thermal_data_set_module( + 0, # ASIC index always 0 for now + sfp.sdk_index + 1, + temperature, + critical_thresh, + warning_thresh, + fault + ) + else: + if pre_presence != presence: + hw_management_independent_mode_update.thermal_data_clean_module(0, sfp.sdk_index + 1) + + if pre_presence != presence: + self._sfp_status[sfp.sdk_index] = presence + except Exception as e: + logger.log_error('Failed to update module {sfp.sdk_index} thermal data - {e}') + hw_management_independent_mode_update.thermal_data_set_module( + 0, # ASIC index always 0 for now + sfp.sdk_index + 1, + 0, + 0, + 0, + ERROR_READ_THERMAL_DATA + ) + + def update_module(self): + for sfp in self._sfp_list: + self.update_single_module(sfp) + + def update_asic(self): + try: + asic_temp = self.get_asic_temp() + warn_threshold = self.get_asic_temp_warning_threashold() + critical_threshold = self.get_asic_temp_critical_threashold() + fault = 0 + if asic_temp is None: + logger.log_error('Failed to read ASIC temperature, send fault to hw-management-tc') + asic_temp = warn_threshold + fault = ERROR_READ_THERMAL_DATA + + hw_management_independent_mode_update.thermal_data_set_asic( + 0, # ASIC index always 0 for now + asic_temp, + critical_threshold, + warn_threshold, + fault + ) + except Exception as e: + logger.log_error('Failed to update ASIC thermal data - {e}') + hw_management_independent_mode_update.thermal_data_set_asic( + 0, # ASIC index always 0 for now + 0, + 0, + 0, + ERROR_READ_THERMAL_DATA + ) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py index 51e9bc7f03..9db38e6b41 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py @@ -18,6 +18,7 @@ import ctypes import functools import subprocess import json +import queue import sys import threading import time @@ -289,6 +290,60 @@ def wait_until(predict, timeout, interval=1, *args, 
**kwargs): return False +class TimerEvent: + def __init__(self, interval, cb, repeat): + self.interval = interval + self._cb = cb + self.repeat = repeat + + def execute(self): + self._cb() + + +class Timer(threading.Thread): + def __init__(self): + super(Timer, self).__init__() + self._timestamp_queue = queue.PriorityQueue() + self._wait_event = threading.Event() + self._stop_event = threading.Event() + self._min_timestamp = None + + def schedule(self, interval, cb, repeat=True, run_now=True): + timer_event = TimerEvent(interval, cb, repeat) + self.add_timer_event(timer_event, run_now) + + def add_timer_event(self, timer_event, run_now=True): + timestamp = time.time() + if not run_now: + timestamp += timer_event.interval + + self._timestamp_queue.put_nowait((timestamp, timer_event)) + if self._min_timestamp is not None and timestamp < self._min_timestamp: + self._wait_event.set() + + def stop(self): + if self.is_alive(): + self._wait_event.set() + self._stop_event.set() + self.join() + + def run(self): + while not self._stop_event.is_set(): + now = time.time() + item = self._timestamp_queue.get() + self._min_timestamp = item[0] + if self._min_timestamp > now: + self._wait_event.wait(self._min_timestamp - now) + self._wait_event.clear() + self._timestamp_queue.put(item) + continue + + timer_event = item[1] + timer_event.execute() + if timer_event.repeat: + self.add_timer_event(timer_event, False) + + class DbUtils: lock = threading.Lock() db_instances = threading.local() diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py index 6bdc82b5b4..dccc727bfe 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py @@ -292,6 +292,46 @@ class TestSfp: assert sfp.get_transceiver_threshold_info() sfp.reinit() + @mock.patch('os.path.exists') + @mock.patch('sonic_platform.utils.read_int_from_file') + def test_get_temperature(self, mock_read, mock_exists): + sfp = SFP(0) + sfp.is_sw_control = mock.MagicMock(return_value=True) + mock_exists.return_value = False + assert sfp.get_temperature() == None + + mock_exists.return_value = True + assert sfp.get_temperature() == None + + mock_read.return_value = None + sfp.is_sw_control.return_value = False + assert sfp.get_temperature() == None + + mock_read.return_value = 448 + assert sfp.get_temperature() == 56.0 + + def test_get_temperature_threshold(self): + sfp = SFP(0) + sfp.is_sw_control = mock.MagicMock(return_value=True) + assert sfp.get_temperature_warning_threashold() == 70.0 + assert sfp.get_temperature_critical_threashold() == 80.0 + + mock_api = mock.MagicMock() + mock_api.get_transceiver_thresholds_support = mock.MagicMock(return_value=False) + sfp.get_xcvr_api = mock.MagicMock(return_value=mock_api) + assert sfp.get_temperature_warning_threashold() == 70.0 + assert sfp.get_temperature_critical_threashold() == 80.0 + + from sonic_platform_base.sonic_xcvr.fields import consts + mock_api.get_transceiver_thresholds_support.return_value = True + mock_api.xcvr_eeprom = mock.MagicMock() + mock_api.xcvr_eeprom.read = mock.MagicMock(return_value={ + consts.TEMP_HIGH_ALARM_FIELD: 85.0, + consts.TEMP_HIGH_WARNING_FIELD: 75.0 + }) + assert sfp.get_temperature_warning_threashold() == 75.0 + assert sfp.get_temperature_critical_threashold() == 85.0 + @mock.patch('sonic_platform.utils.read_int_from_file') @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') 
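# A usage sketch for the utils.Timer helper introduced above (illustrative only;
# "updater" stands in for a ThermalUpdater instance and is not part of this patch):
#
#     timer = utils.Timer()
#     timer.start()
#     # fire immediately, then keep repeating (the 1s/10s defaults from load_tc_config())
#     timer.schedule(1, cb=updater.update_asic, repeat=True, run_now=True)
#     timer.schedule(10, cb=updater.update_module, repeat=True, run_now=True)
#     timer.stop()  # sets the stop/wait events and joins the worker thread
#
# Timer.run() pops the earliest (timestamp, TimerEvent) pair from its PriorityQueue,
# sleeps until it is due, executes the callback, and re-queues it when repeat=True.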
@mock.patch('sonic_platform.utils.DbUtils.get_db_instance') diff --git a/platform/mellanox/mlnx-platform-api/tests/test_thermal.py b/platform/mellanox/mlnx-platform-api/tests/test_thermal.py index db81f73096..a59b8dda40 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_thermal.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_thermal.py @@ -31,6 +31,7 @@ sys.path.insert(0, modules_path) import sonic_platform.chassis from sonic_platform.chassis import Chassis from sonic_platform.device_data import DeviceDataManager +from sonic_platform.sfp import SFP sonic_platform.chassis.extract_RJ45_ports_index = mock.MagicMock(return_value=[]) @@ -148,23 +149,27 @@ class TestThermal: @mock.patch('os.path.exists', mock.MagicMock(return_value=True)) def test_sfp_thermal(self): - from sonic_platform.thermal import initialize_sfp_thermal, THERMAL_NAMING_RULE - thermal_list = initialize_sfp_thermal(0) + from sonic_platform.thermal import THERMAL_NAMING_RULE + sfp = SFP(0) + thermal_list = sfp.get_all_thermals() assert len(thermal_list) == 1 thermal = thermal_list[0] rule = THERMAL_NAMING_RULE['sfp thermals'] start_index = rule.get('start_index', 1) assert thermal.get_name() == rule['name'].format(start_index) - assert rule['temperature'].format(start_index) in thermal.temperature - assert rule['high_threshold'].format(start_index) in thermal.high_threshold - assert rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold assert thermal.get_position_in_parent() == 1 assert thermal.is_replaceable() == False + sfp.get_temperature = mock.MagicMock(return_value=35.4) + sfp.get_temperature_warning_threashold = mock.MagicMock(return_value=70) + sfp.get_temperature_critical_threashold = mock.MagicMock(return_value=80) + assert thermal.get_temperature() == 35.4 + assert thermal.get_high_threshold() == 70 + assert thermal.get_high_critical_threshold() == 80 @mock.patch('sonic_platform.utils.read_float_from_file') def test_get_temperature(self, mock_read): from sonic_platform.thermal import Thermal - thermal = Thermal('test', 'temp_file', None, None, 1) + thermal = Thermal('test', 'temp_file', None, None, None, None, 1000, 1) mock_read.return_value = 35727 assert thermal.get_temperature() == 35.727 @@ -177,7 +182,7 @@ class TestThermal: @mock.patch('sonic_platform.utils.read_float_from_file') def test_get_high_threshold(self, mock_read): from sonic_platform.thermal import Thermal - thermal = Thermal('test', None, None, None, 1) + thermal = Thermal('test', None, None, None, None, None, 1000, 1) assert thermal.get_high_threshold() is None thermal.high_threshold = 'high_th_file' @@ -193,7 +198,7 @@ class TestThermal: @mock.patch('sonic_platform.utils.read_float_from_file') def test_get_high_critical_threshold(self, mock_read): from sonic_platform.thermal import Thermal - thermal = Thermal('test', None, None, None, 1) + thermal = Thermal('test', None, None, None, None, None, 1000, 1) assert thermal.get_high_critical_threshold() is None thermal.high_critical_threshold = 'high_th_file' diff --git a/platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py b/platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py new file mode 100644 index 0000000000..1a34a7440a --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. 
+# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +from unittest import mock + +from sonic_platform import utils +from sonic_platform.thermal_updater import ThermalUpdater, hw_management_independent_mode_update +from sonic_platform.thermal_updater import ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD, \ + ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD + + +mock_tc_config = """ +{ + "dev_parameters": { + "asic": { + "pwm_min": 20, + "pwm_max": 100, + "val_min": "!70000", + "val_max": "!105000", + "poll_time": 3 + }, + "module\\\\d+": { + "pwm_min": 20, + "pwm_max": 100, + "val_min": 60000, + "val_max": 80000, + "poll_time": 20 + } + } +} +""" + + +class TestThermalUpdater: + def test_load_tc_config_non_exists(self): + updater = ThermalUpdater(None) + updater.load_tc_config() + assert updater._timer._timestamp_queue.qsize() == 2 + + def test_load_tc_config_mocked(self): + updater = ThermalUpdater(None) + mock_os_open = mock.mock_open(read_data=mock_tc_config) + with mock.patch('sonic_platform.utils.open', mock_os_open): + updater.load_tc_config() + assert updater._timer._timestamp_queue.qsize() == 2 + + @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_asic', mock.MagicMock()) + @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_module', mock.MagicMock()) + @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.wait_all_sfp_ready') + @mock.patch('sonic_platform.utils.write_file') + def test_start_stop(self, mock_write, mock_wait): + mock_wait.return_value = True + mock_sfp = mock.MagicMock() + mock_sfp.sdk_index = 1 + updater = ThermalUpdater([mock_sfp]) + updater.start() + mock_write.assert_called_once_with('/run/hw-management/config/suspend', 0) + utils.wait_until(updater._timer.is_alive, timeout=5) + + mock_write.reset_mock() + updater.stop() + assert not updater._timer.is_alive() + mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1) + + mock_wait.return_value = False + mock_write.reset_mock() + updater.start() + mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1) + updater.stop() + + @mock.patch('sonic_platform.thermal_updater.time.sleep', mock.MagicMock()) + def test_wait_all_sfp_ready(self): + mock_sfp = mock.MagicMock() + mock_sfp.is_sw_control = mock.MagicMock(return_value=True) + updater = ThermalUpdater([mock_sfp]) + assert updater.wait_all_sfp_ready() + mock_sfp.is_sw_control.side_effect = Exception('') + assert not updater.wait_all_sfp_ready() + + @mock.patch('sonic_platform.utils.read_int_from_file') + def test_update_asic(self, mock_read): + mock_read.return_value = 8 + updater = ThermalUpdater(None) + assert updater.get_asic_temp() == 1000 + assert updater.get_asic_temp_warning_threashold() == 1000 + assert updater.get_asic_temp_critical_threashold() == 1000 + updater.update_asic() + hw_management_independent_mode_update.thermal_data_set_asic.assert_called_once() + + mock_read.return_value = None + assert updater.get_asic_temp() is None + assert updater.get_asic_temp_warning_threashold() == 
ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD + assert updater.get_asic_temp_critical_threashold() == ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD + + def test_update_module(self): + mock_sfp = mock.MagicMock() + mock_sfp.sdk_index = 10 + mock_sfp.get_presence = mock.MagicMock(return_value=True) + mock_sfp.get_temperature = mock.MagicMock(return_value=55.0) + mock_sfp.get_temperature_warning_threashold = mock.MagicMock(return_value=70.0) + mock_sfp.get_temperature_critical_threashold = mock.MagicMock(return_value=80.0) + updater = ThermalUpdater([mock_sfp]) + updater.update_module() + hw_management_independent_mode_update.thermal_data_set_module.assert_called_once_with(0, 11, 55000, 80000, 70000, 0) + + mock_sfp.get_temperature = mock.MagicMock(return_value=0.0) + hw_management_independent_mode_update.reset_mock() + updater.update_module() + hw_management_independent_mode_update.thermal_data_set_module.assert_called_once_with(0, 11, 0, 0, 0, 0) + + mock_sfp.get_presence = mock.MagicMock(return_value=False) + updater.update_module() + hw_management_independent_mode_update.thermal_data_clean_module.assert_called_once_with(0, 11) diff --git a/platform/mellanox/mlnx-platform-api/tests/test_utils.py b/platform/mellanox/mlnx-platform-api/tests/test_utils.py index 04b00f82f4..2a186de7e5 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_utils.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_utils.py @@ -191,6 +191,26 @@ class TestUtils: mock_os_open = mock.mock_open(read_data='a:b') with mock.patch('sonic_platform.utils.open', mock_os_open): assert utils.read_key_value_file('some_file') == {'a':'b'} + mock_os_open = mock.mock_open(read_data='a=b') with mock.patch('sonic_platform.utils.open', mock_os_open): assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'} + + def test_timer(self): + timer = utils.Timer() + timer.start() + mock_cb_1000_run_now = mock.MagicMock() + mock_cb_1000_run_future = mock.MagicMock() + mock_cb_1_run_future_once = mock.MagicMock() + mock_cb_1_run_future_repeat = mock.MagicMock() + timer.schedule(1000, cb=mock_cb_1000_run_now, repeat=False, run_now=True) + timer.schedule(1000, cb=mock_cb_1000_run_future, repeat=False, run_now=False) + timer.schedule(1, cb=mock_cb_1_run_future_once, repeat=False, run_now=False) + timer.schedule(1, cb=mock_cb_1_run_future_repeat, repeat=True, run_now=False) + time.sleep(3) + timer.stop() + + mock_cb_1000_run_now.assert_called_once() + mock_cb_1000_run_future.assert_not_called() + mock_cb_1_run_future_once.assert_called_once() + assert mock_cb_1_run_future_repeat.call_count > 1 From 6a9ec987b5e4da5d6126c1739c2033699219683b Mon Sep 17 00:00:00 2001 From: zitingguo-ms Date: Thu, 14 Dec 2023 09:37:35 +0800 Subject: [PATCH 06/33] change branch name (#17267) Why I did it Upgrade xgs SAI to 10.1 version. Work item tracking Microsoft ADO (number only): 25931321 How I did it Upgrade xgs SAI version in sai.mk file. 
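For reference, sai.mk composes the download location from these two values, so with this bump the XGS SAI packages resolve under a path of the form (derived from LIBSAIBCM_XGS_URL_PREFIX in the diff below; shown only as an illustration):

    https://sonicstorage.blob.core.windows.net/public/sai/sai-broadcom/SAI_10.1.0_GA/10.1.0.0/xgs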
How to verify it Run full qualification on 7050cx3/7260cx3: 7050cx3: https://dev.azure.com/mssonic/internal/_build/results?buildId=425450&view=results https://dev.azure.com/mssonic/internal/_build/results?buildId=425449&view=results 7260cx3: https://elastictest.org/scheduler/testplan/656f2b2b617fb27e41557494?leftSideViewMode=detail&prop=status&order=ascending --- platform/broadcom/sai.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/platform/broadcom/sai.mk b/platform/broadcom/sai.mk index 92a1d95981..eea4f47a4c 100644 --- a/platform/broadcom/sai.mk +++ b/platform/broadcom/sai.mk @@ -1,6 +1,6 @@ -LIBSAIBCM_XGS_VERSION = 8.4.31.0 +LIBSAIBCM_XGS_VERSION = 10.1.0.0 LIBSAIBCM_DNX_VERSION = 7.1.111.1 -LIBSAIBCM_XGS_BRANCH_NAME = SAI_8.4.0_GA +LIBSAIBCM_XGS_BRANCH_NAME = SAI_10.1.0_GA LIBSAIBCM_DNX_BRANCH_NAME = REL_7.0_SAI_1.11 LIBSAIBCM_XGS_URL_PREFIX = "https://sonicstorage.blob.core.windows.net/public/sai/sai-broadcom/$(LIBSAIBCM_XGS_BRANCH_NAME)/$(LIBSAIBCM_XGS_VERSION)/xgs" LIBSAIBCM_DNX_URL_PREFIX = "https://sonicstorage.blob.core.windows.net/public/sai/bcmsai/$(LIBSAIBCM_DNX_BRANCH_NAME)/$(LIBSAIBCM_DNX_VERSION)" From 53be9de7439440590373e5e9a19f0c9a33d85670 Mon Sep 17 00:00:00 2001 From: Junhua Zhai Date: Thu, 14 Dec 2023 09:37:44 +0800 Subject: [PATCH 07/33] Fix syncd_request_shutdown coredump in config reload on KVM sonic (#17486) The issue is related to #16812. Process syncd does not run in the container gbsyncd on kvm sonic with default hwsku. Microsoft ADO : 26151608 How I did it If syncd has not run in container gbsyncd, it is not needed to trigger graceful shudown of syncd. How to verify it None of syncd_request_shutdown coredump in config reload on KVM sonic --- files/scripts/gbsyncd.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/files/scripts/gbsyncd.sh b/files/scripts/gbsyncd.sh index fc6b018b3a..34bcb7044b 100755 --- a/files/scripts/gbsyncd.sh +++ b/files/scripts/gbsyncd.sh @@ -20,6 +20,11 @@ function waitplatform() { } function stopplatform1() { + if ! docker top gbsyncd$DEV | grep -q /usr/bin/syncd; then + debug "syncd process in container gbsyncd$DEV is not running" + return + fi + # Invoke platform specific pre shutdown routine. PLATFORM=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' platform` PLATFORM_PRE_SHUTDOWN="/usr/share/sonic/device/$PLATFORM/plugins/gbsyncd_request_pre_shutdown" From 6d043a25bd56d8b5ae3759ac3244dbdcc45e7a17 Mon Sep 17 00:00:00 2001 From: Nazarii Hnydyn Date: Thu, 14 Dec 2023 08:41:12 +0200 Subject: [PATCH 08/33] [installer] Create a blank grubenv if doesn't exist. (#17414) - Why I did it To fix BIOS firmware update after fresh image installation from ONiE - How I did it Initialized empty GRUB environment file after ONiE installation - How to verify it Install image from ONiE Run BIOS firmware upgrade Signed-off-by: Nazarii Hnydyn --- installer/default_platform.conf | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/installer/default_platform.conf b/installer/default_platform.conf index cf814c2013..319b7009f5 100755 --- a/installer/default_platform.conf +++ b/installer/default_platform.conf @@ -264,14 +264,6 @@ demo_install_grub() exit 1 } - # Create a blank environment block file. - if [ ! -f "$onie_initrd_tmp/$demo_mnt/grub/grubenv" ]; then - grub-editenv "$onie_initrd_tmp/$demo_mnt/grub/grubenv" create || { - echo "ERROR: grub-editenv failed on: $blk_dev" - exit 1 - } - fi - if [ "$demo_type" = "DIAG" ] ; then # Install GRUB in the partition also. 
This allows for # chainloading the DIAG image from another OS. @@ -354,14 +346,6 @@ demo_install_uefi_grub() } rm -f $grub_install_log - # Create a blank environment block file. - if [ ! -f "$demo_mnt/grub/grubenv" ]; then - grub-editenv "$demo_mnt/grub/grubenv" create || { - echo "ERROR: grub-editenv failed on: $blk_dev" - exit 1 - } - fi - # Configure EFI NVRAM Boot variables. --create also sets the # new boot number as active. grub=$(find /boot/efi/EFI/$demo_volume_label/ -name grub*.efi -exec basename {} \;) @@ -631,6 +615,14 @@ EOF umount $demo_mnt else cp $grub_cfg $onie_initrd_tmp/$demo_mnt/grub/grub.cfg + + # Create a blank environment block file. + if [ ! -f "$onie_initrd_tmp/$demo_mnt/grub/grubenv" ]; then + grub-editenv "$onie_initrd_tmp/$demo_mnt/grub/grubenv" create || { + echo "ERROR: grub-editenv failed on: $blk_dev" + exit 1 + } + fi fi cd / From b21f33b8b19e396752e509eea9a0b892df44e99b Mon Sep 17 00:00:00 2001 From: Ze Gan Date: Thu, 14 Dec 2023 14:49:04 +0800 Subject: [PATCH 09/33] [Azp]: Fix azp on building ubuntu20.04 and sonic-mgmt (#17439) The Azp failed on ubuntu20.04 and sonic-mgmt building due to sonic-dash-api updating. Signed-off-by: Ze Gan --- .azure-pipelines/azure-pipelines-build-ubuntu-2004.yml | 2 +- dockers/docker-sonic-mgmt/Dockerfile.j2 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/azure-pipelines-build-ubuntu-2004.yml b/.azure-pipelines/azure-pipelines-build-ubuntu-2004.yml index 697c87b3c9..1ff50719ce 100644 --- a/.azure-pipelines/azure-pipelines-build-ubuntu-2004.yml +++ b/.azure-pipelines/azure-pipelines-build-ubuntu-2004.yml @@ -34,7 +34,7 @@ stages: mkdir -p /tmp/artifacts displayName: "Install dependencies" - script: | - SONIC_CONFIG_MAKE_JOBS=$(nproc) CONFIGURED_ARCH=amd64 DEST=/tmp/artifacts make -f ../rules/protobuf.mk -f protobuf/Makefile + BLDENV=bullseye SONIC_CONFIG_MAKE_JOBS=$(nproc) CONFIGURED_ARCH=amd64 DEST=/tmp/artifacts make -f ../rules/protobuf.mk -f protobuf/Makefile workingDirectory: src displayName: "Build protobuf" - script: | diff --git a/dockers/docker-sonic-mgmt/Dockerfile.j2 b/dockers/docker-sonic-mgmt/Dockerfile.j2 index 5e634f0b98..c42478d9a2 100755 --- a/dockers/docker-sonic-mgmt/Dockerfile.j2 +++ b/dockers/docker-sonic-mgmt/Dockerfile.j2 @@ -224,7 +224,7 @@ RUN mkdir -p /tmp/protobuf \ # Install dash-api RUN cd /tmp \ && mkdir -p /usr/lib/python3/dist-packages/dash_api \ - && wget https://raw.githubusercontent.com/sonic-net/sonic-buildimage/master/src/sonic-dash-api/pypkg/__init__.py -O /usr/lib/python3/dist-packages/dash_api/__init__.py \ + && wget https://raw.githubusercontent.com/sonic-net/sonic-dash-api/master/misc/pypkg/dash_api/__init__.py -O /usr/lib/python3/dist-packages/dash_api/__init__.py \ && git clone https://github.com/sonic-net/sonic-dash-api.git \ && protoc -I=sonic-dash-api/proto --python_out=/usr/lib/python3/dist-packages/dash_api sonic-dash-api/proto/*.proto \ && rm -rf /tmp/sonic-dash-api From fa5829bca80ead2a882ba5bd1aa719e44c0ac68f Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:38:45 +0800 Subject: [PATCH 10/33] [submodule] Update submodule sonic-utilities to the latest HEAD automatically (#17502) --- src/sonic-utilities | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-utilities b/src/sonic-utilities index 3037959da0..377e5812f6 160000 --- a/src/sonic-utilities +++ b/src/sonic-utilities @@ -1 +1 @@ -Subproject commit 3037959da0db3639952d15c9cb0ba1b7a04c1e64 +Subproject 
commit 377e5812f66d4adba09221028191d459775d9a25 From 67c0543127d1ca9d7a0dfe26e334e229bdea1391 Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:42:08 +0800 Subject: [PATCH 11/33] [submodule] Update submodule sonic-linux-kernel to the latest HEAD automatically (#17498) --- src/sonic-linux-kernel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-linux-kernel b/src/sonic-linux-kernel index b2601c7b1a..63705029d7 160000 --- a/src/sonic-linux-kernel +++ b/src/sonic-linux-kernel @@ -1 +1 @@ -Subproject commit b2601c7b1a10f42488d0f21c6c133f5c60c2387d +Subproject commit 63705029d7d3bbf480a18839a1e12524bd949fa5 From c7e7dffb6ee63fa7a90a5bada79dfb69064cbf72 Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:42:54 +0800 Subject: [PATCH 12/33] [submodule] Update submodule sonic-sairedis to the latest HEAD automatically (#17500) --- src/sonic-sairedis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-sairedis b/src/sonic-sairedis index e7ad3566a8..641b7304f2 160000 --- a/src/sonic-sairedis +++ b/src/sonic-sairedis @@ -1 +1 @@ -Subproject commit e7ad3566a8265e1b4cc96a1ed50978eee319dc6a +Subproject commit 641b7304f2a10762a119b4c56c1621b4a88616b9 From 953d3dc1751728fa574d119dc0619cb4478be08d Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:47:12 +0800 Subject: [PATCH 13/33] [submodule] Update submodule sonic-dash-api to the latest HEAD automatically (#17503) --- src/sonic-dash-api | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-dash-api b/src/sonic-dash-api index d4448c78b4..910814ffb4 160000 --- a/src/sonic-dash-api +++ b/src/sonic-dash-api @@ -1 +1 @@ -Subproject commit d4448c78b4e0afd1ec6dfaa390aef5c650cee4b3 +Subproject commit 910814ffb4fd2e44e183d8d92086a724c62f5f1d From e59ac879e61c795223df83ca7148778f6813747d Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:15:27 +0800 Subject: [PATCH 14/33] [submodule] Update submodule sonic-host-services to the latest HEAD automatically (#17497) --- src/sonic-host-services | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-host-services b/src/sonic-host-services index e8ae2afd61..722b7962f9 160000 --- a/src/sonic-host-services +++ b/src/sonic-host-services @@ -1 +1 @@ -Subproject commit e8ae2afd612ef7fd08b7d855c48c78fe54b34ec4 +Subproject commit 722b7962f95e8d602a9a60e77356826e9f68891c From da3e7cbbba436fd782d647e032304900d6f14a87 Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:34:10 +0800 Subject: [PATCH 15/33] [submodule] Update submodule linkmgrd to the latest HEAD automatically (#17476) #### Why I did it src/linkmgrd ``` * 79c3872 - (HEAD -> master, origin/master, origin/HEAD) [active-standby] Fix `show mux status` inconsistency introduced by orchagent rollback (#225) (24 hours ago) [Jing Zhang] * ba913c0 - [warmboot] use config_db connector to update mux mode config instead of CLI (#223) (2 days ago) [Jing Zhang] ``` #### How I did it #### How to verify it #### Description for the changelog --- src/linkmgrd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linkmgrd b/src/linkmgrd index e420df41a0..79c3872b38 160000 --- a/src/linkmgrd +++ b/src/linkmgrd @@ -1 +1 @@ -Subproject commit 
e420df41a08ea9ec421687fb06d1f96535ebf151 +Subproject commit 79c3872b38b8ae464dd04907610657bb2a611360 From f373a16e957d559c646a2829799b8e4b2e21fbd7 Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Thu, 14 Dec 2023 18:01:11 +0800 Subject: [PATCH 16/33] [Mellanox] Fix race condition while creating SFP (#17441) - Why I did it Fix issue xcvrd crashes due to cannot import name 'initialize_sfp_thermal': Nov 27 09:47:16.388639 sonic ERR pmon#xcvrd: Exception occured at CmisManagerTask thread due to ImportError("cannot import name 'initialize_sfp_thermal' from partially initialized module 'sonic_platform.thermal' (most likely due to a circular import) (/usr/local/lib/python3.9/dist-packages/sonic_platform/thermal.py)") - How I did it Add lock for creating SFP object - How to verify it Unit test Manual Test --- .../sonic_platform/chassis.py | 68 +++++++++++-------- .../mlnx-platform-api/tests/test_chassis.py | 26 +++++++ 2 files changed, 66 insertions(+), 28 deletions(-) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py index f0b73de66c..5870d7e6b6 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py @@ -124,6 +124,7 @@ class Chassis(ChassisBase): self.reboot_cause_initialized = False self.sfp_module = None + self.sfp_lock = threading.Lock() # Build the RJ45 port list from platform.json and hwsku.json self._RJ45_port_inited = False @@ -277,38 +278,49 @@ class Chassis(ChassisBase): def initialize_single_sfp(self, index): sfp_count = self.get_num_sfps() + # Use double checked locking mechanism for: + # 1. protect shared resource self._sfp_list + # 2. performance (avoid locking every time) if index < sfp_count: - if not self._sfp_list: - self._sfp_list = [None] * sfp_count + if not self._sfp_list or not self._sfp_list[index]: + with self.sfp_lock: + if not self._sfp_list: + self._sfp_list = [None] * sfp_count - if not self._sfp_list[index]: - sfp_module = self._import_sfp_module() - if self.RJ45_port_list and index in self.RJ45_port_list: - self._sfp_list[index] = sfp_module.RJ45Port(index) - else: - self._sfp_list[index] = sfp_module.SFP(index) - self.sfp_initialized_count += 1 + if not self._sfp_list[index]: + sfp_module = self._import_sfp_module() + if self.RJ45_port_list and index in self.RJ45_port_list: + self._sfp_list[index] = sfp_module.RJ45Port(index) + else: + self._sfp_list[index] = sfp_module.SFP(index) + self.sfp_initialized_count += 1 def initialize_sfp(self): - if not self._sfp_list: - sfp_module = self._import_sfp_module() - sfp_count = self.get_num_sfps() - for index in range(sfp_count): - if self.RJ45_port_list and index in self.RJ45_port_list: - sfp_object = sfp_module.RJ45Port(index) - else: - sfp_object = sfp_module.SFP(index) - self._sfp_list.append(sfp_object) - self.sfp_initialized_count = sfp_count - elif self.sfp_initialized_count != len(self._sfp_list): - sfp_module = self._import_sfp_module() - for index in range(len(self._sfp_list)): - if self._sfp_list[index] is None: - if self.RJ45_port_list and index in self.RJ45_port_list: - self._sfp_list[index] = sfp_module.RJ45Port(index) - else: - self._sfp_list[index] = sfp_module.SFP(index) - self.sfp_initialized_count = len(self._sfp_list) + sfp_count = self.get_num_sfps() + # Use double checked locking mechanism for: + # 1. protect shared resource self._sfp_list + # 2. 
performance (avoid locking every time) + if sfp_count != self.sfp_initialized_count: + with self.sfp_lock: + if sfp_count != self.sfp_initialized_count: + if not self._sfp_list: + sfp_module = self._import_sfp_module() + for index in range(sfp_count): + if self.RJ45_port_list and index in self.RJ45_port_list: + sfp_object = sfp_module.RJ45Port(index) + else: + sfp_object = sfp_module.SFP(index) + self._sfp_list.append(sfp_object) + self.sfp_initialized_count = sfp_count + elif self.sfp_initialized_count != len(self._sfp_list): + sfp_module = self._import_sfp_module() + for index in range(len(self._sfp_list)): + if self._sfp_list[index] is None: + if self.RJ45_port_list and index in self.RJ45_port_list: + self._sfp_list[index] = sfp_module.RJ45Port(index) + else: + self._sfp_list[index] = sfp_module.SFP(index) + self.sfp_initialized_count = len(self._sfp_list) def get_num_sfps(self): """ diff --git a/platform/mellanox/mlnx-platform-api/tests/test_chassis.py b/platform/mellanox/mlnx-platform-api/tests/test_chassis.py index fce9bd00b0..ffe86aaf3d 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_chassis.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_chassis.py @@ -16,8 +16,10 @@ # import os +import random import sys import subprocess +import threading from mock import MagicMock if sys.version_info.major == 3: @@ -167,6 +169,30 @@ class TestChassis: assert len(sfp_list) == 3 assert chassis.sfp_initialized_count == 3 + def test_create_sfp_in_multi_thread(self): + DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=3) + + iteration_num = 100 + while iteration_num > 0: + chassis = Chassis() + assert chassis.sfp_initialized_count == 0 + t1 = threading.Thread(target=lambda: chassis.get_sfp(1)) + t2 = threading.Thread(target=lambda: chassis.get_sfp(1)) + t3 = threading.Thread(target=lambda: chassis.get_all_sfps()) + t4 = threading.Thread(target=lambda: chassis.get_all_sfps()) + threads = [t1, t2, t3, t4] + random.shuffle(threads) + for t in threads: + t.start() + for t in threads: + t.join() + assert len(chassis.get_all_sfps()) == 3 + assert chassis.sfp_initialized_count == 3 + for index, s in enumerate(chassis.get_all_sfps()): + assert s.sdk_index == index + iteration_num -= 1 + + @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', MagicMock(return_value=3)) def test_change_event(self): chassis = Chassis() From c1cb292310dca70562ba87f710575955e718864c Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Thu, 14 Dec 2023 18:04:24 +0800 Subject: [PATCH 17/33] [Mellanox] implement platform wait in python code (#17398) - Why I did it New implementation of Nvidia platform_wait due to: 1. sysfs deprecated by hw-mgmt 2. new dependencies to SDK 3. 
For CMIS host management mode - How I did it wait hw-management ready wait SDK sysfs nodes ready - How to verify it manual test unit test sonic-mgmt regression --- .../x86_64-mlnx_msn2700-r0/platform_wait | 92 ++++++------------- .../sonic_platform/device_data.py | 28 +++++- .../mlnx-platform-api/sonic_platform/utils.py | 24 +++++ .../tests/test_device_data.py | 24 ++++- .../mlnx-platform-api/tests/test_utils.py | 7 ++ 5 files changed, 107 insertions(+), 68 deletions(-) diff --git a/device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait b/device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait index 0809748687..ea76db07a6 100755 --- a/device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait +++ b/device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait @@ -1,68 +1,32 @@ -#!/bin/bash +#!/usr/bin/python3 -declare -r SYSLOG_LOGGER="/usr/bin/logger" -declare -r SYSLOG_IDENTIFIER="platform_wait" -declare -r SYSLOG_ERROR="error" -declare -r SYSLOG_NOTICE="notice" -declare -r SYSLOG_INFO="info" +# +# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -declare -r HW_MGMT_CONFIG="/var/run/hw-management/config" +import sys +from sonic_platform.device_data import DeviceDataManager +from sonic_py_common.logger import Logger -declare -r ASIC_INIT_DONE="${HW_MGMT_CONFIG}/asics_init_done" -declare -r NUM_ASICS="${HW_MGMT_CONFIG}/asic_num" -declare -r ASIC_CHIPUP_COMPLETED="${HW_MGMT_CONFIG}/asic_chipup_completed" -declare -r EXIT_SUCCESS="0" -declare -r EXIT_TIMEOUT="1" - -function log_error() { - eval "${SYSLOG_LOGGER} -t ${SYSLOG_IDENTIFIER} -p ${SYSLOG_ERROR} $@" -} - -function log_notice() { - eval "${SYSLOG_LOGGER} -t ${SYSLOG_IDENTIFIER} -p ${SYSLOG_NOTICE} $@" -} - -function log_info() { - eval "${SYSLOG_LOGGER} -t ${SYSLOG_IDENTIFIER} -p ${SYSLOG_INFO} $@" -} - -function wait_for_asic_chipup() { - - local _ASIC_INIT="0" - local _ASIC_COUNT="0" - local _ASICS_CHIPUP="0" - - local -i _WDOG_CNT="1" - local -ir _WDOG_MAX="300" - - local -r _TIMEOUT="1s" - - while [[ "${_WDOG_CNT}" -le "${_WDOG_MAX}" ]]; do - _ASIC_INIT="$(cat ${ASIC_INIT_DONE} 2>&1)" - _ASIC_COUNT="$(cat ${NUM_ASICS} 2>&1)" - _ASICS_CHIPUP="$(cat ${ASIC_CHIPUP_COMPLETED} 2>&1)" - - if [[ "${_ASIC_INIT}" -eq 1 && "${_ASIC_COUNT}" -eq "${_ASICS_CHIPUP}" ]]; then - return "${EXIT_SUCCESS}" - fi - - let "_WDOG_CNT++" - sleep "${_TIMEOUT}" - done - - log_error "Mellanox ASIC is not ready: INIT: ${_ASIC_INIT}, NUM_ASIC: ${_ASIC_COUNT}, CHIPUP: ${_ASICS_CHIPUP} timeout...." - return "${EXIT_TIMEOUT}" -} - -log_info "Wait for Mellanox ASIC to be ready" - -wait_for_asic_chipup -EXIT_CODE="$?" 
-if [[ "${EXIT_CODE}" != "${EXIT_SUCCESS}" ]]; then - exit "${EXIT_CODE}" -fi - -log_notice "Mellanox ASIC is ready" - -exit "${EXIT_SUCCESS}" +logger = Logger(log_identifier='platform_wait') +logger.log_notice('Nvidia: Wait for PMON dependencies to be ready') +if DeviceDataManager.wait_platform_ready(): + logger.log_notice('Nvidia: PMON dependencies are ready') + sys.exit(0) +else: + logger.log_error('Nvidia: PMON dependencies are not ready: timeout') + sys.exit(-1) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py index 6bf0a9945a..aeceb15d19 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py @@ -17,6 +17,7 @@ import glob import os +import time from . import utils @@ -167,8 +168,11 @@ class DeviceDataManager: @classmethod @utils.read_only_cache() def get_sfp_count(cls): - sfp_count = utils.read_int_from_file('/run/hw-management/config/sfp_counter') - return sfp_count if sfp_count > 0 else len(glob.glob('/sys/module/sx_core/asic0/module*')) + from sonic_py_common import device_info + platform_path = device_info.get_path_to_platform_dir() + platform_json_path = os.path.join(platform_path, 'platform.json') + platform_data = utils.load_json_file(platform_json_path) + return len(platform_data['chassis']['sfps']) @classmethod def get_linecard_sfp_count(cls, lc_index): @@ -244,3 +248,23 @@ class DeviceDataManager: sai_profile_file = os.path.join(hwsku_dir, 'sai.profile') data = utils.read_key_value_file(sai_profile_file, delimeter='=') return data.get('SAI_INDEPENDENT_MODULE_MODE') == '1' + + @classmethod + def wait_platform_ready(cls): + """ + Wait for Nvidia platform related services(SDK, hw-management) ready + Returns: + bool: True if wait success else timeout + """ + conditions = [] + sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror'] + if cls.is_independent_mode(): + sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset', + 'power_good', 'power_limit', 'power_on', 'temperature/input']) + else: + conditions.append(lambda: utils.read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1) + sfp_count = cls.get_sfp_count() + for sfp_index in range(sfp_count): + for sysfs_node in sysfs_nodes: + conditions.append(lambda: os.path.exists(f'/sys/module/sx_core/asic0/module{sfp_index}/{sysfs_node}')) + return utils.wait_until_conditions(conditions, 300, 1) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py index 9db38e6b41..1135903c24 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py @@ -290,6 +290,30 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs): return False +def wait_until_conditions(conditions, timeout, interval=1): + """ + Wait until all the conditions become true + Args: + conditions (list): a list of callable which generate True|False + timeout (int): wait time in seconds + interval (int, optional): interval to check the predict. Defaults to 1. 
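    Example (illustrative sketch, mirroring how DeviceDataManager.wait_platform_ready
    builds its condition list; not an exhaustive usage):
        conditions = [lambda: read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1]
        conditions.append(lambda: os.path.exists('/sys/module/sx_core/asic0/module0/present'))
        ready = wait_until_conditions(conditions, 300, 1)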
+ + Returns: + bool: True if wait success else False + """ + while timeout > 0: + pending_conditions = [] + for condition in conditions: + if not condition(): + pending_conditions.append(condition) + if not pending_conditions: + return True + conditions = pending_conditions + time.sleep(interval) + timeout -= interval + return False + + class TimerEvent: def __init__(self, interval, cb, repeat): self.interval = interval diff --git a/platform/mellanox/mlnx-platform-api/tests/test_device_data.py b/platform/mellanox/mlnx-platform-api/tests/test_device_data.py index 866f01c3e7..c172b82a30 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_device_data.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_device_data.py @@ -60,6 +60,26 @@ class TestDeviceData: mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'} assert DeviceDataManager.is_independent_mode() + @mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp')) + @mock.patch('sonic_platform.device_data.utils.load_json_file') + def test_get_sfp_count(self, mock_load_json): + mock_load_json.return_value = { + 'chassis': { + 'sfps': [1,2,3] + } + } + assert DeviceDataManager.get_sfp_count() == 3 - - + @mock.patch('sonic_platform.device_data.time.sleep', mock.MagicMock()) + @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3)) + @mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1)) + @mock.patch('sonic_platform.device_data.os.path.exists') + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') + def test_wait_platform_ready(self, mock_is_indep, mock_exists): + mock_exists.return_value = True + mock_is_indep.return_value = True + assert DeviceDataManager.wait_platform_ready() + mock_is_indep.return_value = False + assert DeviceDataManager.wait_platform_ready() + mock_exists.return_value = False + assert not DeviceDataManager.wait_platform_ready() diff --git a/platform/mellanox/mlnx-platform-api/tests/test_utils.py b/platform/mellanox/mlnx-platform-api/tests/test_utils.py index 2a186de7e5..b6ec67975f 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_utils.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_utils.py @@ -195,6 +195,13 @@ class TestUtils: mock_os_open = mock.mock_open(read_data='a=b') with mock.patch('sonic_platform.utils.open', mock_os_open): assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'} + + @mock.patch('sonic_platform.utils.time.sleep', mock.MagicMock()) + def test_wait_until_conditions(self): + conditions = [lambda: True] + assert utils.wait_until_conditions(conditions, 1) + conditions = [lambda: False] + assert not utils.wait_until_conditions(conditions, 1) def test_timer(self): timer = utils.Timer() From d6f6bbfc5d2bef0734d0a3f237d49c7f7ea17c0c Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Thu, 14 Dec 2023 18:35:44 +0800 Subject: [PATCH 18/33] [submodule] Update submodule sonic-gnmi to the latest HEAD automatically (#17436) #### Why I did it src/sonic-gnmi ``` * 88e82d4 - (HEAD -> master, origin/master, origin/HEAD) Replace PFC_WD_TABLE with PFC_WD (#173) (8 days ago) [Zain Budhwani] ``` #### How I did it #### How to verify it #### Description for the changelog --- src/sonic-gnmi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-gnmi b/src/sonic-gnmi index 07e0b36437..88e82d4bc4 160000 --- a/src/sonic-gnmi +++ 
b/src/sonic-gnmi @@ -1 +1 @@ -Subproject commit 07e0b364375c9b867ae1f5d600d0785f30e3f4d3 +Subproject commit 88e82d4bc40658bc100a9a0508b1d1289a924a61 From dac2ba6e1bc1bb620d742fd4f42fef1a82f76df7 Mon Sep 17 00:00:00 2001 From: Ze Gan Date: Fri, 15 Dec 2023 00:59:16 +0800 Subject: [PATCH 19/33] [Azp]: Add dash-api dependencies on building Azp ubuntu20.04 (#17507) Signed-off-by: Ze Gan --- .azure-pipelines/azure-pipelines-build-ubuntu-2004.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.azure-pipelines/azure-pipelines-build-ubuntu-2004.yml b/.azure-pipelines/azure-pipelines-build-ubuntu-2004.yml index 1ff50719ce..e2aa480576 100644 --- a/.azure-pipelines/azure-pipelines-build-ubuntu-2004.yml +++ b/.azure-pipelines/azure-pipelines-build-ubuntu-2004.yml @@ -28,7 +28,8 @@ stages: cmake pkg-config python3-pip python cmake libgtest-dev libgmock-dev libyang-dev \ debhelper-compat dh-elpa dh-sequence-python3 python3-all \ libpython3-all-dev python3-six xmlto unzip rake-compiler gem2deb pkg-php-tools \ - ant default-jdk maven-repo-helper libguava-java + ant default-jdk maven-repo-helper libguava-java \ + libboost-all-dev libgtest-dev build-essential wget http://ftp.us.debian.org/debian/pool/main/libg/libgoogle-gson-java/libgoogle-gson-java_2.8.6-1+deb11u1_all.deb sudo dpkg -i libgoogle-gson-java_2.8.6-1+deb11u1_all.deb mkdir -p /tmp/artifacts From f3f507826b4887009d353f96bd5412cbbb2ae0f1 Mon Sep 17 00:00:00 2001 From: Sudharsan Dhamal Gopalarathnam Date: Thu, 14 Dec 2023 09:13:20 -0800 Subject: [PATCH 20/33] [FRR] Fix zebra memory leak when bgp fib suppress pending is enabled (#17484) Fix zebra leaking memory with fib suppress enabled. Porting the fix from FRRouting/frr#14983 While running test_stress_route.py, systems with lower memory started to throw low memory logs. On further investigation, a memory leak has been found in zebra which was fixed in the FRR community. --- ...lane_fpm_nl-return-path-leaks-memory.patch | 57 +++++++++++++++++++ src/sonic-frr/patch/series | 1 + 2 files changed, 58 insertions(+) create mode 100644 src/sonic-frr/patch/0033-zebra-The-dplane_fpm_nl-return-path-leaks-memory.patch diff --git a/src/sonic-frr/patch/0033-zebra-The-dplane_fpm_nl-return-path-leaks-memory.patch b/src/sonic-frr/patch/0033-zebra-The-dplane_fpm_nl-return-path-leaks-memory.patch new file mode 100644 index 0000000000..00e8cc0062 --- /dev/null +++ b/src/sonic-frr/patch/0033-zebra-The-dplane_fpm_nl-return-path-leaks-memory.patch @@ -0,0 +1,57 @@ +From c13964525dae96299dc54daf635609971576a09e Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Mon, 11 Dec 2023 13:41:36 -0500 +Subject: [PATCH] zebra: The dplane_fpm_nl return path leaks memory + +The route entry created when using a ctx to pass route +entry data backup to the master pthread in zebra is +being leaked. Prevent this from happening. 
+ +Signed-off-by: Donald Sharp + +diff --git a/zebra/rib.h b/zebra/rib.h +index 016106312..e99eee67c 100644 +--- a/zebra/rib.h ++++ b/zebra/rib.h +@@ -352,6 +352,8 @@ extern void _route_entry_dump(const char *func, union prefixconstptr pp, + union prefixconstptr src_pp, + const struct route_entry *re); + ++void zebra_rib_route_entry_free(struct route_entry *re); ++ + struct route_entry * + zebra_rib_route_entry_new(vrf_id_t vrf_id, int type, uint8_t instance, + uint32_t flags, uint32_t nhe_id, uint32_t table_id, +diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c +index 6bdc15592..fc9e8c457 100644 +--- a/zebra/rt_netlink.c ++++ b/zebra/rt_netlink.c +@@ -1001,6 +1001,8 @@ int netlink_route_change_read_unicast_internal(struct nlmsghdr *h, + re, ng, startup, ctx); + if (ng) + nexthop_group_delete(&ng); ++ if (ctx) ++ zebra_rib_route_entry_free(re); + } else { + /* + * I really don't see how this is possible +diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c +index f2f20bcf7..1cefdfae7 100644 +--- a/zebra/zebra_rib.c ++++ b/zebra/zebra_rib.c +@@ -4136,6 +4136,12 @@ struct route_entry *zebra_rib_route_entry_new(vrf_id_t vrf_id, int type, + + return re; + } ++ ++void zebra_rib_route_entry_free(struct route_entry *re) ++{ ++ XFREE(MTYPE_RE, re); ++} ++ + /* + * Internal route-add implementation; there are a couple of different public + * signatures. Callers in this path are responsible for the memory they +-- +2.17.1 + diff --git a/src/sonic-frr/patch/series b/src/sonic-frr/patch/series index c87c5e61f9..bcb2900882 100644 --- a/src/sonic-frr/patch/series +++ b/src/sonic-frr/patch/series @@ -32,3 +32,4 @@ cross-compile-changes.patch 0030-bgpd-Treat-EOR-as-withdrawn-to-avoid-unwanted-handli.patch 0031-bgpd-Ignore-handling-NLRIs-if-we-received-MP_UNREACH.patch 0032-zebra-Fix-fpm-multipath-encap-addition.patch +0033-zebra-The-dplane_fpm_nl-return-path-leaks-memory.patch From be99434991b0bf7cf90e8fa423572540d7f0a63d Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Fri, 15 Dec 2023 06:32:51 +0800 Subject: [PATCH 21/33] [submodule] Update submodule sonic-swss to the latest HEAD automatically (#17501) #### Why I did it src/sonic-swss ``` * ff524e6d - (HEAD -> master, origin/master, origin/HEAD) [dash] add a retry for an ACL rule creation if a tag is not created yet (#2972) (7 hours ago) [Yakiv Huryk] * 620db3da - [ci] Allow partially success build artifact in PR checker pipeline. 
#2986 (3 days ago) [Liu Shilong] * d357e6f1 - [copporch] Add safeguard during policer attribute update (#2977) (4 days ago) [Vivek] * cb460394 - [fpmsyncd][WR] Relax the static schema constraint for ROUTE_TABLE (#2981) (5 days ago) [Vivek] * a1ce21f6 - Change base directory referenced in coverage.xml (#2976) (6 days ago) [Lawrence Lee] * 920959cf - [Dash] [UT] Add ZMQ test case for dash (#2967) (6 days ago) [Hua Liu] ``` #### How I did it #### How to verify it #### Description for the changelog --- src/sonic-swss | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-swss b/src/sonic-swss index 6026b6d63b..ff524e6dc8 160000 --- a/src/sonic-swss +++ b/src/sonic-swss @@ -1 +1 @@ -Subproject commit 6026b6d63b686f5f7cef1522715141b4c725cfba +Subproject commit ff524e6dc828a8dca2f4bf6fccabc819e77710f0 From 73f6e5895a069b81eb1b9b8e5b34453c19e8ab5c Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Fri, 15 Dec 2023 15:31:28 +0800 Subject: [PATCH 22/33] [submodule] Update submodule sonic-platform-daemons to the latest HEAD automatically (#17499) --- src/sonic-platform-daemons | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-platform-daemons b/src/sonic-platform-daemons index 502c0b6622..1c9b01d123 160000 --- a/src/sonic-platform-daemons +++ b/src/sonic-platform-daemons @@ -1 +1 @@ -Subproject commit 502c0b6622008363cb1ed6d1b7c85b4093997093 +Subproject commit 1c9b01d12393f25db7b258f58d10ea77c3f5b682 From 2532661cd9bce6a3205c4b5174a17f4f1e48572f Mon Sep 17 00:00:00 2001 From: Liu Shilong Date: Fri, 15 Dec 2023 16:25:26 +0800 Subject: [PATCH 23/33] [ci] Enable sonic-restapi build in PR validation. (#17397) Why I did it Enable sonic-restapi build in two platform to avoid build break on restapi target. Work item tracking Microsoft ADO (number only): 26048426 How I did it How to verify it --- azure-pipelines.yml | 2 ++ rules/config | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 43ac0cb024..ca780398c4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -65,6 +65,7 @@ stages: - name: broadcom variables: swi_image: yes + INCLUDE_RESTAPI: y - name: mellanox variables: dbg_image: yes @@ -79,6 +80,7 @@ stages: timeoutInMinutes: 1200 variables: PLATFORM_ARCH: armhf + INCLUDE_RESTAPI: y - stage: Test dependsOn: BuildVS diff --git a/rules/config b/rules/config index 209129d036..70110469eb 100644 --- a/rules/config +++ b/rules/config @@ -144,7 +144,7 @@ INCLUDE_MGMT_FRAMEWORK = y ENABLE_HOST_SERVICE_ON_START = y # INCLUDE_RESTAPI - build docker-sonic-restapi for configuring the switch using REST APIs -INCLUDE_RESTAPI = n +INCLUDE_RESTAPI ?= n # INCLUDE_NAT - build docker-nat for nat support INCLUDE_NAT = y From 979516633d97f18a781f222b2c28e1b5df674d0c Mon Sep 17 00:00:00 2001 From: Liu Shilong Date: Fri, 15 Dec 2023 16:52:51 +0800 Subject: [PATCH 24/33] [ci] Support pensando platform build in pipeline. (#17512) Why I did it Update pipeline file to support pensando's build. 
Work item tracking Microsoft ADO (number only): 26087700 How I did it How to verify it --- .azure-pipelines/azure-pipelines-build.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.azure-pipelines/azure-pipelines-build.yml b/.azure-pipelines/azure-pipelines-build.yml index 961e72a3e5..d7f87456f8 100644 --- a/.azure-pipelines/azure-pipelines-build.yml +++ b/.azure-pipelines/azure-pipelines-build.yml @@ -114,12 +114,19 @@ jobs: docker_syncd_rpc_image: yes platform_rpc: nephos + - name: pensando + pool: sonicbld-arm64 + variables: + PLATFORM_ARCH: arm64 + buildSteps: - template: .azure-pipelines/template-skipvstest.yml@buildimage - template: .azure-pipelines/template-daemon.yml@buildimage - bash: | set -ex - if [ $(GROUP_NAME) == vs ]; then + if [ $(GROUP_NAME) == pensando ]; then + make $BUILD_OPTIONS target/sonic-pensando.tar + elif [ $(GROUP_NAME) == vs ]; then if [ $(dbg_image) == yes ]; then make $BUILD_OPTIONS INSTALL_DEBUG_TOOLS=y target/sonic-vs.img.gz mv target/sonic-vs.img.gz target/sonic-vs-dbg.img.gz From 69ad1ed41a8600629453644e3fa64a2be3cef584 Mon Sep 17 00:00:00 2001 From: spilkey-cisco <110940806+spilkey-cisco@users.noreply.github.com> Date: Fri, 15 Dec 2023 15:33:20 -0800 Subject: [PATCH 25/33] Fix system-health hardware_checker to consume fan tolerance details (#16689) Why I did it Fan tolerance checking is done through new APIs, is_under_speed and is_over_speed, which populate corresponding fields into the database. speed_tolerance is no longer used and was removed, but system-health was not updated and indicates failures: ADO: 25279165 root@sonic/# show system-health summary System status summary System status LED red_blink Services: Status: OK Hardware: Status: Not OK Reasons: Failed to get speed tolerance for fantray5.fan1 Failed to get speed tolerance for fantray5.fan0 Failed to get speed tolerance for fantray4.fan1 Failed to get speed tolerance for fantray4.fan0 Failed to get speed tolerance for fantray3.fan1 Failed to get speed tolerance for fantray3.fan0 Failed to get speed tolerance for fantray2.fan1 Failed to get speed tolerance for fantray2.fan0 Failed to get speed tolerance for fantray1.fan1 Failed to get speed tolerance for fantray1.fan0 Failed to get speed tolerance for fantray0.fan1 Failed to get speed tolerance for fantray0.fan0 Failed to get speed tolerance for PSU1.fan0 Failed to get speed tolerance for PSU0.fan0 How I did it Updated hardware_checker.py in system-health to consume new is_under_speed and is_over_speed database entries instead of speed_tolerance and hard-coded calculations. 
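
A condensed sketch of the before/after check logic, for reviewers (the helper names here are illustrative only, not the actual hardware_checker.py methods; the field names match the FAN_INFO entries used in the tests below):

```python
def fan_speed_ok_old(data_dict):
    # Old behaviour: recompute the allowed band from speed_target and the
    # speed_tolerance percentage, then range-check the actual speed.
    speed = float(data_dict['speed'])
    target = float(data_dict['speed_target'])
    tolerance = float(data_dict['speed_tolerance'])
    return target * (1 - tolerance / 100) <= speed <= target * (1 + tolerance / 100)

def fan_speed_ok_new(data_dict):
    # New behaviour: the platform layer already evaluates the thresholds and
    # publishes boolean flags, so the checker only consumes them.
    return 'true' not in (data_dict['is_under_speed'].lower(),
                          data_dict['is_over_speed'].lower())
```
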
How to verify it root@sonic:/# show system-health summary System status summary System status LED green Services: Status: OK Hardware: Status: OK --- .../health_checker/hardware_checker.py | 28 ++++++++++--------- src/system-health/tests/test_system_health.py | 28 +++++++++++++++---- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/src/system-health/health_checker/hardware_checker.py b/src/system-health/health_checker/hardware_checker.py index 8f7a11f55c..113fd88663 100644 --- a/src/system-health/health_checker/hardware_checker.py +++ b/src/system-health/health_checker/hardware_checker.py @@ -102,37 +102,39 @@ class HardwareChecker(HealthChecker): if not self._ignore_check(config.ignore_devices, 'fan', name, 'speed'): speed = data_dict.get('speed', None) speed_target = data_dict.get('speed_target', None) - speed_tolerance = data_dict.get('speed_tolerance', None) + is_under_speed = data_dict.get('is_under_speed', None) + is_over_speed = data_dict.get('is_over_speed', None) if not speed: self.set_object_not_ok('Fan', name, 'Failed to get actual speed data for {}'.format(name)) continue elif not speed_target: self.set_object_not_ok('Fan', name, 'Failed to get target speed date for {}'.format(name)) continue - elif not speed_tolerance: - self.set_object_not_ok('Fan', name, 'Failed to get speed tolerance for {}'.format(name)) + elif is_under_speed is None: + self.set_object_not_ok('Fan', name, 'Failed to get under speed threshold check for {}'.format(name)) + continue + elif is_over_speed is None: + self.set_object_not_ok('Fan', name, 'Failed to get over speed threshold check for {}'.format(name)) continue else: try: speed = float(speed) speed_target = float(speed_target) - speed_tolerance = float(speed_tolerance) - speed_min_th = speed_target * (1 - float(speed_tolerance) / 100) - speed_max_th = speed_target * (1 + float(speed_tolerance) / 100) - if speed < speed_min_th or speed > speed_max_th: + if 'true' in (is_under_speed.lower(), is_over_speed.lower()): self.set_object_not_ok('Fan', name, - '{} speed is out of range, speed={}, range=[{},{}]'.format(name, - speed, - speed_min_th, - speed_max_th)) + '{} speed is out of range, speed={}, target={}'.format( + name, + speed, + speed_target)) continue except ValueError: self.set_object_not_ok('Fan', name, - 'Invalid fan speed data for {}, speed={}, target={}, tolerance={}'.format( + 'Invalid fan speed data for {}, speed={}, target={}, is_under_speed={}, is_over_speed={}'.format( name, speed, speed_target, - speed_tolerance)) + is_under_speed, + is_over_speed)) continue if not self._ignore_check(config.ignore_devices, 'fan', name, 'direction'): diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index c2d7822307..67f819ecc5 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -298,7 +298,8 @@ def test_hardware_checker(): 'status': 'True', 'speed': '60', 'speed_target': '60', - 'speed_tolerance': '20', + 'is_under_speed': 'False', + 'is_over_speed': 'False', 'direction': 'intake' }, 'FAN_INFO|fan2': { @@ -306,28 +307,40 @@ def test_hardware_checker(): 'status': 'True', 'speed': '60', 'speed_target': '60', - 'speed_tolerance': '20' + 'is_under_speed': 'False', + 'is_over_speed': 'False', }, 'FAN_INFO|fan3': { 'presence': 'True', 'status': 'False', 'speed': '60', 'speed_target': '60', - 'speed_tolerance': '20' + 'is_under_speed': 'False', + 'is_over_speed': 'False', }, 'FAN_INFO|fan4': { 'presence': 'True', 'status': 
'True', 'speed': '20', 'speed_target': '60', - 'speed_tolerance': '20' + 'is_under_speed': 'True', + 'is_over_speed': 'False', }, 'FAN_INFO|fan5': { + 'presence': 'True', + 'status': 'True', + 'speed': '90', + 'speed_target': '60', + 'is_under_speed': 'False', + 'is_over_speed': 'True', + }, + 'FAN_INFO|fan6': { 'presence': 'True', 'status': 'True', 'speed': '60', 'speed_target': '60', - 'speed_tolerance': '20', + 'is_under_speed': 'False', + 'is_over_speed': 'False', 'direction': 'exhaust' } }) @@ -426,7 +439,10 @@ def test_hardware_checker(): assert 'fan5' in checker._info assert checker._info['fan5'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK - assert checker._info['fan5'][HealthChecker.INFO_FIELD_OBJECT_MSG] == 'fan5 direction exhaust is not aligned with fan1 direction intake' + + assert 'fan6' in checker._info + assert checker._info['fan6'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK + assert checker._info['fan6'][HealthChecker.INFO_FIELD_OBJECT_MSG] == 'fan6 direction exhaust is not aligned with fan1 direction intake' assert 'PSU 1' in checker._info assert checker._info['PSU 1'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_OK From 728df4e89d8d58bb017041bfbf62eb01fe565be9 Mon Sep 17 00:00:00 2001 From: Yaqiang Zhu Date: Fri, 15 Dec 2023 18:47:40 -0500 Subject: [PATCH 26/33] [dhcp_relay] Optimize j2 file in dhcp_relay container (#17506) --- dockers/docker-dhcp-relay/dhcp-relay.programs.j2 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dockers/docker-dhcp-relay/dhcp-relay.programs.j2 b/dockers/docker-dhcp-relay/dhcp-relay.programs.j2 index 94f6adf765..087a734d6f 100644 --- a/dockers/docker-dhcp-relay/dhcp-relay.programs.j2 +++ b/dockers/docker-dhcp-relay/dhcp-relay.programs.j2 @@ -1,9 +1,7 @@ [group:dhcp-relay] -programs=dhcprelayd, +programs=dhcprelayd {%- set relay_for_ipv6 = { 'flag': False } %} -{%- set add_preceding_comma = { 'flag': False } %} -{% if dhcp_server_ipv4_enabled %} -{%- endif %} +{%- set add_preceding_comma = { 'flag': True } %} {% for vlan_name in VLAN_INTERFACE %} {% if DHCP_RELAY and vlan_name in DHCP_RELAY and DHCP_RELAY[vlan_name]['dhcpv6_servers']|length > 0 %} {% set _dummy = relay_for_ipv6.update({'flag': True}) %} From ad90ad9fcd72df156ff8fc51963484d6a8e64f9c Mon Sep 17 00:00:00 2001 From: jingwenxie Date: Sat, 16 Dec 2023 09:04:55 +0800 Subject: [PATCH 27/33] Update TELEMETRY_CLIENT YANG model (#16861) ### Why I did it Github issue: https://github.com/sonic-net/sonic-buildimage/issues/16356. The YANG definition breaks GCU feature. We can either update sonic_yang and GCU's search algorithm to enable the same key count case or simply update YANG model to solve the issue. The pros for update YANG model are it could solve the issue directly and we don't need to handle the complicate search algorithm in sonic_yang and GCU. This is the only YANG model that has this issue. ### How I did it Combine two list into one. The previous YANG validation unit tests are still applicable. 
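
As a rough illustration of the new composite key (Python here is only for demonstration; the keys are taken from the sample config, and this is not sonic_yang code):

```python
# With one combined list keyed by "prefix name", a TELEMETRY_CLIENT key such as
# "DestinationGroup|HS" or "Subscription|HS_RDMA" splits cleanly into the two
# key leafs, instead of being matched against two separate lists whose keys
# have the same count (the situation that broke GCU's search).
for db_key in ("DestinationGroup|HS", "Subscription|HS_RDMA"):
    prefix, name = db_key.split("|", 1)
    print({"prefix": prefix, "name": name})
# -> {'prefix': 'DestinationGroup', 'name': 'HS'}
# -> {'prefix': 'Subscription', 'name': 'HS_RDMA'}
```
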
#### How to verify it Unit test and E2E test --- src/sonic-yang-models/doc/Configuration.md | 26 ++++++++ .../tests/files/sample_config_db.json | 4 +- .../tests_config/telemetry_client.json | 62 +++++++++---------- .../yang-models/sonic-telemetry_client.yang | 23 +++---- 4 files changed, 67 insertions(+), 48 deletions(-) diff --git a/src/sonic-yang-models/doc/Configuration.md b/src/sonic-yang-models/doc/Configuration.md index 5ee894f7aa..1b78d7eb09 100644 --- a/src/sonic-yang-models/doc/Configuration.md +++ b/src/sonic-yang-models/doc/Configuration.md @@ -70,6 +70,7 @@ Table of Contents * [TC to Priority group map](#tc-to-priority-group-map) * [TC to Queue map](#tc-to-queue-map) * [Telemetry](#telemetry) + * [Telemetry client](#telemetry-client) * [Tunnel](#tunnel) * [Versions](#versions) * [VLAN](#vlan) @@ -2221,6 +2222,31 @@ and is listed in this table. } ``` +### Telemetry client + +``` +{ + "TELEMETRY_CLIENT": { + "Global": { + "encoding": "JSON_IETF", + "retry_interval": "30", + "src_ip": "30.57.185.38", + "unidirectional": "true" + }, + "DestinationGroup|HS": { + "dst_addr": "30.57.186.214:8081,30.57.185.39:8081" + }, + "Subscription|HS_RDMA": { + "dst_group": "HS", + "path_target": "COUNTERS_DB", + "paths": "COUNTERS/Ethernet*,COUNTERS_PORT_NAME_MAP", + "report_interval": "5000", + "report_type": "periodic" + } + } +} +``` + ### Tunnel This table configures the MUX tunnel for Dual-ToR setup diff --git a/src/sonic-yang-models/tests/files/sample_config_db.json b/src/sonic-yang-models/tests/files/sample_config_db.json index b0e3f5f589..30b6de917b 100644 --- a/src/sonic-yang-models/tests/files/sample_config_db.json +++ b/src/sonic-yang-models/tests/files/sample_config_db.json @@ -1235,10 +1235,10 @@ "src_ip": "30.57.185.38", "unidirectional": "true" }, - "DestinationGroup_HS": { + "DestinationGroup|HS": { "dst_addr": "30.57.186.214:8081,30.57.185.39:8081" }, - "Subscription_HS_RDMA": { + "Subscription|HS_RDMA": { "dst_group": "HS", "path_target": "COUNTERS_DB", "paths": "COUNTERS/Ethernet*,COUNTERS_PORT_NAME_MAP", diff --git a/src/sonic-yang-models/tests/yang_model_tests/tests_config/telemetry_client.json b/src/sonic-yang-models/tests/yang_model_tests/tests_config/telemetry_client.json index 5286bcba8b..91de532c8c 100644 --- a/src/sonic-yang-models/tests/yang_model_tests/tests_config/telemetry_client.json +++ b/src/sonic-yang-models/tests/yang_model_tests/tests_config/telemetry_client.json @@ -8,15 +8,15 @@ "src_ip": "30.57.185.38", "unidirectional": "true" }, - "TELEMETRY_CLIENT_DS_LIST": [ + "TELEMETRY_CLIENT_LIST": [ { - "prefix": "DestinationGroup_HS", + "prefix": "DestinationGroup", + "name": "HS", "dst_addr": "30.57.186.214:8081,30.57.185.39:8081" - } - ], - "TELEMETRY_CLIENT_SUB_LIST": [ + }, { - "prefix": "Subscription_HS_RDMA", + "prefix": "Subscription", + "name": "HS_RDMA", "dst_group": "HS", "path_target": "COUNTERS_DB", "paths": "COUNTERS/Ethernet*,COUNTERS_PORT_NAME_MAP", @@ -36,15 +36,15 @@ "src_ip": "30.57.185.38", "unidirectional": "true" }, - "TELEMETRY_CLIENT_DS_LIST": [ + "TELEMETRY_CLIENT_LIST": [ { - "prefix": "DestinationGroup_HS", + "prefix": "DestinationGroup", + "name": "HS", "dst_addr": "30.57.186.214:8081,30.57.185.39:8081" - } - ], - "TELEMETRY_CLIENT_SUB_LIST": [ + }, { - "prefix": "Subscription_HS_RDMA", + "prefix": "Subscription", + "name": "HS_RDMA", "dst_group": "FS", "path_target": "COUNTERS_DB", "paths": "COUNTERS/Ethernet*,COUNTERS_PORT_NAME_MAP", @@ -64,15 +64,15 @@ "src_ip": "30.57.185.388", "unidirectional": "true" }, - "TELEMETRY_CLIENT_DS_LIST": 
[ + "TELEMETRY_CLIENT_LIST": [ { - "prefix": "DestinationGroup_HS", + "prefix": "DestinationGroup", + "name": "HS", "dst_addr": "30.57.186.214:8081,30.57.185.39:8081" - } - ], - "TELEMETRY_CLIENT_SUB_LIST": [ + }, { - "prefix": "Subscription_HS_RDMA", + "prefix": "Subscription", + "name": "HS_RDMA", "dst_group": "HS", "path_target": "COUNTERS_DB", "paths": "COUNTERS/Ethernet*,COUNTERS_PORT_NAME_MAP", @@ -92,15 +92,15 @@ "src_ip": "30.57.185.38", "unidirectional": "true" }, - "TELEMETRY_CLIENT_DS_LIST": [ + "TELEMETRY_CLIENT_LIST": [ { - "prefix": "DestinationGroup_HS", + "prefix": "DestinationGroup", + "name": "HS", "dst_addr": "30.57.186.214:8081,30.57.185.39:8081" - } - ], - "TELEMETRY_CLIENT_SUB_LIST": [ + }, { - "prefix": "Subscription_HS_RDMA", + "prefix": "Subscription", + "name": "HS_RDMA", "dst_group": "HS", "path_target": "COUNTERS_DB", "paths": "COUNTERS/Ethernet*,COUNTERS_PORT_NAME_MAP", @@ -120,15 +120,15 @@ "src_ip": "30.57.185.38", "unidirectional": "true" }, - "TELEMETRY_CLIENT_DS_LIST": [ + "TELEMETRY_CLIENT_LIST": [ { - "prefix": "DestinationGroup_HS", + "prefix": "DestinationGroup", + "name": "HS", "dst_addr": "30.57.186.214:80819,30.57.185.39:8081" - } - ], - "TELEMETRY_CLIENT_SUB_LIST": [ + }, { - "prefix": "Subscription_HS_RDMA", + "prefix": "Subscription", + "name": "HS_RDMA", "dst_group": "HS", "path_target": "COUNTERS_DB", "paths": "COUNTERS/Ethernet*,COUNTERS_PORT_NAME_MAP", @@ -139,4 +139,4 @@ } } } -} \ No newline at end of file +} diff --git a/src/sonic-yang-models/yang-models/sonic-telemetry_client.yang b/src/sonic-yang-models/yang-models/sonic-telemetry_client.yang index 7b6b231031..7c4b7b37b8 100644 --- a/src/sonic-yang-models/yang-models/sonic-telemetry_client.yang +++ b/src/sonic-yang-models/yang-models/sonic-telemetry_client.yang @@ -86,33 +86,26 @@ module sonic-telemetry_client { } } - list TELEMETRY_CLIENT_DS_LIST { + list TELEMETRY_CLIENT_LIST { ordered-by user; - key "prefix"; + key "prefix name"; leaf prefix { type string { - pattern "DestinationGroup_" + ".*"; + pattern 'Subscription|DestinationGroup'; } } + leaf name { + type string; + } + leaf dst_addr { type ipv4-port; } - } - - list TELEMETRY_CLIENT_SUB_LIST { - ordered-by user; - key "prefix"; - - leaf prefix { - type string { - pattern "Subscription_" + ".*"; - } - } leaf dst_group { - must "(contains(../../TELEMETRY_CLIENT_DS_LIST/prefix, current()))"; + must "(contains(../../TELEMETRY_CLIENT_LIST/name, current()))"; type string; } From ff1c8e0c245ac667046c7c5880bac82d08f9b32b Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sat, 16 Dec 2023 15:30:37 +0800 Subject: [PATCH 28/33] [submodule] Update submodule sonic-sairedis to the latest HEAD automatically (#17534) --- src/sonic-sairedis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-sairedis b/src/sonic-sairedis index 641b7304f2..92c050ab86 160000 --- a/src/sonic-sairedis +++ b/src/sonic-sairedis @@ -1 +1 @@ -Subproject commit 641b7304f2a10762a119b4c56c1621b4a88616b9 +Subproject commit 92c050ab86aac8b95ab83dc6aa06f2886038540e From 9664c201f610572560f0eebab5d0830f88288a3a Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sat, 16 Dec 2023 15:34:43 +0800 Subject: [PATCH 29/33] [submodule] Update submodule sonic-platform-common to the latest HEAD automatically (#17532) --- src/sonic-platform-common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-platform-common b/src/sonic-platform-common index 
427217bd4c..0f72932a4c 160000 --- a/src/sonic-platform-common +++ b/src/sonic-platform-common @@ -1 +1 @@ -Subproject commit 427217bd4c869915f43f4e23f877aeb09db6bc3a +Subproject commit 0f72932a4cbfdf16cccc75e72304449b4df67d16 From e0dc6def82e03e85d0094693d84c35468073f2ba Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sat, 16 Dec 2023 15:49:20 +0800 Subject: [PATCH 30/33] [submodule] Update submodule sonic-platform-daemons to the latest HEAD automatically (#17533) --- src/sonic-platform-daemons | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-platform-daemons b/src/sonic-platform-daemons index 1c9b01d123..b2b890540d 160000 --- a/src/sonic-platform-daemons +++ b/src/sonic-platform-daemons @@ -1 +1 @@ -Subproject commit 1c9b01d12393f25db7b258f58d10ea77c3f5b682 +Subproject commit b2b890540d291d76b8763c1f6c22b3899385ff76 From 37a9c25cfb625df84a386f842c565e21ca2b79fe Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sat, 16 Dec 2023 15:51:42 +0800 Subject: [PATCH 31/33] [submodule] Update submodule sonic-swss to the latest HEAD automatically (#17535) --- src/sonic-swss | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-swss b/src/sonic-swss index ff524e6dc8..194daa04e0 160000 --- a/src/sonic-swss +++ b/src/sonic-swss @@ -1 +1 @@ -Subproject commit ff524e6dc828a8dca2f4bf6fccabc819e77710f0 +Subproject commit 194daa04e05696afdb1f44765a57f6803ddc1c4c From d8a1ffbaced4321e0dc7842af712b1e4083192f4 Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Sun, 17 Dec 2023 14:02:47 +0800 Subject: [PATCH 32/33] [Mellanox] implement sfp.reset for CMIS management (#16862) - Why I did it For CMIS host management module, we need a different implementation for sfp.reset. This PR is to implement it - How I did it For SW control modules, do reset from hw_reset For FW control modules, do reset as the original way - How to verify it Manual test sonic-mgmt platform test --- .../mlnx-platform-api/sonic_platform/sfp.py | 72 +++++++++++++++---- .../mlnx-platform-api/sonic_platform/utils.py | 6 +- .../mlnx-platform-api/tests/test_sfp.py | 21 ++++-- 3 files changed, 77 insertions(+), 22 deletions(-) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py index c5bcfddaf1..d03c0fe10e 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py @@ -26,6 +26,7 @@ try: import ctypes import subprocess import os + import threading from sonic_py_common.logger import Logger from sonic_py_common.general import check_output_pipe from . 
import utils @@ -219,6 +220,9 @@ class SdkHandleContext(object): deinitialize_sdk_handle(self.sdk_handle) class NvidiaSFPCommon(SfpOptoeBase): + sfp_index_to_logical_port_dict = {} + sfp_index_to_logical_lock = threading.Lock() + def __init__(self, sfp_index): super(NvidiaSFPCommon, self).__init__() self.index = sfp_index + 1 @@ -247,7 +251,31 @@ class NvidiaSFPCommon(SfpOptoeBase): error_type = utils.read_int_from_file(status_error_file_path) return oper_state, error_type - + + @classmethod + def get_sfp_index_to_logical_port(cls, force=False): + if not cls.sfp_index_to_logical_port_dict or force: + config_db = utils.DbUtils.get_db_instance('CONFIG_DB') + port_data = config_db.get_table('PORT') + for key, data in port_data.items(): + if data['index'] not in cls.sfp_index_to_logical_port_dict: + cls.sfp_index_to_logical_port_dict[int(data['index']) - 1] = key + + @classmethod + def get_logical_port_by_sfp_index(cls, sfp_index): + with cls.sfp_index_to_logical_lock: + cls.get_sfp_index_to_logical_port() + logical_port_name = cls.sfp_index_to_logical_port_dict.get(sfp_index) + if not logical_port_name: + cls.get_sfp_index_to_logical_port(force=True) + else: + config_db = utils.DbUtils.get_db_instance('CONFIG_DB') + current_index = int(config_db.get('CONFIG_DB', f'PORT|{logical_port_name}', 'index')) + if current_index != sfp_index: + cls.get_sfp_index_to_logical_port(force=True) + logical_port_name = cls.sfp_index_to_logical_port_dict.get(sfp_index) + return logical_port_name + class SFP(NvidiaSFPCommon): """Platform-specific SFP class""" @@ -299,6 +327,17 @@ class SFP(NvidiaSFPCommon): Returns: bool: True if device is present, False if not """ + if DeviceDataManager.is_independent_mode(): + if utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/control') != 0: + if not utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/hw_present'): + return False + if not utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/power_good'): + return False + if not utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/power_on'): + return False + if utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/hw_reset') == 1: + return False + eeprom_raw = self._read_eeprom(0, 1, log_on_error=False) return eeprom_raw is not None @@ -455,8 +494,17 @@ class SFP(NvidiaSFPCommon): refer plugins/sfpreset.py """ - file_path = SFP_SDK_MODULE_SYSFS_ROOT_TEMPLATE.format(self.sdk_index) + SFP_SYSFS_RESET - return utils.write_file(file_path, '1') + try: + if not self.is_sw_control(): + file_path = SFP_SDK_MODULE_SYSFS_ROOT_TEMPLATE.format(self.sdk_index) + SFP_SYSFS_RESET + return utils.write_file(file_path, '1') + else: + file_path = SFP_SDK_MODULE_SYSFS_ROOT_TEMPLATE.format(self.sdk_index) + SFP_SYSFS_HWRESET + return utils.write_file(file_path, '0') and utils.write_file(file_path, '1') + except Exception as e: + print(f'Failed to reset module - {e}') + logger.log_error(f'Failed to reset module - {e}') + return False @classmethod @@ -918,15 +966,15 @@ class SFP(NvidiaSFPCommon): return False db = utils.DbUtils.get_db_instance('STATE_DB') - control_type = db.get('STATE_DB', f'TRANSCEIVER_MODULES_MGMT|{self.sdk_index}', 'control_type') - control_file_value = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/control') - - if control_type == 'SW_CONTROL' and control_file_value == 1: - return True - elif control_type == 'FW_CONTROL' and control_file_value == 0: - return False - else: - raise 
Exception(f'Module {self.sdk_index} is in initialization, please retry later') + logical_port = NvidiaSFPCommon.get_logical_port_by_sfp_index(self.sdk_index) + if not logical_port: + raise Exception(f'Module {self.sdk_index} is not present or in initialization') + + initialized = db.exists('STATE_DB', f'TRANSCEIVER_STATUS|{logical_port}') + if not initialized: + raise Exception(f'Module {self.sdk_index} is not present or in initialization') + + return utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/control') == 1 class RJ45Port(NvidiaSFPCommon): diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py index 1135903c24..a7354ac7b8 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py @@ -381,9 +381,9 @@ class DbUtils: cls.db_instances.data = {} if db_name not in cls.db_instances.data: - from swsscommon.swsscommon import SonicV2Connector - db = SonicV2Connector(use_unix_socket_path=True) - db.connect(db_name) + from swsscommon.swsscommon import ConfigDBConnector + db = ConfigDBConnector(use_unix_socket_path=True) + db.db_connect(db_name) cls.db_instances.data[db_name] = db return cls.db_instances.data[db_name] except Exception as e: diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py index dccc727bfe..d273e9bce7 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py @@ -266,9 +266,14 @@ class TestSfp: @mock.patch('sonic_platform.utils.write_file') def test_reset(self, mock_write): sfp = SFP(0) + sfp.is_sw_control = mock.MagicMock(return_value=False) mock_write.return_value = True assert sfp.reset() mock_write.assert_called_with('/sys/module/sx_core/asic0/module0/reset', '1') + sfp.is_sw_control.return_value = True + assert sfp.reset() + sfp.is_sw_control.side_effect = Exception('') + assert not sfp.reset() @mock.patch('sonic_platform.sfp.SFP.read_eeprom') def test_get_xcvr_api(self, mock_read): @@ -332,30 +337,32 @@ class TestSfp: assert sfp.get_temperature_warning_threashold() == 75.0 assert sfp.get_temperature_critical_threashold() == 85.0 + @mock.patch('sonic_platform.sfp.NvidiaSFPCommon.get_logical_port_by_sfp_index') @mock.patch('sonic_platform.utils.read_int_from_file') @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') @mock.patch('sonic_platform.utils.DbUtils.get_db_instance') - def test_is_sw_control(self, mock_get_db, mock_mode, mock_read): + def test_is_sw_control(self, mock_get_db, mock_mode, mock_read, mock_get_logical): sfp = SFP(0) mock_mode.return_value = False assert not sfp.is_sw_control() mock_mode.return_value = True + + mock_get_logical.return_value = None + with pytest.raises(Exception): + sfp.is_sw_control() + mock_get_logical.return_value = 'Ethernet0' mock_db = mock.MagicMock() mock_get_db.return_value = mock_db - mock_db.get = mock.MagicMock(return_value=None) + mock_db.exists = mock.MagicMock(return_value=False) with pytest.raises(Exception): sfp.is_sw_control() + mock_db.exists.return_value = True mock_read.return_value = 0 - mock_db.get.return_value = 'FW_CONTROL' assert not sfp.is_sw_control() mock_read.return_value = 1 - mock_db.get.return_value = 'SW_CONTROL' assert sfp.is_sw_control() - mock_read.return_value = 0 - with pytest.raises(Exception): - sfp.is_sw_control() 
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode', mock.MagicMock(return_value=False)) @mock.patch('sonic_platform.sfp.SFP.is_sw_control', mock.MagicMock(return_value=False)) From 2ba3c90ad4ac5c03299b86795418702bba26900d Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sun, 17 Dec 2023 15:46:25 +0800 Subject: [PATCH 33/33] [submodule] Update submodule sonic-utilities to the latest HEAD automatically (#17516) --- src/sonic-utilities | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-utilities b/src/sonic-utilities index 377e5812f6..19ea849338 160000 --- a/src/sonic-utilities +++ b/src/sonic-utilities @@ -1 +1 @@ -Subproject commit 377e5812f66d4adba09221028191d459775d9a25 +Subproject commit 19ea8493388536921ad204833157dac2cd2bf3ba