[nvidia] make sure shared storage with syncd is cleared on restarts (#14547)

Why I did it
Sharing the storage of syncd with other proprietary application extensions allows them to communicate with syncd in different ways.
If one container wants to pass some information to syncd, the shared storage can be used. However, today the shared storage isn't cleaned on restarts, making it possible for syncd to read out-of-date information generated in the past.

NOTE: There are no plans to use it for standard SONiC dockers, and we are working on removing the SDK dependency from the PMON docker.

How I did it
Implemented a new service to clean the shared storage.

How to verify it
Perform reboot/fast-reboot/warm-reboot/config-reload/systemctl restart swss and verify that /tmp/ is cleaned after each restart in the syncd container.

Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
This commit is contained in:
Stepan Blyshchak 2023-06-29 01:26:49 +03:00 committed by GitHub
parent a06ffc9f0c
commit 1ebdcda9e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 32 additions and 6 deletions

View File

@ -157,9 +157,9 @@ function waitForAllInstanceDatabaseConfigJsonFilesReady()
done done
fi fi
# Delay a second to allow all instance database_config.json files to be completely generated and fully accessible. # Delay a second to allow all instance database_config.json files to be completely generated and fully accessible.
# This delay is needed to make sure that the database_config.json files are correctly rendered from j2 template # This delay is needed to make sure that the database_config.json files are correctly rendered from j2 template
# files ( renderning takes some time ) # files ( renderning takes some time )
sleep 1 sleep 1
fi fi
} }
{%- endif %} {%- endif %}
@ -240,7 +240,7 @@ function postStartAction()
mv $WARM_DIR/dump.rdb $WARM_DIR/dump.rdb.old mv $WARM_DIR/dump.rdb $WARM_DIR/dump.rdb.old
else else
# If there is a config_db.json dump file, load it. # If there is a config_db.json dump file, load it.
if [ -r /etc/sonic/config_db$DEV.json ]; then if [ -r /etc/sonic/config_db$DEV.json ]; then
if [ -r /etc/sonic/init_cfg.json ]; then if [ -r /etc/sonic/init_cfg.json ]; then
$SONIC_CFGGEN -j /etc/sonic/init_cfg.json -j /etc/sonic/config_db$DEV.json --write-to-db $SONIC_CFGGEN -j /etc/sonic/init_cfg.json -j /etc/sonic/config_db$DEV.json --write-to-db
@ -323,7 +323,7 @@ start() {
{%- if sonic_asic_platform == "broadcom" %} {%- if sonic_asic_platform == "broadcom" %}
{%- if docker_container_name == "syncd" %} {%- if docker_container_name == "syncd" %}
# Set the SYNCD_SHM_SIZE if this variable not defined # Set the SYNCD_SHM_SIZE if this variable not defined
BRCM_PLATFORM_COMMON_DIR=/usr/share/sonic/device/x86_64-broadcom_common BRCM_PLATFORM_COMMON_DIR=/usr/share/sonic/device/x86_64-broadcom_common
SYNCD_SHM_INI=$BRCM_PLATFORM_COMMON_DIR/syncd_shm.ini SYNCD_SHM_INI=$BRCM_PLATFORM_COMMON_DIR/syncd_shm.ini
@ -545,14 +545,15 @@ start() {
{%- if docker_container_name == "syncd" %} {%- if docker_container_name == "syncd" %}
-v /var/log/mellanox:/var/log/mellanox:rw \ -v /var/log/mellanox:/var/log/mellanox:rw \
-v mlnx_sdk_socket:/var/run/sx_sdk \ -v mlnx_sdk_socket:/var/run/sx_sdk \
-v mlnx_sdk_ready:/tmp \ -v /tmp/nv-syncd-shared/:/tmp \
-v /dev/shm:/dev/shm:rw \ -v /dev/shm:/dev/shm:rw \
-v /var/log/sai_failure_dump:/var/log/sai_failure_dump:rw \ -v /var/log/sai_failure_dump:/var/log/sai_failure_dump:rw \
-e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \ -e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \
{%- elif docker_container_name == "pmon" %} {%- elif docker_container_name == "pmon" %}
-v /var/run/hw-management:/var/run/hw-management:rw \ -v /var/run/hw-management:/var/run/hw-management:rw \
-v mlnx_sdk_socket:/var/run/sx_sdk \ -v mlnx_sdk_socket:/var/run/sx_sdk \
-v mlnx_sdk_ready:/tmp \ -v /tmp/nv-syncd-shared/:/tmp \
-v /dev/shm:/dev/shm:rw \
-e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \ -e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \
-v /dev/shm:/dev/shm:rw \ -v /dev/shm:/dev/shm:rw \
{%- else %} {%- else %}

View File

@ -19,6 +19,10 @@ After=updategraph.service
BindsTo=sonic.target BindsTo=sonic.target
After=sonic.target After=sonic.target
Before=ntp-config.service Before=ntp-config.service
{% if sonic_asic_platform == 'mellanox' %}
Requires=nv-syncd-shared.service
After=nv-syncd-shared.service
{% endif %}
[Service] [Service]
User=root User=root

View File

@ -946,6 +946,10 @@ MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME=$(basename {{mlnx_platform_api_py3_wheel_path
sudo cp {{mlnx_platform_api_py3_wheel_path}} $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME sudo cp {{mlnx_platform_api_py3_wheel_path}} $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip3 install $MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip3 install $MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
sudo rm -rf $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME sudo rm -rf $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
# Install service that manages Nvidia specific shared storage
sudo cp platform/mellanox/nv-syncd-shared/nv-syncd-shared.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM/
sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable nv-syncd-shared
{% endif %} {% endif %}
{%- if SONIC_ROUTING_STACK == "frr" %} {%- if SONIC_ROUTING_STACK == "frr" %}

View File

@ -0,0 +1,17 @@
# One-shot unit that deletes and then recreates the host directory
# /tmp/nv-syncd-shared/ every time the unit is started.
[Unit]
Description=Manage Nvidia specific syncd shared volume
Requires=docker.service
After=docker.service
BindsTo=sonic.target
After=sonic.target
# PartOf= propagates a stop or restart of syncd.service to this unit, so a
# syncd restart re-runs the ExecStart= commands below.
PartOf=syncd.service
[Service]
# Type=oneshot permits several ExecStart= lines; they run sequentially in the
# order written.
Type=oneshot
# Keep the unit in the "active" state after the commands have exited.
RemainAfterExit=yes
# Drop any content left in the directory from a previous run ...
ExecStart=rm -rf /tmp/nv-syncd-shared/
# ... and recreate it empty.
ExecStart=mkdir -p /tmp/nv-syncd-shared/
[Install]
WantedBy=sonic.target