[nvidia] make sure shared storage with syncd is cleared on restarts (#14547) (#16046)

Why I did it
Sharing the storage of syncd with other proprietary application extensions allows them to communicate with syncd in different ways.
If one container wants to pass some information to syncd, then shared storage can be used. However, today the shared storage isn't cleaned on restarts, making it possible for syncd to read out-of-date information generated in the past.

NOTE: There are no plans to use it for standard SONiC dockers, and we are working on removing the SDK dependency from the PMON docker.

How I did it
Implemented a new service to clean the shared storage.

How to verify it
Perform a reboot/fast-reboot/warm-reboot/config-reload/systemctl restart swss and verify that /tmp/ is cleaned after each restart in the syncd container.

Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
Co-authored-by: Stepan Blyshchak <38952541+stepanblyschak@users.noreply.github.com>
This commit is contained in:
mssonicbld 2023-08-07 09:27:43 +08:00 committed by GitHub
parent 471a3a8067
commit 33a10b479a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 32 additions and 6 deletions

View File

@ -157,9 +157,9 @@ function waitForAllInstanceDatabaseConfigJsonFilesReady()
done
fi
# Delay a second to allow all instance database_config.json files to be completely generated and fully accessible.
# This delay is needed to make sure that the database_config.json files are correctly rendered from j2 template
# This delay is needed to make sure that the database_config.json files are correctly rendered from j2 template
# files ( rendering takes some time )
sleep 1
sleep 1
fi
}
{%- endif %}
@ -240,7 +240,7 @@ function postStartAction()
mv $WARM_DIR/dump.rdb $WARM_DIR/dump.rdb.old
else
# If there is a config_db.json dump file, load it.
if [ -r /etc/sonic/config_db$DEV.json ]; then
if [ -r /etc/sonic/config_db$DEV.json ]; then
if [ -r /etc/sonic/init_cfg.json ]; then
$SONIC_CFGGEN -j /etc/sonic/init_cfg.json -j /etc/sonic/config_db$DEV.json --write-to-db
@ -323,7 +323,7 @@ start() {
{%- if sonic_asic_platform == "broadcom" %}
{%- if docker_container_name == "syncd" %}
# Set the SYNCD_SHM_SIZE if this variable not defined
# Set the SYNCD_SHM_SIZE if this variable not defined
BRCM_PLATFORM_COMMON_DIR=/usr/share/sonic/device/x86_64-broadcom_common
SYNCD_SHM_INI=$BRCM_PLATFORM_COMMON_DIR/syncd_shm.ini
@ -545,14 +545,15 @@ start() {
{%- if docker_container_name == "syncd" %}
-v /var/log/mellanox:/var/log/mellanox:rw \
-v mlnx_sdk_socket:/var/run/sx_sdk \
-v mlnx_sdk_ready:/tmp \
-v /tmp/nv-syncd-shared/:/tmp \
-v /dev/shm:/dev/shm:rw \
-v /var/log/sai_failure_dump:/var/log/sai_failure_dump:rw \
-e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \
{%- elif docker_container_name == "pmon" %}
-v /var/run/hw-management:/var/run/hw-management:rw \
-v mlnx_sdk_socket:/var/run/sx_sdk \
-v mlnx_sdk_ready:/tmp \
-v /tmp/nv-syncd-shared/:/tmp \
-v /dev/shm:/dev/shm:rw \
-e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \
-v /dev/shm:/dev/shm:rw \
{%- else %}

View File

@ -19,6 +19,10 @@ After=updategraph.service
BindsTo=sonic.target
After=sonic.target
Before=ntp-config.service
{% if sonic_asic_platform == 'mellanox' %}
Requires=nv-syncd-shared.service
After=nv-syncd-shared.service
{% endif %}
[Service]
User=root

View File

@ -931,6 +931,10 @@ MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME=$(basename {{mlnx_platform_api_py3_wheel_path
sudo cp {{mlnx_platform_api_py3_wheel_path}} $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip3 install $MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
sudo rm -rf $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
# Install service that manages Nvidia specific shared storage
sudo cp platform/mellanox/nv-syncd-shared/nv-syncd-shared.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM/
sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable nv-syncd-shared
{% endif %}
{%- if SONIC_ROUTING_STACK == "frr" %}

View File

@ -0,0 +1,17 @@
# Oneshot unit that resets the host directory /tmp/nv-syncd-shared/ so that it
# is empty every time this unit is (re)started. The directory is used as shared
# storage with syncd; clearing it keeps syncd from reading stale data left over
# from a previous run.
[Unit]
Description=Manage Nvidia specific syncd shared volume
# Start only once docker.service has been started.
Requires=docker.service
After=docker.service
# Tie this unit's lifetime to sonic.target: it is stopped when the target stops.
BindsTo=sonic.target
After=sonic.target
# Stop/restart of syncd.service is propagated to this unit, which is what
# re-runs the cleanup below on a syncd restart.
PartOf=syncd.service
[Service]
# oneshot: the ExecStart= lines run one after another, in the order written.
Type=oneshot
# Stay "active" after the commands exit. NOTE(review): this appears to be needed
# so that a restart propagated via PartOf= has an active unit to act on -
# confirm against systemd's restart-propagation semantics.
RemainAfterExit=yes
# Order matters: remove the old directory with its contents first, then
# recreate it empty.
ExecStart=rm -rf /tmp/nv-syncd-shared/
ExecStart=mkdir -p /tmp/nv-syncd-shared/
[Install]
WantedBy=sonic.target