From 1ff0c0b685b884c00b30c7298f8dbbd6cfc3dcd5 Mon Sep 17 00:00:00 2001 From: Sudharsan Dhamal Gopalarathnam Date: Sun, 5 Feb 2023 06:45:49 -0800 Subject: [PATCH] [Mellanox][sai_failure_dump]Added platform specific script to be invoked during SAI failure dump (#13533) - Why I did it Added platform specific script to be invoked during SAI failure dump. Added some generic changes to mount /var/log/sai_failure_dump as read write in the syncd docker - How I did it Added script in docker-syncd of mellanox and copied it to /usr/bin - How to verify it Manual UT and new sonic-mgmt tests --- files/build_templates/docker_image_ctl.j2 | 1 + .../mellanox/docker-syncd-mlnx/Dockerfile.j2 | 1 + .../docker-syncd-mlnx/platform_syncd_dump.sh | 22 +++++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100755 platform/mellanox/docker-syncd-mlnx/platform_syncd_dump.sh diff --git a/files/build_templates/docker_image_ctl.j2 b/files/build_templates/docker_image_ctl.j2 index 3017565e30..2c7d8a7d6d 100644 --- a/files/build_templates/docker_image_ctl.j2 +++ b/files/build_templates/docker_image_ctl.j2 @@ -526,6 +526,7 @@ start() { -v mlnx_sdk_socket:/var/run/sx_sdk \ -v mlnx_sdk_ready:/tmp \ -v /dev/shm:/dev/shm:rw \ + -v /var/log/sai_failure_dump:/var/log/sai_failure_dump:rw \ -e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \ {%- elif docker_container_name == "pmon" %} -v /var/run/hw-management:/var/run/hw-management:rw \ diff --git a/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 b/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 index a23baaec23..3d68ba4bc9 100755 --- a/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 +++ b/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 @@ -64,6 +64,7 @@ RUN apt-get clean -y && \ COPY ["supervisord.conf.j2", "/usr/share/sonic/templates/"] COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] COPY ["critical_processes", "/etc/supervisor/"] +COPY ["platform_syncd_dump.sh", "/usr/bin/"] RUN mkdir -p /etc/supervisor/conf.d/ RUN 
sonic-cfggen -a "{\"ENABLE_ASAN\":\"{{ENABLE_ASAN}}\"}" -t /usr/share/sonic/templates/supervisord.conf.j2 > /etc/supervisor/conf.d/supervisord.conf
diff --git a/platform/mellanox/docker-syncd-mlnx/platform_syncd_dump.sh b/platform/mellanox/docker-syncd-mlnx/platform_syncd_dump.sh
new file mode 100755
index 0000000000..f5dc28647f
--- /dev/null
+++ b/platform/mellanox/docker-syncd-mlnx/platform_syncd_dump.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+#
+# Mellanox SAI failure dump: write a saisdkdump into ${DUMPDIR} as a .tar.gz.
+# NOTE(review): presumably sourced by a generic dump script that sets DUMPDIR
+# and reads SAI_MAX_FAILURE_DUMPS afterwards -- confirm against the caller.
+#
+sai_dump_name="sai_sdk_dump_$(date +"%m_%d_%Y_%I_%M_%p")"
+sai_dump_path="${DUMPDIR}/${sai_dump_name}"
+sai_dump_file="${sai_dump_path}/${sai_dump_name}"
+mkdir -p "$sai_dump_path"
+saisdkdump -f "$sai_dump_file"
+# Archive only after a successful cd so tar/rm never act on another directory.
+if cd "${DUMPDIR}"; then
+    tar -cvf "${sai_dump_name}.tar" "$sai_dump_name"
+    gzip "${sai_dump_name}.tar"
+    rm -rf "$sai_dump_name"
+fi
+
+# Update max failure dumps
+if grep -q SAI_DUMP_STORE_AMOUNT /usr/share/sonic/hwsku/sai.profile; then
+    SAI_MAX_FAILURE_DUMPS=$(grep SAI_DUMP_STORE_AMOUNT /usr/share/sonic/hwsku/sai.profile | cut -d '=' -f2)
+fi