sonic-buildimage/platform/mellanox/mlnx-fw-upgrade.j2
Stephen Sun b5e8c16134
[Mellanox] Enhance FW upgrade mechanism (#16090)
### Why I did it

1. Enhance the diagnosis information collecting mechanism
   - If the option `-v` is fed, it will pass additional diagnosis flags to mlxfwmanager
   - Collect all the output from mlxfwmanager and print them to syslog if it fails
2. Abort syncd in case waiting for device or upgrading firmware fails

Signed-off-by: Stephen Sun <stephens@nvidia.com>

### How I did it

#### How to verify it

Regression and manual test
2023-09-04 11:28:53 -07:00

355 lines
9.9 KiB
Django/Jinja
Executable File

{#-
Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES.
Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-#}
#!/bin/bash
declare -r SCRIPT_NAME="$(basename "$0")"
declare -r SCRIPT_PATH="$(readlink -f "$0")"
declare -r SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
declare -r YES_PARAM="yes"
declare -r NO_PARAM="no"
declare -r VERBOSE_ERROR="1"
declare -r VERBOSE_WARNING="2"
declare -r VERBOSE_NOTICE="3"
declare -r VERBOSE_INFO="4"
declare -r VERBOSE_MAX="${VERBOSE_INFO}"
declare -r VERBOSE_MIN="${VERBOSE_ERROR}"
declare -r EXIT_SUCCESS="0"
declare -r EXIT_FAILURE="1"
declare -r QUERY_CMD="mlxfwmanager --query"
declare -r LIST_CONTENT_CMD="mlxfwmanager --list-content"
declare -r BURN_CMD="mlxfwmanager -u -f -y"
declare -r QUERY_FILE="/tmp/mlxfwmanager-query.log"
declare -r LIST_CONTENT_FILE="/tmp/mlxfwmanager-list-content.log"
declare -r SPC1_ASIC="spc1"
declare -r SPC2_ASIC="spc2"
declare -r SPC3_ASIC="spc3"
declare -r SPC4_ASIC="spc4"
declare -r UNKN_ASIC="unknown"
declare -r UNKN_MST="unknown"
declare -rA FW_FILE_MAP=( \
[$SPC1_ASIC]="fw-SPC.mfa" \
[$SPC2_ASIC]="fw-SPC2.mfa" \
[$SPC3_ASIC]="fw-SPC3.mfa" \
[$SPC4_ASIC]="fw-SPC4.mfa" \
)
IMAGE_UPGRADE="${NO_PARAM}"
SYSLOG_LOGGER="${NO_PARAM}"
VERBOSE_LEVEL="${VERBOSE_MIN}"
MFT_DIAGNOSIS_FLAGS=""
function PrintHelp() {
echo
echo "Usage: ./${SCRIPT_NAME} [OPTIONS]"
echo
echo "OPTIONS:"
echo " -u, --upgrade Upgrade ASIC firmware using next boot image (useful after SONiC-To-SONiC update)"
echo " -s, --syslog Use syslog logger (enabled when -u|--upgrade)"
echo " -v, --verbose Verbose mode (enabled when -u|--upgrade)"
echo " -h, --help Print help"
echo
echo "Examples:"
echo " ./${SCRIPT_NAME} --verbose"
echo " ./${SCRIPT_NAME} --upgrade"
echo " ./${SCRIPT_NAME} --help"
echo
}
function ParseArguments() {
while [ "$#" -ge "1" ]; do
case "$1" in
-u|--upgrade)
IMAGE_UPGRADE="${YES_PARAM}"
SYSLOG_LOGGER="${YES_PARAM}"
;;
-v|--verbose)
VERBOSE_LEVEL="${VERBOSE_MAX}"
MFT_DIAGNOSIS_FLAGS="FLASH_ACCESS_DEBUG=1 FW_COMPS_DEBUG=1"
;;
-s|--syslog)
SYSLOG_LOGGER="${YES_PARAM}"
;;
-h|--help)
PrintHelp
exit "${EXIT_SUCCESS}"
;;
esac
shift
done
}
function LogError() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_ERROR}" ]]; then
echo "ERROR: $*"
logger -p "ERROR" -t "${SCRIPT_NAME}" "$*"
fi
if [[ "${SYSLOG_LOGGER}" = "${YES_PARAM}" ]]; then
logger -p "ERROR" -t "${SCRIPT_NAME}" "$*"
fi
}
function LogWarning() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_WARNING}" ]]; then
echo "WARNING: $*"
fi
if [[ "${SYSLOG_LOGGER}" = "${YES_PARAM}" ]]; then
logger -p "WARNING" -t "${SCRIPT_NAME}" "$*"
fi
}
function LogNotice() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_NOTICE}" ]]; then
echo "NOTICE: $*"
fi
if [[ "${SYSLOG_LOGGER}" = "${YES_PARAM}" ]]; then
logger -p "NOTICE" -t "${SCRIPT_NAME}" "$*"
fi
}
function LogInfo() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_INFO}" ]]; then
echo "INFO: $*"
fi
if [[ "${SYSLOG_LOGGER}" = "${YES_PARAM}" ]]; then
logger -p "INFO" -t "${SCRIPT_NAME}" "$*"
fi
}
function ExitFailure() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_ERROR}" ]]; then
echo
LogError "$@"
echo
fi
exit "${EXIT_FAILURE}"
}
function ExitSuccess() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_INFO}" ]]; then
echo
LogInfo "$@"
echo
fi
exit "${EXIT_SUCCESS}"
}
function WaitForDevice() {
local -i QUERY_RETRY_COUNT_MAX="10"
local -i QUERY_RETRY_COUNT="0"
${QUERY_CMD} > /dev/null
while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("$?" -ne "${EXIT_SUCCESS}") ]]; do
sleep 1s
((QUERY_RETRY_COUNT++))
output=$(eval ${MFT_DIAGNOSIS_FLAGS} ${QUERY_CMD}) > /dev/null
done
ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
# Exit failure and print the detailed information
echo "$output"
failure_msg="${output#*Fail : }"
ExitFailure "FW Query command: ${QUERY_CMD} failed to wait for device with error: ${failure_msg}"
fi
}
function GetAsicType() {
local -r VENDOR_ID="15b3"
local -r SPC1_PRODUCT_ID="cb84"
local -r SPC2_PRODUCT_ID="cf6c"
local -r SPC3_PRODUCT_ID="cf70"
local -r SPC4_PRODUCT_ID="cf80"
if lspci -n | grep "${VENDOR_ID}:${SPC1_PRODUCT_ID}" &>/dev/null; then
echo "${SPC1_ASIC}"
exit "${EXIT_SUCCESS}"
elif lspci -n | grep "${VENDOR_ID}:${SPC2_PRODUCT_ID}" &>/dev/null; then
echo "${SPC2_ASIC}"
exit "${EXIT_SUCCESS}"
elif lspci -n | grep "${VENDOR_ID}:${SPC3_PRODUCT_ID}" &>/dev/null; then
echo "${SPC3_ASIC}"
exit "${EXIT_SUCCESS}"
elif lspci -n | grep "${VENDOR_ID}:${SPC4_PRODUCT_ID}" &>/dev/null; then
echo "${SPC4_ASIC}"
exit "${EXIT_SUCCESS}"
fi
echo "${UNKN_ASIC}"
exit "${EXIT_FAILURE}"
}
function GetMstDevice() {
local _MST_DEVICE="$(ls /dev/mst/*_pci_cr0 2>&1)"
if [[ ! -c "${_MST_DEVICE}" ]]; then
echo "${UNKN_MST}"
else
echo "${_MST_DEVICE}"
fi
exit "${EXIT_SUCCESS}"
}
function RunCmd() {
local ERROR_CODE="${EXIT_SUCCESS}"
if [[ "${VERBOSE_LEVEL}" -eq "${VERBOSE_MAX}" ]]; then
eval "$@"
else
eval "$@" &>/dev/null
fi
ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
ExitFailure "command failed: $@"
fi
}
function RunFwUpdateCmd() {
local ERROR_CODE="${EXIT_SUCCESS}"
local COMMAND="${MFT_DIAGNOSIS_FLAGS} ${BURN_CMD} $@"
if [[ "${VERBOSE_LEVEL}" -eq "${VERBOSE_MAX}" ]]; then
output=$(eval "${COMMAND}")
else
output=$(eval "${COMMAND}") >/dev/null 2>&1
fi
ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
echo "${output}"
failure_msg="${output#*Fail : }"
ExitFailure "FW Update command: ${COMMAND} failed with error: ${failure_msg}"
fi
}
function UpgradeFW() {
local -r _FW_BIN_PATH="$1"
local -r _ASIC_TYPE="$(GetAsicType)"
if [[ "${_ASIC_TYPE}" = "${UNKN_ASIC}" ]]; then
ExitFailure "failed to detect ASIC type"
fi
if [ ! -z "${_FW_BIN_PATH}" ]; then
local -r _FW_FILE="${_FW_BIN_PATH}/${FW_FILE_MAP[$_ASIC_TYPE]}"
else
local -r _FW_FILE="/etc/mlnx/${FW_FILE_MAP[$_ASIC_TYPE]}"
fi
if [ ! -f "${_FW_FILE}" ]; then
ExitFailure "no such file: ${_FW_FILE}"
fi
RunCmd "${QUERY_CMD} -o ${QUERY_FILE}"
local -r _FW_CURRENT_INFO="$(grep FW ${QUERY_FILE})"
local -r _FW_CURRENT="$(echo ${_FW_CURRENT_INFO} | cut -f2 -d' ')"
local -r _PSID_INFO="$(grep PSID ${QUERY_FILE})"
local -r _PSID="$(echo ${_PSID_INFO} | cut -f2 -d' ')"
RunCmd "${LIST_CONTENT_CMD} -i ${_FW_FILE} -o ${LIST_CONTENT_FILE}"
local -r _FW_AVAILABLE_INFO="$(grep ${_PSID} ${LIST_CONTENT_FILE})"
local -r _FW_AVAILABLE="$(echo ${_FW_AVAILABLE_INFO} | cut -f4 -d' ')"
if [[ -z "${_FW_CURRENT}" ]]; then
ExitFailure "could not retreive current FW version"
fi
if [[ -z "${_FW_AVAILABLE}" ]]; then
ExitFailure "could not retreive available FW version"
fi
if [[ "${_FW_CURRENT}" == "${_FW_AVAILABLE}" ]]; then
ExitSuccess "firmware is up to date"
else
LogNotice "firmware upgrade is required. Installing compatible version..."
local -r _MST_DEVICE="$(GetMstDevice)"
if [[ "${_MST_DEVICE}" = "${UNKN_MST}" ]]; then
LogWarning "could not find fastest mst device, using default device"
RunFwUpdateCmd "-i ${_FW_FILE}"
else
RunFwUpdateCmd "-d ${_MST_DEVICE} -i ${_FW_FILE}"
fi
fi
}
function UpgradeFWFromImage() {
local -r _NEXT_SONIC_IMAGE="$(sonic-installer list | grep "Next: " | cut -f2 -d' ')"
local -r _CURRENT_SONIC_IMAGE="$(sonic-installer list | grep "Current: " | cut -f2 -d' ')"
if [[ "${_CURRENT_SONIC_IMAGE}" == "${_NEXT_SONIC_IMAGE}" ]]; then
ExitSuccess "firmware is up to date"
fi
# /host/image-<version>/platform/fw/asic is now the new location for FW binaries.
# Prefere this path and if it does not exist use squashfs as a fallback.
local -r _PLATFORM_FW_BIN_PATH="/host/image-${_NEXT_SONIC_IMAGE#SONiC-OS-}/platform/fw/asic/"
if [[ -d "${_PLATFORM_FW_BIN_PATH}" ]]; then
LogInfo "Using FW binaries from ${_PLATFORM_FW_BIN_PATH}"
UpgradeFW "${_PLATFORM_FW_BIN_PATH}"
else
local -r _FS_PATH="/host/image-${_NEXT_SONIC_IMAGE#SONiC-OS-}/fs.squashfs"
local -r _FS_MOUNTPOINT="/tmp/image-${_NEXT_SONIC_IMAGE#SONiC-OS-}-fs"
local -r _FW_BIN_PATH="${_FS_MOUNTPOINT}/etc/mlnx/"
LogInfo "Using FW binaries from ${_FW_BIN_PATH}"
mkdir -p "${_FS_MOUNTPOINT}"
mount -t squashfs "${_FS_PATH}" "${_FS_MOUNTPOINT}"
UpgradeFW "${_FW_BIN_PATH}"
umount -rf "${_FS_MOUNTPOINT}"
rm -rf "${_FS_MOUNTPOINT}"
fi
}
function ExitIfQEMU() {
if [ -n "$(lspci -vvv | grep SimX)" ]; then
ExitSuccess "No FW upgrade for SimX platform"
fi
}
ParseArguments "$@"
ExitIfQEMU
WaitForDevice
if [ "${IMAGE_UPGRADE}" != "${YES_PARAM}" ]; then
UpgradeFW
else
UpgradeFWFromImage
fi
ExitSuccess "firmware upgrade is completed"