sonic-buildimage/platform/mellanox/mlnx-fw-upgrade.j2
Volodymyr Samotiy 672781e24a
[mlnx-fw-upgrade] Add FW reactivation in case 2 FW upgrades were done without reboot (#17092)
- Why I did it
In order to activate FW after it was upgraded need to perform reboot.
If reboot wasn't performed and user need to upgrade to another SONiC image then it will fail.
The reason for that is that during SONiC upgrade new FW should be installed but it will fail because previously installed FW wasn't activated.
In order to allow 2nd FW upgrade without reboot in-between need to reactivate FW image.
This change handles such flow.

Example of issue scenario:

User installed SONiC image on the switch
Then for some reason FW was upgraded by user or script but reboot was not performed to activate it.
After that upgrade to new SONiC image will fail because new image need to install FW but it fails due to previous one wasn't activated.

- How I did it
In "mlnx-fw-upgrade" script check if FW upgrade failed with the error that FW was already installed but reboot was not performed.
If so then perform FW image reactivation and try to upgrade FW again.

- How to verify it
Install SONiC image on the switch
Then upgrade FW but don't perform reboot.
After that upgrade to new SONiC image and check that upgrade was successfull.

Signed-off-by: Volodymyr Samotiy <volodymyrs@nvidia.com>
2023-11-19 11:01:31 +02:00

372 lines
10 KiB
Django/Jinja
Executable File

{#-
Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES.
Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-#}
#!/bin/bash
declare -r SCRIPT_NAME="$(basename "$0")"
declare -r SCRIPT_PATH="$(readlink -f "$0")"
declare -r SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
declare -r YES_PARAM="yes"
declare -r NO_PARAM="no"
declare -r VERBOSE_ERROR="1"
declare -r VERBOSE_WARNING="2"
declare -r VERBOSE_NOTICE="3"
declare -r VERBOSE_INFO="4"
declare -r VERBOSE_MAX="${VERBOSE_INFO}"
declare -r VERBOSE_MIN="${VERBOSE_ERROR}"
declare -r EXIT_SUCCESS="0"
declare -r EXIT_FAILURE="1"
declare -r FW_ALREADY_UPDATED_FAILURE="2"
declare -r QUERY_CMD="mlxfwmanager --query"
declare -r LIST_CONTENT_CMD="mlxfwmanager --list-content"
declare -r BURN_CMD="mlxfwmanager -u -f -y"
declare -r QUERY_FILE="/tmp/mlxfwmanager-query.log"
declare -r LIST_CONTENT_FILE="/tmp/mlxfwmanager-list-content.log"
declare -r SPC1_ASIC="spc1"
declare -r SPC2_ASIC="spc2"
declare -r SPC3_ASIC="spc3"
declare -r SPC4_ASIC="spc4"
declare -r UNKN_ASIC="unknown"
declare -r UNKN_MST="unknown"
declare -rA FW_FILE_MAP=( \
[$SPC1_ASIC]="fw-SPC.mfa" \
[$SPC2_ASIC]="fw-SPC2.mfa" \
[$SPC3_ASIC]="fw-SPC3.mfa" \
[$SPC4_ASIC]="fw-SPC4.mfa" \
)
IMAGE_UPGRADE="${NO_PARAM}"
SYSLOG_LOGGER="${NO_PARAM}"
VERBOSE_LEVEL="${VERBOSE_MIN}"
MFT_DIAGNOSIS_FLAGS=""
function PrintHelp() {
echo
echo "Usage: ./${SCRIPT_NAME} [OPTIONS]"
echo
echo "OPTIONS:"
echo " -u, --upgrade Upgrade ASIC firmware using next boot image (useful after SONiC-To-SONiC update)"
echo " -s, --syslog Use syslog logger (enabled when -u|--upgrade)"
echo " -v, --verbose Verbose mode (enabled when -u|--upgrade)"
echo " -h, --help Print help"
echo
echo "Examples:"
echo " ./${SCRIPT_NAME} --verbose"
echo " ./${SCRIPT_NAME} --upgrade"
echo " ./${SCRIPT_NAME} --help"
echo
}
function ParseArguments() {
while [ "$#" -ge "1" ]; do
case "$1" in
-u|--upgrade)
IMAGE_UPGRADE="${YES_PARAM}"
SYSLOG_LOGGER="${YES_PARAM}"
;;
-v|--verbose)
VERBOSE_LEVEL="${VERBOSE_MAX}"
MFT_DIAGNOSIS_FLAGS="FLASH_ACCESS_DEBUG=1 FW_COMPS_DEBUG=1"
;;
-s|--syslog)
SYSLOG_LOGGER="${YES_PARAM}"
;;
-h|--help)
PrintHelp
exit "${EXIT_SUCCESS}"
;;
esac
shift
done
}
function LogError() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_ERROR}" ]]; then
echo "ERROR: $*"
logger -p "ERROR" -t "${SCRIPT_NAME}" "$*"
fi
if [[ "${SYSLOG_LOGGER}" = "${YES_PARAM}" ]]; then
logger -p "ERROR" -t "${SCRIPT_NAME}" "$*"
fi
}
function LogWarning() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_WARNING}" ]]; then
echo "WARNING: $*"
fi
if [[ "${SYSLOG_LOGGER}" = "${YES_PARAM}" ]]; then
logger -p "WARNING" -t "${SCRIPT_NAME}" "$*"
fi
}
function LogNotice() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_NOTICE}" ]]; then
echo "NOTICE: $*"
fi
if [[ "${SYSLOG_LOGGER}" = "${YES_PARAM}" ]]; then
logger -p "NOTICE" -t "${SCRIPT_NAME}" "$*"
fi
}
function LogInfo() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_INFO}" ]]; then
echo "INFO: $*"
fi
if [[ "${SYSLOG_LOGGER}" = "${YES_PARAM}" ]]; then
logger -p "INFO" -t "${SCRIPT_NAME}" "$*"
fi
}
function ExitFailure() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_ERROR}" ]]; then
echo
LogError "$@"
echo
fi
exit "${EXIT_FAILURE}"
}
function ExitSuccess() {
if [[ "${VERBOSE_LEVEL}" -ge "${VERBOSE_INFO}" ]]; then
echo
LogInfo "$@"
echo
fi
exit "${EXIT_SUCCESS}"
}
function WaitForDevice() {
local -i QUERY_RETRY_COUNT_MAX="10"
local -i QUERY_RETRY_COUNT="0"
${QUERY_CMD} > /dev/null
while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("$?" -ne "${EXIT_SUCCESS}") ]]; do
sleep 1s
((QUERY_RETRY_COUNT++))
output=$(eval ${MFT_DIAGNOSIS_FLAGS} ${QUERY_CMD}) > /dev/null
done
ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
# Exit failure and print the detailed information
echo "$output"
failure_msg="${output#*Fail : }"
ExitFailure "FW Query command: ${QUERY_CMD} failed to wait for device with error: ${failure_msg}"
fi
}
function GetAsicType() {
local -r VENDOR_ID="15b3"
local -r SPC1_PRODUCT_ID="cb84"
local -r SPC2_PRODUCT_ID="cf6c"
local -r SPC3_PRODUCT_ID="cf70"
local -r SPC4_PRODUCT_ID="cf80"
if lspci -n | grep "${VENDOR_ID}:${SPC1_PRODUCT_ID}" &>/dev/null; then
echo "${SPC1_ASIC}"
exit "${EXIT_SUCCESS}"
elif lspci -n | grep "${VENDOR_ID}:${SPC2_PRODUCT_ID}" &>/dev/null; then
echo "${SPC2_ASIC}"
exit "${EXIT_SUCCESS}"
elif lspci -n | grep "${VENDOR_ID}:${SPC3_PRODUCT_ID}" &>/dev/null; then
echo "${SPC3_ASIC}"
exit "${EXIT_SUCCESS}"
elif lspci -n | grep "${VENDOR_ID}:${SPC4_PRODUCT_ID}" &>/dev/null; then
echo "${SPC4_ASIC}"
exit "${EXIT_SUCCESS}"
fi
echo "${UNKN_ASIC}"
exit "${EXIT_FAILURE}"
}
function GetMstDevice() {
local _MST_DEVICE="$(ls /dev/mst/*_pci_cr0 2>&1)"
if [[ ! -c "${_MST_DEVICE}" ]]; then
echo "${UNKN_MST}"
else
echo "${_MST_DEVICE}"
fi
exit "${EXIT_SUCCESS}"
}
function RunCmd() {
local ERROR_CODE="${EXIT_SUCCESS}"
if [[ "${VERBOSE_LEVEL}" -eq "${VERBOSE_MAX}" ]]; then
eval "$@"
else
eval "$@" &>/dev/null
fi
ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
ExitFailure "command failed: $@"
fi
}
function RunFwUpdateCmd() {
local ERROR_CODE="${EXIT_SUCCESS}"
local COMMAND="${MFT_DIAGNOSIS_FLAGS} ${BURN_CMD} $@"
if [[ "${VERBOSE_LEVEL}" -eq "${VERBOSE_MAX}" ]]; then
output=$(eval "${COMMAND}")
else
output=$(eval "${COMMAND}") >/dev/null 2>&1
fi
ERROR_CODE="$?"
if [[ "${ERROR_CODE}" == "${FW_ALREADY_UPDATED_FAILURE}" ]]; then
LogInfo "FW reactivation is required. Reactivating and updating FW ..."
local -r _MST_DEVICE="$(GetMstDevice)"
local -r _CMD="flint -d ${_MST_DEVICE} ir"
output=$(eval "${_CMD}")
if [[ "${VERBOSE_LEVEL}" -eq "${VERBOSE_MAX}" ]]; then
output=$(eval "${COMMAND}")
else
output=$(eval "${COMMAND}") >/dev/null 2>&1
fi
fi
ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
echo "${output}"
failure_msg="${output#*Fail : }"
ExitFailure "FW Update command: ${COMMAND} failed with error: ${failure_msg}"
fi
}
function UpgradeFW() {
local -r _FW_BIN_PATH="$1"
local -r _ASIC_TYPE="$(GetAsicType)"
if [[ "${_ASIC_TYPE}" = "${UNKN_ASIC}" ]]; then
ExitFailure "failed to detect ASIC type"
fi
if [ ! -z "${_FW_BIN_PATH}" ]; then
local -r _FW_FILE="${_FW_BIN_PATH}/${FW_FILE_MAP[$_ASIC_TYPE]}"
else
local -r _FW_FILE="/etc/mlnx/${FW_FILE_MAP[$_ASIC_TYPE]}"
fi
if [ ! -f "${_FW_FILE}" ]; then
ExitFailure "no such file: ${_FW_FILE}"
fi
RunCmd "${QUERY_CMD} -o ${QUERY_FILE}"
local -r _FW_CURRENT_INFO="$(grep FW ${QUERY_FILE})"
local -r _FW_CURRENT="$(echo ${_FW_CURRENT_INFO} | cut -f2 -d' ')"
local -r _PSID_INFO="$(grep PSID ${QUERY_FILE})"
local -r _PSID="$(echo ${_PSID_INFO} | cut -f2 -d' ')"
RunCmd "${LIST_CONTENT_CMD} -i ${_FW_FILE} -o ${LIST_CONTENT_FILE}"
local -r _FW_AVAILABLE_INFO="$(grep ${_PSID} ${LIST_CONTENT_FILE})"
local -r _FW_AVAILABLE="$(echo ${_FW_AVAILABLE_INFO} | cut -f4 -d' ')"
if [[ -z "${_FW_CURRENT}" ]]; then
ExitFailure "could not retreive current FW version"
fi
if [[ -z "${_FW_AVAILABLE}" ]]; then
ExitFailure "could not retreive available FW version"
fi
if [[ "${_FW_CURRENT}" == "${_FW_AVAILABLE}" ]]; then
ExitSuccess "firmware is up to date"
else
LogNotice "firmware upgrade is required. Installing compatible version..."
local -r _MST_DEVICE="$(GetMstDevice)"
if [[ "${_MST_DEVICE}" = "${UNKN_MST}" ]]; then
LogWarning "could not find fastest mst device, using default device"
RunFwUpdateCmd "-i ${_FW_FILE}"
else
RunFwUpdateCmd "-d ${_MST_DEVICE} -i ${_FW_FILE}"
fi
fi
}
function UpgradeFWFromImage() {
local -r _NEXT_SONIC_IMAGE="$(sonic-installer list | grep "Next: " | cut -f2 -d' ')"
local -r _CURRENT_SONIC_IMAGE="$(sonic-installer list | grep "Current: " | cut -f2 -d' ')"
if [[ "${_CURRENT_SONIC_IMAGE}" == "${_NEXT_SONIC_IMAGE}" ]]; then
ExitSuccess "firmware is up to date"
fi
# /host/image-<version>/platform/fw/asic is now the new location for FW binaries.
# Prefere this path and if it does not exist use squashfs as a fallback.
local -r _PLATFORM_FW_BIN_PATH="/host/image-${_NEXT_SONIC_IMAGE#SONiC-OS-}/platform/fw/asic/"
if [[ -d "${_PLATFORM_FW_BIN_PATH}" ]]; then
LogInfo "Using FW binaries from ${_PLATFORM_FW_BIN_PATH}"
UpgradeFW "${_PLATFORM_FW_BIN_PATH}"
else
local -r _FS_PATH="/host/image-${_NEXT_SONIC_IMAGE#SONiC-OS-}/fs.squashfs"
local -r _FS_MOUNTPOINT="/tmp/image-${_NEXT_SONIC_IMAGE#SONiC-OS-}-fs"
local -r _FW_BIN_PATH="${_FS_MOUNTPOINT}/etc/mlnx/"
LogInfo "Using FW binaries from ${_FW_BIN_PATH}"
mkdir -p "${_FS_MOUNTPOINT}"
mount -t squashfs "${_FS_PATH}" "${_FS_MOUNTPOINT}"
UpgradeFW "${_FW_BIN_PATH}"
umount -rf "${_FS_MOUNTPOINT}"
rm -rf "${_FS_MOUNTPOINT}"
fi
}
function ExitIfQEMU() {
if [ -n "$(lspci -vvv | grep SimX)" ]; then
ExitSuccess "No FW upgrade for SimX platform"
fi
}
ParseArguments "$@"
ExitIfQEMU
WaitForDevice
if [ "${IMAGE_UPGRADE}" != "${YES_PARAM}" ]; then
UpgradeFW
else
UpgradeFWFromImage
fi
ExitSuccess "firmware upgrade is completed"