[Mellanox] Enhance FW upgrade mechanism (#16090)

### Why I did it

1. Enhance the mechanism for collecting diagnostic information
   - If the `-v` option is given, additional diagnostic flags are passed to mlxfwmanager
   - All output from mlxfwmanager is collected and printed to syslog if the command fails
2. Abort syncd if waiting for the device or upgrading the firmware fails

Signed-off-by: Stephen Sun <stephens@nvidia.com>

### How I did it

#### How to verify it

Regression and manual tests
This commit is contained in:
Stephen Sun 2023-09-05 02:28:53 +08:00 committed by GitHub
parent 78587cedc3
commit b5e8c16134
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 3 deletions

View File

@ -41,7 +41,11 @@ function startplatform() {
/usr/bin/flint -d $_MST_DEVICE --clear_semaphore /usr/bin/flint -d $_MST_DEVICE --clear_semaphore
fi fi
/usr/bin/mlnx-fw-upgrade.sh /usr/bin/mlnx-fw-upgrade.sh -v
if [[ "$?" -ne "${EXIT_SUCCESS}" ]]; then
debug "Failed to upgrade fw. " "$?" "Restart syncd"
exit 1
fi
/etc/init.d/sxdkernel restart /etc/init.d/sxdkernel restart
debug "Firmware update procedure ended" debug "Firmware update procedure ended"
fi fi

View File

@ -55,6 +55,7 @@ declare -rA FW_FILE_MAP=( \
IMAGE_UPGRADE="${NO_PARAM}" IMAGE_UPGRADE="${NO_PARAM}"
SYSLOG_LOGGER="${NO_PARAM}" SYSLOG_LOGGER="${NO_PARAM}"
VERBOSE_LEVEL="${VERBOSE_MIN}" VERBOSE_LEVEL="${VERBOSE_MIN}"
MFT_DIAGNOSIS_FLAGS=""
function PrintHelp() { function PrintHelp() {
echo echo
@ -82,6 +83,7 @@ function ParseArguments() {
;; ;;
-v|--verbose) -v|--verbose)
VERBOSE_LEVEL="${VERBOSE_MAX}" VERBOSE_LEVEL="${VERBOSE_MAX}"
MFT_DIAGNOSIS_FLAGS="FLASH_ACCESS_DEBUG=1 FW_COMPS_DEBUG=1"
;; ;;
-s|--syslog) -s|--syslog)
SYSLOG_LOGGER="${YES_PARAM}" SYSLOG_LOGGER="${YES_PARAM}"
@ -165,8 +167,16 @@ function WaitForDevice() {
while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("$?" -ne "${EXIT_SUCCESS}") ]]; do while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("$?" -ne "${EXIT_SUCCESS}") ]]; do
sleep 1s sleep 1s
((QUERY_RETRY_COUNT++)) ((QUERY_RETRY_COUNT++))
${QUERY_CMD} > /dev/null output=$(eval ${MFT_DIAGNOSIS_FLAGS} ${QUERY_CMD}) > /dev/null
done done
ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
# Exit failure and print the detailed information
echo "$output"
failure_msg="${output#*Fail : }"
ExitFailure "FW Query command: ${QUERY_CMD} failed to wait for device with error: ${failure_msg}"
fi
} }
function GetAsicType() { function GetAsicType() {
@ -224,7 +234,7 @@ function RunCmd() {
function RunFwUpdateCmd() { function RunFwUpdateCmd() {
local ERROR_CODE="${EXIT_SUCCESS}" local ERROR_CODE="${EXIT_SUCCESS}"
local COMMAND="${BURN_CMD} $@" local COMMAND="${MFT_DIAGNOSIS_FLAGS} ${BURN_CMD} $@"
if [[ "${VERBOSE_LEVEL}" -eq "${VERBOSE_MAX}" ]]; then if [[ "${VERBOSE_LEVEL}" -eq "${VERBOSE_MAX}" ]]; then
output=$(eval "${COMMAND}") output=$(eval "${COMMAND}")
@ -234,6 +244,7 @@ function RunFwUpdateCmd() {
ERROR_CODE="$?" ERROR_CODE="$?"
if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then if [[ "${ERROR_CODE}" != "${EXIT_SUCCESS}" ]]; then
echo "${output}"
failure_msg="${output#*Fail : }" failure_msg="${output#*Fail : }"
ExitFailure "FW Update command: ${COMMAND} failed with error: ${failure_msg}" ExitFailure "FW Update command: ${COMMAND} failed with error: ${failure_msg}"
fi fi