[Mellanox] Skip the leftover hardware reboot cause when the last boot was a warm/fast reboot (#13246)

- Why I did it
During a warm/fast reboot, the hardware reboot cause is NOT cleared because the CPLD is not touched in this flow. To avoid confusing the logic that determines the reboot cause, the platform API skips the leftover hardware reboot cause and returns 'REBOOT_CAUSE_NON_HARDWARE' instead of the stale "hardware" reboot cause.

- How I did it
Check /proc/cmdline to see whether the last reboot was a warm or fast reboot; if so, skip checking the leftover hardware reboot cause.

- How to verify it
a. Manual test:
    - Perform a power loss
    - Perform a warm/fast reboot
    - Check that the reboot cause is "warm-reboot" or "fast-reboot" instead of "power loss"
b. Run reboot cause related regression test.

Signed-off-by: Kebo Liu <kebol@nvidia.com>
This commit is contained in:
Kebo Liu 2023-01-11 22:50:46 +08:00 committed by GitHub
parent 20f47bb5ac
commit 7873a9131d
No account linked to committer's email address
2 changed files with 54 additions and 0 deletions

View File

@ -30,6 +30,7 @@ try:
from .utils import extract_RJ45_ports_index
from . import utils
from .device_data import DeviceDataManager
import re
except ImportError as e:
raise ImportError (str(e) + "- required module not found")
@ -61,6 +62,10 @@ REBOOT_CAUSE_ROOT = HWMGMT_SYSTEM_ROOT
REBOOT_CAUSE_FILE_LENGTH = 1
# File read by Chassis._parse_warmfast_reboot_from_proc_cmdline() to find the
# SONIC_BOOT_TYPE argument of the current boot.
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
# SONIC_BOOT_TYPE values reported as 'warm-reboot' ("warm" and "fastfast").
REBOOT_TYPE_KEXEC_PATTERN_WARM = ".*SONIC_BOOT_TYPE=(warm|fastfast).*"
# SONIC_BOOT_TYPE values reported as 'fast-reboot'. The pattern is not anchored
# after the value, so its "fast" alternative also matches "fastfast" and
# "fast-reboot"; the parser therefore checks the WARM pattern first.
REBOOT_TYPE_KEXEC_PATTERN_FAST = ".*SONIC_BOOT_TYPE=(fast|fast-reboot).*"
# Global logger class instance
logger = Logger()
@ -736,6 +741,18 @@ class Chassis(ChassisBase):
self.reboot_by_software = 'reset_sw_reset'
self.reboot_cause_initialized = True
def _parse_warmfast_reboot_from_proc_cmdline(self):
    """
    Determine from the kernel command line whether the last boot was a
    warm or a fast reboot.

    Returns:
        'warm-reboot' if the first line of REBOOT_TYPE_KEXEC_FILE matches
        REBOOT_TYPE_KEXEC_PATTERN_WARM, 'fast-reboot' if it matches
        REBOOT_TYPE_KEXEC_PATTERN_FAST, and None if neither pattern
        matches or the file does not exist.
    """
    if not os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
        return None

    with open(REBOOT_TYPE_KEXEC_FILE) as cmdline_file:
        boot_args = cmdline_file.readline()

    # The warm pattern has to be tried first: the fast pattern's "fast"
    # alternative would otherwise also accept SONIC_BOOT_TYPE=fastfast.
    pattern_to_reboot_type = (
        (REBOOT_TYPE_KEXEC_PATTERN_WARM, 'warm-reboot'),
        (REBOOT_TYPE_KEXEC_PATTERN_FAST, 'fast-reboot'),
    )
    for pattern, reboot_type in pattern_to_reboot_type:
        found = re.search(pattern, boot_args)
        if found and found.group(1):
            return reboot_type
    return None
def get_reboot_cause(self):
"""
Retrieves the cause of the previous reboot
@ -748,6 +765,14 @@ class Chassis(ChassisBase):
to pass a description of the reboot cause.
"""
#read reboot causes files in the following order
# To avoid the leftover hardware reboot cause confusing the reboot cause determine service
# Skip the hardware reboot cause check if warm/fast reboot cause found from cmdline
if utils.is_host():
reboot_cause = self._parse_warmfast_reboot_from_proc_cmdline()
if reboot_cause:
return self.REBOOT_CAUSE_NON_HARDWARE, ''
if not self.reboot_cause_initialized:
self.initialize_reboot_cause()

View File

@ -224,6 +224,35 @@ class TestChassis:
assert minor == value
mock_file_content[file_path] = 0
utils.is_host = mock.MagicMock(return_value=True)
chassis._parse_warmfast_reboot_from_proc_cmdline = mock.MagicMock(return_value='warm-reboot')
for key, value in chassis.reboot_major_cause_dict.items():
file_path = os.path.join(REBOOT_CAUSE_ROOT, key)
mock_file_content[file_path] = 1
major, minor = chassis.get_reboot_cause()
assert major == chassis.REBOOT_CAUSE_NON_HARDWARE
assert minor == ''
mock_file_content[file_path] = 0
for key, value in chassis.reboot_minor_cause_dict.items():
file_path = os.path.join(REBOOT_CAUSE_ROOT, key)
mock_file_content[file_path] = 1
major, minor = chassis.get_reboot_cause()
assert major == chassis.REBOOT_CAUSE_NON_HARDWARE
assert minor == value
mock_file_content[file_path] = 0
def test_parse_warmfast_reboot_from_proc_cmdline(self):
    """
    Verify the reboot type that
    Chassis._parse_warmfast_reboot_from_proc_cmdline() derives from a
    mocked kernel command line, for every SONIC_BOOT_TYPE spelling the
    warm/fast patterns accept and for an unrelated value.
    """
    chassis = Chassis()

    def parse_with_cmdline(cmdline):
        # Patch os.path.isfile together with open() so the outcome does
        # not depend on /proc/cmdline existing on the machine running
        # the test.
        with mock.patch("os.path.isfile", return_value=True), \
                mock.patch("builtins.open", mock.mock_open(read_data=cmdline)):
            return chassis._parse_warmfast_reboot_from_proc_cmdline()

    # Values covered by the warm pattern.
    assert parse_with_cmdline("SONIC_BOOT_TYPE=warm") == "warm-reboot"
    assert parse_with_cmdline("SONIC_BOOT_TYPE=fastfast") == "warm-reboot"

    # Values covered by the fast pattern.
    assert parse_with_cmdline("SONIC_BOOT_TYPE=fast") == "fast-reboot"
    assert parse_with_cmdline("SONIC_BOOT_TYPE=fast-reboot") == "fast-reboot"

    # Any other boot type is neither warm nor fast.
    assert parse_with_cmdline("SONIC_BOOT_TYPE=None") is None
def test_module(self):
from sonic_platform.chassis import ModularChassis
# Test get_num_modules, it should not create any SFP objects