[Mellanox] wait reset cause ready (#16722)
Why I did it: The SONiC service determine-reboot-cause might run before the driver has created the reset cause files. In that case, the reset cause will be "Unknown". This PR introduces a mechanism that waits for the reset cause sysfs files to be ready. How I did it: /run/hw-management/config/reset_attr_ready is the file that indicates all reset cause files are ready. The chassis.get_reboot_cause function now waits for /run/hw-management/config/reset_attr_ready for up to 45 seconds. How to verify it: Manual test on master/202211/202205.
This commit is contained in:
parent
ada2d88d02
commit
aedffd333b
@ -59,8 +59,9 @@ HWMGMT_SYSTEM_ROOT = '/var/run/hw-management/system/'
|
||||
|
||||
#reboot cause related definitions
|
||||
REBOOT_CAUSE_ROOT = HWMGMT_SYSTEM_ROOT
|
||||
|
||||
REBOOT_CAUSE_FILE_LENGTH = 1
|
||||
REBOOT_CAUSE_MAX_WAIT_TIME = 45
|
||||
REBOOT_CAUSE_CHECK_INTERVAL = 5
|
||||
REBOOT_CAUSE_READY_FILE = '/run/hw-management/config/reset_attr_ready'
|
||||
|
||||
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
|
||||
REBOOT_TYPE_KEXEC_PATTERN_WARM = ".*SONIC_BOOT_TYPE=(warm|fastfast).*"
|
||||
@ -782,6 +783,16 @@ class Chassis(ChassisBase):
|
||||
return 'fast-reboot'
|
||||
return None
|
||||
|
||||
def _wait_reboot_cause_ready(self):
    """
    Wait until the reset cause ready file reports that it is ready.

    Polls REBOOT_CAUSE_READY_FILE every REBOOT_CAUSE_CHECK_INTERVAL
    seconds, for at most REBOOT_CAUSE_MAX_WAIT_TIME seconds in total.
    The file is read before every sleep and once more after the last
    sleep, so the final interval of the wait budget is never spent
    without checking the result.

    Returns:
        True if the ready file contained 1 within the wait budget,
        False otherwise.
    """
    remaining = REBOOT_CAUSE_MAX_WAIT_TIME
    while True:
        # NOTE(review): log_func=None presumably silences read errors while
        # the file does not exist yet - confirm against utils.read_int_from_file.
        if utils.read_int_from_file(REBOOT_CAUSE_READY_FILE, log_func=None) == 1:
            return True

        if remaining <= 0:
            # Budget exhausted and the check just above still failed.
            return False

        # Never sleep past the overall budget, even if the interval does
        # not divide the maximum wait time evenly.
        time.sleep(min(REBOOT_CAUSE_CHECK_INTERVAL, remaining))
        remaining -= REBOOT_CAUSE_CHECK_INTERVAL
|
||||
|
||||
def get_reboot_cause(self):
|
||||
"""
|
||||
Retrieves the cause of the previous reboot
|
||||
@ -802,6 +813,10 @@ class Chassis(ChassisBase):
|
||||
if reboot_cause:
|
||||
return self.REBOOT_CAUSE_NON_HARDWARE, ''
|
||||
|
||||
if not self._wait_reboot_cause_ready():
|
||||
logger.log_error("Hardware reboot cause is not ready")
|
||||
return self.REBOOT_CAUSE_NON_HARDWARE, ''
|
||||
|
||||
if not self.reboot_cause_initialized:
|
||||
self.initialize_reboot_cause()
|
||||
|
||||
|
@ -194,6 +194,7 @@ class TestChassis:
|
||||
assert status is True
|
||||
assert 'sfp' in event_dict and not event_dict['sfp']
|
||||
|
||||
@mock.patch('sonic_platform.chassis.Chassis._wait_reboot_cause_ready', MagicMock(return_value=True))
|
||||
def test_reboot_cause(self):
|
||||
from sonic_platform import utils
|
||||
from sonic_platform.chassis import REBOOT_CAUSE_ROOT
|
||||
@ -242,6 +243,22 @@ class TestChassis:
|
||||
assert minor == value
|
||||
mock_file_content[file_path] = 0
|
||||
|
||||
@mock.patch('sonic_platform.chassis.Chassis._wait_reboot_cause_ready', MagicMock(return_value=False))
def test_reboot_cause_timeout(self):
    """
    get_reboot_cause must report a non-hardware cause with an empty
    description when waiting for the reset cause files fails.
    """
    # _wait_reboot_cause_ready is patched above to always return False.
    platform_chassis = Chassis()
    cause, description = platform_chassis.get_reboot_cause()

    assert cause == platform_chassis.REBOOT_CAUSE_NON_HARDWARE
    assert description == ''
|
||||
|
||||
@mock.patch('sonic_platform.utils.read_int_from_file')
@mock.patch('sonic_platform.chassis.time.sleep', mock.MagicMock())
def test_wait_reboot_cause_ready(self, mock_read_int):
    """
    _wait_reboot_cause_ready follows the value read from the ready file:
    truthy result when the read yields 1, falsy result when it yields 0.
    time.sleep is patched out so the not-ready case does not really wait.
    """
    # Ready case: the mocked read returns 1 (set before Chassis() is built,
    # exactly as in the original ordering).
    mock_read_int.return_value = 1
    platform_chassis = Chassis()
    assert platform_chassis._wait_reboot_cause_ready()

    # Not-ready case: the mocked read keeps returning 0 until the wait gives up.
    mock_read_int.return_value = 0
    assert not platform_chassis._wait_reboot_cause_ready()
|
||||
|
||||
def test_parse_warmfast_reboot_from_proc_cmdline(self):
|
||||
chassis = Chassis()
|
||||
with mock.patch("builtins.open", mock.mock_open(read_data="SONIC_BOOT_TYPE=warm")):
|
||||
|
Loading…
Reference in New Issue
Block a user