Fix post-reboot errors in platforms without sonic_platform package (#6130)
Refactor determine-reboot cause code. Fix errors seen during determine-reboot-cause when sonic_platform package is not installed. Add error handling for healthd service when sonic_platform package is not installed. Tested on KVM where sonic_platform is not present, and the errors are not seen anymore in syslog.
This commit is contained in:
parent
9aeb0a1bb1
commit
e8da2ee975
@ -40,7 +40,8 @@ REBOOT_TYPE_KEXEC_PATTERN_WARM = ".*SONIC_BOOT_TYPE=(warm|fastfast).*"
|
|||||||
REBOOT_TYPE_KEXEC_PATTERN_FAST = ".*SONIC_BOOT_TYPE=(fast|fast-reboot).*"
|
REBOOT_TYPE_KEXEC_PATTERN_FAST = ".*SONIC_BOOT_TYPE=(fast|fast-reboot).*"
|
||||||
|
|
||||||
REBOOT_CAUSE_UNKNOWN = "Unknown"
|
REBOOT_CAUSE_UNKNOWN = "Unknown"
|
||||||
|
REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware"
|
||||||
|
REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other"
|
||||||
|
|
||||||
# Global logger class instance
|
# Global logger class instance
|
||||||
sonic_logger = logger.Logger(SYSLOG_IDENTIFIER)
|
sonic_logger = logger.Logger(SYSLOG_IDENTIFIER)
|
||||||
@ -98,43 +99,27 @@ def find_proc_cmdline_reboot_cause():
|
|||||||
|
|
||||||
return proc_cmdline_reboot_cause
|
return proc_cmdline_reboot_cause
|
||||||
|
|
||||||
def get_reboot_cause_from_platform():
|
|
||||||
# Until all platform vendors have provided sonic_platform packages,
|
def find_hardware_reboot_cause():
|
||||||
# if there is no sonic_platform package installed, we only provide
|
# Find hardware reboot cause using sonic_platform library
|
||||||
# software-related reboot causes.
|
|
||||||
try:
|
try:
|
||||||
import sonic_platform
|
import sonic_platform
|
||||||
platform = sonic_platform.platform.Platform()
|
platform = sonic_platform.platform.Platform()
|
||||||
chassis = platform.get_chassis()
|
chassis = platform.get_chassis()
|
||||||
return chassis.get_reboot_cause()
|
hardware_reboot_cause_major, hardware_reboot_cause_minor = chassis.get_reboot_cause()
|
||||||
except ImportError as err:
|
|
||||||
sonic_logger.log_warning("sonic_platform package not installed. Unable to detect hardware reboot causes.")
|
|
||||||
|
|
||||||
|
|
||||||
def find_hardware_reboot_cause():
|
|
||||||
hardware_reboot_cause = None
|
|
||||||
|
|
||||||
REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other"
|
|
||||||
REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware"
|
|
||||||
|
|
||||||
hardware_reboot_cause_major, hardware_reboot_cause_minor = get_reboot_cause_from_platform()
|
|
||||||
sonic_logger.log_info("Platform api returns reboot cause {}, {}".format(hardware_reboot_cause_major, hardware_reboot_cause_minor))
|
sonic_logger.log_info("Platform api returns reboot cause {}, {}".format(hardware_reboot_cause_major, hardware_reboot_cause_minor))
|
||||||
|
except ImportError:
|
||||||
|
sonic_logger.log_warning("sonic_platform package not installed. Unable to detect hardware reboot causes.")
|
||||||
|
hardware_reboot_cause_major, hardware_reboot_cause_minor = REBOOT_CAUSE_NON_HARDWARE, "N/A"
|
||||||
|
|
||||||
if hardware_reboot_cause_major == REBOOT_CAUSE_NON_HARDWARE:
|
if hardware_reboot_cause_major:
|
||||||
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
|
sonic_logger.log_info("Platform api indicates reboot cause {}".format(hardware_reboot_cause_major))
|
||||||
# contain any software-related reboot info. We will use it as the previous cause.
|
|
||||||
pass
|
|
||||||
elif hardware_reboot_cause_major == REBOOT_CAUSE_HARDWARE_OTHER:
|
|
||||||
hardware_reboot_cause = "{} ({})".format(hardware_reboot_cause_major, hardware_reboot_cause_minor)
|
|
||||||
else:
|
|
||||||
hardware_reboot_cause = hardware_reboot_cause_major
|
|
||||||
|
|
||||||
if hardware_reboot_cause:
|
|
||||||
sonic_logger.log_info("Platform api indicates reboot cause {}".format(hardware_reboot_cause))
|
|
||||||
else:
|
else:
|
||||||
sonic_logger.log_info("No reboot cause found from platform api")
|
sonic_logger.log_info("No reboot cause found from platform api")
|
||||||
|
|
||||||
return hardware_reboot_cause, hardware_reboot_cause_minor
|
hardware_reboot_cause = "{} ({})".format(hardware_reboot_cause_major, hardware_reboot_cause_minor)
|
||||||
|
return hardware_reboot_cause
|
||||||
|
|
||||||
|
|
||||||
def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time):
|
def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time):
|
||||||
# resultant dictionary
|
# resultant dictionary
|
||||||
@ -175,14 +160,16 @@ def main():
|
|||||||
os.remove(PREVIOUS_REBOOT_CAUSE_FILE)
|
os.remove(PREVIOUS_REBOOT_CAUSE_FILE)
|
||||||
|
|
||||||
hardware_reboot_cause = None
|
hardware_reboot_cause = None
|
||||||
additional_reboot_info = None
|
# This variable is kept for future-use purpose. When proc_cmd_line/vendor/software provides
|
||||||
|
# any additional_reboot_info it will be stored as a "comment" in REBOOT_CAUSE_HISTORY_FILE
|
||||||
|
additional_reboot_info = "N/A"
|
||||||
|
|
||||||
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
|
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
|
||||||
proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()
|
proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()
|
||||||
|
|
||||||
# 2. Check if the previous reboot was caused by hardware
|
# 2. Check if the previous reboot was caused by hardware
|
||||||
# If yes, the hardware reboot cause will be treated as the reboot cause
|
# If yes, the hardware reboot cause will be treated as the reboot cause
|
||||||
(hardware_reboot_cause, additional_reboot_info) = find_hardware_reboot_cause()
|
hardware_reboot_cause = find_hardware_reboot_cause()
|
||||||
|
|
||||||
# 3. If there is a REBOOT_CAUSE_FILE, it will contain any software-related
|
# 3. If there is a REBOOT_CAUSE_FILE, it will contain any software-related
|
||||||
# reboot info. We will use it as the previous cause.
|
# reboot info. We will use it as the previous cause.
|
||||||
@ -197,7 +184,7 @@ def main():
|
|||||||
# Else the software_reboot_cause will be treated as the reboot cause
|
# Else the software_reboot_cause will be treated as the reboot cause
|
||||||
if proc_cmdline_reboot_cause is not None:
|
if proc_cmdline_reboot_cause is not None:
|
||||||
previous_reboot_cause = software_reboot_cause
|
previous_reboot_cause = software_reboot_cause
|
||||||
elif hardware_reboot_cause is not None:
|
elif hardware_reboot_cause is not REBOOT_CAUSE_NON_HARDWARE:
|
||||||
previous_reboot_cause = hardware_reboot_cause
|
previous_reboot_cause = hardware_reboot_cause
|
||||||
else:
|
else:
|
||||||
previous_reboot_cause = software_reboot_cause
|
previous_reboot_cause = software_reboot_cause
|
||||||
|
@ -99,9 +99,8 @@ class TestDetermineRebootCause(object):
|
|||||||
assert result == "fast-reboot"
|
assert result == "fast-reboot"
|
||||||
|
|
||||||
def test_find_hardware_reboot_cause(self):
|
def test_find_hardware_reboot_cause(self):
|
||||||
with mock.patch("determine_reboot_cause.get_reboot_cause_from_platform", return_value=("Powerloss", None)):
|
|
||||||
result = find_hardware_reboot_cause()
|
result = find_hardware_reboot_cause()
|
||||||
assert result == ("Powerloss", None)
|
assert result == "Non-Hardware (N/A)"
|
||||||
|
|
||||||
def test_get_reboot_cause_dict_watchdog(self):
|
def test_get_reboot_cause_dict_watchdog(self):
|
||||||
reboot_cause_dict = get_reboot_cause_dict(REBOOT_CAUSE_WATCHDOG, "", GEN_TIME_WATCHDOG)
|
reboot_cause_dict = get_reboot_cause_dict(REBOOT_CAUSE_WATCHDOG, "", GEN_TIME_WATCHDOG)
|
||||||
@ -113,5 +112,5 @@ class TestDetermineRebootCause(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def teardown_class(cls):
|
def teardown_class(cls):
|
||||||
print("TREARDOWN")
|
print("TEARDOWN")
|
||||||
|
|
||||||
|
@ -68,6 +68,7 @@ class HealthDaemon(DaemonBase):
|
|||||||
"""
|
"""
|
||||||
self.log_notice("Starting up...")
|
self.log_notice("Starting up...")
|
||||||
|
|
||||||
|
try:
|
||||||
import sonic_platform.platform
|
import sonic_platform.platform
|
||||||
chassis = sonic_platform.platform.Platform().get_chassis()
|
chassis = sonic_platform.platform.Platform().get_chassis()
|
||||||
manager = HealthCheckerManager()
|
manager = HealthCheckerManager()
|
||||||
@ -81,6 +82,8 @@ class HealthDaemon(DaemonBase):
|
|||||||
|
|
||||||
if self.stop_event.wait(manager.config.interval):
|
if self.stop_event.wait(manager.config.interval):
|
||||||
break
|
break
|
||||||
|
except ImportError:
|
||||||
|
self.log_warning("sonic_platform package not installed. Cannot start system-health daemon")
|
||||||
|
|
||||||
self.deinit()
|
self.deinit()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user