[process-reboot-cause]Address the issue: Incorrect reboot cause returned when warm reboot follows a hardware caused reboot (#3880)
* [process-reboot-cause]Address the issue: Incorrect reboot cause returned when warm reboot follows a hardware caused reboot 1. check whether /proc/cmdline indicates warm/fast reboot. if yes the software reboot cause file will be treated as the reboot cause. finish 2. check whether platform api returns a reboot cause. if yes it is treated as the reboot cause. finish. 3. check whether /hosts/reboot-cause contains a cause. if yes it is treated as the cause otherwise return unknown. * [process-reboot-cause]Fix review comments * [process-reboot-cause]address comments 1. use "with" statement 2. update fast/warm reboot BOOT_ARG * [process-reboot-cause]address comments * refactor the code flow * Remove escape * Remove extra ':'
This commit is contained in:
parent
0510fc7258
commit
49869aa6fa
@ -11,6 +11,7 @@ try:
|
||||
import pwd
|
||||
import sys
|
||||
import syslog
|
||||
import re
|
||||
except ImportError as err:
|
||||
raise ImportError("%s - required module not found" % str(err))
|
||||
|
||||
@ -22,6 +23,16 @@ REBOOT_CAUSE_DIR = "/host/reboot-cause/"
|
||||
REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "reboot-cause.txt"
|
||||
PREVIOUS_REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "previous-reboot-cause.txt"
|
||||
FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform"
|
||||
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
|
||||
# The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities
|
||||
# Because the system can be rebooted from some old versions, we have to take all possible BOOT options into consideration.
|
||||
# On 201803, 201807 we have
|
||||
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') fast-reboot"
|
||||
# On 201811 and later we have
|
||||
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" where BOOT_TYPE_ARG can be warm, fastfast or fast
|
||||
# To extract the common part of them, we use the following patterns
|
||||
REBOOT_TYPE_KEXEC_PATTERN_WARM = ".*SONIC_BOOT_TYPE=(warm|fastfast).*"
|
||||
REBOOT_TYPE_KEXEC_PATTERN_FAST = ".*SONIC_BOOT_TYPE=(fast|fast-reboot).*"
|
||||
|
||||
UNKNOWN_REBOOT_CAUSE = "Unknown"
|
||||
|
||||
@ -47,7 +58,32 @@ def log_error(msg):
|
||||
|
||||
|
||||
# ============================= Functions =============================
|
||||
def parse_warmfast_reboot_from_proc_cmdline():
    """Detect a warm or fast reboot from the kernel command line.

    Reads the first line of REBOOT_TYPE_KEXEC_FILE and matches it against
    REBOOT_TYPE_KEXEC_PATTERN_WARM, then REBOOT_TYPE_KEXEC_PATTERN_FAST.

    Returns:
        'warm-reboot' or 'fast-reboot' when the corresponding pattern
        matches; None otherwise, including when the file does not exist.
    """
    if not os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
        return None

    with open(REBOOT_TYPE_KEXEC_FILE, "r") as cmdline_file:
        cmdline = cmdline_file.readline()

    # Order matters: the warm pattern is tested before the fast pattern,
    # so the first pattern that matches decides the result.
    ordered_patterns = (
        (REBOOT_TYPE_KEXEC_PATTERN_WARM, 'warm-reboot'),
        (REBOOT_TYPE_KEXEC_PATTERN_FAST, 'fast-reboot'),
    )
    for pattern, reboot_type in ordered_patterns:
        matched = re.match(pattern, cmdline)
        if matched and matched.group(1):
            return reboot_type

    return None
|
||||
|
||||
|
||||
def find_software_reboot_cause():
    """Return the software-recorded reboot cause.

    The cause is the first line of REBOOT_CAUSE_FILE with trailing newline
    characters stripped, or UNKNOWN_REBOOT_CAUSE when that file does not
    exist.

    Side effect: FIRST_BOOT_PLATFORM_FILE is removed if it is present.
    """
    if os.path.isfile(REBOOT_CAUSE_FILE):
        with open(REBOOT_CAUSE_FILE, "r") as reboot_cause_fh:
            first_line = reboot_cause_fh.readline()
        cause = first_line.rstrip('\n')
    else:
        cause = UNKNOWN_REBOOT_CAUSE

    # Remove the first-boot marker file once it has been seen.
    if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
        os.remove(FIRST_BOOT_PLATFORM_FILE)

    return cause
|
||||
|
||||
|
||||
def main():
|
||||
log_info("Starting up...")
|
||||
|
||||
@ -73,51 +109,48 @@ def main():
|
||||
try:
|
||||
import sonic_platform
|
||||
|
||||
# Check if the previous reboot was caused by hardware
|
||||
platform = sonic_platform.platform.Platform()
|
||||
|
||||
chassis = platform.get_chassis()
|
||||
|
||||
hardware_reboot_cause, optional_details = chassis.get_reboot_cause()
|
||||
|
||||
if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE:
|
||||
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
|
||||
# contain any software-related reboot info. We will use it as the previous cause.
|
||||
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
|
||||
# If yes, the content of /host/reboot-cause/reboot-cause.txt will be treated as the reboot cause
|
||||
proc_cmdline_reboot_cause = parse_warmfast_reboot_from_proc_cmdline()
|
||||
if proc_cmdline_reboot_cause:
|
||||
log_info("/proc/cmdline indicates reboot type: {}".format(proc_cmdline_reboot_cause))
|
||||
if os.path.isfile(REBOOT_CAUSE_FILE):
|
||||
cause_file = open(REBOOT_CAUSE_FILE, "r")
|
||||
previous_reboot_cause = cause_file.readline().rstrip('\n')
|
||||
cause_file.close()
|
||||
# If it is FirstTime Boot and previous_reboot_cause is unknown
|
||||
# and hardware_reboot cause is non_hardware then
|
||||
# Update the reboot cause as required
|
||||
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
|
||||
if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE):
|
||||
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
|
||||
os.remove(FIRST_BOOT_PLATFORM_FILE)
|
||||
elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER:
|
||||
previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details)
|
||||
with open(REBOOT_CAUSE_FILE, "r") as cause_file:
|
||||
proc_cmdline_reboot_cause = cause_file.readline().rstrip('\n')
|
||||
else:
|
||||
# /proc/cmdline says it's a warm/fast reboot but /host/reboot-cause/reboot-cause.txt doesn't exist.
|
||||
# This could happen when upgrading from a version that doesn't support reboot cause.
|
||||
log_info("Reboot cause file {} doesn't exist".format(REBOOT_CAUSE_DIR))
|
||||
|
||||
if proc_cmdline_reboot_cause is not None:
|
||||
previous_reboot_cause = proc_cmdline_reboot_cause
|
||||
else:
|
||||
previous_reboot_cause = hardware_reboot_cause
|
||||
# 2. Check if the previous reboot was caused by hardware
|
||||
# If yes, the hardware reboot cause will be treated as the reboot cause
|
||||
platform = sonic_platform.platform.Platform()
|
||||
|
||||
chassis = platform.get_chassis()
|
||||
|
||||
hardware_reboot_cause, optional_details = chassis.get_reboot_cause()
|
||||
|
||||
if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE:
|
||||
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
|
||||
# contain any software-related reboot info. We will use it as the previous cause.
|
||||
previous_reboot_cause = find_software_reboot_cause()
|
||||
elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER:
|
||||
previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details)
|
||||
else:
|
||||
previous_reboot_cause = hardware_reboot_cause
|
||||
except ImportError as err:
|
||||
log_warning("sonic_platform package not installed. Unable to detect hardware reboot causes.")
|
||||
|
||||
# If there is a REBOOT_CAUSE_FILE, it will contain any software-related
|
||||
# reboot info. We will use it as the previous cause.
|
||||
if os.path.isfile(REBOOT_CAUSE_FILE):
|
||||
cause_file = open(REBOOT_CAUSE_FILE, "r")
|
||||
previous_reboot_cause = cause_file.readline().rstrip('\n')
|
||||
cause_file.close()
|
||||
previous_reboot_cause = find_software_reboot_cause()
|
||||
|
||||
# If it is FirstTime Boot and previous_reboot_cause is unknown
|
||||
# Update the reboot cause as required
|
||||
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
|
||||
if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE):
|
||||
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
|
||||
os.remove(FIRST_BOOT_PLATFORM_FILE)
|
||||
# Write the previous reboot cause to PREVIOUS_REBOOT_CAUSE_FILE
|
||||
prev_cause_file = open(PREVIOUS_REBOOT_CAUSE_FILE, "w")
|
||||
prev_cause_file.write(previous_reboot_cause)
|
||||
prev_cause_file.close()
|
||||
with open(PREVIOUS_REBOOT_CAUSE_FILE, "w") as prev_cause_file:
|
||||
prev_cause_file.write(previous_reboot_cause)
|
||||
|
||||
# Also log the previous reboot cause to the syslog
|
||||
log_info("Previous reboot cause: {}".format(previous_reboot_cause))
|
||||
@ -127,9 +160,8 @@ def main():
|
||||
os.remove(REBOOT_CAUSE_FILE)
|
||||
|
||||
# Write a new default reboot cause file for the next reboot
|
||||
cause_file = open(REBOOT_CAUSE_FILE, "w")
|
||||
cause_file.write(UNKNOWN_REBOOT_CAUSE)
|
||||
cause_file.close()
|
||||
with open(REBOOT_CAUSE_FILE, "w") as cause_file:
|
||||
cause_file.write(UNKNOWN_REBOOT_CAUSE)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
Reference in New Issue
Block a user