This repository has been archived on 2025-03-20. You can view files and clone it, but cannot push or open issues or pull requests.
sonic-buildimage/src/sonic-host-services/scripts/determine-reboot-cause
Vaibhav Hemant Dixit 40f69f0aac
[determine-reboot-cause] Skip invoking platform code for unit tests for hardware_reboot_cause (#6325)
What: Modify unit test to not call any platform dependent api in test_find_hardware_reboot_cause.

- Why I did it
MELLANOX build is failing for the recent PRs. The errors are due to platform library being invoked in a unit test for determine-reboot-cause script.

Verified by running unit tests and a successful Mellanox build.

Co-authored-by: Vaibhav Hemant Dixit <vadixit@microsoft.com>
2020-12-30 17:48:56 -08:00

229 lines
9.4 KiB
Python
Executable File

#!/usr/bin/env python3
#
# determine-reboot-cause
#
# Program designed to run once, soon after system boot which will
# determine the cause of the previous reboot and store it to the disk,
#
try:
import datetime
import json
import os
import pwd
import re
import sys
from sonic_py_common import device_info, logger
except ImportError as err:
raise ImportError("%s - required module not found" % str(err))
VERSION = "1.0"
SYSLOG_IDENTIFIER = "determine-reboot-cause"
REBOOT_CAUSE_DIR = "/host/reboot-cause/"
REBOOT_CAUSE_HISTORY_DIR = "/host/reboot-cause/history/"
REBOOT_CAUSE_FILE = os.path.join(REBOOT_CAUSE_DIR, "reboot-cause.txt")
PREVIOUS_REBOOT_CAUSE_FILE = os.path.join(REBOOT_CAUSE_DIR, "previous-reboot-cause.json")
FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform"
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
# The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities
# Because the system can be rebooted from some old versions, we have to take all possible BOOT options into consideration.
# On 201803, 201807 we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') fast-reboot"
# On 201811 and later we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" where BOOT_TYPE_ARG can be warm, fastfast or fast
# To extract the commom part of them, we should have the following PATTERN
REBOOT_TYPE_KEXEC_PATTERN_WARM = ".*SONIC_BOOT_TYPE=(warm|fastfast).*"
REBOOT_TYPE_KEXEC_PATTERN_FAST = ".*SONIC_BOOT_TYPE=(fast|fast-reboot).*"
REBOOT_CAUSE_UNKNOWN = "Unknown"
REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware"
REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other"
# Global logger class instance
sonic_logger = logger.Logger(SYSLOG_IDENTIFIER)
# ============================= Functions =============================
def parse_warmfast_reboot_from_proc_cmdline():
if os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
with open(REBOOT_TYPE_KEXEC_FILE) as cause_file:
cause_file_kexec = cause_file.readline()
m = re.search(REBOOT_TYPE_KEXEC_PATTERN_WARM, cause_file_kexec)
if m and m.group(1):
return 'warm-reboot'
m = re.search(REBOOT_TYPE_KEXEC_PATTERN_FAST, cause_file_kexec)
if m and m.group(1):
return 'fast-reboot'
return None
def find_software_reboot_cause_from_reboot_cause_file():
software_reboot_cause = None
if os.path.isfile(REBOOT_CAUSE_FILE):
with open(REBOOT_CAUSE_FILE) as cause_file:
software_reboot_cause = cause_file.readline().rstrip('\n')
sonic_logger.log_info("{} indicates the reboot cause: {}".format(REBOOT_CAUSE_FILE, software_reboot_cause))
else:
sonic_logger.log_info("Reboot cause file {} not found".format(REBOOT_CAUSE_FILE))
return software_reboot_cause
def find_first_boot_version():
build_version = "unknown"
version_info = device_info.get_sonic_version_info()
if version_info:
build_version = version_info['build_version']
return " (First boot of SONiC version {})".format(build_version)
def find_software_reboot_cause():
software_reboot_cause = find_software_reboot_cause_from_reboot_cause_file()
if software_reboot_cause == REBOOT_CAUSE_UNKNOWN:
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
software_reboot_cause += find_first_boot_version()
os.remove(FIRST_BOOT_PLATFORM_FILE)
return software_reboot_cause
def find_proc_cmdline_reboot_cause():
proc_cmdline_reboot_cause = parse_warmfast_reboot_from_proc_cmdline()
if proc_cmdline_reboot_cause:
sonic_logger.log_info("/proc/cmdline indicates reboot type: {}".format(proc_cmdline_reboot_cause))
else:
sonic_logger.log_info("No reboot cause found from /proc/cmdline")
return proc_cmdline_reboot_cause
def get_reboot_cause_from_platform():
# Find hardware reboot cause using sonic_platform library
try:
import sonic_platform
platform = sonic_platform.platform.Platform()
chassis = platform.get_chassis()
hardware_reboot_cause_major, hardware_reboot_cause_minor = chassis.get_reboot_cause()
sonic_logger.log_info("Platform api returns reboot cause {}, {}".format(hardware_reboot_cause_major, hardware_reboot_cause_minor))
except ImportError:
sonic_logger.log_warning("sonic_platform package not installed. Unable to detect hardware reboot causes.")
hardware_reboot_cause_major, hardware_reboot_cause_minor = REBOOT_CAUSE_NON_HARDWARE, "N/A"
return hardware_reboot_cause_major, hardware_reboot_cause_minor
def find_hardware_reboot_cause():
hardware_reboot_cause_major, hardware_reboot_cause_minor = get_reboot_cause_from_platform()
if hardware_reboot_cause_major:
sonic_logger.log_info("Platform api indicates reboot cause {}".format(hardware_reboot_cause_major))
else:
sonic_logger.log_info("No reboot cause found from platform api")
hardware_reboot_cause = "{} ({})".format(hardware_reboot_cause_major, hardware_reboot_cause_minor)
return hardware_reboot_cause
def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time):
# resultant dictionary
reboot_cause_dict = {}
reboot_cause_dict['gen_time'] = gen_time
reboot_cause_dict['cause'] = previous_reboot_cause
reboot_cause_dict['user'] = "N/A"
reboot_cause_dict['time'] = "N/A"
reboot_cause_dict['comment'] = comment if comment is not None else "N/A"
if re.search(r'User issued', previous_reboot_cause):
# Match with "User issued '{}' command [User: {}, Time: {}]"
match = re.search(r'User issued \'(.*)\' command \[User: (.*), Time: (.*)\]', previous_reboot_cause)
if match is not None:
reboot_cause_dict['cause'] = match.group(1)
reboot_cause_dict['user'] = match.group(2)
reboot_cause_dict['time'] = match.group(3)
return reboot_cause_dict
def main():
# Configure logger to log all messages INFO level and higher
sonic_logger.set_min_log_priority_info()
sonic_logger.log_info("Starting up...")
if not os.geteuid() == 0:
sonic_logger.log_error("User {} does not have permission to execute".format(pwd.getpwuid(os.getuid()).pw_name))
sys.exit("This utility must be run as root")
# Create REBOOT_CAUSE_DIR if it doesn't exist
if not os.path.exists(REBOOT_CAUSE_DIR):
os.makedirs(REBOOT_CAUSE_DIR)
# Remove stale PREVIOUS_REBOOT_CAUSE_FILE if it exists
if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE):
os.remove(PREVIOUS_REBOOT_CAUSE_FILE)
hardware_reboot_cause = None
# This variable is kept for future-use purpose. When proc_cmd_line/vendor/software provides
# any additional_reboot_info it will be stored as a "comment" in REBOOT_CAUSE_HISTORY_FILE
additional_reboot_info = "N/A"
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()
# 2. Check if the previous reboot was caused by hardware
# If yes, the hardware reboot cause will be treated as the reboot cause
hardware_reboot_cause = find_hardware_reboot_cause()
# 3. If there is a REBOOT_CAUSE_FILE, it will contain any software-related
# reboot info. We will use it as the previous cause.
software_reboot_cause = find_software_reboot_cause()
# The main decision logic of the reboot cause:
# If there is a reboot cause indicated by /proc/cmdline, it should be warmreboot/fastreboot
# the software_reboot_cause which is the content of /hosts/reboot-cause/reboot-cause.txt
# will be treated as the reboot cause
# Elif there is a reboot cause indicated by platform API,
# the hardware_reboot_cause will be treated as the reboot cause
# Else the software_reboot_cause will be treated as the reboot cause
if proc_cmdline_reboot_cause is not None:
previous_reboot_cause = software_reboot_cause
elif hardware_reboot_cause is not REBOOT_CAUSE_NON_HARDWARE:
previous_reboot_cause = hardware_reboot_cause
else:
previous_reboot_cause = software_reboot_cause
# Current time
reboot_cause_gen_time = str(datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))
# Save the previous cause info into its history file as json format
reboot_cause_dict = get_reboot_cause_dict(previous_reboot_cause, additional_reboot_info, reboot_cause_gen_time)
# Create reboot-cause-#time#.json under history directory
REBOOT_CAUSE_HISTORY_FILE = os.path.join(REBOOT_CAUSE_HISTORY_DIR, "reboot-cause-{}.json".format(reboot_cause_gen_time))
# Create REBOOT_CAUSE_HISTORY_DIR if it doesn't exist
if not os.path.exists(REBOOT_CAUSE_HISTORY_DIR):
os.makedirs(REBOOT_CAUSE_HISTORY_DIR)
# Write the previous reboot cause to REBOOT_CAUSE_HISTORY_FILE as a JSON format
with open(REBOOT_CAUSE_HISTORY_FILE, "w") as reboot_cause_history_file:
json.dump(reboot_cause_dict, reboot_cause_history_file)
# Create a symbolic link to previous-reboot-cause.json file
os.symlink(REBOOT_CAUSE_HISTORY_FILE, PREVIOUS_REBOOT_CAUSE_FILE)
# Remove the old REBOOT_CAUSE_FILE
if os.path.exists(REBOOT_CAUSE_FILE):
os.remove(REBOOT_CAUSE_FILE)
# Write a new default reboot cause file for the next reboot
with open(REBOOT_CAUSE_FILE, "w") as cause_file:
cause_file.write(REBOOT_CAUSE_UNKNOWN)
if __name__ == "__main__":
main()