[reboot-history] Add reboot history to state db (#5933)
- Why I did it: Add reboot history to the state DB so that it can be used by the telemetry service. - How I did it: Split the process-reboot-cause service into determine-reboot-cause and process-reboot-cause: determine-reboot-cause determines the reboot cause, and process-reboot-cause parses the reboot cause files and puts the reboot history into the state DB. Moved both to the sonic-host-service* packages. - How to verify it: Performed unit tests and tested on a DUT.
This commit is contained in:
parent
d3c1a5bf39
commit
5b31996f7b
@ -428,16 +428,6 @@ sudo cp $IMAGE_CONFIGS/pcie-check/pcie-check.service $FILESYSTEM_ROOT_USR_LIB_SY
|
|||||||
echo "pcie-check.service" | sudo tee -a $GENERATED_SERVICE_FILE
|
echo "pcie-check.service" | sudo tee -a $GENERATED_SERVICE_FILE
|
||||||
sudo cp $IMAGE_CONFIGS/pcie-check/pcie-check.sh $FILESYSTEM_ROOT/usr/bin/
|
sudo cp $IMAGE_CONFIGS/pcie-check/pcie-check.sh $FILESYSTEM_ROOT/usr/bin/
|
||||||
|
|
||||||
# Copy systemd timer configuration
|
|
||||||
# It implements delayed start of services
|
|
||||||
sudo cp $BUILD_TEMPLATES/process-reboot-cause.timer $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM
|
|
||||||
sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable process-reboot-cause.timer
|
|
||||||
|
|
||||||
# Copy process-reboot-cause service files
|
|
||||||
sudo cp $IMAGE_CONFIGS/process-reboot-cause/process-reboot-cause.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM
|
|
||||||
echo "process-reboot-cause.service" | sudo tee -a $GENERATED_SERVICE_FILE
|
|
||||||
sudo cp $IMAGE_CONFIGS/process-reboot-cause/process-reboot-cause $FILESYSTEM_ROOT/usr/bin/
|
|
||||||
|
|
||||||
## Install package without starting service
|
## Install package without starting service
|
||||||
## ref: https://wiki.debian.org/chroot
|
## ref: https://wiki.debian.org/chroot
|
||||||
sudo tee -a $FILESYSTEM_ROOT/usr/sbin/policy-rc.d > /dev/null <<EOF
|
sudo tee -a $FILESYSTEM_ROOT/usr/sbin/policy-rc.d > /dev/null <<EOF
|
||||||
|
@ -9,3 +9,6 @@ override_dh_installsystemd:
|
|||||||
dh_installsystemd --no-start --name=caclmgrd
|
dh_installsystemd --no-start --name=caclmgrd
|
||||||
dh_installsystemd --no-start --name=hostcfgd
|
dh_installsystemd --no-start --name=hostcfgd
|
||||||
dh_installsystemd --no-start --name=procdockerstatsd
|
dh_installsystemd --no-start --name=procdockerstatsd
|
||||||
|
dh_installsystemd --no-start --name=determine-reboot-cause
|
||||||
|
dh_installsystemd --no-start --name=process-reboot-cause
|
||||||
|
|
||||||
|
@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Reboot cause determination service
|
||||||
|
Requires=rc-local.service
|
||||||
|
After=rc-local.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
ExecStart=/usr/local/bin/determine-reboot-cause
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
@ -1,28 +1,32 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python3
|
||||||
#
|
#
|
||||||
# process-reboot-cause
|
# determine-reboot-cause
|
||||||
#
|
#
|
||||||
# Program designed to run once, soon after system boot which will
|
# Program designed to run once, soon after system boot which will
|
||||||
# determine the cause of the previous reboot and store it to the disk,
|
# determine the cause of the previous reboot and store it to the disk,
|
||||||
#
|
#
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import pwd
|
import pwd
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from sonic_py_common import device_info, logger
|
from sonic_py_common import device_info, logger
|
||||||
|
|
||||||
except ImportError as err:
|
except ImportError as err:
|
||||||
raise ImportError("%s - required module not found" % str(err))
|
raise ImportError("%s - required module not found" % str(err))
|
||||||
|
|
||||||
VERSION = "1.0"
|
VERSION = "1.0"
|
||||||
|
|
||||||
SYSLOG_IDENTIFIER = "process-reboot-cause"
|
SYSLOG_IDENTIFIER = "determine-reboot-cause"
|
||||||
|
|
||||||
REBOOT_CAUSE_DIR = "/host/reboot-cause/"
|
REBOOT_CAUSE_DIR = "/host/reboot-cause/"
|
||||||
REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "reboot-cause.txt"
|
REBOOT_CAUSE_HISTORY_DIR = "/host/reboot-cause/history/"
|
||||||
PREVIOUS_REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "previous-reboot-cause.txt"
|
REBOOT_CAUSE_FILE = os.path.join(REBOOT_CAUSE_DIR, "reboot-cause.txt")
|
||||||
|
PREVIOUS_REBOOT_CAUSE_FILE = os.path.join(REBOOT_CAUSE_DIR, "previous-reboot-cause.json")
|
||||||
FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform"
|
FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform"
|
||||||
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
|
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
|
||||||
# The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities
|
# The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities
|
||||||
@ -45,7 +49,7 @@ sonic_logger = logger.Logger(SYSLOG_IDENTIFIER)
|
|||||||
# ============================= Functions =============================
|
# ============================= Functions =============================
|
||||||
def parse_warmfast_reboot_from_proc_cmdline():
|
def parse_warmfast_reboot_from_proc_cmdline():
|
||||||
if os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
|
if os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
|
||||||
with open(REBOOT_TYPE_KEXEC_FILE, "r") as cause_file:
|
with open(REBOOT_TYPE_KEXEC_FILE) as cause_file:
|
||||||
cause_file_kexec = cause_file.readline()
|
cause_file_kexec = cause_file.readline()
|
||||||
m = re.search(REBOOT_TYPE_KEXEC_PATTERN_WARM, cause_file_kexec)
|
m = re.search(REBOOT_TYPE_KEXEC_PATTERN_WARM, cause_file_kexec)
|
||||||
if m and m.group(1):
|
if m and m.group(1):
|
||||||
@ -56,69 +60,100 @@ def parse_warmfast_reboot_from_proc_cmdline():
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def find_software_reboot_cause():
|
def find_software_reboot_cause_from_reboot_cause_file():
    """Read the software reboot cause recorded in REBOOT_CAUSE_FILE.

    Returns the first line of the file with trailing newlines stripped,
    or None when the file does not exist. The outcome is logged either way.
    """
    # Guard clause: nothing to read when the file is absent.
    if not os.path.isfile(REBOOT_CAUSE_FILE):
        sonic_logger.log_info("Reboot cause file {} not found".format(REBOOT_CAUSE_FILE))
        return None

    with open(REBOOT_CAUSE_FILE) as cause_file:
        first_line = cause_file.readline()

    recorded_cause = first_line.rstrip('\n')
    sonic_logger.log_info("{} indicates the reboot cause: {}".format(REBOOT_CAUSE_FILE, recorded_cause))
    return recorded_cause
|
||||||
|
|
||||||
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
|
|
||||||
if software_reboot_cause == REBOOT_CAUSE_UNKNOWN:
|
|
||||||
version_info = device_info.get_sonic_version_info()
|
|
||||||
build_version = version_info['build_version'] if version_info else "unknown"
|
|
||||||
software_reboot_cause += " (First boot of SONiC version {})".format(build_version)
|
|
||||||
os.remove(FIRST_BOOT_PLATFORM_FILE)
|
|
||||||
|
|
||||||
|
def find_first_boot_version():
    """Return the " (First boot of SONiC version X)" suffix.

    X is the 'build_version' entry reported by
    device_info.get_sonic_version_info(), or "unknown" when that call
    returns nothing usable.
    """
    info = device_info.get_sonic_version_info()
    version = info['build_version'] if info else "unknown"
    return " (First boot of SONiC version {})".format(version)
|
||||||
|
|
||||||
|
|
||||||
|
def find_software_reboot_cause():
    """Determine the software-related reboot cause.

    Starts from the cause recorded in REBOOT_CAUSE_FILE. When no cause was
    recorded, the result is REBOOT_CAUSE_UNKNOWN; if the first-boot marker
    file is also present, the first-boot version suffix is appended and the
    marker is removed.

    Returns:
        The reboot cause string (never None).
    """
    software_reboot_cause = find_software_reboot_cause_from_reboot_cause_file()

    # The file reader returns None when REBOOT_CAUSE_FILE is missing. Treat
    # that as "unknown" so the first-boot check below can fire and callers
    # always receive a string.
    if software_reboot_cause is None:
        software_reboot_cause = REBOOT_CAUSE_UNKNOWN

    if software_reboot_cause == REBOOT_CAUSE_UNKNOWN:
        if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
            software_reboot_cause += find_first_boot_version()
            os.remove(FIRST_BOOT_PLATFORM_FILE)

    return software_reboot_cause
|
||||||
|
|
||||||
|
|
||||||
def find_proc_cmdline_reboot_cause():
|
def find_proc_cmdline_reboot_cause():
    """Return the reboot type parsed from /proc/cmdline, logging the outcome.

    The value comes straight from parse_warmfast_reboot_from_proc_cmdline();
    a falsy result means no reboot type was found there.
    """
    cmdline_cause = parse_warmfast_reboot_from_proc_cmdline()

    if not cmdline_cause:
        message = "No reboot cause found from /proc/cmdline"
    else:
        message = "/proc/cmdline indicates reboot type: {}".format(cmdline_cause)
    sonic_logger.log_info(message)

    return cmdline_cause
|
||||||
|
|
||||||
|
|
||||||
def find_hardware_reboot_cause():
|
|
||||||
hardware_reboot_cause = None
|
|
||||||
|
|
||||||
|
def get_reboot_cause_from_platform():
    """Ask the platform API for the hardware reboot cause.

    Returns whatever chassis.get_reboot_cause() returns, or None when the
    sonic_platform package cannot be imported (a warning is logged).
    """
    # Not every platform vendor ships a sonic_platform package yet. Without
    # it, only software-related reboot causes can be reported.
    try:
        import sonic_platform
        return sonic_platform.platform.Platform().get_chassis().get_reboot_cause()
    except ImportError:
        sonic_logger.log_warning("sonic_platform package not installed. Unable to detect hardware reboot causes.")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def find_hardware_reboot_cause():
    """Determine the hardware reboot cause reported by the platform API.

    Returns:
        A (hardware_reboot_cause, minor) tuple. hardware_reboot_cause is None
        when the platform reports a non-hardware cause or when no platform
        information is available. minor is the second value returned by the
        platform API, or None when the platform API is unavailable.
    """
    REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other"
    REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware"

    hardware_reboot_cause = None
    hardware_reboot_cause_minor = None

    # get_reboot_cause_from_platform() returns None when sonic_platform is
    # not installed. Unpacking that result directly would raise TypeError.
    platform_reboot_cause = get_reboot_cause_from_platform()
    if platform_reboot_cause is not None:
        hardware_reboot_cause_major, hardware_reboot_cause_minor = platform_reboot_cause
        sonic_logger.log_info("Platform api returns reboot cause {}, {}".format(hardware_reboot_cause_major, hardware_reboot_cause_minor))

        if hardware_reboot_cause_major == REBOOT_CAUSE_NON_HARDWARE:
            # The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
            # contain any software-related reboot info. We will use it as the previous cause.
            pass
        elif hardware_reboot_cause_major == REBOOT_CAUSE_HARDWARE_OTHER:
            hardware_reboot_cause = "{} ({})".format(hardware_reboot_cause_major, hardware_reboot_cause_minor)
        else:
            hardware_reboot_cause = hardware_reboot_cause_major

    if hardware_reboot_cause:
        sonic_logger.log_info("Platform api indicates reboot cause {}".format(hardware_reboot_cause))
    else:
        sonic_logger.log_info("No reboot cause found from platform api")

    return hardware_reboot_cause, hardware_reboot_cause_minor
|
||||||
|
|
||||||
|
def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time):
    """Build the dictionary that is saved as a reboot-cause history entry.

    Args:
        previous_reboot_cause: The reboot cause string. None is treated as
            "Unknown" so that a missing cause cannot crash the parser.
        comment: Additional reboot information; None is stored as "N/A".
        gen_time: Timestamp string identifying when the entry was generated.

    Returns:
        A dict with the keys 'gen_time', 'cause', 'user', 'time' and
        'comment'. For causes of the form
        "User issued '<cmd>' command [User: <user>, Time: <time>]" the three
        parts are split into 'cause', 'user' and 'time'; otherwise 'user' and
        'time' are "N/A" and 'cause' is the full string.
    """
    if previous_reboot_cause is None:
        previous_reboot_cause = "Unknown"

    reboot_cause_dict = {
        'gen_time': gen_time,
        'cause': previous_reboot_cause,
        'user': "N/A",
        'time': "N/A",
        'comment': comment if comment is not None else "N/A",
    }

    # Match with "User issued '{}' command [User: {}, Time: {}]". A single
    # search is enough: the full pattern can only match when "User issued"
    # is present.
    match = re.search(r'User issued \'(.*)\' command \[User: (.*), Time: (.*)\]', previous_reboot_cause)
    if match is not None:
        reboot_cause_dict['cause'] = match.group(1)
        reboot_cause_dict['user'] = match.group(2)
        reboot_cause_dict['time'] = match.group(3)

    return reboot_cause_dict
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -139,15 +174,15 @@ def main():
|
|||||||
if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE):
|
if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE):
|
||||||
os.remove(PREVIOUS_REBOOT_CAUSE_FILE)
|
os.remove(PREVIOUS_REBOOT_CAUSE_FILE)
|
||||||
|
|
||||||
# Set a default previous reboot cause
|
hardware_reboot_cause = None
|
||||||
previous_reboot_cause = REBOOT_CAUSE_UNKNOWN
|
additional_reboot_info = None
|
||||||
|
|
||||||
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
|
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
|
||||||
proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()
|
proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()
|
||||||
|
|
||||||
# 2. Check if the previous reboot was caused by hardware
|
# 2. Check if the previous reboot was caused by hardware
|
||||||
# If yes, the hardware reboot cause will be treated as the reboot cause
|
# If yes, the hardware reboot cause will be treated as the reboot cause
|
||||||
hardware_reboot_cause = find_hardware_reboot_cause()
|
(hardware_reboot_cause, additional_reboot_info) = find_hardware_reboot_cause()
|
||||||
|
|
||||||
# 3. If there is a REBOOT_CAUSE_FILE, it will contain any software-related
|
# 3. If there is a REBOOT_CAUSE_FILE, it will contain any software-related
|
||||||
# reboot info. We will use it as the previous cause.
|
# reboot info. We will use it as the previous cause.
|
||||||
@ -155,7 +190,7 @@ def main():
|
|||||||
|
|
||||||
# The main decision logic of the reboot cause:
|
# The main decision logic of the reboot cause:
|
||||||
# If there is a reboot cause indicated by /proc/cmdline, it should be warmreboot/fastreboot
|
# If there is a reboot cause indicated by /proc/cmdline, it should be warmreboot/fastreboot
|
||||||
# the software_reboot_cause which is the content of /hosts/reboot-cause/reboot-cause.txt
|
# the software_reboot_cause which is the content of /hosts/reboot-cause/reboot-cause.txt
|
||||||
# will be treated as the reboot cause
|
# will be treated as the reboot cause
|
||||||
# Elif there is a reboot cause indicated by platform API,
|
# Elif there is a reboot cause indicated by platform API,
|
||||||
# the hardware_reboot_cause will be treated as the reboot cause
|
# the hardware_reboot_cause will be treated as the reboot cause
|
||||||
@ -167,12 +202,26 @@ def main():
|
|||||||
else:
|
else:
|
||||||
previous_reboot_cause = software_reboot_cause
|
previous_reboot_cause = software_reboot_cause
|
||||||
|
|
||||||
# Write the previous reboot cause to PREVIOUS_REBOOT_CAUSE_FILE
|
# Current time
|
||||||
with open(PREVIOUS_REBOOT_CAUSE_FILE, "w") as prev_cause_file:
|
reboot_cause_gen_time = str(datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))
|
||||||
prev_cause_file.write(previous_reboot_cause)
|
|
||||||
|
# Save the previous cause info into its history file as json format
|
||||||
|
reboot_cause_dict = get_reboot_cause_dict(previous_reboot_cause, additional_reboot_info, reboot_cause_gen_time)
|
||||||
|
|
||||||
|
# Create reboot-cause-#time#.json under history directory
|
||||||
|
REBOOT_CAUSE_HISTORY_FILE = os.path.join(REBOOT_CAUSE_HISTORY_DIR, "reboot-cause-{}.json".format(reboot_cause_gen_time))
|
||||||
|
|
||||||
|
# Create REBOOT_CAUSE_HISTORY_DIR if it doesn't exist
|
||||||
|
if not os.path.exists(REBOOT_CAUSE_HISTORY_DIR):
|
||||||
|
os.makedirs(REBOOT_CAUSE_HISTORY_DIR)
|
||||||
|
|
||||||
|
# Write the previous reboot cause to REBOOT_CAUSE_HISTORY_FILE as a JSON format
|
||||||
|
with open(REBOOT_CAUSE_HISTORY_FILE, "w") as reboot_cause_history_file:
|
||||||
|
json.dump(reboot_cause_dict, reboot_cause_history_file)
|
||||||
|
|
||||||
|
# Create a symbolic link to previous-reboot-cause.json file
|
||||||
|
os.symlink(REBOOT_CAUSE_HISTORY_FILE, PREVIOUS_REBOOT_CAUSE_FILE)
|
||||||
|
|
||||||
# Also log the previous reboot cause to the syslog
|
|
||||||
sonic_logger.log_info("Previous reboot cause: {}".format(previous_reboot_cause))
|
|
||||||
|
|
||||||
# Remove the old REBOOT_CAUSE_FILE
|
# Remove the old REBOOT_CAUSE_FILE
|
||||||
if os.path.exists(REBOOT_CAUSE_FILE):
|
if os.path.exists(REBOOT_CAUSE_FILE):
|
100
src/sonic-host-services/scripts/process-reboot-cause
Normal file
100
src/sonic-host-services/scripts/process-reboot-cause
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
#
|
||||||
|
# process-reboot-cause
|
||||||
|
#
|
||||||
|
# Program designed to read the previous reboot-cause files, log the last previous reboot-cause.
|
||||||
|
# And read the saved reboot-cause history files and save the reboot cause in the state-db.
|
||||||
|
#
|
||||||
|
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import pwd
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import swsssdk
|
||||||
|
from sonic_py_common import logger
|
||||||
|
except ImportError as err:
|
||||||
|
raise ImportError("%s - required module not found" % str(err))
|
||||||
|
|
||||||
|
VERSION = "1.0"
|
||||||
|
|
||||||
|
SYSLOG_IDENTIFIER = "process-reboot-cause"
|
||||||
|
|
||||||
|
REBOOT_CAUSE_DIR = "/host/reboot-cause/"
|
||||||
|
REBOOT_CAUSE_HISTORY_DIR = "/host/reboot-cause/history/"
|
||||||
|
PREVIOUS_REBOOT_CAUSE_FILE = os.path.join(REBOOT_CAUSE_DIR, "previous-reboot-cause.json")
|
||||||
|
USER_ISSUED_REBOOT_CAUSE_REGEX ="User issued \'{}\' command [User: {}, Time: {}]"
|
||||||
|
|
||||||
|
REBOOT_CAUSE_UNKNOWN = "Unknown"
|
||||||
|
REBOOT_CAUSE_TABLE_NAME = "REBOOT_CAUSE"
|
||||||
|
|
||||||
|
REDIS_HOSTIP = "127.0.0.1"
|
||||||
|
state_db = None
|
||||||
|
|
||||||
|
# Global logger class instance
|
||||||
|
sonic_logger = logger.Logger(SYSLOG_IDENTIFIER)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================= Functions =============================
|
||||||
|
def read_reboot_cause_files_and_save_state_db():
    """Load the newest reboot-cause history files into the state DB.

    The regular files under REBOOT_CAUSE_HISTORY_DIR are ordered by
    modification time, newest first. The first ten are parsed as JSON and
    written to the state DB under "REBOOT_CAUSE|<gen_time>" with the fields
    'cause', 'time', 'user' and 'comment'. Any older history files are
    deleted. A file that cannot be parsed, or that lacks a required key, is
    logged and skipped so one bad file does not block the rest.
    """
    max_entries = 10
    required_fields = ('cause', 'time', 'user', 'comment')

    # Connect State DB
    state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP)
    state_db.connect(state_db.STATE_DB)

    # Only regular files are history entries. Anything else in the directory
    # is neither parsed nor handed to os.remove(), which cannot delete a
    # directory.
    candidate_paths = (os.path.join(REBOOT_CAUSE_HISTORY_DIR, name) for name in os.listdir(REBOOT_CAUSE_HISTORY_DIR))
    history_files = sorted(
        (path for path in candidate_paths if os.path.isfile(path)),
        key=os.path.getmtime,
        reverse=True,
    )

    for path in history_files[:max_entries]:
        try:
            with open(path, "r") as cause_file:
                data = json.load(cause_file)
            # Resolve every key before writing so a bad entry is never
            # stored partially.
            _hash = '{}|{}'.format(REBOOT_CAUSE_TABLE_NAME, data['gen_time'])
            values = [(field, data[field]) for field in required_fields]
        except (ValueError, KeyError, TypeError) as err:
            sonic_logger.log_warning("Skipping invalid reboot cause file {}: {}".format(path, err))
            continue

        for field, value in values:
            state_db.set(state_db.STATE_DB, _hash, field, value)

    # Keep only the newest max_entries history files on disk.
    for path in history_files[max_entries:]:
        os.remove(path)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Log the previous reboot cause and load the reboot history into the state DB.

    Must run as root; otherwise an error is logged and the process exits.
    The previous cause is read from PREVIOUS_REBOOT_CAUSE_FILE when that
    file exists, and defaults to REBOOT_CAUSE_UNKNOWN.
    """
    # Configure logger to log all messages INFO level and higher
    sonic_logger.set_min_log_priority_info()

    sonic_logger.log_info("Starting up...")

    if os.geteuid() != 0:
        sonic_logger.log_error("User {} does not have permission to execute".format(pwd.getpwuid(os.getuid()).pw_name))
        sys.exit("This utility must be run as root")

    # Set a default previous reboot cause
    previous_reboot_cause = REBOOT_CAUSE_UNKNOWN

    # Read the most recent reboot cause file and log data to syslog
    if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE):
        try:
            with open(PREVIOUS_REBOOT_CAUSE_FILE, "r") as last_cause_file:
                data = json.load(last_cause_file)

            # determine-reboot-cause stores "N/A" as the user for reboots
            # that were not user-issued, so a plain truthiness test would
            # label every cause as "User issued ...".
            user = data['user']
            if user and user != "N/A":
                previous_reboot_cause = USER_ISSUED_REBOOT_CAUSE_REGEX.format(data['cause'], user, data['time'])
            else:
                previous_reboot_cause = "{}".format(data['cause'])
        except (ValueError, KeyError, TypeError) as err:
            # A corrupt file must not stop the history from being loaded below.
            sonic_logger.log_warning("Unable to parse {}: {}".format(PREVIOUS_REBOOT_CAUSE_FILE, err))

    # Log the last reboot cause to the syslog
    sonic_logger.log_info("Previous reboot cause: {}".format(previous_reboot_cause))

    if os.path.exists(REBOOT_CAUSE_HISTORY_DIR):
        # Read the previous reboot cause from saved reboot-cause files and save the previous reboot cause upto 10 entry to the state db
        read_reboot_cause_files_and_save_state_db()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@ -14,6 +14,8 @@ setup(
|
|||||||
'scripts/caclmgrd',
|
'scripts/caclmgrd',
|
||||||
'scripts/hostcfgd',
|
'scripts/hostcfgd',
|
||||||
'scripts/procdockerstatsd',
|
'scripts/procdockerstatsd',
|
||||||
|
'scripts/determine-reboot-cause',
|
||||||
|
'scripts/process-reboot-cause',
|
||||||
],
|
],
|
||||||
install_requires = [
|
install_requires = [
|
||||||
'Jinja2>=2.10',
|
'Jinja2>=2.10',
|
||||||
|
117
src/sonic-host-services/tests/determine-reboot-cause_test.py
Normal file
117
src/sonic-host-services/tests/determine-reboot-cause_test.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
import imp
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import swsssdk
|
||||||
|
|
||||||
|
# TODO: Remove this if/else block once we no longer support Python 2
|
||||||
|
if sys.version_info.major == 3:
|
||||||
|
from unittest import mock
|
||||||
|
else:
|
||||||
|
# Expect the 'mock' package for python 2
|
||||||
|
# https://pypi.python.org/pypi/mock
|
||||||
|
import mock
|
||||||
|
|
||||||
|
# TODO: Remove this if/else block once we no longer support Python 2
|
||||||
|
if sys.version_info.major == 3:
|
||||||
|
BUILTINS = "builtins"
|
||||||
|
else:
|
||||||
|
BUILTINS = "__builtin__"
|
||||||
|
|
||||||
|
from .mock_connector import MockConnector
|
||||||
|
|
||||||
|
swsssdk.SonicV2Connector = MockConnector
|
||||||
|
|
||||||
|
test_path = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
modules_path = os.path.dirname(test_path)
|
||||||
|
scripts_path = os.path.join(modules_path, "scripts")
|
||||||
|
sys.path.insert(0, modules_path)
|
||||||
|
|
||||||
|
PROC_CMDLINE_CONTENTS = """\
|
||||||
|
BOOT_IMAGE=/image-20191130.52/boot/vmlinuz-4.9.0-11-2-amd64 root=/dev/sda4 rw console=tty0 console=ttyS1,9600n8 quiet net.ifnames=0 biosdevname=0 loop=image-20191130.52/fs.squashfs loopfstype=squashfs apparmor=1 security=apparmor varlog_size=4096 usbcore.autosuspend=-1 module_blacklist=gpio_ich SONIC_BOOT_TYPE=warm"""
|
||||||
|
|
||||||
|
EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE = "warm"
|
||||||
|
|
||||||
|
PROC_CMDLINE_CONTENTS = """\
|
||||||
|
BOOT_IMAGE=/image-20191130.52/boot/vmlinuz-4.9.0-11-2-amd64 root=/dev/sda4 rw console=tty0 console=ttyS1,9600n8 quiet net.ifnames=0 biosdevname=0 loop=image-20191130.52/fs.squashfs loopfstype=squashfs apparmor=1 security=apparmor varlog_size=4096 usbcore.autosuspend=-1 module_blacklist=gpio_ich SONIC_BOOT_TYPE=warm"""
|
||||||
|
|
||||||
|
REBOOT_CAUSE_CONTENTS = """\
|
||||||
|
User issued 'warm-reboot' command [User: admin, Time: Mon Nov 2 22:37:45 UTC 2020]"""
|
||||||
|
|
||||||
|
GET_SONIC_VERSION_INFO = {'commit_id': 'e59ec8291', 'build_date': 'Mon Nov 2 06:00:14 UTC 2020', 'build_number': 75, 'kernel_version': '4.9.0-11-2-amd64', 'debian_version': '9.13', 'built_by': 'sonicbld@jenkins-slave-phx-2', 'asic_type': 'mellanox', 'build_version': '20191130.52'}
|
||||||
|
|
||||||
|
REBOOT_CAUSE_WATCHDOG = "Watchdog"
|
||||||
|
GEN_TIME_WATCHDOG = "2020_10_22_03_15_08"
|
||||||
|
REBOOT_CAUSE_USER = "User issued 'reboot' command [User: admin, Time: Thu Oct 22 03:11:08 UTC 2020]"
|
||||||
|
GEN_TIME_USER = "2020_10_22_03_14_07"
|
||||||
|
|
||||||
|
EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE = "warm-reboot"
|
||||||
|
EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER = "User issued 'warm-reboot' command [User: admin, Time: Mon Nov 2 22:37:45 UTC 2020]"
|
||||||
|
EXPECTED_FIND_FIRSTBOOT_VERSION = " (First boot of SONiC version 20191130.52)"
|
||||||
|
EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_FIRSTBOOT = "Unknown (First boot of SONiC version 20191130.52)"
|
||||||
|
EXPECTED_HARDWARE_REBOOT_CAUSE = {"warm-reboot", ""}
|
||||||
|
|
||||||
|
EXPECTED_WATCHDOG_REBOOT_CAUSE_DICT = {'comment': '', 'gen_time': '2020_10_22_03_15_08', 'cause': 'Watchdog', 'user': 'N/A', 'time': 'N/A'}
|
||||||
|
EXPECTED_USER_REBOOT_CAUSE_DICT = {'comment': '', 'gen_time': '2020_10_22_03_14_07', 'cause': 'reboot', 'user': 'admin', 'time': 'Thu Oct 22 03:11:08 UTC 2020'}
|
||||||
|
|
||||||
|
imp.load_source('determine_reboot_cause', scripts_path + '/determine-reboot-cause')
|
||||||
|
from determine_reboot_cause import *
|
||||||
|
|
||||||
|
class TestDetermineRebootCause(object):
    """Unit tests for the helper functions of the determine-reboot-cause script."""

    @classmethod
    def setup_class(cls):
        print("SETUP")

    def test_parse_warmfast_reboot_from_proc_cmdline(self):
        # /proc/cmdline is replaced by canned contents carrying SONIC_BOOT_TYPE=warm.
        open_mocked = mock.mock_open(read_data=PROC_CMDLINE_CONTENTS)
        with mock.patch("os.path.isfile", return_value=True), \
                mock.patch("{}.open".format(BUILTINS), open_mocked):
            result = parse_warmfast_reboot_from_proc_cmdline()
            assert result == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE
            open_mocked.assert_called_once_with("/proc/cmdline")

    def test_find_software_reboot_cause_user(self):
        # The reboot-cause file holds a user-issued warm-reboot record.
        open_mocked = mock.mock_open(read_data=REBOOT_CAUSE_CONTENTS)
        with mock.patch("os.path.isfile", return_value=True), \
                mock.patch("{}.open".format(BUILTINS), open_mocked):
            result = find_software_reboot_cause_from_reboot_cause_file()
            assert result == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER
            open_mocked.assert_called_once_with("/host/reboot-cause/reboot-cause.txt")

    def test_find_software_reboot_cause_first_boot(self):
        with mock.patch("sonic_py_common.device_info.get_sonic_version_info", return_value=GET_SONIC_VERSION_INFO):
            result = find_first_boot_version()
            assert result == EXPECTED_FIND_FIRSTBOOT_VERSION

    def test_find_software_reboot_cause(self):
        # No first-boot marker file: the unknown cause is returned unchanged.
        with mock.patch("determine_reboot_cause.find_software_reboot_cause_from_reboot_cause_file", return_value="Unknown"), \
                mock.patch("os.path.isfile", return_value=False):
            result = find_software_reboot_cause()
            assert result == "Unknown"

    def test_find_proc_cmdline_reboot_cause(self):
        with mock.patch("determine_reboot_cause.parse_warmfast_reboot_from_proc_cmdline", return_value="fast-reboot"):
            result = find_proc_cmdline_reboot_cause()
            assert result == "fast-reboot"

    def test_find_hardware_reboot_cause(self):
        with mock.patch("determine_reboot_cause.get_reboot_cause_from_platform", return_value=("Powerloss", None)):
            result = find_hardware_reboot_cause()
            assert result == ("Powerloss", None)

    def test_get_reboot_cause_dict_watchdog(self):
        reboot_cause_dict = get_reboot_cause_dict(REBOOT_CAUSE_WATCHDOG, "", GEN_TIME_WATCHDOG)
        assert reboot_cause_dict == EXPECTED_WATCHDOG_REBOOT_CAUSE_DICT

    def test_get_reboot_cause_dict_user(self):
        reboot_cause_dict = get_reboot_cause_dict(REBOOT_CAUSE_USER, "", GEN_TIME_USER)
        assert reboot_cause_dict == EXPECTED_USER_REBOOT_CAUSE_DICT

    @classmethod
    def teardown_class(cls):
        print("TEARDOWN")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user