From 5b31996f7b6cdcc3842056a62076bb805ec82005 Mon Sep 17 00:00:00 2001 From: Sujin Kang Date: Fri, 20 Nov 2020 20:08:18 -0800 Subject: [PATCH] [reboot-history] Add reboot history to state db (#5933) - Why I did it Add reboot history to State db so that it can be used by the telemetry service - How I did it Split the process-reboot-cause service into determine-reboot-cause and process-reboot-cause: determine-reboot-cause determines the reboot cause; process-reboot-cause parses the reboot cause files and puts the reboot history into state db. Moved to sonic-host-service* packages - How to verify it Ran the unit tests and tested on a DUT --- .../build_templates/sonic_debian_extension.j2 | 10 -- src/sonic-host-services-data/debian/rules | 3 + ...rvices-data.determine-reboot-cause.service | 11 ++ ...services-data.process-reboot-cause.service | 0 ...t-services-data.process-reboot-cause.timer | 0 .../scripts/determine-reboot-cause | 143 ++++++++++++------ .../scripts/process-reboot-cause | 100 ++++++++++++ src/sonic-host-services/setup.py | 2 + .../tests/determine-reboot-cause_test.py | 117 ++++++++++++++ 9 files changed, 329 insertions(+), 57 deletions(-) create mode 100644 src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service rename files/image_config/process-reboot-cause/process-reboot-cause.service => src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.service (100%) rename files/build_templates/process-reboot-cause.timer => src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.timer (100%) rename files/image_config/process-reboot-cause/process-reboot-cause => src/sonic-host-services/scripts/determine-reboot-cause (56%) create mode 100644 src/sonic-host-services/scripts/process-reboot-cause create mode 100644 src/sonic-host-services/tests/determine-reboot-cause_test.py diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 
7afc099e3a..00c0d9bbc5 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -428,16 +428,6 @@ sudo cp $IMAGE_CONFIGS/pcie-check/pcie-check.service $FILESYSTEM_ROOT_USR_LIB_SY echo "pcie-check.service" | sudo tee -a $GENERATED_SERVICE_FILE sudo cp $IMAGE_CONFIGS/pcie-check/pcie-check.sh $FILESYSTEM_ROOT/usr/bin/ -# Copy systemd timer configuration -# It implements delayed start of services -sudo cp $BUILD_TEMPLATES/process-reboot-cause.timer $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM -sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable process-reboot-cause.timer - -# Copy process-reboot-cause service files -sudo cp $IMAGE_CONFIGS/process-reboot-cause/process-reboot-cause.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM -echo "process-reboot-cause.service" | sudo tee -a $GENERATED_SERVICE_FILE -sudo cp $IMAGE_CONFIGS/process-reboot-cause/process-reboot-cause $FILESYSTEM_ROOT/usr/bin/ - ## Install package without starting service ## ref: https://wiki.debian.org/chroot sudo tee -a $FILESYSTEM_ROOT/usr/sbin/policy-rc.d > /dev/null < 10: + for i in range(len(TIME_SORTED_FULL_REBOOT_FILE_LIST)): + if i >= 10: + x = TIME_SORTED_FULL_REBOOT_FILE_LIST[i] + os.remove(x) + + +def main(): + # Configure logger to log all messages INFO level and higher + sonic_logger.set_min_log_priority_info() + + sonic_logger.log_info("Starting up...") + + if not os.geteuid() == 0: + sonic_logger.log_error("User {} does not have permission to execute".format(pwd.getpwuid(os.getuid()).pw_name)) + sys.exit("This utility must be run as root") + + # Set a default previous reboot cause + previous_reboot_cause = REBOOT_CAUSE_UNKNOWN + + # Read the most recent reboot cause file and log data to syslog + if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE): + with open(PREVIOUS_REBOOT_CAUSE_FILE, "r") as last_cause_file: + data = json.load(last_cause_file) + if data['user']: + previous_reboot_cause = 
USER_ISSUED_REBOOT_CAUSE_REGEX.format(data['cause'], data['user'], data['time']) + else: + previous_reboot_cause = "{}".format(data['cause']) + + # Log the last reboot cause to the syslog + sonic_logger.log_info("Previous reboot cause: {}".format(previous_reboot_cause)) + + if os.path.exists(REBOOT_CAUSE_HISTORY_DIR): + # Read the previous reboot cause from saved reboot-cause files and save the previous reboot cause upto 10 entry to the state db + read_reboot_cause_files_and_save_state_db() + + +if __name__ == "__main__": + main() diff --git a/src/sonic-host-services/setup.py b/src/sonic-host-services/setup.py index d0f7bd0556..2da0561b9c 100644 --- a/src/sonic-host-services/setup.py +++ b/src/sonic-host-services/setup.py @@ -14,6 +14,8 @@ setup( 'scripts/caclmgrd', 'scripts/hostcfgd', 'scripts/procdockerstatsd', + 'scripts/determine-reboot-cause', + 'scripts/process-reboot-cause', ], install_requires = [ 'Jinja2>=2.10', diff --git a/src/sonic-host-services/tests/determine-reboot-cause_test.py b/src/sonic-host-services/tests/determine-reboot-cause_test.py new file mode 100644 index 0000000000..9cef2aa30d --- /dev/null +++ b/src/sonic-host-services/tests/determine-reboot-cause_test.py @@ -0,0 +1,117 @@ +import imp +import sys +import os +import pytest + +import swsssdk + +# TODO: Remove this if/else block once we no longer support Python 2 +if sys.version_info.major == 3: + from unittest import mock +else: + # Expect the 'mock' package for python 2 + # https://pypi.python.org/pypi/mock + import mock + +# TODO: Remove this if/else block once we no longer support Python 2 +if sys.version_info.major == 3: + BUILTINS = "builtins" +else: + BUILTINS = "__builtin__" + +from .mock_connector import MockConnector + +swsssdk.SonicV2Connector = MockConnector + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +scripts_path = os.path.join(modules_path, "scripts") +sys.path.insert(0, modules_path) + +PROC_CMDLINE_CONTENTS = """\ 
+BOOT_IMAGE=/image-20191130.52/boot/vmlinuz-4.9.0-11-2-amd64 root=/dev/sda4 rw console=tty0 console=ttyS1,9600n8 quiet net.ifnames=0 biosdevname=0 loop=image-20191130.52/fs.squashfs loopfstype=squashfs apparmor=1 security=apparmor varlog_size=4096 usbcore.autosuspend=-1 module_blacklist=gpio_ich SONIC_BOOT_TYPE=warm""" + +EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE = "warm" + +PROC_CMDLINE_CONTENTS = """\ +BOOT_IMAGE=/image-20191130.52/boot/vmlinuz-4.9.0-11-2-amd64 root=/dev/sda4 rw console=tty0 console=ttyS1,9600n8 quiet net.ifnames=0 biosdevname=0 loop=image-20191130.52/fs.squashfs loopfstype=squashfs apparmor=1 security=apparmor varlog_size=4096 usbcore.autosuspend=-1 module_blacklist=gpio_ich SONIC_BOOT_TYPE=warm""" + +REBOOT_CAUSE_CONTENTS = """\ +User issued 'warm-reboot' command [User: admin, Time: Mon Nov 2 22:37:45 UTC 2020]""" + +GET_SONIC_VERSION_INFO = {'commit_id': 'e59ec8291', 'build_date': 'Mon Nov 2 06:00:14 UTC 2020', 'build_number': 75, 'kernel_version': '4.9.0-11-2-amd64', 'debian_version': '9.13', 'built_by': 'sonicbld@jenkins-slave-phx-2', 'asic_type': 'mellanox', 'build_version': '20191130.52'} + +REBOOT_CAUSE_WATCHDOG = "Watchdog" +GEN_TIME_WATCHDOG = "2020_10_22_03_15_08" +REBOOT_CAUSE_USER = "User issued 'reboot' command [User: admin, Time: Thu Oct 22 03:11:08 UTC 2020]" +GEN_TIME_USER = "2020_10_22_03_14_07" + +EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE = "warm-reboot" +EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER = "User issued 'warm-reboot' command [User: admin, Time: Mon Nov 2 22:37:45 UTC 2020]" +EXPECTED_FIND_FIRSTBOOT_VERSION = " (First boot of SONiC version 20191130.52)" +EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_FIRSTBOOT = "Unknown (First boot of SONiC version 20191130.52)" +EXPECTED_HARDWARE_REBOOT_CAUSE = {"warm-reboot", ""} + +EXPECTED_WATCHDOG_REBOOT_CAUSE_DICT = {'comment': '', 'gen_time': '2020_10_22_03_15_08', 'cause': 'Watchdog', 'user': 'N/A', 'time': 'N/A'} +EXPECTED_USER_REBOOT_CAUSE_DICT = {'comment': '', 'gen_time': 
'2020_10_22_03_14_07', 'cause': 'reboot', 'user': 'admin', 'time': 'Thu Oct 22 03:11:08 UTC 2020'} + +imp.load_source('determine_reboot_cause', scripts_path + '/determine-reboot-cause') +from determine_reboot_cause import * + +class TestDetermineRebootCause(object): + @classmethod + def setup_class(cls): + print("SETUP") + + def test_parse_warmfast_reboot_from_proc_cmdline(self): + with mock.patch("os.path.isfile") as mock_isfile: + mock_isfile.return_value = True + open_mocked = mock.mock_open(read_data=PROC_CMDLINE_CONTENTS) + with mock.patch("{}.open".format(BUILTINS), open_mocked): + result = parse_warmfast_reboot_from_proc_cmdline() + assert result == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE + open_mocked.assert_called_once_with("/proc/cmdline") + + def test_find_software_reboot_cause_user(self): + with mock.patch("os.path.isfile") as mock_isfile: + mock_isfile.return_value = True + open_mocked = mock.mock_open(read_data=REBOOT_CAUSE_CONTENTS) + with mock.patch("{}.open".format(BUILTINS), open_mocked): + result = find_software_reboot_cause_from_reboot_cause_file() + assert result == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER + open_mocked.assert_called_once_with("/host/reboot-cause/reboot-cause.txt") + + def test_find_software_reboot_cause_first_boot(self): + with mock.patch("sonic_py_common.device_info.get_sonic_version_info", return_value=GET_SONIC_VERSION_INFO): + result = find_first_boot_version() + assert result == EXPECTED_FIND_FIRSTBOOT_VERSION + + def test_find_software_reboot_cause(self): + with mock.patch("determine_reboot_cause.find_software_reboot_cause_from_reboot_cause_file", return_value="Unknown"): + with mock.patch("os.path.isfile") as mock_isfile: + mock_isfile.return_value = False + result = find_software_reboot_cause() + assert result == "Unknown" + + def test_find_proc_cmdline_reboot_cause(self): + with mock.patch("determine_reboot_cause.parse_warmfast_reboot_from_proc_cmdline", return_value="fast-reboot"): + result = 
find_proc_cmdline_reboot_cause() + assert result == "fast-reboot" + + def test_find_hardware_reboot_cause(self): + with mock.patch("determine_reboot_cause.get_reboot_cause_from_platform", return_value=("Powerloss", None)): + result = find_hardware_reboot_cause() + assert result == ("Powerloss", None) + + def test_get_reboot_cause_dict_watchdog(self): + reboot_cause_dict = get_reboot_cause_dict(REBOOT_CAUSE_WATCHDOG, "", GEN_TIME_WATCHDOG) + assert reboot_cause_dict == EXPECTED_WATCHDOG_REBOOT_CAUSE_DICT + + def test_get_reboot_cause_dict_user(self): + reboot_cause_dict = get_reboot_cause_dict(REBOOT_CAUSE_USER, "", GEN_TIME_USER) + assert reboot_cause_dict == EXPECTED_USER_REBOOT_CAUSE_DICT + + @classmethod + def teardown_class(cls): + print("TREARDOWN") +