[DellEMC] S6100 - iTCO watchdog support and reboot cause determination changes (#9149)

Why I did it
To support iTCO watchdog using watchdog APIs.
How I did it
Implemented a new watchdog class WatchdogTCO for interfacing with iTCO watchdog.
Updated reboot cause determination logic.
How to verify it
Verified that the watchdog APIs' return values are as expected.
Logs: UT_logs.txt
This commit is contained in:
Arun Saravanan Balachandran 2021-12-07 11:22:20 +05:30 committed by Judy Joseph
parent 3f7e77e210
commit a56ecd711d
4 changed files with 186 additions and 7 deletions

View File

@ -1,3 +1,3 @@
CONSOLE_PORT=0x2f8
CONSOLE_DEV=1
ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="module_blacklist=gpio_ich nos-config-part=/dev/sda12"
ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="module_blacklist=gpio_ich,wdat_wdt acpi_no_watchdog=1 nos-config-part=/dev/sda12"

View File

@ -15,6 +15,7 @@ MAILBOX_POWERON_REASON=/sys/devices/platform/SMF.512/hwmon/*/mb_poweron_reason
NVRAM_DEVICE_FILE=/dev/nvram
RESET_REASON_FILE=/host/reboot-cause/platform/reset_reason
SMF_DIR=/sys/devices/platform/SMF.512/hwmon/
TCO_RESET_NVRAM_OFFSET=0x59
while [[ ! -d $SMF_DIR ]]
do
@ -27,6 +28,7 @@ do
done
SMF_RESET=$(cat $SMF_RESET_REASON)
TCO_WD_RESET=0
if [[ -d /host/reboot-cause/platform ]]; then
reboot_dir_found=true
@ -80,6 +82,18 @@ _get_smf_reset_register(){
echo "Fourth reset - $fourth_reset" >> $RESET_REASON_FILE
fi
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "RST value in NVRAM: $first_reset, $second_reset, $third_reset, $fourth_reset"
if [[ $BIOS_VERSION_MINOR -gt 8 ]]; then
# Retrieve TCO reset status
tco_nvram=$((16#$(nvram_rd_wr.py --get --offset $TCO_RESET_NVRAM_OFFSET | cut -d " " -f 2)))
TCO_WD_RESET=$(($tco_nvram & 1))
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "TCO status value in NVRAM: $TCO_WD_RESET"
# Clear TCO reset status in NVRAM
tco_nvram=$(printf "%x" $(($tco_nvram & 0xfe)))
nvram_rd_wr.py --set --val $tco_nvram --offset $TCO_RESET_NVRAM_OFFSET
fi
# Clearing NVRAM values to holding next reset values
nvram_rd_wr.py --set --val 0xee --offset 0x58
nvram_rd_wr.py --set --val 0xee --offset 0x5c
@ -183,7 +197,9 @@ update_mailbox_register(){
&& [[ $SMF_MSS_VERSION_MAJOR -ge 2 ]] && [[ $SMF_MSS_VERSION_MINOR -ge 7 ]] \
&& [[ $SMF_FPGA_VERSION_MAJOR -ge 1 ]] && [[ $SMF_FPGA_VERSION_MINOR -ge 4 ]]; then
if [[ $reason = "cc" ]]; then
if [[ $TCO_WD_RESET = 1 ]]; then
echo 0xdd > $MAILBOX_POWERON_REASON
elif [[ $reason = "cc" ]]; then
_is_software_reboot
elif [[ $SMF_RESET = "11" ]]; then
echo 0xee > $MAILBOX_POWERON_REASON
@ -206,6 +222,8 @@ update_mailbox_register(){
echo 0xee > $MAILBOX_POWERON_REASON
elif [[ $is_wd_reboot = 1 ]] && [[ $reason != "cc" ]]; then
echo 0xdd > $MAILBOX_POWERON_REASON
elif [[ $TCO_WD_RESET = 1 ]]; then
echo 0xdd > $MAILBOX_POWERON_REASON
elif [[ $reason = "cc" ]]; then
_is_software_reboot
else

View File

@ -17,7 +17,7 @@ try:
from sonic_platform.module import Module
from sonic_platform.thermal import Thermal
from sonic_platform.component import Component
from sonic_platform.watchdog import Watchdog
from sonic_platform.watchdog import Watchdog, WatchdogTCO
from sonic_platform.eeprom import Eeprom
import time
except ImportError as e:
@ -93,7 +93,13 @@ class Chassis(ChassisBase):
component = Component(i)
self._component_list.append(component)
self._watchdog = Watchdog()
bios_ver = self.get_component(0).get_firmware_version()
bios_minor_ver = bios_ver.split("-")[-1]
if bios_minor_ver.isdigit() and (int(bios_minor_ver) >= 9):
self._watchdog = WatchdogTCO()
else:
self._watchdog = Watchdog()
self._transceiver_presence = self._get_transceiver_presence()
def _get_reboot_reason_smf_register(self):

View File

@ -1,5 +1,3 @@
#!/usr/bin/env python
########################################################################
#
# DELLEMC S6100
@ -10,13 +8,30 @@
########################################################################
try:
import array
import ctypes
import fcntl
import glob
import os
import struct
import ctypes
from sonic_platform_base.watchdog_base import WatchdogBase
except ImportError as e:
raise ImportError(str(e) + "- required module not found")
# ioctl constants
IOC_WRITE = 0x40000000
IOC_READ = 0x80000000
IOC_SIZE_INT = 0x00040000
WATCHDOG_IOCTL_BASE = ord('W')
WDIOC_SETOPTIONS = IOC_READ | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 4
WDIOC_KEEPALIVE = IOC_READ | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 5
WDIOC_SETTIMEOUT = IOC_READ | IOC_WRITE | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 6
WDIOS_DISABLECARD = 0x0001
WDIOS_ENABLECARD = 0x0002
class _timespec(ctypes.Structure):
_fields_ = [
@ -24,6 +39,7 @@ class _timespec(ctypes.Structure):
('tv_nsec', ctypes.c_long)
]
class Watchdog(WatchdogBase):
"""
Abstract base class for interfacing with a hardware watchdog module
@ -226,3 +242,142 @@ class Watchdog(WatchdogBase):
return 0
class WatchdogTCO(WatchdogBase):
"""
Watchdog class for interfacing with iTCO watchdog
"""
IDENTITY = "iTCO_wdt"
def __init__(self):
self.dev = None
self.dev_name = None
wd_sysfs_path = "/sys/class/watchdog"
for dev_file in glob.glob("/dev/watchdog*"):
dev = os.path.basename(dev_file)
dev_identity = self._read_file("{}/{}/identity".format(wd_sysfs_path, dev))
if dev_identity == self.IDENTITY:
self.dev_name = dev
break
if self.dev_name is None:
raise RuntimeError("{} is not initialized".format(self.IDENTITY))
self.state_file = "{}/{}/state".format(wd_sysfs_path, self.dev_name)
self.timeout_file = "{}/{}/timeout".format(wd_sysfs_path, self.dev_name)
self.timeleft_file = "{}/{}/timeleft".format(wd_sysfs_path, self.dev_name)
def __del__(self):
if self.dev is not None:
os.close(self.dev)
def _ioctl(self, request, arg=0, mutate_flag=True):
"""
Perform ioctl on watchdog device
"""
self._open_wd_dev()
fcntl.ioctl(self.dev, request, arg, mutate_flag)
def _open_wd_dev(self):
"""
Open watchdog device file
"""
if self.dev is None:
wd_dev = "/dev/{}".format(self.dev_name)
self.dev = os.open(wd_dev, os.O_RDWR)
@staticmethod
def _read_file(file_path):
"""
Read a file
"""
try:
with open(file_path, "r") as fd:
read_str = fd.read()
except OSError:
return -1
return read_str.strip()
def arm(self, seconds):
"""
Arm the hardware watchdog with a timeout of <seconds> seconds.
If the watchdog is currently armed, calling this function will
simply reset the timer to the provided value. If the underlying
hardware does not support the value provided in <seconds>, this
method should arm the watchdog with the *next greater*
available value.
Returns:
An integer specifying the *actual* number of seconds the
watchdog was armed with. On failure returns -1.
"""
if seconds < 0 or seconds > 0x3ff:
return -1
if seconds < 4:
seconds = 4
try:
timeout = int(self._read_file(self.timeout_file))
if timeout != seconds:
buf = array.array('I', [seconds])
self._ioctl(WDIOC_SETTIMEOUT, buf)
timeout = int(buf[0])
if self.is_armed():
self._ioctl(WDIOC_KEEPALIVE)
else:
buf = array.array('h', [WDIOS_ENABLECARD])
self._ioctl(WDIOC_SETOPTIONS, buf, False)
except OSError:
return -1
else:
return timeout
def disarm(self):
"""
Disarm the hardware watchdog
Returns:
A boolean, True if watchdog is disarmed successfully, False
if not
"""
disarmed = True
if self.is_armed():
try:
buf = array.array('h', [WDIOS_DISABLECARD])
self._ioctl(WDIOC_SETOPTIONS, buf, False)
except OSError:
disarmed = False
return disarmed
def is_armed(self):
"""
Retrieves the armed state of the hardware watchdog.
Returns:
A boolean, True if watchdog is armed, False if not
"""
state = self._read_file(self.state_file)
return state == "active"
def get_remaining_time(self):
"""
If the watchdog is armed, retrieve the number of seconds
remaining on the watchdog timer
Returns:
An integer specifying the number of seconds remaining on
their watchdog timer. If the watchdog is not armed, returns
-1.
"""
timeleft = -1
if self.is_armed():
timeleft = int(self._read_file(self.timeleft_file))
return timeleft