[DellEMC] S6100 - iTCO watchdog support and reboot cause determination changes (#9149)
Why I did it To support iTCO watchdog using watchdog APIs. How I did it Implemented a new watchdog class WatchdogTCO for interfacing with iTCO watchdog. Updated reboot cause determination logic. How to verify it Verified that the watchdog APIs' return values are as expected. Logs: UT_logs.txt
This commit is contained in:
parent
ab932591c1
commit
6d8c509817
@ -1,3 +1,3 @@
|
|||||||
CONSOLE_PORT=0x2f8
|
CONSOLE_PORT=0x2f8
|
||||||
CONSOLE_DEV=1
|
CONSOLE_DEV=1
|
||||||
ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="module_blacklist=gpio_ich nos-config-part=/dev/sda12"
|
ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="module_blacklist=gpio_ich,wdat_wdt acpi_no_watchdog=1 nos-config-part=/dev/sda12"
|
||||||
|
@ -15,6 +15,7 @@ MAILBOX_POWERON_REASON=/sys/devices/platform/SMF.512/hwmon/*/mb_poweron_reason
|
|||||||
NVRAM_DEVICE_FILE=/dev/nvram
|
NVRAM_DEVICE_FILE=/dev/nvram
|
||||||
RESET_REASON_FILE=/host/reboot-cause/platform/reset_reason
|
RESET_REASON_FILE=/host/reboot-cause/platform/reset_reason
|
||||||
SMF_DIR=/sys/devices/platform/SMF.512/hwmon/
|
SMF_DIR=/sys/devices/platform/SMF.512/hwmon/
|
||||||
|
TCO_RESET_NVRAM_OFFSET=0x59
|
||||||
|
|
||||||
while [[ ! -d $SMF_DIR ]]
|
while [[ ! -d $SMF_DIR ]]
|
||||||
do
|
do
|
||||||
@ -27,6 +28,7 @@ do
|
|||||||
done
|
done
|
||||||
|
|
||||||
SMF_RESET=$(cat $SMF_RESET_REASON)
|
SMF_RESET=$(cat $SMF_RESET_REASON)
|
||||||
|
TCO_WD_RESET=0
|
||||||
|
|
||||||
if [[ -d /host/reboot-cause/platform ]]; then
|
if [[ -d /host/reboot-cause/platform ]]; then
|
||||||
reboot_dir_found=true
|
reboot_dir_found=true
|
||||||
@ -80,6 +82,18 @@ _get_smf_reset_register(){
|
|||||||
echo "Fourth reset - $fourth_reset" >> $RESET_REASON_FILE
|
echo "Fourth reset - $fourth_reset" >> $RESET_REASON_FILE
|
||||||
fi
|
fi
|
||||||
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "RST value in NVRAM: $first_reset, $second_reset, $third_reset, $fourth_reset"
|
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "RST value in NVRAM: $first_reset, $second_reset, $third_reset, $fourth_reset"
|
||||||
|
|
||||||
|
if [[ $BIOS_VERSION_MINOR -gt 8 ]]; then
|
||||||
|
# Retrieve TCO reset status
|
||||||
|
tco_nvram=$((16#$(nvram_rd_wr.py --get --offset $TCO_RESET_NVRAM_OFFSET | cut -d " " -f 2)))
|
||||||
|
TCO_WD_RESET=$(($tco_nvram & 1))
|
||||||
|
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "TCO status value in NVRAM: $TCO_WD_RESET"
|
||||||
|
|
||||||
|
# Clear TCO reset status in NVRAM
|
||||||
|
tco_nvram=$(printf "%x" $(($tco_nvram & 0xfe)))
|
||||||
|
nvram_rd_wr.py --set --val $tco_nvram --offset $TCO_RESET_NVRAM_OFFSET
|
||||||
|
fi
|
||||||
|
|
||||||
# Clearing NVRAM values to holding next reset values
|
# Clearing NVRAM values to holding next reset values
|
||||||
nvram_rd_wr.py --set --val 0xee --offset 0x58
|
nvram_rd_wr.py --set --val 0xee --offset 0x58
|
||||||
nvram_rd_wr.py --set --val 0xee --offset 0x5c
|
nvram_rd_wr.py --set --val 0xee --offset 0x5c
|
||||||
@ -183,7 +197,9 @@ update_mailbox_register(){
|
|||||||
&& [[ $SMF_MSS_VERSION_MAJOR -ge 2 ]] && [[ $SMF_MSS_VERSION_MINOR -ge 7 ]] \
|
&& [[ $SMF_MSS_VERSION_MAJOR -ge 2 ]] && [[ $SMF_MSS_VERSION_MINOR -ge 7 ]] \
|
||||||
&& [[ $SMF_FPGA_VERSION_MAJOR -ge 1 ]] && [[ $SMF_FPGA_VERSION_MINOR -ge 4 ]]; then
|
&& [[ $SMF_FPGA_VERSION_MAJOR -ge 1 ]] && [[ $SMF_FPGA_VERSION_MINOR -ge 4 ]]; then
|
||||||
|
|
||||||
if [[ $reason = "cc" ]]; then
|
if [[ $TCO_WD_RESET = 1 ]]; then
|
||||||
|
echo 0xdd > $MAILBOX_POWERON_REASON
|
||||||
|
elif [[ $reason = "cc" ]]; then
|
||||||
_is_software_reboot
|
_is_software_reboot
|
||||||
elif [[ $SMF_RESET = "11" ]]; then
|
elif [[ $SMF_RESET = "11" ]]; then
|
||||||
echo 0xee > $MAILBOX_POWERON_REASON
|
echo 0xee > $MAILBOX_POWERON_REASON
|
||||||
@ -206,6 +222,8 @@ update_mailbox_register(){
|
|||||||
echo 0xee > $MAILBOX_POWERON_REASON
|
echo 0xee > $MAILBOX_POWERON_REASON
|
||||||
elif [[ $is_wd_reboot = 1 ]] && [[ $reason != "cc" ]]; then
|
elif [[ $is_wd_reboot = 1 ]] && [[ $reason != "cc" ]]; then
|
||||||
echo 0xdd > $MAILBOX_POWERON_REASON
|
echo 0xdd > $MAILBOX_POWERON_REASON
|
||||||
|
elif [[ $TCO_WD_RESET = 1 ]]; then
|
||||||
|
echo 0xdd > $MAILBOX_POWERON_REASON
|
||||||
elif [[ $reason = "cc" ]]; then
|
elif [[ $reason = "cc" ]]; then
|
||||||
_is_software_reboot
|
_is_software_reboot
|
||||||
else
|
else
|
||||||
|
@ -17,7 +17,7 @@ try:
|
|||||||
from sonic_platform.module import Module
|
from sonic_platform.module import Module
|
||||||
from sonic_platform.thermal import Thermal
|
from sonic_platform.thermal import Thermal
|
||||||
from sonic_platform.component import Component
|
from sonic_platform.component import Component
|
||||||
from sonic_platform.watchdog import Watchdog
|
from sonic_platform.watchdog import Watchdog, WatchdogTCO
|
||||||
from sonic_platform.eeprom import Eeprom
|
from sonic_platform.eeprom import Eeprom
|
||||||
import time
|
import time
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
@ -93,7 +93,13 @@ class Chassis(ChassisBase):
|
|||||||
component = Component(i)
|
component = Component(i)
|
||||||
self._component_list.append(component)
|
self._component_list.append(component)
|
||||||
|
|
||||||
self._watchdog = Watchdog()
|
bios_ver = self.get_component(0).get_firmware_version()
|
||||||
|
bios_minor_ver = bios_ver.split("-")[-1]
|
||||||
|
if bios_minor_ver.isdigit() and (int(bios_minor_ver) >= 9):
|
||||||
|
self._watchdog = WatchdogTCO()
|
||||||
|
else:
|
||||||
|
self._watchdog = Watchdog()
|
||||||
|
|
||||||
self._transceiver_presence = self._get_transceiver_presence()
|
self._transceiver_presence = self._get_transceiver_presence()
|
||||||
|
|
||||||
def _get_reboot_reason_smf_register(self):
|
def _get_reboot_reason_smf_register(self):
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
#
|
#
|
||||||
# DELLEMC S6100
|
# DELLEMC S6100
|
||||||
@ -10,13 +8,30 @@
|
|||||||
########################################################################
|
########################################################################
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
import array
|
||||||
|
import ctypes
|
||||||
|
import fcntl
|
||||||
|
import glob
|
||||||
import os
|
import os
|
||||||
import struct
|
import struct
|
||||||
import ctypes
|
|
||||||
from sonic_platform_base.watchdog_base import WatchdogBase
|
from sonic_platform_base.watchdog_base import WatchdogBase
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise ImportError(str(e) + "- required module not found")
|
raise ImportError(str(e) + "- required module not found")
|
||||||
|
|
||||||
|
# ioctl constants
|
||||||
|
IOC_WRITE = 0x40000000
|
||||||
|
IOC_READ = 0x80000000
|
||||||
|
IOC_SIZE_INT = 0x00040000
|
||||||
|
|
||||||
|
WATCHDOG_IOCTL_BASE = ord('W')
|
||||||
|
|
||||||
|
WDIOC_SETOPTIONS = IOC_READ | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 4
|
||||||
|
WDIOC_KEEPALIVE = IOC_READ | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 5
|
||||||
|
WDIOC_SETTIMEOUT = IOC_READ | IOC_WRITE | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 6
|
||||||
|
|
||||||
|
WDIOS_DISABLECARD = 0x0001
|
||||||
|
WDIOS_ENABLECARD = 0x0002
|
||||||
|
|
||||||
|
|
||||||
class _timespec(ctypes.Structure):
|
class _timespec(ctypes.Structure):
|
||||||
_fields_ = [
|
_fields_ = [
|
||||||
@ -24,6 +39,7 @@ class _timespec(ctypes.Structure):
|
|||||||
('tv_nsec', ctypes.c_long)
|
('tv_nsec', ctypes.c_long)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class Watchdog(WatchdogBase):
|
class Watchdog(WatchdogBase):
|
||||||
"""
|
"""
|
||||||
Abstract base class for interfacing with a hardware watchdog module
|
Abstract base class for interfacing with a hardware watchdog module
|
||||||
@ -226,3 +242,142 @@ class Watchdog(WatchdogBase):
|
|||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
class WatchdogTCO(WatchdogBase):
|
||||||
|
"""
|
||||||
|
Watchdog class for interfacing with iTCO watchdog
|
||||||
|
"""
|
||||||
|
|
||||||
|
IDENTITY = "iTCO_wdt"
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
|
||||||
|
self.dev = None
|
||||||
|
self.dev_name = None
|
||||||
|
wd_sysfs_path = "/sys/class/watchdog"
|
||||||
|
|
||||||
|
for dev_file in glob.glob("/dev/watchdog*"):
|
||||||
|
dev = os.path.basename(dev_file)
|
||||||
|
dev_identity = self._read_file("{}/{}/identity".format(wd_sysfs_path, dev))
|
||||||
|
if dev_identity == self.IDENTITY:
|
||||||
|
self.dev_name = dev
|
||||||
|
break
|
||||||
|
|
||||||
|
if self.dev_name is None:
|
||||||
|
raise RuntimeError("{} is not initialized".format(self.IDENTITY))
|
||||||
|
|
||||||
|
self.state_file = "{}/{}/state".format(wd_sysfs_path, self.dev_name)
|
||||||
|
self.timeout_file = "{}/{}/timeout".format(wd_sysfs_path, self.dev_name)
|
||||||
|
self.timeleft_file = "{}/{}/timeleft".format(wd_sysfs_path, self.dev_name)
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
if self.dev is not None:
|
||||||
|
os.close(self.dev)
|
||||||
|
|
||||||
|
def _ioctl(self, request, arg=0, mutate_flag=True):
|
||||||
|
"""
|
||||||
|
Perform ioctl on watchdog device
|
||||||
|
"""
|
||||||
|
self._open_wd_dev()
|
||||||
|
fcntl.ioctl(self.dev, request, arg, mutate_flag)
|
||||||
|
|
||||||
|
def _open_wd_dev(self):
|
||||||
|
"""
|
||||||
|
Open watchdog device file
|
||||||
|
"""
|
||||||
|
if self.dev is None:
|
||||||
|
wd_dev = "/dev/{}".format(self.dev_name)
|
||||||
|
self.dev = os.open(wd_dev, os.O_RDWR)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _read_file(file_path):
|
||||||
|
"""
|
||||||
|
Read a file
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open(file_path, "r") as fd:
|
||||||
|
read_str = fd.read()
|
||||||
|
except OSError:
|
||||||
|
return -1
|
||||||
|
|
||||||
|
return read_str.strip()
|
||||||
|
|
||||||
|
def arm(self, seconds):
|
||||||
|
"""
|
||||||
|
Arm the hardware watchdog with a timeout of <seconds> seconds.
|
||||||
|
If the watchdog is currently armed, calling this function will
|
||||||
|
simply reset the timer to the provided value. If the underlying
|
||||||
|
hardware does not support the value provided in <seconds>, this
|
||||||
|
method should arm the watchdog with the *next greater*
|
||||||
|
available value.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
An integer specifying the *actual* number of seconds the
|
||||||
|
watchdog was armed with. On failure returns -1.
|
||||||
|
"""
|
||||||
|
if seconds < 0 or seconds > 0x3ff:
|
||||||
|
return -1
|
||||||
|
if seconds < 4:
|
||||||
|
seconds = 4
|
||||||
|
|
||||||
|
try:
|
||||||
|
timeout = int(self._read_file(self.timeout_file))
|
||||||
|
if timeout != seconds:
|
||||||
|
buf = array.array('I', [seconds])
|
||||||
|
self._ioctl(WDIOC_SETTIMEOUT, buf)
|
||||||
|
timeout = int(buf[0])
|
||||||
|
|
||||||
|
if self.is_armed():
|
||||||
|
self._ioctl(WDIOC_KEEPALIVE)
|
||||||
|
else:
|
||||||
|
buf = array.array('h', [WDIOS_ENABLECARD])
|
||||||
|
self._ioctl(WDIOC_SETOPTIONS, buf, False)
|
||||||
|
except OSError:
|
||||||
|
return -1
|
||||||
|
else:
|
||||||
|
return timeout
|
||||||
|
|
||||||
|
def disarm(self):
|
||||||
|
"""
|
||||||
|
Disarm the hardware watchdog
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A boolean, True if watchdog is disarmed successfully, False
|
||||||
|
if not
|
||||||
|
"""
|
||||||
|
disarmed = True
|
||||||
|
if self.is_armed():
|
||||||
|
try:
|
||||||
|
buf = array.array('h', [WDIOS_DISABLECARD])
|
||||||
|
self._ioctl(WDIOC_SETOPTIONS, buf, False)
|
||||||
|
except OSError:
|
||||||
|
disarmed = False
|
||||||
|
|
||||||
|
return disarmed
|
||||||
|
|
||||||
|
def is_armed(self):
|
||||||
|
"""
|
||||||
|
Retrieves the armed state of the hardware watchdog.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A boolean, True if watchdog is armed, False if not
|
||||||
|
"""
|
||||||
|
state = self._read_file(self.state_file)
|
||||||
|
return state == "active"
|
||||||
|
|
||||||
|
def get_remaining_time(self):
|
||||||
|
"""
|
||||||
|
If the watchdog is armed, retrieve the number of seconds
|
||||||
|
remaining on the watchdog timer
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
An integer specifying the number of seconds remaining on
|
||||||
|
their watchdog timer. If the watchdog is not armed, returns
|
||||||
|
-1.
|
||||||
|
|
||||||
|
"""
|
||||||
|
timeleft = -1
|
||||||
|
if self.is_armed():
|
||||||
|
timeleft = int(self._read_file(self.timeleft_file))
|
||||||
|
|
||||||
|
return timeleft
|
||||||
|
Reference in New Issue
Block a user