[DellEMC] S6100 Last Reboot Reason Thermal Support (#3767)

This commit is contained in:
Santhosh Kumar T 2020-02-18 13:32:33 +05:30 committed by rlhui
parent 18e51088a0
commit 1e3df476e5
11 changed files with 338 additions and 45 deletions

View File

@ -240,6 +240,12 @@ if [ -f $FIRST_BOOT_FILE ]; then
# Notify firstboot to Platform, to use it for reboot-cause
touch /tmp/notify_firstboot_to_platform
# Create the /host/reboot-cause/platform/ directory, which some platforms
# use to track the last reboot reason
if [ ! -d /host/reboot-cause/platform ]; then
mkdir -p /host/reboot-cause/platform
fi
if [ -d /host/image-$SONIC_VERSION/platform/$platform ]; then
dpkg -i /host/image-$SONIC_VERSION/platform/$platform/*.deb
fi

View File

@ -0,0 +1,97 @@
#!/usr/bin/python
#Script to read/write the nvram
import sys
import os
import getopt
import struct
# Path of the NVRAM device node; main() checks that it exists and passes it
# to the read/write helpers.
nvram_resource='/dev/nvram'
def usage():
    """Print the command-line synopsis and exit with status 1."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Utility for NVRAM read/write')
    print('\t\t nvram_rd_wr.py --get --offset <offset>')
    print('\t\t nvram_rd_wr.py --set --val <val> --offset <offset>')
    sys.exit(1)
def nvram_reg_read(nvram_resource, offset):
    """Read one byte at ``offset`` from ``nvram_resource`` and print it.

    On success prints ``value <hex>`` (lower-case hex, no ``0x`` prefix).
    On failure prints a diagnostic line and returns without raising for
    an unopenable file or a short read.

    :param nvram_resource: path of the file/device to read from
    :param offset: byte position inside that file
    """
    # os.open() signals failure by raising OSError, never by returning a
    # negative descriptor.
    try:
        fd = os.open(nvram_resource, os.O_RDONLY)
    except OSError:
        print('file open failed %s' % nvram_resource)
        return
    try:
        if os.lseek(fd, offset, os.SEEK_SET) != offset:
            print('lseek failed on %s' % nvram_resource)
            return
        buf = os.read(fd, 1)
        # Reading at or past the end yields an empty buffer, which ord()
        # cannot handle.
        if len(buf) != 1:
            print('read failed on %s' % nvram_resource)
            return
        print('value %x' % ord(buf))
    finally:
        # Close on every path, including the early returns above.
        os.close(fd)
def nvram_reg_write(nvram_resource, offset, val):
    """Write the single byte ``val`` at ``offset`` in ``nvram_resource``.

    Prints a diagnostic line when the file cannot be opened, the seek
    lands elsewhere, or fewer than one byte is written.

    :param nvram_resource: path of the file/device to write to
    :param offset: byte position inside that file
    :param val: integer packed as one unsigned byte (``struct`` format 'B')
    """
    # os.open() signals failure by raising OSError, never by returning a
    # negative descriptor.
    try:
        fd = os.open(nvram_resource, os.O_RDWR)
    except OSError:
        print('file open failed %s' % nvram_resource)
        return
    try:
        if os.lseek(fd, offset, os.SEEK_SET) != offset:
            print('lseek failed on %s' % nvram_resource)
            return
        ret = os.write(fd, struct.pack('B', val))
        if ret != 1:
            print('write failed %d' % ret)
    finally:
        # Close on every path, including the early return above.
        os.close(fd)
def main(argv):
    """Parse the command line and perform the requested NVRAM access.

    Supported invocations:
        --get --offset <offset>
        --set --val <val> --offset <offset>

    <offset> and <val> are parsed as hexadecimal, and 0xE is subtracted
    from the offset before it is used as the position in the device file.
    Any other combination, an unknown option, or a non-hex number prints
    the usage text and exits with status 1.

    :param argv: command-line arguments without the program name
    """
    try:
        # '-s' is a plain flag like '--set'. Declaring it as "s:" made it
        # swallow the next option as its argument.
        opts, _ = getopt.getopt(argv, "hgs",
                                ["val=", "offset=", "help", "get", "set"])
    except getopt.GetoptError:
        usage()

    if not os.path.exists(nvram_resource):
        print('NVRAM is not initialized')
        sys.exit(1)

    choice = None
    offset = None
    val = None
    try:
        for opt, arg in opts:
            if opt in ('-h', '--help'):
                choice = 'help'
            elif opt in ('-g', '--get'):
                choice = 'get'
            elif opt in ('-s', '--set'):
                choice = 'set'
            elif opt == '--offset':
                # NOTE(review): presumably /dev/nvram does not expose the
                # first 14 (0xE) CMOS bytes, hence the rebasing - confirm.
                offset = int(arg, 16) - 0xE
            elif opt == '--val':
                val = int(arg, 16)
    except ValueError:
        # Non-hexadecimal <offset>/<val>: show usage, not a traceback.
        usage()

    if choice == 'get' and offset is not None:
        nvram_reg_read(nvram_resource, offset)
    elif choice == 'set' and offset is not None and val is not None:
        nvram_reg_write(nvram_resource, offset, val)
    else:
        usage()


# Calling the main method
if __name__ == "__main__":
    main(sys.argv[1:])

View File

@ -2,7 +2,11 @@ s6100/scripts/iom_power_*.sh usr/local/bin
s6100/scripts/s6100_platform.sh usr/local/bin
common/dell_i2c_utils.sh usr/local/bin
common/io_rd_wr.py usr/local/bin
common/nvram_rd_wr.py usr/local/bin
s6100/scripts/platform_reboot_override usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/fast-reboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/track_reboot_reason.sh usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/warm-reboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/override.conf /etc/systemd/system/systemd-reboot.service.d
common/dell_lpc_mon.sh usr/local/bin
s6100/scripts/platform_sensors.py usr/local/bin
@ -12,5 +16,6 @@ s6100/scripts/platform_watchdog_disable.sh usr/local/bin
s6100/scripts/sensors usr/bin
s6100/systemd/platform-modules-s6100.service etc/systemd/system
s6100/systemd/s6100-lpc-monitor.service etc/systemd/system
s6100/systemd/s6100-reboot-cause.service etc/systemd/system
tools/flashrom/flashrom usr/local/bin/
common/fw-updater usr/local/bin

View File

@ -0,0 +1,6 @@
#!/bin/bash
# Write 0xcc to the SMF mailbox power-on reason register(s) before a
# CLI-triggered reboot. The path is written directly instead of after an
# unchecked 'cd', so a failed directory change can never drop a stray
# 'mb_poweron_reason' file into the current working directory.
if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
    for reason_file in /sys/devices/platform/SMF.512/hwmon/*/mb_poweron_reason; do
        # An unmatched glob stays literal; skip it instead of creating it.
        if [[ -e $reason_file ]]; then
            echo 0xcc > "$reason_file"
        fi
    done
fi

View File

@ -1,11 +1,17 @@
#!/usr/bin/python
import sys
import os
import subprocess
import struct
PORT_RES = '/dev/port'
def log_software_reboot():
    """Run the plugin that records a CLI-triggered reboot/fastboot/warmboot.

    Recording the reason is best-effort: if the plugin is missing or exits
    non-zero, a warning is written to stderr and the function returns
    normally so the caller can still carry out the reboot.
    """
    plugin = '/usr/share/sonic/device/x86_64-dell_s6100_c2538-r0/fast-reboot_plugin'
    try:
        # Output is captured only to keep it off the console.
        subprocess.check_output([plugin])
    except (OSError, subprocess.CalledProcessError) as err:
        sys.stderr.write('Failed to record software reboot: %s\n' % err)
    return
def portio_reg_write(resource, offset, val):
fd = os.open(resource, os.O_RDWR)
if(fd < 0):
@ -21,5 +27,6 @@ def portio_reg_write(resource, offset, val):
os.close(fd)
# Script entry point: record the software-initiated reboot first, then write
# 0xe to I/O port 0xcf9 through PORT_RES ('/dev/port').
# NOTE(review): 0xcf9 looks like the chipset reset-control port and 0xe a
# full-reset request - confirm against the platform documentation.
if __name__ == "__main__":
    log_software_reboot()
    portio_reg_write(PORT_RES, 0xcf9, 0xe)

View File

@ -237,24 +237,6 @@ reset_muxes() {
io_rd_wr.py --set --val 0xff --offset 0x20b
}
track_reboot_reason() {
    # Advance the SMF mailbox power-on reason (mb_poweron_reason) at init:
    #   ff       -> 0x01 if /tmp/notify_firstboot_to_platform exists,
    #               otherwise 0xbb
    #   bb or 1  -> 0xaa
    # Any other value is left untouched.
    if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
        rv=$(cd /sys/devices/platform/SMF.512/hwmon/* && cat mb_poweron_reason)
        reason=$(echo $rv | cut -d 'x' -f2)
        # $reason is quoted so an empty read compares as "" instead of
        # making '[' fail with "unary operator expected".
        if [ "$reason" == "ff" ]; then
            # Never write the value into whatever directory we happen to be
            # in if the hwmon directory cannot be entered.
            cd /sys/devices/platform/SMF.512/hwmon/* || return 0
            if [[ -e /tmp/notify_firstboot_to_platform ]]; then
                echo 0x01 > mb_poweron_reason
            else
                echo 0xbb > mb_poweron_reason
            fi
        elif [ "$reason" == "bb" ] || [ "$reason" == "1" ]; then
            cd /sys/devices/platform/SMF.512/hwmon/* || return 0
            echo 0xaa > mb_poweron_reason
        fi
    fi
}
install_python_api_package() {
device="/usr/share/sonic/device"
platform=$(/usr/local/bin/sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)
@ -277,7 +259,8 @@ if [[ "$1" == "init" ]]; then
modprobe dell_ich
modprobe dell_s6100_iom_cpld
modprobe dell_s6100_lpc
track_reboot_reason
modprobe nvram
systemctl start s6100-reboot-cause.service
# Disable Watchdog Timer
if [[ -e /usr/local/bin/platform_watchdog_disable.sh ]]; then
@ -313,6 +296,7 @@ elif [[ "$1" == "deinit" ]]; then
modprobe -r i2c-mux-pca954x
modprobe -r i2c-dev
modprobe -r dell_ich
modprobe -r nvram
remove_python_api_package
else
echo "s6100_platform : Invalid option !"

View File

@ -0,0 +1,168 @@
#!/bin/bash
# Flags recording whether /host/reboot-cause/platform and the saved
# reboot-reason file inside it already exist (set further down).
reboot_dir_found=false
reboot_file_found=false
# Poll counters for the SMF sysfs directory and the NVRAM device node.
smf_dir_missing=0
nvram_missing=0
# Overwritten with an "Unknown ..." message by _is_unknown_reset.
REBOOT_CAUSE_FILE=/host/reboot-cause/reboot-cause.txt
# Holds "None" or the last smf_poweron_reason value seen by _is_thermal_reset.
REBOOT_REASON_FILE=/host/reboot-cause/platform/reboot_reason
# Holds the BIOS version string written by _get_smf_reset_register.
BIOS_VERSION_FILE=/host/reboot-cause/platform/bios_minor_version
# The '*' in the next three paths is stored literally; it is only expanded
# when the variable is later used unquoted.
SMF_POWERON_REASON=/sys/devices/platform/SMF.512/hwmon/*/smf_poweron_reason
SMF_RESET_REASON=/sys/devices/platform/SMF.512/hwmon/*/smf_reset_reason
MAILBOX_POWERON_REASON=/sys/devices/platform/SMF.512/hwmon/*/mb_poweron_reason
NVRAM_DEVICE_FILE=/dev/nvram
# Receives the four NVRAM reset values saved for later debugging.
RESET_REASON_FILE=/host/reboot-cause/platform/reset_reason
SMF_DIR=/sys/devices/platform/SMF.512/hwmon/
# Wait for the SMF hwmon directory, polling every 0.5 s and printing a
# message on every fifth miss.
# NOTE(review): the wait has no upper bound - confirm that is intended.
while [[ ! -d $SMF_DIR ]]
do
sleep 0.5
let smf_dir_missing=$smf_dir_missing+1
if [[ "$smf_dir_missing" = "5" ]]; then
echo "SMF is not initialized"
smf_dir_missing=0
fi
done
# Reset reason as reported by the SMF; _get_smf_reset_register may replace
# it with the first NVRAM reset value.
SMF_RESET=$(cat $SMF_RESET_REASON)
if [[ -d /host/reboot-cause/platform ]]; then
reboot_dir_found=true
if [[ -f $REBOOT_REASON_FILE ]]; then
reboot_file_found=true
fi
fi
# Read offset 0x203 through io_rd_wr.py, take the third space-separated
# field of its output as a hex number, and keep bit 0, which the script
# treats as "secondary BIOS booted".
# NOTE(review): an empty or non-hex field makes the base-16 conversion
# fail - confirm io_rd_wr.py always prints a value here.
SMF_BIOS_REG=$(io_rd_wr.py --get --offset 0x203 | cut -d " " -f 3)
SMF_BIOS_REG=$((16#$SMF_BIOS_REG))
bios_secondary_boot=$(($SMF_BIOS_REG & 1))
_get_smf_reset_register(){
    # Decide whether the reset reason should come from NVRAM and, if so,
    # load it into SMF_RESET.
    #
    # - BIOS minor version (text after the first '-' in the dmidecode
    #   system-version string) greater than 7: record the version in
    #   $BIOS_VERSION_FILE.
    # - Otherwise, when booted from the primary BIOS
    #   (bios_secondary_boot = 0): remove $BIOS_VERSION_FILE.
    # - While $BIOS_VERSION_FILE exists: read the four reset values from
    #   NVRAM offsets 0x5c-0x5f, use the first one as SMF_RESET unless it
    #   is "ee", save all four for debugging, then reset the slots.
    BIOS_VERSION=$(/usr/sbin/dmidecode -s system-version)
    # Left unquoted so that surrounding whitespace is dropped before cutting.
    BIOS_VERSION_MINOR=$(echo $BIOS_VERSION | cut -d'-' -f 2)

    # Compare only a purely numeric minor, and force base 10 so values such
    # as "08" are not rejected as invalid octal.
    if [[ $BIOS_VERSION_MINOR =~ ^[0-9]+$ ]] && (( 10#$BIOS_VERSION_MINOR > 7 )); then
        echo $BIOS_VERSION > "$BIOS_VERSION_FILE"
    elif [[ "$bios_secondary_boot" = "0" ]]; then
        # For Primary BIOS with older version
        rm -f "$BIOS_VERSION_FILE"
    fi

    if [[ -e $BIOS_VERSION_FILE ]]; then
        # Wait for the NVRAM device node, reporting every fifth miss.
        while [[ ! -e $NVRAM_DEVICE_FILE ]]
        do
            sleep 1
            nvram_missing=$((nvram_missing + 1))
            if [[ "$nvram_missing" = "5" ]]; then
                echo "NVRAM is not initialized"
                nvram_missing=0
            fi
        done

        # nvram_rd_wr.py prints "value <hex>"; keep the hex field.
        first_reset=$(nvram_rd_wr.py --get --offset 0x5c | cut -d " " -f 2)
        second_reset=$(nvram_rd_wr.py --get --offset 0x5d | cut -d " " -f 2)
        third_reset=$(nvram_rd_wr.py --get --offset 0x5e | cut -d " " -f 2)
        fourth_reset=$(nvram_rd_wr.py --get --offset 0x5f | cut -d " " -f 2)

        # "ee" is the cleared marker written below, i.e. nothing recorded.
        if [[ "$first_reset" != "ee" ]]; then
            SMF_RESET=$first_reset
        fi

        # Saving NVRAM values for future debugging
        if [[ $reboot_dir_found = true ]]; then
            echo "First reset - $first_reset" > "$RESET_REASON_FILE"
            echo "Second reset - $second_reset" >> "$RESET_REASON_FILE"
            echo "Third reset - $third_reset" >> "$RESET_REASON_FILE"
            echo "Fourth reset - $fourth_reset" >> "$RESET_REASON_FILE"
        fi

        # Reset the NVRAM slots to 0xee so they can hold the next reset values
        for nvram_offset in 0x58 0x5c 0x5d 0x5e 0x5f; do
            nvram_rd_wr.py --set --val 0xee --offset "$nvram_offset"
        done
    fi
}
_is_thermal_reset() {
    # Compare the current smf_poweron_reason with the value saved in
    # $REBOOT_REASON_FILE and print one digit:
    #   0 - the current reason is "11"
    #   2 - the current reason equals the saved one (nothing new)
    #   1 - the reason changed; the new value is saved to the file
    prev_thermal=$(cat $REBOOT_REASON_FILE)
    # Unquoted on purpose: the path contains a glob for the hwmon directory.
    curr_poweron_reason=$(cat $SMF_POWERON_REASON)

    # Both sides are quoted so the comparison is literal, not a glob match.
    if [[ "$curr_poweron_reason" = "11" ]]; then
        echo 0
    elif [[ "$prev_thermal" = "$curr_poweron_reason" ]]; then
        echo 2
    else
        echo "$curr_poweron_reason" > $REBOOT_REASON_FILE
        echo 1
    fi
}
_is_watchdog_reset(){
    # Print 1 when the effective reset reason ($SMF_RESET) is "33",
    # otherwise print 0.
    curr_reset_reason=$SMF_RESET
    case "$curr_reset_reason" in
        33) echo 1 ;;
        *)  echo 0 ;;
    esac
    return
}
_is_unknown_reset(){
    # Overwrite $REBOOT_CAUSE_FILE with an "unknown" description, but only
    # if that file already exists.
    #   $1 = 0    -> "Unknown software reboot"
    #   otherwise -> the raw power-on, reset and mailbox register values
    if [[ ! -f $REBOOT_CAUSE_FILE ]]; then
        return 0
    fi

    if [[ $1 = 0 ]]; then
        echo "Unknown software reboot" > $REBOOT_CAUSE_FILE
        return
    fi

    mb_poweron_reason=$(cat $MAILBOX_POWERON_REASON)
    curr_reset_reason=$SMF_RESET
    curr_poweron_reason=$(cat $SMF_POWERON_REASON)
    echo "Unknown POR: $curr_poweron_reason RST: $curr_reset_reason MBR: $mb_poweron_reason" > $REBOOT_CAUSE_FILE
}
# Classify the last reboot and store the result in the SMF mailbox
# power-on reason register:
#   mailbox reads ff                  -> 0xbb (saved reason reset to "None")
#   _is_thermal_reset printed 1       -> 0xee
#   watchdog reset and mailbox != cc  -> 0xdd
#   mailbox reads cc                  -> 0xaa
#   anything else                     -> 0x99, after _is_unknown_reset has
#                                        updated the reboot-cause file
# The branches are evaluated in exactly this order.
update_mailbox_register(){
if [[ "$bios_secondary_boot" = "1" ]]; then
echo "Secondary BIOS booted"
fi
# Seed the saved-reason file on the first run so _is_thermal_reset has a
# previous value to compare against.
if [[ $reboot_file_found = false ]]; then
echo "None" > $REBOOT_REASON_FILE
fi
# May replace SMF_RESET with the first reset value stored in NVRAM.
_get_smf_reset_register
if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
is_thermal_reboot=$(_is_thermal_reset)
is_wd_reboot=$(_is_watchdog_reset)
mbr=$(cat $MAILBOX_POWERON_REASON)
# Keep only the text after the 'x' of the register value (e.g. 0xff -> ff).
reason=$(echo $mbr | cut -d 'x' -f2)
if [[ $reason = "ff" ]]; then
echo "None" > $REBOOT_REASON_FILE
echo 0xbb > $MAILBOX_POWERON_REASON
elif [[ $is_thermal_reboot = 1 ]]; then
echo 0xee > $MAILBOX_POWERON_REASON
elif [[ $is_wd_reboot = 1 ]] && [[ $reason != "cc" ]]; then
echo 0xdd > $MAILBOX_POWERON_REASON
elif [[ $reason = "cc" ]]; then
echo 0xaa > $MAILBOX_POWERON_REASON
else
_is_unknown_reset $is_thermal_reboot
echo 0x99 > $MAILBOX_POWERON_REASON
fi
fi
}
# Run the classification once when the script is executed.
update_mailbox_register

View File

@ -0,0 +1 @@
fast-reboot_plugin

View File

@ -18,7 +18,9 @@ try:
from sonic_platform.module import Module
from sonic_platform.thermal import Thermal
from sonic_platform.component import Component
from sonic_platform.watchdog import Watchdog
from eeprom import Eeprom
import time
except ImportError as e:
raise ImportError(str(e) + "- required module not found")
@ -43,6 +45,8 @@ class Chassis(ChassisBase):
reset_reason_dict[33] = ChassisBase.REBOOT_CAUSE_WATCHDOG
reset_reason_dict[44] = ChassisBase.REBOOT_CAUSE_NON_HARDWARE
reset_reason_dict[55] = ChassisBase.REBOOT_CAUSE_NON_HARDWARE
reset_reason_dict[66] = ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER
reset_reason_dict[77] = ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER
power_reason_dict = {}
power_reason_dict[11] = ChassisBase.REBOOT_CAUSE_POWER_LOSS
@ -76,10 +80,15 @@ class Chassis(ChassisBase):
component = Component(i)
self._component_list.append(component)
self._watchdog = Watchdog()
def _get_reboot_reason_smf_register(self):
    # Read the SMF 'mb_poweron_reason' mailbox register and return it as an
    # integer (the register text is parsed as base 16).
    #
    # In S6100, the mb_poweron_reason register returns:
    #   0xaa or 0xcc on software reload
    #   0xff or 0xbb on power-cycle
    #   0xdd on Watchdog
    #   0xee on Thermal Shutdown
    #   0x99 on Unknown reset
    # NOTE(review): an earlier description also listed 0x01 for first-boot;
    # confirm whether that value can still be reported.
    smf_mb_reg_reason = self._get_pmc_register('mb_poweron_reason')
    return int(smf_mb_reg_reason, 16)
@ -188,27 +197,21 @@ class Chassis(ChassisBase):
power_reason = int(self._get_pmc_register('smf_poweron_reason'))
smf_mb_reg_reason = self._get_reboot_reason_smf_register()
if ((smf_mb_reg_reason == 0x01) and (power_reason == 0x11)):
if ((smf_mb_reg_reason == 0xbb) or (smf_mb_reg_reason == 0xff)):
return (ChassisBase.REBOOT_CAUSE_POWER_LOSS, None)
elif ((smf_mb_reg_reason == 0xaa) or (smf_mb_reg_reason == 0xcc)):
return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
elif (smf_mb_reg_reason == 0xdd):
return (ChassisBase.REBOOT_CAUSE_WATCHDOG, None)
elif (smf_mb_reg_reason == 0xee):
return (self.power_reason_dict[power_reason], None)
elif (reset_reason == 66):
return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER,
"Emulated Cold Reset")
elif (reset_reason == 77):
return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER,
"Emulated Warm Reset")
else:
return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
# Reset_Reason = 11 ==> PowerLoss
# So return the reboot reason from Last Power_Reason Dictionary
# If Reset_Reason is not 11 return from Reset_Reason dictionary
# Also check if power_reason, reset_reason are valid values by
# checking key presence in dictionary else return
# REBOOT_CAUSE_HARDWARE_OTHER as the Power_Reason and Reset_Reason
# registers returned invalid data
# In S6100, if Reset_Reason is not 11 and smf_mb_reg_reason
# is ff or bb, then it is PowerLoss
if (reset_reason == 11):
if (power_reason in self.power_reason_dict):
return (self.power_reason_dict[power_reason], None)
else:
if ((smf_mb_reg_reason == 0xbb) or (smf_mb_reg_reason == 0xff)):
return (ChassisBase.REBOOT_CAUSE_POWER_LOSS, None)
if (reset_reason in self.reset_reason_dict):
return (self.reset_reason_dict[reset_reason], None)
return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER, "Invalid Reason")

View File

@ -35,9 +35,13 @@ class Eeprom(eeprom_tlvinfo.TlvInfoDecoder):
else:
self.eeprom_data = self.read_eeprom()
except:
self.eeprom_data = "N/A"
if not self.is_module:
raise RuntimeError("Eeprom is not Programmed")
try:
self.eeprom_data = self.read_eeprom()
except:
raise RuntimeError("Eeprom is not Programmed")
else:
self.eeprom_data = "N/A"
else:
eeprom = self.eeprom_data

View File

@ -0,0 +1,12 @@
# One-shot unit that runs the S6100 reboot-reason tracking script.
[Unit]
Description=Read Dell S6100 reboot cause
# Ordered ahead of process-reboot-cause.service.
Before=process-reboot-cause.service
DefaultDependencies=no
[Service]
Type=oneshot
ExecStart=/usr/share/sonic/device/x86_64-dell_s6100_c2538-r0/track_reboot_reason.sh
# The unit is not kept "active" once the script has exited.
RemainAfterExit=no
[Install]
WantedBy=multi-user.target