[devices]: Poller to detect Intel Rangeley LPC failure for Dell Z9100/S6100 (#3065)
- What I did: Added a daemon to log LPC bus degradation on the Intel C2000 processor. Intel Rangeley C2000 processors with revision less than or equal to 2 have an issue where the LPC bus degrades over time on some processors. To identify the problem and raise a notification, a daemon has been added which logs a message when it encounters the issue. - How I did it: Added a daemon which validates the CPLD scratch (0x102) and SMF scratch (0x202) registers by writing values and reading them back at a regular polling interval (300 seconds). If there is a discrepancy between the value written and the value read, a critical log is emitted. - How to verify it: The change was verified by simulating the issue: the value in the register is modified between the write and the read, and the appearance of the log message is checked. - Description for the changelog: Added a daemon to identify the LPC bus degradation issue and notify via syslog on Dell S6100 and Z9100 platforms. This daemon only runs on processors with revision less than or equal to 2.
This commit is contained in:
parent
1f210771d1
commit
7271f9d17c
46
platform/broadcom/sonic-platform-modules-dell/common/dell_lpc_mon.sh
Executable file
46
platform/broadcom/sonic-platform-modules-dell/common/dell_lpc_mon.sh
Executable file
@ -0,0 +1,46 @@
|
||||
#!/bin/bash
|
||||
# Revision gate: the poller is only needed when PCI device 0:0.0 reports a
# revision of 2 or lower, so exit successfully right away on newer revisions.
#
# lspci prints the revision as hexadecimal digits inside "(rev NN)". Only that
# field is extracted, and it is compared as a base-16 number.
REV=$(lspci -s 0:0.0 \
  | sed -n 's/.*(rev \([0-9a-fA-F][0-9a-fA-F]*\)).*/\1/p' \
  | head -n 1)

# If the revision could not be read, REV is empty and the script carries on,
# which matches what the unquoted test did before (it errored and fell
# through) but without the "unary operator expected" noise.
if [[ "$REV" =~ ^[0-9a-fA-F]+$ ]] && (( 16#$REV > 2 )); then
    exit 0
fi
|
||||
|
||||
# Test patterns written to the scratch registers, one per polling pass.
readonly -a test_val=(55 aa)
# Number of test patterns; used to wrap the rotating index.
readonly num_val=${#test_val[@]}
# Position in test_val of the pattern for the next pass (changes at runtime).
index=0
# Seconds to sleep between polling passes.
readonly poll_interval=300
# Register offsets handed to io_rd_wr.py for the CPLD and SMF scratch registers.
readonly cpld_scratch_reg=0x102
readonly smf_scratch_reg=0x202
|
||||
|
||||
# Send a critical-severity message to syslog under the DELL_LPC_BUS_MON tag.
# Arguments: $1 - message text; handed to logger as one argument, unmodified.
# Returns:   the exit status of logger.
log_crit() {
    local msg=$1

    # logger is invoked directly (not inside a command substitution, which
    # would try to execute whatever it printed), and the message is quoted so
    # it is neither word-split nor glob-expanded. "--" keeps a message that
    # starts with "-" from being parsed as an option.
    logger -p user.crit -t DELL_LPC_BUS_MON -- "$msg"
}
|
||||
|
||||
# Write a test value to a scratch register, read it back, and raise a critical
# log entry when the value read differs from the value written.
# Globals:   smf_scratch_reg (read) - only used to pick the register label
# Arguments: $1 - register offset passed to io_rd_wr.py via --offset
#            $2 - value to write and to expect on read-back
# Outputs:   a log_crit message on mismatch; nothing otherwise
validate_lpc() {
    local reg=$1
    local val=$2
    local reg_str="CPLD scratch register"
    local get_val

    if [[ "$reg" == "$smf_scratch_reg" ]]; then
        reg_str="SMF scratch register"
    fi

    io_rd_wr.py --set --val "$val" --offset "$reg"
    # The read-back value is the third space-separated field of the --get output.
    get_val=$(io_rd_wr.py --get --offset "$reg" | cut -d " " -f3)

    # Both sides are quoted so that an empty read-back (for example when the
    # read itself fails) counts as a mismatch and gets reported, instead of
    # breaking the comparison and being silently skipped.
    if [[ "$val" != "$get_val" ]]; then
        log_crit "LPC bus has deteriorated on this unit. $reg_str has value ${get_val:-<none>} while expected is $val Please contact technical support"
    fi
}
|
||||
# Poll forever. Each pass writes the current test pattern to the CPLD scratch
# register and then the SMF scratch register through validate_lpc, advances to
# the next pattern (wrapping around), and sleeps for poll_interval seconds.
while :; do
    val=${test_val[index]}
    for scratch_reg in "$cpld_scratch_reg" "$smf_scratch_reg"; do
        validate_lpc "$scratch_reg" "$val"
    done
    index=$(( (index + 1) % num_val ))
    sleep "$poll_interval"
done
|
@ -4,9 +4,11 @@ common/dell_i2c_utils.sh usr/local/bin
|
||||
common/io_rd_wr.py usr/local/bin
|
||||
common/fstrim.timer etc/systemd/system
|
||||
common/fstrim.service etc/systemd/system
|
||||
common/dell_lpc_mon.sh usr/local/bin
|
||||
s6100/scripts/platform_sensors.py usr/local/bin
|
||||
s6100/scripts/platform_watchdog_enable.sh usr/local/bin
|
||||
s6100/scripts/platform_watchdog_disable.sh usr/local/bin
|
||||
s6100/scripts/sensors usr/bin
|
||||
s6100/systemd/platform-modules-s6100.service etc/systemd/system
|
||||
s6100/modules/sonic_platform-1.0-py2-none-any.whl usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
|
||||
s6100/systemd/s6100-lpc-monitor.service etc/systemd/system
|
||||
|
@ -8,4 +8,8 @@ systemctl start fstrim.timer
|
||||
depmod -a
|
||||
systemctl enable platform-modules-s6100.service
|
||||
systemctl start platform-modules-s6100.service
|
||||
|
||||
systemctl enable s6100-lpc-monitor.service
|
||||
systemctl start s6100-lpc-monitor.service
|
||||
|
||||
#DEBHELPER#
|
||||
|
@ -1,6 +1,7 @@
|
||||
z9100/scripts/check_qsfp.sh usr/local/bin
|
||||
z9100/scripts/z9100_platform.sh usr/local/bin
|
||||
common/dell_i2c_utils.sh usr/local/bin
|
||||
common/dell_lpc_mon.sh usr/local/bin
|
||||
common/io_rd_wr.py usr/local/bin
|
||||
common/fstrim.timer etc/systemd/system
|
||||
common/fstrim.service etc/systemd/system
|
||||
@ -10,3 +11,4 @@ z9100/scripts/sensors usr/bin
|
||||
z9100/modules/sonic_platform-1.0-py2-none-any.whl usr/share/sonic/device/x86_64-dell_z9100_c2538-r0
|
||||
z9100/cfg/z9100-modules.conf etc/modules-load.d
|
||||
z9100/systemd/platform-modules-z9100.service etc/systemd/system
|
||||
z9100/systemd/z9100-lpc-monitor.service etc/systemd/system
|
||||
|
@ -10,4 +10,7 @@ systemctl enable platform-modules-z9100.service
|
||||
systemctl start platform-modules-z9100.service
|
||||
|
||||
|
||||
systemctl enable z9100-lpc-monitor.service
|
||||
systemctl start z9100-lpc-monitor.service
|
||||
|
||||
#DEBHELPER#
|
||||
|
@ -0,0 +1,12 @@
|
||||
# systemd unit that runs the Dell LPC bus monitoring poller on the S6100.
[Unit]
|
||||
Description=Dell S6100 LPC bus monitoring poller
|
||||
# Opt out of the implicit dependencies systemd adds to service units by default.
DefaultDependencies=no
|
||||
|
||||
[Service]
|
||||
User=root
|
||||
# Poller script; it exits 0 right away when the PCI revision is greater than 2.
ExecStart=/usr/local/bin/dell_lpc_mon.sh
|
||||
# Keep the unit reported as active even after the script has exited.
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
# Enabling the unit hooks it into multi-user.target.
WantedBy=multi-user.target
|
||||
|
@ -0,0 +1,12 @@
|
||||
# systemd unit that runs the Dell LPC bus monitoring poller on the Z9100.
[Unit]
|
||||
Description=Dell Z9100 LPC bus monitoring poller
|
||||
# Opt out of the implicit dependencies systemd adds to service units by default.
DefaultDependencies=no
|
||||
|
||||
[Service]
|
||||
User=root
|
||||
# Poller script; it exits 0 right away when the PCI revision is greater than 2.
ExecStart=/usr/local/bin/dell_lpc_mon.sh
|
||||
# Keep the unit reported as active even after the script has exited.
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
# Enabling the unit hooks it into multi-user.target.
WantedBy=multi-user.target
|
||||
|
Loading…
Reference in New Issue
Block a user