[devices]: Poller to detect Intel Rangeley LPC failure for dell z9100/s6100 (#3065)

- What I did
Added a daemon to log LPC bus degradation on Intel C2000 processors. Intel Rangeley C2000 processors with revision less than or equal to 2 have an issue where the LPC bus degrades over time on some units. To identify the problem and report it, a daemon has been added that writes a log message when it encounters the issue.

- How I did it
Added a daemon which validates the CPLD scratch(0x102) and SMF scratch(0x202) registers by writing and reading values on regular polling intervals (300 seconds). If there is a discrepancy between read and write, a critical log will be thrown.

- How to verify it
The infrastructure is verified by simulating the issue: the register value is modified between the write and the read, and the appearance of the log message is checked.

- Description for the changelog

Added Daemon to identify LPC bus degradation issue and notify using syslog in Dell S6100 and Z9100 platforms. This daemon will only run on processors with revision less than or equal to 2.
This commit is contained in:
Sudharsan D.G 2019-06-24 08:13:40 -07:00 committed by Ying Xie
parent 1f210771d1
commit 7271f9d17c
7 changed files with 81 additions and 0 deletions

View File

@ -0,0 +1,46 @@
#!/bin/bash
#
# LPC bus monitor: exits immediately when the device at PCI address 0:0.0
# reports a revision greater than 2; otherwise the rest of the script runs.

#######################################
# Decide whether a revision string denotes a part that needs no monitoring.
# lspci prints the revision in hexadecimal, so the value is parsed as base 16.
# Arguments: $1 - revision text taken from the "(rev XX)" part of lspci output
# Returns:   0 if the revision is valid hex and greater than 2;
#            1 otherwise (including empty or unparseable input, so that an
#            unknown revision keeps the monitor running)
#######################################
lpc_rev_is_unaffected() {
  local rev=$1
  [[ $rev =~ ^[0-9a-fA-F]+$ ]] || return 1
  (( 16#$rev > 2 ))
}

REV=$(lspci -xxx -s 0:0.0 | grep rev | awk -F 'rev ' '{print $2}' | sed 's/)//')
if lpc_rev_is_unaffected "$REV"
then
  exit 0
fi
# Offsets of the two scratch registers that are exercised on every poll.
cpld_scratch_reg=0x102
smf_scratch_reg=0x202

# Delay between two polling rounds, handed to sleep.
poll_interval=300

# Test patterns written to the scratch registers; index selects the pattern
# used in the current round and cycles through all num_val entries.
test_val=('55' 'aa')
num_val="${#test_val[@]}"
index=0
#######################################
# Send a message to syslog with priority user.crit and tag DELL_LPC_BUS_MON.
# Arguments: $1 - message text
#######################################
function log_crit() {
  local msg=$1
  # Call logger directly (no command substitution, which would try to execute
  # logger's output) and pass the message as a single quoted argument so it is
  # neither word-split nor glob-expanded; "--" guards a leading dash.
  logger -p user.crit -t DELL_LPC_BUS_MON -- "$msg"
}
#######################################
# Write a test value to a scratch register through io_rd_wr.py, read it back,
# and raise a critical log via log_crit when the two values differ.
# Globals:   smf_scratch_reg (read) - only used to choose the register name
#            shown in the log message
# Arguments: $1 - register offset passed to io_rd_wr.py --offset
#            $2 - value to write and to expect on read-back
#######################################
function validate_lpc() {
  local reg=$1
  local val=$2
  local reg_str="CPLD scratch register"
  local get_val
  local msg

  if [[ "$reg" == "$smf_scratch_reg" ]]; then
    reg_str="SMF scratch register"
  fi

  io_rd_wr.py --set --val "$val" --offset "$reg"
  # The third space-separated field of the tool's output is taken as the value.
  get_val=$(io_rd_wr.py --get --offset "$reg" | cut -d " " -f3)

  # Quoted comparison: an empty read-back (for instance when the read itself
  # fails) is reported as a mismatch instead of breaking the test expression
  # and going unnoticed.
  if [[ "$val" != "$get_val" ]]; then
    msg="LPC bus has deteriorated on this unit."
    msg+=" $reg_str has value ${get_val:-<none>} while expected is $val"
    msg+=" Please contact technical support"
    log_crit "$msg"
  fi
}
# Poll forever: each round checks the CPLD scratch register and then the SMF
# scratch register with the current test pattern, advances to the next
# pattern (wrapping around), and sleeps until the next round.
while :; do
  val="${test_val[index]}"
  for scratch_reg in "$cpld_scratch_reg" "$smf_scratch_reg"; do
    validate_lpc "$scratch_reg" "$val"
  done
  index=$(( (index + 1) % num_val ))
  sleep "$poll_interval"
done

View File

@ -4,9 +4,11 @@ common/dell_i2c_utils.sh usr/local/bin
common/io_rd_wr.py usr/local/bin
common/fstrim.timer etc/systemd/system
common/fstrim.service etc/systemd/system
common/dell_lpc_mon.sh usr/local/bin
s6100/scripts/platform_sensors.py usr/local/bin
s6100/scripts/platform_watchdog_enable.sh usr/local/bin
s6100/scripts/platform_watchdog_disable.sh usr/local/bin
s6100/scripts/sensors usr/bin
s6100/systemd/platform-modules-s6100.service etc/systemd/system
s6100/modules/sonic_platform-1.0-py2-none-any.whl usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/systemd/s6100-lpc-monitor.service etc/systemd/system

View File

@ -8,4 +8,8 @@ systemctl start fstrim.timer
depmod -a
systemctl enable platform-modules-s6100.service
systemctl start platform-modules-s6100.service
systemctl enable s6100-lpc-monitor.service
systemctl start s6100-lpc-monitor.service
#DEBHELPER#

View File

@ -1,6 +1,7 @@
z9100/scripts/check_qsfp.sh usr/local/bin
z9100/scripts/z9100_platform.sh usr/local/bin
common/dell_i2c_utils.sh usr/local/bin
common/dell_lpc_mon.sh usr/local/bin
common/io_rd_wr.py usr/local/bin
common/fstrim.timer etc/systemd/system
common/fstrim.service etc/systemd/system
@ -10,3 +11,4 @@ z9100/scripts/sensors usr/bin
z9100/modules/sonic_platform-1.0-py2-none-any.whl usr/share/sonic/device/x86_64-dell_z9100_c2538-r0
z9100/cfg/z9100-modules.conf etc/modules-load.d
z9100/systemd/platform-modules-z9100.service etc/systemd/system
z9100/systemd/z9100-lpc-monitor.service etc/systemd/system

View File

@ -10,4 +10,7 @@ systemctl enable platform-modules-z9100.service
systemctl start platform-modules-z9100.service
systemctl enable z9100-lpc-monitor.service
systemctl start z9100-lpc-monitor.service
#DEBHELPER#

View File

@ -0,0 +1,12 @@
# systemd unit that runs the LPC bus monitoring script on Dell S6100.
[Unit]
Description=Dell S6100 LPC bus monitoring poller
# Opt out of the implicit default dependencies systemd adds to service units.
DefaultDependencies=no
[Service]
# The monitoring script is executed as root.
User=root
ExecStart=/usr/local/bin/dell_lpc_mon.sh
# Keep the unit in the active state even after the ExecStart process exits.
# NOTE(review): presumably so the unit is not shown as dead when the script
# exits right away on CPU revisions that need no monitoring - confirm.
RemainAfterExit=yes
[Install]
# Enabling the unit hooks it into multi-user.target.
WantedBy=multi-user.target

View File

@ -0,0 +1,12 @@
# systemd unit that runs the LPC bus monitoring script on Dell Z9100.
[Unit]
Description=Dell Z9100 LPC bus monitoring poller
# Opt out of the implicit default dependencies systemd adds to service units.
DefaultDependencies=no
[Service]
# The monitoring script is executed as root.
User=root
ExecStart=/usr/local/bin/dell_lpc_mon.sh
# Keep the unit in the active state even after the ExecStart process exits.
# NOTE(review): presumably so the unit is not shown as dead when the script
# exits right away on CPU revisions that need no monitoring - confirm.
RemainAfterExit=yes
[Install]
# Enabling the unit hooks it into multi-user.target.
WantedBy=multi-user.target