Add pcie-check service to check PCIe devices at boot (#4771)

* PCIe Monitor service

* Add rescan to pcie-mon.service when it fails to get all pcie devices

* space

* Clean up

* review comments

* update the pcie status in state db

* update the failed pcie status once at the end

* Update the pcie_status in STATE_DB and rename the service

* Add log to exit the service if the configuration file doesn't exist.

* fix the build failure

* Redo the pcie rescan for pcie-check failed case.

* review comments

* review comments

* review comments
This commit is contained in:
Sujin Kang 2020-07-13 14:15:09 -07:00 committed by GitHub
parent 76d68ad1f5
commit bf45e11d27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 84 additions and 0 deletions

View File

@ -0,0 +1,9 @@
# Timer unit that delays the PCIe check until shortly after boot.
[Unit]
Description=Start the pcie-check.service 10 seconds after boot
[Timer]
# Elapse 10 seconds after the machine has booted.
OnBootSec=10sec
# Unit activated when the timer elapses.
Unit=pcie-check.service
[Install]
# Enabling this timer hooks it into timers.target.
WantedBy=timers.target

View File

@ -364,6 +364,15 @@ sudo cp $IMAGE_CONFIGS/procdockerstatsd/procdockerstatsd.service $FILESYSTEM_ROO
echo "procdockerstatsd.service" | sudo tee -a $GENERATED_SERVICE_FILE
sudo cp $IMAGE_CONFIGS/procdockerstatsd/procdockerstatsd $FILESYSTEM_ROOT/usr/bin/
# Copy the pcie-check systemd timer unit into the image and enable it
# inside the chroot so it is active in the built filesystem
sudo cp $BUILD_TEMPLATES/pcie-check.timer $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM
sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable pcie-check.timer
# Copy pcie-check service files: the unit file goes next to the timer and its
# name is appended to $GENERATED_SERVICE_FILE; the script it executes is
# installed into /usr/bin of the image
sudo cp $IMAGE_CONFIGS/pcie-check/pcie-check.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM
echo "pcie-check.service" | sudo tee -a $GENERATED_SERVICE_FILE
sudo cp $IMAGE_CONFIGS/pcie-check/pcie-check.sh $FILESYSTEM_ROOT/usr/bin/
# Copy systemd timer configuration
# It implements delayed start of services
sudo cp $BUILD_TEMPLATES/process-reboot-cause.timer $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM

View File

@ -0,0 +1,7 @@
# Service unit that runs the PCIe presence/status check script.
# It has no [Install] section of its own; pcie-check.timer names it as the
# unit to start.
[Unit]
Description=Check the PCIe device presence and status
# Ordering only: if rc.local.service is also being started, start after it.
After=rc.local.service
[Service]
# NOTE(review): the script runs to completion and exits; Type=oneshot may
# describe that better than Type=simple - confirm before changing.
Type=simple
ExecStart=/usr/bin/pcie-check.sh

View File

@ -0,0 +1,59 @@
#! /bin/bash
## Check the platform PCIe device presence and status
# When set to "yes", debug() also prints each message to stdout in addition
# to sending it to logger.
VERBOSE="no"
# Text used to grep the summary line out of the `pcieutil pcie-check` output.
RESULTS="PCIe Device Checking All Test"
# Exact summary line that is treated as a successful check.
EXPECTED="PCIe Device Checking All Test ----------->>> PASSED"
# Upper bound, in seconds, for how long the check loop keeps running; the PCI
# bus rescan is attempted once roughly half of this time has elapsed.
MAX_WAIT_SECONDS=15
#######################################
# Send a message to the system log and, in verbose mode, to stdout as well.
# Globals:   VERBOSE (read) - "yes" enables the stdout copy
# Arguments: $1 - message text
# Outputs:   timestamped message on stdout when VERBOSE is "yes"
#######################################
debug() {
    local message=$1

    /usr/bin/logger "$0 : ${message}"
    # Keep this as an if-statement: the function must return 0 when the
    # verbose branch is skipped, because callers follow it with a bare `exit`.
    if [[ "${VERBOSE}" == "yes" ]]; then
        printf '%s\n' "$(date) $0: ${message}"
    fi
}
#######################################
# Verify the platform PCIe devices, retrying until they pass or time runs out.
#
# Polls `pcieutil pcie-check` for up to MAX_WAIT_SECONDS. Once more than half
# of that window has elapsed without a pass, a single PCI bus rescan is
# triggered and polling continues. The outcome is written with
# `redis-cli -n 6` to the key "PCIE_STATUS|PCIE_DEVICES" as "PASSED" or
# "FAILED".
#
# Globals:   RESULTS, EXPECTED, MAX_WAIT_SECONDS (read)
# Arguments: none
# Returns:   exits the script when the check passes or when the platform has
#            no pcie.yaml; otherwise returns the status of the final
#            redis-cli call after recording the failure.
#######################################
function check_and_rescan_pcie_devices()
{
    local platform yaml_file check_result
    local begin end rescan_time now

    # Without the platform's pcie.yaml there is nothing to check against, so
    # bail out before running pcieutil at all.
    platform=$(sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)
    yaml_file="/usr/share/sonic/device/${platform}/plugins/pcie.yaml"
    if [ ! -f "$yaml_file" ]; then
        debug "pcie.yaml does not exist! can't check pcie status!"
        exit
    fi

    begin=$SECONDS
    end=$((begin + MAX_WAIT_SECONDS))
    rescan_time=$((begin + MAX_WAIT_SECONDS / 2))

    while true
    do
        now=$SECONDS
        if [[ $now -gt $end ]]; then
            break
        fi

        # Run the check on every iteration. Capturing the result once before
        # the loop would make the retries and the bus rescan pointless,
        # because the comparison below would never see a fresh result.
        check_result=$(sudo pcieutil pcie-check | grep "$RESULTS")
        if [ "$check_result" = "$EXPECTED" ]; then
            redis-cli -n 6 SET "PCIE_STATUS|PCIE_DEVICES" "PASSED"
            debug "PCIe check passed"
            exit
        else
            debug "sleep 0.1 seconds"
            sleep 0.1
        fi

        if [ "$now" -gt "$rescan_time" ]; then
            debug "PCIe check failed, try pci bus rescan"
            echo 1 > /sys/bus/pci/rescan
            # Push the threshold to the deadline so the rescan happens once.
            rescan_time=$end
        fi
    done

    debug "PCIe check failed"
    redis-cli -n 6 SET "PCIE_STATUS|PCIE_DEVICES" "FAILED"
}
# Script entry point: run the PCIe check (with retry and rescan) once.
check_and_rescan_pcie_devices