[ci] Kill hanged docker build process to avoid build timeout issue. (#13726) (#13730)

Why I did it
Docker build has a low rate of hanging up.
It hangs on different steps. So, it looks like a bug in docker daemon.

How I did it
Start a daemon process to scan running time more than 1 hours, and kill the process.

How to verify it
This commit is contained in:
Liu Shilong 2023-02-20 18:16:05 +08:00 committed by GitHub
parent ddc6ae1d8b
commit b18751f721
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 0 deletions

View File

@ -98,6 +98,7 @@ jobs:
buildSteps:
- template: template-skipvstest.yml
- template: template-daemon.yml
- bash: |
set -ex
if [ $(GROUP_NAME) == vs ]; then

View File

@ -1,5 +1,11 @@
steps:
- script: |
set -x
# kill daemon process
ps $(cat /tmp/azp_daemon_kill_docker_pid)
sudo kill $(cat /tmp/azp_daemon_kill_docker_pid)
rm /tmp/azp_daemon_kill_docker_pid
if sudo [ -f /var/run/march/docker.pid ] ; then
pid=`sudo cat /var/run/march/docker.pid` ; sudo kill $pid
fi
@ -11,4 +17,5 @@ steps:
pid=`sudo cat dockerfs/var/run/docker.pid` ; sudo kill $pid
fi
sudo rm -rf $(ls -A1)
condition: always()
displayName: "Clean Workspace"

View File

@ -0,0 +1,24 @@
steps:
- bash: |
(
while true
do
sleep 120
now=$(date +%s)
pids=$(ps -C docker -o pid,etime,args | grep "docker build" | cut -d" " -f1)
for pid in $pids
do
start=$(date --date="$(ls -dl /proc/$pid --time-style full-iso | awk '{print$6,$7}')" +%s)
time_s=$(($now-$start))
if [[ $time_s -gt $(DOCKER_BUILD_TIMEOUT) ]]; then
echo =========== $(date +%F%T) $time_s &>> target/daemon.log
ps $pid &>> target/daemon.log
sudo kill $pid
fi
done
done
) &
daemon_pid=$!
ps $daemon_pid
echo $daemon_pid >> /tmp/azp_daemon_kill_docker_pid
displayName: start daemon to kill hang docker

View File

@ -4,3 +4,5 @@ variables:
SONIC_SLAVE_DOCKER_DRIVER: 'overlay2'
SONIC_BUILD_RETRY_COUNT: 3
SONIC_BUILD_RETRY_INTERVAL: 600
DOCKER_BUILDKIT: 0
DOCKER_BUILD_TIMEOUT: 3600