[ci] Kill hung docker build processes to avoid build timeouts. (#13726)

Why I did it
Docker build occasionally hangs.
It hangs at different steps, so it looks like a bug in the docker daemon.

How I did it
Start a background daemon process that periodically scans for docker build processes that have been running for more than 1 hour, and kills them.

How to verify it
This commit is contained in:
Liu Shilong 2023-02-16 21:58:14 +08:00 committed by GitHub
parent aee97a69c6
commit 22e46207c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 33 additions and 0 deletions

View File

@ -109,6 +109,7 @@ jobs:
buildSteps: buildSteps:
- template: template-skipvstest.yml - template: template-skipvstest.yml
- template: template-daemon.yml
- bash: | - bash: |
set -ex set -ex
if [ $(GROUP_NAME) == vs ]; then if [ $(GROUP_NAME) == vs ]; then

View File

@ -1,5 +1,11 @@
steps: steps:
- script: | - script: |
set -x
# kill daemon process
ps $(cat /tmp/azp_daemon_kill_docker_pid)
sudo kill $(cat /tmp/azp_daemon_kill_docker_pid)
rm /tmp/azp_daemon_kill_docker_pid
if sudo [ -f /var/run/march/docker.pid ] ; then if sudo [ -f /var/run/march/docker.pid ] ; then
pid=`sudo cat /var/run/march/docker.pid` ; sudo kill $pid pid=`sudo cat /var/run/march/docker.pid` ; sudo kill $pid
fi fi
@ -11,4 +17,5 @@ steps:
pid=`sudo cat dockerfs/var/run/docker.pid` ; sudo kill $pid pid=`sudo cat dockerfs/var/run/docker.pid` ; sudo kill $pid
fi fi
sudo rm -rf $(ls -A1) sudo rm -rf $(ls -A1)
condition: always()
displayName: "Clean Workspace" displayName: "Clean Workspace"

View File

@ -0,0 +1,24 @@
# Step template: launches a background watchdog that kills "docker build"
# processes which have been running longer than DOCKER_BUILD_TIMEOUT seconds.
#
# - The watchdog wakes every 120 seconds and lists processes whose command
#   name is "docker" and whose arguments contain "docker build".
# - A process's age is computed from the timestamp of its /proc/<pid> entry.
# - Each kill is recorded in target/daemon.log.
# - The watchdog's own PID is appended to /tmp/azp_daemon_kill_docker_pid so
#   that a later step can stop it.
# - DOCKER_BUILD_TIMEOUT is written in pipeline-macro form and is expected to
#   be substituted with a number of seconds before the script runs.
steps:
- bash: |
    (
      while true
      do
        sleep 120
        now=$(date +%s)
        # ps right-aligns the PID column, so short PIDs have leading spaces;
        # awk splits on runs of whitespace, whereas cut -d" " -f1 would
        # return an empty field and silently miss those processes.
        pids=$(ps -C docker -o pid,etime,args | grep "docker build" | awk '{print $1}')
        for pid in $pids
        do
          # The process may have exited since the ps listing; skip it rather
          # than computing a bogus age from an empty timestamp.
          [ -d /proc/$pid ] || continue
          start=$(date --date="$(ls -dl /proc/$pid --time-style full-iso | awk '{print$6,$7}')" +%s)
          time_s=$(($now-$start))
          if [[ $time_s -gt $(DOCKER_BUILD_TIMEOUT) ]]; then
            echo =========== $(date +%F%T) $time_s &>> target/daemon.log
            ps $pid &>> target/daemon.log
            sudo kill $pid
          fi
        done
      done
    ) &
    daemon_pid=$!
    # Show the watchdog in the step log, then record its PID for cleanup.
    ps $daemon_pid
    echo $daemon_pid >> /tmp/azp_daemon_kill_docker_pid
  displayName: start daemon to kill hang docker

View File

@ -5,3 +5,4 @@ variables:
SONIC_BUILD_RETRY_COUNT: 3 SONIC_BUILD_RETRY_COUNT: 3
SONIC_BUILD_RETRY_INTERVAL: 600 SONIC_BUILD_RETRY_INTERVAL: 600
DOCKER_BUILDKIT: 0 DOCKER_BUILDKIT: 0
DOCKER_BUILD_TIMEOUT: 3600