[ci] Kill hung docker build processes to avoid build timeouts. (#13726)
Why I did it: Docker builds occasionally hang. The hang occurs at different steps, so it looks like a bug in the Docker daemon. How I did it: Start a background daemon process that wakes up every 120 seconds, scans for `docker build` processes that have been running for more than 1 hour (DOCKER_BUILD_TIMEOUT, 3600 seconds), and kills them. How to verify it:
This commit is contained in:
parent
aee97a69c6
commit
22e46207c8
@ -109,6 +109,7 @@ jobs:
|
|||||||
|
|
||||||
buildSteps:
|
buildSteps:
|
||||||
- template: template-skipvstest.yml
|
- template: template-skipvstest.yml
|
||||||
|
- template: template-daemon.yml
|
||||||
- bash: |
|
- bash: |
|
||||||
set -ex
|
set -ex
|
||||||
if [ $(GROUP_NAME) == vs ]; then
|
if [ $(GROUP_NAME) == vs ]; then
|
||||||
|
@ -1,5 +1,11 @@
|
|||||||
steps:
|
steps:
|
||||||
- script: |
|
- script: |
|
||||||
|
set -x
|
||||||
|
# kill daemon process
|
||||||
|
ps $(cat /tmp/azp_daemon_kill_docker_pid)
|
||||||
|
sudo kill $(cat /tmp/azp_daemon_kill_docker_pid)
|
||||||
|
rm /tmp/azp_daemon_kill_docker_pid
|
||||||
|
|
||||||
if sudo [ -f /var/run/march/docker.pid ] ; then
|
if sudo [ -f /var/run/march/docker.pid ] ; then
|
||||||
pid=`sudo cat /var/run/march/docker.pid` ; sudo kill $pid
|
pid=`sudo cat /var/run/march/docker.pid` ; sudo kill $pid
|
||||||
fi
|
fi
|
||||||
@ -11,4 +17,5 @@ steps:
|
|||||||
pid=`sudo cat dockerfs/var/run/docker.pid` ; sudo kill $pid
|
pid=`sudo cat dockerfs/var/run/docker.pid` ; sudo kill $pid
|
||||||
fi
|
fi
|
||||||
sudo rm -rf $(ls -A1)
|
sudo rm -rf $(ls -A1)
|
||||||
|
condition: always()
|
||||||
displayName: "Clean Workspace"
|
displayName: "Clean Workspace"
|
||||||
|
24
.azure-pipelines/template-daemon.yml
Normal file
24
.azure-pipelines/template-daemon.yml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
steps:
|
||||||
|
- bash: |
|
||||||
|
(
|
||||||
|
while true
|
||||||
|
do
|
||||||
|
sleep 120
|
||||||
|
now=$(date +%s)
|
||||||
|
pids=$(ps -C docker -o pid,etime,args | grep "docker build" | cut -d" " -f1)
|
||||||
|
for pid in $pids
|
||||||
|
do
|
||||||
|
start=$(date --date="$(ls -dl /proc/$pid --time-style full-iso | awk '{print$6,$7}')" +%s)
|
||||||
|
time_s=$(($now-$start))
|
||||||
|
if [[ $time_s -gt $(DOCKER_BUILD_TIMEOUT) ]]; then
|
||||||
|
echo =========== $(date +%F%T) $time_s &>> target/daemon.log
|
||||||
|
ps $pid &>> target/daemon.log
|
||||||
|
sudo kill $pid
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
done
|
||||||
|
) &
|
||||||
|
daemon_pid=$!
|
||||||
|
ps $daemon_pid
|
||||||
|
echo $daemon_pid >> /tmp/azp_daemon_kill_docker_pid
|
||||||
|
displayName: start daemon to kill hang docker
|
@ -5,3 +5,4 @@ variables:
|
|||||||
SONIC_BUILD_RETRY_COUNT: 3
|
SONIC_BUILD_RETRY_COUNT: 3
|
||||||
SONIC_BUILD_RETRY_INTERVAL: 600
|
SONIC_BUILD_RETRY_INTERVAL: 600
|
||||||
DOCKER_BUILDKIT: 0
|
DOCKER_BUILDKIT: 0
|
||||||
|
DOCKER_BUILD_TIMEOUT: 3600
|
||||||
|
Loading…
Reference in New Issue
Block a user