From cff30c59d005416b2159b7c2d9197ce89876819b Mon Sep 17 00:00:00 2001
From: yozhao101 <56170650+yozhao101@users.noreply.github.com>
Date: Wed, 30 Oct 2019 16:41:55 -0700
Subject: [PATCH] [Services] Restart Router-advertiser service upon unexpected critical process exit (#3681)

Signed-off-by: Yong Zhao
---
Reviewer notes (free-form area; not part of the commit message):
  * Dockerfile.j2: copies the supervisor-proc-exit-listener script into
    /usr/bin and the critical_processes list into /etc/supervisor.
  * critical_processes: new file naming radvd as the only critical process.
  * supervisord.conf: registers an event listener subscribed to
    PROCESS_STATE_EXITED events. What the listener does on such an event is
    defined by the listener script, which is not part of this patch.
  * radv.service.j2: sets Restart=always with RestartSec=30, and
    StartLimitIntervalSec=1200 / StartLimitBurst=3 in the [Unit] section.
  * docker-router-advertiser.mk: adds the listener script to the image's
    _FILES list so it is available in the Docker build context.

 dockers/docker-router-advertiser/Dockerfile.j2 | 2 ++
 dockers/docker-router-advertiser/critical_processes | 1 +
 .../docker-router-advertiser.supervisord.conf | 6 ++++++
 files/build_templates/radv.service.j2 | 4 ++++
 rules/docker-router-advertiser.mk | 1 +
 5 files changed, 14 insertions(+)
 create mode 100644 dockers/docker-router-advertiser/critical_processes

diff --git a/dockers/docker-router-advertiser/Dockerfile.j2 b/dockers/docker-router-advertiser/Dockerfile.j2
index 1594a59c5e..39e7b28eff 100644
--- a/dockers/docker-router-advertiser/Dockerfile.j2
+++ b/dockers/docker-router-advertiser/Dockerfile.j2
@@ -27,5 +27,7 @@ RUN apt-get clean -y && \
 COPY ["start.sh", "/usr/bin/"]
 COPY ["docker-router-advertiser.supervisord.conf", "/etc/supervisor/conf.d/"]
 COPY ["radvd.conf.j2", "wait_for_intf.sh.j2", "/usr/share/sonic/templates/"]
+COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
+COPY ["critical_processes", "/etc/supervisor"]
 
 ENTRYPOINT ["/usr/bin/supervisord"]
diff --git a/dockers/docker-router-advertiser/critical_processes b/dockers/docker-router-advertiser/critical_processes
new file mode 100644
index 0000000000..238a0346ac
--- /dev/null
+++ b/dockers/docker-router-advertiser/critical_processes
@@ -0,0 +1 @@
+radvd
diff --git a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf
index f0bb4d5b3b..4ea84ab11c 100644
--- a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf
+++ b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
 logfile_backups=2
 nodaemon=true
 
+[eventlistener:supervisor-proc-exit-script]
+command=/usr/bin/supervisor-proc-exit-listener
+events=PROCESS_STATE_EXITED
+autostart=true
+autorestart=unexpected
+
 [program:start.sh]
 command=/usr/bin/start.sh
 priority=1
diff --git a/files/build_templates/radv.service.j2 b/files/build_templates/radv.service.j2
index 3dadc56e46..54440241d5 100644
--- a/files/build_templates/radv.service.j2
+++ b/files/build_templates/radv.service.j2
@@ -3,12 +3,16 @@ Description=Router advertiser container
 Requires=updategraph.service
 After=updategraph.service swss.service
 Before=ntp-config.service
+StartLimitIntervalSec=1200
+StartLimitBurst=3
 
 [Service]
 User={{ sonicadmin_user }}
 ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start
 ExecStart=/usr/bin/{{ docker_container_name }}.sh wait
 ExecStop=/usr/bin/{{ docker_container_name }}.sh stop
+Restart=always
+RestartSec=30
 
 [Install]
 WantedBy=multi-user.target
diff --git a/rules/docker-router-advertiser.mk b/rules/docker-router-advertiser.mk
index 93dc028558..53e0d7600e 100644
--- a/rules/docker-router-advertiser.mk
+++ b/rules/docker-router-advertiser.mk
@@ -25,3 +25,4 @@ SONIC_STRETCH_DBG_DOCKERS += $(DOCKER_ROUTER_ADVERTISER_DBG)
 $(DOCKER_ROUTER_ADVERTISER)_CONTAINER_NAME = radv
 $(DOCKER_ROUTER_ADVERTISER)_RUN_OPT += --net=host --privileged -t
 $(DOCKER_ROUTER_ADVERTISER)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
+$(DOCKER_ROUTER_ADVERTISER)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)