From 4c31ef3cd217bc71071eba3b244829a1e72d1234 Mon Sep 17 00:00:00 2001
From: yozhao101 <56170650+yozhao101@users.noreply.github.com>
Date: Mon, 4 Nov 2019 17:45:41 -0800
Subject: [PATCH] [Services] Restart Teamd service upon unexpected critical process exit. (#3703)

Signed-off-by: Yong Zhao
---
 dockers/docker-teamd/Dockerfile.j2      | 2 ++
 dockers/docker-teamd/critical_processes | 2 ++
 dockers/docker-teamd/supervisord.conf   | 8 +++++++-
 files/build_templates/teamd.service.j2  | 4 ++++
 rules/docker-teamd.mk                   | 1 +
 5 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 dockers/docker-teamd/critical_processes

diff --git a/dockers/docker-teamd/Dockerfile.j2 b/dockers/docker-teamd/Dockerfile.j2
index 9188b7aaf7..fc8626e772 100644
--- a/dockers/docker-teamd/Dockerfile.j2
+++ b/dockers/docker-teamd/Dockerfile.j2
@@ -36,5 +36,7 @@ RUN apt-get clean -y && \
 
 COPY ["start.sh", "/usr/bin/"]
 COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
+COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
+COPY ["critical_processes", "/etc/supervisor"]
 
 ENTRYPOINT ["/usr/bin/supervisord"]
diff --git a/dockers/docker-teamd/critical_processes b/dockers/docker-teamd/critical_processes
new file mode 100644
index 0000000000..b5c543df05
--- /dev/null
+++ b/dockers/docker-teamd/critical_processes
@@ -0,0 +1,2 @@
+teammgrd
+teamsyncd
diff --git a/dockers/docker-teamd/supervisord.conf b/dockers/docker-teamd/supervisord.conf
index 738751d0a5..3a420e0fcd 100644
--- a/dockers/docker-teamd/supervisord.conf
+++ b/dockers/docker-teamd/supervisord.conf
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
 logfile_backups=2
 nodaemon=true
 
+[eventlistener:supervisor-proc-exit-listener]
+command=/usr/bin/supervisor-proc-exit-listener
+events=PROCESS_STATE_EXITED
+autostart=true
+autorestart=unexpected
+
 [program:start.sh]
 command=/usr/bin/start.sh
 priority=1
@@ -15,7 +21,7 @@ stderr_logfile=syslog
 command=/usr/sbin/rsyslogd -n
 priority=2
 autostart=false
-autorestart=false
+autorestart=unexpected
 stdout_logfile=syslog
 stderr_logfile=syslog
 
diff --git a/files/build_templates/teamd.service.j2 b/files/build_templates/teamd.service.j2
index 8034698ecc..be0521a4fb 100644
--- a/files/build_templates/teamd.service.j2
+++ b/files/build_templates/teamd.service.j2
@@ -3,12 +3,16 @@ Description=TEAMD container
 Requires=updategraph.service
 After=updategraph.service swss.service
 Before=ntp-config.service
+StartLimitIntervalSec=1200
+StartLimitBurst=3
 
 [Service]
 User={{ sonicadmin_user }}
 ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
 ExecStart=/usr/bin/{{docker_container_name}}.sh wait
 ExecStop=/usr/bin/{{docker_container_name}}.sh stop
+Restart=always
+RestartSec=30
 
 [Install]
 WantedBy=multi-user.target
diff --git a/rules/docker-teamd.mk b/rules/docker-teamd.mk
index a697ce6b88..598eff97e8 100644
--- a/rules/docker-teamd.mk
+++ b/rules/docker-teamd.mk
@@ -29,3 +29,4 @@ $(DOCKER_TEAMD)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
 $(DOCKER_TEAMD)_RUN_OPT += -v /host/warmboot:/var/warmboot
 
 $(DOCKER_TEAMD)_BASE_IMAGE_FILES += teamdctl:/usr/bin/teamdctl
+$(DOCKER_TEAMD)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)