From 67fc68513ee784e12e4588f7eb6c8f51135bd16b Mon Sep 17 00:00:00 2001
From: yozhao101 <56170650+yozhao101@users.noreply.github.com>
Date: Mon, 25 Nov 2019 13:02:00 -0800
Subject: [PATCH] [Services] Restart Sflow service upon unexpected critical process exit. (#3751)

Signed-off-by: Yong Zhao
---
NOTE(review): line structure restored from a whitespace-collapsed copy of
this patch. Blank context lines and the blank added/removed lines were
inferred from the hunk line counts; confirm against the upstream commit
before applying.

 dockers/docker-sflow/Dockerfile.j2      | 2 ++
 dockers/docker-sflow/critical_processes | 1 +
 dockers/docker-sflow/supervisord.conf   | 6 ++++++
 files/build_templates/sflow.service.j2  | 4 ++++
 rules/docker-sflow.mk                   | 2 +-
 5 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 dockers/docker-sflow/critical_processes

diff --git a/dockers/docker-sflow/Dockerfile.j2 b/dockers/docker-sflow/Dockerfile.j2
index e88789fb69..75da64e02e 100644
--- a/dockers/docker-sflow/Dockerfile.j2
+++ b/dockers/docker-sflow/Dockerfile.j2
@@ -29,5 +29,7 @@ RUN sed -ri '/^DAEMON_ARGS=""/c DAEMON_ARGS="-c /var/log/hsflowd.crash"' /etc/in
 
 COPY ["start.sh", "/usr/bin/"]
 COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
+COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
+COPY ["critical_processes", "/etc/supervisor"]
 
 ENTRYPOINT ["/usr/bin/supervisord"]
diff --git a/dockers/docker-sflow/critical_processes b/dockers/docker-sflow/critical_processes
new file mode 100644
index 0000000000..5b24e2d8e1
--- /dev/null
+++ b/dockers/docker-sflow/critical_processes
@@ -0,0 +1 @@
+sflowmgrd
diff --git a/dockers/docker-sflow/supervisord.conf b/dockers/docker-sflow/supervisord.conf
index e4f9259712..50986f197d 100644
--- a/dockers/docker-sflow/supervisord.conf
+++ b/dockers/docker-sflow/supervisord.conf
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
 logfile_backups=2
 nodaemon=true
 
+[eventlistener:supervisor-proc-exit-listener]
+command=/usr/bin/supervisor-proc-exit-listener
+events=PROCESS_STATE_EXITED
+autostart=true
+autorestart=unexpected
+
 [program:start.sh]
 command=/usr/bin/start.sh
 priority=1
diff --git a/files/build_templates/sflow.service.j2 b/files/build_templates/sflow.service.j2
index 3a5752412b..de08f027ad 100644
--- a/files/build_templates/sflow.service.j2
+++ b/files/build_templates/sflow.service.j2
@@ -3,12 +3,16 @@ Description=sFlow container
 Requires=swss.service
 After=swss.service syncd.service
 Before=ntp-config.service
+StartLimitIntervalSec=1200
+StartLimitBurst=3
 
 [Service]
 User={{ sonicadmin_user }}
 ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
 ExecStart=/usr/bin/{{docker_container_name}}.sh wait
 ExecStop=/usr/bin/{{docker_container_name}}.sh stop
+Restart=always
+RestartSec=30
 
 [Install]
 WantedBy=multi-user.target
diff --git a/rules/docker-sflow.mk b/rules/docker-sflow.mk
index 19b0f290bf..94b5684818 100644
--- a/rules/docker-sflow.mk
+++ b/rules/docker-sflow.mk
@@ -32,4 +32,4 @@ $(DOCKER_SFLOW)_RUN_OPT += -v /host/warmboot:/var/warmboot
 
 $(DOCKER_SFLOW)_BASE_IMAGE_FILES += psample:/usr/bin/psample
 $(DOCKER_SFLOW)_BASE_IMAGE_FILES += sflowtool:/usr/bin/sflowtool
-
+$(DOCKER_SFLOW)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)