[Services] Restart DHCP-Relay service upon unexpected critical process exit. (#3667)
Signed-off-by: Yong Zhao <yozhao@microsoft.com>
This commit is contained in:
parent
7c5fb775d9
commit
ed79f54569
@ -26,5 +26,7 @@ RUN apt-get clean -y && \
|
||||
|
||||
COPY ["docker_init.sh", "start.sh", "/usr/bin/"]
|
||||
COPY ["docker-dhcp-relay.supervisord.conf.j2", "wait_for_intf.sh.j2", "/usr/share/sonic/templates/"]
|
||||
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
|
||||
COPY ["critical_processes", "/etc/supervisor"]
|
||||
|
||||
ENTRYPOINT ["/usr/bin/docker_init.sh"]
|
||||
|
1
dockers/docker-dhcp-relay/critical_processes
Normal file
1
dockers/docker-dhcp-relay/critical_processes
Normal file
@ -0,0 +1 @@
|
||||
isc-dhcp-relay
|
@ -3,6 +3,12 @@ logfile_maxbytes=1MB
|
||||
logfile_backups=2
|
||||
nodaemon=true
|
||||
|
||||
[eventlistener:supervisor-proc-exit-listener]
|
||||
command=/usr/bin/supervisor-proc-exit-listener
|
||||
events=PROCESS_STATE_EXITED
|
||||
autostart=true
|
||||
autorestart=unexpected
|
||||
|
||||
[program:start.sh]
|
||||
command=/usr/bin/start.sh
|
||||
priority=1
|
||||
|
@ -3,12 +3,16 @@ Description=DHCP relay container
|
||||
Requires=updategraph.service swss.service teamd.service
|
||||
After=updategraph.service swss.service syncd.service teamd.service
|
||||
Before=ntp-config.service
|
||||
StartLimitIntervalSec=1200
|
||||
StartLimitBurst=3
|
||||
|
||||
[Service]
|
||||
User={{ sonicadmin_user }}
|
||||
ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start
|
||||
ExecStart=/usr/bin/{{ docker_container_name }}.sh wait
|
||||
ExecStop=/usr/bin/{{ docker_container_name }}.sh stop
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target swss.service teamd.service
|
||||
|
@ -33,9 +33,10 @@ def main():
|
||||
|
||||
expected = int(payload_headers['expected'])
|
||||
processname = payload_headers['processname']
|
||||
groupname = payload_headers['groupname']
|
||||
|
||||
# If a critical process exited unexpectedly, terminate supervisor
|
||||
if expected == 0 and processname in critical_processes:
|
||||
if expected == 0 and processname in critical_processes or groupname in critical_processes:
|
||||
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
|
||||
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
|
||||
syslog.syslog(syslog.LOG_INFO, msg)
|
||||
|
@ -25,3 +25,4 @@ SONIC_STRETCH_DBG_DOCKERS += $(DOCKER_DHCP_RELAY_DBG)
|
||||
$(DOCKER_DHCP_RELAY)_CONTAINER_NAME = dhcp_relay
|
||||
$(DOCKER_DHCP_RELAY)_RUN_OPT += --net=host --privileged -t
|
||||
$(DOCKER_DHCP_RELAY)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
|
||||
$(DOCKER_DHCP_RELAY)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
|
||||
|
@ -3,6 +3,12 @@ logfile_maxbytes=1MB
|
||||
logfile_backups=2
|
||||
nodaemon=true
|
||||
|
||||
[eventlistener:supervisor-proc-exit-listener]
|
||||
command=/usr/bin/supervisor-proc-exit-listener
|
||||
events=PROCESS_STATE_EXITED
|
||||
autostart=true
|
||||
autorestart=unexpected
|
||||
|
||||
[program:start.sh]
|
||||
command=/usr/bin/start.sh
|
||||
priority=1
|
||||
|
Reference in New Issue
Block a user