[Services] Restart database service upon unexpected critical process exit. (#4138)
* [database] Implement the auto-restart feature for the database container.
  Signed-off-by: Yong Zhao <yozhao@microsoft.com>
* [database] Remove the duplicate dependency in service files.
  Since the dependency chain updategraph ---> config_setup ---> database already exists, we do not need to explicitly add database.service to all the other container service files.
  Signed-off-by: Yong Zhao <yozhao@microsoft.com>
* [event listener] Reorganize line 73 in the event listener script.
  Signed-off-by: Yong Zhao <yozhao@microsoft.com>
* [database] Update the file sflow.service.j2 to remove the duplicate dependency.
  Signed-off-by: Yong Zhao <yozhao@microsoft.com>
* [event listener] Add comments in the event listener.
  Signed-off-by: Yong Zhao <yozhao@microsoft.com>
* [event listener] Update the comments on line 56.
  Signed-off-by: Yong Zhao <yozhao@microsoft.com>
* [event listener] Add parentheses around the if statement on line 76 of the event listener.
  Signed-off-by: Yong Zhao <yozhao@microsoft.com>
This commit is contained in:
parent
71225ea4cc
commit
3ac345922b
@ -36,5 +36,7 @@ COPY ["supervisord.conf.j2", "/usr/share/sonic/templates/"]
|
|||||||
COPY ["docker-database-init.sh", "/usr/local/bin/"]
|
COPY ["docker-database-init.sh", "/usr/local/bin/"]
|
||||||
COPY ["ping_pong_db_insts", "/usr/local/bin/"]
|
COPY ["ping_pong_db_insts", "/usr/local/bin/"]
|
||||||
COPY ["database_config.json", "/etc/default/sonic-db/"]
|
COPY ["database_config.json", "/etc/default/sonic-db/"]
|
||||||
|
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
|
||||||
|
COPY ["critical_processes", "/etc/supervisor"]
|
||||||
|
|
||||||
ENTRYPOINT ["/usr/local/bin/docker-database-init.sh"]
|
ENTRYPOINT ["/usr/local/bin/docker-database-init.sh"]
|
||||||
|
1
dockers/docker-database/critical_processes
Normal file
1
dockers/docker-database/critical_processes
Normal file
@ -0,0 +1 @@
|
|||||||
|
redis
|
@ -3,6 +3,13 @@ logfile_maxbytes=1MB
|
|||||||
logfile_backups=2
|
logfile_backups=2
|
||||||
nodaemon=true
|
nodaemon=true
|
||||||
|
|
||||||
|
[eventlistener:supervisor-proc-exit-listener]
|
||||||
|
command=/usr/bin/supervisor-proc-exit-listener --container-name database
|
||||||
|
events=PROCESS_STATE_EXITED
|
||||||
|
autostart=true
|
||||||
|
autorestart=unexpected
|
||||||
|
|
||||||
|
|
||||||
[program:rsyslogd]
|
[program:rsyslogd]
|
||||||
command=/bin/bash -c "rm -f /var/run/rsyslogd.pid && /usr/sbin/rsyslogd -n"
|
command=/bin/bash -c "rm -f /var/run/rsyslogd.pid && /usr/sbin/rsyslogd -n"
|
||||||
priority=1
|
priority=1
|
||||||
|
@ -3,12 +3,16 @@ Description=Database container
|
|||||||
Requires=docker.service
|
Requires=docker.service
|
||||||
After=docker.service
|
After=docker.service
|
||||||
After=rc-local.service
|
After=rc-local.service
|
||||||
|
StartLimitIntervalSec=1200
|
||||||
|
StartLimitBurst=3
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
User=root
|
User=root
|
||||||
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
|
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
|
||||||
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
|
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
|
||||||
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
|
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
|
||||||
|
Restart=always
|
||||||
|
RestartSec=30
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
@ -52,6 +52,8 @@ def main(argv):
|
|||||||
processname = payload_headers['processname']
|
processname = payload_headers['processname']
|
||||||
groupname = payload_headers['groupname']
|
groupname = payload_headers['groupname']
|
||||||
|
|
||||||
|
# Read the status of auto-restart feature from Config_DB.
|
||||||
|
if container_name != 'database':
|
||||||
config_db = swsssdk.ConfigDBConnector()
|
config_db = swsssdk.ConfigDBConnector()
|
||||||
config_db.connect()
|
config_db.connect()
|
||||||
container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
|
container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
|
||||||
@ -68,8 +70,10 @@ def main(argv):
|
|||||||
syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name))
|
syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name))
|
||||||
sys.exit(4)
|
sys.exit(4)
|
||||||
|
|
||||||
# If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor
|
# If container is database or auto-restart feature is enabled and at the same time
|
||||||
if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes):
|
# a critical process exited unexpectedly, terminate supervisor
|
||||||
|
if ((container_name == 'database' or restart_feature == 'enabled') and expected == 0 and
|
||||||
|
(processname in critical_processes or groupname in critical_processes)):
|
||||||
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
|
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
|
||||||
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
|
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
|
||||||
syslog.syslog(syslog.LOG_INFO, msg)
|
syslog.syslog(syslog.LOG_INFO, msg)
|
||||||
|
@ -28,3 +28,4 @@ $(DOCKER_DATABASE)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
|
|||||||
|
|
||||||
$(DOCKER_DATABASE)_BASE_IMAGE_FILES += redis-cli:/usr/bin/redis-cli
|
$(DOCKER_DATABASE)_BASE_IMAGE_FILES += redis-cli:/usr/bin/redis-cli
|
||||||
$(DOCKER_DATABASE)_BASE_IMAGE_FILES += monit_database:/etc/monit/conf.d
|
$(DOCKER_DATABASE)_BASE_IMAGE_FILES += monit_database:/etc/monit/conf.d
|
||||||
|
$(DOCKER_DATABASE)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
|
||||||
|
Loading…
Reference in New Issue
Block a user