[Services] Restart database service upon unexpected critical process exit. (#4138)

* [database] Implement the auto-restart feature for database container.

Signed-off-by: Yong Zhao <yozhao@microsoft.com>

* [database] Remove the duplicate dependency in service files. Since we
already have updategraph ---> config_setup ---> database, we do not need
explicitly add database.service in all other container service files.

Signed-off-by: Yong Zhao <yozhao@microsoft.com>

* [event listener] Reorganize the line 73 in event listener script.

Signed-off-by: Yong Zhao <yozhao@microsoft.com>

* [database] update the file sflow.service.j2 to remove the duplicate
dependency.

Signed-off-by: Yong Zhao <yozhao@microsoft.com>

* [event listener] Add comments in event listener.

Signed-off-by: Yong Zhao <yozhao@microsoft.com>

* [event listener] Update the comments in line 56.

Signed-off-by: Yong Zhao <yozhao@microsoft.com>

* [event listener] Add parentheses for if statement in line 76 in event listener.

Signed-off-by: Yong Zhao <yozhao@microsoft.com>
This commit is contained in:
yozhao101 2020-02-11 14:03:02 -08:00 committed by Abhishek
parent 71225ea4cc
commit 3ac345922b
6 changed files with 34 additions and 15 deletions

View File

@ -36,5 +36,7 @@ COPY ["supervisord.conf.j2", "/usr/share/sonic/templates/"]
COPY ["docker-database-init.sh", "/usr/local/bin/"] COPY ["docker-database-init.sh", "/usr/local/bin/"]
COPY ["ping_pong_db_insts", "/usr/local/bin/"] COPY ["ping_pong_db_insts", "/usr/local/bin/"]
COPY ["database_config.json", "/etc/default/sonic-db/"] COPY ["database_config.json", "/etc/default/sonic-db/"]
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
COPY ["critical_processes", "/etc/supervisor"]
ENTRYPOINT ["/usr/local/bin/docker-database-init.sh"] ENTRYPOINT ["/usr/local/bin/docker-database-init.sh"]

View File

@ -0,0 +1 @@
redis

View File

@ -3,6 +3,13 @@ logfile_maxbytes=1MB
logfile_backups=2 logfile_backups=2
nodaemon=true nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name database
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
[program:rsyslogd] [program:rsyslogd]
command=/bin/bash -c "rm -f /var/run/rsyslogd.pid && /usr/sbin/rsyslogd -n" command=/bin/bash -c "rm -f /var/run/rsyslogd.pid && /usr/sbin/rsyslogd -n"
priority=1 priority=1

View File

@ -3,12 +3,16 @@ Description=Database container
Requires=docker.service Requires=docker.service
After=docker.service After=docker.service
After=rc-local.service After=rc-local.service
StartLimitIntervalSec=1200
StartLimitBurst=3
[Service] [Service]
User=root User=root
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop ExecStop=/usr/bin/{{docker_container_name}}.sh stop
Restart=always
RestartSec=30
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View File

@ -52,24 +52,28 @@ def main(argv):
processname = payload_headers['processname'] processname = payload_headers['processname']
groupname = payload_headers['groupname'] groupname = payload_headers['groupname']
config_db = swsssdk.ConfigDBConnector() # Read the status of auto-restart feature from Config_DB.
config_db.connect() if container_name != 'database':
container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME) config_db = swsssdk.ConfigDBConnector()
if not container_features_table: config_db.connect()
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...") container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
sys.exit(2) if not container_features_table:
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...")
sys.exit(2)
if not container_features_table.has_key(container_name): if not container_features_table.has_key(container_name):
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name)) syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name))
sys.exit(3) sys.exit(3)
restart_feature = container_features_table[container_name].get('auto_restart') restart_feature = container_features_table[container_name].get('auto_restart')
if not restart_feature: if not restart_feature:
syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name)) syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name))
sys.exit(4) sys.exit(4)
# If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor # If container is database or auto-restart feature is enabled and at the same time
if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes): # a critical process exited unexpectedly, terminate supervisor
if ((container_name == 'database' or restart_feature == 'enabled') and expected == 0 and
(processname in critical_processes or groupname in critical_processes)):
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
msg = MSG_FORMAT_STR.format(payload_headers['processname']) msg = MSG_FORMAT_STR.format(payload_headers['processname'])
syslog.syslog(syslog.LOG_INFO, msg) syslog.syslog(syslog.LOG_INFO, msg)

View File

@ -28,3 +28,4 @@ $(DOCKER_DATABASE)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_DATABASE)_BASE_IMAGE_FILES += redis-cli:/usr/bin/redis-cli $(DOCKER_DATABASE)_BASE_IMAGE_FILES += redis-cli:/usr/bin/redis-cli
$(DOCKER_DATABASE)_BASE_IMAGE_FILES += monit_database:/etc/monit/conf.d $(DOCKER_DATABASE)_BASE_IMAGE_FILES += monit_database:/etc/monit/conf.d
$(DOCKER_DATABASE)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)