[Service] Enable/disable container auto-restart based on configuration. (#4073)

This commit is contained in:
yozhao101 2020-02-07 12:34:07 -08:00 committed by Abhishek
parent 984c43e01d
commit 71225ea4cc
20 changed files with 72 additions and 20 deletions

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name lldp
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name swss
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name pmon
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-script]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name radv
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name sflow
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name snmp
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name telemetry
events=PROCESS_STATE_EXITED
autostart=true
autorestart=false

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name teamd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -1,17 +1,34 @@
#!/usr/bin/env python
import getopt
import os
import signal
import sys
import syslog
import swsssdk
from supervisor import childutils
# Contents of file should be the names of critical processes (as defined in
# supervisor.conf file), one per line
CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes'
def main():
# This table in the database contains the features for each container; each
# feature in a row is configured with a state or a number.
CONTAINER_FEATURE_TABLE_NAME = 'CONTAINER_FEATURE'
def main(argv):
container_name = None
opts, args = getopt.getopt(argv, "c:", ["container-name="])
for opt, arg in opts:
if opt in ("-c", "--container-name"):
container_name = arg
if not container_name:
syslog.syslog(syslog.LOG_ERR, "Container name not specified. Exiting...")
sys.exit(1)
# Read the list of critical processes from a file
with open(CRITICAL_PROCESSES_FILE, 'r') as f:
critical_processes = [line.rstrip('\n') for line in f]
@ -35,12 +52,29 @@ def main():
processname = payload_headers['processname']
groupname = payload_headers['groupname']
# If a critical process exited unexpectedly, terminate supervisor
if expected == 0 and processname in critical_processes or groupname in critical_processes:
config_db = swsssdk.ConfigDBConnector()
config_db.connect()
container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
if not container_features_table:
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...")
sys.exit(2)
if not container_features_table.has_key(container_name):
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name))
sys.exit(3)
restart_feature = container_features_table[container_name].get('auto_restart')
if not restart_feature:
syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name))
sys.exit(4)
# If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor
if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes):
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
syslog.syslog(syslog.LOG_INFO, msg)
os.kill(os.getppid(), signal.SIGTERM)
if __name__ == "__main__":
main()
main(sys.argv[1:])

View File

@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
[program:start.sh]
command=/usr/bin/start.sh
priority=1

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
[program:start.sh]
command=/usr/bin/start.sh
priority=1

View File

@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
[program:start.sh]
command=/usr/bin/start.sh
priority=1

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

View File

@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected