From be3c036794c68f482e34dd98e9a288ec4f7b4b4e Mon Sep 17 00:00:00 2001 From: yozhao101 <56170650+yozhao101@users.noreply.github.com> Date: Thu, 21 Jan 2021 12:57:49 -0800 Subject: [PATCH] [supervisord] Monitoring the critical processes with supervisord. (#6242) - Why I did it Initially, we used Monit to monitor critical processes in each container. If one of the critical processes was not running or crashed for some reason, then Monit would write an alerting message into syslog periodically. If we add a new process in a container, the corresponding Monit configuration file will also need to be updated. It is a little hard for maintenance. Currently we employed the event listener of Supervisord to do this monitoring. Since processes in each container are managed by Supervisord, we can only focus on the logic of monitoring. - How I did it We borrowed the event listener of Supervisord to monitor critical processes in containers. The event listener will take the following steps if it was notified that one of the critical processes exited unexpectedly: The event listener will first check whether the auto-restart mechanism was enabled for this container or not. If the auto-restart mechanism was enabled, the event listener will kill the Supervisord process, which should cause the container to exit and subsequently get restarted. If the auto-restart mechanism was not enabled for this container, the event listener will enter a loop which will first sleep 1 minute and then check whether the process is running. If yes, the event listener exits. If no, an alerting message will be written into syslog. - How to verify it First, we need to check whether the auto-restart mechanism of a container was enabled or not by running the command show feature status. If enabled, one critical process should be selected and killed manually, then we need to check whether the container will be restarted or not. 
Second, we can disable the auto-restart mechanism if it was enabled at step 1 by running the command sudo config feature autorestart disabled. Then one critical process should be selected and killed. After that, we will see the alerting message which will appear in the syslog every 1 minute. - Which release branch to backport (provide reason below if selected) 201811 201911 [x ] 202006 --- dockers/docker-database/supervisord.conf.j2 | 2 +- .../docker-dhcp-relay.supervisord.conf.j2 | 2 +- .../frr/supervisord/supervisord.conf.j2 | 2 +- dockers/docker-fpm-gobgp/supervisord.conf | 2 +- dockers/docker-fpm-quagga/supervisord.conf | 2 +- dockers/docker-lldp/supervisord.conf.j2 | 2 +- dockers/docker-nat/supervisord.conf | 2 +- dockers/docker-orchagent/supervisord.conf | 2 +- .../docker-pmon.supervisord.conf.j2 | 2 +- ...cker-router-advertiser.supervisord.conf.j2 | 2 +- dockers/docker-sflow/supervisord.conf | 2 +- dockers/docker-snmp/supervisord.conf | 2 +- dockers/docker-sonic-restapi/supervisord.conf | 2 +- .../docker-sonic-telemetry/supervisord.conf | 2 +- dockers/docker-teamd/supervisord.conf | 2 +- files/scripts/supervisor-proc-exit-listener | 130 +++++++++++++----- .../docker-syncd-bfn/supervisord.conf | 2 +- .../docker-syncd-brcm/supervisord.conf | 2 +- .../cavium/docker-syncd-cavm/supervisord.conf | 2 +- .../docker-syncd-centec/supervisord.conf | 2 +- .../docker-syncd-centec/supervisord.conf | 2 +- .../docker-syncd-mrvl/supervisord.conf | 2 +- .../docker-syncd-mrvl/supervisord.conf | 2 +- .../docker-syncd-mrvl/supervisord.conf | 2 +- .../docker-syncd-mlnx/supervisord.conf | 2 +- .../docker-syncd-nephos/supervisord.conf | 2 +- .../vs/docker-gbsyncd-vs/supervisord.conf | 2 +- platform/vs/docker-syncd-vs/supervisord.conf | 2 +- .../py2/docker-dhcp-relay.supervisord.conf | 2 +- .../py3/docker-dhcp-relay.supervisord.conf | 2 +- 30 files changed, 122 insertions(+), 66 deletions(-) diff --git a/dockers/docker-database/supervisord.conf.j2 
b/dockers/docker-database/supervisord.conf.j2 index 65a172b374..616475fb07 100644 --- a/dockers/docker-database/supervisord.conf.j2 +++ b/dockers/docker-database/supervisord.conf.j2 @@ -5,7 +5,7 @@ nodaemon=true [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name database -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 index 59d6c1f5f2..1ec3ddb63d 100644 --- a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 +++ b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 b/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 index e71d6fa713..0c61873b5f 100644 --- a/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 +++ b/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name bgp -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-fpm-gobgp/supervisord.conf b/dockers/docker-fpm-gobgp/supervisord.conf index b814dc024f..e7e3ee9f30 100644 --- a/dockers/docker-fpm-gobgp/supervisord.conf +++ b/dockers/docker-fpm-gobgp/supervisord.conf @@ -5,7 +5,7 @@ nodaemon=true [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name bgp 
-events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-fpm-quagga/supervisord.conf b/dockers/docker-fpm-quagga/supervisord.conf index 7397a7428a..470dea18a1 100644 --- a/dockers/docker-fpm-quagga/supervisord.conf +++ b/dockers/docker-fpm-quagga/supervisord.conf @@ -5,7 +5,7 @@ nodaemon=true [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name bgp -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-lldp/supervisord.conf.j2 b/dockers/docker-lldp/supervisord.conf.j2 index 4692f7bf2a..3a84caee30 100644 --- a/dockers/docker-lldp/supervisord.conf.j2 +++ b/dockers/docker-lldp/supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name lldp -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-nat/supervisord.conf b/dockers/docker-nat/supervisord.conf index 8555f2a48a..f03b0b3772 100644 --- a/dockers/docker-nat/supervisord.conf +++ b/dockers/docker-nat/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name nat -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index 37ddade2ed..538f251c26 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=100 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name swss -events=PROCESS_STATE_EXITED 
+events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 index 7f816d3339..e8dec40bc4 100644 --- a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 +++ b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=100 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name pmon -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 index ae48792285..5cbfd60322 100644 --- a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 +++ b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-script] command=/usr/bin/supervisor-proc-exit-listener --container-name radv -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-sflow/supervisord.conf b/dockers/docker-sflow/supervisord.conf index 8d1bdc5059..3ff5ff5645 100644 --- a/dockers/docker-sflow/supervisord.conf +++ b/dockers/docker-sflow/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name sflow -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-snmp/supervisord.conf b/dockers/docker-snmp/supervisord.conf index d1e6d09a82..414445fdd6 100644 --- a/dockers/docker-snmp/supervisord.conf +++ 
b/dockers/docker-snmp/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name snmp -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/dockers/docker-sonic-restapi/supervisord.conf b/dockers/docker-sonic-restapi/supervisord.conf index 74bbc92415..44508ce881 100644 --- a/dockers/docker-sonic-restapi/supervisord.conf +++ b/dockers/docker-sonic-restapi/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name restapi -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=false diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index df1e6fa5a3..fa8c86f597 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name telemetry -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=false diff --git a/dockers/docker-teamd/supervisord.conf b/dockers/docker-teamd/supervisord.conf index 78549a7684..04432a3123 100644 --- a/dockers/docker-teamd/supervisord.conf +++ b/dockers/docker-teamd/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name teamd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 143e539a16..06e402bd4f 100755 --- 
a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -2,11 +2,14 @@ import getopt import os +import select import signal import sys import syslog +import time import swsssdk + from supervisor import childutils # Each line of this file should specify either one critical process or one @@ -20,10 +23,18 @@ CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes' # The FEATURE table in config db contains auto-restart field FEATURE_TABLE_NAME = 'FEATURE' -# Read the critical processes/group names from CRITICAL_PROCESSES_FILE +# Value of parameter 'timeout' in select(...) method +SELECT_TIMEOUT_SECS = 1.0 + +# Alerting message will be written into syslog in the following interval +ALERTING_INTERVAL_SECS = 60 def get_critical_group_and_process_list(): + """ + @summary: Read the critical processes/group names from CRITICAL_PROCESSES_FILE. + @return: Two lists which contain critical processes and group names respectively. + """ critical_group_list = [] critical_process_list = [] @@ -49,6 +60,47 @@ def get_critical_group_and_process_list(): return critical_group_list, critical_process_list +def generate_alerting_message(process_name): + """ + @summary: If a critical process was not running, this function will determine it resides in host + or in a specific namespace. Then an alerting message will be written into syslog. + """ + namespace_prefix = os.environ.get("NAMESPACE_PREFIX") + namespace_id = os.environ.get("NAMESPACE_ID") + + if not namespace_prefix or not namespace_id: + namespace = "host" + else: + namespace = namespace_prefix + namespace_id + + syslog.syslog(syslog.LOG_ERR, "Process '{}' is not running in namespace '{}'.".format(process_name, namespace)) + + +def get_autorestart_state(container_name): + """ + @summary: Read the status of auto-restart feature from Config_DB. + @return: Return the status of auto-restart feature. 
+ """ + config_db = swsssdk.ConfigDBConnector() + config_db.connect() + features_table = config_db.get_table(FEATURE_TABLE_NAME) + if not features_table: + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features table from Config DB. Exiting...") + sys.exit(2) + + if container_name not in features_table: + syslog.syslog(syslog.LOG_ERR, "Unable to retrieve feature '{}'. Exiting...".format(container_name)) + sys.exit(3) + + is_auto_restart = features_table[container_name].get('auto_restart') + if not is_auto_restart: + syslog.syslog( + syslog.LOG_ERR, "Unable to determine auto-restart feature status for '{}'. Exiting...".format(container_name)) + sys.exit(4) + + return is_auto_restart + + def main(argv): container_name = None opts, args = getopt.getopt(argv, "c:", ["container-name="]) @@ -62,51 +114,55 @@ def main(argv): critical_group_list, critical_process_list = get_critical_group_and_process_list() + process_under_alerting = {} + # Transition from ACKNOWLEDGED to READY + childutils.listener.ready() + while True: - # Transition from ACKNOWLEDGED to READY - childutils.listener.ready() + file_descriptor_list = select.select([sys.stdin], [], [], SELECT_TIMEOUT_SECS)[0] + if len(file_descriptor_list) > 0: + line = file_descriptor_list[0].readline() + headers = childutils.get_headers(line) + payload = sys.stdin.read(int(headers['len'])) - line = sys.stdin.readline() - headers = childutils.get_headers(line) - payload = sys.stdin.read(int(headers['len'])) + # Handle the PROCESS_STATE_EXITED event + if headers['eventname'] == 'PROCESS_STATE_EXITED': + payload_headers, payload_data = childutils.eventdata(payload + '\n') - # Transition from READY to ACKNOWLEDGED - childutils.listener.ok() + expected = int(payload_headers['expected']) + process_name = payload_headers['processname'] + group_name = payload_headers['groupname'] - # We only care about PROCESS_STATE_EXITED events - if headers['eventname'] == 'PROCESS_STATE_EXITED': - payload_headers, payload_data = 
childutils.eventdata(payload + '\n') + if (process_name in critical_process_list or group_name in critical_group_list) and expected == 0: + is_auto_restart = get_autorestart_state(container_name) + if is_auto_restart != "disabled": + MSG_FORMAT_STR = "Process '{}' exited unexpectedly. Terminating supervisor '{}'" + msg = MSG_FORMAT_STR.format(payload_headers['processname'], container_name) + syslog.syslog(syslog.LOG_INFO, msg) + os.kill(os.getppid(), signal.SIGTERM) + else: + process_under_alerting[process_name] = time.time() - expected = int(payload_headers['expected']) - processname = payload_headers['processname'] - groupname = payload_headers['groupname'] + # Handle the PROCESS_STATE_RUNNING event + elif headers['eventname'] == 'PROCESS_STATE_RUNNING': + payload_headers, payload_data = childutils.eventdata(payload + '\n') + process_name = payload_headers['processname'] - # Read the status of auto-restart feature from Config_DB. - config_db = swsssdk.ConfigDBConnector() - config_db.connect() - features_table = config_db.get_table(FEATURE_TABLE_NAME) - if not features_table: - syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features table from Config DB. Exiting...") - sys.exit(2) + if process_name in process_under_alerting: + process_under_alerting.pop(process_name) - if container_name not in features_table: - syslog.syslog(syslog.LOG_ERR, "Unable to retrieve feature '{}'. Exiting...".format(container_name)) - sys.exit(3) + # Transition from BUSY to ACKNOWLEDGED + childutils.listener.ok() - restart_feature = features_table[container_name].get('auto_restart') - if not restart_feature: - syslog.syslog( - syslog.LOG_ERR, "Unable to determine auto-restart feature status for '{}'. 
Exiting...".format(container_name)) - sys.exit(4) + # Transition from ACKNOWLEDGED to READY + childutils.listener.ready() - # If auto-restart feature is not disabled and at the same time - # a critical process exited unexpectedly, terminate supervisor - if (restart_feature != 'disabled' and expected == 0 and - (processname in critical_process_list or groupname in critical_group_list)): - MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." - msg = MSG_FORMAT_STR.format(payload_headers['processname']) - syslog.syslog(syslog.LOG_INFO, msg) - os.kill(os.getppid(), signal.SIGTERM) + # Check whether we need write alerting messages into syslog + for process in process_under_alerting.keys(): + epoch_time = time.time() + if epoch_time - process_under_alerting[process] >= ALERTING_INTERVAL_SECS: + process_under_alerting[process] = epoch_time + generate_alerting_message(process) if __name__ == "__main__": diff --git a/platform/barefoot/docker-syncd-bfn/supervisord.conf b/platform/barefoot/docker-syncd-bfn/supervisord.conf index 39ea308277..c83484e5e9 100644 --- a/platform/barefoot/docker-syncd-bfn/supervisord.conf +++ b/platform/barefoot/docker-syncd-bfn/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/broadcom/docker-syncd-brcm/supervisord.conf b/platform/broadcom/docker-syncd-brcm/supervisord.conf index a8e594c47a..5e80110697 100644 --- a/platform/broadcom/docker-syncd-brcm/supervisord.conf +++ b/platform/broadcom/docker-syncd-brcm/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true 
autorestart=unexpected diff --git a/platform/cavium/docker-syncd-cavm/supervisord.conf b/platform/cavium/docker-syncd-cavm/supervisord.conf index 0c6285d46a..91b94a2580 100644 --- a/platform/cavium/docker-syncd-cavm/supervisord.conf +++ b/platform/cavium/docker-syncd-cavm/supervisord.conf @@ -5,7 +5,7 @@ nodaemon=true [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/centec-arm64/docker-syncd-centec/supervisord.conf b/platform/centec-arm64/docker-syncd-centec/supervisord.conf index 2cf6814dda..10f406129d 100755 --- a/platform/centec-arm64/docker-syncd-centec/supervisord.conf +++ b/platform/centec-arm64/docker-syncd-centec/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/centec/docker-syncd-centec/supervisord.conf b/platform/centec/docker-syncd-centec/supervisord.conf index 831b7256a4..6df1893a0b 100644 --- a/platform/centec/docker-syncd-centec/supervisord.conf +++ b/platform/centec/docker-syncd-centec/supervisord.conf @@ -13,7 +13,7 @@ events=PROCESS_STATE [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf index 2cf6814dda..10f406129d 100644 --- a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf @@ -14,7 +14,7 @@ 
buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf index c099bbccbf..e633b4fe11 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/marvell/docker-syncd-mrvl/supervisord.conf b/platform/marvell/docker-syncd-mrvl/supervisord.conf index 85442933cf..94be9dd268 100644 --- a/platform/marvell/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell/docker-syncd-mrvl/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/mellanox/docker-syncd-mlnx/supervisord.conf b/platform/mellanox/docker-syncd-mlnx/supervisord.conf index 9311a255b0..8491d762bf 100644 --- a/platform/mellanox/docker-syncd-mlnx/supervisord.conf +++ b/platform/mellanox/docker-syncd-mlnx/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/nephos/docker-syncd-nephos/supervisord.conf 
b/platform/nephos/docker-syncd-nephos/supervisord.conf index a05bf7bfec..955021ad2d 100644 --- a/platform/nephos/docker-syncd-nephos/supervisord.conf +++ b/platform/nephos/docker-syncd-nephos/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=python2 /usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/vs/docker-gbsyncd-vs/supervisord.conf b/platform/vs/docker-gbsyncd-vs/supervisord.conf index 3583ef6b5a..52267c8fa5 100644 --- a/platform/vs/docker-gbsyncd-vs/supervisord.conf +++ b/platform/vs/docker-gbsyncd-vs/supervisord.conf @@ -13,7 +13,7 @@ events=PROCESS_STATE [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name gbsyncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/platform/vs/docker-syncd-vs/supervisord.conf b/platform/vs/docker-syncd-vs/supervisord.conf index 7416f23a45..6a6d946632 100644 --- a/platform/vs/docker-syncd-vs/supervisord.conf +++ b/platform/vs/docker-syncd-vs/supervisord.conf @@ -14,7 +14,7 @@ buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/src/sonic-config-engine/tests/sample_output/py2/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/py2/docker-dhcp-relay.supervisord.conf index a213a25178..dad758947f 100644 --- a/src/sonic-config-engine/tests/sample_output/py2/docker-dhcp-relay.supervisord.conf +++ b/src/sonic-config-engine/tests/sample_output/py2/docker-dhcp-relay.supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] 
command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected diff --git a/src/sonic-config-engine/tests/sample_output/py3/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/py3/docker-dhcp-relay.supervisord.conf index d5338b18cf..e2135d0529 100644 --- a/src/sonic-config-engine/tests/sample_output/py3/docker-dhcp-relay.supervisord.conf +++ b/src/sonic-config-engine/tests/sample_output/py3/docker-dhcp-relay.supervisord.conf @@ -14,7 +14,7 @@ buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay -events=PROCESS_STATE_EXITED +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING autostart=true autorestart=unexpected