From 5b3b4804adcffaa43012596acf6effe5a31f0256 Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Tue, 8 Sep 2020 23:36:38 -0700 Subject: [PATCH] [dockers][supervisor] Increase event buffer size for dependent-startup (#5247) When stopping the swss, pmon or bgp containers, log messages like the following can be seen: ``` Aug 23 22:50:43.789760 sonic-dut INFO swss#supervisord 2020-08-23 22:50:10,061 ERRO pool dependent-startup event buffer overflowed, discarding event 34 Aug 23 22:50:43.789760 sonic-dut INFO swss#supervisord 2020-08-23 22:50:10,063 ERRO pool dependent-startup event buffer overflowed, discarding event 35 Aug 23 22:50:43.789760 sonic-dut INFO swss#supervisord 2020-08-23 22:50:10,064 ERRO pool dependent-startup event buffer overflowed, discarding event 36 Aug 23 22:50:43.789760 sonic-dut INFO swss#supervisord 2020-08-23 22:50:10,066 ERRO pool dependent-startup event buffer overflowed, discarding event 37 ``` This is due to the number of programs in the container managed by supervisor, all generating events at the same time. The default event queue buffer size in supervisor is 10. This patch increases that value in all containers in order to eliminate these errors. As more programs are added to the containers, we may need to further adjust these values. I increased all buffer sizes to 25 except for containers with more programs or templated supervisor.conf files which allow for a variable number of programs. In these cases I increased the buffer size to 50. One final exception is the swss container, where the buffer fills up to ~50, so I increased this buffer to 100. Resolves https://github.com/Azure/sonic-buildimage/issues/5241 --- dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 | 1 + dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 | 1 + dockers/docker-lldp/supervisord.conf.j2 | 1 + dockers/docker-nat/supervisord.conf | 1 + dockers/docker-orchagent/supervisord.conf | 1 + dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 | 1 + .../docker-router-advertiser.supervisord.conf.j2 | 1 + dockers/docker-sflow/supervisord.conf | 1 + dockers/docker-snmp/supervisord.conf | 1 + dockers/docker-sonic-mgmt-framework/supervisord.conf | 1 + dockers/docker-sonic-restapi/supervisord.conf | 1 + dockers/docker-sonic-telemetry/supervisord.conf | 1 + dockers/docker-teamd/supervisord.conf | 1 + platform/barefoot/docker-syncd-bfn/supervisord.conf | 1 + platform/broadcom/docker-syncd-brcm/supervisord.conf | 1 + platform/centec-arm64/docker-syncd-centec/supervisord.conf | 1 + platform/innovium/docker-syncd-invm/supervisord.conf | 1 + platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf | 1 + platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf | 1 + platform/marvell/docker-syncd-mrvl/supervisord.conf | 1 + platform/mellanox/docker-syncd-mlnx/supervisord.conf | 1 + platform/nephos/docker-syncd-nephos/supervisord.conf | 1 + platform/vs/docker-syncd-vs/supervisord.conf | 1 + .../tests/sample_output/docker-dhcp-relay.supervisord.conf | 1 + 24 files changed, 24 insertions(+) diff --git a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 index 97de4f3604..2efac176fa 100644 --- a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 +++ b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay diff --git a/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 b/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 index e752b1ca05..862886b8af 100644 --- a/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 +++ b/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name bgp diff --git a/dockers/docker-lldp/supervisord.conf.j2 b/dockers/docker-lldp/supervisord.conf.j2 index 776dcd0296..5499e2af46 100644 --- a/dockers/docker-lldp/supervisord.conf.j2 +++ b/dockers/docker-lldp/supervisord.conf.j2 @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name lldp diff --git a/dockers/docker-nat/supervisord.conf b/dockers/docker-nat/supervisord.conf index a6fb78aa1b..9840300ec1 100644 --- a/dockers/docker-nat/supervisord.conf +++ b/dockers/docker-nat/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name nat diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index 3cd192f24c..f8c68337f0 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=100 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name swss diff --git a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 index ffb6a61cb1..f1eacf5b40 100644 --- a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 +++ b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=100 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name pmon diff --git a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 index 9bfc8d9220..9e75941170 100644 --- a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 +++ b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf.j2 @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-script] command=/usr/bin/supervisor-proc-exit-listener --container-name radv diff --git a/dockers/docker-sflow/supervisord.conf b/dockers/docker-sflow/supervisord.conf index a3150f23af..2731d89d06 100644 --- a/dockers/docker-sflow/supervisord.conf +++ b/dockers/docker-sflow/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name sflow diff --git a/dockers/docker-snmp/supervisord.conf b/dockers/docker-snmp/supervisord.conf index 2ca3686dcd..05f54122b5 100644 --- a/dockers/docker-snmp/supervisord.conf +++ b/dockers/docker-snmp/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name snmp diff --git a/dockers/docker-sonic-mgmt-framework/supervisord.conf b/dockers/docker-sonic-mgmt-framework/supervisord.conf index f78d0ec841..5ff38f0f62 100644 --- a/dockers/docker-sonic-mgmt-framework/supervisord.conf +++ b/dockers/docker-sonic-mgmt-framework/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [program:rsyslogd] command=/usr/sbin/rsyslogd -n -iNONE diff --git a/dockers/docker-sonic-restapi/supervisord.conf b/dockers/docker-sonic-restapi/supervisord.conf index 6f907a0ab0..ac9a9fe18a 100644 --- a/dockers/docker-sonic-restapi/supervisord.conf +++ b/dockers/docker-sonic-restapi/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name restapi diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index d968983cdd..f0578803a2 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name telemetry diff --git a/dockers/docker-teamd/supervisord.conf b/dockers/docker-teamd/supervisord.conf index 50d4249b19..c152932608 100644 --- a/dockers/docker-teamd/supervisord.conf +++ b/dockers/docker-teamd/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name teamd diff --git a/platform/barefoot/docker-syncd-bfn/supervisord.conf b/platform/barefoot/docker-syncd-bfn/supervisord.conf index 7083a174c5..7137279103 100644 --- a/platform/barefoot/docker-syncd-bfn/supervisord.conf +++ b/platform/barefoot/docker-syncd-bfn/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/platform/broadcom/docker-syncd-brcm/supervisord.conf b/platform/broadcom/docker-syncd-brcm/supervisord.conf index e0ac48c0a2..77f6519c2e 100644 --- a/platform/broadcom/docker-syncd-brcm/supervisord.conf +++ b/platform/broadcom/docker-syncd-brcm/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/platform/centec-arm64/docker-syncd-centec/supervisord.conf b/platform/centec-arm64/docker-syncd-centec/supervisord.conf index e1fd242b4e..1a15c140a7 100755 --- a/platform/centec-arm64/docker-syncd-centec/supervisord.conf +++ b/platform/centec-arm64/docker-syncd-centec/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/platform/innovium/docker-syncd-invm/supervisord.conf b/platform/innovium/docker-syncd-invm/supervisord.conf index a614d68f28..2a2413b300 100755 --- a/platform/innovium/docker-syncd-invm/supervisord.conf +++ b/platform/innovium/docker-syncd-invm/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [program:rsyslogd] command=/usr/sbin/rsyslogd -n -iNONE diff --git a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf index e1fd242b4e..1a15c140a7 100644 --- a/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf index 0adb188ce3..408899f888 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/platform/marvell/docker-syncd-mrvl/supervisord.conf b/platform/marvell/docker-syncd-mrvl/supervisord.conf index 12752786eb..73523ac02a 100644 --- a/platform/marvell/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell/docker-syncd-mrvl/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/platform/mellanox/docker-syncd-mlnx/supervisord.conf b/platform/mellanox/docker-syncd-mlnx/supervisord.conf index dc6977ed47..55578ff2dc 100644 --- a/platform/mellanox/docker-syncd-mlnx/supervisord.conf +++ b/platform/mellanox/docker-syncd-mlnx/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/platform/nephos/docker-syncd-nephos/supervisord.conf b/platform/nephos/docker-syncd-nephos/supervisord.conf index dc6977ed47..55578ff2dc 100644 --- a/platform/nephos/docker-syncd-nephos/supervisord.conf +++ b/platform/nephos/docker-syncd-nephos/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/platform/vs/docker-syncd-vs/supervisord.conf b/platform/vs/docker-syncd-vs/supervisord.conf index b097d1aa8c..36e33850cb 100644 --- a/platform/vs/docker-syncd-vs/supervisord.conf +++ b/platform/vs/docker-syncd-vs/supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=25 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name syncd diff --git a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf index 6eeba48fe9..5037476139 100644 --- a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf +++ b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf @@ -10,6 +10,7 @@ autorestart=unexpected startretries=0 exitcodes=0,3 events=PROCESS_STATE +buffer_size=50 [eventlistener:supervisor-proc-exit-listener] command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay