sonic-buildimage/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2
Joe LeVeque c651a9ade4
[dockers][supervisor] Increase event buffer size for process exit listener; Set all event buffer sizes to 1024 (#7083)
To prevent error [messages](https://dev.azure.com/mssonic/build/_build/results?buildId=2254&view=logs&j=9a13fbcd-e92d-583c-2f89-d81f90cac1fd&t=739db6ba-1b35-5485-5697-de102068d650&l=802) like the following from being logged:

```
Mar 17 02:33:48.523153 vlab-01 INFO swss#supervisord 2021-03-17 02:33:48,518 ERRO pool supervisor-proc-exit-listener event buffer overflowed, discarding event 46
```

This is basically an addendum to https://github.com/Azure/sonic-buildimage/pull/5247, which increased the event buffer size for dependent-startup. While supervisor-proc-exit-listener doesn't subscribe to as many events as dependent-startup, there is still a chance some containers (like swss, as in the example above) have enough processes running to cause an overflow of the default buffer size of 10.

This is especially important for preventing erroneous log_analyzer failures in the sonic-mgmt repo regression tests, which have started occasionally causing PR check builds to fail. Example [here](https://dev.azure.com/mssonic/build/_build/results?buildId=2254&view=logs&j=9a13fbcd-e92d-583c-2f89-d81f90cac1fd&t=739db6ba-1b35-5485-5697-de102068d650&l=802).

I set all supervisor-proc-exit-listener event buffer sizes to 1024, and also updated all dependent-startup event buffer sizes to 1024, as well, to keep things simple, unified, and allow headroom so that we will not need to adjust these values frequently, if at all.
2021-03-27 21:14:24 -07:00

181 lines
4.1 KiB
Django/Jinja

[supervisord]
logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true
[eventlistener:dependent-startup]
command=python3 -m supervisord_dependent_startup
autostart=true
autorestart=unexpected
startretries=0
exitcodes=0,3
events=PROCESS_STATE
buffer_size=1024
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name bgp
events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING
autostart=true
autorestart=unexpected
buffer_size=1024
[program:rsyslogd]
command=/usr/sbin/rsyslogd -n -iNONE
priority=1
autostart=false
autorestart=unexpected
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
[program:zebra]
command=/usr/lib/frr/zebra -A 127.0.0.1 -s 90000000 -M fpm -M snmp
priority=4
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=rsyslogd:running
[program:zsocket]
command=/usr/bin/zsocket.sh
priority=4
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=zebra:running
[program:staticd]
command=/usr/lib/frr/staticd -A 127.0.0.1
priority=4
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=zsocket:exited
{% if DEVICE_METADATA.localhost.frr_mgmt_framework_config is defined and DEVICE_METADATA.localhost.frr_mgmt_framework_config == "true" %}
[program:bfdd]
command=/usr/lib/frr/bfdd -A 127.0.0.1
priority=4
stopsignal=KILL
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=zebra:running
{% endif %}
[program:bgpd]
command=/usr/lib/frr/bgpd -A 127.0.0.1 -M snmp
priority=5
stopsignal=KILL
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=zsocket:exited
{% if DEVICE_METADATA.localhost.frr_mgmt_framework_config is defined and DEVICE_METADATA.localhost.frr_mgmt_framework_config == "true" %}
[program:ospfd]
command=/usr/lib/frr/ospfd -A 127.0.0.1 -M snmp
priority=5
stopsignal=KILL
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=zebra:running
[program:pimd]
command=/usr/lib/frr/pimd -A 127.0.0.1
priority=5
stopsignal=KILL
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=zebra:running
{% endif %}
[program:fpmsyncd]
command=fpmsyncd
priority=6
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=bgpd:running
{% if DEVICE_METADATA.localhost.frr_mgmt_framework_config is defined and DEVICE_METADATA.localhost.frr_mgmt_framework_config == "true" %}
[program:frrcfgd]
command=/usr/local/bin/frrcfgd
{% else %}
[program:bgpcfgd]
command=/usr/local/bin/bgpcfgd
{% endif %}
priority=6
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=bgpd:running
[program:bgpmon]
command=/usr/local/bin/bgpmon
priority=6
autostart=false
autorestart=true
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=bgpd:running
{% if DEVICE_METADATA.localhost.docker_routing_config_mode is defined and DEVICE_METADATA.localhost.docker_routing_config_mode == "unified" %}
[program:vtysh_b]
command=/usr/bin/vtysh -b
priority=6
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=bgpd:running
{% endif %}
{% if WARM_RESTART is defined and WARM_RESTART.bgp is defined and WARM_RESTART.bgp.bgp_eoiu is defined and WARM_RESTART.bgp.bgp_eoiu == "true" %}
[program:bgp_eoiu_marker]
command=/usr/bin/bgp_eoiu_marker.py
priority=7
autostart=false
autorestart=false
startsecs=0
startretries=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=bgpd:running
{% endif %}