[201911][Monit] Monitor critical processes in PMon container. (#7438)
Signed-off-by: Yong Zhao yozhao@microsoft.com. Why I did it: This PR monitors the critical processes in the PMon container with Monit on the 201911 branch. How I did it: I created a Monit configuration template; the generate_monit_config.service service renders it to generate the Monit configuration file of the PMon container. How to verify it: I verified this on a Mellanox device (str-msn2700-03) and an Arista device (str-a7050-acs-1). Which release branch to backport (provide reason below if selected): 201811, [x] 201911, 202006, 202012.
This commit is contained in:
parent
80f0836643
commit
a8d2d0b5cd
@ -0,0 +1,61 @@
|
|||||||
|
{# This template file is used to generate Monit configuration file of platform monitor container -#}
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
## Monit configuration file for PMon container
|
||||||
|
## process list:
|
||||||
|
{% if not skip_fancontrol and HAVE_FANCONTROL_CONF == 1 %}
|
||||||
|
## fancontrol
|
||||||
|
{% endif %}
|
||||||
|
{% if not skip_ledd %}
|
||||||
|
## ledd
|
||||||
|
{% endif %}
|
||||||
|
{% if not skip_psud %}
|
||||||
|
## psud
|
||||||
|
{% endif %}
|
||||||
|
{% if not skip_sensors and HAVE_SENSORS_CONF == 1 %}
|
||||||
|
## sensord
|
||||||
|
{% endif %}
|
||||||
|
{% if not skip_syseepromd %}
|
||||||
|
## syseepromd
|
||||||
|
{% endif %}
|
||||||
|
{% if not skip_thermalctld %}
|
||||||
|
## thermalctld
|
||||||
|
{% endif %}
|
||||||
|
{% if not skip_xcvrd %}
|
||||||
|
## xcvrd
|
||||||
|
{% endif %}
|
||||||
|
###############################################################################
## Each check below is rendered only when the matching skip_* variable is not
## set to a true value; the fancontrol and sensord checks additionally require
## HAVE_FANCONTROL_CONF / HAVE_SENSORS_CONF to equal 1.
## Every check runs /usr/bin/process_checker and raises a Monit alert when its
## exit status is non-zero for 5 times within 5 cycles, repeating the alert
## every cycle.
## NOTE(review): process_checker presumably verifies that the given command
## line is running inside the pmon container -- confirm against its source.
|
||||||
|
{% if not skip_fancontrol and HAVE_FANCONTROL_CONF == 1 %}
|
||||||
|
check program pmon|fancontrol with path "/usr/bin/process_checker pmon /bin/bash /usr/sbin/fancontrol"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if not skip_ledd %}
|
||||||
|
check program pmon|ledd with path "/usr/bin/process_checker pmon /usr/bin/python /usr/bin/ledd"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if not skip_psud %}
|
||||||
|
check program pmon|psud with path "/usr/bin/process_checker pmon /usr/bin/python /usr/bin/psud"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if not skip_sensors and HAVE_SENSORS_CONF == 1 %}
|
||||||
|
check program pmon|sensord with path "/usr/bin/process_checker pmon /usr/sbin/sensord -f daemon"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if not skip_syseepromd %}
|
||||||
|
check program pmon|syseepromd with path "/usr/bin/process_checker pmon /usr/bin/python /usr/bin/syseepromd"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if not skip_thermalctld %}
|
||||||
|
check program pmon|thermalctld with path "/usr/bin/process_checker pmon /usr/bin/python /usr/bin/thermalctld"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if not skip_xcvrd %}
|
||||||
|
check program pmon|xcvrd with path "/usr/bin/process_checker pmon /usr/bin/python /usr/bin/xcvrd"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
||||||
|
{%- endif -%}
|
33
files/image_config/monit/generate_monit_config
Normal file → Executable file
33
files/image_config/monit/generate_monit_config
Normal file → Executable file
@ -1,12 +1,39 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# Generate the following files from templates:
|
# Generate the following files from templates:
|
||||||
# 1. Monit configuration file of radv container
|
# 1. Monit configuration file of dhcp_relay container
|
||||||
# 2. Monit configuration file of dhcp_relay container
|
# 2. Monit configuration file of PMon container
|
||||||
|
# 3. Monit configuration file of radv container
|
||||||
|
|
||||||
CFGGEN_PARAMS=" \
|
CFGGEN_PARAMS=" \
|
||||||
-d \
|
-d \
|
||||||
-t /usr/share/sonic/templates/monit_radv.j2,/etc/monit/conf.d/monit_radv \
|
|
||||||
-t /usr/share/sonic/templates/monit_dhcp_relay.j2,/etc/monit/conf.d/monit_dhcp_relay \
|
-t /usr/share/sonic/templates/monit_dhcp_relay.j2,/etc/monit/conf.d/monit_dhcp_relay \
|
||||||
|
-t /usr/share/sonic/templates/monit_radv.j2,/etc/monit/conf.d/monit_radv \
|
||||||
"
|
"
|
||||||
sonic-cfggen $CFGGEN_PARAMS
|
sonic-cfggen $CFGGEN_PARAMS
|
||||||
|
|
||||||
|
# Render the Monit configuration file of the PMon container from its template.
#
# The template is given two extra variables, HAVE_SENSORS_CONF and
# HAVE_FANCONTROL_CONF (0 or 1), that say whether the platform directory
# contains a sensors.conf and a fancontrol file. They are handed to
# sonic-cfggen as additional JSON data (-a). When the platform directory
# also contains pmon_daemon_control.json, that file is loaded with -j
# (presumably the source of the skip_* flags the template tests -- confirm).
#
# Nothing is rendered when the platform name cannot be read from
# DEVICE_METADATA or is empty.
if PLATFORM=$(sonic-cfggen -d -v DEVICE_METADATA.localhost.platform 2> /dev/null) \
    && [[ -n "$PLATFORM" ]]; then
    PLATFORM_DIR="/usr/share/sonic/device/$PLATFORM"
    SENSORS_CONF_FILE="$PLATFORM_DIR/sensors.conf"
    FANCONTROL_CONF_FILE="$PLATFORM_DIR/fancontrol"
    PMON_DAEMON_CONTROL_FILE="$PLATFORM_DIR/pmon_daemon_control.json"

    # The template lives in the same "templates" directory as the other
    # Monit templates rendered by this script; the singular "template"
    # directory used previously does not match where monit_pmon.j2 is
    # installed.
    PMON_TEMPLATE="/usr/share/sonic/templates/monit_pmon.j2"
    PMON_MONIT_CONF="/etc/monit/conf.d/monit_pmon"

    HAVE_SENSORS_CONF=0
    HAVE_FANCONTROL_CONF=0

    if [[ -e "$SENSORS_CONF_FILE" ]]; then
        HAVE_SENSORS_CONF=1
    fi

    if [[ -e "$FANCONTROL_CONF_FILE" ]]; then
        HAVE_FANCONTROL_CONF=1
    fi

    confvar="{\"HAVE_SENSORS_CONF\":$HAVE_SENSORS_CONF, \"HAVE_FANCONTROL_CONF\":$HAVE_FANCONTROL_CONF}"

    # Build the argument list once; -j is only added when the platform
    # ships a pmon_daemon_control.json.
    PMON_CFGGEN_ARGS=()
    if [[ -e "$PMON_DAEMON_CONTROL_FILE" ]]; then
        PMON_CFGGEN_ARGS+=(-j "$PMON_DAEMON_CONTROL_FILE")
    fi
    PMON_CFGGEN_ARGS+=(-a "$confvar" -t "$PMON_TEMPLATE,$PMON_MONIT_CONF")

    sonic-cfggen "${PMON_CFGGEN_ARGS[@]}"
fi
|
||||||
|
@ -46,4 +46,5 @@ $(DOCKER_PLATFORM_MONITOR)_BASE_IMAGE_FILES += cmd_wrapper:/usr/bin/sensors
|
|||||||
$(DOCKER_PLATFORM_MONITOR)_BASE_IMAGE_FILES += cmd_wrapper:/usr/sbin/smartctl
|
$(DOCKER_PLATFORM_MONITOR)_BASE_IMAGE_FILES += cmd_wrapper:/usr/sbin/smartctl
|
||||||
$(DOCKER_PLATFORM_MONITOR)_BASE_IMAGE_FILES += cmd_wrapper:/usr/sbin/iSmart
|
$(DOCKER_PLATFORM_MONITOR)_BASE_IMAGE_FILES += cmd_wrapper:/usr/sbin/iSmart
|
||||||
$(DOCKER_PLATFORM_MONITOR)_BASE_IMAGE_FILES += cmd_wrapper:/usr/sbin/SmartCmd
|
$(DOCKER_PLATFORM_MONITOR)_BASE_IMAGE_FILES += cmd_wrapper:/usr/sbin/SmartCmd
|
||||||
|
$(DOCKER_PLATFORM_MONITOR)_BASE_IMAGE_FILES += monit_pmon.j2:/usr/share/sonic/templates
|
||||||
$(DOCKER_PLATFORM_MONITOR)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
|
$(DOCKER_PLATFORM_MONITOR)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
|
||||||
|
Reference in New Issue
Block a user