[201911][Monit] Monitor critical processes in radv and dhcp_relay containers. (#7340)
Signed-off-by: Yong Zhao yozhao@microsoft.com Why I did it: This PR uses Monit to monitor the critical processes in the router advertiser (radv) and dhcp_relay containers. How I did it: The router advertiser container only runs on T0 devices that have at least one VLAN interface configured with an IPv6 address, and it does not run on devices whose deployment ID is 8. I therefore created a service that dynamically generates the router advertiser's Monit configuration file from a template. Similarly, the Monit configuration file for dhcp_relay is generated from a template, because the number of dhcrelay processes in the dhcp_relay container depends on the number of VLANs. How to verify it: I verified this implementation on a DUT.
This commit is contained in:
parent
b375053a36
commit
528543bc6a
@ -0,0 +1,36 @@
|
||||
{# Template for the Monit configuration file of the dhcp_relay container. One "check program" stanza is emitted per VLAN interface whose VLAN lists at least one IPv4 DHCP server. -#}

################################################################################
## Monit configuration file for dhcp_relay container
## process list:
## dhcrelay
################################################################################
{# If our configuration has VLANs... #}
{%- if VLAN_INTERFACE -%}
{# Count how many VLANs require a DHCP relay agent... #}
{%- set num_relays = namespace(count=0) -%}
{%- for vlan_name in VLAN_INTERFACE -%}
{%- if VLAN and vlan_name in VLAN and 'dhcp_servers' in VLAN[vlan_name] and VLAN[vlan_name]['dhcp_servers']|length > 0 -%}
{%- set num_relays.count = num_relays.count + 1 -%}
{%- endif -%}
{%- endfor -%}
{# If one or more VLANs require a DHCP relay agent... #}
{%- if num_relays.count > 0 -%}
{# A namespace object is used because a plain "set" inside a loop does not outlive the loop body #}
{%- set relay_for_ipv4 = namespace(flag=False) -%}
{%- for vlan_name in VLAN_INTERFACE -%}
{%- if VLAN and vlan_name in VLAN and 'dhcp_servers' in VLAN[vlan_name] and VLAN[vlan_name]['dhcp_servers']|length > 0 -%}
{# Raise the flag when at least one DHCP server of this VLAN is an IPv4 address #}
{%- for dhcp_server in VLAN[vlan_name]['dhcp_servers'] -%}
{%- if dhcp_server | ipv4 -%}
{%- set relay_for_ipv4.flag = True -%}
{%- endif -%}
{%- endfor -%}
{%- if relay_for_ipv4.flag -%}
{# Clear the flag on the namespace itself so the next VLAN starts from False. Assigning the bare name here would only rebind a loop-scoped variable and leave the flag set for every following VLAN. #}
{%- set relay_for_ipv4.flag = False -%}
{# Check the running status of each DHCP relay agent instance. NOTE(review): every VLAN emits the same Monit service name "dhcp_relay|dhcrelay" - confirm Monit accepts this when several VLANs need a relay. #}
check program dhcp_relay|dhcrelay with path "/usr/bin/process_checker dhcp_relay /usr/sbin/dhcrelay -d -m discard -a %h:%p %P --name-alias-map-file /tmp/port-name-alias-map.txt -id {{ vlan_name }}"
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endif -%}
|
@ -0,0 +1,33 @@
|
||||
{# Jinja2 source for the Monit configuration file of the router advertiser (radv) container -#}

###############################################################################
## Monit configuration for radv container
## process list:
## radvd
###############################################################################
{# radvd is only monitored on ToR (T0) devices, management ToRs excluded, #}
{# that have at least one VLAN interface with an IPv6 address assigned #}
{# and whose deployment_id is not "8" #}
{%- set v6_vlans = namespace(count=0) -%}
{%- if DEVICE_METADATA is defined
       and DEVICE_METADATA.localhost is defined
       and DEVICE_METADATA.localhost.deployment_id is defined
       and DEVICE_METADATA.localhost.type is defined
       and DEVICE_METADATA.localhost.deployment_id != "8"
       and "ToRRouter" in DEVICE_METADATA.localhost.type
       and DEVICE_METADATA.localhost.type != "MgmtToRRouter"
       and VLAN_INTERFACE -%}
{%- for (vlan, pfx) in VLAN_INTERFACE|pfx_filter -%}
{# Tally every VLAN interface prefix that is an IPv6 address #}
{%- if pfx | ipv6 -%}
{%- set v6_vlans.count = v6_vlans.count + 1 -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}

{%- if v6_vlans.count > 0 -%}
{# Emit the radvd check only when at least one IPv6 VLAN was counted #}
check program radv|radvd with path "/usr/bin/process_checker radv /usr/sbin/radvd -n"
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
{%- endif -%}
|
@ -205,6 +205,9 @@ sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/
|
||||
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/conf.d/*
|
||||
sudo cp $IMAGE_CONFIGS/monit/process_checker $FILESYSTEM_ROOT/usr/bin/
|
||||
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/process_checker
|
||||
sudo cp $IMAGE_CONFIGS/monit/generate_monit_config.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM
|
||||
sudo cp $IMAGE_CONFIGS/monit/generate_monit_config $FILESYSTEM_ROOT/usr/bin/
|
||||
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/generate_monit_config
|
||||
|
||||
# Copy crontabs
|
||||
sudo cp -f $IMAGE_CONFIGS/cron.d/* $FILESYSTEM_ROOT/etc/cron.d/
|
||||
|
12
files/image_config/monit/generate_monit_config
Normal file
12
files/image_config/monit/generate_monit_config
Normal file
@ -0,0 +1,12 @@
|
||||
#!/bin/bash

# Generate the following files from templates:
#   1. Monit configuration file of radv container
#   2. Monit configuration file of dhcp_relay container
#
# The exit status of this script is the exit status of sonic-cfggen, since
# it is the last command executed.

# Arguments are kept in an array so each one reaches sonic-cfggen as exactly
# one word, without relying on unquoted word splitting of a string.
# Each "-t" value is a "<template>,<destination>" pair.
CFGGEN_PARAMS=(
  -d
  -t /usr/share/sonic/templates/monit_radv.j2,/etc/monit/conf.d/monit_radv
  -t /usr/share/sonic/templates/monit_dhcp_relay.j2,/etc/monit/conf.d/monit_dhcp_relay
)

sonic-cfggen "${CFGGEN_PARAMS[@]}"
|
13
files/image_config/monit/generate_monit_config.service
Normal file
13
files/image_config/monit/generate_monit_config.service
Normal file
@ -0,0 +1,13 @@
|
||||
# One-shot unit that runs /usr/bin/generate_monit_config, ordered ahead of
# monit.service.
[Unit]
|
||||
Description=Generate Monit configuration file from template
|
||||
# Requires= pulls in updategraph.service; After= orders this unit behind it.
Requires=updategraph.service
|
||||
After=updategraph.service
|
||||
# Before= is an ordering constraint only; it does not pull in monit.service.
Before=monit.service
|
||||
|
||||
[Service]
|
||||
# oneshot with RemainAfterExit=yes: the unit is still reported as active
# after the script has exited.
Type=oneshot
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/usr/bin/generate_monit_config
|
||||
|
||||
[Install]
|
||||
# Started as part of the normal multi-user boot target when enabled.
WantedBy=multi-user.target
|
@ -26,3 +26,4 @@ $(DOCKER_DHCP_RELAY)_CONTAINER_NAME = dhcp_relay
|
||||
$(DOCKER_DHCP_RELAY)_RUN_OPT += --privileged -t
|
||||
$(DOCKER_DHCP_RELAY)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
|
||||
$(DOCKER_DHCP_RELAY)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
|
||||
$(DOCKER_DHCP_RELAY)_BASE_IMAGE_FILES += monit_dhcp_relay.j2:/usr/share/sonic/templates
|
||||
|
@ -26,3 +26,4 @@ $(DOCKER_ROUTER_ADVERTISER)_CONTAINER_NAME = radv
|
||||
$(DOCKER_ROUTER_ADVERTISER)_RUN_OPT += --privileged -t
|
||||
$(DOCKER_ROUTER_ADVERTISER)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
|
||||
$(DOCKER_ROUTER_ADVERTISER)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
|
||||
$(DOCKER_ROUTER_ADVERTISER)_BASE_IMAGE_FILES += monit_radv.j2:/usr/share/sonic/templates
|
||||
|
Reference in New Issue
Block a user