[201911][Monit] Monitor critical processes in radv and dhcp_relay containers. (#7340)

Signed-off-by: Yong Zhao yozhao@microsoft.com

Why I did it
This PR aims to monitor critical processes in router advertiser and dhcp_relay containers by Monit.

How I did it
The router advertiser container runs only on T0 devices, and the T0 device must have at least one VLAN interface
that is configured with an IPv6 address. In addition, the router advertiser container does not run on devices whose
deployment ID is 8.

As such, I created a service that dynamically generates the Monit configuration file of the router advertiser from a
template.

Similarly, the Monit configuration file of dhcp_relay is also generated from a template, since the number of dhcrelay processes in the dhcp_relay container depends on the number of VLANs.

How to verify it
I verified this implementation on a DuT.
This commit is contained in:
yozhao101 2021-04-16 08:40:06 -07:00 committed by GitHub
parent b375053a36
commit 528543bc6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 99 additions and 0 deletions

View File

@ -0,0 +1,36 @@
{# This template file is used to generate Monit configuration file of dhcp_relay container.
   Inputs read from the rendering context: VLAN_INTERFACE and VLAN.
   One "check program" entry is emitted for every VLAN named in VLAN_INTERFACE whose
   VLAN[vlan_name]['dhcp_servers'] list holds at least one address accepted by the
   `ipv4` filter (a custom filter; presumably supplied by the renderer). -#}
################################################################################
## Monit configuration file for dhcp_relay container
## process list:
## dhcrelay
################################################################################
{# If our configuration has VLANs... #}
{%- if VLAN_INTERFACE -%}
{# Count how many VLANs require a DHCP relay agent... #}
{%- set num_relays = namespace(count=0) -%}
{%- for vlan_name in VLAN_INTERFACE -%}
{%- if VLAN and vlan_name in VLAN and 'dhcp_servers' in VLAN[vlan_name] and VLAN[vlan_name]['dhcp_servers']|length > 0 -%}
{%- set num_relays.count = num_relays.count + 1 -%}
{%- endif -%}
{%- endfor -%}
{# if one or more VLANs require DHCP relay agent #}
{%- if num_relays.count > 0 -%}
{# A namespace object is used because a plain `set` inside a loop does not outlive the iteration #}
{%- set relay_for_ipv4 = namespace(flag=False) -%}
{%- for vlan_name in VLAN_INTERFACE -%}
{%- if VLAN and vlan_name in VLAN and 'dhcp_servers' in VLAN[vlan_name] and VLAN[vlan_name]['dhcp_servers']|length > 0 -%}
{%- for dhcp_server in VLAN[vlan_name]['dhcp_servers'] -%}
{%- if dhcp_server | ipv4 -%}
{%- set relay_for_ipv4.flag = True -%}
{%- endif -%}
{%- endfor -%}
{%- if relay_for_ipv4.flag -%}
{# Clear the flag on the namespace itself so the next VLAN is judged only by its own servers.
   Rebinding the bare name (`set relay_for_ipv4 = False`) would only create a loop-local
   variable and leave the shared flag stuck at True for every following VLAN. #}
{%- set relay_for_ipv4.flag = False -%}
{# Check the running status of each DHCP relay agent instance #}
check program dhcp_relay|dhcrelay with path "/usr/bin/process_checker dhcp_relay /usr/sbin/dhcrelay -d -m discard -a %h:%p %P --name-alias-map-file /tmp/port-name-alias-map.txt -id {{ vlan_name }}"
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endif -%}

View File

@ -0,0 +1,33 @@
{# Template that renders the Monit configuration file of the router advertiser (radv) container -#}
###############################################################################
## Monit configuration for radv container
## process list:
## radvd
###############################################################################
{# The radvd check is emitted only for ToR (T0) devices that own #}
{# at least one VLAN interface with an IPv6 address assigned, #}
{# and never when deployment_id is "8" #}
{%- set ipv6_vlans = namespace(count=0) -%}
{%- if DEVICE_METADATA is defined
       and DEVICE_METADATA.localhost is defined
       and DEVICE_METADATA.localhost.deployment_id is defined
       and DEVICE_METADATA.localhost.type is defined
       and DEVICE_METADATA.localhost.deployment_id != "8"
       and "ToRRouter" in DEVICE_METADATA.localhost.type
       and DEVICE_METADATA.localhost.type != "MgmtToRRouter"
       and VLAN_INTERFACE -%}
{# Tally the VLAN prefixes that are IPv6; the loop filter skips everything else #}
{%- for vlan, pfx in VLAN_INTERFACE|pfx_filter if pfx | ipv6 -%}
{%- set ipv6_vlans.count = ipv6_vlans.count + 1 -%}
{%- endfor -%}
{%- endif -%}
{%- if ipv6_vlans.count > 0 -%}
{# Check the running status of radvd process #}
check program radv|radvd with path "/usr/bin/process_checker radv /usr/sbin/radvd -n"
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
{%- endif -%}

View File

@ -205,6 +205,9 @@ sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/
# Monit files placed in the target filesystem: restrict the conf.d snippets to
# mode 600 and install the process_checker helper as an executable.
# Expansions are quoted so paths are never word-split; the globs stay outside
# the quotes so they still expand.
sudo chmod 600 "$FILESYSTEM_ROOT"/etc/monit/conf.d/*
sudo cp "$IMAGE_CONFIGS/monit/process_checker" "$FILESYSTEM_ROOT/usr/bin/"
sudo chmod 755 "$FILESYSTEM_ROOT/usr/bin/process_checker"
# Install the unit file and the script it runs (generate_monit_config), which
# renders the radv and dhcp_relay Monit configuration files from templates.
sudo cp "$IMAGE_CONFIGS/monit/generate_monit_config.service" "$FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM"
sudo cp "$IMAGE_CONFIGS/monit/generate_monit_config" "$FILESYSTEM_ROOT/usr/bin/"
sudo chmod 755 "$FILESYSTEM_ROOT/usr/bin/generate_monit_config"
# Copy crontabs
sudo cp -f "$IMAGE_CONFIGS"/cron.d/* "$FILESYSTEM_ROOT/etc/cron.d/"

View File

@ -0,0 +1,12 @@
#!/bin/bash
# Generate the following files from templates:
# 1. Monit configuration file of radv container
# 2. Monit configuration file of dhcp_relay container
#
# Each "-t <template>,<destination>" pair asks sonic-cfggen to render one
# template into one output file.
# NOTE(review): "-d" presumably makes sonic-cfggen read its data from the
# configuration database — confirm against the sonic-cfggen usage text.
#
# Exit status: that of sonic-cfggen.

# An array keeps every argument a separate word without relying on unquoted
# word splitting of a single string.
CFGGEN_PARAMS=(
  -d
  -t /usr/share/sonic/templates/monit_radv.j2,/etc/monit/conf.d/monit_radv
  -t /usr/share/sonic/templates/monit_dhcp_relay.j2,/etc/monit/conf.d/monit_dhcp_relay
)

sonic-cfggen "${CFGGEN_PARAMS[@]}" || {
  rc=$?
  printf 'generate_monit_config: sonic-cfggen failed with status %s\n' "$rc" >&2
  exit "$rc"
}

View File

@ -0,0 +1,13 @@
# Unit that runs /usr/bin/generate_monit_config once to render the Monit
# configuration files from their templates.
[Unit]
Description=Generate Monit configuration file from template
# Pull in updategraph.service and start only after it.
# NOTE(review): presumably because the templates need the configuration that
# updategraph.service prepares — confirm.
Requires=updategraph.service
After=updategraph.service
# Order this unit ahead of monit.service so the generated files exist when Monit starts.
Before=monit.service
[Service]
# oneshot + RemainAfterExit: the script runs to completion and the unit is
# then still reported as active.
Type=oneshot
RemainAfterExit=yes
ExecStart=/usr/bin/generate_monit_config
[Install]
WantedBy=multi-user.target

View File

@ -26,3 +26,4 @@ $(DOCKER_DHCP_RELAY)_CONTAINER_NAME = dhcp_relay
$(DOCKER_DHCP_RELAY)_RUN_OPT += --privileged -t
$(DOCKER_DHCP_RELAY)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_DHCP_RELAY)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
# Register the dhcp_relay Monit template together with its target directory.
# NOTE(review): presumably the "<file>:<dir>" entry makes the build install the
# template into /usr/share/sonic/templates of the base image, where
# generate_monit_config reads it — confirm against the build rules.
$(DOCKER_DHCP_RELAY)_BASE_IMAGE_FILES += monit_dhcp_relay.j2:/usr/share/sonic/templates

View File

@ -26,3 +26,4 @@ $(DOCKER_ROUTER_ADVERTISER)_CONTAINER_NAME = radv
$(DOCKER_ROUTER_ADVERTISER)_RUN_OPT += --privileged -t
$(DOCKER_ROUTER_ADVERTISER)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_ROUTER_ADVERTISER)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
# Register the radv Monit template together with its target directory.
# NOTE(review): presumably the "<file>:<dir>" entry makes the build install the
# template into /usr/share/sonic/templates of the base image, where
# generate_monit_config reads it — confirm against the build rules.
$(DOCKER_ROUTER_ADVERTISER)_BASE_IMAGE_FILES += monit_radv.j2:/usr/share/sonic/templates