[201911][Monit] Unmonitor processes in disabled containers (#5462)
We want Monit to unmonitor the processes in containers which are disabled in the `FEATURE` table, so that Monit will not generate false alerting messages in the syslog. - Backport of https://github.com/Azure/sonic-buildimage/pull/5153 to the 201911 branch Signed-off-by: Yong Zhao <yozhao@microsoft.com>
This commit is contained in:
parent
10534a39eb
commit
7580c846ad
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## redis_server
|
## redis_server
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process redis_server matching "/usr/bin/redis-server"
|
check program database|redis_server with path "/usr/bin/process_checker database /usr/bin/redis-server"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -8,20 +8,20 @@
|
|||||||
## bgpcfgd
|
## bgpcfgd
|
||||||
## bgpmon
|
## bgpmon
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process zebra matching "/usr/lib/frr/zebra"
|
check program bgp|zebra with path "/usr/bin/process_checker bgp /usr/lib/frr/zebra"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process fpmsyncd matching "fpmsyncd"
|
check program bgp|fpmsyncd with path "/usr/bin/process_checker bgp fpmsyncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process bgpd matching "/usr/lib/frr/bgpd"
|
check program bgp|bgpd with path "/usr/bin/process_checker bgp /usr/lib/frr/bgpd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process staticd matching "/usr/lib/frr/staticd"
|
check program bgp|staticd with path "/usr/bin/process_checker bgp /usr/lib/frr/staticd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
|
check program bgp|bgpcfgd with path "/usr/bin/process_checker bgp /usr/bin/python /usr/local/bin/bgpcfgd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process bgpcfgd matching "python /usr/local/bin/bgpmon"
|
check program bgp|bgpmon with path "/usr/bin/process_checker bgp python /usr/local/bin/bgpmon"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -5,11 +5,11 @@
|
|||||||
## lldp-syncd
|
## lldp-syncd
|
||||||
## lldpmgrd
|
## lldpmgrd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process lldpd_monitor matching "lldpd: "
|
check program lldp|lldpd_monitor with path "/usr/bin/process_checker lldp lldpd:"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process lldp_syncd matching "python2 -m lldp_syncd"
|
check program lldp|lldp_syncd with path "/usr/bin/process_checker lldp python2 -m lldp_syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process lldpmgrd matching "python /usr/bin/lldpmgrd"
|
check program lldp|lldpmgrd with path "/usr/bin/process_checker lldp python /usr/bin/lldpmgrd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -11,33 +11,33 @@
|
|||||||
## buffermgrd
|
## buffermgrd
|
||||||
## nbrmgrd
|
## nbrmgrd
|
||||||
## vxlanmgrd
|
## vxlanmgrd
|
||||||
###############################################################################
|
##############################################################################
|
||||||
check process orchagent matching "/usr/bin/orchagent -d /var/log/swss"
|
check program swss|orchagent with path "/usr/bin/process_checker swss /usr/bin/orchagent -d /var/log/swss"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process portsyncd matching "/usr/bin/portsyncd"
|
check program swss|portsyncd with path "/usr/bin/process_checker swss /usr/bin/portsyncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process neighsyncd matching "/usr/bin/neighsyncd"
|
check program swss|neighsyncd with path "/usr/bin/process_checker swss /usr/bin/neighsyncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process vrfmgrd matching "/usr/bin/vrfmgrd"
|
check program swss|vrfmgrd with path "/usr/bin/process_checker swss /usr/bin/vrfmgrd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process vlanmgrd matching "/usr/bin/vlanmgrd"
|
check program swss|vlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vlanmgrd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process intfmgrd matching "/usr/bin/intfmgrd"
|
check program swss|intfmgrd with path "/usr/bin/process_checker swss /usr/bin/intfmgrd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process portmgrd matching "/usr/bin/portmgrd"
|
check program swss|portmgrd with path "/usr/bin/process_checker swss /usr/bin/portmgrd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process buffermgrd matching "/usr/bin/buffermgrd -l"
|
check program swss|buffermgrd with path "/usr/bin/process_checker swss /usr/bin/buffermgrd -l"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process nbrmgrd matching "/usr/bin/nbrmgrd"
|
check program swss|nbrmgrd with path "/usr/bin/process_checker swss /usr/bin/nbrmgrd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process vxlanmgrd matching "/usr/bin/vxlanmgrd"
|
check program swss|vxlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vxlanmgrd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## sflowmgrd
|
## sflowmgrd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process sflowmgrd matching "/usr/bin/sflowmgrd"
|
check program sflow|sflowmgrd with path "/usr/bin/process_checker sflow /usr/bin/sflowmgrd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -4,8 +4,8 @@
|
|||||||
## snmpd
|
## snmpd
|
||||||
## snmpd_subagent
|
## snmpd_subagent
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process snmpd matching "/usr/sbin/snmpd -f"
|
check program snmp|snmpd with path "/usr/bin/process_checker snmp /usr/sbin/snmpd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process snmp_subagent matching "python3.6 -m sonic_ax_impl"
|
check program snmp|snmp_subagent with path "/usr/bin/process_checker snmp python3.6 -m sonic_ax_impl"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## restapi
|
## restapi
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process restapi matching "/usr/sbin/go-server-server"
|
check program restapi|restapi with path "/usr/bin/process_checker restapi /usr/sbin/go-server-server"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -4,8 +4,8 @@
|
|||||||
## telemetry
|
## telemetry
|
||||||
## dialout_client
|
## dialout_client
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process telemetry matching "/usr/sbin/telemetry"
|
check program telemetry|telemetry with path "/usr/bin/process_checker telemetry /usr/sbin/telemetry"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process dialout_client matching "/usr/sbin/dialout_client_cli"
|
check program telemetry|dialout_client with path "/usr/bin/process_checker telemetry /usr/sbin/dialout_client_cli"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
11
dockers/docker-teamd/base_image_files/monit_teamd
Normal file
11
dockers/docker-teamd/base_image_files/monit_teamd
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
###############################################################################
|
||||||
|
## Monit configuration for teamd container
|
||||||
|
## process list:
|
||||||
|
## teamsyncd
|
||||||
|
## teammgrd
|
||||||
|
###############################################################################
|
||||||
|
check program teamd|teamsyncd with path "/usr/bin/process_checker teamd /usr/bin/teamsyncd"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
|
check program teamd|teammgrd with path "/usr/bin/process_checker teamd /usr/bin/teammgrd"
|
||||||
|
if status != 0 for 5 times within 5 cycles then alert
|
@ -109,6 +109,9 @@ sudo rm -rf $FILESYSTEM_ROOT/$REDIS_DUMP_LOAD_PY2_WHEEL_NAME
|
|||||||
# Install Python module for ipaddress
|
# Install Python module for ipaddress
|
||||||
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip install ipaddress
|
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip install ipaddress
|
||||||
|
|
||||||
|
# Install Python module for psutil
|
||||||
|
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip install psutil
|
||||||
|
|
||||||
# Install SwSS SDK Python 2 package
|
# Install SwSS SDK Python 2 package
|
||||||
SWSSSDK_PY2_WHEEL_NAME=$(basename {{swsssdk_py2_wheel_path}})
|
SWSSSDK_PY2_WHEEL_NAME=$(basename {{swsssdk_py2_wheel_path}})
|
||||||
sudo cp {{swsssdk_py2_wheel_path}} $FILESYSTEM_ROOT/$SWSSSDK_PY2_WHEEL_NAME
|
sudo cp {{swsssdk_py2_wheel_path}} $FILESYSTEM_ROOT/$SWSSSDK_PY2_WHEEL_NAME
|
||||||
@ -197,6 +200,8 @@ sudo cp $IMAGE_CONFIGS/monit/monitrc $FILESYSTEM_ROOT/etc/monit/
|
|||||||
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/monitrc
|
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/monitrc
|
||||||
sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/
|
sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/
|
||||||
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/conf.d/*
|
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/conf.d/*
|
||||||
|
sudo cp $IMAGE_CONFIGS/monit/process_checker $FILESYSTEM_ROOT/usr/bin/
|
||||||
|
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/process_checker
|
||||||
|
|
||||||
# Copy crontabs
|
# Copy crontabs
|
||||||
sudo cp -f $IMAGE_CONFIGS/cron.d/* $FILESYSTEM_ROOT/etc/cron.d/
|
sudo cp -f $IMAGE_CONFIGS/cron.d/* $FILESYSTEM_ROOT/etc/cron.d/
|
||||||
|
57
files/image_config/monit/process_checker
Executable file
57
files/image_config/monit/process_checker
Executable file
@ -0,0 +1,57 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import syslog
|
||||||
|
|
||||||
|
import psutil
|
||||||
|
import swsssdk
|
||||||
|
|
||||||
|
|
||||||
|
def check_process_existence(container_name, process_cmdline):
    """
    @summary: Check whether the process in the specified container is running or not.
              If it is not running, a message is printed and the script exits with
              status 1.
    @param container_name: Name of the container; used as the key into the 'FEATURE'
                           table read from the configuration database.
    @param process_cmdline: String which the space-joined command line of the process
                            is expected to start with.
    """
    config_db = swsssdk.ConfigDBConnector()
    config_db.connect()
    feature_table = config_db.get_table("FEATURE")

    if container_name not in feature_table:
        syslog.syslog(syslog.LOG_ERR, "container '{}' is not included in SONiC image or the given container name is invalid!"
                      .format(container_name))
        return

    # We look into the 'FEATURE' table to verify whether the container is disabled or not.
    # If the container is disabled, we exit successfully so that no alert is raised.
    feature_entry = feature_table[container_name]
    if "state" in feature_entry and feature_entry["state"] == "disabled":
        sys.exit(0)

    # We leverage the psutil library to check whether the process is running or not.
    # If the process entity is found in the process tree and it is also in the 'running'
    # or 'sleeping' state, then it is marked as 'running'.
    is_running = False
    for process in psutil.process_iter(["cmdline", "status"]):
        try:
            cmdline = ' '.join(process.cmdline())
            if cmdline.startswith(process_cmdline) and process.status() in ["running", "sleeping"]:
                is_running = True
                break
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process exited (or could not be inspected) between being listed and
            # being queried. It cannot be the one we are looking for, so skip it
            # instead of aborting the whole check with a traceback.
            continue

    if not is_running:
        # If this script is run by Monit, then the following output will be appended to
        # Monit's syslog message.
        print("'{}' is not running.".format(process_cmdline))
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    @summary: Parse the container name and the process command line from the command
              line arguments and check whether that process is running.
    """
    # Adjacent string literals are used instead of a backslash continuation inside the
    # literal so that source indentation never becomes part of the description text.
    parser = argparse.ArgumentParser(
        description="Check whether the process in the specified "
                    "container is running and an alerting message will be written into syslog if it "
                    "failed to run.",
        usage="/usr/bin/process_checker <container_name> <process_cmdline>")
    parser.add_argument("container_name", help="container name")
    parser.add_argument("process_cmdline", nargs=argparse.REMAINDER, help="process command line")
    args = parser.parse_args()

    # An empty command line would be joined into "" and every process's command line
    # starts with "", so the check would always succeed. Reject it explicitly.
    if not args.process_cmdline:
        parser.error("process_cmdline must not be empty")

    check_process_existence(args.container_name, ' '.join(args.process_cmdline))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## syncd
|
## syncd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -4,8 +4,8 @@
|
|||||||
## syncd
|
## syncd
|
||||||
## dsserve
|
## dsserve
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd"
|
check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## syncd
|
## syncd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## syncd
|
## syncd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## syncd
|
## syncd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## syncd
|
## syncd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## syncd
|
## syncd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -3,5 +3,5 @@
|
|||||||
## process list:
|
## process list:
|
||||||
## syncd
|
## syncd
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -4,8 +4,8 @@
|
|||||||
## syncd
|
## syncd
|
||||||
## dsserve
|
## dsserve
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process syncd matching "/usr/bin/syncd\s"
|
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd"
|
check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if status != 0 for 5 times within 5 cycles then alert
|
||||||
|
@ -29,4 +29,5 @@ $(DOCKER_TEAMD)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
|
|||||||
$(DOCKER_TEAMD)_RUN_OPT += -v /host/warmboot:/var/warmboot
|
$(DOCKER_TEAMD)_RUN_OPT += -v /host/warmboot:/var/warmboot
|
||||||
|
|
||||||
$(DOCKER_TEAMD)_BASE_IMAGE_FILES += teamdctl:/usr/bin/teamdctl
|
$(DOCKER_TEAMD)_BASE_IMAGE_FILES += teamdctl:/usr/bin/teamdctl
|
||||||
|
$(DOCKER_TEAMD)_BASE_IMAGE_FILES += monit_teamd:/etc/monit/conf.d
|
||||||
$(DOCKER_TEAMD)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
|
$(DOCKER_TEAMD)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
|
||||||
|
Loading…
Reference in New Issue
Block a user