2019-12-30 20:25:57 -06:00
|
|
|
###############################################################################
|
|
|
|
## Monit configuration for SONiC host OS
|
|
|
|
##
|
|
|
|
## This includes system-level monitoring as well as processes which
|
|
|
|
## run in the host OS (i.e., not inside a Docker container)
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
# Alert when space usage on the root filesystem (/) exceeds 90% in 10 of the last 20 cycles; the alert is repeated every cycle.
check filesystem root-overlay with path /
|
2020-10-31 19:29:49 -05:00
|
|
|
if space usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles
|
2019-12-30 20:25:57 -06:00
|
|
|
|
|
|
|
# Alert when space usage on /var/log exceeds 90% in 10 of the last 20 cycles; the alert is repeated every cycle.
check filesystem var-log with path /var/log
|
2020-10-31 19:29:49 -05:00
|
|
|
if space usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles
|
2019-12-30 20:25:57 -06:00
|
|
|
|
|
|
|
# Host-wide resource checks: alert when memory, user CPU or system CPU usage exceeds 90% in 10 of the last 20 cycles; each alert is repeated every cycle.
check system $HOST
|
2020-10-31 19:29:49 -05:00
|
|
|
if memory usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles
|
|
|
|
if cpu usage (user) > 90% for 10 times within 20 cycles then alert repeat every 1 cycles
|
|
|
|
if cpu usage (system) > 90% for 10 times within 20 cycles then alert repeat every 1 cycles
|
2019-12-30 20:25:57 -06:00
|
|
|
|
|
|
|
# Monitor rsyslog through its pidfile; start/stop go through systemctl. Restart it when totalmem exceeds 800 MB in 10 of the last 20 cycles.
check process rsyslog with pidfile /var/run/rsyslogd.pid
|
|
|
|
start program = "/bin/systemctl start rsyslog.service"
|
|
|
|
stop program = "/bin/systemctl stop rsyslog.service"
|
2020-01-10 15:01:24 -06:00
|
|
|
if totalmem > 800 MB for 10 times within 20 cycles then restart
|
2020-08-04 12:33:13 -05:00
|
|
|
|
|
|
|
# route_check.py: verifies that routes in APPL-DB & ASIC-DB are in sync.
|
|
|
|
# For any discrepancy, details are logged and a non-zero code is returned
|
|
|
|
# which would trigger a monit alert.
|
|
|
|
# Hence, for any discrepancy, there will be log messages at "ERR" level
|
|
|
|
# from both route_check.py & monit.
|
|
|
|
#
|
2020-09-20 22:16:42 -05:00
|
|
|
# Run route_check.py every 5 cycles; alert when its exit status is non-zero for 3 cycles, repeating the alert every cycle.
check program routeCheck with path "/usr/local/bin/route_check.py"
|
2020-08-04 12:33:13 -05:00
|
|
|
every 5 cycles
|
2020-10-31 19:29:49 -05:00
|
|
|
if status != 0 for 3 cycle then alert repeat every 1 cycles
|
|
|
|
|
2021-05-26 19:59:08 -05:00
|
|
|
# Check if /etc & /home are writable. If not, make them writable.
|
|
|
|
# Raise syslog error message, in case of underlying issues
|
|
|
|
#
|
|
|
|
# Run disk_check.py every 5 cycles; alert when its exit status is non-zero for 3 cycles, repeating the alert every cycle.
check program diskCheck with path "/usr/local/bin/disk_check.py"
|
|
|
|
every 5 cycles
|
|
|
|
if status != 0 for 3 cycle then alert repeat every 1 cycles
|
|
|
|
|
2021-01-07 21:52:22 -06:00
|
|
|
# Run container_checker; alert when its exit status is non-zero in 5 of the last 5 cycles, repeating the alert every cycle.
check program container_checker with path "/usr/bin/container_checker"
|
|
|
|
if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
|
2021-08-19 18:29:25 -05:00
|
|
|
|
|
|
|
# vnet_route_check.py: tool that verifies VNET route consistency between SONiC and vendor SDK DBs.
|
|
|
|
# Run vnet_route_check.py every 5 cycles; alert when its exit status is non-zero for 3 cycles, repeating the alert every cycle.
check program vnetRouteCheck with path "/usr/local/bin/vnet_route_check.py"
|
|
|
|
every 5 cycles
|
|
|
|
if status != 0 for 3 cycle then alert repeat every 1 cycles
|
|
|
|
|
2022-10-06 10:06:46 -05:00
|
|
|
# memory_check: tool that verifies that memory usage does not cross the threshold, and invokes techsupport otherwise.
|
|
|
|
# Run memory_threshold_check.py; when it exits with status 2 in 10 of the last 20 cycles, execute memory_threshold_check_handler.py.
check program memory_check with path "/usr/local/bin/memory_threshold_check.py"
|
|
|
|
if status == 2 for 10 times within 20 cycles then exec "/usr/local/bin/memory_threshold_check_handler.py"
|