Add bgpmon to be started as a new daemon under BGP docker (#5329)

* Add bgpmon under sonic-bgpcfgd to be started as a new daemon under BGP docker

* Added bgpmon to be monitored by Monit so that if it crashed, it gets alerted

* use console_scripts entry point to package bgpmon
This commit is contained in:
gechiang 2020-09-20 14:32:09 -07:00 committed by GitHub
parent 2de3afaf35
commit 128def6969
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 190 additions and 0 deletions

View File

@ -6,6 +6,7 @@
## bgpd
## staticd
## bgpcfgd
## bgpmon
###############################################################################
check process zebra matching "/usr/lib/frr/zebra"
if does not exist for 5 times within 5 cycles then alert
@ -21,3 +22,6 @@ check process staticd matching "/usr/lib/frr/staticd"
check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
if does not exist for 5 times within 5 cycles then alert
check process bgpmon matching "python /usr/local/bin/bgpmon"
if does not exist for 5 times within 5 cycles then alert

View File

@ -84,6 +84,17 @@ stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=bgpd:running
[program:bgpmon]
command=/usr/local/bin/bgpmon
priority=6
autostart=false
autorestart=false
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
dependent_startup=true
dependent_startup_wait_for=bgpd:running
{% if DEVICE_METADATA.localhost.docker_routing_config_mode is defined and DEVICE_METADATA.localhost.docker_routing_config_mode == "unified" %}
[program:vtysh_b]
command=/usr/bin/vtysh -b

170
src/sonic-bgpcfgd/bgpmon.py Executable file
View File

@ -0,0 +1,170 @@
#!/usr/bin/env python2
""""
Description: bgpmon.py -- populating bgp related information in stateDB.
script is started by supervisord in bgp docker when the docker is started.
Initial creation of this daemon is to assist SNMP agent in obtaining the
BGP related information for its MIB support. The MIB that this daemon is
assiting is for the CiscoBgp4MIB (Neighbor state only). If there are other
BGP related items that needs to be updated in a periodic manner in the
future, then more can be added into this process.
The script check if there are any bgp activities by monitoring the bgp
frr.log file timestamp. If activity is detected, then it will request bgp
neighbor state via vtysh cli interface. This bgp activity monitoring is
done periodically (every 15 second). When triggered, it looks specifically
for the neighbor state in the json output of show ip bgp neighbors json
and update the state DB for each neighbor accordingly.
In order to not disturb and hold on to the State DB access too long and
removal of the stale neighbors (neighbors that was there previously on
previous get request but no longer there in the current get request), a
"previous" neighbor dictionary will be kept and used to determine if there
is a need to perform update or the peer is stale to be removed from the
state DB
"""
import commands
import json
import os
import syslog
import swsssdk
import time
PIPE_BATCH_MAX_COUNT = 50
class BgpStateGet():
    """Snapshot BGP neighbor session states via vtysh and mirror them into
    STATE_DB's NEIGH_STATE_TABLE.

    A "previous" snapshot (peer_l/peer_state) is kept alongside each "new"
    snapshot (new_peer_l/new_peer_state) so that only changed entries are
    written and neighbors that disappeared are deleted from the state DB.
    All writes go through a redis pipeline, flushed in bounded batches.
    """
    def __init__(self):
        # list peer_l stores the Neighbor peer Ip address
        # dic peer_state stores the Neighbor peer state entries
        # list new_peer_l stores the new snapshot of Neighbor peer ip address
        # dic new_peer_state stores the new snapshot of Neighbor peer states
        self.peer_l = []
        self.peer_state = {}
        self.new_peer_l = []
        self.new_peer_state = {}
        # mtime of frr.log seen on the previous poll; 0 guarantees the very
        # first call to bgp_activity_detected() reports activity
        self.cached_timestamp = 0
        self.db = swsssdk.SonicV2Connector()
        self.db.connect(self.db.STATE_DB, False)
        client = self.db.get_redis_client(self.db.STATE_DB)
        self.pipe = client.pipeline()
        # Start from a clean slate: drop any NEIGH_STATE_TABLE entries left
        # over from a previous run of this daemon
        self.db.delete_all_by_pattern(self.db.STATE_DB, "NEIGH_STATE_TABLE|*" )
    # A quick way to check if there are anything happening within BGP is to
    # check its log file has any activities. This is by checking its modified
    # timestamp against the cached timestamp that we keep and if there is a
    # difference, there is activity detected. In case the log file got wiped
    # out, it will default back to constant pulling every 15 seconds
    def bgp_activity_detected(self):
        """Return True when frr.log's mtime differs from the cached one
        (updating the cache), or when the file cannot be stat'ed at all."""
        try:
            timestamp = os.stat("/var/log/frr/frr.log").st_mtime
            if timestamp != self.cached_timestamp:
                self.cached_timestamp = timestamp
                return True
            else:
                return False
        except (IOError, OSError):
            # Log file missing or rotated away: err on the side of polling
            return True
    def update_new_peer_states(self, peer_dict):
        """Fold one address family's "peers" sub-dict (from the vtysh
        summary json) into the new snapshot list and state dict."""
        peer_l = peer_dict["peers"].keys()
        self.new_peer_l.extend(peer_l)
        for i in range (0, len(peer_l)):
            self.new_peer_state[peer_l[i]] = peer_dict["peers"][peer_l[i]]["state"]
    # Get a new snapshot of BGP neighbors and store them in the "new" location
    def get_all_neigh_states(self):
        """Run 'show bgp summary json' via vtysh and rebuild the new
        snapshot from its ipv4Unicast/ipv6Unicast sections.

        On command failure the error is logged and the previous snapshot is
        left untouched (the method returns without clearing anything)."""
        cmd = "vtysh -c 'show bgp summary json'"
        rc, output = commands.getstatusoutput(cmd)
        if rc:
            syslog.syslog(syslog.LOG_ERR, "*ERROR* Failed with rc:{} when execute: {}".format(rc, cmd))
            return
        peer_info = json.loads(output)
        # cmd ran successfully, safe to Clean the "new" lists/dic for new snapshot
        del self.new_peer_l[:]
        self.new_peer_state.clear()
        for key, value in peer_info.items():
            if key == "ipv4Unicast" or key == "ipv6Unicast":
                self.update_new_peer_states(value)
    # This method will take the caller's dictionary which contains the peer state operation
    # That need to be updated in StateDB using Redis pipeline.
    # The data{} will be cleared at the end of this method before returning to caller.
    def flush_pipe(self, data):
        """Dump each entry in data{} into State DB via redis pipeline.
        Args:
            data: Neighbor state in dictionary format
            {
                'NEIGH_STATE_TABLE|ip_address_a': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_b': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_c': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_x': None,
                'NEIGH_STATE_TABLE|ip_address_z': None
                ...
            }
        """
        for key, value in data.items():
            if value is None:
                # delete case
                self.pipe.delete(key)
            else:
                # Add or Modify case
                self.pipe.hmset(key, value)
        self.pipe.execute()
        data.clear()
    def update_neigh_states(self):
        """Diff the new snapshot against the previous one and push the
        resulting adds/updates/deletes to STATE_DB in batches of at most
        PIPE_BATCH_MAX_COUNT entries per pipeline flush.

        Peers still present in peer_l after the diff loop are stale and get
        a None entry (delete) queued for them. Finally the new peer list
        becomes the "previous" list for the next cycle."""
        data = {}
        for i in range (0, len(self.new_peer_l)):
            peer = self.new_peer_l[i]
            key = "NEIGH_STATE_TABLE|%s" % peer
            if peer in self.peer_l:
                # only update the entry if state changed
                if self.peer_state[peer] != self.new_peer_state[peer]:
                    # state changed. Update state DB for this entry
                    state = self.new_peer_state[peer]
                    data[key] = {'state':state}
                    self.peer_state[peer] = state
                # remove this neighbor from old list since it is accounted for
                self.peer_l.remove(peer)
            else:
                # New neighbor found case. Add to dictionary and state DB
                state = self.new_peer_state[peer]
                data[key] = {'state':state}
                self.peer_state[peer] = state
            if len(data) > PIPE_BATCH_MAX_COUNT:
                self.flush_pipe(data)
        # Check for stale state entries to be cleaned up
        while len(self.peer_l) > 0:
            # remove this from the stateDB and the current neighbor state entry
            peer = self.peer_l.pop(0)
            del_key = "NEIGH_STATE_TABLE|%s" % peer
            data[del_key] = None
            del self.peer_state[peer]
            if len(data) > PIPE_BATCH_MAX_COUNT:
                self.flush_pipe(data)
        # If anything in the pipeline not yet flushed, flush them now
        if len(data) > 0:
            self.flush_pipe(data)
        # Save the new List
        self.peer_l = self.new_peer_l[:]
def main():
print "bgpmon service started"
try:
bgp_state_get = BgpStateGet()
except Exception as e:
syslog.syslog(syslog.LOG_ERR, "{}: error exit 1, reason {}".format(THIS_MODULE, str(e)))
exit(1)
# periodically obtain the new neighbor infomraton and update if necessary
while True:
time.sleep(15)
if bgp_state_get.bgp_activity_detected():
bgp_state_get.get_all_neigh_states()
bgp_state_get.update_neigh_states()
if __name__ == '__main__':
main()

View File

@ -10,6 +10,11 @@ setuptools.setup(name='sonic-bgpcfgd',
url='https://github.com/Azure/sonic-buildimage',
packages=setuptools.find_packages(),
scripts=['bgpcfgd'],
entry_points={
'console_scripts': [
'bgpmon = bgpmon:main',
]
},
install_requires=['jinja2>=2.10', 'netaddr', 'pyyaml'],
setup_requires=['pytest-runner', 'pytest'],
)