Add bgpmon to be started as a new daemon under BGP docker (#5329)
* Add bgpmon under sonic-bgpcfgd to be started as a new daemon under BGP docker * Added bgpmon to be monitored by Monit so that if it crashed, it gets alerted * use console_scripts entry point to package bgpmon
This commit is contained in:
parent
2de3afaf35
commit
128def6969
@ -6,6 +6,7 @@
|
||||
## bgpd
|
||||
## staticd
|
||||
## bgpcfgd
|
||||
## bgpmon
|
||||
###############################################################################
|
||||
check process zebra matching "/usr/lib/frr/zebra"
|
||||
if does not exist for 5 times within 5 cycles then alert
|
||||
@ -21,3 +22,6 @@ check process staticd matching "/usr/lib/frr/staticd"
|
||||
|
||||
check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
|
||||
if does not exist for 5 times within 5 cycles then alert
|
||||
|
||||
check process bgpmon matching "python /usr/local/bin/bgpmon"
|
||||
if does not exist for 5 times within 5 cycles then alert
|
||||
|
@ -84,6 +84,17 @@ stderr_logfile=syslog
|
||||
dependent_startup=true
|
||||
dependent_startup_wait_for=bgpd:running
|
||||
|
||||
[program:bgpmon]
|
||||
command=/usr/local/bin/bgpmon
|
||||
priority=6
|
||||
autostart=false
|
||||
autorestart=false
|
||||
startsecs=0
|
||||
stdout_logfile=syslog
|
||||
stderr_logfile=syslog
|
||||
dependent_startup=true
|
||||
dependent_startup_wait_for=bgpd:running
|
||||
|
||||
{% if DEVICE_METADATA.localhost.docker_routing_config_mode is defined and DEVICE_METADATA.localhost.docker_routing_config_mode == "unified" %}
|
||||
[program:vtysh_b]
|
||||
command=/usr/bin/vtysh -b
|
||||
|
170
src/sonic-bgpcfgd/bgpmon.py
Executable file
170
src/sonic-bgpcfgd/bgpmon.py
Executable file
@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
""""
|
||||
Description: bgpmon.py -- populating bgp related information in stateDB.
|
||||
script is started by supervisord in bgp docker when the docker is started.
|
||||
|
||||
Initial creation of this daemon is to assist SNMP agent in obtaining the
|
||||
BGP related information for its MIB support. The MIB that this daemon is
|
||||
assisting is for the CiscoBgp4MIB (Neighbor state only). If there are other
|
||||
BGP related items that need to be updated in a periodic manner in the
|
||||
future, then more can be added into this process.
|
||||
|
||||
The script checks whether there is any bgp activity by monitoring the bgp
|
||||
frr.log file timestamp. If activity is detected, then it will request bgp
|
||||
neighbor state via vtysh cli interface. This bgp activity monitoring is
|
||||
done periodically (every 15 seconds). When triggered, it looks specifically
|
||||
for the neighbor state in the json output of "show bgp summary json"
|
||||
and update the state DB for each neighbor accordingly.
|
||||
In order to not disturb and hold on to the State DB access too long and
|
||||
to remove stale neighbors (neighbors that were there previously on the
|
||||
previous get request but no longer there in the current get request), a
|
||||
"previous" neighbor dictionary will be kept and used to determine if there
|
||||
is a need to perform update or the peer is stale to be removed from the
|
||||
state DB
|
||||
"""
|
||||
import commands
|
||||
import json
|
||||
import os
|
||||
import syslog
|
||||
import swsssdk
|
||||
import time
|
||||
|
||||
# Threshold on the number of pending State DB operations (adds, modifies and
# deletes collected by BgpStateGet.update_neigh_states) that triggers a flush
# of the redis pipeline, so a single pipeline execution stays small.
PIPE_BATCH_MAX_COUNT = 50
|
||||
|
||||
class BgpStateGet(object):
    """Mirror BGP neighbor states reported by FRR into the State DB.

    The object keeps two snapshots of the neighbors:

    * ``peer_l`` / ``peer_state``: what was last written to the State DB.
    * ``new_peer_l`` / ``new_peer_state``: the most recent result of
      ``show bgp summary json``.

    ``update_neigh_states`` diffs the two snapshots and pushes only the
    differences (new peers, changed states, stale peers) to the
    ``NEIGH_STATE_TABLE`` keys through a redis pipeline.
    """

    def __init__(self):
        """Connect to the State DB and drop any leftover neighbor entries."""
        # list peer_l stores the Neighbor peer Ip address
        # dic peer_state stores the Neighbor peer state entries
        # list new_peer_l stores the new snapshot of Neighbor peer ip address
        # dic new_peer_state stores the new snapshot of Neighbor peer states
        self.peer_l = []
        self.peer_state = {}
        self.new_peer_l = []
        self.new_peer_state = {}
        # mtime of the FRR log seen on the previous poll (0 = never seen)
        self.cached_timestamp = 0
        self.db = swsssdk.SonicV2Connector()
        self.db.connect(self.db.STATE_DB, False)
        client = self.db.get_redis_client(self.db.STATE_DB)
        self.pipe = client.pipeline()
        # Start from a clean table: the in-memory "previous" snapshot is
        # empty, so the State DB must not hold entries from an earlier run.
        self.db.delete_all_by_pattern(self.db.STATE_DB, "NEIGH_STATE_TABLE|*")

    # A quick way to check if there are anything happening within BGP is to
    # check its log file has any activities. This is by checking its modified
    # timestamp against the cached timestamp that we keep and if there is a
    # difference, there is activity detected. In case the log file got wiped
    # out, it will default back to constant pulling every 15 seconds
    def bgp_activity_detected(self):
        """Return True when the FRR log changed since the previous call.

        Returns:
            True if the modification time of /var/log/frr/frr.log differs
            from the cached one (the cache is updated), or if the file
            cannot be stat'ed; False otherwise.
        """
        try:
            timestamp = os.stat("/var/log/frr/frr.log").st_mtime
        except (IOError, OSError):
            # Log file missing/unreadable: fall back to unconditional polling.
            return True
        if timestamp == self.cached_timestamp:
            return False
        self.cached_timestamp = timestamp
        return True

    def update_new_peer_states(self, peer_dict):
        """Merge the peers of one address family into the "new" snapshot.

        Args:
            peer_dict: one address-family section of the summary output; its
                optional "peers" entry maps a peer address to a dictionary
                that carries a "state" entry.

        Raises:
            KeyError: if a peer entry has no "state".
        """
        # A section without "peers" simply contributes nothing.
        peers = peer_dict.get("peers") or {}
        for peer, info in peers.items():
            state = info["state"]
            # The same peer may be listed under both address families; keep
            # a single list entry so it is not handled twice per cycle.
            if peer not in self.new_peer_state:
                self.new_peer_l.append(peer)
            self.new_peer_state[peer] = state

    # Get a new snapshot of BGP neighbors and store them in the "new" location
    def get_all_neigh_states(self):
        """Query vtysh and rebuild the "new" snapshot of neighbor states.

        On any failure (non-zero exit status or unparsable output) an error
        is logged and the previous "new" snapshot is left untouched.
        """
        cmd = "vtysh -c 'show bgp summary json'"
        rc, output = commands.getstatusoutput(cmd)
        if rc:
            syslog.syslog(syslog.LOG_ERR, "*ERROR* Failed with rc:{} when execute: {}".format(rc, cmd))
            return

        try:
            peer_info = json.loads(output)
        except ValueError as e:
            # Malformed output must not take the whole daemon down.
            syslog.syslog(syslog.LOG_ERR, "*ERROR* Failed to parse output of: {}, reason: {}".format(cmd, e))
            return
        if not isinstance(peer_info, dict):
            syslog.syslog(syslog.LOG_ERR, "*ERROR* Unexpected output format of: {}".format(cmd))
            return

        # cmd ran successfully, safe to Clean the "new" lists/dic for new snapshot
        del self.new_peer_l[:]
        self.new_peer_state.clear()
        for key in ("ipv4Unicast", "ipv6Unicast"):
            if key in peer_info:
                self.update_new_peer_states(peer_info[key])

    # This method will take the caller's dictionary which contains the peer state operation
    # That need to be updated in StateDB using Redis pipeline.
    # The data{} will be cleared at the end of this method before returning to caller.
    def flush_pipe(self, data):
        """Dump each entry in data{} into State DB via redis pipeline.

        Args:
            data: Neighbor state in dictionary format
            {
                'NEIGH_STATE_TABLE|ip_address_a': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_b': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_c': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_x': None,
                'NEIGH_STATE_TABLE|ip_address_z': None
                ...
            }
            A value of None requests deletion of the key. The dictionary is
            emptied before returning.
        """
        for key, value in data.items():
            if value is None:
                # delete case
                self.pipe.delete(key)
            else:
                # Add or Modify case
                # NOTE(review): newer redis-py releases deprecate hmset in
                # favour of hset(mapping=...) - confirm the packaged version.
                self.pipe.hmset(key, value)
        self.pipe.execute()
        data.clear()

    def update_neigh_states(self):
        """Push the difference between the old and new snapshots to State DB.

        New peers and peers whose state changed are written, peers that
        disappeared are deleted, and the new snapshot then becomes the
        current one. Operations are flushed in batches of at most
        PIPE_BATCH_MAX_COUNT entries.
        """
        data = {}
        for peer in self.new_peer_l:
            state = self.new_peer_state[peer]
            # peer_state holds exactly the peers already present in State DB,
            # so a dictionary lookup replaces the linear search in peer_l.
            if peer not in self.peer_state or self.peer_state[peer] != state:
                # New neighbor or state changed. Update state DB for this entry
                data["NEIGH_STATE_TABLE|%s" % peer] = {'state': state}
                self.peer_state[peer] = state
            if len(data) >= PIPE_BATCH_MAX_COUNT:
                self.flush_pipe(data)

        # Check for stale state entries to be cleaned up
        current_peers = set(self.new_peer_l)
        for peer in self.peer_l:
            if peer in current_peers:
                continue
            # remove this from the stateDB and the current neighbor state entry
            data["NEIGH_STATE_TABLE|%s" % peer] = None
            self.peer_state.pop(peer, None)
            if len(data) >= PIPE_BATCH_MAX_COUNT:
                self.flush_pipe(data)

        # If anything in the pipeline not yet flushed, flush them now
        if data:
            self.flush_pipe(data)
        # Save the new List
        self.peer_l = self.new_peer_l[:]
|
||||
|
||||
def main():
    """Entry point of the bgpmon daemon.

    Creates the BgpStateGet helper, then polls forever: every 15 seconds,
    if BGP activity is detected, a new neighbor snapshot is taken and the
    State DB is brought in line with it.

    Raises:
        SystemExit: with status 1 when the helper cannot be created.
    """
    print("bgpmon service started")

    try:
        bgp_state_get = BgpStateGet()
    except Exception as e:
        # Name the daemon literally: no module-level name constant exists in
        # this file, so referencing one here would raise NameError and hide
        # the real start-up failure.
        syslog.syslog(syslog.LOG_ERR, "{}: error exit 1, reason {}".format("bgpmon", str(e)))
        # SystemExit instead of exit(): the latter is only injected by the
        # site module and is not guaranteed to exist.
        raise SystemExit(1)

    # periodically obtain the new neighbor information and update if necessary
    while True:
        time.sleep(15)
        if bgp_state_get.bgp_activity_detected():
            bgp_state_get.get_all_neigh_states()
            bgp_state_get.update_neigh_states()
|
||||
|
||||
# Start the daemon only when this file is executed as a script; importing it
# (e.g. through the console_scripts entry point) must not start the loop.
if __name__ == "__main__":
    main()
|
@ -10,6 +10,11 @@ setuptools.setup(name='sonic-bgpcfgd',
|
||||
url='https://github.com/Azure/sonic-buildimage',
|
||||
packages=setuptools.find_packages(),
|
||||
scripts=['bgpcfgd'],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'bgpmon = bgpmon:main',
|
||||
]
|
||||
},
|
||||
install_requires=['jinja2>=2.10', 'netaddr', 'pyyaml'],
|
||||
setup_requires=['pytest-runner', 'pytest'],
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user