Add bgpmon to be started as a new daemon under BGP docker (#5329)
* Add bgpmon under sonic-bgpcfgd to be started as a new daemon under BGP docker * Added bgpmon to be monitored by Monit so that if it crashed, it gets alerted * use console_scripts entry point to package bgpmon
This commit is contained in:
parent
2de3afaf35
commit
128def6969
@ -6,6 +6,7 @@
|
|||||||
## bgpd
|
## bgpd
|
||||||
## staticd
|
## staticd
|
||||||
## bgpcfgd
|
## bgpcfgd
|
||||||
|
## bgpmon
|
||||||
###############################################################################
|
###############################################################################
|
||||||
check process zebra matching "/usr/lib/frr/zebra"
|
check process zebra matching "/usr/lib/frr/zebra"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if does not exist for 5 times within 5 cycles then alert
|
||||||
@ -21,3 +22,6 @@ check process staticd matching "/usr/lib/frr/staticd"
|
|||||||
|
|
||||||
check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
|
check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
|
||||||
if does not exist for 5 times within 5 cycles then alert
|
if does not exist for 5 times within 5 cycles then alert
|
||||||
|
|
||||||
|
check process bgpmon matching "python /usr/local/bin/bgpmon"
|
||||||
|
if does not exist for 5 times within 5 cycles then alert
|
||||||
|
@ -84,6 +84,17 @@ stderr_logfile=syslog
|
|||||||
dependent_startup=true
|
dependent_startup=true
|
||||||
dependent_startup_wait_for=bgpd:running
|
dependent_startup_wait_for=bgpd:running
|
||||||
|
|
||||||
|
[program:bgpmon]
|
||||||
|
command=/usr/local/bin/bgpmon
|
||||||
|
priority=6
|
||||||
|
autostart=false
|
||||||
|
autorestart=false
|
||||||
|
startsecs=0
|
||||||
|
stdout_logfile=syslog
|
||||||
|
stderr_logfile=syslog
|
||||||
|
dependent_startup=true
|
||||||
|
dependent_startup_wait_for=bgpd:running
|
||||||
|
|
||||||
{% if DEVICE_METADATA.localhost.docker_routing_config_mode is defined and DEVICE_METADATA.localhost.docker_routing_config_mode == "unified" %}
|
{% if DEVICE_METADATA.localhost.docker_routing_config_mode is defined and DEVICE_METADATA.localhost.docker_routing_config_mode == "unified" %}
|
||||||
[program:vtysh_b]
|
[program:vtysh_b]
|
||||||
command=/usr/bin/vtysh -b
|
command=/usr/bin/vtysh -b
|
||||||
|
170
src/sonic-bgpcfgd/bgpmon.py
Executable file
170
src/sonic-bgpcfgd/bgpmon.py
Executable file
@ -0,0 +1,170 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
|
""""
|
||||||
|
Description: bgpmon.py -- populating bgp related information in stateDB.
|
||||||
|
script is started by supervisord in bgp docker when the docker is started.
|
||||||
|
|
||||||
|
Initial creation of this daemon is to assist SNMP agent in obtaining the
|
||||||
|
BGP related information for its MIB support. The MIB that this daemon is
|
||||||
|
assisting is for the CiscoBgp4MIB (Neighbor state only). If there are other
|
||||||
|
BGP related items that needs to be updated in a periodic manner in the
|
||||||
|
future, then more can be added into this process.
|
||||||
|
|
||||||
|
The script checks whether there is any bgp activity by monitoring the bgp
|
||||||
|
frr.log file timestamp. If activity is detected, then it will request bgp
|
||||||
|
neighbor state via vtysh cli interface. This bgp activity monitoring is
|
||||||
|
done periodically (every 15 seconds). When triggered, it looks specifically
|
||||||
|
for the neighbor state in the json output of show bgp summary json
|
||||||
|
and update the state DB for each neighbor accordingly.
|
||||||
|
In order to not disturb and hold on to the State DB access too long and
|
||||||
|
removal of the stale neighbors (neighbors that were there previously on
|
||||||
|
previous get request but no longer there in the current get request), a
|
||||||
|
"previous" neighbor dictionary will be kept and used to determine if there
|
||||||
|
is a need to perform update or the peer is stale to be removed from the
|
||||||
|
state DB
|
||||||
|
"""
|
||||||
|
import commands
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import syslog
|
||||||
|
import swsssdk
|
||||||
|
import time
|
||||||
|
|
||||||
|
PIPE_BATCH_MAX_COUNT = 50
|
||||||
|
|
||||||
|
class BgpStateGet(object):
    """Mirror BGP neighbor states reported by vtysh into STATE_DB.

    The object keeps two snapshots of the neighbors:

    * ``peer_l`` / ``peer_state`` -- what was last written to STATE_DB.
    * ``new_peer_l`` / ``new_peer_state`` -- what the latest
      ``show bgp summary json`` reported.

    ``update_neigh_states`` diffs the two so that only changed, new or stale
    entries are sent to redis, batched through a pipeline.
    """

    def __init__(self):
        """Connect to STATE_DB and wipe any NEIGH_STATE_TABLE leftovers."""
        # list peer_l stores the Neighbor peer Ip address
        # dic peer_state stores the Neighbor peer state entries
        # list new_peer_l stores the new snapshot of Neighbor peer ip address
        # dic new_peer_state stores the new snapshot of Neighbor peer states
        self.peer_l = []
        self.peer_state = {}
        self.new_peer_l = []
        self.new_peer_state = {}
        # mtime of the FRR log seen on the previous poll (0 == never seen)
        self.cached_timestamp = 0
        self.db = swsssdk.SonicV2Connector()
        self.db.connect(self.db.STATE_DB, False)
        client = self.db.get_redis_client(self.db.STATE_DB)
        self.pipe = client.pipeline()
        # Start from a clean table: the in-memory "previous" snapshot is
        # empty, so anything already in the DB could never be aged out.
        self.db.delete_all_by_pattern(self.db.STATE_DB, "NEIGH_STATE_TABLE|*")

    def bgp_activity_detected(self):
        """Return True when the neighbor states should be polled again.

        A quick way to tell whether anything happened within BGP is to
        compare the modification time of the FRR log file with the value
        cached on the previous call. If the log file cannot be stat'ed
        (for example it was wiped out), this falls back to reporting
        activity on every call, i.e. constant polling.
        """
        try:
            timestamp = os.stat("/var/log/frr/frr.log").st_mtime
        except (IOError, OSError):
            return True

        if timestamp == self.cached_timestamp:
            return False
        self.cached_timestamp = timestamp
        return True

    def update_new_peer_states(self, peer_dict):
        """Merge one address-family summary into the "new" snapshot.

        Args:
            peer_dict: one address-family section of the summary json,
                expected to look like ``{"peers": {ip: {"state": ...}}}``.
                A section without a "peers" key is treated as empty.
        """
        peers = peer_dict.get("peers", {})
        for peer, info in peers.items():
            # A neighbor can be listed under more than one address family.
            # Record it once, otherwise the stale-entry cleanup would try
            # to delete the same peer twice.
            if peer not in self.new_peer_state:
                self.new_peer_l.append(peer)
            self.new_peer_state[peer] = info["state"]

    def get_all_neigh_states(self):
        """Take a new snapshot of BGP neighbors into the "new" containers.

        On any failure (vtysh error or unparsable output) the error is
        logged and the previous "new" snapshot is left untouched.
        """
        cmd = "vtysh -c 'show bgp summary json'"
        rc, output = commands.getstatusoutput(cmd)
        if rc:
            syslog.syslog(syslog.LOG_ERR, "*ERROR* Failed with rc:{} when execute: {}".format(rc, cmd))
            return

        try:
            peer_info = json.loads(output)
        except ValueError as e:
            # Malformed output must not take the daemon down; skip this
            # round and retry on the next poll.
            syslog.syslog(syslog.LOG_ERR, "*ERROR* Failed to parse output of: {}, reason: {}".format(cmd, e))
            return

        # cmd ran successfully, safe to clean the "new" list/dict for the
        # new snapshot
        del self.new_peer_l[:]
        self.new_peer_state.clear()
        for family in ("ipv4Unicast", "ipv6Unicast"):
            if family in peer_info:
                self.update_new_peer_states(peer_info[family])

    def flush_pipe(self, data):
        """Dump each entry in data{} into State DB via redis pipeline.

        The data{} is cleared at the end of this method before returning
        to the caller.

        Args:
            data: Neighbor state in dictionary format
            {
                'NEIGH_STATE_TABLE|ip_address_a': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_b': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_c': {'state':state},
                'NEIGH_STATE_TABLE|ip_address_x': None,
                'NEIGH_STATE_TABLE|ip_address_z': None
                ...
            }
            A value of None requests deletion of the key.
        """
        for key, value in data.items():
            if value is None:
                # delete case
                self.pipe.delete(key)
            else:
                # Add or Modify case
                self.pipe.hmset(key, value)
        self.pipe.execute()
        data.clear()

    def update_neigh_states(self):
        """Push the difference between the old and new snapshots to STATE_DB.

        New neighbors and neighbors whose state changed are written,
        neighbors that vanished are deleted, unchanged neighbors are left
        alone. Operations are flushed whenever more than
        PIPE_BATCH_MAX_COUNT of them are pending.
        """
        data = {}
        known_peers = set(self.peer_l)
        current_peers = set()

        for peer in self.new_peer_l:
            current_peers.add(peer)
            state = self.new_peer_state[peer]
            # Write new neighbors unconditionally, known ones only when
            # their state changed.
            if peer not in known_peers or self.peer_state[peer] != state:
                data["NEIGH_STATE_TABLE|%s" % peer] = {'state': state}
                self.peer_state[peer] = state
            if len(data) > PIPE_BATCH_MAX_COUNT:
                self.flush_pipe(data)

        # Check for stale state entries to be cleaned up: anything written
        # previously that the new snapshot no longer contains.
        for peer in known_peers - current_peers:
            data["NEIGH_STATE_TABLE|%s" % peer] = None
            self.peer_state.pop(peer, None)
            if len(data) > PIPE_BATCH_MAX_COUNT:
                self.flush_pipe(data)

        # If anything in the pipeline not yet flushed, flush them now
        if data:
            self.flush_pipe(data)

        # Save the new list as the reference for the next round
        self.peer_l = self.new_peer_l[:]
|
||||||
|
|
||||||
|
def main():
    """Run the bgpmon daemon: poll BGP neighbor states into STATE_DB forever.

    Exits with status 1 if the state tracker cannot be created; the reason
    is logged to syslog first.
    """
    # Local import: sys.exit() is the programmatic way to terminate, while
    # the bare exit() helper is only injected by the site module.
    import sys

    print("bgpmon service started")

    try:
        bgp_state_get = BgpStateGet()
    except Exception as e:
        # Log under a literal daemon name so the real reason is reported
        # instead of failing on an undefined module-name constant.
        syslog.syslog(syslog.LOG_ERR, "{}: error exit 1, reason {}".format("bgpmon", str(e)))
        sys.exit(1)

    # periodically obtain the new neighbor information and update if necessary
    while True:
        time.sleep(15)
        if bgp_state_get.bgp_activity_detected():
            bgp_state_get.get_all_neigh_states()
            bgp_state_get.update_neigh_states()


if __name__ == '__main__':
    main()
|
@ -10,6 +10,11 @@ setuptools.setup(name='sonic-bgpcfgd',
|
|||||||
url='https://github.com/Azure/sonic-buildimage',
|
url='https://github.com/Azure/sonic-buildimage',
|
||||||
packages=setuptools.find_packages(),
|
packages=setuptools.find_packages(),
|
||||||
scripts=['bgpcfgd'],
|
scripts=['bgpcfgd'],
|
||||||
|
entry_points={
|
||||||
|
'console_scripts': [
|
||||||
|
'bgpmon = bgpmon:main',
|
||||||
|
]
|
||||||
|
},
|
||||||
install_requires=['jinja2>=2.10', 'netaddr', 'pyyaml'],
|
install_requires=['jinja2>=2.10', 'netaddr', 'pyyaml'],
|
||||||
setup_requires=['pytest-runner', 'pytest'],
|
setup_requires=['pytest-runner', 'pytest'],
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user