Changes to persist TSA/B state across reloads (#11257)

This commit is contained in:
tjchadaga 2022-07-12 00:22:48 -07:00 committed by Ying Xie
parent 4abfd37a8d
commit fc93871881
12 changed files with 354 additions and 18 deletions

View File

@ -5,6 +5,12 @@
PLATFORM=${PLATFORM:-`sonic-cfggen -H -v DEVICE_METADATA.localhost.platform`}
if [[ $1 == "TSA" ]]; then
TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "true"}}}'
elif [[ $1 == "TSB" ]]; then
TSA_STATE_UPDATE='{"BGP_DEVICE_GLOBAL":{"STATE":{"tsa_enabled": "false"}}}'
fi
# Parse the device specific asic conf file, if it exists
ASIC_CONF=/usr/share/sonic/device/$PLATFORM/asic.conf
[ -f $ASIC_CONF ] && . $ASIC_CONF
@ -20,10 +26,25 @@ if [[ ($NUM_ASIC -gt 1) ]]; then
if [ $sub_role == 'FrontEnd' ]
then
echo -e "BGP"$asic" : \c"
docker exec -i bgp$asic /usr/bin/$1
if [[ -n "$TSA_STATE_UPDATE" ]]; then
sonic-cfggen -a "$TSA_STATE_UPDATE" -w -n $NAMESPACE_PREFIX$asic
logger -t $1 -p user.info "BGP$asic: System Mode: Normal -> Maintenance"
echo "BGP$asic: System Mode: Normal -> Maintenance"
else
# If TSC is executed, invoke FRR script to check installed route-maps
docker exec -i bgp$asic /usr/bin/$1
fi
fi
asic=$[$asic+1]
done
else
docker exec -i bgp /usr/bin/$1
if [[ -n "$TSA_STATE_UPDATE" ]]; then
sonic-cfggen -a "$TSA_STATE_UPDATE" -w
logger -t $1 -p user.info "System Mode: Normal -> Maintenance"
echo "System Mode: Normal -> Maintenance"
else
# If TSC is executed, invoke FRR script to check installed route-maps
docker exec -i bgp /usr/bin/$1
fi
fi

View File

@ -1,12 +1,20 @@
#!/bin/bash
# toggle the mux to standby if dualtor and any mux active
if
[[ "$(sonic-cfggen -d -v DEVICE_METADATA.localhost.subtype | tr [:upper:] [:lower:])" == *"dualtor"* ]] &&
[[ $(show mux status | grep active | wc -l) > 0 ]];
then
logger -t TSA -p user.info "Toggle all mux mode to standby"
sudo config mux mode standby all
if [[ "$(sonic-cfggen -d -v BGP_DEVICE_GLOBAL.STATE.tsa_enabled)" == "true" ]]; then
echo "System is already in Maintenance"
logger -t TSA -p user.info "System is already in Maintenance"
else
# toggle the mux to standby if dualtor and any mux active
if
[[ "$(sonic-cfggen -d -v DEVICE_METADATA.localhost.subtype | tr [:upper:] [:lower:])" == *"dualtor"* ]] &&
[[ $(show mux status | grep active | wc -l) > 0 ]];
then
logger -t TSA -p user.info "Toggle all mux mode to standby"
sudo config mux mode standby all
fi
/usr/bin/TS TSA
echo "Please execute 'config save' to preserve System mode in Maintenance after reboot or config reload"
fi
/usr/bin/TS TSA

View File

@ -1,10 +1,17 @@
#!/bin/bash
# toggle the mux to auto if dualtor
if [[ "$(sonic-cfggen -d -v DEVICE_METADATA.localhost.subtype | tr [:upper:] [:lower:])" == *"dualtor"* ]];
then
logger -t TSB -p user.info "Toggle all mux mode to auto"
sudo config mux mode auto all
if [[ "$(sonic-cfggen -d -v BGP_DEVICE_GLOBAL.STATE.tsa_enabled)" == "false" ]]; then
echo "System is already in Normal mode"
logger -t TSB -p user.info "System is already in Normal mode"
else
# toggle the mux to auto if dualtor
if [[ "$(sonic-cfggen -d -v DEVICE_METADATA.localhost.subtype | tr [:upper:] [:lower:])" == *"dualtor"* ]];
then
logger -t TSB -p user.info "Toggle all mux mode to auto"
sudo config mux mode auto all
fi
/usr/bin/TS TSB
echo "Please execute 'config save' to preserve System mode in Normal state after reboot or config reload"
fi
/usr/bin/TS TSB

View File

@ -27,6 +27,11 @@
"POLL_INTERVAL": "10000"
}
},
"BGP_DEVICE_GLOBAL": {
"STATE": {
"tsa_enabled": "false"
}
},
{%- set features = [("bgp", "enabled", false, "enabled"),
("database", "always_enabled", false, "always_enabled"),
("lldp", "enabled", true, "enabled"),

View File

@ -18,6 +18,7 @@ from .managers_intf import InterfaceMgr
from .managers_setsrc import ZebraSetSrc
from .managers_static_rt import StaticRouteMgr
from .managers_rm import RouteMapMgr
from .managers_device_global import DeviceGlobalCfgMgr
from .runner import Runner, signal_handler
from .template import TemplateFabric
from .utils import read_constants
@ -64,6 +65,8 @@ def do_work():
# Route Advertisement Managers
AdvertiseRouteMgr(common_objs, "STATE_DB", swsscommon.STATE_ADVERTISE_NETWORK_TABLE_NAME),
RouteMapMgr(common_objs, "APPL_DB", swsscommon.APP_BGP_PROFILE_TABLE_NAME),
# Device Global Manager
DeviceGlobalCfgMgr(common_objs, "CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME),
]
runner = Runner(common_objs['cfg_mgr'])
for mgr in managers:
@ -96,3 +99,4 @@ def main():
sys.exit(rc)
except SystemExit:
os._exit(rc)

View File

@ -8,6 +8,7 @@ from .log import log_warn, log_err, log_info, log_debug, log_crit
from .manager import Manager
from .template import TemplateFabric
from .utils import run_command
from .managers_device_global import DeviceGlobalCfgMgr
class BGPPeerGroupMgr(object):
@ -23,6 +24,7 @@ class BGPPeerGroupMgr(object):
tf = common_objs['tf']
self.policy_template = tf.from_file(base_template + "policies.conf.j2")
self.peergroup_template = tf.from_file(base_template + "peer-group.conf.j2")
self.device_global_cfgmgr = DeviceGlobalCfgMgr(common_objs, "CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME)
def update(self, name, **kwargs):
"""
@ -56,14 +58,15 @@ class BGPPeerGroupMgr(object):
"""
try:
pg = self.peergroup_template.render(**kwargs)
tsa_rm = self.device_global_cfgmgr.check_state_and_get_tsa_routemaps(pg)
except jinja2.TemplateError as e:
log_err("Can't render peer-group template: '%s': %s" % (name, str(e)))
return False
if kwargs['vrf'] == 'default':
cmd = ('router bgp %s\n' % kwargs['bgp_asn']) + pg
cmd = ('router bgp %s\n' % kwargs['bgp_asn']) + pg + tsa_rm
else:
cmd = ('router bgp %s vrf %s\n' % (kwargs['bgp_asn'], kwargs['vrf'])) + pg
cmd = ('router bgp %s vrf %s\n' % (kwargs['bgp_asn'], kwargs['vrf'])) + pg + tsa_rm
self.update_entity(cmd, "Peer-group for peer '%s'" % name)
return True

View File

@ -0,0 +1,101 @@
from .manager import Manager
from .log import log_err, log_debug, log_notice
import re
from swsscommon import swsscommon
class DeviceGlobalCfgMgr(Manager):
"""This class responds to change in device-specific state"""
def __init__(self, common_objs, db, table):
"""
Initialize the object
:param common_objs: common object dictionary
:param db: name of the db
:param table: name of the table in the db
"""
self.directory = common_objs['directory']
self.cfg_mgr = common_objs['cfg_mgr']
self.constants = common_objs['constants']
self.tsa_template = common_objs['tf'].from_file("bgpd/tsa/bgpd.tsa.isolate.conf.j2")
self.tsb_template = common_objs['tf'].from_file("bgpd/tsa/bgpd.tsa.unisolate.conf.j2")
super(DeviceGlobalCfgMgr, self).__init__(
common_objs,
[],
db,
table,
)
def set_handler(self, key, data):
log_debug("DeviceGlobalCfgMgr:: set handler")
""" Handle device tsa_enabled state change """
if not data:
log_err("DeviceGlobalCfgMgr:: data is None")
return False
if "tsa_enabled" in data:
self.cfg_mgr.commit()
self.cfg_mgr.update()
self.isolate_unisolate_device(data["tsa_enabled"])
self.directory.put(self.db_name, self.table_name, "tsa_enabled", data["tsa_enabled"])
return True
return False
def del_handler(self, key):
log_debug("DeviceGlobalCfgMgr:: del handler")
return True
def check_state_and_get_tsa_routemaps(self, cfg):
""" API to get TSA route-maps if device is isolated"""
cmd = ""
if self.directory.path_exist("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME, "tsa_enabled"):
tsa_status = self.directory.get_slot("CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME)["tsa_enabled"]
if tsa_status == "true":
cmds = cfg.replace("#012", "\n").split("\n")
log_notice("DeviceGlobalCfgMgr:: Device is isolated. Applying TSA route-maps")
cmd = self.get_ts_routemaps(cmds, self.tsa_template)
return cmd
def isolate_unisolate_device(self, tsa_status):
""" API to get TSA/TSB route-maps and apply configuration"""
cmd = "\n"
if tsa_status == "true":
log_notice("DeviceGlobalCfgMgr:: Device isolated. Executing TSA")
cmd += self.get_ts_routemaps(self.cfg_mgr.get_text(), self.tsa_template)
else:
log_notice("DeviceGlobalCfgMgr:: Device un-isolated. Executing TSB")
cmd += self.get_ts_routemaps(self.cfg_mgr.get_text(), self.tsb_template)
self.cfg_mgr.push(cmd)
log_debug("DeviceGlobalCfgMgr::Done")
def get_ts_routemaps(self, cmds, ts_template):
if not cmds:
return ""
route_map_names = self.__extract_out_route_map_names(cmds)
return self.__generate_routemaps_from_template(route_map_names, ts_template)
def __generate_routemaps_from_template(self, route_map_names, template):
cmd = "\n"
for rm in sorted(route_map_names):
if "_INTERNAL_" in rm:
continue
if "V4" in rm:
ipv="V4" ; ipp="ip"
elif "V6" in rm:
ipv="V6" ; ipp="ipv6"
else:
continue
cmd += template.render(route_map_name=rm,ip_version=ipv,ip_protocol=ipp, constants=self.constants)
cmd += "\n"
return cmd
def __extract_out_route_map_names(self, cmds):
route_map_names = set()
out_route_map = re.compile(r'^\s*neighbor \S+ route-map (\S+) out$')
for line in cmds:
result = out_route_map.match(line)
if result:
route_map_names.add(result.group(1))
return route_map_names

View File

@ -0,0 +1,33 @@
!
! template: bgpd/templates/general/peer-group.conf.j2
!
neighbor PEER_V4 peer-group
neighbor PEER_V6 peer-group
address-family ipv4
neighbor PEER_V4 allowas-in 1
neighbor PEER_V4 soft-reconfiguration inbound
neighbor PEER_V4 route-map FROM_BGP_PEER_V4 in
neighbor PEER_V4 route-map TO_BGP_PEER_V4 out
exit-address-family
address-family ipv6
neighbor PEER_V6 allowas-in 1
neighbor PEER_V6 soft-reconfiguration inbound
neighbor PEER_V6 route-map FROM_BGP_PEER_V6 in
neighbor PEER_V6 route-map TO_BGP_PEER_V6 out
exit-address-family
!
! end of template: bgpd/templates/general/peer-group.conf.j2
!
route-map TO_BGP_PEER_V4 permit 20
match ip address prefix-list PL_LoopbackV4
set community 12345:12345
route-map TO_BGP_PEER_V4 deny 30
!
route-map TO_BGP_PEER_V6 permit 20
match ipv6 address prefix-list PL_LoopbackV6
set community 12345:12345
route-map TO_BGP_PEER_V6 deny 30
!

View File

@ -0,0 +1,29 @@
!
! template: bgpd/templates/general/peer-group.conf.j2
!
neighbor PEER_V4 peer-group
neighbor PEER_V6 peer-group
address-family ipv4
neighbor PEER_V4 allowas-in 1
neighbor PEER_V4 soft-reconfiguration inbound
neighbor PEER_V4 route-map FROM_BGP_PEER_V4 in
neighbor PEER_V4 route-map TO_BGP_PEER_V4 out
exit-address-family
address-family ipv6
neighbor PEER_V6 allowas-in 1
neighbor PEER_V6 soft-reconfiguration inbound
neighbor PEER_V6 route-map FROM_BGP_PEER_V6 in
neighbor PEER_V6 route-map TO_BGP_PEER_V6 out
exit-address-family
!
! end of template: bgpd/templates/general/peer-group.conf.j2
!
no route-map TO_BGP_PEER_V4 permit 20
no route-map TO_BGP_PEER_V4 deny 30
!
no route-map TO_BGP_PEER_V6 permit 20
no route-map TO_BGP_PEER_V6 deny 30
!

View File

@ -0,0 +1,11 @@
route-map TO_BGP_PEER_V4 permit 20
match ip address prefix-list PL_LoopbackV4
set community 12345:12345
route-map TO_BGP_PEER_V4 deny 30
!
route-map TO_BGP_PEER_V6 permit 20
match ipv6 address prefix-list PL_LoopbackV6
set community 12345:12345
route-map TO_BGP_PEER_V6 deny 30
!

View File

@ -0,0 +1,7 @@
no route-map TO_BGP_PEER_V4 permit 20
no route-map TO_BGP_PEER_V4 deny 30
!
no route-map TO_BGP_PEER_V6 permit 20
no route-map TO_BGP_PEER_V6 deny 30
!

View File

@ -0,0 +1,107 @@
from unittest.mock import MagicMock, patch
import os
from bgpcfgd.directory import Directory
from bgpcfgd.template import TemplateFabric
from . import swsscommon_test
from .util import load_constants
import bgpcfgd.managers_device_global
from swsscommon import swsscommon
TEMPLATE_PATH = os.path.abspath('../../dockers/docker-fpm-frr/frr')
BASE_PATH = os.path.abspath('../sonic-bgpcfgd/tests/data/general/peer-group.conf/')
def constructor():
cfg_mgr = MagicMock()
def get_text():
text = []
for line in cfg_mgr.changes.split('\n'):
if line.lstrip().startswith('!'):
continue
text.append(line)
text += [" "]
return text
def update():
cfg_mgr.changes = get_string_from_file("/result_all.conf")
def push(cfg):
cfg_mgr.changes += cfg + "\n"
def get_config():
return cfg_mgr.changes
cfg_mgr.get_text = get_text
cfg_mgr.update = update
cfg_mgr.push = push
cfg_mgr.get_config = get_config
constants = load_constants()['constants']
common_objs = {
'directory': Directory(),
'cfg_mgr': cfg_mgr,
'tf': TemplateFabric(TEMPLATE_PATH),
'constants': constants
}
mgr = bgpcfgd.managers_device_global.DeviceGlobalCfgMgr(common_objs, "CONFIG_DB", swsscommon.CFG_BGP_DEVICE_GLOBAL_TABLE_NAME)
cfg_mgr.update()
return mgr
@patch('bgpcfgd.managers_device_global.log_debug')
def test_isolate_device(mocked_log_info):
m = constructor()
res = m.set_handler("STATE", {"tsa_enabled": "true"})
assert res, "Expect True return value for set_handler"
mocked_log_info.assert_called_with("DeviceGlobalCfgMgr::Done")
assert m.cfg_mgr.get_config() == get_string_from_file("/result_all_isolate.conf")
@patch('bgpcfgd.managers_device_global.log_debug')
def test_unisolate_device(mocked_log_info):
m = constructor()
res = m.set_handler("STATE", {"tsa_enabled": "false"})
assert res, "Expect True return value for set_handler"
mocked_log_info.assert_called_with("DeviceGlobalCfgMgr::Done")
assert m.cfg_mgr.get_config() == get_string_from_file("/result_all_unisolate.conf")
def test_check_state_and_get_tsa_routemaps():
m = constructor()
m.set_handler("STATE", {"tsa_enabled": "true"})
res = m.check_state_and_get_tsa_routemaps(m.cfg_mgr.get_config())
assert res == get_string_from_file("/result_isolate.conf")
m.set_handler("STATE", {"tsa_enabled": "false"})
res = m.check_state_and_get_tsa_routemaps(m.cfg_mgr.get_config())
assert res == ""
def test_get_tsa_routemaps():
m = constructor()
assert m.get_ts_routemaps([], m.tsa_template) == ""
res = m.get_ts_routemaps(m.cfg_mgr.get_text(), m.tsa_template)
expected_res = get_string_from_file("/result_isolate.conf")
assert res == expected_res
def test_get_tsb_routemaps():
m = constructor()
assert m.get_ts_routemaps([], m.tsb_template) == ""
res = m.get_ts_routemaps(m.cfg_mgr.get_text(), m.tsb_template)
expected_res = get_string_from_file("/result_unisolate.conf")
assert res == expected_res
def get_string_from_file(filename):
fp = open(BASE_PATH + filename, "r")
cfg = fp.read()
fp.close()
return cfg
@patch('bgpcfgd.managers_device_global.log_err')
def test_set_handler_failure_case(mocked_log_info):
m = constructor()
res = m.set_handler("STATE", {})
assert res == False, "Expect False return value for invalid data passed to set_handler"
mocked_log_info.assert_called_with("DeviceGlobalCfgMgr:: data is None")
def test_del_handler():
m = constructor()
res = m.del_handler("STATE")
assert res, "Expect True return value for del_handler"