sonic-buildimage/files/scripts/write_standby.py
Devesh Pathak d74055e12c
Increase wait_for_tunnel() timeout to 90s (#14279)
Why I did it
Orchagent sometimes takes additional time to execute Tunnel tasks. This causes the write_standby script to error out, so the mux state machines are not initialized. As a result, `show mux status` is missing some ports in its output.

Mar 13 20:36:52.337051 m64-tor-0-yy41 INFO systemd[1]: Starting MUX Cable Container...
Mar 13 20:37:52.480322 m64-tor-0-yy41 ERR write_standby: Timed out waiting for tunnel MuxTunnel0, mux state will not be written
Mar 13 20:37:58.983412 m64-tor-0-yy41 NOTICE swss#orchagent: :- doTask: Tunnel(s) added to ASIC_DB.
How I did it
Increase timeout from 60s to 90s

How to verify it
Verified that mux state machine is initialized and show mux status has all needed ports in it.
2023-04-07 11:30:58 +08:00

200 lines
7.3 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import time
from sonic_py_common import logger as log
from swsscommon.swsscommon import ConfigDBConnector, DBConnector, FieldValuePairs, ProducerStateTable, SonicV2Connector, Table
from swsscommon.swsscommon import APPL_DB, STATE_DB
# Logger shared by the whole script, created with the identifier 'write_standby'
logger = log.Logger('write_standby')
# Path of the Redis unix socket; handed to DBConnector when opening APPL_DB and STATE_DB
REDIS_SOCK_PATH = '/var/run/redis/redis.sock'
def create_fvs(**kwargs):
    """
    Packs the given keyword arguments into a FieldValuePairs object,
    one (field, value) pair per keyword argument
    """
    pairs = [(field, value) for field, value in kwargs.items()]
    return FieldValuePairs(pairs)
class MuxStateWriter(object):
    """
    Class used to write the initial mux state to APP DB

    Every database connector is created lazily, the first time the
    corresponding property is read, and is cached on the instance afterwards.
    """

    def __init__(self, activeactive, activestandby, shutdown_module):
        """
        Args:
            activeactive: state written for 'auto'/'manual' ports that are
                          recognized as active-active
            activestandby: state written for all other 'auto'/'manual' ports
            shutdown_module: name of the service whose shutdown triggered this
                             run, or None when not running in a shutdown context
        """
        self.config_db_connector = None
        self.appl_db_connector = None
        self.state_db_connector = None
        self.asic_db_connector = None
        self.default_active_active_state = activeactive
        self.default_active_standby_state = activestandby
        self.shutdown_module = shutdown_module
        # NOTE: the attribute name (misspelling included) is kept unchanged so
        # that any existing reader of `is_shutdwon` keeps working.
        self.is_shutdwon = self.shutdown_module is not None

    @property
    def config_db(self):
        """
        Returns config DB connector.
        Initializes the connector during the first call
        """
        if self.config_db_connector is None:
            self.config_db_connector = ConfigDBConnector(use_unix_socket_path=True)
            self.config_db_connector.connect()
        return self.config_db_connector

    @property
    def appl_db(self):
        """
        Returns the app DB connector.
        Initializes the connector during the first call
        """
        if self.appl_db_connector is None:
            self.appl_db_connector = DBConnector(APPL_DB, REDIS_SOCK_PATH, True)
        return self.appl_db_connector

    @property
    def state_db(self):
        """
        Returns the state DB connector.
        Initializes the connector during the first call
        """
        if self.state_db_connector is None:
            self.state_db_connector = DBConnector(STATE_DB, REDIS_SOCK_PATH, True)
        return self.state_db_connector

    @property
    def asic_db(self):
        """
        Returns the ASIC DB connector.
        Initializes the connector during the first call
        """
        if self.asic_db_connector is None:
            self.asic_db_connector = SonicV2Connector(use_unix_socket_path=True)
            self.asic_db_connector.connect('ASIC_DB')
        return self.asic_db_connector

    @property
    def tunnel_name(self):
        """
        Returns the name of the IP-in-IP tunnel used for Dual ToR devices
        (the first key of the TUNNEL table in config DB)
        """
        return self.config_db.get_keys('TUNNEL')[0]

    @property
    def is_dualtor(self):
        """
        Checks if script is running on a dual ToR system, i.e. whether the
        'subtype' field of the first DEVICE_METADATA entry contains 'dualtor'
        (case-insensitive)
        """
        localhost_key = self.config_db.get_keys('DEVICE_METADATA')[0]
        metadata = self.config_db.get_entry('DEVICE_METADATA', localhost_key)
        return 'subtype' in metadata and 'dualtor' in metadata['subtype'].lower()

    @property
    def is_warmrestart(self):
        """
        Checks if a warmrestart is going on

        Returns True when WARM_RESTART_ENABLE_TABLE in state DB has
        'enable' == 'true' for 'system', or for the shutdown module when one
        was given.
        """
        tbl = Table(self.state_db, 'WARM_RESTART_ENABLE_TABLE')
        (status, value) = tbl.hget('system', 'enable')
        if status and value == 'true':
            return True

        if self.shutdown_module:
            (status, value) = tbl.hget(self.shutdown_module, 'enable')
            if status and value == 'true':
                return True

        return False

    def get_all_mux_intfs_modes(self):
        """
        Returns a dict mapping every mux cable interface to its suggested mode

        Setting mux initial modes is crucial to kick off the statemachines,
        have to set the modes for all mux/gRPC ports.

        Ports configured as 'active' or 'standby' keep that state. Ports
        configured as 'auto' or 'manual' get the active-active default when
        they carry a SoC address or an 'active-active' cable type, and the
        active-standby default otherwise. Ports in any other state are left
        out of the result.
        """
        intf_modes = {}
        all_intfs = self.config_db.get_table('MUX_CABLE')
        for intf, status in all_intfs.items():
            state = status['state'].lower()
            if state in ('active', 'standby'):
                intf_modes[intf] = state
            elif state in ('auto', 'manual'):
                is_active_active = (
                    'soc_ipv4' in status
                    or 'soc_ipv6' in status
                    or status.get('cable_type') == 'active-active'
                )
                if is_active_active:
                    intf_modes[intf] = self.default_active_active_state
                else:
                    intf_modes[intf] = self.default_active_standby_state
        return intf_modes

    def tunnel_exists(self):
        """
        Checks if the IP-in-IP tunnel has been written to ASIC DB
        """
        tunnel_key_pattern = 'ASIC_STATE:SAI_OBJECT_TYPE_TUNNEL:*'
        # Truthiness covers an empty result as well as a None result.
        return bool(self.asic_db.keys('ASIC_DB', tunnel_key_pattern))

    def wait_for_tunnel(self, interval=1, timeout=90):
        """
        Waits until the IP-in-IP tunnel has been created

        Args:
            interval: seconds to sleep between two checks of ASIC DB
            timeout: maximum number of seconds to wait

        Returns:
            (bool) True if the tunnel has been created
                   False if the timeout period is exceeded
        """
        logger.log_info("Waiting for tunnel {} with timeout {} seconds".format(self.tunnel_name, timeout))
        # A monotonic clock keeps the wait immune to wall-clock adjustments
        # (for instance a time sync happening while the system boots).
        deadline = time.monotonic() + timeout
        while True:
            # The result depends on the tunnel actually being present, so a
            # tunnel that shows up on the very last check still counts.
            if self.tunnel_exists():
                return True
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return False
            time.sleep(min(interval, remaining))

    def apply_mux_config(self):
        """
        Writes the initial mux state to APP DB for all mux interfaces

        Nothing is written when the device is not dual ToR, when a
        warmrestart is ongoing in a shutdown context, or when the tunnel does
        not appear in ASIC DB before the wait times out.
        """
        if not self.is_dualtor:
            # If not running on a dual ToR system, take no action
            return

        if self.is_warmrestart and self.is_shutdwon:
            # If in warmrestart context, take no action
            logger.log_warning("Skip setting mux state due to ongoing warmrestart.")
            return

        modes = self.get_all_mux_intfs_modes()
        if self.wait_for_tunnel():
            logger.log_warning("Applying state to interfaces {}".format(modes))
            producer_state_table = ProducerStateTable(self.appl_db, 'MUX_CABLE_TABLE')

            for intf, state in modes.items():
                fvs = create_fvs(state=state)
                producer_state_table.set(intf, fvs)
        else:
            logger.log_error("Timed out waiting for tunnel {}, mux state will not be written".format(self.tunnel_name))
if __name__ == '__main__':
    # Command-line entry point: read the default states, then write the mux state.
    arg_parser = argparse.ArgumentParser(description='Write initial mux state')
    arg_parser.add_argument(
        '-a', '--active_active',
        type=str, required=False, default='active',
        help='state: intial state for "auto" and/or "manual" config in active-active mode, default "active"',
    )
    arg_parser.add_argument(
        '-s', '--active_standby',
        type=str, required=False, default='standby',
        help='state: intial state for "auto" and/or "manual" config in active-standby mode, default "standby"',
    )
    arg_parser.add_argument(
        '--shutdown',
        type=str, required=False, choices=['mux', 'bgp'], default=None,
        help='write mux state after shutdown other services, supported: mux, bgp',
    )
    cli = arg_parser.parse_args()

    # When triggered by a service shutdown, active-active ports are always
    # written as "standby", whatever --active_active says.
    shutdown_requested = cli.shutdown in ('mux', 'bgp')
    aa_state = "standby" if shutdown_requested else cli.active_active

    MuxStateWriter(
        activeactive=aa_state,
        activestandby=cli.active_standby,
        shutdown_module=cli.shutdown,
    ).apply_mux_config()