diff --git a/files/image_config/monit/container_checker b/files/image_config/monit/container_checker index a67a96a0c1..c6271d26c8 100755 --- a/files/image_config/monit/container_checker +++ b/files/image_config/monit/container_checker @@ -23,7 +23,6 @@ import swsssdk from sonic_py_common import multi_asic, device_info from swsscommon import swsscommon - def get_expected_running_containers(): """ @summary: This function will get the expected running & always-enabled containers by following the rule: @@ -41,7 +40,19 @@ def get_expected_running_containers(): expected_running_containers = set() always_running_containers = set() - + + # Get current asic presence list. For multi_asic system, multi instance containers + # should be checked only for asics present. + asics_id_presence = multi_asic.get_asic_presence_list() + + # Some services may run all the instances irrespective of asic presence. + # Add those to exception list. + # database service: Currently services have dependency on all database services to + # be up irrespective of asic presence. + # bgp service: Currently bgp runs all instances. Once this is fixed to be config driven, + # it will be removed from exception list. 
+ run_all_instance_list = ['database', 'bgp'] + for container_name in feature_table.keys(): if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]: if multi_asic.is_multi_asic(): @@ -50,7 +61,8 @@ def get_expected_running_containers(): if feature_table[container_name]["has_per_asic_scope"] == "True": num_asics = multi_asic.get_num_asics() for asic_id in range(num_asics): - expected_running_containers.add(container_name + str(asic_id)) + if asic_id in asics_id_presence or container_name in run_all_instance_list: + expected_running_containers.add(container_name + str(asic_id)) else: expected_running_containers.add(container_name) if feature_table[container_name]["state"] == 'always_enabled': @@ -60,9 +72,11 @@ def get_expected_running_containers(): if feature_table[container_name]["has_per_asic_scope"] == "True": num_asics = multi_asic.get_num_asics() for asic_id in range(num_asics): - always_running_containers.add(container_name + str(asic_id)) + if asic_id in asics_id_presence or container_name in run_all_instance_list: + always_running_containers.add(container_name + str(asic_id)) else: always_running_containers.add(container_name) + if device_info.is_supervisor(): always_running_containers.add("database-chassis") return expected_running_containers, always_running_containers diff --git a/src/sonic-py-common/sonic_py_common/multi_asic.py b/src/sonic-py-common/sonic_py_common/multi_asic.py index 8ba409165f..e08746be03 100644 --- a/src/sonic-py-common/sonic_py_common/multi_asic.py +++ b/src/sonic-py-common/sonic_py_common/multi_asic.py @@ -22,7 +22,8 @@ BGP_INTERNAL_NEIGH_CFG_DB_TABLE = 'BGP_INTERNAL_NEIGHBOR' NEIGH_DEVICE_METADATA_CFG_DB_TABLE = 'DEVICE_NEIGHBOR_METADATA' DEFAULT_NAMESPACE = '' PORT_ROLE = 'role' - +CHASSIS_STATE_DB='CHASSIS_STATE_DB' +CHASSIS_ASIC_INFO_TABLE='CHASSIS_ASIC_TABLE' # Dictionary to cache config_db connection handle per namespace # to prevent duplicate connections from being opened @@ -451,3 +452,31 @@ def 
def get_asic_presence_list():
    """
    @summary: Build the list of numeric ids of the asics that are present.
              On a platform that is not multi-asic the list is empty.
              On a multi-asic card that is not the supervisor (e.g. a line card) every
              asic id from 0 to get_num_asics() - 1 is reported, since asics are assumed
              not to be removable there.
              On the supervisor some fabric cards may not be inserted, so the list is
              taken from the keys of CHASSIS_ASIC_TABLE in CHASSIS_STATE_DB, which is
              expected to list only the asics detected by the platform. This assumes the
              first N asic ids (asic0 to asic(N-1)) in that table belong to the
              supervisor, where N is the max number of asics supported by the chassis.
    @return:  List of asic ids (integers) present
    """
    # Nothing to report when the platform is not multi-asic.
    if not is_multi_asic():
        return []

    # Not a supervisor: asics are assumed to be fixed on the card, so all of
    # them (0 .. num_asics - 1) are considered present.
    if not is_supervisor():
        return list(range(get_num_asics()))

    # Supervisor card: read the asics that are present from CHASSIS_ASIC_TABLE.
    chassis_state_db = swsscommon.DBConnector(CHASSIS_STATE_DB, 0, True)
    chassis_asic_table = swsscommon.Table(chassis_state_db, CHASSIS_ASIC_INFO_TABLE)
    if not chassis_asic_table:
        return []

    # Keys are asic names (asic0, asic1, ... asicN); keep only the numeric id.
    return [
        int(get_asic_id_from_name(asic_name))
        for asic_name in chassis_asic_table.getKeys()
    ]