[202111] [Mellanox] Add retry when SFP object is not initialized correctly due to eeprom read failure (#11265)
- Why I did it: During platform API SFP object initialization there are two steps: one reads the xSFP type from the EEPROM, and the other parses the xSFP DOM support capability. The xSFP EEPROM may not be ready when the read starts, which leaves the SFP object without a correctly initialized type and DOM capability and causes further issues. A retry mechanism is therefore needed for this case. - How I did it: Add flags to indicate whether the SFP object has been correctly initialized; the flag is set when an error happens, and cleared after all relevant bytes have been correctly read from the EEPROM and parsed. Use a Python decorator on the related functions: each time a decorated function is called, the decorator checks whether the SFP object has been correctly initialized and, if not, reads and parses the EEPROM again. - How to verify it: Run the SFP-related platform tests to make sure no new issue is introduced. Signed-off-by: Kebo Liu <kebol@nvidia.com>
This commit is contained in:
parent
6ab339482f
commit
27839792d1
@ -369,7 +369,8 @@ class SfpCapability:
|
||||
self.calibration = 0
|
||||
self.qsfp_page3_available = False
|
||||
self.second_application_list = False
|
||||
|
||||
self.dom_detect_finished = False
|
||||
|
||||
|
||||
class SFP(SfpBase):
|
||||
"""Platform-specific SFP class"""
|
||||
@ -417,11 +418,11 @@ class SFP(SfpBase):
|
||||
logger.log_error('Failed to open SDK handle')
|
||||
return SFP.shared_sdk_handle
|
||||
|
||||
@property
|
||||
def sfp_type(self):
|
||||
def _detect_sfp_type(self):
|
||||
if not self._sfp_type:
|
||||
eeprom_raw = []
|
||||
eeprom_raw = self._read_eeprom_specific_bytes(XCVR_TYPE_OFFSET, XCVR_TYPE_WIDTH)
|
||||
|
||||
if eeprom_raw:
|
||||
if eeprom_raw[0] in SFP_TYPE_CODE_LIST:
|
||||
self._sfp_type = SFP_TYPE
|
||||
@ -430,14 +431,13 @@ class SFP(SfpBase):
|
||||
elif eeprom_raw[0] in QSFP_DD_TYPE_CODE_LIST:
|
||||
self._sfp_type = QSFP_DD_TYPE
|
||||
|
||||
# we don't recognize this identifier value, treat the xSFP module as the default type
|
||||
if not self._sfp_type:
|
||||
raise RuntimeError("Failed to detect SFP type for SFP {}".format(self.index))
|
||||
else:
|
||||
return self._sfp_type
|
||||
@property
|
||||
@utils.pre_initialize(_detect_sfp_type)
|
||||
def sfp_type(self):
|
||||
return self._sfp_type
|
||||
|
||||
def _dom_capability_detect(self):
|
||||
if self._sfp_capability:
|
||||
if self._sfp_capability and self._sfp_capability.dom_detect_finished:
|
||||
return
|
||||
|
||||
self._sfp_capability = SfpCapability()
|
||||
@ -456,8 +456,9 @@ class SFP(SfpBase):
|
||||
# need to add more code for determining the capability and version compliance
|
||||
# in SFF-8636 dom capability definitions evolving with the versions.
|
||||
qsfp_dom_capability_raw = self._read_eeprom_specific_bytes((offset + XCVR_DOM_CAPABILITY_OFFSET), XCVR_DOM_CAPABILITY_WIDTH)
|
||||
if qsfp_dom_capability_raw is not None:
|
||||
qsfp_version_compliance_raw = self._read_eeprom_specific_bytes(QSFP_VERSION_COMPLIANCE_OFFSET, QSFP_VERSION_COMPLIANCE_WIDTH)
|
||||
qsfp_version_compliance_raw = self._read_eeprom_specific_bytes(QSFP_VERSION_COMPLIANCE_OFFSET, QSFP_VERSION_COMPLIANCE_WIDTH)
|
||||
qsfp_option_value_raw = self._read_eeprom_specific_bytes(QSFP_OPTION_VALUE_OFFSET, QSFP_OPTION_VALUE_WIDTH)
|
||||
if None not in (qsfp_dom_capability_raw, qsfp_version_compliance_raw, qsfp_option_value_raw):
|
||||
qsfp_version_compliance = int(qsfp_version_compliance_raw[0], 16)
|
||||
dom_capability = sfpi_obj.parse_dom_capability(qsfp_dom_capability_raw, 0)
|
||||
if qsfp_version_compliance >= 0x08:
|
||||
@ -475,13 +476,14 @@ class SFP(SfpBase):
|
||||
sfpd_obj = sff8436Dom()
|
||||
if sfpd_obj is None:
|
||||
return None
|
||||
qsfp_option_value_raw = self._read_eeprom_specific_bytes(QSFP_OPTION_VALUE_OFFSET, QSFP_OPTION_VALUE_WIDTH)
|
||||
if qsfp_option_value_raw is not None:
|
||||
optional_capability = sfpd_obj.parse_option_params(qsfp_option_value_raw, 0)
|
||||
self._sfp_capability.dom_tx_disable_supported = optional_capability['data']['TxDisable']['value'] == 'On'
|
||||
|
||||
optional_capability = sfpd_obj.parse_option_params(qsfp_option_value_raw, 0)
|
||||
self._sfp_capability.dom_tx_disable_supported = optional_capability['data']['TxDisable']['value'] == 'On'
|
||||
dom_status_indicator = sfpd_obj.parse_dom_status_indicator(qsfp_version_compliance_raw, 1)
|
||||
self._sfp_capability.qsfp_page3_available = dom_status_indicator['data']['FlatMem']['value'] == 'Off'
|
||||
self._sfp_capability.dom_detect_finished = True
|
||||
else:
|
||||
logger.log_warning("SFP {}: Dom capabilty parsing is failed due to eeprom read fail, will re-try next time.".format(self.index))
|
||||
self._sfp_capability.dom_supported = False
|
||||
self._sfp_capability.dom_temp_supported = False
|
||||
self._sfp_capability.dom_volt_supported = False
|
||||
@ -489,6 +491,7 @@ class SFP(SfpBase):
|
||||
self._sfp_capability.dom_tx_power_supported = False
|
||||
self._sfp_capability.calibration = 0
|
||||
self._sfp_capability.qsfp_page3_available = False
|
||||
self._sfp_capability.dom_detect_finished = False
|
||||
|
||||
elif self.sfp_type == QSFP_DD_TYPE:
|
||||
sfpi_obj = qsfp_dd_InterfaceId()
|
||||
@ -518,6 +521,7 @@ class SFP(SfpBase):
|
||||
self._sfp_capability.dom_tx_bias_power_supported = False
|
||||
self._sfp_capability.dom_thresholds_supported = False
|
||||
self._sfp_capability.dom_rx_tx_power_bias_supported = False
|
||||
self._sfp_capability.dom_detect_finished = True
|
||||
else:
|
||||
self._sfp_capability.dom_supported = False
|
||||
self._sfp_capability.dom_temp_supported = False
|
||||
@ -527,6 +531,8 @@ class SFP(SfpBase):
|
||||
self._sfp_capability.dom_tx_bias_power_supported = False
|
||||
self._sfp_capability.dom_thresholds_supported = False
|
||||
self._sfp_capability.dom_rx_tx_power_bias_supported = False
|
||||
self._sfp_capability.dom_detect_finished = False
|
||||
logger.log_warning("SFP {}: Dom capabilty parsing is failed due to eeprom read fail, will re-try next time.".format(self.index))
|
||||
|
||||
elif self.sfp_type == SFP_TYPE:
|
||||
sfpi_obj = sff8472InterfaceId()
|
||||
@ -554,12 +560,15 @@ class SFP(SfpBase):
|
||||
self._sfp_capability.dom_tx_power_supported = False
|
||||
self._sfp_capability.calibration = 0
|
||||
self._sfp_capability.dom_tx_disable_supported = (int(sfp_dom_capability_raw[1], 16) & 0x40 != 0)
|
||||
self._sfp_capability.dom_detect_finished = True
|
||||
else:
|
||||
self._sfp_capability.dom_supported = False
|
||||
self._sfp_capability.dom_temp_supported = False
|
||||
self._sfp_capability.dom_volt_supported = False
|
||||
self._sfp_capability.dom_rx_power_supported = False
|
||||
self._sfp_capability.dom_tx_power_supported = False
|
||||
self._sfp_capability.dom_detect_finished = False
|
||||
logger.log_warning("SFP {}: Dom capabilty parsing is failed due to sfp type is not one of the supported ones, will re-try next time.".format(self.index))
|
||||
|
||||
@property
|
||||
@utils.pre_initialize(_dom_capability_detect)
|
||||
@ -907,7 +916,7 @@ class SFP(SfpBase):
|
||||
transceiver_info_dict['nominal_bit_rate'] = "Not supported for CMIS cables"
|
||||
transceiver_info_dict['application_advertisement'] = host_media_list
|
||||
|
||||
else:
|
||||
elif self.sfp_type == SFP_TYPE:
|
||||
offset = 0
|
||||
vendor_rev_width = XCVR_HW_REV_WIDTH_SFP
|
||||
interface_info_bulk_width = XCVR_INTFACE_BULK_WIDTH_SFP
|
||||
@ -916,6 +925,10 @@ class SFP(SfpBase):
|
||||
if sfpi_obj is None:
|
||||
print("Error: sfp_object open failed")
|
||||
return None
|
||||
else:
|
||||
# Not one of the supported SFP types; the SFP object may not have been correctly initialized.
|
||||
logger.log_warning("SFP {}: type is not one the supported type, or SFP object initialization is not finished yet.".format(self.index))
|
||||
return None
|
||||
|
||||
if self.sfp_type != QSFP_DD_TYPE:
|
||||
sfp_interface_bulk_raw = self._read_eeprom_specific_bytes(offset + XCVR_INTERFACE_DATA_START, XCVR_INTERFACE_DATA_SIZE)
|
||||
|
@ -25,9 +25,10 @@ test_path = os.path.dirname(os.path.abspath(__file__))
|
||||
modules_path = os.path.dirname(test_path)
|
||||
sys.path.insert(0, modules_path)
|
||||
|
||||
from sonic_platform.sfp import SFP, SX_PORT_MODULE_STATUS_INITIALIZING, SX_PORT_MODULE_STATUS_PLUGGED, SX_PORT_MODULE_STATUS_UNPLUGGED, SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR, SX_PORT_MODULE_STATUS_PLUGGED_DISABLED
|
||||
from sonic_platform.sfp import QSFP_TYPE, QSFP_DD_TYPE, SFP_TYPE, SFP, SX_PORT_MODULE_STATUS_INITIALIZING, SX_PORT_MODULE_STATUS_PLUGGED, SX_PORT_MODULE_STATUS_UNPLUGGED, SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR, SX_PORT_MODULE_STATUS_PLUGGED_DISABLED
|
||||
from sonic_platform.chassis import Chassis
|
||||
|
||||
|
||||
class TestSfp:
|
||||
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_linecard_count', mock.MagicMock(return_value=8))
|
||||
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_linecard_max_port_count')
|
||||
@ -80,3 +81,58 @@ class TestSfp:
|
||||
description = sfp.get_error_description()
|
||||
|
||||
assert description == expected_description
|
||||
|
||||
def test_detect_sfp_type(self):
    """Check that ``sfp_type`` maps the EEPROM identifier byte to a type constant.

    A freshly built SFP object must have no cached type. For each
    identifier byte returned by the mocked EEPROM read, the ``sfp_type``
    property must then report the matching module type.
    """
    sfp = SFP(0)
    # Nothing has been read from the EEPROM yet, so no type may be cached.
    assert sfp._sfp_type is None

    # (identifier byte returned by the mocked EEPROM read, expected type)
    cases = (
        ('03', SFP_TYPE),
        ('0d', QSFP_TYPE),
        ('18', QSFP_DD_TYPE),
    )
    for identifier, expected_type in cases:
        # Clear the cached type so that the property has to detect it again
        # from the (mocked) EEPROM content.
        sfp._sfp_type = None
        sfp._read_eeprom_specific_bytes = mock.MagicMock(return_value=[identifier])
        assert sfp.sfp_type == expected_type
|
||||
|
||||
def test_detect_dom_capability(self):
    """Exercise DOM capability detection for QSFP, SFP and QSFP-DD modules.

    For every module type two flows are checked in turn:

    * positive - the mocked EEPROM read returns data; DOM must be reported
      as supported and detection must be marked as finished;
    * negative - the mocked EEPROM read returns ``None``; DOM must be
      reported as unsupported and detection must stay unfinished.
    """
    sfp = SFP(0)
    sfp.get_presence = mock.MagicMock(return_value=True)

    # (module type, bytes returned by the mocked EEPROM read in the positive flow)
    scenarios = [
        (QSFP_TYPE, ['19', '08', 'ff', '04']),
        (SFP_TYPE, ['ff', '00']),
        (QSFP_DD_TYPE, ['00']),
    ]

    for module_type, eeprom_bytes in scenarios:
        # Positive flow: the EEPROM read succeeds.
        sfp._sfp_type = module_type
        sfp._read_eeprom_specific_bytes = mock.MagicMock(return_value=eeprom_bytes)
        assert sfp.dom_supported == True
        assert sfp._sfp_capability.dom_detect_finished == True

        # Negative flow: force a new detection pass and make the EEPROM read fail.
        sfp._sfp_capability.dom_detect_finished = False
        sfp._read_eeprom_specific_bytes = mock.MagicMock(return_value=None)
        assert sfp.dom_supported == False
        assert sfp._sfp_capability.dom_detect_finished == False
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user