[Mellanox] Add bitmap support for SFP error event (#7605)
#### Why I did it Currently, SONiC uses a single value to represent an SFP error; however, multiple SFP errors can exist at the same time. This PR adds support for reporting them together. #### How I did it Return a bitmap instead of a single value when an SFP event occurs. Signed-off-by: Stephen Sun <stephens@nvidia.com>
This commit is contained in:
parent
0a92ce18cf
commit
147bf240f0
@ -569,18 +569,22 @@ class Chassis(ChassisBase):
|
||||
|
||||
wait_for_ever = (timeout == 0)
|
||||
port_dict = {}
|
||||
error_dict = {}
|
||||
if wait_for_ever:
|
||||
timeout = MAX_SELECT_DELAY
|
||||
while True:
|
||||
status = self.sfp_event.check_sfp_status(port_dict, timeout)
|
||||
status = self.sfp_event.check_sfp_status(port_dict, error_dict, timeout)
|
||||
if bool(port_dict):
|
||||
break
|
||||
else:
|
||||
status = self.sfp_event.check_sfp_status(port_dict, timeout)
|
||||
status = self.sfp_event.check_sfp_status(port_dict, error_dict, timeout)
|
||||
|
||||
if status:
|
||||
self.reinit_sfps(port_dict)
|
||||
return True, {'sfp':port_dict}
|
||||
result_dict = {'sfp':port_dict}
|
||||
if error_dict:
|
||||
result_dict['sfp_error'] = error_dict
|
||||
return True, result_dict
|
||||
else:
|
||||
return True, {'sfp':{}}
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
try:
|
||||
import subprocess
|
||||
import os
|
||||
from sonic_platform_base.sfp_base import SfpBase
|
||||
from sonic_platform_base.sonic_eeprom import eeprom_dts
|
||||
from sonic_platform_base.sonic_sfp.sff8472 import sff8472InterfaceId
|
||||
@ -33,6 +34,18 @@ try:
|
||||
except ImportError as e:
|
||||
pass
|
||||
|
||||
# The SDK constants cannot be imported under unit test, so they are defined
# here instead. Nothing is defined unless PLATFORM_API_UNIT_TESTING is set
# to "1"; an unset variable is simply treated as "not under unit test".
if os.environ.get("PLATFORM_API_UNIT_TESTING") == "1":
    SX_PORT_MODULE_STATUS_INITIALIZING = 0
    SX_PORT_MODULE_STATUS_PLUGGED = 1
    SX_PORT_MODULE_STATUS_UNPLUGGED = 2
    SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR = 3
    SX_PORT_MODULE_STATUS_PLUGGED_DISABLED = 4
|
||||
|
||||
# definitions of the offset and width for values in XCVR info eeprom
|
||||
XCVR_INTFACE_BULK_OFFSET = 0
|
||||
XCVR_INTFACE_BULK_WIDTH_QSFP = 20
|
||||
@ -328,6 +341,18 @@ class SdkHandleContext(object):
|
||||
class SFP(SfpBase):
|
||||
"""Platform-specific SFP class"""
|
||||
|
||||
SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE = 'Long range for non-Mellanox cable or module'
|
||||
SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST = 'Enforce part number list'
|
||||
SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED = 'PMD type not enabled'
|
||||
SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED = 'PCIE system power slot exceeded'
|
||||
SFP_MLNX_ERROR_DESCRIPTION_RESERVED = 'Reserved'
|
||||
|
||||
SFP_MLNX_ERROR_BIT_LONGRANGE_NON_MLNX_CABLE = 0x00010000
|
||||
SFP_MLNX_ERROR_BIT_ENFORCE_PART_NUMBER_LIST = 0x00020000
|
||||
SFP_MLNX_ERROR_BIT_PMD_TYPE_NOT_ENABLED = 0x00040000
|
||||
SFP_MLNX_ERROR_BIT_PCIE_POWER_SLOT_EXCEEDED = 0x00080000
|
||||
SFP_MLNX_ERROR_BIT_RESERVED = 0x80000000
|
||||
|
||||
def __init__(self, sfp_index, sfp_type, sdk_handle_getter, platform):
|
||||
SfpBase.__init__(self)
|
||||
self.index = sfp_index + 1
|
||||
@ -386,7 +411,7 @@ class SFP(SfpBase):
|
||||
# Read out any bytes from any offset
|
||||
def _read_eeprom_specific_bytes(self, offset, num_bytes):
|
||||
eeprom_raw = []
|
||||
ethtool_cmd = "ethtool -m sfp{} hex on offset {} length {}".format(self.index, offset, num_bytes)
|
||||
ethtool_cmd = "ethtool -m sfp{} hex on offset {} length {} 2>/dev/null".format(self.index, offset, num_bytes)
|
||||
try:
|
||||
output = subprocess.check_output(ethtool_cmd,
|
||||
shell=True,
|
||||
@ -2165,3 +2190,68 @@ class SFP(SfpBase):
|
||||
bool: True if it is replaceable.
|
||||
"""
|
||||
return True
|
||||
|
||||
def _get_error_code(self):
    """
    Get the operational state and the error type of the SFP module

    The information is fetched with sx_mgmt_phy_module_info_get for the
    module identified by self.sdk_index in slot 0.

    Returns:
        A tuple (oper_state, error_type) taken from the module_state
        reported by the SDK API

    Raises:
        AssertionError: if the SDK call does not return SX_STATUS_SUCCESS
    """
    # NOTE(review): the two SDK arrays allocated below are never released in
    # this method - confirm whether the matching delete_* helpers are needed.
    module_id_info_list = new_sx_mgmt_module_id_info_t_arr(1)
    module_info_list = new_sx_mgmt_phy_module_info_t_arr(1)

    module_id_info = sx_mgmt_module_id_info_t()
    module_id_info.slot_id = 0
    module_id_info.module_id = self.sdk_index
    sx_mgmt_module_id_info_t_arr_setitem(module_id_info_list, 0, module_id_info)

    rc = sx_mgmt_phy_module_info_get(self.sdk_handle, module_id_info_list, 1, module_info_list)
    # Raise explicitly instead of using an assert statement so the check is
    # not stripped when Python runs with -O. The exception type and message
    # are kept unchanged for existing callers.
    if rc != SX_STATUS_SUCCESS:
        raise AssertionError("sx_mgmt_phy_module_info_get failed, error code {}".format(rc))

    mod_info = sx_mgmt_phy_module_info_t_arr_getitem(module_info_list, 0)
    return mod_info.module_state.oper_state, mod_info.module_state.error_type
|
||||
|
||||
@classmethod
def _get_error_description_dict(cls):
    """
    Build the mapping from error code to error description

    Returns:
        A dict keyed by integer error code whose values are the
        description strings defined on the class
    """
    code_description_pairs = (
        (0, cls.SFP_ERROR_DESCRIPTION_POWER_BUDGET_EXCEEDED),
        (1, cls.SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE),
        (2, cls.SFP_ERROR_DESCRIPTION_I2C_STUCK),
        (3, cls.SFP_ERROR_DESCRIPTION_BAD_EEPROM),
        (4, cls.SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST),
        (5, cls.SFP_ERROR_DESCRIPTION_UNSUPPORTED_CABLE),
        (6, cls.SFP_ERROR_DESCRIPTION_HIGH_TEMP),
        (7, cls.SFP_ERROR_DESCRIPTION_BAD_CABLE),
        (8, cls.SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED),
        (12, cls.SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED),
        (255, cls.SFP_MLNX_ERROR_DESCRIPTION_RESERVED),
    )
    return dict(code_description_pairs)
|
||||
|
||||
def get_error_description(self):
    """
    Get a description of the current status or error of the SFP module

    The operational state and the error code are read through
    self._get_error_code(); this method takes no arguments.

    Returns:
        The status string matching the operational state. When the module
        is plugged with an error, the description of the error code, or
        "Unknown error (<code>)" if the code has no known description.
        An unrecognized operational state yields
        "Unknown SFP module status (<state>)".
    """
    oper_status, error_code = self._get_error_code()

    if oper_status == SX_PORT_MODULE_STATUS_INITIALIZING:
        return self.SFP_STATUS_INITIALIZING
    if oper_status == SX_PORT_MODULE_STATUS_PLUGGED:
        return self.SFP_STATUS_OK
    if oper_status == SX_PORT_MODULE_STATUS_UNPLUGGED:
        return self.SFP_STATUS_UNPLUGGED
    if oper_status == SX_PORT_MODULE_STATUS_PLUGGED_DISABLED:
        return self.SFP_STATUS_DISABLED
    if oper_status == SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR:
        # Only in this state is the error code meaningful
        error_description_dict = self._get_error_description_dict()
        if error_code in error_description_dict:
            return error_description_dict[error_code]
        return "Unknown error ({})".format(error_code)

    return "Unknown SFP module status ({})".format(oper_status)
|
||||
|
@ -7,8 +7,16 @@ import sys, errno
|
||||
import os
|
||||
import time
|
||||
import select
|
||||
from python_sdk_api.sx_api import *
|
||||
if 'MLNX_PLATFORM_API_UNIT_TESTING' not in os.environ:
|
||||
from python_sdk_api.sx_api import *
|
||||
else:
|
||||
from mock import MagicMock
|
||||
class MockSxFd(object):
|
||||
fd = 99
|
||||
new_sx_fd_t_p = MagicMock(return_value=MockSxFd())
|
||||
new_sx_user_channel_t_p = MagicMock()
|
||||
from sonic_py_common.logger import Logger
|
||||
from .sfp import SFP
|
||||
|
||||
# SFP status from PMAOS register
|
||||
# 0x1 plug in
|
||||
@ -22,15 +30,6 @@ SDK_SFP_STATE_OUT = 0x2
|
||||
SDK_SFP_STATE_ERR = 0x3
|
||||
SDK_SFP_STATE_DIS = 0x4
|
||||
|
||||
# SFP status that will be handled by XCVRD
|
||||
STATUS_PLUGIN = '1'
|
||||
STATUS_PLUGOUT = '0'
|
||||
STATUS_ERR_I2C_STUCK = '2'
|
||||
STATUS_ERR_BAD_EEPROM = '3'
|
||||
STATUS_ERR_UNSUPPORTED_CABLE = '4'
|
||||
STATUS_ERR_HIGH_TEMP = '5'
|
||||
STATUS_ERR_BAD_CABLE = '6'
|
||||
|
||||
# SFP status used in this file only, will not expose to XCVRD
|
||||
# STATUS_ERROR will be mapped to different status according to the error code
|
||||
STATUS_UNKNOWN = '-1'
|
||||
@ -60,19 +59,39 @@ STATUS_ERROR = '-2'
|
||||
'''
|
||||
|
||||
# SFP errors that will block eeprom accessing
|
||||
sdk_sfp_err_type_dict = {
|
||||
0x2: STATUS_ERR_I2C_STUCK,
|
||||
0x3: STATUS_ERR_BAD_EEPROM,
|
||||
0x5: STATUS_ERR_UNSUPPORTED_CABLE,
|
||||
0x6: STATUS_ERR_HIGH_TEMP,
|
||||
0x7: STATUS_ERR_BAD_CABLE
|
||||
SDK_SFP_BLOCKING_ERRORS = [
|
||||
0x2, # SFP.SFP_ERROR_BIT_I2C_STUCK,
|
||||
0x3, # SFP.SFP_ERROR_BIT_BAD_EEPROM,
|
||||
0x5, # SFP.SFP_ERROR_BIT_UNSUPPORTED_CABLE,
|
||||
0x6, # SFP.SFP_ERROR_BIT_HIGH_TEMP,
|
||||
0x7, # SFP.SFP_ERROR_BIT_BAD_CABLE
|
||||
]
|
||||
|
||||
SDK_ERRORS_TO_ERROR_BITS = {
|
||||
0x0: SFP.SFP_ERROR_BIT_POWER_BUDGET_EXCEEDED,
|
||||
0x1: SFP.SFP_MLNX_ERROR_BIT_LONGRANGE_NON_MLNX_CABLE,
|
||||
0x2: SFP.SFP_ERROR_BIT_I2C_STUCK,
|
||||
0x3: SFP.SFP_ERROR_BIT_BAD_EEPROM,
|
||||
0x4: SFP.SFP_MLNX_ERROR_BIT_ENFORCE_PART_NUMBER_LIST,
|
||||
0x5: SFP.SFP_ERROR_BIT_UNSUPPORTED_CABLE,
|
||||
0x6: SFP.SFP_ERROR_BIT_HIGH_TEMP,
|
||||
0x7: SFP.SFP_ERROR_BIT_BAD_CABLE,
|
||||
0x8: SFP.SFP_MLNX_ERROR_BIT_PMD_TYPE_NOT_ENABLED,
|
||||
0xc: SFP.SFP_MLNX_ERROR_BIT_PCIE_POWER_SLOT_EXCEEDED
|
||||
}
|
||||
|
||||
SDK_ERRORS_TO_DESCRIPTION = {
|
||||
0x1: SFP.SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE,
|
||||
0x4: SFP.SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST,
|
||||
0x8: SFP.SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED,
|
||||
0xc: SFP.SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED
|
||||
}
|
||||
|
||||
sfp_value_status_dict = {
|
||||
SDK_SFP_STATE_IN: STATUS_PLUGIN,
|
||||
SDK_SFP_STATE_OUT: STATUS_PLUGOUT,
|
||||
SDK_SFP_STATE_IN: str(SFP.SFP_STATUS_BIT_INSERTED),
|
||||
SDK_SFP_STATE_OUT: str(SFP.SFP_STATUS_BIT_REMOVED),
|
||||
SDK_SFP_STATE_ERR: STATUS_ERROR,
|
||||
SDK_SFP_STATE_DIS: STATUS_PLUGOUT,
|
||||
SDK_SFP_STATE_DIS: str(SFP.SFP_STATUS_BIT_REMOVED),
|
||||
}
|
||||
|
||||
# system level event/error
|
||||
@ -195,7 +214,7 @@ class sfp_event:
|
||||
delete_sx_fd_t_p(self.rx_fd_p)
|
||||
delete_sx_user_channel_t_p(self.user_channel_p)
|
||||
|
||||
def check_sfp_status(self, port_change, timeout):
|
||||
def check_sfp_status(self, port_change, error_dict, timeout):
|
||||
"""
|
||||
the meaning of timeout is aligned with select.select, which has the following meaning:
|
||||
0: poll, returns without blocked
|
||||
@ -233,6 +252,7 @@ class sfp_event:
|
||||
break
|
||||
|
||||
sfp_state = sfp_value_status_dict.get(module_state, STATUS_UNKNOWN)
|
||||
error_description = None
|
||||
if sfp_state == STATUS_UNKNOWN:
|
||||
# in the following sequence, STATUS_UNKNOWN can be returned.
|
||||
# so we shouldn't raise exception here.
|
||||
@ -247,18 +267,29 @@ class sfp_event:
|
||||
|
||||
# If get SFP status error(0x3) from SDK, then need to read the error_type to get the detailed error
|
||||
if sfp_state == STATUS_ERROR:
|
||||
if error_type in sdk_sfp_err_type_dict.keys():
|
||||
# In SFP at error status case, need to overwrite the sfp_state with the exact error code
|
||||
sfp_state = sdk_sfp_err_type_dict[error_type]
|
||||
else:
|
||||
# For errors don't block the eeprom accessing, we don't report it to XCVRD
|
||||
logger.log_info("SFP error on port but not blocking eeprom read, error_type {}".format(error_type))
|
||||
found +=1
|
||||
sfp_state_bits = SDK_ERRORS_TO_ERROR_BITS.get(error_type)
|
||||
if sfp_state_bits is None:
|
||||
logger.log_error("Unrecognized error {} detected on ports {}".format(error_type, port_list))
|
||||
found += 1
|
||||
continue
|
||||
|
||||
if error_type in SDK_SFP_BLOCKING_ERRORS:
|
||||
# In SFP at error status case, need to overwrite the sfp_state with the exact error code
|
||||
sfp_state_bits |= SFP.SFP_ERROR_BIT_BLOCKING
|
||||
|
||||
# An error should be always set along with 'INSERTED'
|
||||
sfp_state_bits |= SFP.SFP_STATUS_BIT_INSERTED
|
||||
|
||||
# For vendor specific errors, the description should be returned as well
|
||||
error_description = SDK_ERRORS_TO_DESCRIPTION.get(error_type)
|
||||
|
||||
sfp_state = str(sfp_state_bits)
|
||||
|
||||
for port in port_list:
|
||||
logger.log_info("SFP on port {} state {}".format(port, sfp_state))
|
||||
port_change[port+1] = sfp_state
|
||||
if error_description:
|
||||
error_dict[port+1] = error_description
|
||||
found += 1
|
||||
|
||||
return found != 0
|
||||
|
@ -8,8 +8,11 @@ test_path = os.path.dirname(os.path.abspath(__file__))
|
||||
modules_path = os.path.dirname(test_path)
|
||||
sys.path.insert(0, modules_path)
|
||||
|
||||
os.environ["PLATFORM_API_UNIT_TESTING"] = "1"
|
||||
|
||||
from sonic_py_common import device_info
|
||||
from sonic_platform.sfp import SFP
|
||||
from sonic_platform.sfp import SFP, SX_PORT_MODULE_STATUS_INITIALIZING, SX_PORT_MODULE_STATUS_PLUGGED, SX_PORT_MODULE_STATUS_UNPLUGGED, SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR, SX_PORT_MODULE_STATUS_PLUGGED_DISABLED
|
||||
|
||||
from sonic_platform.chassis import Chassis
|
||||
|
||||
|
||||
@ -26,8 +29,14 @@ def mock_get_sdk_handle(self):
|
||||
self.sdk_handle = 1
|
||||
return self.sdk_handle
|
||||
|
||||
|
||||
def mock_get_sfp_error_code(self):
    """Stand-in for SFP._get_error_code: return the codes preset on the instance."""
    oper_code = self.oper_code
    error_code = self.error_code
    return oper_code, error_code
|
||||
|
||||
|
||||
device_info.get_platform = mock_get_platform
|
||||
SFP._read_eeprom_specific_bytes = mock_read_eeprom_specific_bytes
|
||||
SFP._get_error_code = mock_get_sfp_error_code
|
||||
Chassis.get_sdk_handle = mock_get_sdk_handle
|
||||
|
||||
|
||||
@ -82,3 +91,35 @@ def test_sfp_full_initialize_without_partial():
|
||||
# Verify when get_sfp is called, the SFP modules won't be initialized again
|
||||
sfp1 = allsfp[0]
|
||||
assert sfp1 == chassis.get_sfp(1)
|
||||
|
||||
|
||||
def test_sfp_get_error_status():
    """Check get_error_description for every known error code and module state."""
    chassis = Chassis()

    # Fetch an SFP module to test
    sfp = chassis.get_sfp(1)

    known_descriptions = sfp._get_error_description_dict()

    # Module plugged with an error: each known code maps to its description
    sfp.oper_code = SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR
    for known_code, known_description in known_descriptions.items():
        sfp.error_code = known_code
        assert sfp.get_error_description() == known_description

    # A code missing from the dictionary is reported as unknown
    sfp.error_code = -1
    assert sfp.get_error_description() == "Unknown error (-1)"

    # States other than "plugged with error" ignore the error code
    expected_description_list = (
        (SX_PORT_MODULE_STATUS_INITIALIZING, "Initializing"),
        (SX_PORT_MODULE_STATUS_PLUGGED, "OK"),
        (SX_PORT_MODULE_STATUS_UNPLUGGED, "Unplugged"),
        (SX_PORT_MODULE_STATUS_PLUGGED_DISABLED, "Disabled"),
    )
    for oper_code, expected_description in expected_description_list:
        sfp.oper_code = oper_code
        assert sfp.get_error_description() == expected_description
|
||||
|
46
platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py
Normal file
46
platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py
Normal file
@ -0,0 +1,46 @@
|
||||
import os
|
||||
import select
|
||||
import sys
|
||||
|
||||
from mock import MagicMock
|
||||
|
||||
test_path = os.path.dirname(os.path.abspath(__file__))
|
||||
modules_path = os.path.dirname(test_path)
|
||||
sys.path.insert(0, modules_path)
|
||||
|
||||
from sonic_platform_base.sfp_base import SfpBase
|
||||
|
||||
class TestSfpEvent(object):
    """Tests for sfp_event.check_sfp_status driven by a mocked on_pmpe result."""

    # Environment variable that makes sfp_event use mocks instead of the SDK
    UNIT_TEST_ENV = "MLNX_PLATFORM_API_UNIT_TESTING"

    @classmethod
    def setup_class(cls):
        # Remember what is overridden below so teardown_class can undo it and
        # the patches do not leak into other test modules in the same process.
        # select.select is kept inside a list so it is stored as plain data.
        cls._saved_env = os.environ.get(cls.UNIT_TEST_ENV)
        cls._saved_select = [select.select]

        os.environ[cls.UNIT_TEST_ENV] = "1"
        # 99 is the fd exposed by the mocked SDK fd object in sfp_event
        select.select = MagicMock(return_value=([99], None, None))

    @classmethod
    def teardown_class(cls):
        # Restore the real select.select and the previous environment
        select.select = cls._saved_select[0]
        if cls._saved_env is None:
            os.environ.pop(cls.UNIT_TEST_ENV, None)
        else:
            os.environ[cls.UNIT_TEST_ENV] = cls._saved_env

    def test_check_sfp_status(self):
        """Plug in, plug out and every known SDK error yield the expected status bits."""
        from sonic_platform.sfp_event import SDK_SFP_STATE_IN, SDK_SFP_STATE_OUT, SDK_SFP_STATE_ERR
        from sonic_platform.sfp_event import SDK_ERRORS_TO_ERROR_BITS, SDK_ERRORS_TO_DESCRIPTION, SDK_SFP_BLOCKING_ERRORS

        self.executor(SDK_SFP_STATE_IN, None, SfpBase.SFP_STATUS_BIT_INSERTED)
        self.executor(SDK_SFP_STATE_OUT, None, SfpBase.SFP_STATUS_BIT_REMOVED)
        for error_type, error_status in SDK_ERRORS_TO_ERROR_BITS.items():
            description = SDK_ERRORS_TO_DESCRIPTION.get(error_type)
            if error_type in SDK_SFP_BLOCKING_ERRORS:
                error_status |= SfpBase.SFP_ERROR_BIT_BLOCKING
            # An error is always reported along with 'INSERTED'
            error_status |= SfpBase.SFP_STATUS_BIT_INSERTED
            self.executor(SDK_SFP_STATE_ERR, error_type, error_status, description)

    def executor(self, mock_module_state, mock_error_type, expect_status, description=None):
        """
        Run check_sfp_status once against a mocked event on ports 0 and 1

        Args:
            mock_module_state: module state returned by the mocked on_pmpe
            mock_error_type: error type returned by the mocked on_pmpe
            expect_status: status value expected for both ports; it is
                compared as a string
            description: expected vendor specific error description, or
                None when no description must be reported
        """
        from sonic_platform.sfp_event import sfp_event

        event = sfp_event()
        event.on_pmpe = MagicMock(return_value=(True, [0, 1], mock_module_state, mock_error_type))
        port_change = {}
        error_dict = {}
        found = event.check_sfp_status(port_change, error_dict, 0)
        assert found
        expect_status_str = str(expect_status)
        # Ports are reported 1-based, so SDK ports 0 and 1 appear as 1 and 2
        assert 1 in port_change and port_change[1] == expect_status_str
        assert 2 in port_change and port_change[2] == expect_status_str
        if description:
            assert 1 in error_dict and error_dict[1] == description
            assert 2 in error_dict and error_dict[2] == description
        else:
            # No description may be reported when none is expected
            assert not error_dict
|
Loading…
Reference in New Issue
Block a user