DellEMC: get_change_event Platform API implementation for S6000, S6100 and Z9100 (#4593)

Add support for detecting transceiver change events through xcvrd on DellEMC S6000, S6100 and Z9100 platforms.

- In S6000, rename 'get_transceiver_change_event' in chassis.py to 'get_change_event' and return appropriate values.
- In S6100, implement 'get_change_event' in chassis.py using polling (poll interval = 1 second), because transceiver insertion/removal does not generate interrupts due to a CPLD bug.
- In Z9100, implement 'get_change_event' through interrupt method using select.epoll().
This commit is contained in:
Arun Saravanan Balachandran 2020-05-21 01:22:50 +05:30 committed by Abhishek Dosi
parent 3ca65bdb67
commit 98b8d1eee1
3 changed files with 305 additions and 9 deletions

View File

@ -237,13 +237,32 @@ class Chassis(ChassisBase):
return int(content, 16) return int(content, 16)
def get_transceiver_change_event(self, timeout=0): def get_change_event(self, timeout=0):
""" """
Returns a dictionary containing sfp changes which have Returns a nested dictionary containing all devices which have
experienced a change at chassis level experienced a change at chassis level
Args:
timeout: Timeout in milliseconds (optional). If timeout == 0,
this method will block until a change is detected.
Returns:
(bool, dict):
- True if call successful, False if not;
- A nested dictionary where key is a device type,
value is a dictionary with key:value pairs in the
format of {'device_id':'device_event'},
where device_id is the device ID for this device and
device_event,
status='1' represents device inserted,
status='0' represents device removed.
Ex. {'fan':{'0':'0', '2':'1'}, 'sfp':{'11':'0'}}
indicates that fan 0 has been removed, fan 2
has been inserted and sfp 11 has been removed.
""" """
start_time = time.time() start_time = time.time()
port_dict = {} port_dict = {}
ret_dict = {"sfp": port_dict}
port = self.PORT_START port = self.PORT_START
forever = False forever = False
@ -256,7 +275,7 @@ class Chassis(ChassisBase):
end_time = start_time + timeout end_time = start_time + timeout
if (start_time > end_time): if (start_time > end_time):
return False, {} # Time wrap or possibly incorrect timeout return False, ret_dict # Time wrap or possibly incorrect timeout
while (timeout >= 0): while (timeout >= 0):
# Check for OIR events and return updated port_dict # Check for OIR events and return updated port_dict
@ -276,7 +295,7 @@ class Chassis(ChassisBase):
# Update reg value # Update reg value
self.modprs_register = reg_value self.modprs_register = reg_value
return True, port_dict return True, ret_dict
if forever: if forever:
time.sleep(1) time.sleep(1)
@ -287,7 +306,7 @@ class Chassis(ChassisBase):
else: else:
if timeout > 0: if timeout > 0:
time.sleep(timeout) time.sleep(timeout)
return True, {} return True, ret_dict
return False, {} return False, ret_dict

View File

@ -39,6 +39,7 @@ class Chassis(ChassisBase):
HWMON_DIR = "/sys/devices/platform/SMF.512/hwmon/" HWMON_DIR = "/sys/devices/platform/SMF.512/hwmon/"
HWMON_NODE = os.listdir(HWMON_DIR)[0] HWMON_NODE = os.listdir(HWMON_DIR)[0]
MAILBOX_DIR = HWMON_DIR + HWMON_NODE MAILBOX_DIR = HWMON_DIR + HWMON_NODE
POLL_INTERVAL = 1 # Poll interval in seconds
reset_reason_dict = {} reset_reason_dict = {}
reset_reason_dict[11] = ChassisBase.REBOOT_CAUSE_POWER_LOSS reset_reason_dict[11] = ChassisBase.REBOOT_CAUSE_POWER_LOSS
@ -81,6 +82,7 @@ class Chassis(ChassisBase):
self._component_list.append(component) self._component_list.append(component)
self._watchdog = Watchdog() self._watchdog = Watchdog()
self._transceiver_presence = self._get_transceiver_presence()
def _get_reboot_reason_smf_register(self): def _get_reboot_reason_smf_register(self):
# In S6100, mb_poweron_reason register will # In S6100, mb_poweron_reason register will
@ -111,6 +113,24 @@ class Chassis(ChassisBase):
rv = rv.lstrip(" ") rv = rv.lstrip(" ")
return rv return rv
def _get_register(self, reg_file):
    """
    Read a register exposed as a file.

    Args:
        reg_file: Path of the file to read.

    Returns:
        The file content with trailing CR/LF characters and leading
        spaces removed, or 'ERR' if reg_file is not a regular file or
        cannot be read.
    """
    if not os.path.isfile(reg_file):
        return 'ERR'

    try:
        with open(reg_file, 'r') as reg:
            content = reg.read()
    except Exception:
        return 'ERR'

    return content.rstrip('\r\n').lstrip(" ")
def get_name(self): def get_name(self):
""" """
Retrieves the name of the chassis Retrieves the name of the chassis
@ -215,3 +235,103 @@ class Chassis(ChassisBase):
return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None) return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER, "Invalid Reason") return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER, "Invalid Reason")
def _get_transceiver_presence(self):
    """
    Build one contiguous presence bitmap from the four qsfp_modprs
    registers.

    Returns:
        An integer in which the low 16 bits of each register occupy
        bits 0-15 (i2c-14), 16-31 (i2c-16), 32-47 (i2c-15) and
        48-63 (i2c-17).
    """
    # (register file, bit offset in the combined bitmap), in read order
    modprs_layout = (
        ("/sys/class/i2c-adapter/i2c-14/14-003e/qsfp_modprs", 0),
        ("/sys/class/i2c-adapter/i2c-15/15-003e/qsfp_modprs", 32),
        ("/sys/class/i2c-adapter/i2c-16/16-003e/qsfp_modprs", 16),
        ("/sys/class/i2c-adapter/i2c-17/17-003e/qsfp_modprs", 48),
    )

    readings = [(self._get_register(reg_file), offset)
                for reg_file, offset in modprs_layout]

    presence = 0
    for raw, offset in readings:
        # If IOM is not present, register read will fail.
        # Handle the scenario gracefully by treating it as all zeroes.
        if raw in ('read error', 'ERR'):
            raw = '0x0'
        presence |= (int(raw, 16) & 0xffff) << offset

    return presence
def get_change_event(self, timeout=0):
    """
    Poll the transceiver presence bitmap and report ports whose
    presence changed at chassis level.

    Args:
        timeout: Timeout in milliseconds (optional). If timeout == 0,
            this method will block until a change is detected.
            A negative timeout is rejected.

    Returns:
        (bool, dict):
            - True if call successful, False if not;
            - A nested dictionary where key is a device type,
              value is a dictionary with key:value pairs in the
              format of {'device_id':'device_event'},
              where device_id is the device ID for this device and
              device_event,
                  status='1' represents device inserted,
                  status='0' represents device removed.
              Ex. {'fan':{'0':'0', '2':'1'}, 'sfp':{'11':'0'}}
                  indicates that fan 0 has been removed, fan 2
                  has been inserted and sfp 11 has been removed.
              Only the 'sfp' key is populated here; it stays empty
              when the timeout expires without a change.
    """
    sfp_events = {}
    result = {'sfp': sfp_events}

    wait_forever = timeout == 0
    if not wait_forever:
        if not timeout > 0:
            return False, result    # Incorrect timeout
        timeout = timeout / float(1000)     # Convert to secs

    while True:
        if wait_forever:
            time.sleep(self.POLL_INTERVAL)
        else:
            nap = min(timeout, self.POLL_INTERVAL)
            poll_started = time.time()
            time.sleep(nap)

        latest = self._get_transceiver_presence()

        # Report only when the bitmap differs from the cached one
        if latest != self._transceiver_presence:
            flipped = self._transceiver_presence ^ latest
            for port in range(self.get_num_sfps()):
                bit = 1 << port
                if flipped & bit:
                    # qsfp_modprs 1 => optics is removed
                    # qsfp_modprs 0 => optics is inserted
                    sfp_events[port] = '0' if latest & bit else '1'

            # Remember the new state for the next call
            self._transceiver_presence = latest
            return True, result

        if wait_forever:
            continue

        # Charge the time spent in this iteration against the timeout
        timeout = round(timeout - (time.time() - poll_started), 3)
        if timeout <= 0:
            return True, result

View File

@ -10,8 +10,8 @@
try: try:
import os import os
import subprocess import select
import re import sys
from sonic_platform_base.chassis_base import ChassisBase from sonic_platform_base.chassis_base import ChassisBase
from sonic_platform.sfp import Sfp from sonic_platform.sfp import Sfp
from sonic_platform.fan import Fan from sonic_platform.fan import Fan
@ -60,6 +60,8 @@ class Chassis(ChassisBase):
28: [16, 6], 29: [16, 7], 30: [16, 8], 31: [16, 9] 28: [16, 6], 29: [16, 7], 30: [16, 8], 31: [16, 9]
} }
OIR_FD_PATH = "/sys/devices/platform/dell_ich.0/sci_int_gpio_sus6"
reset_reason_dict = {} reset_reason_dict = {}
reset_reason_dict[11] = ChassisBase.REBOOT_CAUSE_POWER_LOSS reset_reason_dict[11] = ChassisBase.REBOOT_CAUSE_POWER_LOSS
reset_reason_dict[33] = ChassisBase.REBOOT_CAUSE_WATCHDOG reset_reason_dict[33] = ChassisBase.REBOOT_CAUSE_WATCHDOG
@ -74,6 +76,8 @@ class Chassis(ChassisBase):
def __init__(self): def __init__(self):
ChassisBase.__init__(self) ChassisBase.__init__(self)
self.oir_fd = -1
self.epoll = -1
PORT_START = 0 PORT_START = 0
PORT_END = 31 PORT_END = 31
PORTS_IN_BLOCK = (PORT_END + 1) PORTS_IN_BLOCK = (PORT_END + 1)
@ -112,6 +116,12 @@ class Chassis(ChassisBase):
component = Component(i) component = Component(i)
self._component_list.append(component) self._component_list.append(component)
def __del__(self):
    """
    Release the OIR sysfs file and its epoll object if they are
    still open when the chassis object is destroyed.
    """
    # -1 is the "nothing open" sentinel for oir_fd
    if self.oir_fd == -1:
        return

    self.epoll.unregister(self.oir_fd.fileno())
    self.epoll.close()
    self.oir_fd.close()
def _get_pmc_register(self, reg_name): def _get_pmc_register(self, reg_name):
# On successful read, returns the value read from given # On successful read, returns the value read from given
# reg_name and on failure returns 'ERR' # reg_name and on failure returns 'ERR'
@ -131,6 +141,24 @@ class Chassis(ChassisBase):
rv = rv.lstrip(" ") rv = rv.lstrip(" ")
return rv return rv
def _get_register(self, reg_file):
    """
    Return the content of a register file.

    Args:
        reg_file: Path of the file to read.

    Returns:
        The text read from reg_file without trailing CR/LF characters
        and without leading spaces; 'ERR' when reg_file is not a
        regular file or reading it fails.
    """
    failure = 'ERR'

    if not os.path.isfile(reg_file):
        return failure

    try:
        with open(reg_file, 'r') as handle:
            value = handle.read()
    except Exception:
        return failure

    value = value.rstrip('\r\n')
    return value.lstrip(" ")
def get_name(self): def get_name(self):
""" """
Retrieves the name of the chassis Retrieves the name of the chassis
@ -252,3 +280,132 @@ class Chassis(ChassisBase):
return (self.reset_reason_dict[reset_reason], None) return (self.reset_reason_dict[reset_reason], None)
return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER, "Invalid Reason") return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER, "Invalid Reason")
def _check_interrupts(self, port_dict):
    """
    Read the qsfp_abs_int / qsfp_abs_sta registers of the three CPLDs
    and record every port with a pending interrupt in port_dict.

    Args:
        port_dict: Dictionary updated in place. For each port with its
            interrupt bit set, key (port index + 1) is set to '0' when
            the status bit is 1 (optics removed) or '1' when the
            status bit is 0 (optics inserted).

    Returns:
        (bool, bool):
            - False if any register read returned 'ERR', else True;
            - True if at least one entry was written to port_dict.
    """
    # (interrupt register, status register, valid-bit mask, bit offset)
    cpld_layout = (
        ("/sys/class/i2c-adapter/i2c-14/14-003e/qsfp_abs_int",
         "/sys/class/i2c-adapter/i2c-14/14-003e/qsfp_abs_sta",
         0xfff, 0),
        ("/sys/class/i2c-adapter/i2c-15/15-003e/qsfp_abs_int",
         "/sys/class/i2c-adapter/i2c-15/15-003e/qsfp_abs_sta",
         0x3ff, 12),
        ("/sys/class/i2c-adapter/i2c-16/16-003e/qsfp_abs_int",
         "/sys/class/i2c-adapter/i2c-16/16-003e/qsfp_abs_sta",
         0x3ff, 22),
    )

    # Read everything first; a single failed read aborts the check
    raw = []
    for int_file, sta_file, _, _ in cpld_layout:
        raw.append(self._get_register(int_file))
        raw.append(self._get_register(sta_file))

    if 'ERR' in raw:
        return False, False

    values = [int(text, 16) for text in raw]

    # Make it contiguous (discard reserved bits)
    interrupt_reg = 0
    status_reg = 0
    for idx, (_, _, mask, offset) in enumerate(cpld_layout):
        interrupt_reg |= (values[2 * idx] & mask) << offset
        status_reg |= (values[2 * idx + 1] & mask) << offset

    updated = False
    for port in range(self.get_num_sfps()):
        bit = 1 << port
        if not interrupt_reg & bit:
            continue

        # At least one port has generated an interrupt
        updated = True
        # status reg 1 => optics is removed, 0 => optics is inserted
        port_dict[port + 1] = '0' if status_reg & bit else '1'

    return True, updated
def get_change_event(self, timeout=0):
    """
    Returns a nested dictionary containing all devices which have
    experienced a change at chassis level

    The method first reports interrupts that are already pending; if
    there are none, it waits on the OIR sysfs file with epoll and then
    reads the interrupt registers again.

    Args:
        timeout: Timeout in milliseconds (optional). If timeout == 0,
            this method will block until a change is detected.

    Returns:
        (bool, dict):
            - True if call successful, False if not;
            - A nested dictionary where key is a device type,
              value is a dictionary with key:value pairs in the
              format of {'device_id':'device_event'},
              where device_id is the device ID for this device and
              device_event,
                  status='1' represents device inserted,
                  status='0' represents device removed.
              Ex. {'fan':{'0':'0', '2':'1'}, 'sfp':{'11':'0'}}
                  indicates that fan 0 has been removed, fan 2
                  has been inserted and sfp 11 has been removed.
        A (bool, dict) tuple is returned on every path, including
        timeout expiry and any failure while setting up epoll.
    """
    port_dict = {}
    ret_dict = {'sfp': port_dict}
    registered = False

    if timeout != 0:
        # Convert msecs to secs. Divide by a float so that a
        # sub-second timeout is not truncated to 0 by integer
        # division; 0 would be treated below as "block forever".
        timeout = timeout / float(1000)

    try:
        # We get notified when there is an SCI interrupt from GPIO SUS6
        # Open the sysfs file and register the epoll object.
        # open() raises on failure; that is handled by the except below.
        self.oir_fd = open(self.OIR_FD_PATH, "r")
        # Do a dummy read before epoll register
        self.oir_fd.read()
        self.epoll = select.epoll()
        # NOTE(review): EPOLLIN & EPOLLET evaluates to 0, so only the
        # events epoll always reports (error/hang-up) can wake poll().
        # Presumably the sysfs notification arrives as such an event;
        # confirm on hardware before changing this mask.
        self.epoll.register(self.oir_fd.fileno(),
                            select.EPOLLIN & select.EPOLLET)
        registered = True

        # Check for missed interrupts by invoking
        # self._check_interrupts which will update the port_dict.
        while True:
            interrupt_count_start = self._get_register(self.OIR_FD_PATH)

            retval, is_port_dict_updated = \
                self._check_interrupts(port_dict)
            if (retval is True) and (is_port_dict_updated is True):
                return True, ret_dict

            interrupt_count_end = self._get_register(self.OIR_FD_PATH)

            if (interrupt_count_start == 'ERR' or
                    interrupt_count_end == 'ERR'):
                break

            # _check_interrupts() itself may take upto 100s of msecs.
            # We detect a missed interrupt based on the count
            if interrupt_count_start == interrupt_count_end:
                break

        # Block until an xcvr is inserted or removed with timeout = -1
        events = self.epoll.poll(timeout=timeout if timeout != 0 else -1)
        if events:
            # check interrupts and update the port_dict
            retval, _ = self._check_interrupts(port_dict)

        # On timeout expiry retval still holds the result of the last
        # register check done in the loop above.
        return retval, ret_dict
    except Exception:
        return False, ret_dict
    finally:
        # Release only what was actually acquired, so that a failure
        # in open()/epoll()/register() neither raises from here (which
        # would override the return value) nor leaks the file handle.
        if self.epoll != -1:
            if registered:
                self.epoll.unregister(self.oir_fd.fileno())
            self.epoll.close()
        if self.oir_fd != -1:
            self.oir_fd.close()

        self.oir_fd = -1
        self.epoll = -1