Dell S6100: Modify transceiver change event from interrupt to poll mode (#7309)

#### Why I did it

- An xcvrd crash was seen in the latest 201811 images.
- For the Dell S6100, API 2.0 uses poll mode, while API 1.0 was still using interrupt mode.

#### How I did it

- Modified `get_transceiver_change_event` in API 1.0 to use poll mode.
This commit is contained in:
Aravind Mani 2021-04-14 12:12:30 -07:00 committed by GitHub
parent f5703825e3
commit 95fecafdf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 129 deletions

View File

@ -16,7 +16,6 @@ try:
import time import time
import os import os
import logging import logging
import select
from sonic_sfp.sfputilbase import SfpUtilBase from sonic_sfp.sfputilbase import SfpUtilBase
except ImportError as e: except ImportError as e:
raise ImportError("%s - required module not found" % str(e)) raise ImportError("%s - required module not found" % str(e))
@ -28,7 +27,7 @@ class SfpUtil(SfpUtilBase):
PORT_START = 0 PORT_START = 0
PORT_END = 63 PORT_END = 63
PORTS_IN_BLOCK = 64 PORTS_IN_BLOCK = 64
POLL_INTERVAL = 1
_port_to_eeprom_mapping = {} _port_to_eeprom_mapping = {}
_port_to_i2c_mapping = { _port_to_i2c_mapping = {
0: [6, 18, 34, 50, 66], 0: [6, 18, 34, 50, 66],
@ -110,7 +109,6 @@ class SfpUtil(SfpUtilBase):
IOM_4_PORT_END = 63 IOM_4_PORT_END = 63
BASE_VAL_PATH = "/sys/class/i2c-adapter/i2c-{0}/{0}-003e/" BASE_VAL_PATH = "/sys/class/i2c-adapter/i2c-{0}/{0}-003e/"
OIR_FD_PATH = "/sys/devices/platform/dell_ich.0/sci_int_gpio_sus6"
oir_fd = -1 oir_fd = -1
epoll = -1 epoll = -1
@ -235,8 +233,8 @@ class SfpUtil(SfpUtilBase):
elif (not assigned): elif (not assigned):
self.port_to_eeprom_mapping[port_num] =\ self.port_to_eeprom_mapping[port_num] =\
"No IOM" "No IOM"
SfpUtilBase.__init__(self) SfpUtilBase.__init__(self)
self._transceiver_presence = self._get_transceiver_presence()
def __del__(self): def __del__(self):
if self.oir_fd != -1: if self.oir_fd != -1:
@ -482,7 +480,7 @@ class SfpUtil(SfpUtilBase):
def get_register(self, reg_file): def get_register(self, reg_file):
retval = 'ERR' retval = 'ERR'
if (not os.path.isfile(reg_file)): if not os.path.isfile(reg_file):
print(reg_file + ' not found !') print(reg_file + ' not found !')
return retval return retval
@ -496,141 +494,88 @@ class SfpUtil(SfpUtilBase):
retval = retval.lstrip(" ") retval = retval.lstrip(" ")
return retval return retval
def check_interrupts(self, port_dict): def _get_transceiver_presence(self):
retval = 0
is_port_dict_updated = False
# Read the QSFP ABS interrupt & status registers cpld2_modprs = self.get_register(
cpld2_abs_int = self.get_register( "/sys/class/i2c-adapter/i2c-14/14-003e/qsfp_modprs")
"/sys/class/i2c-adapter/i2c-14/14-003e/qsfp_abs_int") cpld3_modprs = self.get_register(
cpld2_abs_sta = self.get_register( "/sys/class/i2c-adapter/i2c-15/15-003e/qsfp_modprs")
"/sys/class/i2c-adapter/i2c-14/14-003e/qsfp_abs_sta") cpld4_modprs = self.get_register(
cpld3_abs_int = self.get_register( "/sys/class/i2c-adapter/i2c-16/16-003e/qsfp_modprs")
"/sys/class/i2c-adapter/i2c-15/15-003e/qsfp_abs_int") cpld5_modprs = self.get_register(
cpld3_abs_sta = self.get_register( "/sys/class/i2c-adapter/i2c-17/17-003e/qsfp_modprs")
"/sys/class/i2c-adapter/i2c-15/15-003e/qsfp_abs_sta")
cpld4_abs_int = self.get_register(
"/sys/class/i2c-adapter/i2c-16/16-003e/qsfp_abs_int")
cpld4_abs_sta = self.get_register(
"/sys/class/i2c-adapter/i2c-16/16-003e/qsfp_abs_sta")
cpld5_abs_int = self.get_register(
"/sys/class/i2c-adapter/i2c-17/17-003e/qsfp_abs_int")
cpld5_abs_sta = self.get_register(
"/sys/class/i2c-adapter/i2c-17/17-003e/qsfp_abs_sta")
if (cpld2_abs_int == 'ERR' or cpld2_abs_sta == 'ERR' or # If IOM is not present, register read will fail.
cpld3_abs_int == 'ERR' or cpld3_abs_sta == 'ERR' or
cpld4_abs_int == 'ERR' or cpld4_abs_sta == 'ERR' or
cpld5_abs_int == 'ERR' or cpld5_abs_sta == 'ERR'):
return -1
# If IOM is not present, interrupt will return 'read error'
# Handle the scenario gracefully # Handle the scenario gracefully
if (cpld2_abs_int == 'read error'): if cpld2_modprs == 'read error' or cpld2_modprs == 'ERR':
cpld2_abs_int = "0x0" cpld2_modprs = '0x0'
cpld2_abs_sta = "0x0" if cpld3_modprs == 'read error' or cpld3_modprs == 'ERR':
if (cpld3_abs_int == 'read error'): cpld3_modprs = '0x0'
cpld3_abs_int = "0x0" if cpld4_modprs == 'read error' or cpld4_modprs == 'ERR':
cpld3_abs_sta = "0x0" cpld4_modprs = '0x0'
if (cpld4_abs_int == 'read error'): if cpld5_modprs == 'read error' or cpld5_modprs == 'ERR':
cpld4_abs_int = "0x0" cpld5_modprs = '0x0'
cpld4_abs_sta = "0x0"
if (cpld5_abs_int == 'read error'):
cpld5_abs_int = "0x0"
cpld5_abs_sta = "0x0"
cpld2_abs_int = int(cpld2_abs_int, 16)
cpld2_abs_sta = int(cpld2_abs_sta, 16)
cpld3_abs_int = int(cpld3_abs_int, 16)
cpld3_abs_sta = int(cpld3_abs_sta, 16)
cpld4_abs_int = int(cpld4_abs_int, 16)
cpld4_abs_sta = int(cpld4_abs_sta, 16)
cpld5_abs_int = int(cpld5_abs_int, 16)
cpld5_abs_sta = int(cpld5_abs_sta, 16)
# Make it contiguous # Make it contiguous
interrupt_reg = (cpld2_abs_int & 0xffff) | \ transceiver_presence = (int(cpld2_modprs, 16) & 0xffff) |\
((cpld4_abs_int & 0xffff) << 16) | \ ((int(cpld4_modprs, 16) & 0xffff) << 16) |\
((cpld3_abs_int & 0xffff) << 32) | \ ((int(cpld3_modprs, 16) & 0xffff) << 32) |\
((cpld5_abs_int & 0xffff) << 48) ((int(cpld5_modprs, 16) & 0xffff) << 48)
status_reg = (cpld2_abs_sta & 0xffff) | \
((cpld4_abs_sta & 0xffff) << 16) | \
((cpld3_abs_sta & 0xffff) << 32) | \
((cpld5_abs_sta & 0xffff) << 48)
port = self.port_start return transceiver_presence
while port <= self.port_end:
if interrupt_reg & (1 << port):
# update only if atleast one port has generated
# interrupt
is_port_dict_updated = True
if status_reg & (1 << port):
# status reg 1 => optics is removed
port_dict[port] = '0'
else:
# status reg 0 => optics is inserted
port_dict[port] = '1'
port += 1
return retval, is_port_dict_updated
def get_transceiver_change_event(self, timeout=0): def get_transceiver_change_event(self, timeout=0):
"""
Returns a dictionary containing optics insertion/removal status.
Args:
timeout: Timeout in milliseconds (optional). If timeout == 0,
this method will block until a change is detected.
Returns:
(bool, dict):
- True if call successful, False if not;
"""
port_dict = {} port_dict = {}
try: forever = False
# We get notified when there is an SCI interrupt from GPIO SUS6
# Open the sysfs file and register the epoll object if timeout == 0:
self.oir_fd = open(self.OIR_FD_PATH, "r") forever = True
if self.oir_fd != -1: elif timeout > 0:
# Do a dummy read before epoll register timeout = timeout / float(1000) # Convert to secs
self.oir_fd.read() else:
self.epoll = select.epoll() return False, port_dict # Incorrect timeout
self.epoll.register(self.oir_fd.fileno(),
select.EPOLLIN & select.EPOLLET) while True:
if forever:
timer = self.POLL_INTERVAL
else: else:
print("get_transceiver_change_event : unable to create fd") timer = min(timeout, self.POLL_INTERVAL)
return False, {} start_time = time.time()
# Check for missed interrupts by invoking self.check_interrupts time.sleep(timer)
# which will update the port_dict. cur_presence = self._get_transceiver_presence()
while True:
interrupt_count_start = self.get_register(self.OIR_FD_PATH)
retval, is_port_dict_updated = \ # Update dict only if a change has been detected
self.check_interrupts(port_dict) if cur_presence != self._transceiver_presence:
if ((retval == 0) and (is_port_dict_updated is True)): changed_ports = self._transceiver_presence ^ cur_presence
return True, port_dict for port in range(self.port_end):
# Mask off the bit corresponding to particular port
mask = 1 << port
if changed_ports & mask:
# qsfp_modprs 1 => optics is removed
if cur_presence & mask:
port_dict[port] = '0'
# qsfp_modprs 0 => optics is inserted
else:
port_dict[port] = '1'
interrupt_count_end = self.get_register(self.OIR_FD_PATH) # Update current presence
self._transceiver_presence = cur_presence
break
if (interrupt_count_start == 'ERR' or if not forever:
interrupt_count_end == 'ERR'): elapsed_time = time.time() - start_time
print("get_transceiver_change_event : \ timeout = round(timeout - elapsed_time, 3)
unable to retrive interrupt count") if timeout <= 0:
break break
# check_interrupts() itself may take upto 100s of msecs. return True, port_dict
# We detect a missed interrupt based on the count
if interrupt_count_start == interrupt_count_end:
break
# Block until an xcvr is inserted or removed with timeout = -1
events = self.epoll.poll(
timeout=timeout if timeout != 0 else -1)
if events:
# check interrupts and return the port_dict
retval, is_port_dict_updated = \
self.check_interrupts(port_dict)
if (retval != 0):
return False, {}
return True, port_dict
except:
return False, {}
finally:
if self.oir_fd != -1:
self.epoll.unregister(self.oir_fd.fileno())
self.epoll.close()
self.oir_fd.close()
self.oir_fd = -1
self.epoll = -1
return False, {}

View File

@ -28,7 +28,6 @@ if [[ "$1" == "init" ]]; then
pericom="/sys/bus/pci/devices/0000:08:00.0" pericom="/sys/bus/pci/devices/0000:08:00.0"
modprobe i2c-dev modprobe i2c-dev
modprobe i2c-mux-pca954x force_deselect_on_exit=1 modprobe i2c-mux-pca954x force_deselect_on_exit=1
modprobe dell_ich
modprobe dell_s6100_iom_cpld modprobe dell_s6100_iom_cpld
modprobe dell_s6100_lpc modprobe dell_s6100_lpc
modprobe nvram modprobe nvram
@ -60,7 +59,6 @@ elif [[ "$1" == "deinit" ]]; then
modprobe -r dell_s6100_iom_cpld modprobe -r dell_s6100_iom_cpld
modprobe -r i2c-mux-pca954x modprobe -r i2c-mux-pca954x
modprobe -r i2c-dev modprobe -r i2c-dev
modprobe -r dell_ich
modprobe -r nvram modprobe -r nvram
remove_python_api_package remove_python_api_package
else else