[Mellanox] Fix race condition while creating SFP (#17544)
Why I did it Fix issue xcvrd crashes due to cannot import name 'initialize_sfp_thermal': Nov 27 09:47:16.388639 sonic ERR pmon#xcvrd: Exception occured at CmisManagerTask thread due to ImportError("cannot import name 'initialize_sfp_thermal' from partially initialized module 'sonic_platform.thermal' (most likely due to a circular import) (/usr/local/lib/python3.9/dist-packages/sonic_platform/thermal.py)") Nov 27 09:47:16.392544 sonic ERR pmon#xcvrd: Traceback (most recent call last): Nov 27 09:47:16.392643 sonic ERR pmon#xcvrd: File "/usr/local/lib/python3.9/dist-packages/xcvrd/xcvrd.py", line 1518, in run Nov 27 09:47:16.392757 sonic ERR pmon#xcvrd: self.task_worker() Nov 27 09:47:16.392757 sonic ERR pmon#xcvrd: File "/usr/local/lib/python3.9/dist-packages/xcvrd/xcvrd.py", line 1240, in task_worker Nov 27 09:47:16.392757 sonic ERR pmon#xcvrd: sfp = platform_chassis.get_sfp(pport) Nov 27 09:47:16.392793 sonic ERR pmon#xcvrd: File "/usr/local/lib/python3.9/dist-packages/sonic_platform/chassis.py", line 346, in get_sfp Nov 27 09:47:16.392830 sonic ERR pmon#xcvrd: self.initialize_single_sfp(index) Nov 27 09:47:16.392830 sonic ERR pmon#xcvrd: File "/usr/local/lib/python3.9/dist-packages/sonic_platform/chassis.py", line 288, in initialize_single_sfp Nov 27 09:47:16.392830 sonic ERR pmon#xcvrd: self._sfp_list[index] = sfp_module.SFP(index) Nov 27 09:47:16.392830 sonic ERR pmon#xcvrd: File "/usr/local/lib/python3.9/dist-packages/sonic_platform/sfp.py", line 272, in __init__ Nov 27 09:47:16.392866 sonic ERR pmon#xcvrd: from .thermal import initialize_sfp_thermal Nov 27 09:47:16.392918 sonic ERR pmon#xcvrd: ImportError: cannot import name 'initialize_sfp_thermal' from partially initialized module 'sonic_platform.thermal' (most likely due to a circular import) (/usr/local/lib/python3.9/dist-packages/sonic_platform/thermal.py) Nov 27 09:47:16.393103 sonic ERR pmon#xcvrd: Xcvrd: exception found at child thread CmisManagerTask due to ImportError("cannot import name 'initialize_sfp_thermal' from partially initialized module 'sonic_platform.thermal' (most likely due to a circular import) (/usr/local/lib/python3.9/dist-packages/sonic_platform/thermal.py)") Nov 27 09:47:16.393103 sonic ERR pmon#xcvrd: Exiting main loop as child thread raised exception! Work item tracking Microsoft ADO (number only): How I did it Add lock for creating SFP object How to verify it UNIT TEST Manual Test
This commit is contained in:
parent
7f03584d9d
commit
9240bcb5b1
@ -32,6 +32,7 @@ try:
|
||||
from .device_data import DeviceDataManager
|
||||
import re
|
||||
import time
|
||||
import threading
|
||||
except ImportError as e:
|
||||
raise ImportError (str(e) + "- required module not found")
|
||||
|
||||
@ -119,6 +120,7 @@ class Chassis(ChassisBase):
|
||||
self.reboot_cause_initialized = False
|
||||
|
||||
self.sfp_module = None
|
||||
self.sfp_lock = threading.Lock()
|
||||
|
||||
# Build the RJ45 port list from platform.json and hwsku.json
|
||||
self._RJ45_port_inited = False
|
||||
@ -266,38 +268,49 @@ class Chassis(ChassisBase):
|
||||
|
||||
def initialize_single_sfp(self, index):
|
||||
sfp_count = self.get_num_sfps()
|
||||
# Use double checked locking mechanism for:
|
||||
# 1. protect shared resource self._sfp_list
|
||||
# 2. performance (avoid locking every time)
|
||||
if index < sfp_count:
|
||||
if not self._sfp_list:
|
||||
self._sfp_list = [None] * sfp_count
|
||||
if not self._sfp_list or not self._sfp_list[index]:
|
||||
with self.sfp_lock:
|
||||
if not self._sfp_list:
|
||||
self._sfp_list = [None] * sfp_count
|
||||
|
||||
if not self._sfp_list[index]:
|
||||
sfp_module = self._import_sfp_module()
|
||||
if self.RJ45_port_list and index in self.RJ45_port_list:
|
||||
self._sfp_list[index] = sfp_module.RJ45Port(index)
|
||||
else:
|
||||
self._sfp_list[index] = sfp_module.SFP(index)
|
||||
self.sfp_initialized_count += 1
|
||||
if not self._sfp_list[index]:
|
||||
sfp_module = self._import_sfp_module()
|
||||
if self.RJ45_port_list and index in self.RJ45_port_list:
|
||||
self._sfp_list[index] = sfp_module.RJ45Port(index)
|
||||
else:
|
||||
self._sfp_list[index] = sfp_module.SFP(index)
|
||||
self.sfp_initialized_count += 1
|
||||
|
||||
def initialize_sfp(self):
|
||||
if not self._sfp_list:
|
||||
sfp_module = self._import_sfp_module()
|
||||
sfp_count = self.get_num_sfps()
|
||||
for index in range(sfp_count):
|
||||
if self.RJ45_port_list and index in self.RJ45_port_list:
|
||||
sfp_object = sfp_module.RJ45Port(index)
|
||||
else:
|
||||
sfp_object = sfp_module.SFP(index)
|
||||
self._sfp_list.append(sfp_object)
|
||||
self.sfp_initialized_count = sfp_count
|
||||
elif self.sfp_initialized_count != len(self._sfp_list):
|
||||
sfp_module = self._import_sfp_module()
|
||||
for index in range(len(self._sfp_list)):
|
||||
if self._sfp_list[index] is None:
|
||||
if self.RJ45_port_list and index in self.RJ45_port_list:
|
||||
self._sfp_list[index] = sfp_module.RJ45Port(index)
|
||||
else:
|
||||
self._sfp_list[index] = sfp_module.SFP(index)
|
||||
self.sfp_initialized_count = len(self._sfp_list)
|
||||
sfp_count = self.get_num_sfps()
|
||||
# Use double checked locking mechanism for:
|
||||
# 1. protect shared resource self._sfp_list
|
||||
# 2. performance (avoid locking every time)
|
||||
if sfp_count != self.sfp_initialized_count:
|
||||
with self.sfp_lock:
|
||||
if sfp_count != self.sfp_initialized_count:
|
||||
if not self._sfp_list:
|
||||
sfp_module = self._import_sfp_module()
|
||||
for index in range(sfp_count):
|
||||
if self.RJ45_port_list and index in self.RJ45_port_list:
|
||||
sfp_object = sfp_module.RJ45Port(index)
|
||||
else:
|
||||
sfp_object = sfp_module.SFP(index)
|
||||
self._sfp_list.append(sfp_object)
|
||||
self.sfp_initialized_count = sfp_count
|
||||
elif self.sfp_initialized_count != len(self._sfp_list):
|
||||
sfp_module = self._import_sfp_module()
|
||||
for index in range(len(self._sfp_list)):
|
||||
if self._sfp_list[index] is None:
|
||||
if self.RJ45_port_list and index in self.RJ45_port_list:
|
||||
self._sfp_list[index] = sfp_module.RJ45Port(index)
|
||||
else:
|
||||
self._sfp_list[index] = sfp_module.SFP(index)
|
||||
self.sfp_initialized_count = len(self._sfp_list)
|
||||
|
||||
def get_num_sfps(self):
|
||||
"""
|
||||
|
@ -16,8 +16,10 @@
|
||||
#
|
||||
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import subprocess
|
||||
import threading
|
||||
|
||||
from mock import MagicMock
|
||||
if sys.version_info.major == 3:
|
||||
@ -167,6 +169,31 @@ class TestChassis:
|
||||
assert len(sfp_list) == 3
|
||||
assert chassis.sfp_initialized_count == 3
|
||||
|
||||
def test_create_sfp_in_multi_thread(self):
|
||||
DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=3)
|
||||
|
||||
iteration_num = 100
|
||||
while iteration_num > 0:
|
||||
chassis = Chassis()
|
||||
assert chassis.sfp_initialized_count == 0
|
||||
t1 = threading.Thread(target=lambda: chassis.get_sfp(1))
|
||||
t2 = threading.Thread(target=lambda: chassis.get_sfp(1))
|
||||
t3 = threading.Thread(target=lambda: chassis.get_all_sfps())
|
||||
t4 = threading.Thread(target=lambda: chassis.get_all_sfps())
|
||||
threads = [t1, t2, t3, t4]
|
||||
random.shuffle(threads)
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
assert len(chassis.get_all_sfps()) == 3
|
||||
assert chassis.sfp_initialized_count == 3
|
||||
for index, s in enumerate(chassis.get_all_sfps()):
|
||||
assert s.sdk_index == index
|
||||
iteration_num -= 1
|
||||
|
||||
|
||||
|
||||
@mock.patch('sonic_platform.sfp_event.sfp_event.check_sfp_status', MagicMock())
|
||||
@mock.patch('sonic_platform.sfp_event.sfp_event.__init__', MagicMock(return_value=None))
|
||||
@mock.patch('sonic_platform.sfp_event.sfp_event.initialize', MagicMock())
|
||||
|
Loading…
Reference in New Issue
Block a user