[Mellanox] update asic and module temperature in a thread for CMIS management (#16955)
- Why I did it: When a module is entirely under software control, the driver cannot get the module temperature or temperature thresholds from firmware. In this case, SONiC needs to read the temperature and temperature thresholds from the module EEPROM. In this PR, a thermal updater thread is created to update the module temperature and temperature thresholds while software control is enabled. - How I did it: Query the ASIC temperature from the SDK sysfs and update hw-management-tc periodically. Query the module temperature from the EEPROM and update hw-management-tc periodically. - How to verify it: Manual test. New unit tests.
This commit is contained in:
parent
0d62cf0e92
commit
1b84f3daa5
@ -82,6 +82,8 @@ class Chassis(ChassisBase):
|
|||||||
# System UID LED
|
# System UID LED
|
||||||
_led_uid = None
|
_led_uid = None
|
||||||
|
|
||||||
|
chassis_instance = None
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(Chassis, self).__init__()
|
super(Chassis, self).__init__()
|
||||||
|
|
||||||
@ -127,6 +129,8 @@ class Chassis(ChassisBase):
|
|||||||
self._RJ45_port_inited = False
|
self._RJ45_port_inited = False
|
||||||
self._RJ45_port_list = None
|
self._RJ45_port_list = None
|
||||||
|
|
||||||
|
Chassis.chassis_instance = self
|
||||||
|
|
||||||
self.modules_mgmt_thread = threading.Thread()
|
self.modules_mgmt_thread = threading.Thread()
|
||||||
self.modules_changes_queue = queue.Queue()
|
self.modules_changes_queue = queue.Queue()
|
||||||
self.modules_mgmt_task_stopping_event = threading.Event()
|
self.modules_mgmt_task_stopping_event = threading.Event()
|
||||||
|
@ -31,6 +31,8 @@ try:
|
|||||||
from . import utils
|
from . import utils
|
||||||
from .device_data import DeviceDataManager
|
from .device_data import DeviceDataManager
|
||||||
from sonic_platform_base.sonic_xcvr.sfp_optoe_base import SfpOptoeBase
|
from sonic_platform_base.sonic_xcvr.sfp_optoe_base import SfpOptoeBase
|
||||||
|
from sonic_platform_base.sonic_xcvr.fields import consts
|
||||||
|
from sonic_platform_base.sonic_xcvr.api.public import sff8636, sff8436
|
||||||
|
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise ImportError (str(e) + "- required module not found")
|
raise ImportError (str(e) + "- required module not found")
|
||||||
@ -155,6 +157,10 @@ SFP_TYPE_SFF8636 = 'sff8636'
|
|||||||
# SFP stderr
|
# SFP stderr
|
||||||
SFP_EEPROM_NOT_AVAILABLE = 'Input/output error'
|
SFP_EEPROM_NOT_AVAILABLE = 'Input/output error'
|
||||||
|
|
||||||
|
SFP_DEFAULT_TEMP_WARNNING_THRESHOLD = 70.0
|
||||||
|
SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD = 80.0
|
||||||
|
SFP_TEMPERATURE_SCALE = 8.0
|
||||||
|
|
||||||
# SFP EEPROM limited bytes
|
# SFP EEPROM limited bytes
|
||||||
limited_eeprom = {
|
limited_eeprom = {
|
||||||
SFP_TYPE_CMIS: {
|
SFP_TYPE_CMIS: {
|
||||||
@ -264,7 +270,7 @@ class SFP(NvidiaSFPCommon):
|
|||||||
|
|
||||||
if slot_id == 0: # For non-modular chassis
|
if slot_id == 0: # For non-modular chassis
|
||||||
from .thermal import initialize_sfp_thermal
|
from .thermal import initialize_sfp_thermal
|
||||||
self._thermal_list = initialize_sfp_thermal(sfp_index)
|
self._thermal_list = initialize_sfp_thermal(self)
|
||||||
else: # For modular chassis
|
else: # For modular chassis
|
||||||
# (slot_id % MAX_LC_CONUNT - 1) * MAX_PORT_COUNT + (sfp_index + 1) * (MAX_PORT_COUNT / LC_PORT_COUNT)
|
# (slot_id % MAX_LC_CONUNT - 1) * MAX_PORT_COUNT + (sfp_index + 1) * (MAX_PORT_COUNT / LC_PORT_COUNT)
|
||||||
max_linecard_count = DeviceDataManager.get_linecard_count()
|
max_linecard_count = DeviceDataManager.get_linecard_count()
|
||||||
@ -822,6 +828,77 @@ class SFP(NvidiaSFPCommon):
|
|||||||
api = self.get_xcvr_api()
|
api = self.get_xcvr_api()
|
||||||
return [False] * api.NUM_CHANNELS if api else None
|
return [False] * api.NUM_CHANNELS if api else None
|
||||||
|
|
||||||
|
def get_temperature(self):
    """Get the current temperature of this module.

    When the module is not under software control, the raw value is read
    from the SDK sysfs node and divided by SFP_TEMPERATURE_SCALE.
    Otherwise the value is obtained from the base class implementation
    after re-initializing the transceiver state.

    Returns:
        float or None: the temperature (presumably degrees Celsius -
        confirm against the SDK sysfs contract), None when the sysfs node
        is missing or no value is available, or 0.0 when the control mode
        could not be determined or the sysfs read raised.
    """
    try:
        if not self.is_sw_control():
            temp_file = f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/input'
            if not os.path.exists(temp_file):
                logger.log_error(f'Failed to read from file {temp_file} - not exists')
                return None
            temperature = utils.read_int_from_file(temp_file,
                                                   log_func=None)
            return temperature / SFP_TEMPERATURE_SCALE if temperature is not None else None
    except Exception:
        # Keep the best-effort 0.0 result for ordinary failures, but do not
        # swallow SystemExit/KeyboardInterrupt the way a bare except would.
        return 0.0

    # Software-controlled module: refresh the cached transceiver state
    # before delegating to the base class implementation.
    self.reinit()
    return super().get_temperature()
|
||||||
|
|
||||||
|
def get_temperature_warning_threashold(self):
    """Get temperature warning threshold

    When the module is not under software control, the threshold is read
    from the SDK sysfs "emergency" node and divided by
    SFP_TEMPERATURE_SCALE. Otherwise it is looked up in the threshold
    data returned by _get_temperature_threshold().

    Returns:
        float: temperature warning threshold;
        SFP_DEFAULT_TEMP_WARNNING_THRESHOLD is returned whenever the
        value cannot be obtained.
    """
    try:
        if not self.is_sw_control():
            emergency = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/emergency',
                                                 log_func=None,
                                                 default=None)
            return emergency / SFP_TEMPERATURE_SCALE if emergency is not None else SFP_DEFAULT_TEMP_WARNNING_THRESHOLD
    except Exception:
        # Fall back to the default on ordinary failures only; a bare except
        # would also hide SystemExit/KeyboardInterrupt.
        return SFP_DEFAULT_TEMP_WARNNING_THRESHOLD

    thresh = self._get_temperature_threshold()
    if thresh and consts.TEMP_HIGH_WARNING_FIELD in thresh:
        return thresh[consts.TEMP_HIGH_WARNING_FIELD]
    return SFP_DEFAULT_TEMP_WARNNING_THRESHOLD
|
||||||
|
|
||||||
|
def get_temperature_critical_threashold(self):
    """Get temperature critical threshold

    When the module is not under software control, the threshold is read
    from the SDK sysfs "critical" node and divided by
    SFP_TEMPERATURE_SCALE. Otherwise it is looked up in the threshold
    data returned by _get_temperature_threshold().

    Returns:
        float: temperature critical threshold;
        SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD is returned whenever the
        value cannot be obtained.
    """
    try:
        if not self.is_sw_control():
            critical = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/critical',
                                                log_func=None,
                                                default=None)
            return critical / SFP_TEMPERATURE_SCALE if critical is not None else SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD
    except Exception:
        # Fall back to the default on ordinary failures only; a bare except
        # would also hide SystemExit/KeyboardInterrupt.
        return SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD

    thresh = self._get_temperature_threshold()
    if thresh and consts.TEMP_HIGH_ALARM_FIELD in thresh:
        return thresh[consts.TEMP_HIGH_ALARM_FIELD]
    return SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD
|
||||||
|
|
||||||
|
def _get_temperature_threshold(self):
    """Read the threshold data of this module from its EEPROM.

    Returns:
        The value returned by the EEPROM read of the threshold field, or
        None when no transceiver API is available or the API reports that
        thresholds are not supported.
    """
    self.reinit()
    xcvr_api = self.get_xcvr_api()
    if not xcvr_api:
        return None

    if not xcvr_api.get_transceiver_thresholds_support():
        return None

    # SFF-8636 and SFF-8436 APIs are read through a dedicated temperature
    # threshold field; every other API uses the generic threshold field.
    if isinstance(xcvr_api, (sff8636.Sff8636Api, sff8436.Sff8436Api)):
        return xcvr_api.xcvr_eeprom.read(consts.TEMP_THRESHOLDS_FIELD)
    return xcvr_api.xcvr_eeprom.read(consts.THRESHOLDS_FIELD)
|
||||||
|
|
||||||
def get_xcvr_api(self):
|
def get_xcvr_api(self):
|
||||||
"""
|
"""
|
||||||
Retrieves the XcvrApi associated with this SFP
|
Retrieves the XcvrApi associated with this SFP
|
||||||
|
@ -36,6 +36,8 @@ except ImportError as e:
|
|||||||
# Global logger class instance
|
# Global logger class instance
|
||||||
logger = Logger()
|
logger = Logger()
|
||||||
|
|
||||||
|
DEFAULT_TEMP_SCALE = 1000
|
||||||
|
|
||||||
"""
|
"""
|
||||||
The most important information for creating a Thermal object is 3 sysfs files: temperature file, high threshold file and
|
The most important information for creating a Thermal object is 3 sysfs files: temperature file, high threshold file and
|
||||||
high critical threshold file. There is no common naming rule for thermal objects on Nvidia platform. There are two types
|
high critical threshold file. There is no common naming rule for thermal objects on Nvidia platform. There are two types
|
||||||
@ -72,9 +74,11 @@ THERMAL_NAMING_RULE = {
|
|||||||
"chassis thermals": [
|
"chassis thermals": [
|
||||||
{
|
{
|
||||||
"name": "ASIC",
|
"name": "ASIC",
|
||||||
"temperature": "asic",
|
"temperature": "input",
|
||||||
"high_threshold": "asic_temp_emergency",
|
"high_threshold_default": 105,
|
||||||
"high_critical_threshold": "asic_temp_trip_crit"
|
"high_critical_threshold_default": 120,
|
||||||
|
"sysfs_folder": "/sys/module/sx_core/asic0/temperature",
|
||||||
|
"scale": 8
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Ambient Port Side Temp",
|
"name": "Ambient Port Side Temp",
|
||||||
@ -187,8 +191,8 @@ def initialize_psu_thermal(psu_index, presence_cb):
|
|||||||
return [create_indexable_thermal(THERMAL_NAMING_RULE['psu thermals'], psu_index, CHASSIS_THERMAL_SYSFS_FOLDER, 1, presence_cb)]
|
return [create_indexable_thermal(THERMAL_NAMING_RULE['psu thermals'], psu_index, CHASSIS_THERMAL_SYSFS_FOLDER, 1, presence_cb)]
|
||||||
|
|
||||||
|
|
||||||
def initialize_sfp_thermal(sfp_index):
|
def initialize_sfp_thermal(sfp):
|
||||||
return [create_indexable_thermal(THERMAL_NAMING_RULE['sfp thermals'], sfp_index, CHASSIS_THERMAL_SYSFS_FOLDER, 1)]
|
return [ModuleThermal(sfp)]
|
||||||
|
|
||||||
|
|
||||||
def initialize_linecard_thermals(lc_name, lc_index):
|
def initialize_linecard_thermals(lc_name, lc_index):
|
||||||
@ -214,6 +218,7 @@ def initialize_linecard_sfp_thermal(lc_name, lc_index, sfp_index):
|
|||||||
def create_indexable_thermal(rule, index, sysfs_folder, position, presence_cb=None):
|
def create_indexable_thermal(rule, index, sysfs_folder, position, presence_cb=None):
|
||||||
index += rule.get('start_index', 1)
|
index += rule.get('start_index', 1)
|
||||||
name = rule['name'].format(index)
|
name = rule['name'].format(index)
|
||||||
|
sysfs_folder = rule.get('sysfs_folder', sysfs_folder)
|
||||||
temp_file = os.path.join(sysfs_folder, rule['temperature'].format(index))
|
temp_file = os.path.join(sysfs_folder, rule['temperature'].format(index))
|
||||||
_check_thermal_sysfs_existence(temp_file)
|
_check_thermal_sysfs_existence(temp_file)
|
||||||
if 'high_threshold' in rule:
|
if 'high_threshold' in rule:
|
||||||
@ -226,10 +231,13 @@ def create_indexable_thermal(rule, index, sysfs_folder, position, presence_cb=No
|
|||||||
_check_thermal_sysfs_existence(high_crit_th_file)
|
_check_thermal_sysfs_existence(high_crit_th_file)
|
||||||
else:
|
else:
|
||||||
high_crit_th_file = None
|
high_crit_th_file = None
|
||||||
|
high_th_default = rule.get('high_threshold_default')
|
||||||
|
high_crit_th_default = rule.get('high_critical_threshold_default')
|
||||||
|
scale = rule.get('scale', DEFAULT_TEMP_SCALE)
|
||||||
if not presence_cb:
|
if not presence_cb:
|
||||||
return Thermal(name, temp_file, high_th_file, high_crit_th_file, position)
|
return Thermal(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position)
|
||||||
else:
|
else:
|
||||||
return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, position, presence_cb)
|
return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position, presence_cb)
|
||||||
|
|
||||||
|
|
||||||
def create_single_thermal(rule, sysfs_folder, position, presence_cb=None):
|
def create_single_thermal(rule, sysfs_folder, position, presence_cb=None):
|
||||||
@ -243,6 +251,7 @@ def create_single_thermal(rule, sysfs_folder, position, presence_cb=None):
|
|||||||
elif not default_present:
|
elif not default_present:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
sysfs_folder = rule.get('sysfs_folder', sysfs_folder)
|
||||||
temp_file = os.path.join(sysfs_folder, temp_file)
|
temp_file = os.path.join(sysfs_folder, temp_file)
|
||||||
_check_thermal_sysfs_existence(temp_file)
|
_check_thermal_sysfs_existence(temp_file)
|
||||||
if 'high_threshold' in rule:
|
if 'high_threshold' in rule:
|
||||||
@ -255,11 +264,14 @@ def create_single_thermal(rule, sysfs_folder, position, presence_cb=None):
|
|||||||
_check_thermal_sysfs_existence(high_crit_th_file)
|
_check_thermal_sysfs_existence(high_crit_th_file)
|
||||||
else:
|
else:
|
||||||
high_crit_th_file = None
|
high_crit_th_file = None
|
||||||
|
high_th_default = rule.get('high_threshold_default')
|
||||||
|
high_crit_th_default = rule.get('high_critical_threshold_default')
|
||||||
|
scale = rule.get('scale', DEFAULT_TEMP_SCALE)
|
||||||
name = rule['name']
|
name = rule['name']
|
||||||
if not presence_cb:
|
if not presence_cb:
|
||||||
return Thermal(name, temp_file, high_th_file, high_crit_th_file, position)
|
return Thermal(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position)
|
||||||
else:
|
else:
|
||||||
return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, position, presence_cb)
|
return RemovableThermal(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position, presence_cb)
|
||||||
|
|
||||||
|
|
||||||
def _check_thermal_sysfs_existence(file_path):
|
def _check_thermal_sysfs_existence(file_path):
|
||||||
@ -268,7 +280,7 @@ def _check_thermal_sysfs_existence(file_path):
|
|||||||
|
|
||||||
|
|
||||||
class Thermal(ThermalBase):
|
class Thermal(ThermalBase):
|
||||||
def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position):
|
def __init__(self, name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position):
|
||||||
"""
|
"""
|
||||||
index should be a string for category ambient and int for other categories
|
index should be a string for category ambient and int for other categories
|
||||||
"""
|
"""
|
||||||
@ -278,6 +290,9 @@ class Thermal(ThermalBase):
|
|||||||
self.temperature = temp_file
|
self.temperature = temp_file
|
||||||
self.high_threshold = high_th_file
|
self.high_threshold = high_th_file
|
||||||
self.high_critical_threshold = high_crit_th_file
|
self.high_critical_threshold = high_crit_th_file
|
||||||
|
self.high_th_default = high_th_default
|
||||||
|
self.high_crit_th_default = high_crit_th_default
|
||||||
|
self.scale = scale
|
||||||
|
|
||||||
def get_name(self):
|
def get_name(self):
|
||||||
"""
|
"""
|
||||||
@ -297,7 +312,7 @@ class Thermal(ThermalBase):
|
|||||||
of one degree Celsius, e.g. 30.125
|
of one degree Celsius, e.g. 30.125
|
||||||
"""
|
"""
|
||||||
value = utils.read_float_from_file(self.temperature, None, log_func=logger.log_info)
|
value = utils.read_float_from_file(self.temperature, None, log_func=logger.log_info)
|
||||||
return value / 1000.0 if (value is not None and value != 0) else None
|
return value / self.scale if (value is not None and value != 0) else None
|
||||||
|
|
||||||
def get_high_threshold(self):
|
def get_high_threshold(self):
|
||||||
"""
|
"""
|
||||||
@ -308,9 +323,9 @@ class Thermal(ThermalBase):
|
|||||||
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
||||||
"""
|
"""
|
||||||
if self.high_threshold is None:
|
if self.high_threshold is None:
|
||||||
return None
|
return self.high_th_default
|
||||||
value = utils.read_float_from_file(self.high_threshold, None, log_func=logger.log_info)
|
value = utils.read_float_from_file(self.high_threshold, None, log_func=logger.log_info)
|
||||||
return value / 1000.0 if (value is not None and value != 0) else None
|
return value / self.scale if (value is not None and value != 0) else self.high_th_default
|
||||||
|
|
||||||
def get_high_critical_threshold(self):
|
def get_high_critical_threshold(self):
|
||||||
"""
|
"""
|
||||||
@ -321,9 +336,9 @@ class Thermal(ThermalBase):
|
|||||||
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
||||||
"""
|
"""
|
||||||
if self.high_critical_threshold is None:
|
if self.high_critical_threshold is None:
|
||||||
return None
|
return self.high_crit_th_default
|
||||||
value = utils.read_float_from_file(self.high_critical_threshold, None, log_func=logger.log_info)
|
value = utils.read_float_from_file(self.high_critical_threshold, None, log_func=logger.log_info)
|
||||||
return value / 1000.0 if (value is not None and value != 0) else None
|
return value / self.scale if (value is not None and value != 0) else self.high_crit_th_default
|
||||||
|
|
||||||
def get_position_in_parent(self):
|
def get_position_in_parent(self):
|
||||||
"""
|
"""
|
||||||
@ -343,8 +358,8 @@ class Thermal(ThermalBase):
|
|||||||
|
|
||||||
|
|
||||||
class RemovableThermal(Thermal):
|
class RemovableThermal(Thermal):
|
||||||
def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position, presence_cb):
|
def __init__(self, name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position, presence_cb):
|
||||||
super(RemovableThermal, self).__init__(name, temp_file, high_th_file, high_crit_th_file, position)
|
super(RemovableThermal, self).__init__(name, temp_file, high_th_file, high_crit_th_file, high_th_default, high_crit_th_default, scale, position)
|
||||||
self.presence_cb = presence_cb
|
self.presence_cb = presence_cb
|
||||||
|
|
||||||
def get_temperature(self):
|
def get_temperature(self):
|
||||||
@ -388,3 +403,68 @@ class RemovableThermal(Thermal):
|
|||||||
logger.log_debug("get_high_critical_threshold for {} failed due to {}".format(self.name, hint))
|
logger.log_debug("get_high_critical_threshold for {} failed due to {}".format(self.name, hint))
|
||||||
return None
|
return None
|
||||||
return super(RemovableThermal, self).get_high_critical_threshold()
|
return super(RemovableThermal, self).get_high_critical_threshold()
|
||||||
|
|
||||||
|
|
||||||
|
class ModuleThermal(ThermalBase):
    """Thermal object of a transceiver module.

    Every reading is delegated to the SFP object passed to the constructor.
    """

    def __init__(self, sfp):
        """
        Create the thermal object for one module.

        Args:
            sfp: the SFP object that provides temperature and thresholds;
                its sdk_index (0-based) is used to build the 1-based name.
        """
        super().__init__()
        self.sfp = sfp
        self.name = f'xSFP module {sfp.sdk_index + 1} Temp'

    def get_name(self):
        """
        Retrieves the name of the device

        Returns:
            string: The name of the device
        """
        return self.name

    def get_temperature(self):
        """
        Retrieves current temperature reading from the module

        Returns:
            Whatever the SFP object's get_temperature() returns
        """
        module = self.sfp
        return module.get_temperature()

    def get_high_threshold(self):
        """
        Retrieves the high threshold temperature of the module

        Returns:
            Whatever the SFP object's get_temperature_warning_threashold()
            returns
        """
        module = self.sfp
        return module.get_temperature_warning_threashold()

    def get_high_critical_threshold(self):
        """
        Retrieves the high critical threshold temperature of the module

        Returns:
            Whatever the SFP object's get_temperature_critical_threashold()
            returns
        """
        module = self.sfp
        return module.get_temperature_critical_threashold()

    def get_position_in_parent(self):
        """
        Retrieves 1-based relative physical position in parent device

        Returns:
            integer: always 1
        """
        return 1

    def is_replaceable(self):
        """
        Indicate whether this device is replaceable.

        Returns:
            bool: always False
        """
        return False
|
||||||
|
@ -15,9 +15,36 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
|
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
|
||||||
|
from . import thermal_updater
|
||||||
|
from .device_data import DeviceDataManager
|
||||||
|
|
||||||
|
|
||||||
class ThermalManager(ThermalManagerBase):
|
class ThermalManager(ThermalManagerBase):
|
||||||
|
thermal_updater_task = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def run_policy(cls, chassis):
|
def run_policy(cls, chassis):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@classmethod
def initialize(cls):
    """
    Initialize the thermal manager.

    In independent mode a ThermalUpdater is created for all SFPs of the
    chassis instance and started; otherwise nothing is done.
    :return:
    """
    if not DeviceDataManager.is_independent_mode():
        return

    # Imported here rather than at module level (presumably to avoid a
    # circular import with the chassis module - confirm).
    from .chassis import Chassis
    all_sfps = Chassis.chassis_instance.get_all_sfps()
    cls.thermal_updater_task = thermal_updater.ThermalUpdater(all_sfps)
    cls.thermal_updater_task.start()
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
def deinitialize(cls):
    """
    Destroy the thermal manager.

    In independent mode the thermal updater task is stopped, if one was
    created. Outside independent mode this is a no-op.
    :return:
    """
    if not DeviceDataManager.is_independent_mode():
        return

    # thermal_updater_task stays None until initialize() has created it;
    # guard so that deinitialize() without a prior initialize() does not
    # raise AttributeError.
    if cls.thermal_updater_task is not None:
        cls.thermal_updater_task.stop()
|
||||||
|
@ -0,0 +1,213 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
|
||||||
|
# Apache-2.0
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from . import utils
|
||||||
|
from sonic_py_common import logger
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Make the directory below importable so that the import that follows can
# locate hw_management_independent_mode_update (presumably installed there by
# hw-management at runtime - confirm).
sys.path.append('/run/hw-management/bin')

try:
    import hw_management_independent_mode_update
except ImportError:
    # For unit test only
    from unittest import mock
    hw_management_independent_mode_update = mock.MagicMock()
    hw_management_independent_mode_update.module_data_set_module_counter = mock.MagicMock()
    hw_management_independent_mode_update.thermal_data_set_asic = mock.MagicMock()
    hw_management_independent_mode_update.thermal_data_set_module = mock.MagicMock()
    hw_management_independent_mode_update.thermal_data_clean_asic = mock.MagicMock()
    hw_management_independent_mode_update.thermal_data_clean_module = mock.MagicMock()


# Module temperatures and thresholds are multiplied by this factor before
# being handed to hw-management.
SFP_TEMPERATURE_SCALE = 1000
# Raw ASIC sysfs readings are multiplied by this factor before being handed to
# hw-management.
ASIC_TEMPERATURE_SCALE = 125
# Used when the ASIC threshold sysfs nodes cannot be read; already expressed
# in the scaled unit reported to hw-management.
ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD = 105000
ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD = 120000

# Fault value reported to hw-management when thermal data could not be read.
ERROR_READ_THERMAL_DATA = 254000

# JSON configuration from which the polling intervals are loaded.
TC_CONFIG_FILE = '/run/hw-management/config/tc_config.json'
# NOTE: this rebinds the name of the imported logger module to a Logger
# instance.
logger = logger.Logger('thermal-updater')
|
||||||
|
|
||||||
|
|
||||||
|
class ThermalUpdater:
    """Periodically push ASIC and module thermal data to hw-management.

    The data is read from the SDK sysfs (ASIC) and from the SFP objects
    (modules) and forwarded through hw_management_independent_mode_update.
    """

    def __init__(self, sfp_list):
        """
        Args:
            sfp_list: SFP objects whose thermal data should be reported.
        """
        self._sfp_list = sfp_list
        # Last known presence of every module, keyed by sdk_index.
        self._sfp_status = {}
        self._timer = utils.Timer()

    def load_tc_config(self):
        """Load polling intervals from TC_CONFIG_FILE and schedule the updates.

        Defaults (1 for ASIC, 10 for modules) are used for every value that
        is not configured. A configured poll_time is halved before being
        used as the polling interval.
        """
        asic_poll_interval = 1
        sfp_poll_interval = 10
        data = utils.load_json_file(TC_CONFIG_FILE)
        if not data:
            logger.log_notice(f'{TC_CONFIG_FILE} does not exist, use default polling interval')
        else:
            dev_parameters = data.get('dev_parameters')
            if dev_parameters is not None:
                asic_parameter = dev_parameters.get('asic')
                if asic_parameter is not None:
                    asic_poll_interval_config = asic_parameter.get('poll_time')
                    if asic_poll_interval_config:
                        asic_poll_interval = int(asic_poll_interval_config) / 2
                # The key is the literal text 'module\d+', not a pattern that
                # is matched here.
                module_parameter = dev_parameters.get('module\\d+')
                if module_parameter is not None:
                    sfp_poll_interval_config = module_parameter.get('poll_time')
                    if sfp_poll_interval_config:
                        sfp_poll_interval = int(sfp_poll_interval_config) / 2

        logger.log_notice(f'ASIC polling interval: {asic_poll_interval}')
        self._timer.schedule(asic_poll_interval, self.update_asic)
        logger.log_notice(f'Module polling interval: {sfp_poll_interval}')
        self._timer.schedule(sfp_poll_interval, self.update_module)

    def start(self):
        """Clean stale data, wait for the modules and start the update timer.

        If the modules do not become ready in time, hw-management-tc is
        suspended and the timer is not started.
        """
        self.clean_thermal_data()
        if not self.wait_all_sfp_ready():
            logger.log_error('Failed to wait for all SFP ready, will put hw-management-tc to suspend')
            self.control_tc(True)
            return
        self.control_tc(False)
        self.load_tc_config()
        self._timer.start()

    def stop(self):
        """Stop the update timer and suspend hw-management-tc."""
        self._timer.stop()
        self.control_tc(True)

    def control_tc(self, suspend):
        """Suspend or resume hw-management-tc.

        Args:
            suspend: True to write 1 (suspend), False to write 0 (resume)
                to the hw-management suspend file.
        """
        logger.log_notice(f'Set hw-management-tc to {"suspend" if suspend else "resume"}')
        utils.write_file('/run/hw-management/config/suspend', 1 if suspend else 0)

    def clean_thermal_data(self):
        """Publish the module count and clean ASIC and per-module thermal data."""
        hw_management_independent_mode_update.module_data_set_module_counter(len(self._sfp_list))
        hw_management_independent_mode_update.thermal_data_clean_asic(0)
        for sfp in self._sfp_list:
            hw_management_independent_mode_update.thermal_data_clean_module(
                0,
                sfp.sdk_index + 1
            )

    def wait_all_sfp_ready(self):
        """Wait until is_sw_control() succeeds for every module.

        Up to 60 attempts are made, one second apart.

        Returns:
            bool: True once every module answered without raising, False
            if that did not happen within the allowed attempts.
        """
        logger.log_notice('Waiting for all SFP modules ready...')
        max_wait_time = 60
        ready_set = set()
        while len(ready_set) != len(self._sfp_list):
            for sfp in self._sfp_list:
                try:
                    sfp.is_sw_control()
                    ready_set.add(sfp)
                except Exception:
                    # Module not ready yet; retry on the next attempt.
                    continue
            # Check right away so that a successful attempt neither costs an
            # extra sleep nor is reported as a timeout on the last attempt.
            if len(ready_set) == len(self._sfp_list):
                break
            max_wait_time -= 1
            if max_wait_time == 0:
                return False
            time.sleep(1)

        logger.log_notice('All SFP modules are ready')
        return True

    def get_asic_temp(self):
        """Return the scaled ASIC temperature, or None if it cannot be read."""
        temperature = utils.read_int_from_file('/sys/module/sx_core/asic0/temperature/input', default=None)
        return temperature * ASIC_TEMPERATURE_SCALE if temperature is not None else None

    def get_asic_temp_warning_threashold(self):
        """Return the scaled ASIC warning threshold, or the default if unreadable."""
        emergency = utils.read_int_from_file('/sys/module/sx_core/asic0/temperature/emergency', default=None, log_func=None)
        return emergency * ASIC_TEMPERATURE_SCALE if emergency is not None else ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD

    def get_asic_temp_critical_threashold(self):
        """Return the scaled ASIC critical threshold, or the default if unreadable."""
        critical = utils.read_int_from_file('/sys/module/sx_core/asic0/temperature/critical', default=None, log_func=None)
        return critical * ASIC_TEMPERATURE_SCALE if critical is not None else ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD

    def update_single_module(self, sfp):
        """Report the thermal data of one module to hw-management.

        A present module gets its temperature and thresholds reported; a
        module that just became absent gets its data cleaned. Any exception
        is logged and reported as a read fault with zeroed values.

        Args:
            sfp: the SFP object to report.
        """
        try:
            presence = sfp.get_presence()
            pre_presence = self._sfp_status.get(sfp.sdk_index)
            if presence:
                temperature = sfp.get_temperature()
                if temperature == 0:
                    # A zero temperature is reported as-is with zero
                    # thresholds and no fault.
                    warning_thresh = 0
                    critical_thresh = 0
                    fault = 0
                else:
                    warning_thresh = sfp.get_temperature_warning_threashold()
                    critical_thresh = sfp.get_temperature_critical_threashold()
                    fault = ERROR_READ_THERMAL_DATA if (temperature is None or warning_thresh is None or critical_thresh is None) else 0
                    temperature = 0 if temperature is None else int(temperature * SFP_TEMPERATURE_SCALE)
                    warning_thresh = 0 if warning_thresh is None else int(warning_thresh * SFP_TEMPERATURE_SCALE)
                    critical_thresh = 0 if critical_thresh is None else int(critical_thresh * SFP_TEMPERATURE_SCALE)

                hw_management_independent_mode_update.thermal_data_set_module(
                    0,  # ASIC index always 0 for now
                    sfp.sdk_index + 1,
                    temperature,
                    critical_thresh,
                    warning_thresh,
                    fault
                )
            else:
                if pre_presence != presence:
                    hw_management_independent_mode_update.thermal_data_clean_module(0, sfp.sdk_index + 1)

            if pre_presence != presence:
                self._sfp_status[sfp.sdk_index] = presence
        except Exception as e:
            logger.log_error(f'Failed to update module {sfp.sdk_index} thermal data - {e}')
            hw_management_independent_mode_update.thermal_data_set_module(
                0,  # ASIC index always 0 for now
                sfp.sdk_index + 1,
                0,
                0,
                0,
                ERROR_READ_THERMAL_DATA
            )

    def update_module(self):
        """Report the thermal data of every module."""
        for sfp in self._sfp_list:
            self.update_single_module(sfp)

    def update_asic(self):
        """Report the ASIC thermal data to hw-management.

        If the temperature cannot be read, the warning threshold is sent in
        its place together with a read fault. Any exception is logged and
        reported as a read fault with zeroed values.
        """
        try:
            asic_temp = self.get_asic_temp()
            warn_threshold = self.get_asic_temp_warning_threashold()
            critical_threshold = self.get_asic_temp_critical_threashold()
            fault = 0
            if asic_temp is None:
                logger.log_error('Failed to read ASIC temperature, send fault to hw-management-tc')
                asic_temp = warn_threshold
                fault = ERROR_READ_THERMAL_DATA

            hw_management_independent_mode_update.thermal_data_set_asic(
                0,  # ASIC index always 0 for now
                asic_temp,
                critical_threshold,
                warn_threshold,
                fault
            )
        except Exception as e:
            logger.log_error(f'Failed to update ASIC thermal data - {e}')
            hw_management_independent_mode_update.thermal_data_set_asic(
                0,  # ASIC index always 0 for now
                0,
                0,
                0,
                ERROR_READ_THERMAL_DATA
            )
|
@ -18,6 +18,7 @@ import ctypes
|
|||||||
import functools
|
import functools
|
||||||
import subprocess
|
import subprocess
|
||||||
import json
|
import json
|
||||||
|
import queue
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
@ -289,6 +290,60 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class TimerEvent:
    """A callback together with its scheduling parameters."""

    def __init__(self, interval, cb, repeat):
        """
        Args:
            interval: seconds between executions (also the initial delay
                when the event is not run immediately).
            cb: callable invoked without arguments.
            repeat: True to reschedule the event after each execution.
        """
        self.interval = interval
        self._cb = cb
        self.repeat = repeat

    def execute(self):
        """Invoke the callback."""
        self._cb()


class Timer(threading.Thread):
    """Thread that executes scheduled TimerEvent callbacks in due-time order."""

    def __init__(self):
        # Local import: only this class needs itertools.
        import itertools

        super(Timer, self).__init__()
        self._timestamp_queue = queue.PriorityQueue()
        self._wait_event = threading.Event()
        self._stop_event = threading.Event()
        self._min_timestamp = None
        # Tie-breaker for queue entries: TimerEvent objects are not
        # orderable, so two entries with the same timestamp would otherwise
        # raise TypeError when the priority queue compares them.
        self._sequence = itertools.count()

    def schedule(self, interval, cb, repeat=True, run_now=True):
        """Schedule a callback.

        Args:
            interval: seconds between executions.
            cb: callable invoked without arguments.
            repeat: True to keep rescheduling after each execution.
            run_now: True to make the first execution due immediately,
                False to make it due after one interval.
        """
        timer_event = TimerEvent(interval, cb, repeat)
        self.add_timer_event(timer_event, run_now)

    def add_timer_event(self, timer_event, run_now=True):
        """Queue a timer event and wake the worker if it must re-plan.

        Args:
            timer_event: the TimerEvent to queue.
            run_now: True to make it due immediately, False to make it due
                after timer_event.interval seconds.
        """
        timestamp = time.time()
        if not run_now:
            timestamp += timer_event.interval

        self._timestamp_queue.put_nowait((timestamp, next(self._sequence), timer_event))
        # Wake the worker when it is idle (no pending deadline) or when the
        # new event is due before the deadline it is currently waiting for.
        min_timestamp = self._min_timestamp
        if min_timestamp is None or timestamp < min_timestamp:
            self._wait_event.set()

    def stop(self):
        """Stop the worker thread and wait for it to exit."""
        if self.is_alive():
            # Raise the stop flag before waking the worker, so that a worker
            # woken by the event always observes the flag on its next check.
            self._stop_event.set()
            self._wait_event.set()
            self.join()

    def run(self):
        """Execute due events until stop() is called."""
        while not self._stop_event.is_set():
            try:
                item = self._timestamp_queue.get_nowait()
            except queue.Empty:
                # Nothing scheduled: sleep until an event is added or stop()
                # is called, instead of blocking in the queue where stop()
                # could not interrupt. Publishing None first and re-checking
                # the queue closes the window in which a concurrent
                # add_timer_event() would not wake us.
                self._min_timestamp = None
                if self._timestamp_queue.empty():
                    self._wait_event.wait()
                self._wait_event.clear()
                continue

            now = time.time()
            self._min_timestamp = item[0]
            if self._min_timestamp > now:
                # Not due yet: wait for the deadline (or an earlier wake-up),
                # then put the entry back and re-evaluate the queue head.
                self._wait_event.wait(self._min_timestamp - now)
                self._wait_event.clear()
                self._timestamp_queue.put(item)
                continue

            timer_event = item[-1]
            timer_event.execute()
            if timer_event.repeat:
                self.add_timer_event(timer_event, False)
|
||||||
|
|
||||||
|
|
||||||
class DbUtils:
|
class DbUtils:
|
||||||
lock = threading.Lock()
|
lock = threading.Lock()
|
||||||
db_instances = threading.local()
|
db_instances = threading.local()
|
||||||
|
@ -292,6 +292,46 @@ class TestSfp:
|
|||||||
assert sfp.get_transceiver_threshold_info()
|
assert sfp.get_transceiver_threshold_info()
|
||||||
sfp.reinit()
|
sfp.reinit()
|
||||||
|
|
||||||
|
@mock.patch('os.path.exists')
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_get_temperature(self, mock_read, mock_exists):
    """Exercise SFP.get_temperature() with and without software control.

    ``mock_read`` replaces ``sonic_platform.utils.read_int_from_file`` and
    ``mock_exists`` replaces ``os.path.exists``.
    """
    sfp = SFP(0)
    sfp.is_sw_control = mock.MagicMock(return_value=True)

    # Software control, path reported as missing: no temperature expected.
    mock_exists.return_value = False
    assert sfp.get_temperature() is None

    # Software control, path reported as present: still None expected.
    mock_exists.return_value = True
    assert sfp.get_temperature() is None

    # Not under software control and the file read yields nothing.
    mock_read.return_value = None
    sfp.is_sw_control.return_value = False
    assert sfp.get_temperature() is None

    # A raw reading of 448 is expected to be reported as 56.0.
    mock_read.return_value = 448
    assert sfp.get_temperature() == 56.0
|
||||||
|
|
||||||
|
def test_get_temperature_threshold(self):
    """Check the warning/critical temperature thresholds reported by an SFP.

    Three situations are covered in order: no transceiver API stubbed, an
    API that reports thresholds as unsupported, and an API whose EEPROM
    read returns explicit alarm/warning values.
    """
    module = SFP(0)
    module.is_sw_control = mock.MagicMock(return_value=True)

    # Before any transceiver API is stubbed the test expects 70.0 / 80.0.
    assert module.get_temperature_warning_threashold() == 70.0
    assert module.get_temperature_critical_threashold() == 80.0

    # API present but reporting that thresholds are not supported.
    xcvr_api = mock.MagicMock()
    xcvr_api.get_transceiver_thresholds_support = mock.MagicMock(return_value=False)
    module.get_xcvr_api = mock.MagicMock(return_value=xcvr_api)
    assert module.get_temperature_warning_threashold() == 70.0
    assert module.get_temperature_critical_threashold() == 80.0

    # Thresholds supported: the values come from the stubbed EEPROM read.
    from sonic_platform_base.sonic_xcvr.fields import consts
    xcvr_api.get_transceiver_thresholds_support.return_value = True
    xcvr_api.xcvr_eeprom = mock.MagicMock()
    eeprom_values = {
        consts.TEMP_HIGH_ALARM_FIELD: 85.0,
        consts.TEMP_HIGH_WARNING_FIELD: 75.0
    }
    xcvr_api.xcvr_eeprom.read = mock.MagicMock(return_value=eeprom_values)
    assert module.get_temperature_warning_threashold() == 75.0
    assert module.get_temperature_critical_threashold() == 85.0
|
||||||
|
|
||||||
@mock.patch('sonic_platform.utils.read_int_from_file')
|
@mock.patch('sonic_platform.utils.read_int_from_file')
|
||||||
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
|
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
|
||||||
@mock.patch('sonic_platform.utils.DbUtils.get_db_instance')
|
@mock.patch('sonic_platform.utils.DbUtils.get_db_instance')
|
||||||
|
@ -31,6 +31,7 @@ sys.path.insert(0, modules_path)
|
|||||||
import sonic_platform.chassis
|
import sonic_platform.chassis
|
||||||
from sonic_platform.chassis import Chassis
|
from sonic_platform.chassis import Chassis
|
||||||
from sonic_platform.device_data import DeviceDataManager
|
from sonic_platform.device_data import DeviceDataManager
|
||||||
|
from sonic_platform.sfp import SFP
|
||||||
|
|
||||||
sonic_platform.chassis.extract_RJ45_ports_index = mock.MagicMock(return_value=[])
|
sonic_platform.chassis.extract_RJ45_ports_index = mock.MagicMock(return_value=[])
|
||||||
|
|
||||||
@ -148,23 +149,27 @@ class TestThermal:
|
|||||||
|
|
||||||
@mock.patch('os.path.exists', mock.MagicMock(return_value=True))
|
@mock.patch('os.path.exists', mock.MagicMock(return_value=True))
|
||||||
def test_sfp_thermal(self):
|
def test_sfp_thermal(self):
|
||||||
from sonic_platform.thermal import initialize_sfp_thermal, THERMAL_NAMING_RULE
|
from sonic_platform.thermal import THERMAL_NAMING_RULE
|
||||||
thermal_list = initialize_sfp_thermal(0)
|
sfp = SFP(0)
|
||||||
|
thermal_list = sfp.get_all_thermals()
|
||||||
assert len(thermal_list) == 1
|
assert len(thermal_list) == 1
|
||||||
thermal = thermal_list[0]
|
thermal = thermal_list[0]
|
||||||
rule = THERMAL_NAMING_RULE['sfp thermals']
|
rule = THERMAL_NAMING_RULE['sfp thermals']
|
||||||
start_index = rule.get('start_index', 1)
|
start_index = rule.get('start_index', 1)
|
||||||
assert thermal.get_name() == rule['name'].format(start_index)
|
assert thermal.get_name() == rule['name'].format(start_index)
|
||||||
assert rule['temperature'].format(start_index) in thermal.temperature
|
|
||||||
assert rule['high_threshold'].format(start_index) in thermal.high_threshold
|
|
||||||
assert rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold
|
|
||||||
assert thermal.get_position_in_parent() == 1
|
assert thermal.get_position_in_parent() == 1
|
||||||
assert thermal.is_replaceable() == False
|
assert thermal.is_replaceable() == False
|
||||||
|
sfp.get_temperature = mock.MagicMock(return_value=35.4)
|
||||||
|
sfp.get_temperature_warning_threashold = mock.MagicMock(return_value=70)
|
||||||
|
sfp.get_temperature_critical_threashold = mock.MagicMock(return_value=80)
|
||||||
|
assert thermal.get_temperature() == 35.4
|
||||||
|
assert thermal.get_high_threshold() == 70
|
||||||
|
assert thermal.get_high_critical_threshold() == 80
|
||||||
|
|
||||||
@mock.patch('sonic_platform.utils.read_float_from_file')
|
@mock.patch('sonic_platform.utils.read_float_from_file')
|
||||||
def test_get_temperature(self, mock_read):
|
def test_get_temperature(self, mock_read):
|
||||||
from sonic_platform.thermal import Thermal
|
from sonic_platform.thermal import Thermal
|
||||||
thermal = Thermal('test', 'temp_file', None, None, 1)
|
thermal = Thermal('test', 'temp_file', None, None, None, None, 1000, 1)
|
||||||
mock_read.return_value = 35727
|
mock_read.return_value = 35727
|
||||||
assert thermal.get_temperature() == 35.727
|
assert thermal.get_temperature() == 35.727
|
||||||
|
|
||||||
@ -177,7 +182,7 @@ class TestThermal:
|
|||||||
@mock.patch('sonic_platform.utils.read_float_from_file')
|
@mock.patch('sonic_platform.utils.read_float_from_file')
|
||||||
def test_get_high_threshold(self, mock_read):
|
def test_get_high_threshold(self, mock_read):
|
||||||
from sonic_platform.thermal import Thermal
|
from sonic_platform.thermal import Thermal
|
||||||
thermal = Thermal('test', None, None, None, 1)
|
thermal = Thermal('test', None, None, None, None, None, 1000, 1)
|
||||||
assert thermal.get_high_threshold() is None
|
assert thermal.get_high_threshold() is None
|
||||||
|
|
||||||
thermal.high_threshold = 'high_th_file'
|
thermal.high_threshold = 'high_th_file'
|
||||||
@ -193,7 +198,7 @@ class TestThermal:
|
|||||||
@mock.patch('sonic_platform.utils.read_float_from_file')
|
@mock.patch('sonic_platform.utils.read_float_from_file')
|
||||||
def test_get_high_critical_threshold(self, mock_read):
|
def test_get_high_critical_threshold(self, mock_read):
|
||||||
from sonic_platform.thermal import Thermal
|
from sonic_platform.thermal import Thermal
|
||||||
thermal = Thermal('test', None, None, None, 1)
|
thermal = Thermal('test', None, None, None, None, None, 1000, 1)
|
||||||
assert thermal.get_high_critical_threshold() is None
|
assert thermal.get_high_critical_threshold() is None
|
||||||
|
|
||||||
thermal.high_critical_threshold = 'high_th_file'
|
thermal.high_critical_threshold = 'high_th_file'
|
||||||
|
@ -0,0 +1,128 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
|
||||||
|
# Apache-2.0
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
import time
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from sonic_platform import utils
|
||||||
|
from sonic_platform.thermal_updater import ThermalUpdater, hw_management_independent_mode_update
|
||||||
|
from sonic_platform.thermal_updater import ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD, \
|
||||||
|
ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD
|
||||||
|
|
||||||
|
|
||||||
|
mock_tc_config = """
|
||||||
|
{
|
||||||
|
"dev_parameters": {
|
||||||
|
"asic": {
|
||||||
|
"pwm_min": 20,
|
||||||
|
"pwm_max": 100,
|
||||||
|
"val_min": "!70000",
|
||||||
|
"val_max": "!105000",
|
||||||
|
"poll_time": 3
|
||||||
|
},
|
||||||
|
"module\\\\d+": {
|
||||||
|
"pwm_min": 20,
|
||||||
|
"pwm_max": 100,
|
||||||
|
"val_min": 60000,
|
||||||
|
"val_max": 80000,
|
||||||
|
"poll_time": 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class TestThermalUpdater:
    """Unit tests for ThermalUpdater.

    ``hw_management_independent_mode_update`` is shared by every test in this
    module, so tests that make call-count assertions on it reset it first to
    stay independent of execution order.
    """

    def test_load_tc_config_non_exists(self):
        """Without a readable config, two timer entries are still scheduled."""
        updater = ThermalUpdater(None)
        updater.load_tc_config()
        assert updater._timer._timestamp_queue.qsize() == 2

    def test_load_tc_config_mocked(self):
        """With the sample config served via a mocked open(), two entries are scheduled."""
        updater = ThermalUpdater(None)
        mock_os_open = mock.mock_open(read_data=mock_tc_config)
        with mock.patch('sonic_platform.utils.open', mock_os_open):
            updater.load_tc_config()
        assert updater._timer._timestamp_queue.qsize() == 2

    @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_asic', mock.MagicMock())
    @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_module', mock.MagicMock())
    @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.wait_all_sfp_ready')
    @mock.patch('sonic_platform.utils.write_file')
    def test_start_stop(self, mock_write, mock_wait):
        """start()/stop() write the expected value to the suspend file."""
        mock_wait.return_value = True
        mock_sfp = mock.MagicMock()
        mock_sfp.sdk_index = 1
        updater = ThermalUpdater([mock_sfp])
        updater.start()
        mock_write.assert_called_once_with('/run/hw-management/config/suspend', 0)
        # Fail here, rather than silently continuing, if the timer thread
        # never comes up within the timeout.
        assert utils.wait_until(updater._timer.is_alive, timeout=5)

        mock_write.reset_mock()
        updater.stop()
        assert not updater._timer.is_alive()
        mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1)

        # When the SFPs never become ready, start() is expected to write 1.
        mock_wait.return_value = False
        mock_write.reset_mock()
        updater.start()
        mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1)
        updater.stop()

    @mock.patch('sonic_platform.thermal_updater.time.sleep', mock.MagicMock())
    def test_wait_all_sfp_ready(self):
        """wait_all_sfp_ready() succeeds normally and fails when the SFP query raises."""
        mock_sfp = mock.MagicMock()
        mock_sfp.is_sw_control = mock.MagicMock(return_value=True)
        updater = ThermalUpdater([mock_sfp])
        assert updater.wait_all_sfp_ready()
        mock_sfp.is_sw_control.side_effect = Exception('')
        assert not updater.wait_all_sfp_ready()

    @mock.patch('sonic_platform.utils.read_int_from_file')
    def test_update_asic(self, mock_read):
        """ASIC getters convert the raw reading and fall back to defaults on None."""
        # Isolate the call-count assertion below from other tests.
        hw_management_independent_mode_update.reset_mock()

        mock_read.return_value = 8
        updater = ThermalUpdater(None)
        assert updater.get_asic_temp() == 1000
        assert updater.get_asic_temp_warning_threashold() == 1000
        assert updater.get_asic_temp_critical_threashold() == 1000
        updater.update_asic()
        hw_management_independent_mode_update.thermal_data_set_asic.assert_called_once()

        mock_read.return_value = None
        assert updater.get_asic_temp() is None
        assert updater.get_asic_temp_warning_threashold() == ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD
        assert updater.get_asic_temp_critical_threashold() == ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD

    def test_update_module(self):
        """update_module() pushes module data, zeroes it at 0.0, and cleans it when absent."""
        # Isolate the call-count assertions below from other tests.
        hw_management_independent_mode_update.reset_mock()

        mock_sfp = mock.MagicMock()
        mock_sfp.sdk_index = 10
        mock_sfp.get_presence = mock.MagicMock(return_value=True)
        mock_sfp.get_temperature = mock.MagicMock(return_value=55.0)
        mock_sfp.get_temperature_warning_threashold = mock.MagicMock(return_value=70.0)
        mock_sfp.get_temperature_critical_threashold = mock.MagicMock(return_value=80.0)
        updater = ThermalUpdater([mock_sfp])
        updater.update_module()
        hw_management_independent_mode_update.thermal_data_set_module.assert_called_once_with(0, 11, 55000, 80000, 70000, 0)

        # A temperature of 0.0 is expected to be reported with all-zero values.
        mock_sfp.get_temperature = mock.MagicMock(return_value=0.0)
        hw_management_independent_mode_update.reset_mock()
        updater.update_module()
        hw_management_independent_mode_update.thermal_data_set_module.assert_called_once_with(0, 11, 0, 0, 0, 0)

        # An absent module is expected to have its thermal data cleaned.
        mock_sfp.get_presence = mock.MagicMock(return_value=False)
        updater.update_module()
        hw_management_independent_mode_update.thermal_data_clean_module.assert_called_once_with(0, 11)
|
@ -191,6 +191,26 @@ class TestUtils:
|
|||||||
mock_os_open = mock.mock_open(read_data='a:b')
|
mock_os_open = mock.mock_open(read_data='a:b')
|
||||||
with mock.patch('sonic_platform.utils.open', mock_os_open):
|
with mock.patch('sonic_platform.utils.open', mock_os_open):
|
||||||
assert utils.read_key_value_file('some_file') == {'a':'b'}
|
assert utils.read_key_value_file('some_file') == {'a':'b'}
|
||||||
|
|
||||||
mock_os_open = mock.mock_open(read_data='a=b')
|
mock_os_open = mock.mock_open(read_data='a=b')
|
||||||
with mock.patch('sonic_platform.utils.open', mock_os_open):
|
with mock.patch('sonic_platform.utils.open', mock_os_open):
|
||||||
assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'}
|
assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'}
|
||||||
|
|
||||||
|
def test_timer(self):
    """Run utils.Timer for three seconds and check which callbacks fired.

    Four callbacks are scheduled: one due immediately, one due far in the
    future, and two due after one second (one single-shot, one repeating).
    """
    immediate_cb = mock.MagicMock()
    far_future_cb = mock.MagicMock()
    one_shot_cb = mock.MagicMock()
    repeating_cb = mock.MagicMock()

    scheduler = utils.Timer()
    scheduler.start()

    # (interval, callback, repeat, run_now), scheduled in this order.
    plan = (
        (1000, immediate_cb, False, True),
        (1000, far_future_cb, False, False),
        (1, one_shot_cb, False, False),
        (1, repeating_cb, True, False),
    )
    for interval, callback, repeat, run_now in plan:
        scheduler.schedule(interval, cb=callback, repeat=repeat, run_now=run_now)

    time.sleep(3)
    scheduler.stop()

    immediate_cb.assert_called_once()
    far_future_cb.assert_not_called()
    one_shot_cb.assert_called_once()
    assert repeating_cb.call_count > 1
|
||||||
|
Reference in New Issue
Block a user