[Mellanox] Optimize thermal control policies (#9452)
- Why I did it: Optimize the thermal control policies to simplify the logic, and add more protection code to the policies so that thermal control keeps working even if the kernel algorithm does not work. - How I did it: Removed unused thermal policies. Added a timely ASIC temperature check to the thermal policy to make sure ASIC temperature and fan speed are coordinated. The minimum allowed fan speed is now calculated as the maximum of the expected fan speeds among all policies. Moved some logic from fan.py to thermal.py to make it more readable. - How to verify it: 1. Manual test 2. Regression
This commit is contained in:
parent
bc56e064c3
commit
4ae504a813
@ -30,6 +30,7 @@ try:
|
||||
from sonic_py_common.logger import Logger
|
||||
from .led import ComponentFaultyIndicator
|
||||
from . import utils
|
||||
from .thermal import Thermal
|
||||
except ImportError as e:
|
||||
raise ImportError (str(e) + "- required module not found")
|
||||
|
||||
@ -44,13 +45,9 @@ CONFIG_PATH = "/var/run/hw-management/config"
|
||||
FAN_DIR = "/var/run/hw-management/thermal/fan{}_dir"
|
||||
FAN_DIR_VALUE_EXHAUST = 0
|
||||
FAN_DIR_VALUE_INTAKE = 1
|
||||
COOLING_STATE_PATH = "/var/run/hw-management/thermal/cooling_cur_state"
|
||||
|
||||
|
||||
class MlnxFan(FanBase):
|
||||
MIN_VALID_COOLING_LEVEL = 1
|
||||
MAX_VALID_COOLING_LEVEL = 10
|
||||
|
||||
def __init__(self, fan_index, position):
|
||||
super(MlnxFan, self).__init__()
|
||||
self.index = fan_index + 1
|
||||
@ -128,37 +125,6 @@ class MlnxFan(FanBase):
|
||||
"""
|
||||
return False
|
||||
|
||||
@classmethod
def set_cooling_level(cls, level, cur_state):
    """Write a new cooling level, then a cooling state, to the cooling state file.

    Args:
        level (int): Cooling level; must lie within
            [MIN_VALID_COOLING_LEVEL, MAX_VALID_COOLING_LEVEL].
            1 means 10%, 2 means 20%, 10 means 100%.
        cur_state (int): Value written to the cooling state file after the
            cooling level vector has been reset.

    Raises:
        RuntimeError: If level is out of range, or if a write fails with
            ValueError or IOError.
    """
    lowest = cls.MIN_VALID_COOLING_LEVEL
    highest = cls.MAX_VALID_COOLING_LEVEL
    if level < lowest or level > highest:
        template = "Failed to set cooling level, level value must be in range [{}, {}], got {}"
        raise RuntimeError(template.format(lowest, highest, level))

    try:
        # Per the low level team: to set cooling level X, (10 + X) must be
        # written first to reset the cooling level vector; only after that
        # is the actual cooling state written.
        for value in (level + 10, cur_state):
            utils.write_file(COOLING_STATE_PATH, value, raise_exception=True)
    except (ValueError, IOError) as err:
        raise RuntimeError("Failed to set cooling level - {}".format(err))
|
||||
|
||||
@classmethod
def get_cooling_level(cls):
    """Read the integer stored in the cooling state file.

    Returns:
        The value returned by utils.read_int_from_file for COOLING_STATE_PATH.

    Raises:
        RuntimeError: If reading fails with ValueError or IOError.
    """
    try:
        level = utils.read_int_from_file(COOLING_STATE_PATH, raise_exception=True)
    except (ValueError, IOError) as err:
        raise RuntimeError("Failed to get cooling level - {}".format(err))
    return level
|
||||
|
||||
|
||||
class PsuFan(MlnxFan):
|
||||
# PSU fan speed vector
|
||||
@ -228,7 +194,7 @@ class PsuFan(MlnxFan):
|
||||
"""
|
||||
try:
|
||||
# Get PSU fan target speed according to current system cooling level
|
||||
cooling_level = self.get_cooling_level()
|
||||
cooling_level = Thermal.get_cooling_level()
|
||||
return int(self.PSU_FAN_SPEED[cooling_level], 16)
|
||||
except Exception:
|
||||
return self.get_speed()
|
||||
@ -264,9 +230,6 @@ class PsuFan(MlnxFan):
|
||||
|
||||
class Fan(MlnxFan):
|
||||
"""Platform-specific Fan class"""
|
||||
|
||||
min_cooling_level = 2
|
||||
|
||||
def __init__(self, fan_index, fan_drawer, position):
|
||||
super(Fan, self).__init__(fan_index, position)
|
||||
|
||||
@ -345,11 +308,6 @@ class Fan(MlnxFan):
|
||||
status = True
|
||||
|
||||
try:
|
||||
cooling_level = int(speed // 10)
|
||||
if cooling_level < self.min_cooling_level:
|
||||
cooling_level = self.min_cooling_level
|
||||
speed = self.min_cooling_level * 10
|
||||
self.set_cooling_level(cooling_level, cooling_level)
|
||||
pwm = int(PWM_MAX*speed/100.0)
|
||||
utils.write_file(self.fan_speed_set_path, pwm, raise_exception=True)
|
||||
except (ValueError, IOError):
|
||||
|
@ -131,13 +131,23 @@ THERMAL_NAMING_RULE = {
|
||||
}
|
||||
|
||||
CHASSIS_THERMAL_SYSFS_FOLDER = '/run/hw-management/thermal'
|
||||
COOLING_STATE_PATH = "/var/run/hw-management/thermal/cooling_cur_state"
|
||||
THERMAL_ZONE_ASIC_PATH = '/var/run/hw-management/thermal/mlxsw/'
|
||||
THERMAL_ZONE_FOLDER_WILDCARD = '/run/hw-management/thermal/mlxsw*'
|
||||
THERMAL_ZONE_POLICY_FILE = 'thermal_zone_policy'
|
||||
THERMAL_ZONE_HIGH_THRESHOLD = 'temp_trip_high'
|
||||
THERMAL_ZONE_HOT_THRESHOLD = 'temp_trip_hot'
|
||||
THERMAL_ZONE_NORMAL_THRESHOLD = 'temp_trip_norm'
|
||||
THERMAL_ZONE_MODE_FILE = 'thermal_zone_mode'
|
||||
THERMAL_ZONE_POLICY_FILE = 'thermal_zone_policy'
|
||||
THERMAL_ZONE_TEMP_FILE = 'thermal_zone_temp'
|
||||
THERMAL_ZONE_THRESHOLD_FILE = 'temp_trip_high'
|
||||
THERMAL_ZONE_HYSTERESIS = 5000
|
||||
MODULE_TEMP_FAULT_WILDCARRD = '/run/hw-management/thermal/module*_temp_fault'
|
||||
MAX_AMBIENT_TEMP = 120
|
||||
# Min allowed cooling level when all thermal zones are in normal state
|
||||
MIN_COOLING_LEVEL_FOR_NORMAL = 2
|
||||
# Min allowed cooling level when any thermal zone is in high state but no thermal zone is in emergency state
|
||||
MIN_COOLING_LEVEL_FOR_HIGH = 4
|
||||
MAX_COOLING_LEVEL = 10
|
||||
|
||||
|
||||
def initialize_chassis_thermals():
|
||||
@ -263,6 +273,14 @@ def _check_thermal_sysfs_existence(file_path):
|
||||
|
||||
class Thermal(ThermalBase):
|
||||
thermal_algorithm_status = False
|
||||
# Expect cooling level, used for caching the cooling level value before committing to hardware
|
||||
expect_cooling_level = None
|
||||
# Expect cooling state
|
||||
expect_cooling_state = None
|
||||
# Last committed cooling level
|
||||
last_set_cooling_level = None
|
||||
last_set_cooling_state = None
|
||||
last_set_psu_cooling_level = None
|
||||
|
||||
def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position):
|
||||
"""
|
||||
@ -368,31 +386,37 @@ class Thermal(ThermalBase):
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def check_thermal_zone_temperature(cls):
|
||||
"""
|
||||
Check thermal zone current temperature with normal temperature
|
||||
|
||||
def get_min_allowed_cooling_level_by_thermal_zone(cls):
|
||||
"""Get min allowed cooling level according to thermal zone status:
|
||||
1. If temperature of all thermal zones is less than normal threshold, min allowed cooling level is
|
||||
$MIN_COOLING_LEVEL_FOR_NORMAL = 2
|
||||
2. If temperature of any thermal zone is greater than normal threshold, but no thermal zone temperature
|
||||
is greater than high threshold, min allowed cooling level is $MIN_COOLING_LEVEL_FOR_HIGH = 4
|
||||
3. Otherwise, there is no minimum allowed value and policy should not adjust cooling level
|
||||
Returns:
|
||||
True if all thermal zones current temperature less or equal than normal temperature
|
||||
int: minimum allowed cooling level
|
||||
"""
|
||||
for thermal_zone_folder in glob.iglob(THERMAL_ZONE_FOLDER_WILDCARD):
|
||||
if not cls._check_thermal_zone_temperature(thermal_zone_folder):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def _check_thermal_zone_temperature(cls, thermal_zone_path):
|
||||
threshold_path = os.path.join(thermal_zone_path, THERMAL_ZONE_THRESHOLD_FILE)
|
||||
current_temp_path = os.path.join(thermal_zone_path, THERMAL_ZONE_TEMP_FILE)
|
||||
|
||||
min_allowed = MIN_COOLING_LEVEL_FOR_NORMAL
|
||||
thermal_zone_present = False
|
||||
try:
|
||||
threshold = utils.read_int_from_file(threshold_path, raise_exception=True)
|
||||
current = utils.read_int_from_file(current_temp_path, raise_exception=True)
|
||||
return current <= threshold
|
||||
for thermal_zone_folder in glob.iglob(THERMAL_ZONE_FOLDER_WILDCARD):
|
||||
thermal_zone_present = True
|
||||
normal_thresh = utils.read_int_from_file(os.path.join(thermal_zone_folder, THERMAL_ZONE_NORMAL_THRESHOLD))
|
||||
current = utils.read_int_from_file(os.path.join(thermal_zone_folder, THERMAL_ZONE_TEMP_FILE))
|
||||
if current < normal_thresh - THERMAL_ZONE_HYSTERESIS:
|
||||
continue
|
||||
|
||||
hot_thresh = utils.read_int_from_file(os.path.join(thermal_zone_folder, THERMAL_ZONE_HIGH_THRESHOLD))
|
||||
if current < hot_thresh - THERMAL_ZONE_HYSTERESIS:
|
||||
min_allowed = MIN_COOLING_LEVEL_FOR_HIGH
|
||||
else:
|
||||
min_allowed = None
|
||||
break
|
||||
except Exception as e:
|
||||
logger.log_info("Fail to check thermal zone temperature for file {} due to {}".format(thermal_zone_path, repr(e)))
|
||||
return False
|
||||
logger.log_error('Failed to get thermal zone status for {} - {}'.format(thermal_zone_folder, repr(e)))
|
||||
return None
|
||||
|
||||
return min_allowed if thermal_zone_present else None
|
||||
|
||||
@classmethod
|
||||
def check_module_temperature_trustable(cls):
|
||||
@ -416,6 +440,85 @@ class Thermal(ThermalBase):
|
||||
logger.log_error('Failed to get minimum ambient temperature, use pessimistic instead')
|
||||
return MAX_AMBIENT_TEMP
|
||||
|
||||
@classmethod
def set_cooling_level(cls, level):
    """Commit a cooling level to hardware unless it was the last one committed.

    The value written to COOLING_STATE_PATH is ``level + 10``
    (presumably the "reset cooling level vector" encoding - confirm with
    the low level team). The range of ``level`` is not validated here.

    Args:
        level (int): Cooling level. Documented as an integer in [1, 10]:
            1 means 10%, 2 means 20%, 10 means 100%.
    """
    if cls.last_set_cooling_level == level:
        # Same level as the previous commit: skip the redundant write.
        return

    utils.write_file(COOLING_STATE_PATH, level + 10, raise_exception=True)
    # Remember the level only after the write call has returned.
    cls.last_set_cooling_level = level
|
||||
|
||||
@classmethod
def set_cooling_state(cls, state):
    """Commit a cooling state to hardware unless it was the last one committed.

    Args:
        state (int): cooling state, written as-is to COOLING_STATE_PATH
    """
    if cls.last_set_cooling_state == state:
        # Nothing changed since the previous commit.
        return

    utils.write_file(COOLING_STATE_PATH, state, raise_exception=True)
    # Remember the state only after the write call has returned.
    cls.last_set_cooling_state = state
|
||||
|
||||
@classmethod
def get_cooling_level(cls):
    """Return the integer currently stored in COOLING_STATE_PATH.

    Returns:
        The value produced by utils.read_int_from_file.

    Raises:
        RuntimeError: if the read fails with ValueError or IOError.
    """
    try:
        current_level = utils.read_int_from_file(COOLING_STATE_PATH, raise_exception=True)
    except (ValueError, IOError) as error:
        raise RuntimeError("Failed to get cooling level - {}".format(error))
    return current_level
|
||||
|
||||
@classmethod
def set_expect_cooling_level(cls, expect_value):
    """Cache the cooling level requested by a policy, keeping only the largest.

    The cached value is replaced when nothing is cached yet or when the
    cached value is smaller than ``expect_value``. The stored value is
    ``int(expect_value)``.

    Args:
        expect_value (int): Expected cooling level value
    """
    cached = cls.expect_cooling_level
    if cached is not None and not (cached < expect_value):
        # An equal or higher level is already pending; keep it.
        return
    cls.expect_cooling_level = int(expect_value)
|
||||
|
||||
@classmethod
def commit_cooling_level(cls, thermal_info_dict):
    """Commit the cached cooling level/state to hardware and update PSU fans.

    Steps, in order:
      1. If a cooling level is cached, commit it via set_cooling_level.
      2. Commit the cooling state: the cached expect_cooling_state if there
         is one, otherwise the cached cooling level (if any).
      3. Clear the cached cooling level.
      4. If thermal_info_dict carries a ChassisInfo, read the current
         cooling level back and, when it differs from the last value applied
         to PSU fans, set every PSU fan to ``cooling_level * 10``.

    Args:
        thermal_info_dict (dict): Thermal information dictionary
    """
    pending_level = cls.expect_cooling_level
    has_pending_level = pending_level is not None
    if has_pending_level:
        cls.set_cooling_level(pending_level)

    pending_state = cls.expect_cooling_state
    if pending_state is not None:
        cls.set_cooling_state(pending_state)
    elif has_pending_level:
        cls.set_cooling_state(pending_level)

    cls.expect_cooling_level = None
    # System fan speed is not written here; the original note attributes
    # system fan adjustment to the kernel, driven by cooling level and
    # cooling state.
    # NOTE(review): the original comment read "We need to set system fan
    # speed here ...", which does not match the code - confirm the intent.

    # Commit PSU fan speed with current state
    from .thermal_infos import ChassisInfo
    info_key = ChassisInfo.INFO_NAME
    if info_key not in thermal_info_dict:
        return
    chassis_info = thermal_info_dict[info_key]
    if not isinstance(chassis_info, ChassisInfo):
        return

    cooling_level = cls.get_cooling_level()
    if cls.last_set_psu_cooling_level == cooling_level:
        # PSU fans already follow this cooling level.
        return

    speed = cooling_level * 10
    for psu in chassis_info.get_chassis().get_all_psus():
        for psu_fan in psu.get_all_fans():
            psu_fan.set_speed(speed)
    cls.last_set_psu_cooling_level = cooling_level
|
||||
|
||||
@classmethod
def monitor_asic_themal_zone(cls):
    """Protect the ASIC thermal zone by requesting the maximum cooling state.

    Reads the ASIC temperature and its hot threshold from
    THERMAL_ZONE_ASIC_PATH. When the temperature is at or above
    ``hot threshold + THERMAL_ZONE_HYSTERESIS``, expect_cooling_state is set
    to MAX_COOLING_LEVEL; otherwise it is cleared to None.
    """
    def _read_asic_value(file_name):
        # Both values live in the same ASIC thermal zone folder.
        return utils.read_int_from_file(os.path.join(THERMAL_ZONE_ASIC_PATH, file_name), raise_exception=True)

    asic_temp = _read_asic_value(THERMAL_ZONE_TEMP_FILE)
    hot_thresh = _read_asic_value(THERMAL_ZONE_HOT_THRESHOLD)
    too_hot = asic_temp >= hot_thresh + THERMAL_ZONE_HYSTERESIS
    cls.expect_cooling_state = MAX_COOLING_LEVEL if too_hot else None
|
||||
|
||||
|
||||
class RemovableThermal(Thermal):
|
||||
def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position, presence_cb):
|
||||
|
@ -16,7 +16,7 @@
|
||||
#
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
|
||||
from .thermal import logger
|
||||
from .thermal import Thermal
|
||||
|
||||
|
||||
class SetFanSpeedAction(ThermalPolicyActionBase):
|
||||
@ -64,120 +64,28 @@ class SetAllFanSpeedAction(SetFanSpeedAction):
|
||||
:param thermal_info_dict: A dictionary stores all thermal information.
|
||||
:return:
|
||||
"""
|
||||
from .thermal_infos import FanInfo
|
||||
if FanInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[FanInfo.INFO_NAME], FanInfo):
|
||||
fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME]
|
||||
for fan in fan_info_obj.get_presence_fans():
|
||||
fan.set_speed(self.speed)
|
||||
logger.log_info('Set all system FAN speed to {}'.format(self.speed))
|
||||
|
||||
SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, self.speed)
|
||||
|
||||
@classmethod
|
||||
def set_psu_fan_speed(cls, thermal_info_dict, speed):
|
||||
from .thermal_infos import ChassisInfo
|
||||
if ChassisInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[ChassisInfo.INFO_NAME], ChassisInfo):
|
||||
chassis = thermal_info_dict[ChassisInfo.INFO_NAME].get_chassis()
|
||||
for psu in chassis.get_all_psus():
|
||||
for psu_fan in psu.get_all_fans():
|
||||
psu_fan.set_speed(speed)
|
||||
|
||||
|
||||
@thermal_json_object('fan.all.check_and_set_speed')
|
||||
class CheckAndSetAllFanSpeedAction(SetAllFanSpeedAction):
|
||||
"""
|
||||
Action to check thermal zone temperature and recover speed for all fans
|
||||
"""
|
||||
def execute(self, thermal_info_dict):
|
||||
"""
|
||||
Check thermal zone and set speed for all fans
|
||||
:param thermal_info_dict: A dictionary stores all thermal information.
|
||||
:return:
|
||||
"""
|
||||
from .thermal import Thermal
|
||||
if Thermal.check_thermal_zone_temperature():
|
||||
SetAllFanSpeedAction.execute(self, thermal_info_dict)
|
||||
|
||||
|
||||
@thermal_json_object('thermal_control.control')
|
||||
class ControlThermalAlgoAction(ThermalPolicyActionBase):
|
||||
"""
|
||||
Action to control the thermal control algorithm
|
||||
"""
|
||||
# JSON field definition
|
||||
JSON_FIELD_STATUS = 'status'
|
||||
|
||||
def __init__(self):
|
||||
self.status = True
|
||||
|
||||
def load_from_json(self, json_obj):
|
||||
"""
|
||||
Construct ControlThermalAlgoAction via JSON. JSON example:
|
||||
{
|
||||
"type": "thermal_control.control"
|
||||
"status": "true"
|
||||
}
|
||||
:param json_obj: A JSON object representing a ControlThermalAlgoAction action.
|
||||
:return:
|
||||
"""
|
||||
if ControlThermalAlgoAction.JSON_FIELD_STATUS in json_obj:
|
||||
status_str = json_obj[ControlThermalAlgoAction.JSON_FIELD_STATUS].lower()
|
||||
if status_str == 'true':
|
||||
self.status = True
|
||||
elif status_str == 'false':
|
||||
self.status = False
|
||||
else:
|
||||
raise ValueError('Invalid {} field value, please specify true of false'.
|
||||
format(ControlThermalAlgoAction.JSON_FIELD_STATUS))
|
||||
else:
|
||||
raise ValueError('ControlThermalAlgoAction '
|
||||
'missing mandatory field {} in JSON policy file'.
|
||||
format(ControlThermalAlgoAction.JSON_FIELD_STATUS))
|
||||
|
||||
def execute(self, thermal_info_dict):
|
||||
"""
|
||||
Disable thermal control algorithm
|
||||
:param thermal_info_dict: A dictionary stores all thermal information.
|
||||
:return:
|
||||
"""
|
||||
from .thermal_infos import FanInfo
|
||||
from .thermal import Thermal
|
||||
from .thermal_conditions import UpdateCoolingLevelToMinCondition
|
||||
from .fan import Fan
|
||||
status_changed = Thermal.set_thermal_algorithm_status(self.status, False)
|
||||
|
||||
# Only update cooling level if thermal algorithm status changed
|
||||
if status_changed:
|
||||
if self.status:
|
||||
# Check thermal zone temperature, if all thermal zone temperature
|
||||
# back to normal, set it to minimum allowed speed to
|
||||
# save power
|
||||
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
|
||||
|
||||
logger.log_info('Changed thermal algorithm status to {}'.format(self.status))
|
||||
Thermal.set_expect_cooling_level(self.speed / 10)
|
||||
|
||||
|
||||
@thermal_json_object('thermal.recover')
|
||||
class ThermalRecoverAction(ThermalPolicyActionBase):
|
||||
def execute(self, thermal_info_dict):
|
||||
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
|
||||
|
||||
|
||||
class ChangeMinCoolingLevelAction(ThermalPolicyActionBase):
|
||||
UNKNOWN_SKU_COOLING_LEVEL = 6
|
||||
|
||||
def execute(self, thermal_info_dict):
|
||||
from .device_data import DeviceDataManager
|
||||
from .fan import Fan
|
||||
from .thermal_infos import ChassisInfo
|
||||
from .thermal_conditions import MinCoolingLevelChangeCondition
|
||||
from .thermal_conditions import UpdateCoolingLevelToMinCondition
|
||||
from .thermal import MAX_COOLING_LEVEL, MIN_COOLING_LEVEL_FOR_HIGH, logger
|
||||
Thermal.monitor_asic_themal_zone()
|
||||
|
||||
# Calculate dynamic minimum cooling level
|
||||
dynamic_min_cooling_level = None
|
||||
minimum_table = DeviceDataManager.get_minimum_table()
|
||||
if not minimum_table:
|
||||
Fan.min_cooling_level = ChangeMinCoolingLevelAction.UNKNOWN_SKU_COOLING_LEVEL
|
||||
# If there is no minimum_table defined, set dynamic_min_cooling_level to default value
|
||||
dynamic_min_cooling_level = ThermalRecoverAction.UNKNOWN_SKU_COOLING_LEVEL
|
||||
else:
|
||||
trust_state = MinCoolingLevelChangeCondition.trust_state
|
||||
temperature = MinCoolingLevelChangeCondition.temperature
|
||||
trust_state = Thermal.check_module_temperature_trustable()
|
||||
temperature = Thermal.get_min_amb_temperature()
|
||||
temperature = int(temperature / 1000)
|
||||
minimum_table = minimum_table['unk_{}'.format(trust_state)]
|
||||
|
||||
for key, cooling_level in minimum_table.items():
|
||||
@ -185,41 +93,19 @@ class ChangeMinCoolingLevelAction(ThermalPolicyActionBase):
|
||||
temp_min = int(temp_range[0].strip())
|
||||
temp_max = int(temp_range[1].strip())
|
||||
if temp_min <= temperature <= temp_max:
|
||||
Fan.min_cooling_level = cooling_level - 10
|
||||
dynamic_min_cooling_level = cooling_level - 10
|
||||
break
|
||||
|
||||
current_cooling_level = Fan.get_cooling_level()
|
||||
if current_cooling_level < Fan.min_cooling_level:
|
||||
Fan.set_cooling_level(Fan.min_cooling_level, Fan.min_cooling_level)
|
||||
SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, Fan.min_cooling_level * 10)
|
||||
if not dynamic_min_cooling_level:
|
||||
# Should not go to this branch, just in case
|
||||
logger.log_error('Failed to get dynamic minimum cooling level')
|
||||
dynamic_min_cooling_level = MAX_COOLING_LEVEL
|
||||
|
||||
if Thermal.last_set_cooling_level is not None and dynamic_min_cooling_level > Thermal.last_set_cooling_level and dynamic_min_cooling_level >= MIN_COOLING_LEVEL_FOR_HIGH:
|
||||
# No need to check thermal zone as dynamic_min_cooling_level is greater than previous value and MIN_COOLING_LEVEL_FOR_HIGH
|
||||
Thermal.set_expect_cooling_level(dynamic_min_cooling_level)
|
||||
else:
|
||||
Fan.set_cooling_level(Fan.min_cooling_level, current_cooling_level)
|
||||
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
|
||||
|
||||
|
||||
class UpdatePsuFanSpeedAction(ThermalPolicyActionBase):
    """Action that sets PSU fan speed from the last observed cooling level."""

    def execute(self, thermal_info_dict):
        """Set all PSU fans to ``CoolingLevelChangeCondition.cooling_level * 10``.

        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .thermal_conditions import CoolingLevelChangeCondition
        psu_speed = CoolingLevelChangeCondition.cooling_level * 10
        SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, psu_speed)
|
||||
|
||||
|
||||
class UpdateCoolingLevelToMinAction(ThermalPolicyActionBase):
|
||||
def execute(self, thermal_info_dict):
|
||||
self.update_cooling_level_to_minimum(thermal_info_dict)
|
||||
|
||||
@classmethod
|
||||
def update_cooling_level_to_minimum(cls, thermal_info_dict):
|
||||
from .fan import Fan
|
||||
from .thermal import Thermal
|
||||
from .thermal_conditions import UpdateCoolingLevelToMinCondition
|
||||
from .thermal_infos import FanInfo
|
||||
if Thermal.check_thermal_zone_temperature():
|
||||
fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME]
|
||||
speed = Fan.min_cooling_level * 10
|
||||
for fan in fan_info_obj.get_presence_fans():
|
||||
fan.set_speed(speed)
|
||||
SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, speed)
|
||||
UpdateCoolingLevelToMinCondition.enable = False
|
||||
else:
|
||||
UpdateCoolingLevelToMinCondition.enable = True
|
||||
|
||||
min_cooling_level_by_tz = Thermal.get_min_allowed_cooling_level_by_thermal_zone()
|
||||
if min_cooling_level_by_tz is not None:
|
||||
cooling_level = max(dynamic_min_cooling_level, min_cooling_level_by_tz)
|
||||
Thermal.set_expect_cooling_level(cooling_level)
|
||||
|
@ -90,53 +90,3 @@ class AllPsuPresenceCondition(PsuCondition):
|
||||
def is_match(self, thermal_info_dict):
|
||||
psu_info_obj = self.get_psu_info(thermal_info_dict)
|
||||
return len(psu_info_obj.get_absence_psus()) == 0 if psu_info_obj else False
|
||||
|
||||
|
||||
class MinCoolingLevelChangeCondition(ThermalPolicyConditionBase):
    """Condition that matches when the trust state or ambient temperature changed."""

    # Last observed module temperature trust state
    trust_state = None
    # Last observed minimum ambient temperature, i.e. int(raw value / 1000)
    temperature = None

    def is_match(self, thermal_info_dict):
        """Compare current readings with the cached ones and refresh the cache.

        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if the trust state or the temperature differs from the
                 cached value, False otherwise.
        """
        from .thermal import Thermal

        cond = MinCoolingLevelChangeCondition
        new_trust_state = Thermal.check_module_temperature_trustable()
        new_temperature = int(Thermal.get_min_amb_temperature() / 1000)

        trust_changed = new_trust_state != cond.trust_state
        if trust_changed:
            cond.trust_state = new_trust_state

        temperature_changed = new_temperature != cond.temperature
        if temperature_changed:
            cond.temperature = new_temperature

        return trust_changed or temperature_changed
|
||||
|
||||
|
||||
class CoolingLevelChangeCondition(ThermalPolicyConditionBase):
    """Condition that matches when the cooling level differs from the cached one."""

    # Cooling level seen by the most recent is_match call
    cooling_level = None

    def is_match(self, thermal_info_dict):
        """Check Fan.get_cooling_level() against the cached value.

        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if the level changed (the cache is updated), else False.
        """
        from .fan import Fan
        level_now = Fan.get_cooling_level()
        if level_now == CoolingLevelChangeCondition.cooling_level:
            return False
        CoolingLevelChangeCondition.cooling_level = level_now
        return True
|
||||
|
||||
|
||||
class UpdateCoolingLevelToMinCondition(ThermalPolicyConditionBase):
    """Condition that matches while enabled and the cooling level is not at minimum."""

    # Switched on/off by other code; when False the condition never matches
    enable = False

    def is_match(self, thermal_info_dict):
        """Match only when enabled and the cooling level differs from the minimum.

        When the current cooling level equals Fan.min_cooling_level, the
        condition disables itself and does not match.

        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if the condition matches, False otherwise.
        """
        cond = UpdateCoolingLevelToMinCondition
        if cond.enable:
            from .fan import Fan
            if Fan.get_cooling_level() != Fan.min_cooling_level:
                return True
            # Already at the minimum cooling level: nothing left to do.
            cond.enable = False
        return False
|
||||
|
@ -14,24 +14,14 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import os
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy
|
||||
from .thermal_actions import *
|
||||
from .thermal_conditions import *
|
||||
from .thermal_infos import *
|
||||
from .thermal import logger, MAX_COOLING_LEVEL, Thermal
|
||||
|
||||
|
||||
class ThermalManager(ThermalManagerBase):
|
||||
@classmethod
|
||||
def initialize(cls):
|
||||
"""
|
||||
Initialize thermal manager, including register thermal condition types and thermal action types
|
||||
and any other vendor specific initialization.
|
||||
:return:
|
||||
"""
|
||||
cls._add_private_thermal_policy()
|
||||
|
||||
@classmethod
|
||||
def start_thermal_control_algorithm(cls):
|
||||
"""
|
||||
@ -40,7 +30,6 @@ class ThermalManager(ThermalManagerBase):
|
||||
Returns:
|
||||
bool: True if set success, False if fail.
|
||||
"""
|
||||
from .thermal import Thermal
|
||||
Thermal.set_thermal_algorithm_status(True)
|
||||
|
||||
@classmethod
|
||||
@ -51,22 +40,31 @@ class ThermalManager(ThermalManagerBase):
|
||||
Returns:
|
||||
bool: True if set success, False if fail.
|
||||
"""
|
||||
from .thermal import Thermal
|
||||
Thermal.set_thermal_algorithm_status(False)
|
||||
|
||||
@classmethod
|
||||
def _add_private_thermal_policy(cls):
|
||||
dynamic_min_speed_policy = ThermalPolicy()
|
||||
dynamic_min_speed_policy.conditions[MinCoolingLevelChangeCondition] = MinCoolingLevelChangeCondition()
|
||||
dynamic_min_speed_policy.actions[ChangeMinCoolingLevelAction] = ChangeMinCoolingLevelAction()
|
||||
cls._policy_dict['DynamicMinCoolingLevelPolicy'] = dynamic_min_speed_policy
|
||||
def run_policy(cls, chassis):
|
||||
if not cls._policy_dict:
|
||||
return
|
||||
|
||||
update_psu_fan_speed_policy = ThermalPolicy()
|
||||
update_psu_fan_speed_policy.conditions[CoolingLevelChangeCondition] = CoolingLevelChangeCondition()
|
||||
update_psu_fan_speed_policy.actions[UpdatePsuFanSpeedAction] = UpdatePsuFanSpeedAction()
|
||||
cls._policy_dict['UpdatePsuFanSpeedPolicy'] = update_psu_fan_speed_policy
|
||||
try:
|
||||
cls._collect_thermal_information(chassis)
|
||||
except Exception as e:
|
||||
logger.log_error('Failed to collect thermal information {}'.format(repr(e)))
|
||||
Thermal.set_expect_cooling_level(MAX_COOLING_LEVEL)
|
||||
Thermal.commit_cooling_level(cls._thermal_info_dict)
|
||||
return
|
||||
|
||||
update_cooling_level_policy = ThermalPolicy()
|
||||
update_cooling_level_policy.conditions[UpdateCoolingLevelToMinCondition] = UpdateCoolingLevelToMinCondition()
|
||||
update_cooling_level_policy.actions[UpdateCoolingLevelToMinAction] = UpdateCoolingLevelToMinAction()
|
||||
cls._policy_dict['UpdateCoolingLevelPolicy'] = update_cooling_level_policy
|
||||
for policy in cls._policy_dict.values():
|
||||
if not cls._running:
|
||||
return
|
||||
try:
|
||||
print(policy.name)
|
||||
if policy.is_match(cls._thermal_info_dict):
|
||||
policy.do_action(cls._thermal_info_dict)
|
||||
except Exception as e:
|
||||
logger.log_error('Failed to run thermal policy {} - {}'.format(policy.name, repr(e)))
|
||||
# In case there is an exception, we put cooling level to max value
|
||||
Thermal.set_expect_cooling_level(MAX_COOLING_LEVEL)
|
||||
|
||||
Thermal.commit_cooling_level(cls._thermal_info_dict)
|
||||
|
@ -42,3 +42,14 @@ def auto_recover_mock():
|
||||
utils.read_str_from_file = origin_read_str_from_file
|
||||
utils.write_file = origin_write_file
|
||||
utils.read_float_from_file = origin_read_float_from_file
|
||||
|
||||
|
||||
@pytest.fixture(scope='function', autouse=True)
def auto_reset_cooling_level():
    """Reset the class-level cooling caches on Thermal after every test."""
    from sonic_platform.thermal import Thermal
    yield
    # Teardown: clear cached state so it does not leak into the next test.
    cached_attributes = (
        'expect_cooling_level',
        'expect_cooling_state',
        'last_set_cooling_level',
        'last_set_cooling_state',
        'last_set_psu_cooling_level',
    )
    for attribute in cached_attributes:
        setattr(Thermal, attribute, None)
|
||||
|
@ -27,8 +27,7 @@
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
"type": "thermal.recover"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.set_speed",
|
||||
@ -45,8 +44,7 @@
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
"type": "thermal.recover"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.set_speed",
|
||||
@ -66,8 +64,7 @@
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "true"
|
||||
"type": "thermal.recover"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -18,14 +18,14 @@ import os
|
||||
import pytest
|
||||
import subprocess
|
||||
import sys
|
||||
from mock import call, MagicMock
|
||||
from mock import MagicMock, patch
|
||||
|
||||
test_path = os.path.dirname(os.path.abspath(__file__))
|
||||
modules_path = os.path.dirname(test_path)
|
||||
sys.path.insert(0, modules_path)
|
||||
|
||||
from sonic_platform import utils
|
||||
from sonic_platform.fan import Fan, PsuFan, COOLING_STATE_PATH
|
||||
from sonic_platform.fan import Fan, PsuFan
|
||||
from sonic_platform.fan_drawer import RealDrawer, VirtualDrawer
|
||||
from sonic_platform.psu import Psu
|
||||
|
||||
@ -100,64 +100,31 @@ class TestFan:
|
||||
fan.fan_drawer.get_presence = MagicMock(return_value=True)
|
||||
assert fan.get_presence() is True
|
||||
|
||||
def test_system_fan_set_speed(self):
|
||||
@patch('sonic_platform.utils.write_file')
|
||||
def test_system_fan_set_speed(self, mock_write_file):
|
||||
fan_drawer = RealDrawer(0)
|
||||
fan = Fan(2, fan_drawer, 1)
|
||||
fan.min_cooling_level = 2
|
||||
fan.set_cooling_level = MagicMock()
|
||||
utils.write_file = MagicMock()
|
||||
fan.set_speed(60)
|
||||
fan.set_cooling_level.assert_called_with(6, 6)
|
||||
utils.write_file.assert_called_with(fan.fan_speed_set_path, 153, raise_exception=True)
|
||||
mock_write_file.assert_called_with(fan.fan_speed_set_path, 153, raise_exception=True)
|
||||
|
||||
fan.min_cooling_level = 7
|
||||
fan.set_speed(60)
|
||||
fan.set_cooling_level.assert_called_with(7, 7)
|
||||
utils.write_file.assert_called_with(fan.fan_speed_set_path, 178, raise_exception=True)
|
||||
|
||||
def test_set_cooling_level(self):
|
||||
with pytest.raises(RuntimeError):
|
||||
Fan.set_cooling_level(11, 11)
|
||||
|
||||
utils.write_file = MagicMock()
|
||||
Fan.set_cooling_level(10, 10)
|
||||
calls = [call(COOLING_STATE_PATH, 20, raise_exception=True), call(COOLING_STATE_PATH, 10, raise_exception=True)]
|
||||
utils.write_file.assert_has_calls(calls)
|
||||
|
||||
utils.write_file = MagicMock(side_effect=IOError(''))
|
||||
with pytest.raises(RuntimeError):
|
||||
Fan.set_cooling_level(10, 10)
|
||||
|
||||
utils.write_file = MagicMock(side_effect=ValueError(''))
|
||||
with pytest.raises(RuntimeError):
|
||||
Fan.set_cooling_level(10, 10)
|
||||
|
||||
def test_get_cooling_level(self):
|
||||
utils.read_int_from_file = MagicMock()
|
||||
Fan.get_cooling_level()
|
||||
utils.read_int_from_file.assert_called_with(COOLING_STATE_PATH, raise_exception=True)
|
||||
|
||||
utils.read_int_from_file = MagicMock(side_effect=IOError(''))
|
||||
with pytest.raises(RuntimeError):
|
||||
Fan.get_cooling_level()
|
||||
|
||||
utils.read_int_from_file = MagicMock(side_effect=ValueError(''))
|
||||
with pytest.raises(RuntimeError):
|
||||
Fan.get_cooling_level()
|
||||
|
||||
def test_psu_fan_basic(self):
|
||||
@patch('sonic_platform.thermal.Thermal.get_cooling_level')
|
||||
@patch('sonic_platform.psu.Psu.get_presence')
|
||||
@patch('sonic_platform.psu.Psu.get_powergood_status')
|
||||
@patch('os.path.exists')
|
||||
def test_psu_fan_basic(self, mock_path_exists, mock_powergood, mock_presence, mock_cooling_level):
|
||||
mock_path_exists.return_value = False
|
||||
psu = Psu(0)
|
||||
fan = PsuFan(0, 1, psu)
|
||||
assert fan.get_direction() == Fan.FAN_DIRECTION_NOT_APPLICABLE
|
||||
assert fan.get_status() is True
|
||||
assert fan.get_presence() is False
|
||||
psu.get_presence = MagicMock(return_value=True)
|
||||
mock_presence.return_value = True
|
||||
assert fan.get_presence() is False
|
||||
psu.get_powergood_status = MagicMock(return_value=True)
|
||||
mock_powergood.return_value = True
|
||||
assert fan.get_presence() is False
|
||||
os.path.exists = MagicMock(return_value=True)
|
||||
mock_path_exists.return_value = True
|
||||
assert fan.get_presence() is True
|
||||
fan.get_cooling_level = MagicMock(return_value=7)
|
||||
mock_cooling_level.return_value = 7
|
||||
assert fan.get_target_speed() == 70
|
||||
|
||||
def test_psu_fan_set_speed(self):
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
import glob
|
||||
import os
|
||||
import pytest
|
||||
import sys
|
||||
if sys.version_info.major == 3:
|
||||
from unittest import mock
|
||||
@ -32,12 +33,13 @@ from sonic_platform.device_data import DeviceDataManager
|
||||
|
||||
|
||||
class TestThermal:
|
||||
@mock.patch('os.path.exists', mock.MagicMock(return_value=True))
|
||||
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_gearbox_count', mock.MagicMock(return_value=2))
|
||||
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_cpu_thermal_count', mock.MagicMock(return_value=2))
|
||||
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_platform_name', mock.MagicMock(return_value='x86_64-mlnx_msn2700-r0'))
|
||||
def test_chassis_thermal(self):
|
||||
from sonic_platform.thermal import THERMAL_NAMING_RULE
|
||||
os.path.exists = mock.MagicMock(return_value=True)
|
||||
DeviceDataManager.get_gearbox_count = mock.MagicMock(return_value=2)
|
||||
DeviceDataManager.get_cpu_thermal_count = mock.MagicMock(return_value=2)
|
||||
DeviceDataManager.get_platform_name = mock.MagicMock(return_value='x86_64-mlnx_msn2700-r0')
|
||||
chassis = Chassis()
|
||||
thermal_list = chassis.get_all_thermals()
|
||||
assert thermal_list
|
||||
@ -207,27 +209,35 @@ class TestThermal:
|
||||
|
||||
assert Thermal.set_thermal_algorithm_status(False)
|
||||
|
||||
def test_check_thermal_zone_temperature(self):
|
||||
from sonic_platform.thermal import Thermal, THERMAL_ZONE_FOLDER_WILDCARD, THERMAL_ZONE_THRESHOLD_FILE, THERMAL_ZONE_TEMP_FILE
|
||||
from sonic_platform import utils
|
||||
glob.iglob = mock.MagicMock(return_value=['thermal_zone1', 'thermal_zone2'])
|
||||
|
||||
utils.read_int_from_file = mock.MagicMock(side_effect=Exception(''))
|
||||
assert not Thermal.check_thermal_zone_temperature()
|
||||
@mock.patch('glob.iglob', mock.MagicMock(return_value=['thermal_zone1', 'thermal_zone2']))
|
||||
@mock.patch('sonic_platform.utils.read_int_from_file')
|
||||
def test_get_min_allowed_cooling_level_by_thermal_zone(self, mock_read_file):
|
||||
from sonic_platform.thermal import Thermal, THERMAL_ZONE_TEMP_FILE, THERMAL_ZONE_HIGH_THRESHOLD, THERMAL_ZONE_NORMAL_THRESHOLD, MIN_COOLING_LEVEL_FOR_HIGH, MIN_COOLING_LEVEL_FOR_NORMAL
|
||||
mock_read_file.side_effect = Exception('')
|
||||
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() is None
|
||||
|
||||
mock_file_content = {}
|
||||
def mock_read_int_from_file(file_path, default=0, raise_exception=False):
|
||||
return mock_file_content[file_path]
|
||||
|
||||
utils.read_int_from_file = mock_read_int_from_file
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_THRESHOLD_FILE)] = 25
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 30
|
||||
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_THRESHOLD_FILE)] = 25
|
||||
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_TEMP_FILE)] = 24
|
||||
assert not Thermal.check_thermal_zone_temperature()
|
||||
mock_read_file.side_effect = mock_read_int_from_file
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_NORMAL_THRESHOLD)] = 75000
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_HIGH_THRESHOLD)] = 85000
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 69000
|
||||
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_NORMAL_THRESHOLD)] = 75000
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_HIGH_THRESHOLD)] = 85000
|
||||
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_TEMP_FILE)] = 24000
|
||||
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() == MIN_COOLING_LEVEL_FOR_NORMAL
|
||||
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 71000
|
||||
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() == MIN_COOLING_LEVEL_FOR_HIGH
|
||||
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 79000
|
||||
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() == MIN_COOLING_LEVEL_FOR_HIGH
|
||||
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 81000
|
||||
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() is None
|
||||
|
||||
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 24
|
||||
assert Thermal.check_thermal_zone_temperature()
|
||||
|
||||
def test_check_module_temperature_trustable(self):
|
||||
from sonic_platform.thermal import Thermal
|
||||
@ -255,3 +265,47 @@ class TestThermal:
|
||||
mock_file_content[os.path.join(CHASSIS_THERMAL_SYSFS_FOLDER, 'fan_amb')] = 50
|
||||
mock_file_content[os.path.join(CHASSIS_THERMAL_SYSFS_FOLDER, 'port_amb')] = 40
|
||||
assert Thermal.get_min_amb_temperature() == 40
|
||||
|
||||
@mock.patch('sonic_platform.utils.write_file')
def test_set_cooling_level(self, mock_write_file):
    """Check what Thermal.set_cooling_level hands to utils.write_file.

    Level 10 is expected to be written as 20 and level 9 as 19, both to
    COOLING_STATE_PATH with raise_exception=True. Requesting the level
    that was just set must not produce another write.
    """
    from sonic_platform.thermal import Thermal, COOLING_STATE_PATH

    # First request: level 10 must reach the file as 20.
    Thermal.set_cooling_level(10)
    mock_write_file.assert_has_calls([mock.call(COOLING_STATE_PATH, 20, raise_exception=True)])

    # Same level again: the number of writes must not grow.
    writes_before = mock_write_file.call_count
    Thermal.set_cooling_level(10)
    assert writes_before == mock_write_file.call_count

    # A different level is written again, this time as 19.
    Thermal.set_cooling_level(9)
    mock_write_file.assert_has_calls([mock.call(COOLING_STATE_PATH, 19, raise_exception=True)])
|
||||
|
||||
@mock.patch('sonic_platform.utils.write_file')
def test_set_cooling_state(self, mock_write_file):
    """Check what Thermal.set_cooling_state hands to utils.write_file.

    The requested state is expected to be written unchanged (10 -> 10,
    9 -> 9) to COOLING_STATE_PATH with raise_exception=True. Requesting
    the state that was just set must not produce another write.
    """
    from sonic_platform.thermal import Thermal, COOLING_STATE_PATH

    # First request: state 10 is written as-is.
    Thermal.set_cooling_state(10)
    mock_write_file.assert_has_calls([mock.call(COOLING_STATE_PATH, 10, raise_exception=True)])

    # Same state again: the number of writes must not grow.
    writes_before = mock_write_file.call_count
    Thermal.set_cooling_state(10)
    assert writes_before == mock_write_file.call_count

    # A different state triggers a fresh write.
    Thermal.set_cooling_state(9)
    mock_write_file.assert_has_calls([mock.call(COOLING_STATE_PATH, 9, raise_exception=True)])
|
||||
|
||||
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_get_cooling_level(self, mock_read_file):
    """Check the read path and error translation of Thermal.get_cooling_level.

    The level must be read from COOLING_STATE_PATH with
    raise_exception=True, and both IOError and ValueError from the read
    must surface to the caller as RuntimeError.
    """
    from sonic_platform.thermal import Thermal, COOLING_STATE_PATH

    Thermal.get_cooling_level()
    mock_read_file.assert_called_with(COOLING_STATE_PATH, raise_exception=True)

    # Each low-level read failure is expected to come back as RuntimeError.
    for read_error in (IOError(''), ValueError('')):
        mock_read_file.side_effect = read_error
        with pytest.raises(RuntimeError):
            Thermal.get_cooling_level()
|
@ -18,7 +18,7 @@ import os
|
||||
import sys
|
||||
import pytest
|
||||
import json
|
||||
from mock import MagicMock
|
||||
from mock import MagicMock, patch
|
||||
from .mock_platform import MockChassis, MockFan, MockFanDrawer, MockPsu
|
||||
|
||||
test_path = os.path.dirname(os.path.abspath(__file__))
|
||||
@ -27,24 +27,10 @@ sys.path.insert(0, modules_path)
|
||||
|
||||
from sonic_platform.thermal_manager import ThermalManager
|
||||
from sonic_platform.thermal_infos import FanInfo, PsuInfo
|
||||
from sonic_platform.fan import Fan
|
||||
from sonic_platform.thermal import Thermal
|
||||
from sonic_platform.thermal import Thermal, MAX_COOLING_LEVEL
|
||||
from sonic_platform.device_data import DeviceDataManager
|
||||
|
||||
|
||||
@pytest.fixture(scope='module', autouse=True)
|
||||
def configure_mocks():
|
||||
check_thermal_zone_temperature = Thermal.check_thermal_zone_temperature
|
||||
set_thermal_algorithm_status = Thermal.set_thermal_algorithm_status
|
||||
Thermal.check_thermal_zone_temperature = MagicMock()
|
||||
Thermal.set_thermal_algorithm_status = MagicMock()
|
||||
|
||||
yield
|
||||
|
||||
Thermal.check_thermal_zone_temperature = check_thermal_zone_temperature
|
||||
Thermal.set_thermal_algorithm_status = set_thermal_algorithm_status
|
||||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
|
||||
def thermal_manager():
|
||||
policy_file = os.path.join(test_path, 'thermal_policy.json')
|
||||
@ -113,51 +99,60 @@ def test_psu_info():
|
||||
assert not psu_info.is_status_changed()
|
||||
|
||||
|
||||
def test_fan_policy(thermal_manager):
|
||||
@patch('sonic_platform.thermal.Thermal.monitor_asic_themal_zone', MagicMock())
|
||||
@patch('sonic_platform.thermal.Thermal.get_cooling_level', MagicMock(return_value=6))
|
||||
@patch('sonic_platform.thermal.Thermal.get_min_allowed_cooling_level_by_thermal_zone', MagicMock(return_value=2))
|
||||
@patch('sonic_platform.thermal.Thermal.set_cooling_state')
|
||||
@patch('sonic_platform.thermal.Thermal.set_cooling_level')
|
||||
def test_fan_policy(mock_set_cooling_level, mock_set_cooling_state, thermal_manager):
|
||||
print('In test_fan_policy')
|
||||
from sonic_platform.thermal import MIN_COOLING_LEVEL_FOR_NORMAL
|
||||
chassis = MockChassis()
|
||||
chassis.make_fan_absence()
|
||||
chassis.get_all_fan_drawers()[0].get_all_fans().append(MockFan())
|
||||
chassis.platform_name = 'some_platform'
|
||||
thermal_manager.run_policy(chassis)
|
||||
|
||||
mock_set_cooling_level.assert_called_with(MAX_COOLING_LEVEL)
|
||||
mock_set_cooling_state.assert_called_with(MAX_COOLING_LEVEL)
|
||||
|
||||
Thermal.expect_cooling_level = None
|
||||
fan_list = chassis.get_all_fan_drawers()[0].get_all_fans()
|
||||
assert fan_list[1].speed == 100
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(False, False)
|
||||
|
||||
fan_list[0].presence = True
|
||||
Thermal.check_thermal_zone_temperature = MagicMock(return_value=True)
|
||||
thermal_manager.run_policy(chassis)
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
|
||||
assert Thermal.check_thermal_zone_temperature.call_count == 2
|
||||
assert fan_list[0].speed == 60
|
||||
assert fan_list[1].speed == 60
|
||||
mock_set_cooling_level.assert_called_with(6)
|
||||
mock_set_cooling_state.assert_called_with(6)
|
||||
|
||||
Thermal.expect_cooling_level = None
|
||||
fan_list[0].status = False
|
||||
thermal_manager.run_policy(chassis)
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(False, False)
|
||||
mock_set_cooling_level.assert_called_with(MAX_COOLING_LEVEL)
|
||||
|
||||
Thermal.expect_cooling_level = None
|
||||
fan_list[0].status = True
|
||||
Thermal.check_thermal_zone_temperature = MagicMock(return_value=False)
|
||||
thermal_manager.run_policy(chassis)
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
|
||||
assert Thermal.check_thermal_zone_temperature.call_count == 2
|
||||
assert fan_list[0].speed == 100
|
||||
assert fan_list[1].speed == 100
|
||||
mock_set_cooling_level.assert_called_with(6)
|
||||
mock_set_cooling_state.assert_called_with(6)
|
||||
|
||||
|
||||
def test_psu_policy(thermal_manager):
|
||||
@patch('sonic_platform.thermal.Thermal.monitor_asic_themal_zone', MagicMock())
|
||||
@patch('sonic_platform.thermal.Thermal.get_min_allowed_cooling_level_by_thermal_zone', MagicMock(return_value=2))
|
||||
@patch('sonic_platform.thermal.Thermal.get_cooling_level', MagicMock(return_value=6))
|
||||
@patch('sonic_platform.thermal.Thermal.set_cooling_state')
|
||||
@patch('sonic_platform.thermal.Thermal.set_cooling_level')
|
||||
def test_psu_policy(mock_set_cooling_level, mock_set_cooling_state, thermal_manager):
|
||||
chassis = MockChassis()
|
||||
chassis.make_psu_absence()
|
||||
chassis.fan_list.append(MockFan())
|
||||
chassis.platform_name = 'some_platform'
|
||||
thermal_manager.run_policy(chassis)
|
||||
|
||||
fan_list = chassis.get_all_fans()
|
||||
assert fan_list[0].speed == 100
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(False, False)
|
||||
mock_set_cooling_level.assert_called_with(MAX_COOLING_LEVEL)
|
||||
mock_set_cooling_state.assert_called_with(MAX_COOLING_LEVEL)
|
||||
|
||||
psu_list = chassis.get_all_psus()
|
||||
psu_list[0].presence = True
|
||||
thermal_manager.run_policy(chassis)
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
|
||||
mock_set_cooling_level.assert_called_with(6)
|
||||
mock_set_cooling_state.assert_called_with(6)
|
||||
|
||||
|
||||
def test_any_fan_absence_condition():
|
||||
@ -328,6 +323,7 @@ def test_load_set_fan_speed_action():
|
||||
action.load_from_json(json_obj)
|
||||
|
||||
|
||||
@patch('sonic_platform.thermal.Thermal.set_cooling_level', MagicMock())
|
||||
def test_execute_set_fan_speed_action():
|
||||
chassis = MockChassis()
|
||||
chassis.get_all_fan_drawers().append(MockFanDrawer())
|
||||
@ -337,85 +333,14 @@ def test_execute_set_fan_speed_action():
|
||||
fan_info = FanInfo()
|
||||
fan_info.collect(chassis)
|
||||
|
||||
Thermal.expect_cooling_level = None
|
||||
from sonic_platform.thermal_actions import SetAllFanSpeedAction
|
||||
action = SetAllFanSpeedAction()
|
||||
action.speed = 99
|
||||
action.speed = 20
|
||||
action.execute({'fan_info': fan_info})
|
||||
assert fan_list[0].speed == 99
|
||||
assert fan_list[1].speed == 99
|
||||
assert Thermal.expect_cooling_level == 2
|
||||
|
||||
|
||||
def test_load_control_thermal_algo_action():
|
||||
from sonic_platform.thermal_actions import ControlThermalAlgoAction
|
||||
action = ControlThermalAlgoAction()
|
||||
json_str = '{\"status\": \"false\"}'
|
||||
json_obj = json.loads(json_str)
|
||||
action.load_from_json(json_obj)
|
||||
assert not action.status
|
||||
|
||||
json_str = '{\"status\": \"true\"}'
|
||||
json_obj = json.loads(json_str)
|
||||
action.load_from_json(json_obj)
|
||||
assert action.status
|
||||
|
||||
json_str = '{\"status\": \"invalid\"}'
|
||||
json_obj = json.loads(json_str)
|
||||
with pytest.raises(ValueError):
|
||||
action.load_from_json(json_obj)
|
||||
|
||||
json_str = '{\"invalid\": \"true\"}'
|
||||
json_obj = json.loads(json_str)
|
||||
with pytest.raises(ValueError):
|
||||
action.load_from_json(json_obj)
|
||||
|
||||
def test_load_check_and_set_speed_action():
|
||||
from sonic_platform.thermal_actions import CheckAndSetAllFanSpeedAction
|
||||
action = CheckAndSetAllFanSpeedAction()
|
||||
json_str = '{\"speed\": \"40\"}'
|
||||
json_obj = json.loads(json_str)
|
||||
action.load_from_json(json_obj)
|
||||
assert action.speed == 40
|
||||
|
||||
json_str = '{\"speed\": \"-1\"}'
|
||||
json_obj = json.loads(json_str)
|
||||
with pytest.raises(ValueError):
|
||||
action.load_from_json(json_obj)
|
||||
|
||||
json_str = '{\"speed\": \"101\"}'
|
||||
json_obj = json.loads(json_str)
|
||||
with pytest.raises(ValueError):
|
||||
action.load_from_json(json_obj)
|
||||
|
||||
json_str = '{\"invalid\": \"60\"}'
|
||||
json_obj = json.loads(json_str)
|
||||
with pytest.raises(ValueError):
|
||||
action.load_from_json(json_obj)
|
||||
|
||||
def test_execute_check_and_set_fan_speed_action():
|
||||
chassis = MockChassis()
|
||||
chassis.get_all_fan_drawers().append(MockFanDrawer())
|
||||
fan_list = chassis.get_all_fan_drawers()[0].get_all_fans()
|
||||
fan_list.append(MockFan())
|
||||
fan_list.append(MockFan())
|
||||
fan_info = FanInfo()
|
||||
fan_info.collect(chassis)
|
||||
Thermal.check_thermal_zone_temperature = MagicMock(return_value=True)
|
||||
|
||||
from sonic_platform.thermal_actions import CheckAndSetAllFanSpeedAction
|
||||
action = CheckAndSetAllFanSpeedAction()
|
||||
action.speed = 99
|
||||
action.execute({'fan_info': fan_info})
|
||||
assert fan_list[0].speed == 99
|
||||
assert fan_list[1].speed == 99
|
||||
|
||||
Thermal.check_thermal_zone_temperature = MagicMock(return_value=False)
|
||||
fan_list[0].speed = 100
|
||||
fan_list[1].speed = 100
|
||||
action.speed = 60
|
||||
action.execute({'fan_info': fan_info})
|
||||
assert fan_list[0].speed == 100
|
||||
assert fan_list[1].speed == 100
|
||||
|
||||
def test_load_duplicate_condition():
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy
|
||||
with open(os.path.join(test_path, 'duplicate_condition.json')) as f:
|
||||
@ -497,54 +422,89 @@ def check_minimum_table_data(platform, minimum_table):
|
||||
assert cooling_level > previous_cooling_level
|
||||
previous_cooling_level = cooling_level
|
||||
|
||||
def test_dynamic_minimum_policy(thermal_manager):
|
||||
from sonic_platform.thermal_conditions import MinCoolingLevelChangeCondition
|
||||
from sonic_platform.thermal_actions import ChangeMinCoolingLevelAction
|
||||
from sonic_platform.thermal_infos import ChassisInfo, FanInfo
|
||||
from sonic_platform.thermal import Thermal
|
||||
from sonic_platform.fan import Fan
|
||||
ThermalManager.initialize()
|
||||
assert 'DynamicMinCoolingLevelPolicy' in thermal_manager._policy_dict
|
||||
policy = thermal_manager._policy_dict['DynamicMinCoolingLevelPolicy']
|
||||
assert MinCoolingLevelChangeCondition in policy.conditions
|
||||
assert ChangeMinCoolingLevelAction in policy.actions
|
||||
|
||||
condition = policy.conditions[MinCoolingLevelChangeCondition]
|
||||
action = policy.actions[ChangeMinCoolingLevelAction]
|
||||
Thermal.check_module_temperature_trustable = MagicMock(return_value='trust')
|
||||
Thermal.get_min_amb_temperature = MagicMock(return_value=35001)
|
||||
assert condition.is_match(None)
|
||||
assert MinCoolingLevelChangeCondition.trust_state == 'trust'
|
||||
assert MinCoolingLevelChangeCondition.temperature == 35
|
||||
assert not condition.is_match(None)
|
||||
|
||||
Thermal.check_module_temperature_trustable = MagicMock(return_value='untrust')
|
||||
assert condition.is_match(None)
|
||||
assert MinCoolingLevelChangeCondition.trust_state == 'untrust'
|
||||
|
||||
Thermal.get_min_amb_temperature = MagicMock(return_value=25999)
|
||||
assert condition.is_match(None)
|
||||
assert MinCoolingLevelChangeCondition.temperature == 25
|
||||
|
||||
@patch('sonic_platform.thermal.Thermal.monitor_asic_themal_zone', MagicMock())
|
||||
@patch('sonic_platform.device_data.DeviceDataManager.get_platform_name')
|
||||
@patch('sonic_platform.thermal.Thermal.get_min_allowed_cooling_level_by_thermal_zone')
|
||||
@patch('sonic_platform.thermal.Thermal.get_min_amb_temperature')
|
||||
@patch('sonic_platform.thermal.Thermal.check_module_temperature_trustable')
|
||||
def test_thermal_recover_policy(mock_check_trustable, mock_get_min_amb, moc_get_min_allowed, mock_platform_name):
|
||||
from sonic_platform.thermal_infos import ChassisInfo
|
||||
from sonic_platform.thermal_actions import ThermalRecoverAction
|
||||
chassis = MockChassis()
|
||||
mock_platform_name.return_value = 'invalid'
|
||||
info = ChassisInfo()
|
||||
info._chassis = chassis
|
||||
fan_info = FanInfo()
|
||||
thermal_info_dict = {ChassisInfo.INFO_NAME: info}
|
||||
|
||||
thermal_info_dict = {
|
||||
ChassisInfo.INFO_NAME: info,
|
||||
FanInfo.INFO_NAME: fan_info
|
||||
}
|
||||
DeviceDataManager.get_platform_name = MagicMock(return_value=None)
|
||||
Fan.get_cooling_level = MagicMock(return_value=5)
|
||||
Fan.set_cooling_level = MagicMock()
|
||||
Thermal.expect_cooling_level = None
|
||||
action = ThermalRecoverAction()
|
||||
moc_get_min_allowed.return_value = 2
|
||||
action.execute(thermal_info_dict)
|
||||
assert Fan.min_cooling_level == 6
|
||||
Fan.set_cooling_level.assert_called_with(6, 6)
|
||||
Fan.set_cooling_level.call_count = 0
|
||||
assert Thermal.expect_cooling_level == 6
|
||||
Thermal.last_set_cooling_level = Thermal.expect_cooling_level
|
||||
|
||||
DeviceDataManager.get_platform_name = MagicMock(return_value='x86_64-mlnx_msn2700-r0')
|
||||
print('Before execute')
|
||||
Thermal.expect_cooling_level = None
|
||||
mock_platform_name.return_value = 'x86_64-mlnx_msn2700-r0'
|
||||
mock_check_trustable.return_value = 'trust'
|
||||
mock_get_min_amb.return_value = 29999
|
||||
moc_get_min_allowed.return_value = None
|
||||
action.execute(thermal_info_dict)
|
||||
assert Fan.min_cooling_level == 3
|
||||
Fan.set_cooling_level.assert_called_with(3, 5)
|
||||
assert Thermal.expect_cooling_level is None
|
||||
|
||||
moc_get_min_allowed.return_value = 4
|
||||
action.execute(thermal_info_dict)
|
||||
assert Thermal.expect_cooling_level == 4
|
||||
Thermal.last_set_cooling_level = Thermal.expect_cooling_level
|
||||
|
||||
mock_check_trustable.return_value = 'untrust'
|
||||
mock_get_min_amb.return_value = 31001
|
||||
action.execute(thermal_info_dict)
|
||||
assert Thermal.expect_cooling_level == 5
|
||||
|
||||
|
||||
@patch('sonic_platform.thermal.Thermal.set_cooling_state')
@patch('sonic_platform.utils.read_int_from_file')
def test_monitor_asic_themal_zone(mock_read_int, mock_set_cooling_state):
    """Check that Thermal.monitor_asic_themal_zone requests max cooling when hot.

    utils.read_int_from_file is mocked to return two values per scenario.
    NOTE(review): presumably the first value is the ASIC temperature and the
    second its threshold -- confirm against Thermal.monitor_asic_themal_zone.
    """
    # First read (111000) is above the second (105000): max cooling state
    # is expected, and committing must push it via set_cooling_state.
    mock_read_int.side_effect = [111000, 105000]
    Thermal.monitor_asic_themal_zone()
    assert Thermal.expect_cooling_state == MAX_COOLING_LEVEL
    Thermal.commit_cooling_level({})
    mock_set_cooling_state.assert_called_with(MAX_COOLING_LEVEL)

    # reset_mock() is the actual Mock API for clearing recorded calls; the
    # former `.reset()` only auto-created a child mock and cleared nothing.
    # side_effect is not touched by reset_mock(), so it is re-armed below.
    mock_read_int.reset_mock()

    # First read (104000) is below the second (105000): no cooling state
    # should be requested.
    mock_read_int.side_effect = [104000, 105000]
    Thermal.monitor_asic_themal_zone()
    assert Thermal.expect_cooling_state is None
|
||||
|
||||
|
||||
def test_set_expect_cooling_level():
    """Check that Thermal.set_expect_cooling_level only ever raises the level.

    A lower request (3 after 5) must leave the expected level unchanged,
    while a higher request (10) must replace it.
    """
    # Start from a clean slate: expect_cooling_level is class-level state
    # that other tests modify, so without this reset the first assertion
    # depends on test execution order.
    Thermal.expect_cooling_level = None

    Thermal.set_expect_cooling_level(5)
    assert Thermal.expect_cooling_level == 5

    # A lower value must not override the current expectation.
    Thermal.set_expect_cooling_level(3)
    assert Thermal.expect_cooling_level == 5

    # A higher value wins.
    Thermal.set_expect_cooling_level(10)
    assert Thermal.expect_cooling_level == 10
|
||||
|
||||
|
||||
@patch('sonic_platform.thermal.Thermal.commit_cooling_level', MagicMock())
@patch('sonic_platform.thermal_conditions.AnyFanFaultCondition.is_match')
@patch('sonic_platform.thermal_manager.ThermalManager._collect_thermal_information')
@patch('sonic_platform.thermal.Thermal.set_expect_cooling_level')
def test_run_policy(mock_expect, mock_collect_info, mock_match, thermal_manager):
    """Check how run_policy behaves on failures and after the manager is stopped.

    When collecting thermal information raises, or when a condition's
    is_match raises, MAX_COOLING_LEVEL must be requested. After stop(),
    run_policy must not request any cooling level.
    """
    mock_chassis = MockChassis()

    # Scenario 1: information collection blows up.
    mock_collect_info.side_effect = Exception('')
    thermal_manager.run_policy(mock_chassis)
    mock_expect.assert_called_with(MAX_COOLING_LEVEL)

    # Scenario 2: collection succeeds, but condition matching blows up.
    mock_collect_info.side_effect = None
    mock_expect.reset_mock()
    mock_match.side_effect = Exception('')
    thermal_manager.run_policy(mock_chassis)
    mock_expect.assert_called_with(MAX_COOLING_LEVEL)

    # Scenario 3: a stopped manager must not request any cooling level.
    thermal_manager.stop()
    mock_expect.reset_mock()
    thermal_manager.run_policy(mock_chassis)
    assert not mock_expect.called
|
||||
|
||||
|
@ -23,10 +23,6 @@
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.set_speed",
|
||||
"speed": "100"
|
||||
@ -41,10 +37,6 @@
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.set_speed",
|
||||
"speed": "100"
|
||||
@ -59,10 +51,6 @@
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.set_speed",
|
||||
"speed": "100"
|
||||
@ -84,12 +72,8 @@
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"type": "thermal.recover",
|
||||
"status": "true"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.check_and_set_speed",
|
||||
"speed": "60"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user