[Mellanox] thermal control enhancement for dynamic minimum fan speed and PSU fan speed policy (#4403)
This commit is contained in:
parent
860cb265ac
commit
c730f3e207
@ -1,6 +1,6 @@
|
||||
{
|
||||
"thermal_control_algorithm": {
|
||||
"run_at_boot_up": "false",
|
||||
"run_at_boot_up": "true",
|
||||
"fan_speed_when_suspend": "60"
|
||||
},
|
||||
"info_types": [
|
||||
@ -51,6 +51,24 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "any fan broken",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "fan.any.fault"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.set_speed",
|
||||
"speed": "100"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "all fan and psu presence",
|
||||
"conditions": [
|
||||
@ -59,12 +77,15 @@
|
||||
},
|
||||
{
|
||||
"type": "psu.all.presence"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.good"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "fan.all.set_speed",
|
||||
"speed": "60"
|
||||
"type": "thermal_control.control",
|
||||
"status": "true"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -17,7 +17,8 @@ RUN apt-get update && \
|
||||
rrdtool \
|
||||
python-smbus \
|
||||
ethtool \
|
||||
dmidecode && \
|
||||
dmidecode \
|
||||
i2c-tools && \
|
||||
pip install enum34
|
||||
|
||||
{% if docker_platform_monitor_debs.strip() -%}
|
||||
|
@ -29,6 +29,7 @@ MAX_SELECT_DELAY = 3600
|
||||
MLNX_NUM_PSU = 2
|
||||
|
||||
GET_HWSKU_CMD = "sonic-cfggen -d -v DEVICE_METADATA.localhost.hwsku"
|
||||
GET_PLATFORM_CMD = "sonic-cfggen -d -v DEVICE_METADATA.localhost.platform"
|
||||
|
||||
EEPROM_CACHE_ROOT = '/var/cache/sonic/decode-syseeprom'
|
||||
EEPROM_CACHE_FILE = 'syseeprom_cache'
|
||||
@ -60,6 +61,7 @@ class Chassis(ChassisBase):
|
||||
|
||||
# Initialize SKU name
|
||||
self.sku_name = self._get_sku_name()
|
||||
self.platform_name = self._get_platform_name()
|
||||
mi = get_machine_info()
|
||||
if mi is not None:
|
||||
self.name = mi['onie_platform']
|
||||
@ -110,9 +112,9 @@ class Chassis(ChassisBase):
|
||||
|
||||
for index in range(num_of_fan):
|
||||
if multi_rotor_in_drawer:
|
||||
fan = Fan(has_fan_dir, index, index/2, False, self.sku_name)
|
||||
fan = Fan(has_fan_dir, index, index/2, False, self.platform_name)
|
||||
else:
|
||||
fan = Fan(has_fan_dir, index, index, False, self.sku_name)
|
||||
fan = Fan(has_fan_dir, index, index, False, self.platform_name)
|
||||
self._fan_list.append(fan)
|
||||
|
||||
|
||||
@ -245,6 +247,12 @@ class Chassis(ChassisBase):
|
||||
return out.rstrip('\n')
|
||||
|
||||
|
||||
def _get_platform_name(self):
    """
    Return the platform name reported by GET_PLATFORM_CMD.

    The command is run through the shell and its standard output is
    returned with trailing newline characters removed.
    """
    process = subprocess.Popen(GET_PLATFORM_CMD, shell=True, stdout=subprocess.PIPE)
    # communicate() yields (stdout, stderr); stderr is not piped, so only
    # the first element carries data.
    stdout_data = process.communicate()[0]
    return stdout_data.rstrip('\n')
|
||||
|
||||
|
||||
def _get_port_position_tuple_by_sku_name(self):
|
||||
position_tuple = port_position_tuple_list[hwsku_dict_port[self.sku_name]]
|
||||
return position_tuple
|
||||
|
@ -0,0 +1,101 @@
|
||||
# Per-platform device data, keyed by platform name.
#
# 'thermal' -> 'minimum_table' layout, as consumed by
# ChangeMinCoolingLevelAction:
#   * table key:  "<air flow direction>_<trust state>", where the direction is
#                 one of 'p2c' / 'c2p' / 'unk' and the trust state is
#                 'trust' / 'untrust'.
#   * entry key:  "<min>:<max>" inclusive temperature range, compared against
#                 the ambient reading divided by 1000
#                 (presumably degrees Celsius - TODO confirm units).
#   * entry value: minimum cooling level plus 10 (the consumer subtracts 10).
#
# A platform without a 'thermal'/'minimum_table' entry makes the consumer
# fall back to its default cooling level for unknown platforms.
DEVICE_DATA = {
    'x86_64-mlnx_msn2700-r0': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:40": 13, "41:120": 15},
                "p2c_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "c2p_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn2740-r0': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:120": 13},
                "p2c_untrust": {"-127:35": 13, "36:40": 14, "41:120": 15},
                "c2p_trust": {"-127:120": 13},
                "c2p_untrust": {"-127:15": 13, "16:30": 14, "31:35": 15, "36:120": 17},
                "unk_trust": {"-127:120": 13},
                "unk_untrust": {"-127:15": 13, "16:30": 14, "31:35": 15, "36:120": 17},
            },
        },
    },
    'x86_64-mlnx_msn2100-r0': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:120": 12},
                "p2c_untrust": {"-127:15": 12, "16:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:40": 12, "41:120": 13},
                "c2p_untrust": {"-127:40": 12, "41:120": 13},
                "unk_trust": {"-127:40": 12, "41:120": 13},
                "unk_untrust": {"-127:15": 12, "16:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn2410-r0': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:40": 13, "41:120": 15},
                "p2c_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "c2p_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn2010-r0': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:120": 12},
                "p2c_untrust": {"-127:15": 12, "16:20": 13, "21:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:120": 12},
                "c2p_untrust": {"-127:20": 12, "21:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "unk_trust": {"-127:120": 12},
                "unk_untrust": {"-127:15": 12, "16:20": 13, "21:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn3700-r0': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "p2c_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
                "c2p_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "c2p_untrust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "unk_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "unk_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn3700c-r0': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "p2c_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
                "c2p_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "c2p_untrust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "unk_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "unk_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn3800-r0': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:35": 12, "36:120": 13},
                "p2c_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
                "c2p_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
                "c2p_untrust": {"-127:20": 12, "21:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
                "unk_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
                "unk_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
            },
        },
    },
    # No thermal data yet for this platform.
    'x86_64-mlnx_msn4700-r0': {
    },
}
|
@ -9,6 +9,7 @@
|
||||
#############################################################################
|
||||
|
||||
import os.path
|
||||
import subprocess
|
||||
|
||||
try:
|
||||
from sonic_platform_base.fan_base import FanBase
|
||||
@ -22,25 +23,34 @@ PWM_MAX = 255
|
||||
|
||||
FAN_PATH = "/var/run/hw-management/thermal/"
|
||||
LED_PATH = "/var/run/hw-management/led/"
|
||||
CONFIG_PATH = "/var/run/hw-management/config"
|
||||
# fan_dir isn't supported on Spectrum 1. It is supported on Spectrum 2 and later switches
|
||||
FAN_DIR = "/var/run/hw-management/system/fan_dir"
|
||||
COOLING_STATE_PATH = "/var/run/hw-management/thermal/cooling_cur_state"
|
||||
|
||||
# SKUs with unplugable FANs:
|
||||
# Platforms with unplugable FANs:
|
||||
# 1. don't have fanX_status and should be treated as always present
|
||||
hwsku_dict_with_unplugable_fan = ['ACS-MSN2010', 'ACS-MSN2100']
|
||||
platform_with_unplugable_fan = ['x86_64-mlnx_msn2010-r0', 'x86_64-mlnx_msn2100-r0']
|
||||
|
||||
|
||||
class Fan(FanBase):
|
||||
"""Platform-specific Fan class"""
|
||||
|
||||
STATUS_LED_COLOR_ORANGE = "orange"
|
||||
min_cooling_level = 2
|
||||
MIN_VALID_COOLING_LEVEL = 1
|
||||
MAX_VALID_COOLING_LEVEL = 10
|
||||
# PSU fan speed vector
|
||||
PSU_FAN_SPEED = ['0x3c', '0x3c', '0x3c', '0x3c', '0x3c',
|
||||
'0x3c', '0x3c', '0x46', '0x50', '0x5a', '0x64']
|
||||
|
||||
def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, sku = None):
|
||||
def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, platform = None):
|
||||
# API index is starting from 0, Mellanox platform index is starting from 1
|
||||
self.index = fan_index + 1
|
||||
self.drawer_index = drawer_index + 1
|
||||
|
||||
self.is_psu_fan = psu_fan
|
||||
self.always_presence = False if sku not in hwsku_dict_with_unplugable_fan else True
|
||||
self.always_presence = False if platform not in platform_with_unplugable_fan else True
|
||||
|
||||
self.fan_min_speed_path = "fan{}_min".format(self.index)
|
||||
if not self.is_psu_fan:
|
||||
@ -54,6 +64,10 @@ class Fan(FanBase):
|
||||
self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index)
|
||||
self._name = 'psu_{}_fan_{}'.format(self.index, 1)
|
||||
self.fan_max_speed_path = None
|
||||
self.psu_i2c_bus_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_bus'.format(self.index))
|
||||
self.psu_i2c_addr_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_addr'.format(self.index))
|
||||
self.psu_i2c_command_path = os.path.join(CONFIG_PATH, 'fan_command')
|
||||
|
||||
self.fan_status_path = "fan{}_fault".format(self.index)
|
||||
self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index)
|
||||
self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index)
|
||||
@ -90,7 +104,7 @@ class Fan(FanBase):
|
||||
|
||||
try:
|
||||
with open(os.path.join(self.fan_dir), 'r') as fan_dir:
|
||||
fan_dir_bits = int(fan_dir.read())
|
||||
fan_dir_bits = int(fan_dir.read().strip())
|
||||
fan_mask = 1 << self.drawer_index - 1
|
||||
if fan_dir_bits & fan_mask:
|
||||
return self.FAN_DIRECTION_INTAKE
|
||||
@ -116,7 +130,7 @@ class Fan(FanBase):
|
||||
else:
|
||||
try:
|
||||
with open(os.path.join(FAN_PATH, self.fan_status_path), 'r') as fault_status:
|
||||
status = int(fault_status.read())
|
||||
status = int(fault_status.read().strip())
|
||||
except (ValueError, IOError):
|
||||
status = 1
|
||||
|
||||
@ -142,7 +156,7 @@ class Fan(FanBase):
|
||||
else:
|
||||
try:
|
||||
with open(os.path.join(FAN_PATH, self.fan_presence_path), 'r') as presence_status:
|
||||
status = int(presence_status.read())
|
||||
status = int(presence_status.read().strip())
|
||||
except (ValueError, IOError):
|
||||
status = 0
|
||||
|
||||
@ -164,7 +178,7 @@ class Fan(FanBase):
|
||||
speed = 0
|
||||
try:
|
||||
with open(os.path.join(FAN_PATH, self.fan_max_speed_path), 'r') as max_fan_speed:
|
||||
speed = int(max_fan_speed.read())
|
||||
speed = int(max_fan_speed.read().strip())
|
||||
except (ValueError, IOError):
|
||||
speed = 0
|
||||
|
||||
@ -181,7 +195,7 @@ class Fan(FanBase):
|
||||
speed = 0
|
||||
try:
|
||||
with open(os.path.join(FAN_PATH, self.fan_speed_get_path), 'r') as fan_curr_speed:
|
||||
speed_in_rpm = int(fan_curr_speed.read())
|
||||
speed_in_rpm = int(fan_curr_speed.read().strip())
|
||||
except (ValueError, IOError):
|
||||
speed_in_rpm = 0
|
||||
|
||||
@ -210,7 +224,7 @@ class Fan(FanBase):
|
||||
|
||||
try:
|
||||
with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm:
|
||||
pwm = int(fan_pwm.read())
|
||||
pwm = int(fan_pwm.read().strip())
|
||||
except (ValueError, IOError):
|
||||
pwm = 0
|
||||
|
||||
@ -231,13 +245,34 @@ class Fan(FanBase):
|
||||
bool: True if set success, False if fail.
|
||||
"""
|
||||
status = True
|
||||
pwm = int(round(PWM_MAX*speed/100.0))
|
||||
|
||||
if self.is_psu_fan:
|
||||
#PSU fan speed is not setable.
|
||||
return False
|
||||
|
||||
from .thermal import logger
|
||||
try:
|
||||
with open(self.psu_i2c_bus_path, 'r') as f:
|
||||
bus = f.read().strip()
|
||||
with open(self.psu_i2c_addr_path, 'r') as f:
|
||||
addr = f.read().strip()
|
||||
with open(self.psu_i2c_command_path, 'r') as f:
|
||||
command = f.read().strip()
|
||||
speed = Fan.PSU_FAN_SPEED[int(speed / 10)]
|
||||
command = "i2cset -f -y {0} {1} {2} {3} wp".format(bus, addr, command, speed)
|
||||
subprocess.check_call(command, shell = True)
|
||||
return True
|
||||
except subprocess.CalledProcessError as ce:
|
||||
logger.log_error('Failed to call command {}, return code={}, command output={}'.format(ce.cmd, ce.returncode, ce.output))
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.log_error('Failed to set PSU FAN speed - {}'.format(e))
|
||||
return False
|
||||
|
||||
try:
|
||||
cooling_level = int(speed / 10)
|
||||
if cooling_level < self.min_cooling_level:
|
||||
cooling_level = self.min_cooling_level
|
||||
speed = self.min_cooling_level * 10
|
||||
self.set_cooling_level(cooling_level, cooling_level)
|
||||
pwm = int(round(PWM_MAX*speed/100.0))
|
||||
with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'w') as fan_pwm:
|
||||
fan_pwm.write(str(pwm))
|
||||
except (ValueError, IOError):
|
||||
@ -352,3 +387,42 @@ class Fan(FanBase):
|
||||
"""
|
||||
# The tolerance value is fixed as 20% for all the Mellanox platform
|
||||
return 20
|
||||
|
||||
@classmethod
def set_cooling_level(cls, level, cur_state):
    """
    Program the cooling level through the cooling state file.

    Args:
        level: Integer cooling level within
            [MIN_VALID_COOLING_LEVEL, MAX_VALID_COOLING_LEVEL];
            1 means 10%, 2 means 20%, 10 means 100%.
        cur_state: Value written as the current cooling state once the
            cooling level vector has been reset.

    Raises:
        RuntimeError: If level is not an integer, is out of range, or the
            cooling state file cannot be written.
    """
    if not isinstance(level, int):
        raise RuntimeError("Failed to set cooling level, input parameter must be integer")

    lowest = cls.MIN_VALID_COOLING_LEVEL
    highest = cls.MAX_VALID_COOLING_LEVEL
    if not lowest <= level <= highest:
        raise RuntimeError("Failed to set cooling level, level value must be in range [{}, {}], got {}".format(
            lowest,
            highest,
            level
        ))

    try:
        # According to the low level team, setting cooling level X requires
        # writing (10 + X) first to reset the cooling level vector; only
        # then can the requested state be written.
        for value in (level + 10, cur_state):
            with open(COOLING_STATE_PATH, 'w') as state_file:
                state_file.write(str(value))
    except (ValueError, IOError) as e:
        raise RuntimeError("Failed to set cooling level - {}".format(e))
|
||||
|
||||
@classmethod
def get_cooling_level(cls):
    """
    Read the current cooling level from the cooling state file.

    Returns:
        The file content parsed as an integer.

    Raises:
        RuntimeError: If the file cannot be read or does not hold an integer.
    """
    try:
        with open(COOLING_STATE_PATH, 'r') as state_file:
            raw_state = state_file.read()
        return int(raw_state.strip())
    except (ValueError, IOError) as e:
        raise RuntimeError("Failed to get cooling level - {}".format(e))
|
||||
|
||||
|
@ -101,7 +101,7 @@ class Psu(PsuBase):
|
||||
|
||||
# unplugable PSU has no FAN
|
||||
if sku not in hwsku_dict_with_unplugable_psu:
|
||||
fan = Fan(sku, psu_index, psu_index, True)
|
||||
fan = Fan(False, psu_index, psu_index, True)
|
||||
self._fan_list.append(fan)
|
||||
|
||||
self.psu_green_led_path = "led_psu_green"
|
||||
@ -121,7 +121,7 @@ class Psu(PsuBase):
|
||||
result = 0
|
||||
try:
|
||||
with open(filename, 'r') as fileobj:
|
||||
result = int(fileobj.read())
|
||||
result = int(fileobj.read().strip())
|
||||
except Exception as e:
|
||||
logger.log_info("Fail to read file {} due to {}".format(filename, repr(e)))
|
||||
return result
|
||||
|
@ -42,6 +42,16 @@ THERMAL_API_INVALID_HIGH_THRESHOLD = 0.0
|
||||
|
||||
HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/"
|
||||
|
||||
THERMAL_ZONE_ASIC_PATH = "/var/run/hw-management/thermal/mlxsw/"
|
||||
THERMAL_ZONE_MODULE_PATH = "/var/run/hw-management/thermal/mlxsw-module{}/"
|
||||
THERMAL_ZONE_GEARBOX_PATH = "/var/run/hw-management/thermal/mlxsw-gearbox{}/"
|
||||
THERMAL_ZONE_MODE = "thermal_zone_mode"
|
||||
THERMAL_ZONE_POLICY = "thermal_zone_policy"
|
||||
THERMAL_ZONE_TEMPERATURE = "thermal_zone_temp"
|
||||
THERMAL_ZONE_NORMAL_TEMPERATURE = "temp_trip_norm"
|
||||
|
||||
MODULE_TEMPERATURE_FAULT_PATH = "/var/run/hw-management/thermal/module{}_temp_fault"
|
||||
|
||||
thermal_api_handler_cpu_core = {
|
||||
THERMAL_API_GET_TEMPERATURE:"cpu_core{}",
|
||||
THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max",
|
||||
@ -262,6 +272,7 @@ def initialize_thermals(sku, thermal_list, psu_list):
|
||||
# create thermal objects for all categories of sensors
|
||||
tp_index = hwsku_dict_thermal[sku]
|
||||
thermal_profile = thermal_profile_list[tp_index]
|
||||
Thermal.thermal_profile = thermal_profile
|
||||
for category in thermal_device_categories_all:
|
||||
if category == THERMAL_DEV_CATEGORY_AMBIENT:
|
||||
count, ambient_list = thermal_profile[category]
|
||||
@ -290,6 +301,9 @@ def initialize_thermals(sku, thermal_list, psu_list):
|
||||
|
||||
|
||||
class Thermal(ThermalBase):
|
||||
thermal_profile = None
|
||||
thermal_algorithm_status = False
|
||||
|
||||
def __init__(self, category, index, has_index, dependency = None):
|
||||
"""
|
||||
index should be a string for category ambient and int for other categories
|
||||
@ -321,14 +335,15 @@ class Thermal(ThermalBase):
|
||||
return self.name
|
||||
|
||||
|
||||
def _read_generic_file(self, filename, len):
|
||||
@classmethod
|
||||
def _read_generic_file(cls, filename, len):
|
||||
"""
|
||||
Read a generic file, returns the contents of the file
|
||||
"""
|
||||
result = None
|
||||
try:
|
||||
with open(filename, 'r') as fileobj:
|
||||
result = fileobj.read()
|
||||
result = fileobj.read().strip()
|
||||
except Exception as e:
|
||||
logger.log_info("Fail to read file {} due to {}".format(filename, repr(e)))
|
||||
return result
|
||||
@ -420,3 +435,132 @@ class Thermal(ThermalBase):
|
||||
if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD:
|
||||
return None
|
||||
return value_float / 1000.0
|
||||
|
||||
|
||||
@classmethod
def _write_generic_file(cls, filename, content):
    """
    Write content to the given file unless the file already holds it.

    The existing content is read first (surrounding whitespace ignored) and
    the file is only opened for writing when it differs from ``content``.
    Opening with 'w+' before reading, as was done previously, truncates the
    file, so the comparison always saw an empty string.

    Args:
        filename: Path of the file to update.
        content: String to store in the file.

    Failures are logged and swallowed; nothing is raised to the caller.
    """
    try:
        try:
            with open(filename, 'r') as file_obj:
                origin_content = file_obj.read().strip()
        except (IOError, OSError):
            # Missing or unreadable file: fall through and attempt the write.
            origin_content = None

        if origin_content != content:
            with open(filename, 'w') as file_obj:
                file_obj.write(content)
    except Exception as e:
        logger.log_info("Fail to write file {} due to {}".format(filename, repr(e)))
|
||||
|
||||
@classmethod
def set_thermal_algorithm_status(cls, status, force=True):
    """
    Enable or disable the kernel thermal algorithm.

    While enabled, the kernel adjusts fan speed according to the thermal
    zone temperatures, but only when a temperature crosses an "edge" such
    as a high threshold. While disabled, the kernel leaves the fan speed
    alone; this is used to pin a fixed speed, e.g. 100% when a fan unit has
    been removed.

    Args:
        status: True to enable the algorithm, False to disable it.
        force: When False, nothing is written if the cached status already
            equals ``status``.

    Raises:
        Exception: If no thermal profile has been set on the class.
    """
    if not cls.thermal_profile:
        raise Exception("Fail to get thermal profile for this switch")

    if cls.thermal_algorithm_status == status and not force:
        return

    cls.thermal_algorithm_status = status
    if status:
        content, policy = "enabled", "step_wise"
    else:
        content, policy = "disabled", "user_space"

    def apply_to_zone(zone_path):
        # Mode is written before policy for every zone.
        cls._write_generic_file(join(zone_path, THERMAL_ZONE_MODE), content)
        cls._write_generic_file(join(zone_path, THERMAL_ZONE_POLICY), policy)

    apply_to_zone(THERMAL_ZONE_ASIC_PATH)

    indexed_zones = (
        (THERMAL_DEV_CATEGORY_MODULE, THERMAL_ZONE_MODULE_PATH),
        (THERMAL_DEV_CATEGORY_GEARBOX, THERMAL_ZONE_GEARBOX_PATH),
    )
    for category, path_template in indexed_zones:
        if category not in cls.thermal_profile:
            continue
        start, count = cls.thermal_profile[category]
        if count != 0:
            for offset in range(count):
                apply_to_zone(path_template.format(start + offset))
|
||||
|
||||
@classmethod
def check_thermal_zone_temperature(cls):
    """
    Compare every thermal zone's current temperature with its normal one.

    The ASIC zone is checked first, followed by the module and gearbox
    zones listed in the thermal profile; checking stops at the first zone
    that fails.

    Returns:
        True if all thermal zones are at or below their normal temperature,
        False otherwise.

    Raises:
        Exception: If no thermal profile has been set on the class.
    """
    if not cls.thermal_profile:
        raise Exception("Fail to get thermal profile for this switch")

    if not cls._check_thermal_zone_temperature(THERMAL_ZONE_ASIC_PATH):
        return False

    indexed_zones = (
        (THERMAL_DEV_CATEGORY_MODULE, THERMAL_ZONE_MODULE_PATH),
        (THERMAL_DEV_CATEGORY_GEARBOX, THERMAL_ZONE_GEARBOX_PATH),
    )
    for category, path_template in indexed_zones:
        if category not in cls.thermal_profile:
            continue
        start, count = cls.thermal_profile[category]
        if count != 0:
            for offset in range(count):
                zone_path = path_template.format(start + offset)
                if not cls._check_thermal_zone_temperature(zone_path):
                    return False

    return True
|
||||
|
||||
@classmethod
def _check_thermal_zone_temperature(cls, thermal_zone_path):
    """
    Check a single thermal zone against its normal temperature.

    Args:
        thermal_zone_path: Directory that holds the zone's normal
            temperature and current temperature files.

    Returns:
        True if the current temperature is less than or equal to the normal
        temperature. False if it is higher, or if either file could not be
        read or parsed (the failure is logged).
    """
    normal_temp_path = join(thermal_zone_path, THERMAL_ZONE_NORMAL_TEMPERATURE)
    current_temp_path = join(thermal_zone_path, THERMAL_ZONE_TEMPERATURE)
    try:
        with open(normal_temp_path, 'r') as file_obj:
            normal = float(file_obj.read())

        with open(current_temp_path, 'r') as file_obj:
            current = float(file_obj.read())

        return current <= normal
    except Exception as e:
        logger.log_info("Fail to check thermal zone temperature for file {} due to {}".format(thermal_zone_path, repr(e)))
        # Previously this path fell off the end and returned None, which
        # callers treated as "not normal" via truthiness; return an explicit
        # boolean so the result type is consistent.
        return False
|
||||
|
||||
@classmethod
def check_module_temperature_trustable(cls):
    """
    Report whether the module temperature readings can be trusted.

    Every module listed in the thermal profile has its temperature fault
    file read; any content other than '0' marks the readings as untrusted.
    A fault file that cannot be read is also treated as untrusted, rather
    than crashing on the None returned by the file reader.

    Returns:
        'untrust' if any module reports a fault or its fault file is
        unreadable, otherwise 'trust' (including when the profile lists no
        modules).

    Raises:
        Exception: If no thermal profile has been set on the class.
    """
    if not cls.thermal_profile:
        raise Exception("Fail to get thermal profile for this switch")

    # Same guard the other profile-driven methods of this class use: a
    # profile without a module category has no module readings to distrust.
    if THERMAL_DEV_CATEGORY_MODULE not in cls.thermal_profile:
        return 'trust'

    start, count = cls.thermal_profile[THERMAL_DEV_CATEGORY_MODULE]
    for index in range(count):
        fault_file_path = MODULE_TEMPERATURE_FAULT_PATH.format(index + start)
        fault = cls._read_generic_file(fault_file_path, 0)
        if fault is None or fault.strip() != '0':
            return 'untrust'
    return 'trust'
|
||||
|
||||
@classmethod
def get_air_flow_direction(cls):
    """
    Derive the air flow direction from the two ambient sensors.

    Returns:
        A (direction, temperature) tuple:
        'p2c' with the fan ambient reading when the fan side is hotter,
        'c2p' with the port ambient reading when the port side is hotter,
        'unk' with the fan ambient reading when both are equal.

    Any failure to read or parse a sensor file is deliberately left to
    propagate to the caller.
    """
    fan_ambient_path = join(HW_MGMT_THERMAL_ROOT, THERMAL_DEV_FAN_AMBIENT)
    port_ambient_path = join(HW_MGMT_THERMAL_ROOT, THERMAL_DEV_PORT_AMBIENT)

    fan_side = int(cls._read_generic_file(fan_ambient_path, 0))
    port_side = int(cls._read_generic_file(port_ambient_path, 0))

    if fan_side == port_side:
        return 'unk', fan_side
    if fan_side > port_side:
        return 'p2c', fan_side
    return 'c2p', port_side
|
||||
|
@ -1,5 +1,6 @@
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
|
||||
from .thermal import logger
|
||||
|
||||
|
||||
class SetFanSpeedAction(ThermalPolicyActionBase):
|
||||
@ -52,7 +53,38 @@ class SetAllFanSpeedAction(SetFanSpeedAction):
|
||||
fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME]
|
||||
for fan in fan_info_obj.get_presence_fans():
|
||||
fan.set_speed(self.speed)
|
||||
logger.log_info('Set all system FAN speed to {}'.format(self.speed))
|
||||
|
||||
SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, self.speed)
|
||||
|
||||
@classmethod
|
||||
def set_psu_fan_speed(cls, thermal_info_dict, speed):
|
||||
from .thermal_infos import ChassisInfo
|
||||
if ChassisInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[ChassisInfo.INFO_NAME], ChassisInfo):
|
||||
chassis = thermal_info_dict[ChassisInfo.INFO_NAME].get_chassis()
|
||||
for psu in chassis.get_all_psus():
|
||||
for psu_fan in psu.get_all_fans():
|
||||
psu_fan.set_speed(speed)
|
||||
|
||||
logger.log_info('Updated PSU FAN speed to {}%'.format(speed))
|
||||
|
||||
|
||||
|
||||
@thermal_json_object('fan.all.check_and_set_speed')
class CheckAndSetAllFanSpeedAction(SetAllFanSpeedAction):
    """
    Action that sets the speed of all fans, but only while every thermal
    zone is at or below its normal temperature.
    """
    def execute(self, thermal_info_dict):
        """
        Check the thermal zones and, if they are all normal, set the speed
        for all fans.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .thermal import Thermal
        if not Thermal.check_thermal_zone_temperature():
            return
        SetAllFanSpeedAction.execute(self, thermal_info_dict)
|
||||
|
||||
|
||||
@thermal_json_object('thermal_control.control')
|
||||
class ControlThermalAlgoAction(ThermalPolicyActionBase):
|
||||
@ -95,14 +127,80 @@ class ControlThermalAlgoAction(ThermalPolicyActionBase):
|
||||
:param thermal_info_dict: A dictionary stores all thermal information.
|
||||
:return:
|
||||
"""
|
||||
from .thermal_infos import FanInfo
|
||||
from .thermal import Thermal
|
||||
from .thermal_conditions import UpdateCoolingLevelToMinCondition
|
||||
from .fan import Fan
|
||||
Thermal.set_thermal_algorithm_status(self.status, False)
|
||||
if self.status:
|
||||
# Check thermal zone temperature, if all thermal zone temperature
|
||||
# back to normal, set it to minimum allowed speed to
|
||||
# save power
|
||||
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
|
||||
|
||||
logger.log_info('Changed thermal algorithm status to {}'.format(self.status))
|
||||
|
||||
|
||||
class ChangeMinCoolingLevelAction(ThermalPolicyActionBase):
|
||||
UNKNOWN_SKU_COOLING_LEVEL = 6
|
||||
def execute(self, thermal_info_dict):
|
||||
from .device_data import DEVICE_DATA
|
||||
from .fan import Fan
|
||||
from .thermal_infos import ChassisInfo
|
||||
if ChassisInfo.INFO_NAME in thermal_info_dict:
|
||||
chassis_info_obj = thermal_info_dict[ChassisInfo.INFO_NAME]
|
||||
chassis = chassis_info_obj.get_chassis()
|
||||
thermal_manager = chassis.get_thermal_manager()
|
||||
if self.status:
|
||||
thermal_manager.start_thermal_control_algorithm()
|
||||
else:
|
||||
thermal_manager.stop_thermal_control_algorithm()
|
||||
from .thermal_conditions import MinCoolingLevelChangeCondition
|
||||
from .thermal_conditions import UpdateCoolingLevelToMinCondition
|
||||
|
||||
chassis = thermal_info_dict[ChassisInfo.INFO_NAME].get_chassis()
|
||||
if chassis.platform_name not in DEVICE_DATA or 'thermal' not in DEVICE_DATA[chassis.platform_name] or 'minimum_table' not in DEVICE_DATA[chassis.platform_name]['thermal']:
|
||||
Fan.min_cooling_level = ChangeMinCoolingLevelAction.UNKNOWN_SKU_COOLING_LEVEL
|
||||
else:
|
||||
air_flow_dir = MinCoolingLevelChangeCondition.air_flow_dir
|
||||
trust_state = MinCoolingLevelChangeCondition.trust_state
|
||||
temperature = MinCoolingLevelChangeCondition.temperature
|
||||
minimum_table = DEVICE_DATA[chassis.platform_name]['thermal']['minimum_table']['{}_{}'.format(air_flow_dir, trust_state)]
|
||||
|
||||
for key, cooling_level in minimum_table.items():
|
||||
temp_range = key.split(':')
|
||||
temp_min = int(temp_range[0].strip())
|
||||
temp_max = int(temp_range[1].strip())
|
||||
if temp_min <= temperature <= temp_max:
|
||||
Fan.min_cooling_level = cooling_level - 10
|
||||
break
|
||||
|
||||
current_cooling_level = Fan.get_cooling_level()
|
||||
if current_cooling_level < Fan.min_cooling_level:
|
||||
Fan.set_cooling_level(Fan.min_cooling_level, Fan.min_cooling_level)
|
||||
SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, Fan.min_cooling_level * 10)
|
||||
else:
|
||||
Fan.set_cooling_level(Fan.min_cooling_level, current_cooling_level)
|
||||
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
|
||||
|
||||
logger.log_info('Changed minimum cooling level to {}'.format(Fan.min_cooling_level))
|
||||
|
||||
|
||||
class UpdatePsuFanSpeedAction(ThermalPolicyActionBase):
    """
    Action that sets the PSU fan speed from the cooling level last recorded
    by CoolingLevelChangeCondition (cooling level times 10).
    """
    def execute(self, thermal_info_dict):
        from .thermal_conditions import CoolingLevelChangeCondition
        psu_fan_speed = CoolingLevelChangeCondition.cooling_level * 10
        SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, psu_fan_speed)
|
||||
|
||||
|
||||
class UpdateCoolingLevelToMinAction(ThermalPolicyActionBase):
|
||||
def execute(self, thermal_info_dict):
|
||||
self.update_cooling_level_to_minimum(thermal_info_dict)
|
||||
|
||||
@classmethod
|
||||
def update_cooling_level_to_minimum(cls, thermal_info_dict):
|
||||
from .fan import Fan
|
||||
from .thermal import Thermal
|
||||
from .thermal_conditions import UpdateCoolingLevelToMinCondition
|
||||
from .thermal_infos import FanInfo
|
||||
if Thermal.check_thermal_zone_temperature():
|
||||
fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME]
|
||||
speed = Fan.min_cooling_level * 10
|
||||
for fan in fan_info_obj.get_presence_fans():
|
||||
fan.set_speed(speed)
|
||||
SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, speed)
|
||||
UpdateCoolingLevelToMinCondition.enable = False
|
||||
else:
|
||||
UpdateCoolingLevelToMinCondition.enable = True
|
||||
|
||||
|
@ -32,6 +32,20 @@ class AllFanPresenceCondition(FanCondition):
|
||||
return len(fan_info_obj.get_absence_fans()) == 0 if fan_info_obj else False
|
||||
|
||||
|
||||
@thermal_json_object('fan.any.fault')
class AnyFanFaultCondition(FanCondition):
    """Condition that matches when at least one fan is reported as faulty."""
    def is_match(self, thermal_info_dict):
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            # Without fan information the condition cannot match.
            return False
        return len(fan_info_obj.get_fault_fans()) > 0
|
||||
|
||||
|
||||
@thermal_json_object('fan.all.good')
class AllFanGoodCondition(FanCondition):
    """Condition that matches when no fan is reported as faulty."""
    def is_match(self, thermal_info_dict):
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            # Without fan information the condition cannot match.
            return False
        return len(fan_info_obj.get_fault_fans()) == 0
|
||||
|
||||
|
||||
class PsuCondition(ThermalPolicyConditionBase):
|
||||
def get_psu_info(self, thermal_info_dict):
|
||||
from .thermal_infos import PsuInfo
|
||||
@ -61,3 +75,57 @@ class AllPsuPresenceCondition(PsuCondition):
|
||||
psu_info_obj = self.get_psu_info(thermal_info_dict)
|
||||
return len(psu_info_obj.get_absence_psus()) == 0 if psu_info_obj else False
|
||||
|
||||
|
||||
class MinCoolingLevelChangeCondition(ThermalPolicyConditionBase):
    """
    Condition that matches whenever an input of the minimum cooling level
    lookup (module trust state, air flow direction, ambient temperature)
    differs from the value seen on the previous evaluation.
    """
    # Values observed by the most recent is_match() call; shared at class
    # level so that other code can read them after the condition matched.
    trust_state = None
    air_flow_dir = None
    temperature = None

    def is_match(self, thermal_info_dict):
        """
        Refresh the cached trust state, air flow direction and temperature.

        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if any of the three cached values changed.
        """
        from .thermal import Thermal

        trust_state = Thermal.check_module_temperature_trustable()
        air_flow_dir, temperature = Thermal.get_air_flow_direction()
        # Floor division keeps the value an integer on every Python version.
        # With true division a reading such as 40500 becomes 40.5, which
        # falls between the integer "min:max" ranges of the minimum table
        # and changes on almost every poll.
        temperature = temperature // 1000

        change_cooling_level = False
        if trust_state != MinCoolingLevelChangeCondition.trust_state:
            MinCoolingLevelChangeCondition.trust_state = trust_state
            change_cooling_level = True

        if air_flow_dir != MinCoolingLevelChangeCondition.air_flow_dir:
            MinCoolingLevelChangeCondition.air_flow_dir = air_flow_dir
            change_cooling_level = True

        if temperature != MinCoolingLevelChangeCondition.temperature:
            MinCoolingLevelChangeCondition.temperature = temperature
            change_cooling_level = True

        return change_cooling_level
|
||||
|
||||
|
||||
class CoolingLevelChangeCondition(ThermalPolicyConditionBase):
    """
    Condition that matches when the cooling level reported by Fan differs from
    the one seen at the previous evaluation.
    """
    # Cooling level seen at the previous evaluation; stored on the class.
    cooling_level = None

    def is_match(self, thermal_info_dict):
        """
        Compare the current cooling level with the cached one and refresh the cache.
        :param thermal_info_dict: A dictionary holding the collected thermal information objects (not used here)
        :return: True if the cooling level changed since the last call, False otherwise
        """
        from .fan import Fan

        observed_level = Fan.get_cooling_level()
        if observed_level != CoolingLevelChangeCondition.cooling_level:
            # Remember the new level so the next call only reports a further change.
            CoolingLevelChangeCondition.cooling_level = observed_level
            return True
        return False
|
||||
|
||||
|
||||
class UpdateCoolingLevelToMinCondition(ThermalPolicyConditionBase):
    """
    Condition that matches while it is enabled and the current cooling level
    is not equal to Fan.min_cooling_level.
    """
    # Switch stored on the class; this block only ever turns it off.
    enable = False

    def is_match(self, thermal_info_dict):
        """
        Check whether the cooling level still differs from the minimum one.
        :param thermal_info_dict: A dictionary holding the collected thermal information objects (not used here)
        :return: True if the condition is enabled and the current cooling level
                 differs from Fan.min_cooling_level, False otherwise
        """
        if UpdateCoolingLevelToMinCondition.enable:
            from .fan import Fan

            at_minimum = Fan.get_cooling_level() == Fan.min_cooling_level
            if not at_minimum:
                return True
            # The minimum has been reached: disable the condition again.
            UpdateCoolingLevelToMinCondition.enable = False
        return False
|
||||
|
@ -14,6 +14,7 @@ class FanInfo(ThermalPolicyInfoBase):
|
||||
def __init__(self):
    """
    Create an empty fan information object: no fan is known as absent,
    present or faulty yet, and no status change has been recorded.
    """
    # Fans grouped by state; each set starts out empty.
    self._absence_fans = set()
    self._presence_fans = set()
    self._fault_fans = set()
    # Flag reporting whether a status change has been recorded.
    self._status_changed = False
|
||||
|
||||
def collect(self, chassis):
|
||||
@ -24,17 +25,27 @@ class FanInfo(ThermalPolicyInfoBase):
|
||||
"""
|
||||
self._status_changed = False
|
||||
for fan in chassis.get_all_fans():
|
||||
if fan.get_presence() and fan not in self._presence_fans:
|
||||
presence = fan.get_presence()
|
||||
status = fan.get_status()
|
||||
if presence and fan not in self._presence_fans:
|
||||
self._presence_fans.add(fan)
|
||||
self._status_changed = True
|
||||
if fan in self._absence_fans:
|
||||
self._absence_fans.remove(fan)
|
||||
elif not fan.get_presence() and fan not in self._absence_fans:
|
||||
elif not presence and fan not in self._absence_fans:
|
||||
self._absence_fans.add(fan)
|
||||
self._status_changed = True
|
||||
if fan in self._presence_fans:
|
||||
self._presence_fans.remove(fan)
|
||||
|
||||
if not status and fan not in self._fault_fans:
|
||||
self._fault_fans.add(fan)
|
||||
self._status_changed = True
|
||||
elif status and fan in self._fault_fans:
|
||||
self._fault_fans.remove(fan)
|
||||
self._status_changed = True
|
||||
|
||||
|
||||
def get_absence_fans(self):
|
||||
"""
|
||||
Retrieves absence fans
|
||||
@ -49,6 +60,13 @@ class FanInfo(ThermalPolicyInfoBase):
|
||||
"""
|
||||
return self._presence_fans
|
||||
|
||||
def get_fault_fans(self):
    """
    Retrieves the fans currently recorded as faulty
    :return: The internal set of fault fans (the set itself, not a copy)
    """
    return self._fault_fans
|
||||
|
||||
def is_status_changed(self):
|
||||
"""
|
||||
Retrieves if the status of fan information changed
|
||||
|
@ -1,12 +1,29 @@
|
||||
import os
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy
|
||||
from .thermal_actions import *
|
||||
from .thermal_conditions import *
|
||||
from .thermal_infos import *
|
||||
|
||||
|
||||
class ThermalManager(ThermalManagerBase):
|
||||
THERMAL_ALGORITHM_CONTROL_PATH = '/var/run/hw-management/config/suspend'
|
||||
@classmethod
def initialize(cls):
    """
    Initialize thermal manager. The vendor specific part done here is the
    registration of the private thermal policies through
    _add_private_thermal_policy.
    :return:
    """
    cls._add_private_thermal_policy()
|
||||
|
||||
@classmethod
def deinitialize(cls):
    """
    Destroy thermal manager. This implementation is not a no-op: it calls
    start_thermal_control_algorithm, so the thermal control algorithm is
    requested to run when the thermal manager goes away.
    :return:
    """
    cls.start_thermal_control_algorithm()
|
||||
|
||||
@classmethod
|
||||
def start_thermal_control_algorithm(cls):
|
||||
@ -16,7 +33,8 @@ class ThermalManager(ThermalManagerBase):
|
||||
Returns:
|
||||
bool: True if set success, False if fail.
|
||||
"""
|
||||
cls._control_thermal_control_algorithm(False)
|
||||
from .thermal import Thermal
|
||||
Thermal.set_thermal_algorithm_status(True)
|
||||
|
||||
@classmethod
|
||||
def stop_thermal_control_algorithm(cls):
|
||||
@ -26,25 +44,22 @@ class ThermalManager(ThermalManagerBase):
|
||||
Returns:
|
||||
bool: True if set success, False if fail.
|
||||
"""
|
||||
cls._control_thermal_control_algorithm(True)
|
||||
from .thermal import Thermal
|
||||
Thermal.set_thermal_algorithm_status(False)
|
||||
|
||||
@classmethod
|
||||
def _control_thermal_control_algorithm(cls, suspend):
|
||||
"""
|
||||
Control thermal control algorithm
|
||||
def _add_private_thermal_policy(cls):
|
||||
dynamic_min_speed_policy = ThermalPolicy()
|
||||
dynamic_min_speed_policy.conditions[MinCoolingLevelChangeCondition] = MinCoolingLevelChangeCondition()
|
||||
dynamic_min_speed_policy.actions[ChangeMinCoolingLevelAction] = ChangeMinCoolingLevelAction()
|
||||
cls._policy_dict['DynamicMinCoolingLevelPolicy'] = dynamic_min_speed_policy
|
||||
|
||||
Args:
|
||||
suspend: Bool, indicate suspend the algorithm or not
|
||||
update_psu_fan_speed_policy = ThermalPolicy()
|
||||
update_psu_fan_speed_policy.conditions[CoolingLevelChangeCondition] = CoolingLevelChangeCondition()
|
||||
update_psu_fan_speed_policy.actions[UpdatePsuFanSpeedAction] = UpdatePsuFanSpeedAction()
|
||||
cls._policy_dict['UpdatePsuFanSpeedPolicy'] = update_psu_fan_speed_policy
|
||||
|
||||
Returns:
|
||||
bool: True if set success, False if fail.
|
||||
"""
|
||||
status = True
|
||||
write_value = 1 if suspend else 0
|
||||
try:
|
||||
with open(cls.THERMAL_ALGORITHM_CONTROL_PATH, 'w') as control_file:
|
||||
control_file.write(str(write_value))
|
||||
except (ValueError, IOError):
|
||||
status = False
|
||||
|
||||
return status
|
||||
update_cooling_level_policy = ThermalPolicy()
|
||||
update_cooling_level_policy.conditions[UpdateCoolingLevelToMinCondition] = UpdateCoolingLevelToMinCondition()
|
||||
update_cooling_level_policy.actions[UpdateCoolingLevelToMinAction] = UpdateCoolingLevelToMinAction()
|
||||
cls._policy_dict['UpdateCoolingLevelPolicy'] = update_cooling_level_policy
|
||||
|
@ -1,13 +1,20 @@
|
||||
class MockFan:
|
||||
speed = 60
|
||||
def __init__(self):
|
||||
self.presence = True
|
||||
self.speed = 60
|
||||
self.status = True
|
||||
|
||||
def get_presence(self):
|
||||
return self.presence
|
||||
|
||||
def set_speed(self, speed):
|
||||
self.speed = speed
|
||||
MockFan.speed = speed
|
||||
|
||||
def get_status(self):
|
||||
return self.status
|
||||
|
||||
def get_target_speed(self):
|
||||
return MockFan.speed
|
||||
|
||||
|
||||
class MockPsu:
|
||||
@ -21,6 +28,9 @@ class MockPsu:
|
||||
def get_powergood_status(self):
|
||||
return self.powergood
|
||||
|
||||
def get_all_fans(self):
    """
    Retrieves the fans of this mock PSU
    :return: Always a new empty list, the mock PSU has no fan
    """
    return []
|
||||
|
||||
|
||||
class MockChassis:
|
||||
def __init__(self):
|
||||
|
@ -11,6 +11,11 @@ sys.path.insert(0, modules_path)
|
||||
|
||||
from sonic_platform.thermal_manager import ThermalManager
|
||||
from sonic_platform.thermal_infos import FanInfo, PsuInfo
|
||||
from sonic_platform.fan import Fan
|
||||
from sonic_platform.thermal import Thermal
|
||||
|
||||
Thermal.check_thermal_zone_temperature = MagicMock()
|
||||
Thermal.set_thermal_algorithm_status = MagicMock()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
|
||||
@ -27,6 +32,7 @@ def test_load_policy(thermal_manager):
|
||||
|
||||
assert 'any fan absence' in thermal_manager._policy_dict
|
||||
assert 'any psu absence' in thermal_manager._policy_dict
|
||||
assert 'any fan broken' in thermal_manager._policy_dict
|
||||
assert 'all fan and psu presence' in thermal_manager._policy_dict
|
||||
|
||||
assert thermal_manager._fan_speed_when_suspend == 60
|
||||
@ -40,6 +46,7 @@ def test_fan_info():
|
||||
fan_info.collect(chassis)
|
||||
assert len(fan_info.get_absence_fans()) == 1
|
||||
assert len(fan_info.get_presence_fans()) == 0
|
||||
assert len(fan_info.get_fault_fans()) == 0
|
||||
assert fan_info.is_status_changed()
|
||||
|
||||
fan_list = chassis.get_all_fans()
|
||||
@ -47,8 +54,15 @@ def test_fan_info():
|
||||
fan_info.collect(chassis)
|
||||
assert len(fan_info.get_absence_fans()) == 0
|
||||
assert len(fan_info.get_presence_fans()) == 1
|
||||
assert len(fan_info.get_fault_fans()) == 0
|
||||
assert fan_info.is_status_changed()
|
||||
|
||||
fan_list[0].status = False
|
||||
fan_info.collect(chassis)
|
||||
assert len(fan_info.get_absence_fans()) == 0
|
||||
assert len(fan_info.get_presence_fans()) == 1
|
||||
assert len(fan_info.get_fault_fans()) == 1
|
||||
assert fan_info.is_status_changed()
|
||||
|
||||
def test_psu_info():
|
||||
chassis = MockChassis()
|
||||
@ -77,35 +91,47 @@ def test_fan_policy(thermal_manager):
|
||||
chassis = MockChassis()
|
||||
chassis.make_fan_absence()
|
||||
chassis.fan_list.append(MockFan())
|
||||
thermal_manager.start_thermal_control_algorithm = MagicMock()
|
||||
thermal_manager.stop_thermal_control_algorithm = MagicMock()
|
||||
thermal_manager.run_policy(chassis)
|
||||
|
||||
fan_list = chassis.get_all_fans()
|
||||
assert fan_list[1].speed == 100
|
||||
thermal_manager.stop_thermal_control_algorithm.assert_called_once()
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(False, False)
|
||||
|
||||
fan_list[0].presence = True
|
||||
Thermal.check_thermal_zone_temperature = MagicMock(return_value=True)
|
||||
thermal_manager.run_policy(chassis)
|
||||
thermal_manager.start_thermal_control_algorithm.assert_called_once()
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
|
||||
assert Thermal.check_thermal_zone_temperature.call_count == 2
|
||||
assert fan_list[0].speed == 60
|
||||
assert fan_list[1].speed == 60
|
||||
|
||||
fan_list[0].status = False
|
||||
thermal_manager.run_policy(chassis)
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(False, False)
|
||||
|
||||
fan_list[0].status = True
|
||||
Thermal.check_thermal_zone_temperature = MagicMock(return_value=False)
|
||||
thermal_manager.run_policy(chassis)
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
|
||||
assert Thermal.check_thermal_zone_temperature.call_count == 2
|
||||
assert fan_list[0].speed == 100
|
||||
assert fan_list[1].speed == 100
|
||||
|
||||
|
||||
def test_psu_policy(thermal_manager):
|
||||
chassis = MockChassis()
|
||||
chassis.make_psu_absence()
|
||||
chassis.fan_list.append(MockFan())
|
||||
thermal_manager.start_thermal_control_algorithm = MagicMock()
|
||||
thermal_manager.stop_thermal_control_algorithm = MagicMock()
|
||||
thermal_manager.run_policy(chassis)
|
||||
|
||||
fan_list = chassis.get_all_fans()
|
||||
assert fan_list[0].speed == 100
|
||||
thermal_manager.stop_thermal_control_algorithm.assert_called_once()
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(False, False)
|
||||
|
||||
psu_list = chassis.get_all_psus()
|
||||
psu_list[0].presence = True
|
||||
thermal_manager.run_policy(chassis)
|
||||
thermal_manager.start_thermal_control_algorithm.assert_called_once()
|
||||
Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
|
||||
|
||||
|
||||
def test_any_fan_absence_condition():
|
||||
@ -159,6 +185,44 @@ def test_all_fan_presence_condition():
|
||||
fan_info.collect(chassis)
|
||||
assert condition.is_match({'fan_info': fan_info})
|
||||
|
||||
def test_any_fan_fault_condition():
    """AnyFanFaultCondition matches while one fan is faulty and stops matching once it recovers."""
    mock_chassis = MockChassis()
    fans = mock_chassis.get_all_fans()

    # One healthy fan and one fan reporting a bad status.
    healthy_fan = MockFan()
    fans.append(healthy_fan)
    broken_fan = MockFan()
    broken_fan.status = False
    fans.append(broken_fan)

    collected = FanInfo()
    collected.collect(mock_chassis)

    from sonic_platform.thermal_conditions import AnyFanFaultCondition
    fault_condition = AnyFanFaultCondition()
    assert fault_condition.is_match({'fan_info': collected})

    # After the fan recovers and information is collected again, no match is expected.
    broken_fan.status = True
    collected.collect(mock_chassis)
    assert not fault_condition.is_match({'fan_info': collected})
|
||||
|
||||
def test_all_fan_good_condition():
    """AllFanGoodCondition does not match while one fan is faulty and matches once it recovers."""
    mock_chassis = MockChassis()
    fans = mock_chassis.get_all_fans()

    # One healthy fan and one fan reporting a bad status.
    healthy_fan = MockFan()
    fans.append(healthy_fan)
    broken_fan = MockFan()
    broken_fan.status = False
    fans.append(broken_fan)

    collected = FanInfo()
    collected.collect(mock_chassis)

    from sonic_platform.thermal_conditions import AllFanGoodCondition
    good_condition = AllFanGoodCondition()
    assert not good_condition.is_match({'fan_info': collected})

    # After the fan recovers and information is collected again, a match is expected.
    broken_fan.status = True
    collected.collect(mock_chassis)
    assert good_condition.is_match({'fan_info': collected})
|
||||
|
||||
|
||||
def test_any_psu_absence_condition():
|
||||
chassis = MockChassis()
|
||||
@ -275,6 +339,53 @@ def test_load_control_thermal_algo_action():
|
||||
with pytest.raises(ValueError):
|
||||
action.load_from_json(json_obj)
|
||||
|
||||
def test_load_check_and_set_speed_action():
    """CheckAndSetAllFanSpeedAction loads a valid speed and rejects out-of-range or missing ones."""
    from sonic_platform.thermal_actions import CheckAndSetAllFanSpeedAction
    action = CheckAndSetAllFanSpeedAction()

    # A valid speed given as string is stored as the integer 40.
    valid_obj = json.loads('{\"speed\": \"40\"}')
    action.load_from_json(valid_obj)
    assert action.speed == 40

    # Negative speed, speed above 100 and a missing "speed" field must all raise ValueError.
    rejected_inputs = (
        '{\"speed\": \"-1\"}',
        '{\"speed\": \"101\"}',
        '{\"invalid\": \"60\"}',
    )
    for rejected_str in rejected_inputs:
        # Parse outside the raises block so only load_from_json may raise.
        rejected_obj = json.loads(rejected_str)
        with pytest.raises(ValueError):
            action.load_from_json(rejected_obj)
|
||||
|
||||
def test_execute_check_and_set_fan_speed_action():
    """
    CheckAndSetAllFanSpeedAction applies its speed when the thermal zone
    temperature check returns True and leaves the fans alone when it returns False.
    """
    mock_chassis = MockChassis()
    fans = mock_chassis.get_all_fans()
    fans.append(MockFan())
    fans.append(MockFan())
    collected = FanInfo()
    collected.collect(mock_chassis)
    Thermal.check_thermal_zone_temperature = MagicMock(return_value=True)

    from sonic_platform.thermal_actions import CheckAndSetAllFanSpeedAction
    action = CheckAndSetAllFanSpeedAction()

    # Check passes: the requested speed reaches the first two fans.
    action.speed = 99
    action.execute({'fan_info': collected})
    for checked_fan in fans[:2]:
        assert checked_fan.speed == 99

    # Check fails: the speed set beforehand must stay untouched.
    Thermal.check_thermal_zone_temperature = MagicMock(return_value=False)
    for checked_fan in fans[:2]:
        checked_fan.speed = 100
    action.speed = 60
    action.execute({'fan_info': collected})
    for checked_fan in fans[:2]:
        assert checked_fan.speed == 100
|
||||
|
||||
def test_load_duplicate_condition():
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy
|
||||
with open(os.path.join(test_path, 'duplicate_condition.json')) as f:
|
||||
@ -315,4 +426,94 @@ def test_load_policy_with_same_conditions():
|
||||
with pytest.raises(Exception):
|
||||
MockThermalManager.load(os.path.join(test_path, 'policy_with_same_conditions.json'))
|
||||
|
||||
def test_dynamic_minimum_table_data():
    """Validate the 'minimum_table' of every platform in DEVICE_DATA that defines one."""
    from sonic_platform.device_data import DEVICE_DATA

    for platform, platform_data in DEVICE_DATA.items():
        # Platforms without thermal data or without a minimum table are skipped.
        if 'thermal' not in platform_data:
            continue
        thermal_data = platform_data['thermal']
        if 'minimum_table' not in thermal_data:
            continue
        check_minimum_table_data(platform, thermal_data['minimum_table'])
|
||||
|
||||
def check_minimum_table_data(platform, minimum_table):
    """
    Assert that a dynamic minimum table is well formed.

    Each key of minimum_table must read '<direction>_<trust state>' and map to
    a dictionary of 'low:high' range strings to cooling levels. Ordered by
    cooling level, the ranges must start at -127, have low < high and follow
    each other without gap or overlap; cooling levels must lie within 10..20
    and increase strictly.

    :param platform: Platform name, only used in the assertion message
    :param minimum_table: Dictionary as described above
    :return:
    """
    allowed_directions = ['p2c', 'c2p', 'unk']
    allowed_trust_states = ['trust', 'untrust']

    for category, data in minimum_table.items():
        key_data = category.split('_')
        assert key_data[0] in allowed_directions
        assert key_data[1] in allowed_trust_states

        # (cooling level, range string) pairs ordered by cooling level.
        ordered_items = sorted(
            [(level, span) for span, level in data.items()],
            key=lambda pair: pair[0]
        )

        last_high = None
        last_level = None
        for item in ordered_items:
            cooling_level, range_str = item[0], item[1]

            bounds = range_str.split(':')
            low = int(bounds[0])
            high = int(bounds[1])
            assert low < high

            if last_high is None:
                # The first range has to start at the lowest supported value.
                assert low == -127
            else:
                # Consecutive ranges must be adjacent.
                assert low - last_high == 1, '{}-{}-{} error, item={}'.format(platform, key_data[0], key_data[1], item)
            last_high = high

            assert 10 <= cooling_level <= 20
            if last_level is not None:
                assert cooling_level > last_level
            last_level = cooling_level
|
||||
|
||||
def test_dynamic_minimum_policy(thermal_manager):
    """
    Exercise the 'DynamicMinCoolingLevelPolicy': the condition must fire on a
    change of trust state, air flow direction or temperature, and the action
    must update Fan.min_cooling_level and call Fan.set_cooling_level.
    """
    from sonic_platform.thermal_conditions import MinCoolingLevelChangeCondition
    from sonic_platform.thermal_actions import ChangeMinCoolingLevelAction
    from sonic_platform.thermal_infos import ChassisInfo
    from sonic_platform.thermal import Thermal
    from sonic_platform.fan import Fan

    # The private policy is registered by ThermalManager.initialize().
    ThermalManager.initialize()
    assert 'DynamicMinCoolingLevelPolicy' in thermal_manager._policy_dict
    dynamic_policy = thermal_manager._policy_dict['DynamicMinCoolingLevelPolicy']
    assert MinCoolingLevelChangeCondition in dynamic_policy.conditions
    assert ChangeMinCoolingLevelAction in dynamic_policy.actions

    level_condition = dynamic_policy.conditions[MinCoolingLevelChangeCondition]
    level_action = dynamic_policy.actions[ChangeMinCoolingLevelAction]

    # First evaluation: everything is new, so the condition matches once and
    # then stays quiet while nothing changes.
    Thermal.check_module_temperature_trustable = MagicMock(return_value='trust')
    Thermal.get_air_flow_direction = MagicMock(return_value=('p2c', 35000))
    assert level_condition.is_match(None)
    assert MinCoolingLevelChangeCondition.trust_state == 'trust'
    assert MinCoolingLevelChangeCondition.air_flow_dir == 'p2c'
    assert MinCoolingLevelChangeCondition.temperature == 35
    assert not level_condition.is_match(None)

    # Trust state change.
    Thermal.check_module_temperature_trustable = MagicMock(return_value='untrust')
    assert level_condition.is_match(None)
    assert MinCoolingLevelChangeCondition.trust_state == 'untrust'

    # Air flow direction change.
    Thermal.get_air_flow_direction = MagicMock(return_value=('c2p', 35000))
    assert level_condition.is_match(None)
    assert MinCoolingLevelChangeCondition.air_flow_dir == 'c2p'

    # Temperature change.
    Thermal.get_air_flow_direction = MagicMock(return_value=('c2p', 25000))
    assert level_condition.is_match(None)
    assert MinCoolingLevelChangeCondition.temperature == 25

    # Action on a platform name that is not a known one.
    mock_chassis = MockChassis()
    mock_chassis.platform_name = 'invalid'
    chassis_info = ChassisInfo()
    chassis_info._chassis = mock_chassis
    info_dict = {ChassisInfo.INFO_NAME: chassis_info}
    Fan.get_cooling_level = MagicMock(return_value=5)
    Fan.set_cooling_level = MagicMock()
    level_action.execute(info_dict)
    assert Fan.min_cooling_level == 6
    Fan.set_cooling_level.assert_called_with(6, 6)
    Fan.set_cooling_level.call_count = 0

    # Action on a real platform name.
    mock_chassis.platform_name = 'x86_64-mlnx_msn2700-r0'
    level_action.execute(info_dict)
    assert Fan.min_cooling_level == 4
    Fan.set_cooling_level.assert_called_with(4, 5)
|
||||
|
@ -51,6 +51,24 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "any fan broken",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "fan.any.fault"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.set_speed",
|
||||
"speed": "100"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "all fan and psu presence",
|
||||
"conditions": [
|
||||
@ -59,12 +77,19 @@
|
||||
},
|
||||
{
|
||||
"type": "psu.all.presence"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.good"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "true"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.check_and_set_speed",
|
||||
"speed": "60"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user