Add thermal control support for SONiC (#3949)

This commit is contained in:
Junchao-Mellanox 2020-03-10 01:41:10 +08:00 committed by GitHub
parent 67f520ff5b
commit be549db395
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
38 changed files with 1021 additions and 15 deletions

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1,72 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "false",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "60"
}
]
}
]
}

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -86,3 +86,14 @@ stdout_logfile=syslog
stderr_logfile=syslog stderr_logfile=syslog
startsecs=0 startsecs=0
{% endif %} {% endif %}
{% if not skip_thermalctld %}
[program:thermalctld]
command=/usr/bin/thermalctld
priority=9
autostart=false
autorestart=true
stdout_logfile=syslog
stderr_logfile=syslog
startsecs=0
{% endif %}

View File

@ -71,3 +71,7 @@ supervisorctl start psud
supervisorctl start syseepromd supervisorctl start syseepromd
{% endif %} {% endif %}
{% if not skip_thermalctld %}
supervisorctl start thermalctld
{% endif %}

View File

@ -0,0 +1,31 @@
From 76b02916794be2e2558fcff1d11609a594f633d7 Mon Sep 17 00:00:00 2001
From: Stephen Sun <stephens@mellanox.com>
Date: Fri, 14 Feb 2020 13:48:00 +0800
Subject: [PATCH] Disable thermal policy running in hw-mgmt service SONiC
thermal control algorithm has been supported.
Signed-off-by: Stephen Sun <stephens@mellanox.com>
---
usr/usr/bin/hw-management.sh | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
index 2cdbfb2..48b41d5 100755
--- a/usr/usr/bin/hw-management.sh
+++ b/usr/usr/bin/hw-management.sh
@@ -799,7 +799,11 @@ do_start()
#disabled for leopard chipless bringup.
echo 1 > $config_path/suspend
- $THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
+#
+# Disable the thermal control algorithm in the hw-management service
+# because SONiC already provides its own thermal control
+#
+# $THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
}
do_stop()
--
1.9.1

View File

@ -3,6 +3,7 @@
SONIC_PLATFORM_API_PY2 = mlnx_platform_api-1.0-py2-none-any.whl SONIC_PLATFORM_API_PY2 = mlnx_platform_api-1.0-py2-none-any.whl
$(SONIC_PLATFORM_API_PY2)_SRC_PATH = $(PLATFORM_PATH)/mlnx-platform-api $(SONIC_PLATFORM_API_PY2)_SRC_PATH = $(PLATFORM_PATH)/mlnx-platform-api
$(SONIC_PLATFORM_API_PY2)_PYTHON_VERSION = 2 $(SONIC_PLATFORM_API_PY2)_PYTHON_VERSION = 2
$(SONIC_PLATFORM_API_PY2)_DEPENDS = $(SONIC_PLATFORM_COMMON_PY2) $(SONIC_DAEMON_BASE_PY2) $(SONIC_CONFIG_ENGINE)
SONIC_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2) SONIC_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2)
export mlnx_platform_api_py2_wheel_path="$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY2))" export mlnx_platform_api_py2_wheel_path="$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY2))"

View File

@ -0,0 +1,2 @@
*.pyc
.cache/

View File

@ -0,0 +1,3 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning

View File

@ -0,0 +1,2 @@
[aliases]
test=pytest

View File

@ -12,6 +12,14 @@ setup(
maintainer_email='kevinw@mellanox.com', maintainer_email='kevinw@mellanox.com',
packages=[ packages=[
'sonic_platform', 'sonic_platform',
'tests'
],
setup_requires= [
'pytest-runner'
],
tests_require = [
'pytest',
'mock>=2.0.0'
], ],
classifiers=[ classifiers=[
'Development Status :: 3 - Alpha', 'Development Status :: 3 - Alpha',
@ -26,5 +34,6 @@ setup(
'Topic :: Utilities', 'Topic :: Utilities',
], ],
keywords='sonic SONiC platform PLATFORM', keywords='sonic SONiC platform PLATFORM',
test_suite='setup.get_test_suite'
) )

View File

@ -1,2 +1,2 @@
__all__ = ["platform", "chassis"] __all__ = ["platform", "chassis"]
from sonic_platform import * from sonic_platform import *

View File

@ -470,3 +470,8 @@ class Chassis(ChassisBase):
return True, {'sfp':port_dict} return True, {'sfp':port_dict}
else: else:
return True, {'sfp':{}} return True, {'sfp':{}}
def get_thermal_manager(self):
from .thermal_manager import ThermalManager
return ThermalManager

View File

@ -43,10 +43,12 @@ class Fan(FanBase):
self.fan_speed_set_path = "fan{}_speed_set".format(self.index) self.fan_speed_set_path = "fan{}_speed_set".format(self.index)
self.fan_presence_path = "fan{}_status".format(self.drawer_index) self.fan_presence_path = "fan{}_status".format(self.drawer_index)
self.fan_max_speed_path = "fan{}_max".format(self.index) self.fan_max_speed_path = "fan{}_max".format(self.index)
self._name = "fan{}".format(fan_index + 1)
else: else:
self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index) self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index)
self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index) self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index)
self.fan_max_speed_path = "psu{}_max".format(self.index) self._name = 'psu_{}_fan_{}'.format(self.index, fan_index)
self.fan_max_speed_path = None
self.fan_status_path = "fan{}_fault".format(self.index) self.fan_status_path = "fan{}_fault".format(self.index)
self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index) self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index)
self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index) self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index)
@ -93,6 +95,9 @@ class Fan(FanBase):
raise RuntimeError("Failed to read fan direction status to {}".format(repr(e))) raise RuntimeError("Failed to read fan direction status to {}".format(repr(e)))
def get_name(self):
return self._name
def get_status(self): def get_status(self):
""" """
Retrieves the operational status of fan Retrieves the operational status of fan
@ -171,7 +176,11 @@ class Fan(FanBase):
speed_in_rpm = int(fan_curr_speed.read()) speed_in_rpm = int(fan_curr_speed.read())
except (ValueError, IOError): except (ValueError, IOError):
speed_in_rpm = 0 speed_in_rpm = 0
if self.fan_max_speed_path is None:
# in case of max speed unsupported, we just return speed in unit of RPM.
return speed_in_rpm
max_speed_in_rpm = self._get_max_speed_in_rpm() max_speed_in_rpm = self._get_max_speed_in_rpm()
speed = 100*speed_in_rpm/max_speed_in_rpm speed = 100*speed_in_rpm/max_speed_in_rpm
@ -185,11 +194,10 @@ class Fan(FanBase):
Returns: Returns:
int: percentage of the max fan speed int: percentage of the max fan speed
""" """
speed = 0
if self.is_psu_fan: if self.is_psu_fan:
# Not like system fan, psu fan speed can not be modified, so target speed is N/A # Not like system fan, psu fan speed can not be modified, so target speed is N/A
return speed return self.get_speed()
try: try:
with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm: with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm:
pwm = int(fan_pwm.read()) pwm = int(fan_pwm.read())

View File

@ -24,6 +24,7 @@ class Platform(PlatformBase):
self._chassis.initialize_psu() self._chassis.initialize_psu()
self._chassis.initialize_fan() self._chassis.initialize_fan()
self._chassis.initialize_eeprom() self._chassis.initialize_eeprom()
self._chassis.initialize_thermals()
def _is_host(self): def _is_host(self):
""" """

View File

@ -68,6 +68,7 @@ class Psu(PsuBase):
psu_oper_status = "thermal/psu{}_pwr_status".format(self.index) psu_oper_status = "thermal/psu{}_pwr_status".format(self.index)
#psu_oper_status should always be present for all SKUs #psu_oper_status should always be present for all SKUs
self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status) self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status)
self._name = "PSU{}".format(psu_index + 1)
if sku in hwsku_dict_psu: if sku in hwsku_dict_psu:
filemap = psu_profile_list[hwsku_dict_psu[sku]] filemap = psu_profile_list[hwsku_dict_psu[sku]]
@ -100,7 +101,10 @@ class Psu(PsuBase):
fan = Fan(sku, psu_index, psu_index, True) fan = Fan(sku, psu_index, psu_index, True)
if fan.get_presence(): if fan.get_presence():
self._fan = fan self._fan_list.append(fan)
def get_name(self):
return self._name
self.psu_green_led_path = "led_psu_green" self.psu_green_led_path = "led_psu_green"
self.psu_red_led_path = "led_psu_red" self.psu_red_led_path = "led_psu_red"

View File

@ -36,24 +36,31 @@ THERMAL_DEV_BOARD_AMBIENT = "board_amb"
THERMAL_API_GET_TEMPERATURE = "get_temperature" THERMAL_API_GET_TEMPERATURE = "get_temperature"
THERMAL_API_GET_HIGH_THRESHOLD = "get_high_threshold" THERMAL_API_GET_HIGH_THRESHOLD = "get_high_threshold"
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD = "get_high_critical_threshold"
THERMAL_API_INVALID_HIGH_THRESHOLD = 0.0
HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/" HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/"
thermal_api_handler_cpu_core = { thermal_api_handler_cpu_core = {
THERMAL_API_GET_TEMPERATURE:"cpu_core{}", THERMAL_API_GET_TEMPERATURE:"cpu_core{}",
THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max" THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max",
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_core{}_crit"
} }
thermal_api_handler_cpu_pack = { thermal_api_handler_cpu_pack = {
THERMAL_API_GET_TEMPERATURE:"cpu_pack", THERMAL_API_GET_TEMPERATURE:"cpu_pack",
THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max" THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max",
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_pack_crit"
} }
thermal_api_handler_module = { thermal_api_handler_module = {
THERMAL_API_GET_TEMPERATURE:"module{}_temp_input", THERMAL_API_GET_TEMPERATURE:"module{}_temp_input",
THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit" THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit",
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"module{}_temp_emergency"
} }
thermal_api_handler_psu = { thermal_api_handler_psu = {
THERMAL_API_GET_TEMPERATURE:"psu{}_temp", THERMAL_API_GET_TEMPERATURE:"psu{}_temp",
THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max" THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max",
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:None
} }
thermal_api_handler_gearbox = { thermal_api_handler_gearbox = {
THERMAL_API_GET_TEMPERATURE:"gearbox{}_temp_input", THERMAL_API_GET_TEMPERATURE:"gearbox{}_temp_input",
@ -233,6 +240,7 @@ thermal_profile_list = [
}, },
] ]
def initialize_thermals(sku, thermal_list, psu_list): def initialize_thermals(sku, thermal_list, psu_list):
# create thermal objects for all categories of sensors # create thermal objects for all categories of sensors
tp_index = hwsku_dict_thermal[sku] tp_index = hwsku_dict_thermal[sku]
@ -262,6 +270,8 @@ def initialize_thermals(sku, thermal_list, psu_list):
thermal = Thermal(category, start + index, True) thermal = Thermal(category, start + index, True)
thermal_list.append(thermal) thermal_list.append(thermal)
class Thermal(ThermalBase): class Thermal(ThermalBase):
def __init__(self, category, index, has_index, dependency = None, hint = None): def __init__(self, category, index, has_index, dependency = None, hint = None):
""" """
@ -280,9 +290,11 @@ class Thermal(ThermalBase):
self.category = category self.category = category
self.temperature = self._get_file_from_api(THERMAL_API_GET_TEMPERATURE) self.temperature = self._get_file_from_api(THERMAL_API_GET_TEMPERATURE)
self.high_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_THRESHOLD) self.high_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_THRESHOLD)
self.high_critical_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD)
self.dependency = dependency self.dependency = dependency
self.dependent_hint = hint self.dependent_hint = hint
def get_name(self): def get_name(self):
""" """
Retrieves the name of the device Retrieves the name of the device
@ -292,6 +304,7 @@ class Thermal(ThermalBase):
""" """
return self.name return self.name
def _read_generic_file(self, filename, len): def _read_generic_file(self, filename, len):
""" """
Read a generic file, returns the contents of the file Read a generic file, returns the contents of the file
@ -304,6 +317,7 @@ class Thermal(ThermalBase):
logger.log_info("Fail to read file {} due to {}".format(filename, repr(e))) logger.log_info("Fail to read file {} due to {}".format(filename, repr(e)))
return result return result
def _get_file_from_api(self, api_name): def _get_file_from_api(self, api_name):
if self.category == THERMAL_DEV_CATEGORY_AMBIENT: if self.category == THERMAL_DEV_CATEGORY_AMBIENT:
if api_name == THERMAL_API_GET_TEMPERATURE: if api_name == THERMAL_API_GET_TEMPERATURE:
@ -315,9 +329,13 @@ class Thermal(ThermalBase):
if self.category in thermal_device_categories_singleton: if self.category in thermal_device_categories_singleton:
filename = handler filename = handler
else: else:
filename = handler.format(self.index) if handler:
filename = handler.format(self.index)
else:
return None
return join(HW_MGMT_THERMAL_ROOT, filename) return join(HW_MGMT_THERMAL_ROOT, filename)
def get_temperature(self): def get_temperature(self):
""" """
Retrieves current temperature reading from thermal Retrieves current temperature reading from thermal
@ -337,8 +355,11 @@ class Thermal(ThermalBase):
if value_str is None: if value_str is None:
return None return None
value_float = float(value_str) value_float = float(value_str)
if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD:
return None
return value_float / 1000.0 return value_float / 1000.0
def get_high_threshold(self): def get_high_threshold(self):
""" """
Retrieves the high threshold temperature of thermal Retrieves the high threshold temperature of thermal
@ -353,4 +374,25 @@ class Thermal(ThermalBase):
if value_str is None: if value_str is None:
return None return None
value_float = float(value_str) value_float = float(value_str)
if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD:
return None
return value_float / 1000.0
def get_high_critical_threshold(self):
"""
Retrieves the high critical threshold temperature of thermal
Returns:
A float number, the high critical threshold temperature of thermal in Celsius
up to nearest thousandth of one degree Celsius, e.g. 30.125
"""
if self.high_critical_threshold is None:
return None
value_str = self._read_generic_file(self.high_critical_threshold, 0)
if value_str is None:
return None
value_float = float(value_str)
if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD:
return None
return value_float / 1000.0 return value_float / 1000.0

View File

@ -0,0 +1,108 @@
from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
class SetFanSpeedAction(ThermalPolicyActionBase):
    """
    Base thermal action class to set speed for fans
    """
    # JSON field definition
    JSON_FIELD_SPEED = 'speed'

    def __init__(self):
        """
        Constructor of SetFanSpeedAction. The target speed stays None until
        load_from_json() provides a valid value.
        """
        self.speed = None

    def load_from_json(self, json_obj):
        """
        Construct SetFanSpeedAction via JSON. JSON example:
            {
                "type": "fan.all.set_speed",
                "speed": "100"
            }
        :param json_obj: A JSON object representing a SetFanSpeedAction action.
        :return:
        :raises ValueError: If the speed field is missing, is not a number,
                            or is outside of [0, 100].
        """
        if SetFanSpeedAction.JSON_FIELD_SPEED not in json_obj:
            raise ValueError('SetFanSpeedAction missing mandatory field {} in JSON policy file'.
                             format(SetFanSpeedAction.JSON_FIELD_SPEED))

        # Convert once and reuse the value for both validation and storage
        speed = float(json_obj[SetFanSpeedAction.JSON_FIELD_SPEED])

        # A positive range test also rejects NaN, which would slip through
        # "speed < 0 or speed > 100" because every comparison with NaN is False
        if not 0 <= speed <= 100:
            raise ValueError('SetFanSpeedAction invalid speed value {} in JSON policy file, valid value should be [0, 100]'.
                             format(speed))
        self.speed = speed
@thermal_json_object('fan.all.set_speed')
class SetAllFanSpeedAction(SetFanSpeedAction):
    """
    Action that applies the configured speed to every present fan
    """
    def execute(self, thermal_info_dict):
        """
        Apply self.speed to each fan reported as present by the fan info object.
        Nothing is done when the dictionary holds no FanInfo entry.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .thermal_infos import FanInfo

        if FanInfo.INFO_NAME not in thermal_info_dict:
            return
        fan_info = thermal_info_dict[FanInfo.INFO_NAME]
        if not isinstance(fan_info, FanInfo):
            return

        for present_fan in fan_info.get_presence_fans():
            present_fan.set_speed(self.speed)
@thermal_json_object('thermal_control.control')
class ControlThermalAlgoAction(ThermalPolicyActionBase):
    """
    Action to control the thermal control algorithm
    """
    # JSON field definition
    JSON_FIELD_STATUS = 'status'

    def __init__(self):
        """
        Constructor of ControlThermalAlgoAction. The default status is True,
        i.e. executing the action starts the algorithm.
        """
        self.status = True

    def load_from_json(self, json_obj):
        """
        Construct ControlThermalAlgoAction via JSON. JSON example:
            {
                "type": "thermal_control.control",
                "status": "true"
            }
        :param json_obj: A JSON object representing a ControlThermalAlgoAction action.
        :return:
        :raises ValueError: If the status field is missing or is neither
                            true nor false.
        """
        field = ControlThermalAlgoAction.JSON_FIELD_STATUS
        if field not in json_obj:
            raise ValueError('ControlThermalAlgoAction '
                             'missing mandatory field {} in JSON policy file'.
                             format(field))

        status_value = json_obj[field]
        # Accept a native JSON boolean as well as the string form
        if isinstance(status_value, bool):
            self.status = status_value
            return

        status_str = status_value.lower()
        if status_str == 'true':
            self.status = True
        elif status_str == 'false':
            self.status = False
        else:
            raise ValueError('Invalid {} field value, please specify true or false'.
                             format(field))

    def execute(self, thermal_info_dict):
        """
        Start the thermal control algorithm when self.status is True, stop it
        otherwise. Nothing is done when no chassis info is available.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .thermal_infos import ChassisInfo

        if ChassisInfo.INFO_NAME not in thermal_info_dict:
            return

        chassis_info_obj = thermal_info_dict[ChassisInfo.INFO_NAME]
        chassis = chassis_info_obj.get_chassis()
        thermal_manager = chassis.get_thermal_manager()
        if self.status:
            thermal_manager.start_thermal_control_algorithm()
        else:
            thermal_manager.stop_thermal_control_algorithm()

View File

@ -0,0 +1,63 @@
from sonic_platform_base.sonic_thermal_control.thermal_condition_base import ThermalPolicyConditionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
class FanCondition(ThermalPolicyConditionBase):
    """
    Common base class for conditions that are evaluated against fan information
    """
    def get_fan_info(self, thermal_info_dict):
        """
        Look up the fan information object in the thermal information dictionary.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: The FanInfo object, or None if the entry is missing or is not a FanInfo.
        """
        from .thermal_infos import FanInfo

        if FanInfo.INFO_NAME not in thermal_info_dict:
            return None
        candidate = thermal_info_dict[FanInfo.INFO_NAME]
        if isinstance(candidate, FanInfo):
            return candidate
        return None
@thermal_json_object('fan.any.absence')
class AnyFanAbsenceCondition(FanCondition):
    """
    Condition that matches when at least one fan is absent
    """
    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if any fan is absent; False otherwise or if fan info is unavailable.
        """
        fan_info = self.get_fan_info(thermal_info_dict)
        if not fan_info:
            return False
        return len(fan_info.get_absence_fans()) > 0
@thermal_json_object('fan.all.absence')
class AllFanAbsenceCondition(FanCondition):
    """
    Condition that matches when no fan is present
    """
    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if no fan is present; False otherwise or if fan info is unavailable.
        """
        fan_info = self.get_fan_info(thermal_info_dict)
        if not fan_info:
            return False
        return len(fan_info.get_presence_fans()) == 0
@thermal_json_object('fan.all.presence')
class AllFanPresenceCondition(FanCondition):
    """
    Condition that matches when no fan is absent
    """
    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if no fan is absent; False otherwise or if fan info is unavailable.
        """
        fan_info = self.get_fan_info(thermal_info_dict)
        if not fan_info:
            return False
        return len(fan_info.get_absence_fans()) == 0
class PsuCondition(ThermalPolicyConditionBase):
    """
    Common base class for conditions that are evaluated against PSU information
    """
    def get_psu_info(self, thermal_info_dict):
        """
        Look up the PSU information object in the thermal information dictionary.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: The PsuInfo object, or None if the entry is missing or is not a PsuInfo.
        """
        from .thermal_infos import PsuInfo

        if PsuInfo.INFO_NAME not in thermal_info_dict:
            return None
        candidate = thermal_info_dict[PsuInfo.INFO_NAME]
        if isinstance(candidate, PsuInfo):
            return candidate
        return None
@thermal_json_object('psu.any.absence')
class AnyPsuAbsenceCondition(PsuCondition):
    """
    Condition that matches when at least one PSU is absent
    """
    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if any PSU is absent; False otherwise or if PSU info is unavailable.
        """
        psu_info = self.get_psu_info(thermal_info_dict)
        if not psu_info:
            return False
        return len(psu_info.get_absence_psus()) > 0
@thermal_json_object('psu.all.absence')
class AllPsuAbsenceCondition(PsuCondition):
    """
    Condition that matches when no PSU is present
    """
    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if no PSU is present; False otherwise or if PSU info is unavailable.
        """
        psu_info = self.get_psu_info(thermal_info_dict)
        if not psu_info:
            return False
        return len(psu_info.get_presence_psus()) == 0
@thermal_json_object('psu.all.presence')
class AllPsuPresenceCondition(PsuCondition):
    """
    Condition that matches when no PSU is absent
    """
    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if no PSU is absent; False otherwise or if PSU info is unavailable.
        """
        psu_info = self.get_psu_info(thermal_info_dict)
        if not psu_info:
            return False
        return len(psu_info.get_absence_psus()) == 0

View File

@ -0,0 +1,136 @@
from sonic_platform_base.sonic_thermal_control.thermal_info_base import ThermalPolicyInfoBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
@thermal_json_object('fan_info')
class FanInfo(ThermalPolicyInfoBase):
    """
    Fan information needed by thermal policy
    """
    # Fan information name
    INFO_NAME = 'fan_info'

    def __init__(self):
        """
        Start with empty absence/presence sets and no pending status change.
        """
        self._absence_fans = set()
        self._presence_fans = set()
        self._status_changed = False

    def collect(self, chassis):
        """
        Collect absence and presence fans. The status-changed flag is reset on
        every call and set again only if a fan moved between the two sets (or
        is seen for the first time).
        :param chassis: The chassis object
        :return:
        """
        self._status_changed = False
        for fan in chassis.get_all_fans():
            # Query presence exactly once per fan: two separate reads could
            # disagree and leave the fan unclassified for this round
            if fan.get_presence():
                current, opposite = self._presence_fans, self._absence_fans
            else:
                current, opposite = self._absence_fans, self._presence_fans

            if fan not in current:
                current.add(fan)
                opposite.discard(fan)
                self._status_changed = True

    def get_absence_fans(self):
        """
        Retrieves absence fans
        :return: A set of absence fans
        """
        return self._absence_fans

    def get_presence_fans(self):
        """
        Retrieves presence fans
        :return: A set of presence fans
        """
        return self._presence_fans

    def is_status_changed(self):
        """
        Retrieves if the status of fan information changed
        :return: True if status changed during the latest collect() else False
        """
        return self._status_changed
@thermal_json_object('psu_info')
class PsuInfo(ThermalPolicyInfoBase):
    """
    PSU information needed by thermal policy
    """
    # PSU information name
    INFO_NAME = 'psu_info'

    def __init__(self):
        """
        Start with empty absence/presence sets and no pending status change.
        """
        self._absence_psus = set()
        self._presence_psus = set()
        self._status_changed = False

    def collect(self, chassis):
        """
        Collect absence and presence PSUs. The status-changed flag is reset on
        every call and set again only if a PSU moved between the two sets (or
        is seen for the first time).
        :param chassis: The chassis object
        :return:
        """
        self._status_changed = False
        for psu in chassis.get_all_psus():
            # Query presence exactly once per PSU: two separate reads could
            # disagree and leave the PSU unclassified for this round
            if psu.get_presence():
                current, opposite = self._presence_psus, self._absence_psus
            else:
                current, opposite = self._absence_psus, self._presence_psus

            if psu not in current:
                current.add(psu)
                opposite.discard(psu)
                self._status_changed = True

    def get_absence_psus(self):
        """
        Retrieves absence PSUs
        :return: A set of absence PSUs
        """
        return self._absence_psus

    def get_presence_psus(self):
        """
        Retrieves presence PSUs
        :return: A set of presence PSUs
        """
        return self._presence_psus

    def is_status_changed(self):
        """
        Retrieves if the status of PSU information changed
        :return: True if status changed during the latest collect() else False
        """
        return self._status_changed
@thermal_json_object('chassis_info')
class ChassisInfo(ThermalPolicyInfoBase):
    """
    Holder that hands the platform chassis object to thermal policy actions
    """
    # Chassis information name
    INFO_NAME = 'chassis_info'

    def __init__(self):
        """
        No chassis is known until collect() has been called.
        """
        self._chassis = None

    def collect(self, chassis):
        """
        Remember the chassis object passed in by the caller.
        :param chassis: The chassis object
        :return:
        """
        self._chassis = chassis

    def get_chassis(self):
        """
        Return the chassis stored by the latest collect().
        :return: A platform chassis object, or None if collect() was never called.
        """
        return self._chassis

View File

@ -0,0 +1,50 @@
import os
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
from .thermal_actions import *
from .thermal_conditions import *
from .thermal_infos import *
class ThermalManager(ThermalManagerBase):
    """
    Thermal manager that starts or stops the thermal control algorithm by
    writing to the suspend control file.
    """
    # Control file: 1 is written to suspend the algorithm, 0 to resume it
    THERMAL_ALGORITHM_CONTROL_PATH = '/var/run/hw-management/config/suspend'

    @classmethod
    def start_thermal_control_algorithm(cls):
        """
        Start thermal control algorithm

        Returns:
            bool: True if set success, False if fail.
        """
        # Propagate the result so the documented return value is honored
        return cls._control_thermal_control_algorithm(False)

    @classmethod
    def stop_thermal_control_algorithm(cls):
        """
        Stop thermal control algorithm

        Returns:
            bool: True if set success, False if fail.
        """
        # Propagate the result so the documented return value is honored
        return cls._control_thermal_control_algorithm(True)

    @classmethod
    def _control_thermal_control_algorithm(cls, suspend):
        """
        Control thermal control algorithm

        Args:
            suspend: Bool, indicate suspend the algorithm or not

        Returns:
            bool: True if set success, False if fail.
        """
        write_value = 1 if suspend else 0
        try:
            with open(cls.THERMAL_ALGORITHM_CONTROL_PATH, 'w') as control_file:
                control_file.write(str(write_value))
        except (ValueError, IOError):
            # Best effort: report the failure to the caller instead of raising
            return False
        return True

View File

@ -0,0 +1,44 @@
class MockFan:
    """
    Minimal fan stand-in: present by default, running at speed 60
    """
    def __init__(self):
        self.speed = 60
        self.presence = True

    def get_presence(self):
        """
        :return: The current value of the presence attribute
        """
        return self.presence

    def set_speed(self, speed):
        """
        Record the requested speed on the mock.
        :param speed: The speed value to store
        :return:
        """
        self.speed = speed
class MockPsu:
    """
    Minimal PSU stand-in: present by default
    """
    def __init__(self):
        self.presence = True

    def get_presence(self):
        """
        :return: The current value of the presence attribute
        """
        return self.presence
class MockChassis:
    """
    Minimal chassis stand-in holding plain lists of mock fans and PSUs
    """
    def __init__(self):
        self.fan_list = []
        self.psu_list = []

    def get_all_psus(self):
        """
        :return: The list of PSUs held by this chassis
        """
        return self.psu_list

    def get_all_fans(self):
        """
        :return: The list of fans held by this chassis
        """
        return self.fan_list

    def get_thermal_manager(self):
        """
        :return: The ThermalManager class of the platform package
        """
        from sonic_platform.thermal_manager import ThermalManager
        return ThermalManager

    def make_fan_absence(self):
        """
        Append one absent fan to the fan list.
        :return:
        """
        absent_fan = MockFan()
        absent_fan.presence = False
        self.fan_list.append(absent_fan)

    def make_psu_absence(self):
        """
        Append one absent PSU to the PSU list.
        :return:
        """
        absent_psu = MockPsu()
        absent_psu.presence = False
        self.psu_list.append(absent_psu)

View File

@ -0,0 +1,272 @@
import os
import sys
import pytest
import json
from mock import MagicMock
from .mock_platform import MockChassis, MockFan, MockPsu
test_path = os.path.dirname(os.path.abspath(__file__))
modules_path = os.path.dirname(test_path)
sys.path.insert(0, modules_path)
from sonic_platform.thermal_manager import ThermalManager
from sonic_platform.thermal_infos import FanInfo, PsuInfo
@pytest.fixture(scope='session', autouse=True)
def thermal_manager():
    """
    Session fixture: load the thermal policy file located next to this test
    module into ThermalManager and hand the class to the tests.
    """
    ThermalManager.load(os.path.join(test_path, 'thermal_policy.json'))
    return ThermalManager
def test_load_policy(thermal_manager):
    """
    The loaded policy file must register all info objects, all policies and
    the thermal control algorithm settings.
    """
    info_dict = thermal_manager._thermal_info_dict
    for info_name in ('psu_info', 'fan_info', 'chassis_info'):
        assert info_name in info_dict

    policy_dict = thermal_manager._policy_dict
    for policy_name in ('any fan absence', 'any psu absence', 'all fan and psu presence'):
        assert policy_name in policy_dict

    assert thermal_manager._fan_speed_when_suspend == 60
    assert thermal_manager._run_thermal_algorithm_at_boot_up == False
def test_fan_info():
    """
    FanInfo must follow a fan going from absent to present and flag the change.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_fan_absence()
    info = FanInfo()

    # First collection: the only fan is absent
    info.collect(mock_chassis)
    assert len(info.get_absence_fans()) == 1
    assert len(info.get_presence_fans()) == 0
    assert info.is_status_changed()

    # Make the fan present and collect again
    mock_chassis.get_all_fans()[0].presence = True
    info.collect(mock_chassis)
    assert len(info.get_absence_fans()) == 0
    assert len(info.get_presence_fans()) == 1
    assert info.is_status_changed()
def test_psu_info():
    """
    PsuInfo must follow a PSU going from absent to present and flag the change.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_psu_absence()
    info = PsuInfo()

    # First collection: the only PSU is absent
    info.collect(mock_chassis)
    assert len(info.get_absence_psus()) == 1
    assert len(info.get_presence_psus()) == 0
    assert info.is_status_changed()

    # Make the PSU present and collect again
    mock_chassis.get_all_psus()[0].presence = True
    info.collect(mock_chassis)
    assert len(info.get_absence_psus()) == 0
    assert len(info.get_presence_psus()) == 1
    assert info.is_status_changed()
def test_fan_policy(thermal_manager):
    """
    With one fan absent the policy must set the present fan to 100 and stop
    the algorithm; once every fan is present the algorithm must be started.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_fan_absence()
    mock_chassis.fan_list.append(MockFan())

    # Swap the start/stop entry points for mocks so the calls can be counted
    thermal_manager.start_thermal_control_algorithm = MagicMock()
    thermal_manager.stop_thermal_control_algorithm = MagicMock()

    thermal_manager.run_policy(mock_chassis)
    fans = mock_chassis.get_all_fans()
    assert fans[1].speed == 100
    thermal_manager.stop_thermal_control_algorithm.assert_called_once()

    fans[0].presence = True
    thermal_manager.run_policy(mock_chassis)
    thermal_manager.start_thermal_control_algorithm.assert_called_once()
def test_psu_policy(thermal_manager):
    """
    With one PSU absent the policy must set the fan to 100 and stop the
    algorithm; once every PSU is present the algorithm must be started.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_psu_absence()
    mock_chassis.fan_list.append(MockFan())

    # Swap the start/stop entry points for mocks so the calls can be counted
    thermal_manager.start_thermal_control_algorithm = MagicMock()
    thermal_manager.stop_thermal_control_algorithm = MagicMock()

    thermal_manager.run_policy(mock_chassis)
    fans = mock_chassis.get_all_fans()
    assert fans[0].speed == 100
    thermal_manager.stop_thermal_control_algorithm.assert_called_once()

    psus = mock_chassis.get_all_psus()
    psus[0].presence = True
    thermal_manager.run_policy(mock_chassis)
    thermal_manager.start_thermal_control_algorithm.assert_called_once()
def test_any_fan_absence_condition():
    """
    AnyFanAbsenceCondition matches while a fan is absent and stops matching
    once that fan becomes present.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_fan_absence()
    info = FanInfo()
    info.collect(mock_chassis)

    from sonic_platform.thermal_conditions import AnyFanAbsenceCondition
    condition = AnyFanAbsenceCondition()
    assert condition.is_match({'fan_info': info})

    mock_chassis.get_all_fans()[0].presence = True
    info.collect(mock_chassis)
    assert not condition.is_match({'fan_info': info})
def test_all_fan_absence_condition():
    """
    AllFanAbsenceCondition matches only when every fan is absent.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_fan_absence()
    present_fan = MockFan()
    mock_chassis.get_all_fans().append(present_fan)
    info = FanInfo()
    info.collect(mock_chassis)

    from sonic_platform.thermal_conditions import AllFanAbsenceCondition
    condition = AllFanAbsenceCondition()
    # One fan is still present
    assert not condition.is_match({'fan_info': info})

    present_fan.presence = False
    info.collect(mock_chassis)
    assert condition.is_match({'fan_info': info})
def test_all_fan_presence_condition():
    """
    AllFanPresenceCondition matches only when every fan is present.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_fan_absence()
    fans = mock_chassis.get_all_fans()
    fans.append(MockFan())
    info = FanInfo()
    info.collect(mock_chassis)

    from sonic_platform.thermal_conditions import AllFanPresenceCondition
    condition = AllFanPresenceCondition()
    # The first fan is still absent
    assert not condition.is_match({'fan_info': info})

    fans[0].presence = True
    info.collect(mock_chassis)
    assert condition.is_match({'fan_info': info})
def test_any_psu_absence_condition():
    """
    AnyPsuAbsenceCondition matches while a PSU is absent and stops matching
    once that PSU becomes present.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_psu_absence()
    info = PsuInfo()
    info.collect(mock_chassis)

    from sonic_platform.thermal_conditions import AnyPsuAbsenceCondition
    condition = AnyPsuAbsenceCondition()
    assert condition.is_match({'psu_info': info})

    mock_chassis.get_all_psus()[0].presence = True
    info.collect(mock_chassis)
    assert not condition.is_match({'psu_info': info})
def test_all_psu_absence_condition():
    """
    AllPsuAbsenceCondition matches only when every PSU is absent.
    """
    mock_chassis = MockChassis()
    mock_chassis.make_psu_absence()
    present_psu = MockPsu()
    mock_chassis.get_all_psus().append(present_psu)
    info = PsuInfo()
    info.collect(mock_chassis)

    from sonic_platform.thermal_conditions import AllPsuAbsenceCondition
    condition = AllPsuAbsenceCondition()
    # One PSU is still present
    assert not condition.is_match({'psu_info': info})

    present_psu.presence = False
    info.collect(mock_chassis)
    assert condition.is_match({'psu_info': info})
def test_all_psu_presence_condition():
    """
    AllPsuPresenceCondition matches only when every PSU is present.

    This test exercises the PSU condition, so it carries a PSU name. Reusing
    the fan test's name would rebind it at module level and the fan presence
    test would never be collected.
    """
    chassis = MockChassis()
    chassis.make_psu_absence()
    psu = MockPsu()
    psu_list = chassis.get_all_psus()
    psu_list.append(psu)
    psu_info = PsuInfo()
    psu_info.collect(chassis)

    from sonic_platform.thermal_conditions import AllPsuPresenceCondition
    condition = AllPsuPresenceCondition()
    # The first PSU is still absent
    assert not condition.is_match({'psu_info': psu_info})

    psu_list[0].presence = True
    psu_info.collect(chassis)
    assert condition.is_match({'psu_info': psu_info})
def test_load_set_fan_speed_action():
    """Check SetAllFanSpeedAction.load_from_json on valid and invalid input.

    A "speed" of "50" must be loaded as the integer 50. Speeds of "-1" and
    "101", and a payload without a "speed" key, must each raise ValueError.
    """
    from sonic_platform.thermal_actions import SetAllFanSpeedAction
    action = SetAllFanSpeedAction()

    action.load_from_json(json.loads('{\"speed\": \"50\"}'))
    assert action.speed == 50

    rejected_payloads = (
        '{\"speed\": \"-1\"}',
        '{\"speed\": \"101\"}',
        '{\"invalid\": \"101\"}',
    )
    for raw in rejected_payloads:
        # Parse outside the raises-block so only load_from_json is checked.
        parsed = json.loads(raw)
        with pytest.raises(ValueError):
            action.load_from_json(parsed)
def test_execute_set_fan_speed_action():
    """Check that SetAllFanSpeedAction.execute sets the speed on the fans.

    Two MockFan objects are appended to the chassis fan list, the action's
    speed is set to 99 and executed with the collected fan info; the first
    two fans in the list must then report a speed of 99.
    """
    from sonic_platform.thermal_actions import SetAllFanSpeedAction

    mock_chassis = MockChassis()
    fans = mock_chassis.get_all_fans()
    for _ in range(2):
        fans.append(MockFan())

    info = FanInfo()
    info.collect(mock_chassis)

    set_speed = SetAllFanSpeedAction()
    set_speed.speed = 99
    set_speed.execute({'fan_info': info})

    for index in (0, 1):
        assert fans[index].speed == 99
def test_load_control_thermal_algo_action():
    """Check ControlThermalAlgoAction.load_from_json on valid and invalid input.

    A "status" of "false" must load as a falsy status and "true" as a truthy
    one. A "status" of "invalid", and a payload without a "status" key, must
    each raise ValueError.
    """
    from sonic_platform.thermal_actions import ControlThermalAlgoAction
    action = ControlThermalAlgoAction()

    accepted_payloads = (
        ('{\"status\": \"false\"}', False),
        ('{\"status\": \"true\"}', True),
    )
    for raw, expect_enabled in accepted_payloads:
        action.load_from_json(json.loads(raw))
        if expect_enabled:
            assert action.status
        else:
            assert not action.status

    rejected_payloads = (
        '{\"status\": \"invalid\"}',
        '{\"invalid\": \"true\"}',
    )
    for raw in rejected_payloads:
        # Parse outside the raises-block so only load_from_json is checked.
        parsed = json.loads(raw)
        with pytest.raises(ValueError):
            action.load_from_json(parsed)

View File

@ -0,0 +1,72 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "false",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "true"
}
]
}
]
}

View File

@ -10,7 +10,7 @@ $(DOCKER_PLATFORM_MONITOR)_DEPENDS += $(LIBSENSORS) $(LM_SENSORS) $(FANCONTROL)
ifeq ($(CONFIGURED_PLATFORM),barefoot) ifeq ($(CONFIGURED_PLATFORM),barefoot)
$(DOCKER_PLATFORM_MONITOR)_DEPENDS += $(PYTHON_THRIFT) $(DOCKER_PLATFORM_MONITOR)_DEPENDS += $(PYTHON_THRIFT)
endif endif
$(DOCKER_PLATFORM_MONITOR)_PYTHON_DEBS += $(SONIC_LEDD) $(SONIC_XCVRD) $(SONIC_PSUD) $(SONIC_SYSEEPROMD) $(DOCKER_PLATFORM_MONITOR)_PYTHON_DEBS += $(SONIC_LEDD) $(SONIC_XCVRD) $(SONIC_PSUD) $(SONIC_SYSEEPROMD) $(SONIC_THERMALCTLD)
$(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_COMMON_PY2) $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_COMMON_PY2)
$(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SWSSSDK_PY2) $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SWSSSDK_PY2)
$(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2) $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2)

View File

@ -0,0 +1,6 @@
# sonic-thermalctld (SONiC Thermal control daemon) Debian package
# File name of the .deb artifact for the thermal control daemon.
SONIC_THERMALCTLD = python-sonic-thermalctld_1.0-1_all.deb
# Source directory of the daemon inside the sonic-platform-daemons tree.
$(SONIC_THERMALCTLD)_SRC_PATH = $(SRC_PATH)/sonic-platform-daemons/sonic-thermalctld
# Wheel dependency of this package: the Python 2 sonic daemon base.
$(SONIC_THERMALCTLD)_WHEEL_DEPENDS = $(SONIC_DAEMON_BASE_PY2)
# Append the package to SONIC_PYTHON_STDEB_DEBS (presumably the list of
# Python packages turned into .debs via stdeb -- confirm in the build rules).
SONIC_PYTHON_STDEB_DEBS += $(SONIC_THERMALCTLD)

View File

@ -0,0 +1,50 @@
import multiprocessing
import os
import signal
import threading
#
# ProcessTaskBase =====================================================================
#
class ProcessTaskBase(object):  # TODO: put this class to swss-platform-common
    """Base class for a task whose worker runs in a separate process.

    Subclasses override task_worker(). task_run() starts the worker in a
    multiprocessing.Process and task_stop() sets the stopping event and
    forcibly terminates the worker.
    """

    def __init__(self):
        # Handle of the worker process; None until task_run() starts one.
        self.task_process = None
        # Set by task_stop(); once set, task_run() will not start a worker.
        self.task_stopping_event = multiprocessing.Event()

    def task_worker(self):
        """Body of the worker process; the base implementation does nothing."""
        pass

    def task_run(self):
        """Start task_worker() in a new process unless stop was requested."""
        if self.task_stopping_event.is_set():
            return
        self.task_process = multiprocessing.Process(target=self.task_worker)
        self.task_process.start()

    def task_stop(self):
        """Set the stopping event and kill the worker process, if any.

        Safe to call when task_run() was never called or never managed to
        start a process: in that case only the stopping event is set.
        """
        self.task_stopping_event.set()

        process = self.task_process
        # Nothing to kill if no process was created or it was never started
        # (an unstarted Process has pid None).
        if process is None or process.pid is None:
            return

        # Only signal a live child: once a child has exited and been reaped
        # its pid may be reused by an unrelated process.
        if process.is_alive():
            os.kill(process.pid, signal.SIGKILL)
        # Reap the child so it does not linger as a zombie.
        process.join()
#
# ThreadTaskBase =====================================================================
#
class ThreadTaskBase(object):  # TODO: put this class to swss-platform-common
    """Base class for a task whose worker runs in a separate thread.

    Subclasses override task_worker(). task_run() starts the worker in a
    threading.Thread and task_stop() sets the stopping event and waits for
    the worker thread to finish.
    """

    def __init__(self):
        # Handle of the worker thread; None until task_run() starts one.
        self.task_thread = None
        # Set by task_stop(); once set, task_run() will not start a worker.
        self.task_stopping_event = threading.Event()

    def task_worker(self):
        """Body of the worker thread; the base implementation does nothing."""
        pass

    def task_run(self):
        """Start task_worker() in a new thread unless stop was requested."""
        if self.task_stopping_event.is_set():
            return
        self.task_thread = threading.Thread(target=self.task_worker)
        self.task_thread.start()

    def task_stop(self):
        """Set the stopping event and wait for the worker thread, if any.

        Safe to call when task_run() was never called: in that case only
        the stopping event is set.
        """
        self.task_stopping_event.set()

        thread = self.task_thread
        # Joining is only meaningful (and only legal) for a started thread;
        # a finished thread needs no join and None has nothing to join.
        if thread is not None and thread.is_alive():
            thread.join()

@ -1 +1 @@
Subproject commit 6f74dd3f4f42bc945467cffa4f889b50e4b1468a Subproject commit ed50e72d028092399e2768e64a7a4ef01e7571de

@ -1 +1 @@
Subproject commit a34ba131f618a8df6beec1f548aa08f9cedc48db Subproject commit fc455a7d01f8df1ed6a55960056facdf1b3b0b3c