[201911][thermal control] Backport feature from master branch (#4677)

Backport thermal control feature from master branch to 201911 branch by cherry-picking commits and manually resolving conflicts.
This commit is contained in:
Junchao-Mellanox 2020-06-09 02:20:43 +08:00 committed by GitHub
parent b5a419e1c8
commit 0a70571011
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
50 changed files with 2193 additions and 107 deletions

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1,80 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "true",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any fan broken",
"conditions": [
{
"type": "fan.any.fault"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
},
{
"type": "fan.all.good"
}
],
"actions": [
{
"type": "thermal.recover"
}
]
}
]
}

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -0,0 +1 @@
../x86_64-mlnx_msn2700-r0/thermal_policy.json

View File

@ -18,7 +18,8 @@ RUN apt-get update && \
rrdtool \ rrdtool \
python-smbus \ python-smbus \
ethtool \ ethtool \
dmidecode && \ dmidecode \
i2c-tools && \
pip install enum34 pip install enum34
{% if docker_platform_monitor_debs.strip() -%} {% if docker_platform_monitor_debs.strip() -%}

View File

@ -91,3 +91,14 @@ stdout_logfile=syslog
stderr_logfile=syslog stderr_logfile=syslog
startsecs=10 startsecs=10
{% endif %} {% endif %}
{% if not skip_thermalctld %}
[program:thermalctld]
command=/usr/bin/thermalctld
priority=9
autostart=false
autorestart=true
stdout_logfile=syslog
stderr_logfile=syslog
startsecs=0
{% endif %}

View File

@ -75,3 +75,7 @@ supervisorctl start psud
supervisorctl start syseepromd supervisorctl start syseepromd
{% endif %} {% endif %}
{% if not skip_thermalctld %}
supervisorctl start thermalctld
{% endif %}

View File

@ -1,53 +0,0 @@
From ebb17bd1f6996f73cb67313846a63c789e74c4f4 Mon Sep 17 00:00:00 2001
From: Mykola Faryma <mykolaf@mellanox.com>
Date: Fri, 21 Feb 2020 12:28:54 +0200
Subject: [PATCH 1/1] Make hw-mgmt SimX compatiable
Signed-off-by: Mykola Faryma <mykolaf@mellanox.com>
---
usr/usr/bin/hw-management.sh | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
index 1b5b18a..3dfd4b1 100755
--- a/usr/usr/bin/hw-management.sh
+++ b/usr/usr/bin/hw-management.sh
@@ -943,6 +943,35 @@ do_chip_down()
/usr/bin/hw-management-thermal-events.sh change hotplug_asic down %S %p
}
+handle_simx()
+{
+ local -r onie_platform="$(cat /host/machine.conf | grep onie_platform | cut -d= -f2)"
+
+ local -r syseeprom_cache_path="/var/cache/sonic/decode-syseeprom/syseeprom_cache"
+ local -r syseeprom_hex_path="/usr/share/sonic/device/${onie_platform}/syseeprom.hex"
+ local -r syseeprom_vpd_path="/var/run/hw-management/eeprom/vpd_info"
+
+ case $ACTION in
+ start)
+ /bin/bash -c "/bin/rm -f ${syseeprom_cache_path}"
+ /bin/bash -c "/bin/mkdir -p ${eeprom_path}"
+ /bin/bash -c "/usr/bin/xxd -r -p ${syseeprom_hex_path} ${syseeprom_vpd_path}"
+ ;;
+ stop)
+ /bin/bash -c "/bin/rm -fr ${hw_management_path}"
+ ;;
+ *)
+ echo "Usage: `basename $0` {start|stop}"
+ exit 1
+ ;;
+ esac
+}
+
+if [[ "$(cat /sys/devices/virtual/dmi/id/sys_vendor)" = "QEMU" ]]; then
+ handle_simx
+ exit 0
+fi
+
case $ACTION in
start)
if [ -d /var/run/hw-management ]; then
--
1.9.1

View File

@ -0,0 +1,27 @@
From 3512488c981eb81d51ce92cb3573721e36861f56 Mon Sep 17 00:00:00 2001
From: Junchao Chen <junchao@mellanox.com>
Date: Fri, 29 May 2020 10:38:53 +0300
Subject: [PATCH] Disable hw-management thermal control service
---
usr/usr/bin/hw-management.sh | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
index 65e5d39..0d1c4a1 100755
--- a/usr/usr/bin/hw-management.sh
+++ b/usr/usr/bin/hw-management.sh
@@ -832,7 +832,9 @@ do_start()
if [ -f $config_path/max_tachos ]; then
max_tachos=$(<$config_path/max_tachos)
fi
- $THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
+ # Disable hw-management thermal control because
# SONiC already implements it
+ #$THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
}
do_stop()
--
1.9.1

View File

@ -3,6 +3,7 @@
SONIC_PLATFORM_API_PY2 = mlnx_platform_api-1.0-py2-none-any.whl SONIC_PLATFORM_API_PY2 = mlnx_platform_api-1.0-py2-none-any.whl
$(SONIC_PLATFORM_API_PY2)_SRC_PATH = $(PLATFORM_PATH)/mlnx-platform-api $(SONIC_PLATFORM_API_PY2)_SRC_PATH = $(PLATFORM_PATH)/mlnx-platform-api
$(SONIC_PLATFORM_API_PY2)_PYTHON_VERSION = 2 $(SONIC_PLATFORM_API_PY2)_PYTHON_VERSION = 2
$(SONIC_PLATFORM_API_PY2)_DEPENDS = $(SONIC_PLATFORM_COMMON_PY2) $(SONIC_DAEMON_BASE_PY2) $(SONIC_CONFIG_ENGINE)
SONIC_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2) SONIC_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2)
export mlnx_platform_api_py2_wheel_path="$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY2))" export mlnx_platform_api_py2_wheel_path="$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY2))"

View File

@ -0,0 +1,2 @@
*.pyc
.cache/

View File

@ -0,0 +1,3 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning

View File

@ -0,0 +1,2 @@
[aliases]
test=pytest

View File

@ -12,6 +12,14 @@ setup(
maintainer_email='kevinw@mellanox.com', maintainer_email='kevinw@mellanox.com',
packages=[ packages=[
'sonic_platform', 'sonic_platform',
'tests'
],
setup_requires= [
'pytest-runner'
],
tests_require = [
'pytest',
'mock>=2.0.0'
], ],
classifiers=[ classifiers=[
'Development Status :: 3 - Alpha', 'Development Status :: 3 - Alpha',
@ -26,5 +34,6 @@ setup(
'Topic :: Utilities', 'Topic :: Utilities',
], ],
keywords='sonic SONiC platform PLATFORM', keywords='sonic SONiC platform PLATFORM',
test_suite='setup.get_test_suite'
) )

View File

@ -1,2 +1,2 @@
__all__ = ["platform", "chassis"] __all__ = ["platform", "chassis"]
from sonic_platform import * from sonic_platform import *

View File

@ -15,6 +15,7 @@ try:
from sonic_daemon_base.daemon_base import Logger from sonic_daemon_base.daemon_base import Logger
from os import listdir from os import listdir
from os.path import isfile, join from os.path import isfile, join
from glob import glob
import sys import sys
import io import io
import re import re
@ -28,12 +29,17 @@ MAX_SELECT_DELAY = 3600
MLNX_NUM_PSU = 2 MLNX_NUM_PSU = 2
GET_HWSKU_CMD = "sonic-cfggen -d -v DEVICE_METADATA.localhost.hwsku" GET_HWSKU_CMD = "sonic-cfggen -d -v DEVICE_METADATA.localhost.hwsku"
GET_PLATFORM_CMD = "sonic-cfggen -d -v DEVICE_METADATA.localhost.platform"
EEPROM_CACHE_ROOT = '/var/cache/sonic/decode-syseeprom' EEPROM_CACHE_ROOT = '/var/cache/sonic/decode-syseeprom'
EEPROM_CACHE_FILE = 'syseeprom_cache' EEPROM_CACHE_FILE = 'syseeprom_cache'
HWMGMT_SYSTEM_ROOT = '/var/run/hw-management/system/' HWMGMT_SYSTEM_ROOT = '/var/run/hw-management/system/'
MST_DEVICE_NAME_PATTERN = '/dev/mst/mt[0-9]*_pciconf0'
MST_DEVICE_RE_PATTERN = '/dev/mst/mt([0-9]*)_pciconf0'
SPECTRUM1_CHIP_ID = '52100'
#reboot cause related definitions #reboot cause related definitions
REBOOT_CAUSE_ROOT = HWMGMT_SYSTEM_ROOT REBOOT_CAUSE_ROOT = HWMGMT_SYSTEM_ROOT
@ -55,6 +61,7 @@ class Chassis(ChassisBase):
# Initialize SKU name # Initialize SKU name
self.sku_name = self._get_sku_name() self.sku_name = self._get_sku_name()
self.platform_name = self._get_platform_name()
mi = get_machine_info() mi = get_machine_info()
if mi is not None: if mi is not None:
self.name = mi['onie_platform'] self.name = mi['onie_platform']
@ -93,11 +100,21 @@ class Chassis(ChassisBase):
num_of_fan, num_of_drawer = self._extract_num_of_fans_and_fan_drawers() num_of_fan, num_of_drawer = self._extract_num_of_fans_and_fan_drawers()
multi_rotor_in_drawer = num_of_fan > num_of_drawer multi_rotor_in_drawer = num_of_fan > num_of_drawer
# Fan's direction isn't supported on spectrum 1 devices for now
mst_dev_list = glob(MST_DEVICE_NAME_PATTERN)
if not mst_dev_list:
raise RuntimeError("Can't get chip type due to {} not found".format(MST_DEVICE_NAME_PATTERN))
m = re.search(MST_DEVICE_RE_PATTERN, mst_dev_list[0])
if m.group(1) == SPECTRUM1_CHIP_ID:
has_fan_dir = False
else:
has_fan_dir = True
for index in range(num_of_fan): for index in range(num_of_fan):
if multi_rotor_in_drawer: if multi_rotor_in_drawer:
fan = Fan(index, index/2) fan = Fan(has_fan_dir, index, index/2, False, self.platform_name)
else: else:
fan = Fan(index, index) fan = Fan(has_fan_dir, index, index, False, self.platform_name)
self._fan_list.append(fan) self._fan_list.append(fan)
@ -230,6 +247,12 @@ class Chassis(ChassisBase):
return out.rstrip('\n') return out.rstrip('\n')
def _get_platform_name(self):
    """
    Query the platform name by running GET_PLATFORM_CMD in a shell

    Returns:
        The command's standard output with trailing newline characters removed
    """
    proc = subprocess.Popen(GET_PLATFORM_CMD, shell=True, stdout=subprocess.PIPE)
    # Only stdout is piped, so the stderr slot of the result is not used.
    stdout_data, _ = proc.communicate()
    return stdout_data.rstrip('\n')
def _get_port_position_tuple_by_sku_name(self): def _get_port_position_tuple_by_sku_name(self):
position_tuple = port_position_tuple_list[hwsku_dict_port[self.sku_name]] position_tuple = port_position_tuple_list[hwsku_dict_port[self.sku_name]]
return position_tuple return position_tuple
@ -442,3 +465,8 @@ class Chassis(ChassisBase):
return True, {'sfp':port_dict} return True, {'sfp':port_dict}
else: else:
return True, {'sfp':{}} return True, {'sfp':{}}
def get_thermal_manager(self):
    """
    Retrieves the thermal manager class of this chassis

    Returns:
        The ThermalManager class itself (not an instance). The import is
        done at call time rather than at module load.
    """
    from . import thermal_manager
    return thermal_manager.ThermalManager

View File

@ -0,0 +1,90 @@
# Per-platform static data, keyed by platform name.
# Each 'minimum_table' maps a "low:high" range string to an integer.
# NOTE(review): the ranges look like temperature intervals and the integers
# like minimum cooling levels, with one table for trusted and one for
# untrusted conditions -- confirm against the code that consumes this table.
DEVICE_DATA = {
    'x86_64-mlnx_msn2700-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:30": 13, "31:40": 14, "41:120": 15},
                "unk_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn2740-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:120": 13},
                "unk_untrust": {"-127:15": 13, "16:25": 14, "26:30": 15, "31:120": 17},
            },
        },
    },
    'x86_64-mlnx_msn2100-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:40": 12, "41:120": 13},
                "unk_untrust": {"-127:15": 12, "16:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn2410-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:30": 13, "31:40": 14, "41:120": 15},
                "unk_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn2010-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:120": 12},
                "unk_untrust": {"-127:15": 12, "16:20": 13, "21:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn3700-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "unk_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn3700c-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:40": 12, "41:120": 13},
                "unk_untrust": {"-127:10": 12, "11:20": 13, "21:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn3800-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
                "unk_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
            },
        },
    },
    'x86_64-mlnx_msn4700-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:120": 16},
                "unk_untrust": {"-127:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn3420-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:120": 16},
                "unk_untrust": {"-127:120": 16},
            },
        },
    },
    'x86_64-mlnx_msn4600c-r0': {
        'thermal': {
            'minimum_table': {
                "unk_trust": {"-127:120": 16},
                "unk_untrust": {"-127:120": 16},
            },
        },
    },
}

View File

@ -9,6 +9,7 @@
############################################################################# #############################################################################
import os.path import os.path
import subprocess
try: try:
from sonic_platform_base.fan_base import FanBase from sonic_platform_base.fan_base import FanBase
@ -22,32 +23,99 @@ PWM_MAX = 255
FAN_PATH = "/var/run/hw-management/thermal/" FAN_PATH = "/var/run/hw-management/thermal/"
LED_PATH = "/var/run/hw-management/led/" LED_PATH = "/var/run/hw-management/led/"
CONFIG_PATH = "/var/run/hw-management/config"
# fan_dir isn't supported on Spectrum 1. It is supported on Spectrum 2 and later switches
FAN_DIR = "/var/run/hw-management/system/fan_dir"
COOLING_STATE_PATH = "/var/run/hw-management/thermal/cooling_cur_state"
# Platforms with unplugable FANs:
# 1. don't have fanX_status and should be treated as always present
platform_with_unplugable_fan = ['x86_64-mlnx_msn2010-r0', 'x86_64-mlnx_msn2100-r0']
class Fan(FanBase): class Fan(FanBase):
"""Platform-specific Fan class""" """Platform-specific Fan class"""
def __init__(self, fan_index, drawer_index = 1, psu_fan = False):
STATUS_LED_COLOR_ORANGE = "orange"
min_cooling_level = 2
MIN_VALID_COOLING_LEVEL = 1
MAX_VALID_COOLING_LEVEL = 10
# PSU fan speed vector
PSU_FAN_SPEED = ['0x3c', '0x3c', '0x3c', '0x3c', '0x3c',
'0x3c', '0x3c', '0x46', '0x50', '0x5a', '0x64']
def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, platform = None):
# API index is starting from 0, Mellanox platform index is starting from 1 # API index is starting from 0, Mellanox platform index is starting from 1
self.index = fan_index + 1 self.index = fan_index + 1
self.drawer_index = drawer_index + 1 self.drawer_index = drawer_index + 1
self.is_psu_fan = psu_fan self.is_psu_fan = psu_fan
self.always_presence = False if platform not in platform_with_unplugable_fan else True
self.fan_min_speed_path = "fan{}_min".format(self.index) self.fan_min_speed_path = "fan{}_min".format(self.index)
if not self.is_psu_fan: if not self.is_psu_fan:
self.fan_speed_get_path = "fan{}_speed_get".format(self.index) self.fan_speed_get_path = "fan{}_speed_get".format(self.index)
self.fan_speed_set_path = "fan{}_speed_set".format(self.index) self.fan_speed_set_path = "fan{}_speed_set".format(self.index)
self.fan_presence_path = "fan{}_status".format(self.drawer_index) self.fan_presence_path = "fan{}_status".format(self.drawer_index)
self.fan_max_speed_path = "fan{}_max".format(self.index) self.fan_max_speed_path = "fan{}_max".format(self.index)
self._name = "fan{}".format(fan_index + 1)
else: else:
self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index) self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index)
self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index) self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index)
self.fan_max_speed_path = "psu{}_max".format(self.index) self._name = 'psu_{}_fan_{}'.format(self.index, 1)
self.fan_max_speed_path = None
self.psu_i2c_bus_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_bus'.format(self.index))
self.psu_i2c_addr_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_addr'.format(self.index))
self.psu_i2c_command_path = os.path.join(CONFIG_PATH, 'fan_command')
self.fan_status_path = "fan{}_fault".format(self.index) self.fan_status_path = "fan{}_fault".format(self.index)
self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index) self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index)
self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index) self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index)
self.fan_orange_led_path = "led_fan{}_orange".format(self.drawer_index) self.fan_orange_led_path = "led_fan{}_orange".format(self.drawer_index)
self.fan_pwm_path = "pwm1" self.fan_pwm_path = "pwm1"
self.fan_led_cap_path = "led_fan{}_capability".format(self.drawer_index) self.fan_led_cap_path = "led_fan{}_capability".format(self.drawer_index)
if has_fan_dir:
self.fan_dir = FAN_DIR
else:
self.fan_dir = None
def get_direction(self):
    """
    Retrieves the fan's direction

    Returns:
        A string, either FAN_DIRECTION_INTAKE or FAN_DIRECTION_EXHAUST
        depending on fan direction. FAN_DIRECTION_NOT_APPLICABLE is returned
        when no direction file is configured for this fan, when this is a
        PSU fan, or when the fan is not present.

    Raises:
        RuntimeError: if the direction file cannot be read or does not
        contain an integer

    Notes:
        What Mellanox calls forward:
        Air flows from fans side to QSFP side, for example: MSN2700-CS2F
        which means intake in community
        What Mellanox calls reverse:
        Air flow from QSFP side to fans side, for example: MSN2700-CS2R
        which means exhaust in community
        According to hw-mgmt:
            1 stands for forward, in other words intake
            0 stands for reverse, in other words exhaust
    """
    if not self.fan_dir or self.is_psu_fan or not self.get_presence():
        return self.FAN_DIRECTION_NOT_APPLICABLE

    try:
        with open(self.fan_dir, 'r') as fan_dir_file:
            fan_dir_bits = int(fan_dir_file.read().strip())
        # The file holds a bitmap; the bit tested for this fan is
        # (drawer_index - 1).
        fan_mask = 1 << (self.drawer_index - 1)
        if fan_dir_bits & fan_mask:
            return self.FAN_DIRECTION_INTAKE
        return self.FAN_DIRECTION_EXHAUST
    except (ValueError, IOError) as e:
        raise RuntimeError("Failed to read fan direction status due to {}".format(repr(e)))
def get_name(self):
    """
    Retrieves the name of the fan

    Returns:
        The value stored in self._name
    """
    return self._name
def get_status(self): def get_status(self):
""" """
@ -58,15 +126,15 @@ class Fan(FanBase):
""" """
status = 0 status = 0
if self.is_psu_fan: if self.is_psu_fan:
status = 1 status = 0
else: else:
try: try:
with open(os.path.join(FAN_PATH, self.fan_status_path), 'r') as fault_status: with open(os.path.join(FAN_PATH, self.fan_status_path), 'r') as fault_status:
status = int(fault_status.read()) status = int(fault_status.read().strip())
except (ValueError, IOError): except (ValueError, IOError):
status = 0 status = 1
return status == 1 return status == 0
def get_presence(self): def get_presence(self):
""" """
@ -82,11 +150,14 @@ class Fan(FanBase):
else: else:
status = 0 status = 0
else: else:
try: if self.always_presence:
with open(os.path.join(FAN_PATH, self.fan_presence_path), 'r') as presence_status: status = 1
status = int(presence_status.read()) else:
except (ValueError, IOError): try:
status = 0 with open(os.path.join(FAN_PATH, self.fan_presence_path), 'r') as presence_status:
status = int(presence_status.read().strip())
except (ValueError, IOError):
status = 0
return status == 1 return status == 1
@ -104,7 +175,7 @@ class Fan(FanBase):
speed = 0 speed = 0
try: try:
with open(os.path.join(FAN_PATH, self.fan_max_speed_path), 'r') as max_fan_speed: with open(os.path.join(FAN_PATH, self.fan_max_speed_path), 'r') as max_fan_speed:
speed = int(max_fan_speed.read()) speed = int(max_fan_speed.read().strip())
except (ValueError, IOError): except (ValueError, IOError):
speed = 0 speed = 0
@ -120,12 +191,18 @@ class Fan(FanBase):
speed = 0 speed = 0
try: try:
with open(os.path.join(FAN_PATH, self.fan_speed_get_path), 'r') as fan_curr_speed: with open(os.path.join(FAN_PATH, self.fan_speed_get_path), 'r') as fan_curr_speed:
speed_in_rpm = int(fan_curr_speed.read()) speed_in_rpm = int(fan_curr_speed.read().strip())
except (ValueError, IOError): except (ValueError, IOError):
speed_in_rpm = 0 speed_in_rpm = 0
if self.fan_max_speed_path is None:
# in case of max speed unsupported, we just return speed in unit of RPM.
return speed_in_rpm
max_speed_in_rpm = self._get_max_speed_in_rpm() max_speed_in_rpm = self._get_max_speed_in_rpm()
speed = 100*speed_in_rpm/max_speed_in_rpm speed = 100*speed_in_rpm/max_speed_in_rpm
if speed > 100:
speed = 100
return speed return speed
@ -136,14 +213,13 @@ class Fan(FanBase):
Returns: Returns:
int: percentage of the max fan speed int: percentage of the max fan speed
""" """
speed = 0
if self.is_psu_fan: if self.is_psu_fan:
# Not like system fan, psu fan speed can not be modified, so target speed is N/A # Not like system fan, psu fan speed can not be modified, so target speed is N/A
return speed return self.get_speed()
try: try:
with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm: with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm:
pwm = int(fan_pwm.read()) pwm = int(fan_pwm.read().strip())
except (ValueError, IOError): except (ValueError, IOError):
pwm = 0 pwm = 0
@ -163,13 +239,36 @@ class Fan(FanBase):
bool: True if set success, False if fail. bool: True if set success, False if fail.
""" """
status = True status = True
pwm = int(round(PWM_MAX*speed/100.0))
if self.is_psu_fan: if self.is_psu_fan:
#PSU fan speed is not setable. if not self.get_presence():
return False return False
from .thermal import logger
try:
with open(self.psu_i2c_bus_path, 'r') as f:
bus = f.read().strip()
with open(self.psu_i2c_addr_path, 'r') as f:
addr = f.read().strip()
with open(self.psu_i2c_command_path, 'r') as f:
command = f.read().strip()
speed = Fan.PSU_FAN_SPEED[int(speed / 10)]
command = "i2cset -f -y {0} {1} {2} {3} wp".format(bus, addr, command, speed)
subprocess.check_call(command, shell = True)
return True
except subprocess.CalledProcessError as ce:
logger.log_error('Failed to call command {}, return code={}, command output={}'.format(ce.cmd, ce.returncode, ce.output))
return False
except Exception as e:
logger.log_error('Failed to set PSU FAN speed - {}'.format(e))
return False
try: try:
cooling_level = int(speed / 10)
if cooling_level < self.min_cooling_level:
cooling_level = self.min_cooling_level
speed = self.min_cooling_level * 10
self.set_cooling_level(cooling_level, cooling_level)
pwm = int(round(PWM_MAX*speed/100.0))
with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'w') as fan_pwm: with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'w') as fan_pwm:
fan_pwm.write(str(pwm)) fan_pwm.write(str(pwm))
except (ValueError, IOError): except (ValueError, IOError):
@ -243,4 +342,43 @@ class Fan(FanBase):
considered tolerable considered tolerable
""" """
# The tolerance value is fixed as 20% for all the Mellanox platform # The tolerance value is fixed as 20% for all the Mellanox platform
return 20 return 20
@classmethod
def set_cooling_level(cls, level, cur_state):
    """
    Change cooling level. The input level should be an integer value [1, 10].
    1 means 10%, 2 means 20%, 10 means 100%.

    Args:
        level: integer cooling level; (10 + level) is written first to
               reset the cooling level vector
        cur_state: value written to the cooling state file after the reset

    Raises:
        RuntimeError: if level is not an integer, is outside
        [MIN_VALID_COOLING_LEVEL, MAX_VALID_COOLING_LEVEL], or the cooling
        state file cannot be written
    """
    if not isinstance(level, int):
        raise RuntimeError("Failed to set cooling level, input parameter must be integer")

    lowest = cls.MIN_VALID_COOLING_LEVEL
    highest = cls.MAX_VALID_COOLING_LEVEL
    if not (lowest <= level <= highest):
        raise RuntimeError("Failed to set cooling level, level value must be in range [{}, {}], got {}".format(
            lowest,
            highest,
            level
        ))

    try:
        # Reset FAN cooling level vector. According to low level team,
        # if we need set cooling level to X, we need first write a (10+X)
        # to cooling_cur_state file to reset the cooling level vector.
        # The actual state is written afterwards, with the file reopened
        # for each of the two writes.
        for value in (level + 10, cur_state):
            with open(COOLING_STATE_PATH, 'w') as cooling_state:
                cooling_state.write(str(value))
    except (ValueError, IOError) as e:
        raise RuntimeError("Failed to set cooling level - {}".format(e))
@classmethod
def get_cooling_level(cls):
    """
    Read the current cooling level from the cooling state file

    Returns:
        The file content parsed as an integer

    Raises:
        RuntimeError: if the file cannot be read or does not contain an integer
    """
    try:
        with open(COOLING_STATE_PATH, 'r') as cooling_state:
            raw_level = cooling_state.read()
        return int(raw_level.strip())
    except (ValueError, IOError) as e:
        raise RuntimeError("Failed to get cooling level - {}".format(e))

View File

@ -24,6 +24,7 @@ class Platform(PlatformBase):
self._chassis.initialize_psu() self._chassis.initialize_psu()
self._chassis.initialize_fan() self._chassis.initialize_fan()
self._chassis.initialize_eeprom() self._chassis.initialize_eeprom()
self._chassis.initialize_thermals()
def _is_host(self): def _is_host(self):
""" """

View File

@ -60,6 +60,7 @@ class Psu(PsuBase):
psu_oper_status = "thermal/psu{}_pwr_status".format(self.index) psu_oper_status = "thermal/psu{}_pwr_status".format(self.index)
#psu_oper_status should always be present for all SKUs #psu_oper_status should always be present for all SKUs
self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status) self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status)
self._name = "PSU{}".format(psu_index + 1)
if sku in hwsku_dict_psu: if sku in hwsku_dict_psu:
filemap = psu_profile_list[hwsku_dict_psu[sku]] filemap = psu_profile_list[hwsku_dict_psu[sku]]
@ -90,9 +91,20 @@ class Psu(PsuBase):
psu_presence = os.path.join(self.psu_path, psu_presence) psu_presence = os.path.join(self.psu_path, psu_presence)
self.psu_presence = psu_presence self.psu_presence = psu_presence
fan = Fan(psu_index, psu_index, True) # unplugable PSU has no FAN
if fan.get_presence(): if sku not in hwsku_dict_with_unplugable_psu:
self._fan = fan fan = Fan(False, psu_index, psu_index, True)
self._fan_list.append(fan)
self.psu_green_led_path = "led_psu_green"
self.psu_red_led_path = "led_psu_red"
self.psu_orange_led_path = "led_psu_orange"
self.psu_led_cap_path = "led_psu_capability"
def get_name(self):
    """
    Retrieves the name of the PSU

    Returns:
        The value stored in self._name
    """
    return self._name
def _read_generic_file(self, filename, len): def _read_generic_file(self, filename, len):
""" """
@ -100,8 +112,10 @@ class Psu(PsuBase):
""" """
result = 0 result = 0
try: try:
if not os.path.exists(filename):
return result
with open(filename, 'r') as fileobj: with open(filename, 'r') as fileobj:
result = int(fileobj.read()) result = int(fileobj.read().strip())
except Exception as e: except Exception as e:
logger.log_info("Fail to read file {} due to {}".format(filename, repr(e))) logger.log_info("Fail to read file {} due to {}".format(filename, repr(e)))
return result return result
@ -169,3 +183,117 @@ class Psu(PsuBase):
return float(power) / 1000000 return float(power) / 1000000
else: else:
return None return None
def _get_led_capability(self):
    """
    Read the PSU LED capability file

    Returns:
        A list of the whitespace-separated entries found in the capability
        file, or None if the file could not be read
    """
    try:
        with open(os.path.join(LED_PATH, self.psu_led_cap_path), 'r') as psu_led_cap:
            return psu_led_cap.read().split()
    except (ValueError, IOError):
        # Best effort: an unreadable capability file is reported as None.
        return None
def set_status_led(self, color):
    """
    Sets the state of the PSU status LED

    Args:
        color: A string representing the color with which to set the
               PSU status LED

    Returns:
        bool: True if status LED state is set successfully, False if not

    Notes:
        Only one led for all PSUs.
    """
    led_cap_list = self._get_led_capability()
    if led_cap_list is None:
        return False

    def _write_led(led_file, value):
        # Write a single value to one LED file under LED_PATH.
        with open(os.path.join(LED_PATH, led_file), 'w') as psu_led:
            psu_led.write(value)

    try:
        if color == self.STATUS_LED_COLOR_GREEN:
            _write_led(self.psu_green_led_path, LED_ON)
            return True

        if color == self.STATUS_LED_COLOR_RED:
            # Some PSUs don't support a red led but support an orange one,
            # in this case we set the led to orange
            if self.STATUS_LED_COLOR_RED in led_cap_list:
                _write_led(self.psu_red_led_path, LED_ON)
            elif self.STATUS_LED_COLOR_ORANGE in led_cap_list:
                _write_led(self.psu_orange_led_path, LED_ON)
            else:
                return False
            return True

        if color == self.STATUS_LED_COLOR_OFF:
            # Switch off every color the capability file lists.
            if self.STATUS_LED_COLOR_GREEN in led_cap_list:
                _write_led(self.psu_green_led_path, str(LED_OFF))
            if self.STATUS_LED_COLOR_RED in led_cap_list:
                _write_led(self.psu_red_led_path, str(LED_OFF))
            if self.STATUS_LED_COLOR_ORANGE in led_cap_list:
                _write_led(self.psu_orange_led_path, str(LED_OFF))
            return True
    except (ValueError, IOError):
        return False

    # Unsupported color
    return False
def get_status_led(self):
    """
    Gets the state of the PSU status LED

    Returns:
        A string, one of the predefined STATUS_LED_COLOR_* strings above.
        A lit orange led is reported as STATUS_LED_COLOR_RED.

    Raises:
        RuntimeError: if a led file cannot be read
    """
    led_cap_list = self._get_led_capability()
    if led_cap_list is None:
        return self.STATUS_LED_COLOR_OFF

    def _led_is_on(led_file):
        # A led counts as on when its file content differs from LED_OFF.
        with open(os.path.join(LED_PATH, led_file), 'r') as psu_led:
            return LED_OFF != psu_led.read().rstrip('\n')

    try:
        # The green led is checked regardless of the capability list.
        if _led_is_on(self.psu_green_led_path):
            return self.STATUS_LED_COLOR_GREEN
        if self.STATUS_LED_COLOR_RED in led_cap_list and _led_is_on(self.psu_red_led_path):
            return self.STATUS_LED_COLOR_RED
        if self.STATUS_LED_COLOR_ORANGE in led_cap_list and _led_is_on(self.psu_orange_led_path):
            return self.STATUS_LED_COLOR_RED
    except (ValueError, IOError) as e:
        raise RuntimeError("Failed to read led status for psu due to {}".format(repr(e)))

    return self.STATUS_LED_COLOR_OFF
def get_power_available_status(self):
    """
    Gets the power available status

    Returns:
        A (bool, str) tuple:
        (True, "") if the PSU is present and its power is good.
        (False, "absence of PSU") if the PSU is not present.
        (False, "absence of power") if the PSU is present but its power
        is not good.
    """
    # Presence is checked first; power-good is only queried for a present PSU.
    if not self.get_presence():
        return False, "absence of PSU"
    if not self.get_powergood_status():
        return False, "absence of power"
    return True, ""

View File

@ -36,28 +36,46 @@ THERMAL_DEV_BOARD_AMBIENT = "board_amb"
THERMAL_API_GET_TEMPERATURE = "get_temperature" THERMAL_API_GET_TEMPERATURE = "get_temperature"
THERMAL_API_GET_HIGH_THRESHOLD = "get_high_threshold" THERMAL_API_GET_HIGH_THRESHOLD = "get_high_threshold"
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD = "get_high_critical_threshold"
THERMAL_API_INVALID_HIGH_THRESHOLD = 0.0
HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/" HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/"
THERMAL_ZONE_ASIC_PATH = "/var/run/hw-management/thermal/mlxsw/"
THERMAL_ZONE_MODULE_PATH = "/var/run/hw-management/thermal/mlxsw-module{}/"
THERMAL_ZONE_GEARBOX_PATH = "/var/run/hw-management/thermal/mlxsw-gearbox{}/"
THERMAL_ZONE_MODE = "thermal_zone_mode"
THERMAL_ZONE_POLICY = "thermal_zone_policy"
THERMAL_ZONE_TEMPERATURE = "thermal_zone_temp"
THERMAL_ZONE_NORMAL_TEMPERATURE = "temp_trip_norm"
MODULE_TEMPERATURE_FAULT_PATH = "/var/run/hw-management/thermal/module{}_temp_fault"
thermal_api_handler_cpu_core = { thermal_api_handler_cpu_core = {
THERMAL_API_GET_TEMPERATURE:"cpu_core{}", THERMAL_API_GET_TEMPERATURE:"cpu_core{}",
THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max" THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max",
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_core{}_crit"
} }
thermal_api_handler_cpu_pack = { thermal_api_handler_cpu_pack = {
THERMAL_API_GET_TEMPERATURE:"cpu_pack", THERMAL_API_GET_TEMPERATURE:"cpu_pack",
THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max" THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max",
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_pack_crit"
} }
thermal_api_handler_module = { thermal_api_handler_module = {
THERMAL_API_GET_TEMPERATURE:"module{}_temp_input", THERMAL_API_GET_TEMPERATURE:"module{}_temp_input",
THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit" THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit",
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"module{}_temp_emergency"
} }
thermal_api_handler_psu = { thermal_api_handler_psu = {
THERMAL_API_GET_TEMPERATURE:"psu{}_temp", THERMAL_API_GET_TEMPERATURE:"psu{}_temp",
THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max" THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max",
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:None
} }
thermal_api_handler_gearbox = { thermal_api_handler_gearbox = {
THERMAL_API_GET_TEMPERATURE:"temp_input_gearbox{}", THERMAL_API_GET_TEMPERATURE:"gearbox{}_temp_input",
THERMAL_API_GET_HIGH_THRESHOLD:None THERMAL_API_GET_HIGH_THRESHOLD:None,
THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:None
} }
thermal_ambient_apis = { thermal_ambient_apis = {
THERMAL_DEV_ASIC_AMBIENT : "asic", THERMAL_DEV_ASIC_AMBIENT : "asic",
@ -281,10 +299,12 @@ thermal_profile_list = [
} }
] ]
def initialize_thermals(sku, thermal_list, psu_list): def initialize_thermals(sku, thermal_list, psu_list):
# create thermal objects for all categories of sensors # create thermal objects for all categories of sensors
tp_index = hwsku_dict_thermal[sku] tp_index = hwsku_dict_thermal[sku]
thermal_profile = thermal_profile_list[tp_index] thermal_profile = thermal_profile_list[tp_index]
Thermal.thermal_profile = thermal_profile
for category in thermal_device_categories_all: for category in thermal_device_categories_all:
if category == THERMAL_DEV_CATEGORY_AMBIENT: if category == THERMAL_DEV_CATEGORY_AMBIENT:
count, ambient_list = thermal_profile[category] count, ambient_list = thermal_profile[category]
@ -303,15 +323,20 @@ def initialize_thermals(sku, thermal_list, psu_list):
else: else:
if category == THERMAL_DEV_CATEGORY_PSU: if category == THERMAL_DEV_CATEGORY_PSU:
for index in range(count): for index in range(count):
thermal = Thermal(category, start + index, True, psu_list[index].get_powergood_status, "power off") thermal = Thermal(category, start + index, True, psu_list[index].get_power_available_status)
thermal_list.append(thermal) thermal_list.append(thermal)
else: else:
for index in range(count): for index in range(count):
thermal = Thermal(category, start + index, True) thermal = Thermal(category, start + index, True)
thermal_list.append(thermal) thermal_list.append(thermal)
class Thermal(ThermalBase): class Thermal(ThermalBase):
def __init__(self, category, index, has_index, dependency = None, hint = None): thermal_profile = None
thermal_algorithm_status = False
def __init__(self, category, index, has_index, dependency = None):
""" """
index should be a string for category ambient and int for other categories index should be a string for category ambient and int for other categories
""" """
@ -328,8 +353,9 @@ class Thermal(ThermalBase):
self.category = category self.category = category
self.temperature = self._get_file_from_api(THERMAL_API_GET_TEMPERATURE) self.temperature = self._get_file_from_api(THERMAL_API_GET_TEMPERATURE)
self.high_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_THRESHOLD) self.high_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_THRESHOLD)
self.high_critical_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD)
self.dependency = dependency self.dependency = dependency
self.dependent_hint = hint
def get_name(self): def get_name(self):
""" """
@ -340,18 +366,21 @@ class Thermal(ThermalBase):
""" """
return self.name return self.name
def _read_generic_file(self, filename, len):
@classmethod
def _read_generic_file(cls, filename, len):
""" """
Read a generic file, returns the contents of the file Read a generic file, returns the contents of the file
""" """
result = None result = None
try: try:
with open(filename, 'r') as fileobj: with open(filename, 'r') as fileobj:
result = fileobj.read() result = fileobj.read().strip()
except Exception as e: except Exception as e:
logger.log_info("Fail to read file {} due to {}".format(filename, repr(e))) logger.log_info("Fail to read file {} due to {}".format(filename, repr(e)))
return result return result
def _get_file_from_api(self, api_name): def _get_file_from_api(self, api_name):
if self.category == THERMAL_DEV_CATEGORY_AMBIENT: if self.category == THERMAL_DEV_CATEGORY_AMBIENT:
if api_name == THERMAL_API_GET_TEMPERATURE: if api_name == THERMAL_API_GET_TEMPERATURE:
@ -363,9 +392,13 @@ class Thermal(ThermalBase):
if self.category in thermal_device_categories_singleton: if self.category in thermal_device_categories_singleton:
filename = handler filename = handler
else: else:
filename = handler.format(self.index) if handler:
filename = handler.format(self.index)
else:
return None
return join(HW_MGMT_THERMAL_ROOT, filename) return join(HW_MGMT_THERMAL_ROOT, filename)
def get_temperature(self): def get_temperature(self):
""" """
Retrieves current temperature reading from thermal Retrieves current temperature reading from thermal
@ -374,19 +407,20 @@ class Thermal(ThermalBase):
A float number of current temperature in Celsius up to nearest thousandth A float number of current temperature in Celsius up to nearest thousandth
of one degree Celsius, e.g. 30.125 of one degree Celsius, e.g. 30.125
""" """
if self.dependency and not self.dependency(): if self.dependency:
if self.dependent_hint: status, hint = self.dependency()
hint = self.dependent_hint if not status:
else: logger.log_debug("get_temperature for {} failed due to {}".format(self.name, hint))
hint = "unknown reason" return None
logger.log_info("get_temperature for {} failed due to {}".format(self.name, hint))
return None
value_str = self._read_generic_file(self.temperature, 0) value_str = self._read_generic_file(self.temperature, 0)
if value_str is None: if value_str is None:
return None return None
value_float = float(value_str) value_float = float(value_str)
if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD:
return None
return value_float / 1000.0 return value_float / 1000.0
def get_high_threshold(self): def get_high_threshold(self):
""" """
Retrieves the high threshold temperature of thermal Retrieves the high threshold temperature of thermal
@ -397,8 +431,167 @@ class Thermal(ThermalBase):
""" """
if self.high_threshold is None: if self.high_threshold is None:
return None return None
if self.dependency:
status, hint = self.dependency()
if not status:
logger.log_debug("get_high_threshold for {} failed due to {}".format(self.name, hint))
return None
value_str = self._read_generic_file(self.high_threshold, 0) value_str = self._read_generic_file(self.high_threshold, 0)
if value_str is None: if value_str is None:
return None return None
value_float = float(value_str) value_float = float(value_str)
if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD:
return None
return value_float / 1000.0 return value_float / 1000.0
def get_high_critical_threshold(self):
    """
    Retrieves the high critical threshold temperature of thermal

    Returns:
        A float number, the high critical threshold temperature of thermal in Celsius
        up to nearest thousandth of one degree Celsius, e.g. 30.125.
        None if this thermal has no critical threshold file, if its dependency
        reports it is not available, if the file cannot be read, or if a module
        thermal reports the invalid-threshold marker value.
    """
    threshold_file = self.high_critical_threshold
    if threshold_file is None:
        return None

    if self.dependency:
        # The dependency callback yields (status, hint); hint explains a failure.
        available, reason = self.dependency()
        if not available:
            logger.log_debug("get_high_critical_threshold for {} failed due to {}".format(self.name, reason))
            return None

    raw_value = self._read_generic_file(threshold_file, 0)
    if raw_value is None:
        return None

    reading = float(raw_value)
    is_module = self.category == THERMAL_DEV_CATEGORY_MODULE
    if is_module and reading == THERMAL_API_INVALID_HIGH_THRESHOLD:
        return None
    # The file stores thousandths of a degree.
    return reading / 1000.0
@classmethod
def _write_generic_file(cls, filename, content):
"""
Generic functions to write content to a specified file path if
the content has changed.
"""
try:
with open(filename, 'w+') as file_obj:
origin_content = file_obj.read()
if origin_content != content:
file_obj.write(content)
except Exception as e:
logger.log_info("Fail to write file {} due to {}".format(filename, repr(e)))
@classmethod
def set_thermal_algorithm_status(cls, status, force=True):
    """
    Enable/disable kernel thermal algorithm.

    When the kernel thermal algorithm is enabled, the kernel adjusts fan speed
    according to thermal zone temperatures. Note that the kernel only adjusts
    fan speed when a temperature crosses some "edge", e.g. it rises above the
    high threshold.

    When the kernel thermal algorithm is disabled, the kernel no longer adjusts
    fan speed. It is usually disabled when a fixed speed is wanted, e.g. when a
    fan unit is removed the fans are set to 100% and the algorithm is disabled
    so it does not change that speed.

    Args:
        status: True to enable the algorithm, False to disable it.
        force: When False, do nothing if the cached status already equals
            the requested one.

    Returns:
        True if thermal algorithm status changed.

    Raises:
        Exception: if no thermal profile has been set on the class.
    """
    if not cls.thermal_profile:
        raise Exception("Fail to get thermal profile for this switch")

    if not force and cls.thermal_algorithm_status == status:
        return False

    cls.thermal_algorithm_status = status
    if status:
        mode, policy = "enabled", "step_wise"
    else:
        mode, policy = "disabled", "user_space"

    def apply_to_zone(zone_path):
        # Each thermal zone directory has one mode file and one policy file.
        cls._write_generic_file(join(zone_path, THERMAL_ZONE_MODE), mode)
        cls._write_generic_file(join(zone_path, THERMAL_ZONE_POLICY), policy)

    apply_to_zone(THERMAL_ZONE_ASIC_PATH)

    # Module and gearbox zones are indexed; the profile gives (start, count).
    indexed_zones = (
        (THERMAL_DEV_CATEGORY_MODULE, THERMAL_ZONE_MODULE_PATH),
        (THERMAL_DEV_CATEGORY_GEARBOX, THERMAL_ZONE_GEARBOX_PATH),
    )
    for category, path_template in indexed_zones:
        if category in cls.thermal_profile:
            first, total = cls.thermal_profile[category]
            for offset in range(total):
                apply_to_zone(path_template.format(first + offset))
    return True
@classmethod
def check_thermal_zone_temperature(cls):
    """
    Check thermal zone current temperature with normal temperature

    The ASIC zone is checked first, then every module zone and every gearbox
    zone listed in the thermal profile. Checking stops at the first zone that
    fails.

    Returns:
        True if all thermal zones current temperature less or equal than normal temperature

    Raises:
        Exception: if no thermal profile has been set on the class.
    """
    if not cls.thermal_profile:
        raise Exception("Fail to get thermal profile for this switch")

    if not cls._check_thermal_zone_temperature(THERMAL_ZONE_ASIC_PATH):
        return False

    indexed_zones = (
        (THERMAL_DEV_CATEGORY_MODULE, THERMAL_ZONE_MODULE_PATH),
        (THERMAL_DEV_CATEGORY_GEARBOX, THERMAL_ZONE_GEARBOX_PATH),
    )
    for category, path_template in indexed_zones:
        if category not in cls.thermal_profile:
            continue
        first, total = cls.thermal_profile[category]
        for offset in range(total):
            zone_path = path_template.format(first + offset)
            if not cls._check_thermal_zone_temperature(zone_path):
                return False
    return True
@classmethod
def _check_thermal_zone_temperature(cls, thermal_zone_path):
    """
    Compare one thermal zone's current temperature with its normal temperature.

    Args:
        thermal_zone_path: Directory of the thermal zone to check.

    Returns:
        True if the current temperature is less than or equal to the normal
        temperature. False if it is higher, or if either file cannot be read
        or parsed (the failure is logged).
    """
    normal_temp_path = join(thermal_zone_path, THERMAL_ZONE_NORMAL_TEMPERATURE)
    current_temp_path = join(thermal_zone_path, THERMAL_ZONE_TEMPERATURE)
    try:
        with open(normal_temp_path, 'r') as file_obj:
            normal = float(file_obj.read())

        with open(current_temp_path, 'r') as file_obj:
            current = float(file_obj.read())
    except Exception as e:
        logger.log_info("Fail to check thermal zone temperature for file {} due to {}".format(thermal_zone_path, repr(e)))
        # An unreadable zone is reported as "not back to normal" explicitly
        # instead of falling off the end of the function with None.
        return False

    return current <= normal
@classmethod
def check_module_temperature_trustable(cls):
    """
    Report whether the module temperature readings can be trusted.

    Every module listed in the thermal profile has a fault file; the readings
    are trusted only if each of those files contains '0'.

    Returns:
        'trust' if no module reports a temperature fault, 'untrust' otherwise.
        A fault file that cannot be read also yields 'untrust'.

    Raises:
        Exception: if no thermal profile has been set on the class.
    """
    if not cls.thermal_profile:
        raise Exception("Fail to get thermal profile for this switch")

    start, count = cls.thermal_profile[THERMAL_DEV_CATEGORY_MODULE]
    for index in range(count):
        fault_file_path = MODULE_TEMPERATURE_FAULT_PATH.format(index + start)
        fault = cls._read_generic_file(fault_file_path, 0)
        # _read_generic_file returns None when the read fails; treat that as
        # untrusted rather than crashing on None.strip().
        if fault is None or fault.strip() != '0':
            return 'untrust'
    return 'trust'
@classmethod
def get_min_amb_temperature(cls):
    """
    Read the fan-side and port-side ambient temperatures and return the lower one.

    Returns:
        The smaller of the two readings as an int, in the unit stored in the files.

    Raises:
        Any exception from converting an unreadable or malformed reading is
        deliberately not caught.
    """
    ambient_paths = [
        join(HW_MGMT_THERMAL_ROOT, THERMAL_DEV_FAN_AMBIENT),
        join(HW_MGMT_THERMAL_ROOT, THERMAL_DEV_PORT_AMBIENT),
    ]

    # if there is any exception, let it raise
    fan_side, port_side = [int(cls._read_generic_file(path, 0)) for path in ambient_paths]
    return min(fan_side, port_side)

View File

@ -0,0 +1,209 @@
from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
from .thermal import logger
class SetFanSpeedAction(ThermalPolicyActionBase):
    """
    Base thermal action class to set speed for fans
    """
    # JSON field definition
    JSON_FIELD_SPEED = 'speed'

    def __init__(self):
        """
        Create the action with no speed configured yet.
        """
        self.speed = None

    def load_from_json(self, json_obj):
        """
        Construct SetFanSpeedAction via JSON. JSON example:
            {
                "type": "fan.all.set_speed",
                "speed": "100"
            }
        :param json_obj: A JSON object representing a SetFanSpeedAction action.
        :return:
        :raises ValueError: if the speed field is missing or outside [0, 100].
        """
        field = SetFanSpeedAction.JSON_FIELD_SPEED
        if field not in json_obj:
            raise ValueError('SetFanSpeedAction missing mandatory field {} in JSON policy file'.
                             format(field))

        requested = float(json_obj[field])
        if requested < 0 or requested > 100:
            raise ValueError('SetFanSpeedAction invalid speed value {} in JSON policy file, valid value should be [0, 100]'.
                             format(requested))
        self.speed = requested
@thermal_json_object('fan.all.set_speed')
class SetAllFanSpeedAction(SetFanSpeedAction):
    """
    Action to set speed for all fans
    """
    def execute(self, thermal_info_dict):
        """
        Set speed for all fans
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .thermal_infos import FanInfo
        fan_info_obj = thermal_info_dict.get(FanInfo.INFO_NAME)
        if not isinstance(fan_info_obj, FanInfo):
            # Without fan information there is nothing to act on.
            return

        for fan in fan_info_obj.get_presence_fans():
            fan.set_speed(self.speed)
        logger.log_info('Set all system FAN speed to {}'.format(self.speed))

        SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, self.speed)

    @classmethod
    def set_psu_fan_speed(cls, thermal_info_dict, speed):
        """
        Set speed for the fans of every PSU on the chassis
        :param thermal_info_dict: A dictionary stores all thermal information.
        :param speed: Speed value handed to each PSU fan's set_speed.
        :return:
        """
        from .thermal_infos import ChassisInfo
        chassis_info_obj = thermal_info_dict.get(ChassisInfo.INFO_NAME)
        if not isinstance(chassis_info_obj, ChassisInfo):
            return

        chassis = chassis_info_obj.get_chassis()
        for psu in chassis.get_all_psus():
            for psu_fan in psu.get_all_fans():
                psu_fan.set_speed(speed)
@thermal_json_object('fan.all.check_and_set_speed')
class CheckAndSetAllFanSpeedAction(SetAllFanSpeedAction):
    """
    Action to check thermal zone temperature and recover speed for all fans
    """
    def execute(self, thermal_info_dict):
        """
        Check thermal zone and set speed for all fans
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .thermal import Thermal
        if not Thermal.check_thermal_zone_temperature():
            # Some zone is still above its normal temperature: leave fans alone.
            return
        SetAllFanSpeedAction.execute(self, thermal_info_dict)
@thermal_json_object('thermal_control.control')
class ControlThermalAlgoAction(ThermalPolicyActionBase):
    """
    Action to control the thermal control algorithm
    """
    # JSON field definition
    JSON_FIELD_STATUS = 'status'

    def __init__(self):
        # Default to enabling the algorithm until a policy says otherwise.
        self.status = True

    def load_from_json(self, json_obj):
        """
        Construct ControlThermalAlgoAction via JSON. JSON example:
            {
                "type": "thermal_control.control",
                "status": "true"
            }
        :param json_obj: A JSON object representing a ControlThermalAlgoAction action.
        :return:
        :raises ValueError: if the status field is missing or is neither true nor false.
        """
        field = ControlThermalAlgoAction.JSON_FIELD_STATUS
        if field not in json_obj:
            raise ValueError('ControlThermalAlgoAction '
                             'missing mandatory field {} in JSON policy file'.
                             format(field))

        raw_status = json_obj[field]
        # Accept a native JSON boolean as well as the quoted "true"/"false"
        # form; anything without .lower() is stringified first.
        if hasattr(raw_status, 'lower'):
            status_str = raw_status.lower()
        else:
            status_str = str(raw_status).lower()

        if status_str == 'true':
            self.status = True
        elif status_str == 'false':
            self.status = False
        else:
            raise ValueError('Invalid {} field value, please specify true or false'.
                             format(field))

    def execute(self, thermal_info_dict):
        """
        Enable or disable the thermal control algorithm according to self.status
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .thermal import Thermal

        # force=False: nothing is written when the cached status already matches.
        status_changed = Thermal.set_thermal_algorithm_status(self.status, False)

        # Only update cooling level if thermal algorithm status changed
        if status_changed:
            if self.status:
                # Check thermal zone temperature, if all thermal zone temperature
                # back to normal, set it to minimum allowed speed to
                # save power
                UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)

            logger.log_info('Changed thermal algorithm status to {}'.format(self.status))
@thermal_json_object('thermal.recover')
class ThermalRecoverAction(ThermalPolicyActionBase):
    """
    Action that tries to bring fans back to the minimum cooling level
    """
    def execute(self, thermal_info_dict):
        """
        Delegate to UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        recover = UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum
        recover(thermal_info_dict)
class ChangeMinCoolingLevelAction(ThermalPolicyActionBase):
    """
    Action that recomputes Fan.min_cooling_level and applies it
    """
    # Minimum cooling level used when the platform has no minimum table.
    UNKNOWN_SKU_COOLING_LEVEL = 6

    def execute(self, thermal_info_dict):
        """
        Pick the minimum cooling level for the current trust state and ambient
        temperature, then make sure the current cooling level is not below it.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .device_data import DEVICE_DATA
        from .fan import Fan
        from .thermal_infos import ChassisInfo
        from .thermal_conditions import MinCoolingLevelChangeCondition
        from .thermal_conditions import UpdateCoolingLevelToMinCondition

        chassis = thermal_info_dict[ChassisInfo.INFO_NAME].get_chassis()
        has_minimum_table = (
            chassis.platform_name in DEVICE_DATA
            and 'thermal' in DEVICE_DATA[chassis.platform_name]
            and 'minimum_table' in DEVICE_DATA[chassis.platform_name]['thermal']
        )
        if has_minimum_table:
            trust_state = MinCoolingLevelChangeCondition.trust_state
            temperature = MinCoolingLevelChangeCondition.temperature
            table_name = 'unk_{}'.format(trust_state)
            minimum_table = DEVICE_DATA[chassis.platform_name]['thermal']['minimum_table'][table_name]

            # Keys look like "<min>:<max>"; the first range containing the
            # temperature wins. Table values are offset by 10.
            for key, cooling_level in minimum_table.items():
                bounds = key.split(':')
                lower = int(bounds[0].strip())
                upper = int(bounds[1].strip())
                if lower <= temperature <= upper:
                    Fan.min_cooling_level = cooling_level - 10
                    break
        else:
            Fan.min_cooling_level = ChangeMinCoolingLevelAction.UNKNOWN_SKU_COOLING_LEVEL

        current_cooling_level = Fan.get_cooling_level()
        if current_cooling_level < Fan.min_cooling_level:
            # Raise the current level to the new minimum and match the PSU fans.
            Fan.set_cooling_level(Fan.min_cooling_level, Fan.min_cooling_level)
            SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, Fan.min_cooling_level * 10)
        else:
            Fan.set_cooling_level(Fan.min_cooling_level, current_cooling_level)
            UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
class UpdatePsuFanSpeedAction(ThermalPolicyActionBase):
    """
    Action that makes PSU fans follow the latest recorded cooling level
    """
    def execute(self, thermal_info_dict):
        """
        Set PSU fan speed to ten times CoolingLevelChangeCondition.cooling_level
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .thermal_conditions import CoolingLevelChangeCondition
        target_speed = CoolingLevelChangeCondition.cooling_level * 10
        SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, target_speed)
class UpdateCoolingLevelToMinAction(ThermalPolicyActionBase):
    """
    Action that lowers fans to the minimum cooling level when it is safe
    """
    def execute(self, thermal_info_dict):
        """
        Run update_cooling_level_to_minimum
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        self.update_cooling_level_to_minimum(thermal_info_dict)

    @classmethod
    def update_cooling_level_to_minimum(cls, thermal_info_dict):
        """
        If every thermal zone is at or below its normal temperature, set all
        present fans and PSU fans to Fan.min_cooling_level * 10 and disable
        UpdateCoolingLevelToMinCondition; otherwise enable that condition.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        from .fan import Fan
        from .thermal import Thermal
        from .thermal_conditions import UpdateCoolingLevelToMinCondition
        from .thermal_infos import FanInfo

        if not Thermal.check_thermal_zone_temperature():
            UpdateCoolingLevelToMinCondition.enable = True
            return

        fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME]
        minimum_speed = Fan.min_cooling_level * 10
        for fan in fan_info_obj.get_presence_fans():
            fan.set_speed(minimum_speed)
        SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, minimum_speed)
        UpdateCoolingLevelToMinCondition.enable = False

View File

@ -0,0 +1,126 @@
from sonic_platform_base.sonic_thermal_control.thermal_condition_base import ThermalPolicyConditionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
class FanCondition(ThermalPolicyConditionBase):
    """
    Base class for conditions that look at fan information
    """
    def get_fan_info(self, thermal_info_dict):
        """
        Fetch the FanInfo object from the thermal information dictionary
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: The FanInfo object, or None if it is missing or of another type.
        """
        from .thermal_infos import FanInfo
        candidate = thermal_info_dict.get(FanInfo.INFO_NAME)
        return candidate if isinstance(candidate, FanInfo) else None
@thermal_json_object('fan.any.absence')
class AnyFanAbsenceCondition(FanCondition):
    """
    Condition that matches when at least one fan is absent
    """
    def is_match(self, thermal_info_dict):
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_absence_fans()) > 0
@thermal_json_object('fan.all.absence')
class AllFanAbsenceCondition(FanCondition):
    """
    Condition that matches when no fan is present
    """
    def is_match(self, thermal_info_dict):
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_presence_fans()) == 0
@thermal_json_object('fan.all.presence')
class AllFanPresenceCondition(FanCondition):
    """
    Condition that matches when no fan is absent
    """
    def is_match(self, thermal_info_dict):
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_absence_fans()) == 0
@thermal_json_object('fan.any.fault')
class AnyFanFaultCondition(FanCondition):
    """
    Condition that matches when at least one fan is faulty
    """
    def is_match(self, thermal_info_dict):
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_fault_fans()) > 0
@thermal_json_object('fan.all.good')
class AllFanGoodCondition(FanCondition):
    """
    Condition that matches when no fan is faulty
    """
    def is_match(self, thermal_info_dict):
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_fault_fans()) == 0
class PsuCondition(ThermalPolicyConditionBase):
    """
    Base class for conditions that look at PSU information
    """
    def get_psu_info(self, thermal_info_dict):
        """
        Fetch the PsuInfo object from the thermal information dictionary
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: The PsuInfo object, or None if it is missing or of another type.
        """
        from .thermal_infos import PsuInfo
        candidate = thermal_info_dict.get(PsuInfo.INFO_NAME)
        return candidate if isinstance(candidate, PsuInfo) else None
@thermal_json_object('psu.any.absence')
class AnyPsuAbsenceCondition(PsuCondition):
    """
    Condition that matches when at least one PSU is absent
    """
    def is_match(self, thermal_info_dict):
        psu_info_obj = self.get_psu_info(thermal_info_dict)
        if not psu_info_obj:
            return False
        return len(psu_info_obj.get_absence_psus()) > 0
@thermal_json_object('psu.all.absence')
class AllPsuAbsenceCondition(PsuCondition):
    """
    Condition that matches when no PSU is present
    """
    def is_match(self, thermal_info_dict):
        psu_info_obj = self.get_psu_info(thermal_info_dict)
        if not psu_info_obj:
            return False
        return len(psu_info_obj.get_presence_psus()) == 0
@thermal_json_object('psu.all.presence')
class AllPsuPresenceCondition(PsuCondition):
    """
    Condition that matches when no PSU is absent
    """
    def is_match(self, thermal_info_dict):
        psu_info_obj = self.get_psu_info(thermal_info_dict)
        if not psu_info_obj:
            return False
        return len(psu_info_obj.get_absence_psus()) == 0
class MinCoolingLevelChangeCondition(ThermalPolicyConditionBase):
    """
    Condition that matches when the inputs of the minimum cooling level changed
    """
    # Last observed module temperature trust state ('trust' / 'untrust').
    trust_state = None
    # Last observed minimum ambient temperature, in whole degrees.
    temperature = None

    def is_match(self, thermal_info_dict):
        """
        Refresh the cached trust state and ambient temperature
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if either cached value changed, else False.
        """
        from .thermal import Thermal

        trust_state = Thermal.check_module_temperature_trustable()
        temperature = Thermal.get_min_amb_temperature()
        # Floor division keeps the result a whole number of degrees on both
        # Python 2 and Python 3. Plain "/" yields a float on Python 3, which
        # would make nearly every reading look like a change.
        temperature = temperature // 1000

        change_cooling_level = False
        if trust_state != MinCoolingLevelChangeCondition.trust_state:
            MinCoolingLevelChangeCondition.trust_state = trust_state
            change_cooling_level = True

        if temperature != MinCoolingLevelChangeCondition.temperature:
            MinCoolingLevelChangeCondition.temperature = temperature
            change_cooling_level = True

        return change_cooling_level
class CoolingLevelChangeCondition(ThermalPolicyConditionBase):
    """
    Condition that matches when the fan cooling level changed since the last check
    """
    # Cooling level seen by the previous is_match call.
    cooling_level = None

    def is_match(self, thermal_info_dict):
        """
        Compare the current cooling level with the cached one and update the cache
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if the cooling level changed, else False.
        """
        from .fan import Fan
        level_now = Fan.get_cooling_level()
        if level_now == CoolingLevelChangeCondition.cooling_level:
            return False
        CoolingLevelChangeCondition.cooling_level = level_now
        return True
class UpdateCoolingLevelToMinCondition(ThermalPolicyConditionBase):
    """
    Condition that matches while a pending drop to the minimum cooling level
    has not happened yet
    """
    # Set by UpdateCoolingLevelToMinAction when the drop had to be postponed.
    enable = False

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if enabled and the current cooling level differs from
                 Fan.min_cooling_level, else False.
        """
        condition = UpdateCoolingLevelToMinCondition
        if condition.enable:
            from .fan import Fan
            if Fan.get_cooling_level() != Fan.min_cooling_level:
                return True
            # Already at the minimum: nothing left to do, stop matching.
            condition.enable = False
        return False

View File

@ -0,0 +1,154 @@
from sonic_platform_base.sonic_thermal_control.thermal_info_base import ThermalPolicyInfoBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
@thermal_json_object('fan_info')
class FanInfo(ThermalPolicyInfoBase):
    """
    Fan information needed by thermal policy
    """

    # Fan information name
    INFO_NAME = 'fan_info'

    def __init__(self):
        self._absence_fans = set()
        self._presence_fans = set()
        self._fault_fans = set()
        self._status_changed = False

    def collect(self, chassis):
        """
        Collect absence, presence and fault fans.
        :param chassis: The chassis object
        :return:
        """
        self._status_changed = False
        for fan in chassis.get_all_fans():
            is_present = fan.get_presence()
            is_good = fan.get_status()

            # Move the fan between the presence/absence sets when needed.
            if is_present:
                if fan not in self._presence_fans:
                    self._presence_fans.add(fan)
                    self._status_changed = True
                    self._absence_fans.discard(fan)
            else:
                if fan not in self._absence_fans:
                    self._absence_fans.add(fan)
                    self._status_changed = True
                    self._presence_fans.discard(fan)

            # Track fault state independently of presence.
            known_fault = fan in self._fault_fans
            if not is_good and not known_fault:
                self._fault_fans.add(fan)
                self._status_changed = True
            elif is_good and known_fault:
                self._fault_fans.remove(fan)
                self._status_changed = True

    def get_absence_fans(self):
        """
        Retrieves absence fans
        :return: A set of absence fans
        """
        return self._absence_fans

    def get_presence_fans(self):
        """
        Retrieves presence fans
        :return: A set of presence fans
        """
        return self._presence_fans

    def get_fault_fans(self):
        """
        Retrieves fault fans
        :return: A set of fault fans
        """
        return self._fault_fans

    def is_status_changed(self):
        """
        Retrieves if the status of fan information changed
        :return: True if status changed in the last collect call else False
        """
        return self._status_changed
@thermal_json_object('psu_info')
class PsuInfo(ThermalPolicyInfoBase):
    """
    PSU information needed by thermal policy
    """
    INFO_NAME = 'psu_info'

    def __init__(self):
        self._absence_psus = set()
        self._presence_psus = set()
        self._status_changed = False

    def collect(self, chassis):
        """
        Collect absence and presence PSUs.

        A PSU counts as present only when both get_presence() and
        get_powergood_status() are truthy; otherwise it counts as absent.
        :param chassis: The chassis object
        :return:
        """
        self._status_changed = False
        for psu in chassis.get_all_psus():
            # Query the PSU once per pass so both branches below judge the
            # same snapshot; power-good is only read for a present PSU.
            usable = psu.get_presence() and psu.get_powergood_status()
            if usable:
                if psu not in self._presence_psus:
                    self._presence_psus.add(psu)
                    self._status_changed = True
                    self._absence_psus.discard(psu)
            else:
                if psu not in self._absence_psus:
                    self._absence_psus.add(psu)
                    self._status_changed = True
                    self._presence_psus.discard(psu)

    def get_absence_psus(self):
        """
        Retrieves absence PSUs
        :return: A set of absence PSUs
        """
        return self._absence_psus

    def get_presence_psus(self):
        """
        Retrieves presence PSUs
        :return: A set of presence PSUs
        """
        return self._presence_psus

    def is_status_changed(self):
        """
        Retrieves if the status of PSU information changed
        :return: True if status changed in the last collect call else False
        """
        return self._status_changed
@thermal_json_object('chassis_info')
class ChassisInfo(ThermalPolicyInfoBase):
    """
    Chassis information needed by thermal policy
    """
    INFO_NAME = 'chassis_info'

    def __init__(self):
        # No chassis is known until collect() has been called.
        self._chassis = None

    def collect(self, chassis):
        """
        Remember the platform chassis.
        :param chassis: The chassis object
        :return:
        """
        self._chassis = chassis

    def get_chassis(self):
        """
        Retrieves platform chassis object
        :return: The chassis passed to the last collect call, or None.
        """
        return self._chassis

View File

@ -0,0 +1,64 @@
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy
from .thermal_actions import * # lgtm [py/polluting-import]
from .thermal_conditions import * # lgtm [py/polluting-import]
from .thermal_infos import * # lgtm [py/polluting-import]
class ThermalManager(ThermalManagerBase):
    """
    Vendor thermal manager that adds private policies to the loaded ones
    """
    @classmethod
    def initialize(cls):
        """
        Initialize thermal manager by registering the private thermal policies.
        :return:
        """
        cls._add_private_thermal_policy()

    @classmethod
    def deinitialize(cls):
        """
        Destroy thermal manager. The thermal control algorithm is started
        again on the way out.
        :return:
        """
        cls.start_thermal_control_algorithm()

    @classmethod
    def start_thermal_control_algorithm(cls):
        """
        Start thermal control algorithm by enabling it through Thermal.
        :return:
        """
        from .thermal import Thermal
        Thermal.set_thermal_algorithm_status(True)

    @classmethod
    def stop_thermal_control_algorithm(cls):
        """
        Stop thermal control algorithm by disabling it through Thermal.
        :return:
        """
        from .thermal import Thermal
        Thermal.set_thermal_algorithm_status(False)

    @classmethod
    def _add_private_thermal_policy(cls):
        """
        Register the built-in policies, each pairing one condition with one action.
        :return:
        """
        private_policies = (
            ('DynamicMinCoolingLevelPolicy', MinCoolingLevelChangeCondition, ChangeMinCoolingLevelAction),
            ('UpdatePsuFanSpeedPolicy', CoolingLevelChangeCondition, UpdatePsuFanSpeedAction),
            ('UpdateCoolingLevelPolicy', UpdateCoolingLevelToMinCondition, UpdateCoolingLevelToMinAction),
        )
        for policy_name, condition_type, action_type in private_policies:
            policy = ThermalPolicy()
            # Conditions and actions are keyed by their own class.
            policy.conditions[condition_type] = condition_type()
            policy.actions[action_type] = action_type()
            cls._policy_dict[policy_name] = policy

View File

@ -0,0 +1,18 @@
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
}

View File

@ -0,0 +1,17 @@
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
},
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
}

View File

@ -0,0 +1,10 @@
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
]
}

View File

@ -0,0 +1,11 @@
{
"name": "any fan absence",
"conditions": [
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
}

View File

@ -0,0 +1,58 @@
class MockFan:
    """
    Minimal fan stand-in for thermal policy tests
    """
    # Target speed shared by every MockFan; set_speed updates it class-wide.
    speed = 60

    def __init__(self):
        self.presence, self.status = True, True

    def get_presence(self):
        """Return the presence flag of this fan."""
        return self.presence

    def set_speed(self, speed):
        """Record the requested speed on the class, not on the instance."""
        MockFan.speed = speed

    def get_status(self):
        """Return the health flag of this fan."""
        return self.status

    def get_target_speed(self):
        """Return the speed last recorded on the class."""
        return MockFan.speed
class MockPsu:
    """
    Minimal PSU stand-in for thermal policy tests
    """
    def __init__(self):
        self.presence, self.powergood = True, True

    def get_presence(self):
        """Return the presence flag of this PSU."""
        return self.presence

    def get_powergood_status(self):
        """Return the power-good flag of this PSU."""
        return self.powergood

    def get_all_fans(self):
        """Return a fresh empty list: the mock PSU has no fans."""
        return list()
class MockChassis:
    """
    Minimal chassis stand-in holding lists of mock fans and PSUs
    """
    def __init__(self):
        self.fan_list, self.psu_list = [], []

    def get_all_psus(self):
        """Return the list of PSUs added so far."""
        return self.psu_list

    def get_all_fans(self):
        """Return the list of fans added so far."""
        return self.fan_list

    def get_thermal_manager(self):
        """Return the ThermalManager class of the platform package."""
        from sonic_platform.thermal_manager import ThermalManager
        return ThermalManager

    def make_fan_absence(self):
        """Append one fan whose presence flag is False."""
        absent_fan = MockFan()
        absent_fan.presence = False
        self.fan_list.append(absent_fan)

    def make_psu_absence(self):
        """Append one PSU whose presence flag is False."""
        absent_psu = MockPsu()
        absent_psu.presence = False
        self.psu_list.append(absent_psu)

View File

@ -0,0 +1,75 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "false",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence 1",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "true"
}
]
}
]
}

View File

@ -0,0 +1,17 @@
import os
import sys
from mock import MagicMock
test_path = os.path.dirname(os.path.abspath(__file__))
modules_path = os.path.dirname(test_path)
sys.path.insert(0, modules_path)
from sonic_platform.fan import Fan
def test_get_absence_fan_direction():
    """A fan reported as absent must give FAN_DIRECTION_NOT_APPLICABLE."""
    absent_fan = Fan(True, 0, 0)
    # Force the fan to look absent regardless of the test machine.
    absent_fan.get_presence = MagicMock(return_value=False)

    assert absent_fan.fan_dir is not None
    assert not absent_fan.is_psu_fan
    assert absent_fan.get_direction() == Fan.FAN_DIRECTION_NOT_APPLICABLE

View File

@ -0,0 +1,496 @@
import os
import sys
import pytest
import json
from mock import MagicMock
from .mock_platform import MockChassis, MockFan, MockPsu
test_path = os.path.dirname(os.path.abspath(__file__))
modules_path = os.path.dirname(test_path)
sys.path.insert(0, modules_path)
from sonic_platform.thermal_manager import ThermalManager
from sonic_platform.thermal_infos import FanInfo, PsuInfo
from sonic_platform.thermal import Thermal
Thermal.check_thermal_zone_temperature = MagicMock()
Thermal.set_thermal_algorithm_status = MagicMock()
@pytest.fixture(scope='session', autouse=True)
def thermal_manager():
    """Session-scoped, auto-used fixture that loads the test policy file.

    Loads ``thermal_policy.json`` from the test directory into
    ``ThermalManager`` and hands the class itself to the tests.
    """
    ThermalManager.load(os.path.join(test_path, 'thermal_policy.json'))
    return ThermalManager
def test_load_policy(thermal_manager):
    """The loaded policy file must expose the expected infos, policies and settings."""
    loaded_infos = thermal_manager._thermal_info_dict
    for info_name in ('psu_info', 'fan_info', 'chassis_info'):
        assert info_name in loaded_infos

    loaded_policies = thermal_manager._policy_dict
    expected_policies = (
        'any fan absence',
        'any psu absence',
        'any fan broken',
        'all fan and psu presence',
    )
    for policy_name in expected_policies:
        assert policy_name in loaded_policies

    # Values of the "thermal_control_algorithm" section of the policy file.
    assert thermal_manager._fan_speed_when_suspend == 60
    assert thermal_manager._run_thermal_algorithm_at_boot_up == False
def test_fan_info():
    """FanInfo must track absent, present and faulty fans across collections."""
    chassis = MockChassis()
    chassis.make_fan_absence()
    info = FanInfo()

    def collect_and_check(absent, present, faulty):
        # Re-collect from the chassis and verify the three fan sets; every
        # step of this test changes the fan state, so a change is expected.
        info.collect(chassis)
        assert len(info.get_absence_fans()) == absent
        assert len(info.get_presence_fans()) == present
        assert len(info.get_fault_fans()) == faulty
        assert info.is_status_changed()

    # Step 1: the only fan is absent.
    collect_and_check(absent=1, present=0, faulty=0)

    # Step 2: the fan becomes present.
    fans = chassis.get_all_fans()
    fans[0].presence = True
    collect_and_check(absent=0, present=1, faulty=0)

    # Step 3: the fan stays present but reports a bad status.
    fans[0].status = False
    collect_and_check(absent=0, present=1, faulty=1)
def test_psu_info():
    """PsuInfo must track absent and present PSUs across collections."""
    chassis = MockChassis()
    chassis.make_psu_absence()
    info = PsuInfo()

    def collect_and_check(absent, present):
        # Re-collect from the chassis and verify both PSU sets; every step
        # of this test changes the PSU state, so a change is expected.
        info.collect(chassis)
        assert len(info.get_absence_psus()) == absent
        assert len(info.get_presence_psus()) == present
        assert info.is_status_changed()

    # Step 1: the only PSU is absent.
    collect_and_check(absent=1, present=0)

    # Step 2: the PSU becomes present.
    psus = chassis.get_all_psus()
    psus[0].presence = True
    collect_and_check(absent=0, present=1)

    # Step 3: per the expected counts, a PSU without power-good is
    # reported in the absence set even though it is still present.
    psus[0].powergood = False
    collect_and_check(absent=1, present=0)
def test_fan_policy(thermal_manager):
    """Run the loaded policies through fan absence, recovery, fault and recovery.

    NOTE(review): ``Thermal.check_thermal_zone_temperature`` is rebound on
    the class and never restored, so the last mock installed here stays
    visible to tests that run afterwards.
    """
    chassis = MockChassis()
    chassis.make_fan_absence()
    chassis.fan_list.append(MockFan())

    # Phase 1: one fan absent -> the other fan is driven to 100.
    thermal_manager.run_policy(chassis)
    fans = chassis.get_all_fans()
    assert fans[1].speed == 100
    Thermal.set_thermal_algorithm_status.assert_called_with(False, False)

    # Phase 2: fan back, temperature check passes -> both fans set to 60.
    fans[0].presence = True
    Thermal.check_thermal_zone_temperature = MagicMock(return_value=True)
    thermal_manager.run_policy(chassis)
    Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
    assert Thermal.check_thermal_zone_temperature.call_count == 2
    assert fans[0].speed == 60
    assert fans[1].speed == 60

    # Phase 3: the first fan reports a bad status.
    fans[0].status = False
    thermal_manager.run_policy(chassis)
    Thermal.set_thermal_algorithm_status.assert_called_with(False, False)

    # Phase 4: fan healthy again, but the temperature check fails -> both
    # fans are expected to be at 100 afterwards.
    fans[0].status = True
    Thermal.check_thermal_zone_temperature = MagicMock(return_value=False)
    thermal_manager.run_policy(chassis)
    Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
    assert Thermal.check_thermal_zone_temperature.call_count == 2
    assert fans[0].speed == 100
    assert fans[1].speed == 100
def test_psu_policy(thermal_manager):
    """Run the loaded policies through PSU absence and recovery."""
    chassis = MockChassis()
    chassis.make_psu_absence()
    chassis.fan_list.append(MockFan())

    # PSU absent -> the fan is driven to 100.
    thermal_manager.run_policy(chassis)
    fans = chassis.get_all_fans()
    assert fans[0].speed == 100
    Thermal.set_thermal_algorithm_status.assert_called_with(False, False)

    # PSU back -> the status call switches to (True, False).
    psus = chassis.get_all_psus()
    psus[0].presence = True
    thermal_manager.run_policy(chassis)
    Thermal.set_thermal_algorithm_status.assert_called_with(True, False)
def test_any_fan_absence_condition():
    """AnyFanAbsenceCondition matches only while at least one fan is absent."""
    chassis = MockChassis()
    chassis.make_fan_absence()
    info = FanInfo()
    info.collect(chassis)

    from sonic_platform.thermal_conditions import AnyFanAbsenceCondition
    condition = AnyFanAbsenceCondition()
    assert condition.is_match({'fan_info': info})

    # Make the single fan present and re-collect: no more match.
    the_fan = chassis.get_all_fans()[0]
    the_fan.presence = True
    info.collect(chassis)
    assert not condition.is_match({'fan_info': info})
def test_all_fan_absence_condition():
    """AllFanAbsenceCondition matches only once every fan is absent."""
    chassis = MockChassis()
    chassis.make_fan_absence()
    second_fan = MockFan()
    fans = chassis.get_all_fans()
    fans.append(second_fan)
    info = FanInfo()
    info.collect(chassis)

    from sonic_platform.thermal_conditions import AllFanAbsenceCondition
    condition = AllFanAbsenceCondition()
    # One absent fan plus one freshly created MockFan: not "all absent".
    assert not condition.is_match({'fan_info': info})

    second_fan.presence = False
    info.collect(chassis)
    assert condition.is_match({'fan_info': info})
def test_any_fan_fault_condition():
    """AnyFanFaultCondition matches only while at least one fan has a bad status."""
    chassis = MockChassis()
    fans = chassis.get_all_fans()
    fans.append(MockFan())

    broken_fan = MockFan()
    broken_fan.status = False
    fans.append(broken_fan)

    info = FanInfo()
    info.collect(chassis)

    from sonic_platform.thermal_conditions import AnyFanFaultCondition
    condition = AnyFanFaultCondition()
    assert condition.is_match({'fan_info': info})

    # Repair the faulty fan and re-collect: no more match.
    broken_fan.status = True
    info.collect(chassis)
    assert not condition.is_match({'fan_info': info})
def test_all_fan_good_condition():
    """AllFanGoodCondition matches only once no fan has a bad status."""
    chassis = MockChassis()
    fans = chassis.get_all_fans()
    fans.append(MockFan())

    broken_fan = MockFan()
    broken_fan.status = False
    fans.append(broken_fan)

    info = FanInfo()
    info.collect(chassis)

    from sonic_platform.thermal_conditions import AllFanGoodCondition
    condition = AllFanGoodCondition()
    assert not condition.is_match({'fan_info': info})

    # Repair the faulty fan and re-collect: now every fan is good.
    broken_fan.status = True
    info.collect(chassis)
    assert condition.is_match({'fan_info': info})
def test_any_psu_absence_condition():
    """AnyPsuAbsenceCondition matches only while at least one PSU is absent."""
    chassis = MockChassis()
    chassis.make_psu_absence()
    info = PsuInfo()
    info.collect(chassis)

    from sonic_platform.thermal_conditions import AnyPsuAbsenceCondition
    condition = AnyPsuAbsenceCondition()
    assert condition.is_match({'psu_info': info})

    # Make the single PSU present and re-collect: no more match.
    the_psu = chassis.get_all_psus()[0]
    the_psu.presence = True
    info.collect(chassis)
    assert not condition.is_match({'psu_info': info})
def test_all_psu_absence_condition():
    """AllPsuAbsenceCondition matches only once every PSU is absent."""
    chassis = MockChassis()
    chassis.make_psu_absence()
    second_psu = MockPsu()
    psus = chassis.get_all_psus()
    psus.append(second_psu)
    info = PsuInfo()
    info.collect(chassis)

    from sonic_platform.thermal_conditions import AllPsuAbsenceCondition
    condition = AllPsuAbsenceCondition()
    # One absent PSU plus one freshly created MockPsu: not "all absent".
    assert not condition.is_match({'psu_info': info})

    second_psu.presence = False
    info.collect(chassis)
    assert condition.is_match({'psu_info': info})
def test_all_fan_presence_condition():
    """AllPsuPresenceCondition matches only once every PSU is present.

    NOTE(review): despite the "fan" in its name, this test only exercises
    ``AllPsuPresenceCondition`` with PSU data. Consider renaming it and
    adding a fan-side counterpart.
    """
    chassis = MockChassis()
    chassis.make_psu_absence()
    extra_psu = MockPsu()
    psus = chassis.get_all_psus()
    psus.append(extra_psu)
    info = PsuInfo()
    info.collect(chassis)

    from sonic_platform.thermal_conditions import AllPsuPresenceCondition
    condition = AllPsuPresenceCondition()
    # The first PSU is still absent.
    assert not condition.is_match({'psu_info': info})

    psus[0].presence = True
    info.collect(chassis)
    assert condition.is_match({'psu_info': info})
def test_load_set_fan_speed_action():
    """SetAllFanSpeedAction accepts a valid speed and rejects bad input with ValueError."""
    from sonic_platform.thermal_actions import SetAllFanSpeedAction
    action = SetAllFanSpeedAction()

    # A valid speed is parsed from its string form into an integer.
    action.load_from_json(json.loads('{\"speed\": \"50\"}'))
    assert action.speed == 50

    # Negative speed, speed above 100, and a missing "speed" key.
    rejected_payloads = (
        '{\"speed\": \"-1\"}',
        '{\"speed\": \"101\"}',
        '{\"invalid\": \"101\"}',
    )
    for payload in rejected_payloads:
        json_obj = json.loads(payload)
        with pytest.raises(ValueError):
            action.load_from_json(json_obj)
def test_execute_set_fan_speed_action():
    """Executing SetAllFanSpeedAction applies its speed to every collected fan."""
    chassis = MockChassis()
    fans = chassis.get_all_fans()
    for _ in range(2):
        fans.append(MockFan())

    info = FanInfo()
    info.collect(chassis)

    from sonic_platform.thermal_actions import SetAllFanSpeedAction
    action = SetAllFanSpeedAction()
    action.speed = 99
    action.execute({'fan_info': info})

    assert fans[0].speed == 99
    assert fans[1].speed == 99
def test_load_control_thermal_algo_action():
    """ControlThermalAlgoAction parses "true"/"false" and rejects anything else."""
    from sonic_platform.thermal_actions import ControlThermalAlgoAction
    action = ControlThermalAlgoAction()

    action.load_from_json(json.loads('{\"status\": \"false\"}'))
    assert not action.status

    action.load_from_json(json.loads('{\"status\": \"true\"}'))
    assert action.status

    # An unknown status value and a missing "status" key.
    rejected_payloads = (
        '{\"status\": \"invalid\"}',
        '{\"invalid\": \"true\"}',
    )
    for payload in rejected_payloads:
        json_obj = json.loads(payload)
        with pytest.raises(ValueError):
            action.load_from_json(json_obj)
def test_load_check_and_set_speed_action():
    """CheckAndSetAllFanSpeedAction accepts a valid speed and rejects bad input."""
    from sonic_platform.thermal_actions import CheckAndSetAllFanSpeedAction
    action = CheckAndSetAllFanSpeedAction()

    # A valid speed is parsed from its string form into an integer.
    action.load_from_json(json.loads('{\"speed\": \"40\"}'))
    assert action.speed == 40

    # Negative speed, speed above 100, and a missing "speed" key.
    rejected_payloads = (
        '{\"speed\": \"-1\"}',
        '{\"speed\": \"101\"}',
        '{\"invalid\": \"60\"}',
    )
    for payload in rejected_payloads:
        json_obj = json.loads(payload)
        with pytest.raises(ValueError):
            action.load_from_json(json_obj)
def test_execute_check_and_set_fan_speed_action():
    """CheckAndSetAllFanSpeedAction only changes speeds when the temperature check passes.

    NOTE(review): ``Thermal.check_thermal_zone_temperature`` is rebound on
    the class and never restored.
    """
    chassis = MockChassis()
    fans = chassis.get_all_fans()
    for _ in range(2):
        fans.append(MockFan())
    info = FanInfo()
    info.collect(chassis)

    # Temperature check passes: the requested speed is applied.
    Thermal.check_thermal_zone_temperature = MagicMock(return_value=True)
    from sonic_platform.thermal_actions import CheckAndSetAllFanSpeedAction
    action = CheckAndSetAllFanSpeedAction()
    action.speed = 99
    action.execute({'fan_info': info})
    assert fans[0].speed == 99
    assert fans[1].speed == 99

    # Temperature check fails: the fans keep the speed they already had.
    Thermal.check_thermal_zone_temperature = MagicMock(return_value=False)
    fans[0].speed = 100
    fans[1].speed = 100
    action.speed = 60
    action.execute({'fan_info': info})
    assert fans[0].speed == 100
    assert fans[1].speed == 100
def test_load_duplicate_condition():
    """Loading ``duplicate_condition.json`` into a ThermalPolicy must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy

    fixture_path = os.path.join(test_path, 'duplicate_condition.json')
    with open(fixture_path) as fixture:
        json_obj = json.load(fixture)

    policy = ThermalPolicy()
    with pytest.raises(Exception):
        policy.load_from_json(json_obj)
def test_load_duplicate_action():
    """Loading ``duplicate_action.json`` into a ThermalPolicy must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy

    fixture_path = os.path.join(test_path, 'duplicate_action.json')
    with open(fixture_path) as fixture:
        json_obj = json.load(fixture)

    policy = ThermalPolicy()
    with pytest.raises(Exception):
        policy.load_from_json(json_obj)
def test_load_empty_condition():
    """Loading ``empty_condition.json`` into a ThermalPolicy must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy

    fixture_path = os.path.join(test_path, 'empty_condition.json')
    with open(fixture_path) as fixture:
        json_obj = json.load(fixture)

    policy = ThermalPolicy()
    with pytest.raises(Exception):
        policy.load_from_json(json_obj)
def test_load_empty_action():
    """Loading ``empty_action.json`` into a ThermalPolicy must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy

    fixture_path = os.path.join(test_path, 'empty_action.json')
    with open(fixture_path) as fixture:
        json_obj = json.load(fixture)

    policy = ThermalPolicy()
    with pytest.raises(Exception):
        policy.load_from_json(json_obj)
def test_load_policy_with_same_conditions():
    """Loading a policy file containing policies with identical conditions must raise.

    A throwaway ``ThermalManagerBase`` subclass is used for the load, so
    the ``ThermalManager`` class used by the other tests is not the
    receiver of this call.
    """
    from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase

    class MockThermalManager(ThermalManagerBase):
        pass

    policy_path = os.path.join(test_path, 'policy_with_same_conditions.json')
    with pytest.raises(Exception):
        MockThermalManager.load(policy_path)
def test_dynamic_minimum_table_data():
    """Validate the ``minimum_table`` of every platform in DEVICE_DATA that defines one."""
    from sonic_platform.device_data import DEVICE_DATA

    for platform, platform_data in DEVICE_DATA.items():
        # Platforms without thermal data, or without a minimum table, are skipped.
        if 'thermal' not in platform_data:
            continue
        if 'minimum_table' not in platform_data['thermal']:
            continue
        check_minimum_table_data(platform, platform_data['thermal']['minimum_table'])
def check_minimum_table_data(platform, minimum_table):
    """Assert that a platform's dynamic minimum cooling-level table is well formed.

    Args:
        platform: Platform name; used only in assertion messages.
        minimum_table: Dict mapping a category key of the form
            ``<direction>_<trust state>`` to a dict that maps a
            ``'<low>:<high>'`` range string to an integer cooling level.

    Raises:
        AssertionError: If a category key or range string is malformed, a
            direction or trust state is unknown, a range is empty, the
            ranges (ordered by cooling level) do not start at -127 or are
            not contiguous, or a cooling level is outside 10..20 or not
            strictly increasing.
        ValueError: If a range bound is not an integer literal.
    """
    valid_dir = ('p2c', 'c2p', 'unk')
    valid_trust_state = ('trust', 'untrust')

    for category, data in minimum_table.items():
        key_data = category.split('_')
        # Fail with a readable message instead of an IndexError when the
        # key has no '_' separator.
        assert len(key_data) >= 2, \
            '{}: malformed category key {!r}'.format(platform, category)
        direction, trust_state = key_data[0], key_data[1]
        assert direction in valid_dir, \
            '{}: unknown direction in category {!r}'.format(platform, category)
        assert trust_state in valid_trust_state, \
            '{}: unknown trust state in category {!r}'.format(platform, category)
        where = '{}-{}-{}'.format(platform, direction, trust_state)

        # Walk the entries in ascending cooling-level order; the ranges are
        # expected to be contiguous in that same order.
        entries = sorted(
            ((level, range_str) for range_str, level in data.items()),
            key=lambda entry: entry[0])

        previous_edge = None
        previous_cooling_level = None
        for item in entries:
            cooling_level, range_str = item

            ranges = range_str.split(':')
            assert len(ranges) >= 2, \
                '{} error, malformed range, item={}'.format(where, item)
            low = int(ranges[0])
            high = int(ranges[1])
            assert low < high, \
                '{} error, empty range, item={}'.format(where, item)

            if previous_edge is None:
                # The lowest cooling level must start at -127.
                assert low == -127, \
                    '{} error, first range must start at -127, item={}'.format(where, item)
            else:
                assert low - previous_edge == 1, \
                    '{} error, item={}'.format(where, item)
            previous_edge = high

            assert 10 <= cooling_level <= 20, \
                '{} error, cooling level out of range, item={}'.format(where, item)
            if previous_cooling_level is not None:
                assert cooling_level > previous_cooling_level, \
                    '{} error, duplicate cooling level, item={}'.format(where, item)
            previous_cooling_level = cooling_level
def test_dynamic_minimum_policy(thermal_manager):
    """Exercise the DynamicMinCoolingLevelPolicy condition and action.

    NOTE(review): several ``Thermal`` and ``Fan`` class attributes are
    replaced with mocks here and never restored.
    """
    from sonic_platform.thermal_conditions import MinCoolingLevelChangeCondition
    from sonic_platform.thermal_actions import ChangeMinCoolingLevelAction
    from sonic_platform.thermal_infos import ChassisInfo
    from sonic_platform.thermal import Thermal
    from sonic_platform.fan import Fan

    ThermalManager.initialize()
    policy_name = 'DynamicMinCoolingLevelPolicy'
    assert policy_name in thermal_manager._policy_dict
    policy = thermal_manager._policy_dict[policy_name]
    assert MinCoolingLevelChangeCondition in policy.conditions
    assert ChangeMinCoolingLevelAction in policy.actions
    condition = policy.conditions[MinCoolingLevelChangeCondition]
    action = policy.actions[ChangeMinCoolingLevelAction]

    # --- Condition: matches whenever trust state or temperature changes ---
    Thermal.check_module_temperature_trustable = MagicMock(return_value='trust')
    Thermal.get_min_amb_temperature = MagicMock(return_value=35000)
    assert condition.is_match(None)
    assert MinCoolingLevelChangeCondition.trust_state == 'trust'
    assert MinCoolingLevelChangeCondition.temperature == 35
    # Nothing changed since the previous evaluation: no match.
    assert not condition.is_match(None)

    Thermal.check_module_temperature_trustable = MagicMock(return_value='untrust')
    assert condition.is_match(None)
    assert MinCoolingLevelChangeCondition.trust_state == 'untrust'

    Thermal.get_min_amb_temperature = MagicMock(return_value=25000)
    assert condition.is_match(None)
    assert MinCoolingLevelChangeCondition.temperature == 25

    # --- Action: applies a minimum cooling level that depends on the platform ---
    chassis = MockChassis()
    chassis.platform_name = 'invalid'
    chassis_info = ChassisInfo()
    chassis_info._chassis = chassis
    thermal_info_dict = {ChassisInfo.INFO_NAME: chassis_info}

    Fan.get_cooling_level = MagicMock(return_value=5)
    Fan.set_cooling_level = MagicMock()
    action.execute(thermal_info_dict)
    # For the unknown platform name the expected minimum level is 6.
    assert Fan.min_cooling_level == 6
    Fan.set_cooling_level.assert_called_with(6, 6)
    Fan.set_cooling_level.call_count = 0

    chassis.platform_name = 'x86_64-mlnx_msn2700-r0'
    action.execute(thermal_info_dict)
    assert Fan.min_cooling_level == 3
    Fan.set_cooling_level.assert_called_with(3, 5)

View File

@ -0,0 +1,97 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "false",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any fan broken",
"conditions": [
{
"type": "fan.any.fault"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
},
{
"type": "fan.all.good"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "true"
},
{
"type": "fan.all.check_and_set_speed",
"speed": "60"
}
]
}
]
}

View File

@ -10,7 +10,7 @@ $(DOCKER_PLATFORM_MONITOR)_DEPENDS += $(LIBSENSORS) $(LM_SENSORS) $(FANCONTROL)
ifeq ($(CONFIGURED_PLATFORM),barefoot) ifeq ($(CONFIGURED_PLATFORM),barefoot)
$(DOCKER_PLATFORM_MONITOR)_DEPENDS += $(PYTHON_THRIFT) $(DOCKER_PLATFORM_MONITOR)_DEPENDS += $(PYTHON_THRIFT)
endif endif
$(DOCKER_PLATFORM_MONITOR)_PYTHON_DEBS += $(SONIC_LEDD) $(SONIC_XCVRD) $(SONIC_PSUD) $(SONIC_SYSEEPROMD) $(DOCKER_PLATFORM_MONITOR)_PYTHON_DEBS += $(SONIC_LEDD) $(SONIC_XCVRD) $(SONIC_PSUD) $(SONIC_SYSEEPROMD) $(SONIC_THERMALCTLD)
$(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_COMMON_PY2) $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_COMMON_PY2)
$(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SWSSSDK_PY2) $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SWSSSDK_PY2)
$(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2) $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2)

View File

@ -0,0 +1,6 @@
# sonic-thermalctld (SONiC Thermal control daemon) Debian package
# File name of the .deb produced for the daemon.
SONIC_THERMALCTLD = python-sonic-thermalctld_1.0-1_all.deb
# Source directory of the daemon inside the sonic-platform-daemons tree.
$(SONIC_THERMALCTLD)_SRC_PATH = $(SRC_PATH)/sonic-platform-daemons/sonic-thermalctld
# Wheel dependency: the Python 2 daemon-base wheel.
$(SONIC_THERMALCTLD)_WHEEL_DEPENDS = $(SONIC_DAEMON_BASE_PY2)
# Add the package to SONIC_PYTHON_STDEB_DEBS (presumably the set of debs
# built from Python sources via stdeb; confirm against the build rules).
SONIC_PYTHON_STDEB_DEBS += $(SONIC_THERMALCTLD)

View File

@ -0,0 +1,50 @@
import multiprocessing
import os
import signal
import threading
#
# ProcessTaskBase =====================================================================
#
class ProcessTaskBase(object):  # TODO: put this class to swss-platform-common
    """Base class for a task whose ``task_worker`` runs in a child process.

    Subclasses override ``task_worker``. ``task_run`` starts the child
    process and ``task_stop`` terminates it.

    Attributes:
        task_process: The ``multiprocessing.Process`` started by
            ``task_run``, or None if no process has been started.
        task_stopping_event: ``multiprocessing.Event`` set by ``task_stop``.
    """

    def __init__(self):
        self.task_process = None
        self.task_stopping_event = multiprocessing.Event()

    def task_worker(self):
        """Body of the task; the base implementation does nothing."""
        pass

    def task_run(self):
        """Start ``task_worker`` in a new process.

        Does nothing if ``task_stop`` has already been called.
        """
        if self.task_stopping_event.is_set():
            return

        self.task_process = multiprocessing.Process(target=self.task_worker)
        self.task_process.start()

    def task_stop(self):
        """Flag the task as stopping and kill the worker process, if any.

        Safe to call when ``task_run`` never started a process.
        """
        self.task_stopping_event.set()

        process = self.task_process
        if process is None:
            # task_run() was never called, or returned early.
            return

        # Only signal a live child: if it has already exited and been
        # reaped, its pid is stale and may belong to another process.
        if process.is_alive():
            os.kill(process.pid, signal.SIGKILL)

        # Reap the child so it does not linger as a zombie.
        process.join()
#
# ThreadTaskBase =====================================================================
#
class ThreadTaskBase(object):  # TODO: put this class to swss-platform-common;
    """Base class for a task whose ``task_worker`` runs in a separate thread.

    Subclasses override ``task_worker``. ``task_run`` starts the thread and
    ``task_stop`` sets the stopping event and waits for the thread to end.

    Attributes:
        task_thread: The ``threading.Thread`` started by ``task_run``, or
            None if no thread has been started.
        task_stopping_event: ``threading.Event`` set by ``task_stop``.
    """

    def __init__(self):
        self.task_thread = None
        self.task_stopping_event = threading.Event()

    def task_worker(self):
        """Body of the task; the base implementation does nothing."""
        pass

    def task_run(self):
        """Start ``task_worker`` in a new thread.

        Does nothing if ``task_stop`` has already been called.
        """
        if self.task_stopping_event.is_set():
            return

        self.task_thread = threading.Thread(target=self.task_worker)
        self.task_thread.start()

    def task_stop(self):
        """Set the stopping event and wait for the worker thread, if any.

        Safe to call when ``task_run`` never started a thread.
        """
        self.task_stopping_event.set()

        thread = self.task_thread
        if thread is not None:
            thread.join()

@ -1 +1 @@
Subproject commit ee60f546d8740418ec2bd2ca922cc3be5fdfd0ac Subproject commit 9036e15dffe9b6581e4c724726abbea8446f9993

@ -1 +1 @@
Subproject commit 6a0a3bedb57d04eb4dd2f7494aba37e3477674b5 Subproject commit 40e7452d300758341d31f4afee59f2de2eb4dc47