[DellEMC S6000]: Move fan control to thermalctld from fancontrol.sh (#7239)

Why I did it
To implement fan control using thermalctld on the DellEMC S6000 platform.

Requires: Azure/sonic-linux-kernel#241

How I did it
Added thermal policies in 'thermal_policy.json'
Implemented thermal_manager.py and the necessary modules to perform fan control via thermalctld
Removed fancontrol.sh
How to verify it
Verified that the fan speeds are set based on the fan and temperature status.
Logs: S6000_fan_control_test_logs.txt
This commit is contained in:
Arun Saravanan Balachandran 2021-12-09 00:13:14 +05:30 committed by GitHub
parent 969cea07aa
commit 6d07efa890
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 682 additions and 315 deletions

View File

@ -0,0 +1,64 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "true",
"fan_speed_when_suspend": "80"
},
"info_types": [
{
"type": "chassis_info"
},
{
"type": "fandrawer_info"
},
{
"type": "psu_fan_info"
}
],
"policies": [
{
"name": "over temperature",
"conditions" : [
{
"type": "chassis.over_temperature"
}
],
"actions": [
{
"type": "chassis.thermal_shutdown"
}
]
},
{
"name": "any fandrawer fault",
"conditions": [
{
"type": "fandrawer.any.fault"
}
],
"actions": [
{
"type": "fan.all.set_max_speed"
},
{
"type": "fandrawer.fault.set_status_led"
}
]
},
{
"name": "all fandrawer normal",
"conditions": [
{
"type": "fandrawer.all.normal"
}
],
"actions": [
{
"type": "fan.all.set_thermal_level_speed"
},
{
"type": "fandrawer.normal.set_status_led"
}
]
}
]
}

View File

@ -1,10 +1,8 @@
s6000/scripts/s6000_platform.sh usr/local/bin
s6000/scripts/reset-qsfp usr/local/bin
s6000/scripts/set-fan-speed usr/local/bin
s6000/scripts/fancontrol.sh usr/local/bin
s6000/scripts/hw-management-generate-dump.sh usr/bin
s6000/systemd/platform-modules-s6000.service etc/systemd/system
s6000/systemd/fancontrol.service etc/systemd/system
common/io_rd_wr.py usr/local/bin
common/actions.sh usr/share/sonic/device/x86_64-dell_s6000_s1220-r0
s6000/scripts/platform_reboot_override usr/share/sonic/device/x86_64-dell_s6000_s1220-r0

View File

@ -5,8 +5,4 @@ depmod -a
systemctl enable platform-modules-s6000.service
systemctl start platform-modules-s6000.service
systemctl enable fancontrol.service
systemctl start fancontrol.service
#DEBHELPER#

View File

@ -1,253 +0,0 @@
#!/bin/bash
# Fan speed regulator: configuration and sysfs paths shared by the functions
# defined later in this script.

# Run cleanup on shell exit (0) and on the listed signal numbers.
# NOTE(review): signal 9 (SIGKILL) cannot normally be trapped - confirm
# whether it can simply be dropped from this list.
trap 'cleanup' 0 1 2 3 6 9 10 11 13 15
# Thermal level currently applied. monitor_temp_sensors only assigns 0-4, so
# starting at 99 guarantees the first pass programs a fan speed.
LEVEL=99
# Seconds slept between two passes of the main loop.
INTERVAL=5
# Per-tray state maintained by check_faulty_fan: it stores 2 once a fault has
# been reported and 0 when the tray is seen healthy. The initial value 1 is
# below 2, so a fault present on the first pass is still reported.
FAULTY_FANTRAY1=1
FAULTY_FANTRAY2=1
FAULTY_FANTRAY3=1
# FAN RPM Speed
# Target RPM programmed for each thermal level (IDLE is level 0).
# LEVEL5 is defined but not referenced anywhere else in this script.
IDLE=7000
LEVEL1=10000
LEVEL2=13000
LEVEL3=16000
LEVEL4=19000
LEVEL5=19000
# File that monitor_temp_sensors overwrites with "Thermal Overload ..." right
# before an over-temperature shutdown.
LRR_FILE="/host/reboot-cause/reboot-cause.txt"
I2C_ADAPTER="/sys/class/i2c-adapter/i2c-2/i2c-11"
# Sensor paths keep a literal hwmon* glob; they are expanded (unquoted) at
# every use further down.
SENSOR1="$I2C_ADAPTER/11-004c/hwmon/hwmon*/temp1_input"
SENSOR2="$I2C_ADAPTER/11-004d/hwmon/hwmon*/temp1_input"
SENSOR3="$I2C_ADAPTER/11-004e/hwmon/hwmon*/temp1_input"
SENSOR1_MAX="$I2C_ADAPTER/11-004c/hwmon/hwmon*/temp1_max"
SENSOR2_MAX="$I2C_ADAPTER/11-004d/hwmon/hwmon*/temp1_max"
SENSOR3_MAX="$I2C_ADAPTER/11-004e/hwmon/hwmon*/temp1_max"
# Max thresholds as found at start-up; cleanup writes these values back.
SENSOR1_MAX_VAL=$(cat $SENSOR1_MAX)
SENSOR2_MAX_VAL=$(cat $SENSOR2_MAX)
SENSOR3_MAX_VAL=$(cat $SENSOR3_MAX)
# Reducing by 63 to differentiate this temperature settings
# from pmon sensors configuration settings
# The raised value (original + 5000 - 63) is what monitor_temp_sensors writes
# into the temp1_max files while the script is running.
# NOTE(review): units look like millidegrees Celsius (readings are divided by
# 1000 before being logged) - confirm against the sensor driver.
SENSOR1_NEW_MAX=$(expr `echo $SENSOR1_MAX_VAL` + 5000 - 63)
SENSOR2_NEW_MAX=$(expr `echo $SENSOR2_MAX_VAL` + 5000 - 63)
SENSOR3_NEW_MAX=$(expr `echo $SENSOR3_MAX_VAL` + 5000 - 63)
# Three fan trays with each contains two separate fans
# fan1-fan4 fan2-fan5 fan3-fan6
# *_target files are written by update_fan_speed to request a speed.
FANTRAY1_FAN1=$I2C_ADAPTER/11-0029/fan1_target
FANTRAY1_FAN2=$I2C_ADAPTER/11-0029/fan2_target
FANTRAY2_FAN1=$I2C_ADAPTER/11-0029/fan3_target
FANTRAY2_FAN2=$I2C_ADAPTER/11-0029/fan4_target
FANTRAY3_FAN1=$I2C_ADAPTER/11-002a/fan1_target
FANTRAY3_FAN2=$I2C_ADAPTER/11-002a/fan2_target
# *_input files are read by check_faulty_fan to obtain the measured RPM.
FANTRAY1_FAN1_RPM=$I2C_ADAPTER/11-0029/fan1_input
FANTRAY1_FAN2_RPM=$I2C_ADAPTER/11-0029/fan2_input
FANTRAY2_FAN1_RPM=$I2C_ADAPTER/11-0029/fan3_input
FANTRAY2_FAN2_RPM=$I2C_ADAPTER/11-0029/fan4_input
FANTRAY3_FAN1_RPM=$I2C_ADAPTER/11-002a/fan1_input
FANTRAY3_FAN2_RPM=$I2C_ADAPTER/11-002a/fan2_input
# Verify that a kernel module shows up in the lsmod listing.
# Globals:   MODULE (written), ret (written)
# Arguments: $1 - module name, used as a grep pattern
# Outputs:   "<module> is not loaded!" on stdout when grep finds no match
# Returns:   exits the script with status 1 when grep reports "no match" (1);
#            any other grep status lets the script continue
check_module()
{
    MODULE=$1
    lsmod | grep "$MODULE" > /dev/null
    ret=$?
    case $ret in
        1)
            echo "$MODULE is not loaded!"
            exit 1
            ;;
    esac
}
# Trap handler: write the saved temp1_max value of each of the three sensors
# back to its sysfs file, then terminate the script.
# Globals:   SENSOR{1,2,3}_MAX_VAL (read), SENSOR{1,2,3}_MAX (read)
# Returns:   never returns; always exits with status 1
cleanup()
{
    local sensor_idx saved_name path_name
    for sensor_idx in 1 2 3; do
        saved_name="SENSOR${sensor_idx}_MAX_VAL"
        path_name="SENSOR${sensor_idx}_MAX"
        # Both expansions stay unquoted: the path holds a hwmon* glob that
        # has to be expanded by the redirection itself.
        echo ${!saved_name} > ${!path_name}
    done
    exit 1
}
# Succeeds when at least one of the two RPM readings is at or below 1000.
_fantray_rpm_low()
{
    [ "$1" -le "1000" ] || [ "$2" -le "1000" ]
}

# Succeeds when at least one of the two RPM readings is at or above 1000.
_fantray_rpm_ok()
{
    [ "$1" -ge "1000" ] || [ "$2" -ge "1000" ]
}

# Inspect the measured RPM of the six fans (two per tray). A tray with a fan
# at or below 1000 RPM is treated as faulty: the first time this is seen the
# set-fan-speed helper is invoked with "16200 2" and a message is logged.
# Globals:   FANTRAY{1,2,3}_FAN{1,2}_RPM (read), FAULTY_FANTRAY{1,2,3}
#            (read/written: 2 = fault reported, 0 = healthy), fan1..fan6 (written)
# NOTE(review): "2 > /dev/null" passes a literal 2 as second argument and
# redirects stdout only - confirm "2>/dev/null" was not what was meant.
check_faulty_fan()
{
    fan1=$(cat $FANTRAY1_FAN1_RPM)
    fan2=$(cat $FANTRAY1_FAN2_RPM)
    fan3=$(cat $FANTRAY2_FAN1_RPM)
    fan4=$(cat $FANTRAY2_FAN2_RPM)
    fan5=$(cat $FANTRAY3_FAN1_RPM)
    fan6=$(cat $FANTRAY3_FAN2_RPM)

    # Tray 1
    if _fantray_rpm_low "$fan1" "$fan2"; then
        # Report only once per failure episode
        if [ $FAULTY_FANTRAY1 -lt "2" ]; then
            FAULTY_FANTRAY1=2
            /usr/local/bin/set-fan-speed 16200 2 > /dev/null
            logger "Faulty Fans in Fantray1 $fan1 $fan2 Please check."
        fi
    elif _fantray_rpm_ok "$fan1" "$fan2"; then
        FAULTY_FANTRAY1=0
    fi

    # Tray 2
    if _fantray_rpm_low "$fan3" "$fan4"; then
        # Report only once per failure episode
        if [ $FAULTY_FANTRAY2 -lt "2" ]; then
            FAULTY_FANTRAY2=2
            /usr/local/bin/set-fan-speed 16200 2 > /dev/null
            logger "Faulty Fans in FanTray2: $fan3 $fan4. Please check."
        fi
    elif _fantray_rpm_ok "$fan3" "$fan4"; then
        FAULTY_FANTRAY2=0
    fi

    # Tray 3
    if _fantray_rpm_low "$fan5" "$fan6"; then
        # Report only once per failure episode
        if [ $FAULTY_FANTRAY3 -lt "2" ]; then
            FAULTY_FANTRAY3=2
            /usr/local/bin/set-fan-speed 16200 2 > /dev/null
            logger "FanTray3 Fans are Faulty.. $fan5 $fan6. Please check."
        fi
    elif _fantray_rpm_ok "$fan5" "$fan6"; then
        FAULTY_FANTRAY3=0
    fi
}
# Request the same speed on all six fans by writing it to every fan target
# file, tray by tray.
# Globals:   FANTRAY{1,2,3}_FAN{1,2} (read)
# Arguments: $1 - value written to each target file
update_fan_speed()
{
    local fan_speed=$1
    local target_file
    for target_file in \
        "$FANTRAY1_FAN1" "$FANTRAY1_FAN2" \
        "$FANTRAY2_FAN1" "$FANTRAY2_FAN2" \
        "$FANTRAY3_FAN1" "$FANTRAY3_FAN2"; do
        echo $fan_speed > $target_file
    done
}
# Run one pass of the thermal control loop.
#
# 1. Keep each sensor's temp1_max at "configured value + 5000 - 63". When the
#    file no longer holds the value this script wrote, the new content is
#    taken as the configured value and the raised value is written again.
# 2. If any sensor reading has reached its configured max, record
#    "Thermal Overload" in LRR_FILE, log, restore the configured max values
#    and power the system off.
# 3. Otherwise average the three readings, map the average to a thermal
#    level (<=25, 26-44, 45-59, 60-79, >=80) and, when the level changed,
#    program the matching fan speed and log the RPM that was applied.
# 4. Check the fan trays for faulty fans.
#
# Globals:   SENSOR{1,2,3}, SENSOR{1,2,3}_MAX, LRR_FILE, IDLE, LEVEL1..LEVEL4
#            (read); SENSOR{1,2,3}_MAX_VAL, SENSOR{1,2,3}_NEW_MAX, LEVEL
#            (read/written); s1..s3, sensor1..sensor3, sum, sensor_temp (written)
# Arguments: none
monitor_temp_sensors()
{
    local new_level new_rpm

    # Sensor paths are expanded unquoted on purpose: they contain a hwmon*
    # glob that must be resolved at the point of use.
    SENSOR1_CUR_MAX_VAL=$(cat $SENSOR1_MAX)
    SENSOR2_CUR_MAX_VAL=$(cat $SENSOR2_MAX)
    SENSOR3_CUR_MAX_VAL=$(cat $SENSOR3_MAX)

    if [ "$SENSOR1_CUR_MAX_VAL" -ne "$SENSOR1_NEW_MAX" ]; then
        SENSOR1_NEW_MAX=$((SENSOR1_CUR_MAX_VAL + 5000 - 63))
        SENSOR1_MAX_VAL=$SENSOR1_CUR_MAX_VAL
        echo $SENSOR1_NEW_MAX > $SENSOR1_MAX
    fi
    if [ "$SENSOR2_CUR_MAX_VAL" -ne "$SENSOR2_NEW_MAX" ]; then
        SENSOR2_NEW_MAX=$((SENSOR2_CUR_MAX_VAL + 5000 - 63))
        SENSOR2_MAX_VAL=$SENSOR2_CUR_MAX_VAL
        echo $SENSOR2_NEW_MAX > $SENSOR2_MAX
    fi
    if [ "$SENSOR3_CUR_MAX_VAL" -ne "$SENSOR3_NEW_MAX" ]; then
        SENSOR3_NEW_MAX=$((SENSOR3_CUR_MAX_VAL + 5000 - 63))
        SENSOR3_MAX_VAL=$SENSOR3_CUR_MAX_VAL
        echo $SENSOR3_NEW_MAX > $SENSOR3_MAX
    fi

    # Read every sensor once so the threshold check and the logged values
    # come from the same sample. Raw values are in 1000's.
    s1=$(cat $SENSOR1)
    s2=$(cat $SENSOR2)
    s3=$(cat $SENSOR3)
    sensor1=$((s1 / 1000))
    sensor2=$((s2 / 1000))
    sensor3=$((s3 / 1000))

    if [ "$s1" -ge "$SENSOR1_MAX_VAL" ] || [ "$s2" -ge "$SENSOR2_MAX_VAL" ] || [ "$s3" -ge "$SENSOR3_MAX_VAL" ]; then
        # Thermal trip is about to happen
        echo "Thermal Overload $sensor1 $sensor2 $sensor3" > $LRR_FILE
        logger "Shutting down due to over temperature ($sensor1 degree, $sensor2 degree, $sensor3 degree)"
        sync
        sleep 1 # Give time to send logger message to server
        # Assigning the original max values back in sensors
        echo $SENSOR1_MAX_VAL > $SENSOR1_MAX
        echo $SENSOR2_MAX_VAL > $SENSOR2_MAX
        echo $SENSOR3_MAX_VAL > $SENSOR3_MAX
        # Restoring the thresholds is expected to make the hardware shut the
        # box down; the explicit power-off below is the fallback in case the
        # hardware fails to do so.
        /sbin/shutdown -P now
    fi

    sum=$((sensor1 + sensor2 + sensor3))
    sensor_temp=$((sum / 3))

    # Map the average temperature to a level and the RPM that goes with it.
    if [ "$sensor_temp" -le 25 ]; then
        new_level=0
        new_rpm=$IDLE
    elif [ "$sensor_temp" -le 44 ]; then
        new_level=1
        new_rpm=$LEVEL1
    elif [ "$sensor_temp" -le 59 ]; then
        new_level=2
        new_rpm=$LEVEL2
    elif [ "$sensor_temp" -le 79 ]; then
        new_level=3
        new_rpm=$LEVEL3
    else
        new_level=4
        new_rpm=$LEVEL4
    fi

    # Only touch the fans (and log) when the level actually changes; the
    # message reports the RPM that was really applied.
    if [ "$LEVEL" -ne "$new_level" ]; then
        LEVEL=$new_level
        update_fan_speed $new_rpm
        logger "Adjusted FAN Speed to $new_rpm RPM against $sensor_temp Temperature"
    fi

    # Check for faulty fan
    check_faulty_fan
}
# Check drivers for sysfs attributes
# check_module terminates the script with status 1 when lsmod does not list
# the given module.
check_module "dell_s6000_platform"
check_module "max6620"
# main loop calling the main function at specified intervals
# One monitor_temp_sensors pass is run every INTERVAL seconds, forever; the
# script only ends through the cleanup trap or an explicit exit.
while true
do
monitor_temp_sensors
# Sleep while still handling signals
# The sleep runs in the background and the shell blocks in the wait builtin,
# which returns as soon as a trapped signal arrives instead of delaying the
# trap until a foreground sleep would have finished.
sleep $INTERVAL &
wait
done

View File

@ -125,6 +125,26 @@ switch_board_qsfp_lpmode() {
echo $value > /sys/bus/platform/devices/dell-s6000-cpld.0/qsfp_lpmode
}
# Write the given divider value to the fanN_div attributes of both MAX6620
# fan controllers (fan1-fan4 on 11-0029, fan1-fan2 on 11-002a).
#
# Each controller is programmed as soon as its fan1_div attribute is
# writable. Up to three attempts are made, 3 seconds apart, and the function
# returns early only once BOTH controllers have been programmed. (Previously
# the writes started as soon as either controller was ready, so the writes to
# a controller that had not appeared yet failed and were never retried.)
#
# Arguments: $1 - value to write to every fanN_div attribute
# Returns:   0; a controller that never became writable is left untouched
set_max6620_dynamic_reg() {
    local value=$1
    # The hwmon index is not fixed, so the paths keep a glob that is expanded
    # (unquoted) every time they are used.
    local chip1='/sys/class/i2c-adapter/i2c-11/11-0029/hwmon/hwmon*'
    local chip2='/sys/class/i2c-adapter/i2c-11/11-002a/hwmon/hwmon*'
    local chip1_done=0
    local chip2_done=0
    local attempt fan

    # Retry three times
    for attempt in 1 2 3; do
        if [ "$chip1_done" -eq 0 ] && [ -w $chip1/fan1_div ]; then
            for fan in 1 2 3 4; do
                echo "$value" > $chip1/fan${fan}_div
            done
            chip1_done=1
        fi

        if [ "$chip2_done" -eq 0 ] && [ -w $chip2/fan1_div ]; then
            for fan in 1 2; do
                echo "$value" > $chip2/fan${fan}_div
            done
            chip2_done=1
        fi

        if [ "$chip1_done" -eq 1 ] && [ "$chip2_done" -eq 1 ]; then
            return 0
        fi

        # Sleep for 3 seconds to wait for device tree to be ready
        sleep 3
    done
}
install_python_api_package() {
device="/usr/share/sonic/device"
platform=$(/usr/local/bin/sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)
@ -154,7 +174,7 @@ if [[ "$1" == "init" ]]; then
#Use 1 for PCIe Gen1, 2 for PCIe Gen2
change_pcie_speed 1
add_i2c_devices
set_max6620_dynamic_reg 4
/usr/local/bin/set-fan-speed 15000
switch_board_qsfp_lpmode "disable"
/usr/local/bin/reset-qsfp

View File

@ -21,12 +21,12 @@ PSU_FAN2=/sys/class/i2c-adapter/i2c-1/1-0059/fan1_target
# Three fan trays with each contains two separate fans
# fan1-fan4 fan2-fan5 fan3-fan6
FAN1=/sys/class/i2c-adapter/i2c-11/11-0029/fan1_target
FAN2=/sys/class/i2c-adapter/i2c-11/11-0029/fan2_target
FAN3=/sys/class/i2c-adapter/i2c-11/11-0029/fan3_target
FAN4=/sys/class/i2c-adapter/i2c-11/11-0029/fan4_target
FAN5=/sys/class/i2c-adapter/i2c-11/11-002a/fan1_target
FAN6=/sys/class/i2c-adapter/i2c-11/11-002a/fan2_target
FAN1=/sys/class/i2c-adapter/i2c-11/11-0029/hwmon/hwmon*/fan1_target
FAN2=/sys/class/i2c-adapter/i2c-11/11-0029/hwmon/hwmon*/fan2_target
FAN3=/sys/class/i2c-adapter/i2c-11/11-0029/hwmon/hwmon*/fan3_target
FAN4=/sys/class/i2c-adapter/i2c-11/11-0029/hwmon/hwmon*/fan4_target
FAN5=/sys/class/i2c-adapter/i2c-11/11-002a/hwmon/hwmon*/fan1_target
FAN6=/sys/class/i2c-adapter/i2c-11/11-002a/hwmon/hwmon*/fan2_target
speed=$1
logger -t platform-modules "Trying to set fan speed to $speed"

View File

@ -10,6 +10,7 @@
try:
import os
import time
import subprocess
import struct
from sonic_platform_base.chassis_base import ChassisBase
from sonic_platform.sfp import Sfp
@ -27,6 +28,14 @@ MAX_S6000_PSU = 2
MAX_S6000_THERMAL = 6
MAX_S6000_COMPONENT = 4
HYST_RANGE = 5
LEVEL0_THRESHOLD = 25
LEVEL1_THRESHOLD = 30
LEVEL2_THRESHOLD = 45
LEVEL3_THRESHOLD = 60
LEVEL4_THRESHOLD = 80
LEVEL5_THRESHOLD = 85
class Chassis(ChassisBase):
"""
@ -42,6 +51,11 @@ class Chassis(ChassisBase):
reset_reason_dict[0x6] = ChassisBase.REBOOT_CAUSE_NON_HARDWARE
reset_reason_dict[0x7] = ChassisBase.REBOOT_CAUSE_THERMAL_OVERLOAD_OTHER
_num_monitor_thermals = 3
_monitor_thermal_list = []
_is_fan_control_enabled = False
_fan_control_initialised = False
def __init__(self):
ChassisBase.__init__(self)
self.status_led_reg = "system_led"
@ -142,20 +156,12 @@ class Chassis(ChassisBase):
return
os.close(fd)
def _get_thermal_reset(self):
reset_file = "/host/reboot-cause/reboot-cause.txt"
if (not os.path.isfile(reset_file)):
return False
try:
with open(reset_file, 'r') as fd:
rv = fd.read()
except Exception as error:
return False
def _init_fan_control(self):
if "Thermal Overload" in rv:
return True
return False
if not self._fan_control_initialised:
for i in range(self._num_monitor_thermals):
self._monitor_thermal_list.append(Thermal(i))
self._fan_control_initialised = True
def get_name(self):
"""
@ -245,9 +251,6 @@ class Chassis(ChassisBase):
# NVRAM. Only Warmboot and Coldboot reason are supported here.
# Since it does not support any hardware reason, we return
# non_hardware as default
if self._get_thermal_reset() == True:
self._nvram_write(0x49, 0x7)
lrr = self._get_cpld_register('last_reboot_reason')
if (lrr != 'ERR'):
reset_reason = int(lrr, base=16)
@ -382,3 +385,77 @@ class Chassis(ChassisBase):
return status_led
else:
return None
def get_thermal_manager(self):
"""
Retrieves thermal manager class on this chassis
Returns:
A class derived from ThermalManagerBase representing the
specified thermal manager
"""
from .thermal_manager import ThermalManager
return ThermalManager
def set_fan_control_status(self, enable):
if enable and not self._is_fan_control_enabled:
self._init_fan_control()
for thermal in self._monitor_thermal_list:
thermal.set_high_threshold(LEVEL5_THRESHOLD, force=True)
self._is_fan_control_enabled = True
elif not enable and self._is_fan_control_enabled:
for thermal in self._monitor_thermal_list:
thermal.set_high_threshold(LEVEL4_THRESHOLD, force=True)
self._is_fan_control_enabled = False
def get_monitor_thermals(self):
return self._monitor_thermal_list
def thermal_shutdown(self):
# Update reboot cause
self._nvram_write(0x49, 0x7)
subprocess.call('sync')
time.sleep(1)
for thermal in self._monitor_thermal_list:
thermal.set_high_threshold(LEVEL4_THRESHOLD, force=True)
@staticmethod
def get_system_thermal_level(curr_thermal_level, system_temperature):
    """
    Maps a system temperature to a thermal level in the range 0-4.

    Temperatures inside a hysteresis band (between two adjacent levels)
    keep the current level when it is one of the two neighbours of the
    band; otherwise the neighbour nearest to the current level is used.

    Args:
        curr_thermal_level: An integer, the thermal level currently in use
        system_temperature: A number, the temperature to classify

    Returns:
        An integer, the thermal level for the given temperature
    """
    def pick_band_level(level, lower, upper):
        # Stay put when already on one side of the band
        if level in (lower, upper):
            return level
        # Nearest neighbour wins; 'upper' is listed first so that it is
        # chosen when both are equally far away
        return min((upper, lower), key=lambda candidate: abs(level - candidate))

    level2_band_start = LEVEL2_THRESHOLD - HYST_RANGE
    level3_band_start = LEVEL3_THRESHOLD - HYST_RANGE

    if system_temperature < LEVEL0_THRESHOLD:
        return 0
    if LEVEL0_THRESHOLD <= system_temperature < LEVEL1_THRESHOLD:
        return pick_band_level(curr_thermal_level, 0, 1)
    if LEVEL1_THRESHOLD <= system_temperature <= level2_band_start:
        return 1
    if level2_band_start < system_temperature < LEVEL2_THRESHOLD:
        return pick_band_level(curr_thermal_level, 1, 2)
    if LEVEL2_THRESHOLD <= system_temperature <= level3_band_start:
        return 2
    if level3_band_start < system_temperature < LEVEL3_THRESHOLD:
        return pick_band_level(curr_thermal_level, 2, 3)
    if LEVEL3_THRESHOLD <= system_temperature < LEVEL4_THRESHOLD:
        return 3
    return 4
@staticmethod
def is_over_temperature(temperature_list):
    """
    Checks whether any temperature exceeds LEVEL4_THRESHOLD.

    Args:
        temperature_list: An iterable of temperature readings

    Returns:
        A boolean, True if at least one reading is strictly greater
        than LEVEL4_THRESHOLD, False otherwise (including when the
        list is empty)
    """
    return any(temperature > LEVEL4_THRESHOLD
               for temperature in temperature_list)

View File

@ -16,9 +16,14 @@ try:
except ImportError as e:
raise ImportError(str(e) + "- required module not found")
MAX_S6000_PSU_FAN_SPEED = 18000
MAX_S6000_FAN_SPEED = 19000
MAX_S6000_FAN_TARGET_SPEED = 18900
# Each element corresponds to required speed (in RPM)
# for a given system thermal level
THERMAL_LEVEL_PSU_FAN_SPEED = (7200, 10800, 14400, 16200, 18000)
THERMAL_LEVEL_FAN_SPEED = (7000, 10000, 13000, 16000, 19000)
class Fan(FanBase):
@ -34,6 +39,7 @@ class Fan(FanBase):
def __init__(self, fantray_index=1, fan_index=1,
psu_index=1, psu_fan=False, dependency=None):
FanBase.__init__(self)
self._target_speed = None
self.is_psu_fan = psu_fan
self.is_driver_initialized = True
@ -42,11 +48,18 @@ class Fan(FanBase):
self.fantray_index = fantray_index
self.index = fan_index
self.dependency = dependency
self.get_fan_speed_reg = self.I2C_DIR +\
"{}/fan{}_input".format(*self.FAN_DEV_MAPPING[fantray_index][fan_index])
self.set_fan_speed_reg = self.I2C_DIR +\
"{}/fan{}_target".format(*self.FAN_DEV_MAPPING[fantray_index][fan_index])
hwmon_dir = self.I2C_DIR +\
"{}/hwmon/".format(self.FAN_DEV_MAPPING[fantray_index][fan_index][0])
hwmon_node = os.listdir(hwmon_dir)[0]
self.fan_status_reg = hwmon_dir + hwmon_node +\
"/fan{}_alarm".format(self.FAN_DEV_MAPPING[fantray_index][fan_index][1])
self.get_fan_speed_reg = hwmon_dir + hwmon_node +\
"/fan{}_input".format(self.FAN_DEV_MAPPING[fantray_index][fan_index][1])
self.set_fan_speed_reg = hwmon_dir + hwmon_node +\
"/fan{}_target".format(self.FAN_DEV_MAPPING[fantray_index][fan_index][1])
self.max_fan_speed = MAX_S6000_FAN_SPEED
self.thermal_level_to_speed = THERMAL_LEVEL_FAN_SPEED
else:
self.psu_index = psu_index
self.index = 1
@ -64,6 +77,7 @@ class Fan(FanBase):
self.get_fan_speed_reg = hwmon_dir + hwmon_node + '/fan1_input'
self.max_fan_speed = MAX_S6000_PSU_FAN_SPEED
self.thermal_level_to_speed = THERMAL_LEVEL_PSU_FAN_SPEED
def _get_i2c_register(self, reg_file):
# On successful read, returns the value read from given
@ -114,6 +128,31 @@ class Fan(FanBase):
self.get_fan_speed_reg = fan_speed_reg[0]
self.is_driver_initialized = True
def _get_speed_to_percentage(self, speed):
speed_percent = (100 * speed) // self.max_fan_speed
return speed_percent if speed_percent <= 100 else 100
def _get_target_speed_rpm(self):
target_speed_rpm = self._get_i2c_register(self.set_fan_speed_reg)
if (target_speed_rpm != 'ERR') and self.get_presence():
target_speed_rpm = int(target_speed_rpm, 10)
else:
target_speed_rpm = 0
return target_speed_rpm
def _set_speed_rpm(self, speed):
if not self.is_psu_fan:
if speed > MAX_S6000_FAN_TARGET_SPEED:
speed = MAX_S6000_FAN_TARGET_SPEED
self._target_speed = speed
rv = self._set_i2c_register(self.set_fan_speed_reg, speed)
if (rv != 'ERR'):
return True
else:
return False
def get_name(self):
"""
Retrieves the name of the Fan
@ -159,10 +198,17 @@ class Fan(FanBase):
bool: True if Fan is operating properly, False if not
"""
status = False
fan_speed = self._get_i2c_register(self.get_fan_speed_reg)
if (fan_speed != 'ERR'):
if (int(fan_speed) > 1000):
status = True
if self.is_psu_fan:
fan_speed = self._get_i2c_register(self.get_fan_speed_reg)
if (fan_speed != 'ERR'):
if (int(fan_speed) > 1000):
status = True
else:
fan_status = self._get_i2c_register(self.fan_status_reg)
if (fan_status != 'ERR'):
fan_status = int(fan_status, 10)
if ~fan_status & 0b1:
status = True
return status
@ -214,8 +260,7 @@ class Fan(FanBase):
"""
fan_speed = self._get_i2c_register(self.get_fan_speed_reg)
if (fan_speed != 'ERR') and self.get_presence():
speed_in_rpm = int(fan_speed, 10)
speed = (100 * speed_in_rpm)//self.max_fan_speed
speed = self._get_speed_to_percentage(int(fan_speed, 10))
else:
speed = 0
@ -247,11 +292,7 @@ class Fan(FanBase):
bool: True if set success, False if fail.
"""
fan_set = (speed * self.max_fan_speed) // 100
rv = self._set_i2c_register(self.set_fan_speed_reg, fan_set)
if (rv != 'ERR'):
return True
else:
return False
return self._set_speed_rpm(fan_set)
def set_status_led(self, color):
"""
@ -284,5 +325,26 @@ class Fan(FanBase):
An integer, the percentage of full fan speed, in the range 0
(off) to 100 (full speed)
"""
# Fan speeds are controlled by fancontrol.sh
return self.get_speed()
target_speed_rpm = self._get_target_speed_rpm()
if not self.is_psu_fan and self._target_speed:
# Handle max6620 driver approximation
max6620_conv_factor = (60 * 8192 * 4) / 2
expected_speed_rpm = max6620_conv_factor // (max6620_conv_factor // self._target_speed)
if expected_speed_rpm == target_speed_rpm:
if self._target_speed >= MAX_S6000_FAN_TARGET_SPEED:
return 100
else:
return self._get_speed_to_percentage(self._target_speed)
return self._get_speed_to_percentage(target_speed_rpm)
def set_speed_for_thermal_level(self, thermal_level):
    """
    Applies the fan speed associated with the given thermal level.

    The RPM for the level is looked up in self.thermal_level_to_speed
    and is only written when its percentage differs from the current
    target speed percentage.

    Args:
        thermal_level: An integer index into self.thermal_level_to_speed
    """
    wanted_rpm = self.thermal_level_to_speed[thermal_level]
    wanted_percent = self._get_speed_to_percentage(wanted_rpm)
    if wanted_percent == self.get_target_speed():
        # Already at the requested speed, nothing to write
        return
    self._set_speed_rpm(wanted_rpm)

View File

@ -101,13 +101,27 @@ class Thermal(ThermalBase):
try:
with open(sysfs_file, 'r') as fd:
rv = fd.read()
except:
except Exception:
rv = 'ERR'
rv = rv.rstrip('\r\n')
rv = rv.lstrip(" ")
return rv
def _write_sysfs_file(self, sysfs_file, value):
rv = 'ERR'
if (not os.path.isfile(sysfs_file)):
return rv
try:
with open(sysfs_file, 'w') as fd:
rv = fd.write(str(value))
except Exception as e:
rv = 'ERR'
return rv
def _get_sysfs_path(self):
temperature_path = glob.glob(self.thermal_temperature_file)
high_threshold_path = glob.glob(self.thermal_high_threshold_file)
@ -243,18 +257,28 @@ class Thermal(ThermalBase):
return thermal_low_threshold / 1000.0
def set_high_threshold(self, temperature):
def set_high_threshold(self, temperature, force=False):
    """
    Sets the high threshold temperature of thermal

    Thermal threshold values are pre-defined based on HW, so the
    threshold is only written when 'force' is set, self.index is at
    most 3 and the temperature lies within 80-85 (inclusive).

    Args :
        temperature: A float number up to nearest thousandth of one
                     degree Celsius, e.g. 30.125
        force (optional): A boolean, True if set threshold. Only to
                          be used via thermal Manager.

    Returns:
        A boolean, True if threshold is set successfully, False if
        not
    """
    if not (force and self.index <= 3 and (80 <= temperature <= 85)):
        return False

    # The threshold file takes the value multiplied by 1000. Convert to
    # an integer so a float temperature is written as "80500" rather
    # than "80500.0"; round() absorbs floating point noise such as
    # 82.3 * 1000 == 82299.99999999999.
    high_threshold = int(round(temperature * 1000))
    result = self._write_sysfs_file(self.thermal_high_threshold_file,
                                    high_threshold)
    return result != 'ERR'
def set_low_threshold(self, temperature):

View File

@ -0,0 +1,105 @@
from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
from sonic_py_common.logger import Logger
logger = Logger()
class ThermalPolicyAction(ThermalPolicyActionBase):
@staticmethod
def get_chassis_info(thermal_info_dict):
from .thermal_info import ChassisInfo
chassis_info = thermal_info_dict.get(ChassisInfo.INFO_NAME)
return chassis_info if isinstance(chassis_info, ChassisInfo) else None
@staticmethod
def get_fandrawer_info(thermal_info_dict):
from .thermal_info import FanDrawerInfo
fandrawer_info = thermal_info_dict.get(FanDrawerInfo.INFO_NAME)
return fandrawer_info if isinstance(fandrawer_info, FanDrawerInfo) else None
@staticmethod
def get_psu_fan_info(thermal_info_dict):
from .thermal_info import PsuFanInfo
psu_fan_info = thermal_info_dict.get(PsuFanInfo.INFO_NAME)
return psu_fan_info if isinstance(psu_fan_info, PsuFanInfo) else None
@thermal_json_object('fandrawer.fault.set_status_led')
class SetFanDrawerFaultStatusLed(ThermalPolicyAction):
def execute(self, thermal_info_dict):
fandrawer_info = self.get_fandrawer_info(thermal_info_dict)
if fandrawer_info and fandrawer_info.is_status_changed:
for fandrawer in fandrawer_info.fault_fandrawers:
if fandrawer.get_status_led() != 'amber':
fandrawer.set_status_led('amber')
@thermal_json_object('fandrawer.normal.set_status_led')
class SetFanDrawerNormalStatusLed(ThermalPolicyAction):
def execute(self, thermal_info_dict):
fandrawer_info = self.get_fandrawer_info(thermal_info_dict)
if fandrawer_info and fandrawer_info.is_status_changed:
for fandrawer in fandrawer_info.non_fault_fandrawers:
if fandrawer.get_status_led() != 'green':
fandrawer.set_status_led('green')
@thermal_json_object('fan.all.set_max_speed')
class SetAllFanMaxSpeedAction(ThermalPolicyAction):
def execute(self, thermal_info_dict):
fandrawer_info = self.get_fandrawer_info(thermal_info_dict)
psu_fan_info = self.get_psu_fan_info(thermal_info_dict)
if fandrawer_info:
if fandrawer_info.is_status_changed and fandrawer_info.is_new_fault:
logger.log_warning("Fandrawer fault detected. Setting all fans to maximum speed")
for fan in fandrawer_info.non_fault_fans:
fan.set_speed(100)
if psu_fan_info:
for fan in psu_fan_info.present_fans:
fan.set_speed(100)
@thermal_json_object('fan.all.set_thermal_level_speed')
class SetAllFanThermalLevelSpeedAction(ThermalPolicyAction):
def execute(self, thermal_info_dict):
chassis_info = self.get_chassis_info(thermal_info_dict)
fandrawer_info = self.get_fandrawer_info(thermal_info_dict)
psu_fan_info = self.get_psu_fan_info(thermal_info_dict)
if chassis_info:
if chassis_info.is_status_changed:
if chassis_info.initial_run:
logger.log_notice("System thermal level is at LEVEL{}".format(chassis_info.system_thermal_level))
else:
logger.log_notice("System thermal level changed to LEVEL{}".format(chassis_info.system_thermal_level))
if fandrawer_info:
if fandrawer_info.is_status_changed and not chassis_info.initial_run:
logger.log_notice("All fandrawers back to normal")
for fan in fandrawer_info.non_fault_fans:
fan.set_speed_for_thermal_level(chassis_info.system_thermal_level)
if psu_fan_info:
for fan in psu_fan_info.present_fans:
fan.set_speed_for_thermal_level(chassis_info.system_thermal_level)
@thermal_json_object('chassis.thermal_shutdown')
class ThermalShutdownAction(ThermalPolicyAction):
def execute(self, thermal_info_dict):
chassis_info = self.get_chassis_info(thermal_info_dict)
if chassis_info:
logger.log_warning("Shutting down due to over temperature - "
+ ",".join("{} C".format(i) for i in chassis_info.temperature_list))
chassis_info.chassis.thermal_shutdown()

View File

@ -0,0 +1,35 @@
from sonic_platform_base.sonic_thermal_control.thermal_condition_base import ThermalPolicyConditionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
class FanDrawerCondition(ThermalPolicyConditionBase):
@staticmethod
def get_fandrawer_info(thermal_info_dict):
from .thermal_info import FanDrawerInfo
fandrawer_info = thermal_info_dict.get(FanDrawerInfo.INFO_NAME)
return fandrawer_info if isinstance(fandrawer_info, FanDrawerInfo) else None
@thermal_json_object('fandrawer.any.fault')
class AnyFanDrawerAbsentOrFaultCondition(FanDrawerCondition):
def is_match(self, thermal_info_dict):
fandrawer_info = self.get_fandrawer_info(thermal_info_dict)
return fandrawer_info.fault if fandrawer_info else False
@thermal_json_object('fandrawer.all.normal')
class AllFanDrawerGoodCondition(FanDrawerCondition):
def is_match(self, thermal_info_dict):
fandrawer_info = self.get_fandrawer_info(thermal_info_dict)
return not fandrawer_info.fault if fandrawer_info else False
@thermal_json_object('chassis.over_temperature')
class OverTemperatureCondition(ThermalPolicyConditionBase):
@staticmethod
def is_match(thermal_info_dict):
from .thermal_info import ChassisInfo
chassis_info = thermal_info_dict.get(ChassisInfo.INFO_NAME)
return chassis_info.is_over_temperature if isinstance(chassis_info, ChassisInfo) else False

View File

@ -0,0 +1,189 @@
from sonic_platform_base.sonic_thermal_control.thermal_info_base import ThermalPolicyInfoBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
@thermal_json_object('chassis_info')
class ChassisInfo(ThermalPolicyInfoBase):
"""
Chassis information needed by thermal policy
"""
INFO_NAME = 'chassis_info'
def __init__(self):
self._chassis = None
self._temperature_list = []
self._thermal_list = []
self._system_thermal_level = 0
self._initial_run = False
self._is_over_temperature = False
self._is_status_changed = False
def collect(self, chassis):
"""
Collect platform chassis.
:param chassis: The chassis object
:return:
"""
self._initial_run = False
self._is_status_changed = False
self._temperature_list = []
if not self._chassis:
self._initial_run = True
self._chassis = chassis
self._thermal_list = chassis.get_monitor_thermals()
for thermal in self._thermal_list:
self._temperature_list.append(thermal.get_temperature())
system_temperature = sum(self._temperature_list) / len(self._temperature_list)
curr_level = chassis.get_system_thermal_level(self._system_thermal_level,
system_temperature)
if curr_level != self._system_thermal_level:
self._is_status_changed = True
self._system_thermal_level = curr_level
self._is_over_temperature = chassis.is_over_temperature(self._temperature_list)
@property
def chassis(self):
return self._chassis
@property
def initial_run(self):
return self._initial_run
@property
def is_over_temperature(self):
return self._is_over_temperature
@property
def is_status_changed(self):
return self._is_status_changed
@property
def system_thermal_level(self):
return self._system_thermal_level
@property
def temperature_list(self):
return self._temperature_list
@thermal_json_object('fandrawer_info')
class FanDrawerInfo(ThermalPolicyInfoBase):
INFO_NAME = 'fandrawer_info'
def __init__(self):
self._fault_fandrawers = set()
self._present_fandrawers = set()
self._fault_fans = set()
self._present_fans = set()
self._fault = False
self._is_new_fault = False
self._is_status_changed = False
def collect(self, chassis):
"""
Collect fan information for thermal policy.
:param chassis: The chassis object.
:return:
"""
fault = False
self._is_new_fault = False
self._is_status_changed = False
for fandrawer in chassis.get_all_fan_drawers():
presence = fandrawer.get_presence()
if presence and (fandrawer not in self._present_fandrawers):
self._is_status_changed = True
self._present_fandrawers.add(fandrawer)
self._present_fans.update(fandrawer.get_all_fans())
elif not presence and (fandrawer in self._present_fandrawers):
self._is_status_changed = True
self._present_fandrawers.discard(fandrawer)
self._present_fans.difference_update(fandrawer.get_all_fans())
fan_fault = False
for fan in fandrawer.get_all_fans():
status = fan.get_status()
fan_fault |= not status
if status and (fan in self._fault_fans):
self._is_status_changed = True
self._fault_fans.discard(fan)
elif not status and (fan not in self._fault_fans):
self._is_status_changed = True
self._fault_fans.add(fan)
if self._is_status_changed:
if fan_fault and (fandrawer not in self._fault_fandrawers):
self._fault_fandrawers.add(fandrawer)
elif not fan_fault:
self._fault_fandrawers.discard(fandrawer)
if self._fault_fans or (chassis.get_num_fans() != len(self._present_fans)):
fault = True
if self._is_status_changed:
if fault and not self._fault:
self._is_new_fault = True
self._fault = fault
@property
def fault_fans(self):
return self._present_fans.intersection(self._fault_fans)
@property
def non_fault_fans(self):
return self._present_fans.difference(self._fault_fans)
@property
def fault_fandrawers(self):
return self._present_fandrawers.intersection(self._fault_fandrawers)
@property
def non_fault_fandrawers(self):
return self._present_fandrawers.difference(self._fault_fandrawers)
@property
def fault(self):
return self._fault
@property
def is_new_fault(self):
return self._is_new_fault
@property
def is_status_changed(self):
return self._is_status_changed
@thermal_json_object('psu_fan_info')
class PsuFanInfo(ThermalPolicyInfoBase):
INFO_NAME = 'psu_fan_info'
def __init__(self):
self._present_fans = set()
def collect(self, chassis):
"""
Collect fan information for thermal policy.
:param chassis: The chassis object.
:return:
"""
for psu in chassis.get_all_psus():
for fan in psu.get_all_fans():
presence = fan.get_presence()
if presence and (fan not in self._present_fans):
self._present_fans.add(fan)
elif not presence and (fan in self._present_fans):
self._present_fans.discard(fan)
@property
def present_fans(self):
return self._present_fans

View File

@ -0,0 +1,63 @@
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
from .thermal_action import (
SetAllFanMaxSpeedAction, SetAllFanThermalLevelSpeedAction,
SetFanDrawerFaultStatusLed, SetFanDrawerNormalStatusLed,
ThermalShutdownAction
)
from .thermal_condition import (
AllFanDrawerGoodCondition, AnyFanDrawerAbsentOrFaultCondition,
OverTemperatureCondition
)
from .thermal_info import ChassisInfo, FanDrawerInfo, PsuFanInfo
class ThermalManager(ThermalManagerBase):
_chassis = None
_fan_speed_default = 80
@classmethod
def deinitialize(cls):
"""
Destroy thermal manager, including any vendor specific cleanup.
:return:
"""
cls.stop_thermal_algorithm()
@classmethod
def init_thermal_algorithm(cls, chassis):
"""
Initialize thermal algorithm according to policy file.
:param chassis: The chassis object.
:return:
"""
if cls._chassis is None:
cls._chassis = chassis
cls.start_thermal_algorithm()
@classmethod
def start_thermal_algorithm(cls):
"""
Start vendor specific thermal control algorithm.
:return:
"""
if cls._chassis:
cls._chassis.set_fan_control_status(True)
@classmethod
def stop_thermal_algorithm(cls):
"""
Stop vendor specific thermal control algorithm.
:return:
"""
if cls._chassis:
cls._chassis.set_fan_control_status(False)
for fan in cls._chassis.get_all_fans():
fan.set_speed(cls._fan_speed_default)
for psu in cls._chassis.get_all_psus():
for fan in psu.get_all_fans():
fan.set_speed(cls._fan_speed_default)

View File

@ -1,13 +0,0 @@
[Unit]
Description=Dell S6000 fan speed regulator
After=platform-modules-s6000.service
Before=pmon.service
[Service]
ExecStart=-/usr/local/bin/fancontrol.sh
Restart=always
RestartSec=30
[Install]
WantedBy=multi-user.target