[device/celestica]: Add thermalctld support on DX010 platform APIs (#6089)
**- Why I did it** - The thermalctld daemon in the pmon Docker container requires support from the thermal manager API. **- How I did it** - Removed the old function for detecting a faulty fan. - Removed the old function for detecting excess temperature. - Implemented thermal_manager APIs based on ThermalManagerBase. - Implemented thermal_conditions APIs based on ThermalPolicyConditionBase. - Implemented thermal_actions APIs based on ThermalPolicyActionBase. - Implemented thermal_info APIs based on ThermalPolicyInfoBase. - Added thermal_policy.json.
This commit is contained in:
parent
c9d3e25115
commit
0ca343422d
@ -9,5 +9,4 @@ MINSTOP=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/
|
||||
MINPWM=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/pwm5=89 13-004d/pwm1=89 13-004d/pwm2=89 13-004d/pwm3=89 13-004d/pwm4=89 13-004d/pwm5=89
|
||||
MAXPWM=13-002e/pwm1=255 13-002e/pwm2=255 13-002e/pwm3=255 13-002e/pwm4=255 13-002e/pwm5=255 13-004d/pwm1=255 13-004d/pwm2=255 13-004d/pwm3=255 13-004d/pwm4=255 13-004d/pwm5=255
|
||||
THYST=13-002e/pwm1=3 13-002e/pwm2=3 13-002e/pwm3=3 13-002e/pwm4=3 13-002e/pwm5=3 13-004d/pwm1=3 13-004d/pwm2=3 13-004d/pwm3=3 13-004d/pwm4=3 13-004d/pwm5=3
|
||||
MAXTEMPCRIT=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=65 /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=75
|
||||
MAXTEMPTYPE=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=ASIC /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=CPU
|
||||
|
||||
|
@ -9,5 +9,4 @@ MINSTOP=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/
|
||||
MINPWM=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/pwm5=89 13-004d/pwm1=89 13-004d/pwm2=89 13-004d/pwm3=89 13-004d/pwm4=89 13-004d/pwm5=89
|
||||
MAXPWM=13-002e/pwm1=255 13-002e/pwm2=255 13-002e/pwm3=255 13-002e/pwm4=255 13-002e/pwm5=255 13-004d/pwm1=255 13-004d/pwm2=255 13-004d/pwm3=255 13-004d/pwm4=255 13-004d/pwm5=255
|
||||
THYST=13-002e/pwm1=3 13-002e/pwm2=3 13-002e/pwm3=3 13-002e/pwm4=3 13-002e/pwm5=3 13-004d/pwm1=3 13-004d/pwm2=3 13-004d/pwm3=3 13-004d/pwm4=3 13-004d/pwm5=3
|
||||
MAXTEMPCRIT=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=75 /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=75
|
||||
MAXTEMPTYPE=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=ASIC /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=CPU
|
||||
|
||||
|
@ -74,8 +74,9 @@ class Chassis(ChassisBase):
|
||||
|
||||
def __initialize_thermals(self):
|
||||
from sonic_platform.thermal import Thermal
|
||||
airflow = self.__get_air_flow()
|
||||
for index in range(0, NUM_THERMAL):
|
||||
thermal = Thermal(index)
|
||||
thermal = Thermal(index, airflow)
|
||||
self._thermal_list.append(thermal)
|
||||
|
||||
def __initialize_eeprom(self):
|
||||
@ -88,6 +89,11 @@ class Chassis(ChassisBase):
|
||||
component = Component(index)
|
||||
self._component_list.append(component)
|
||||
|
||||
def __get_air_flow(self):
|
||||
air_flow_path = '/usr/share/sonic/device/{}/fan_airflow'.format(self._api_helper.platform) if self.is_host else '/usr/share/sonic/platform/fan_airflow'
|
||||
air_flow = self._api_helper.read_one_line_file(air_flow_path)
|
||||
return air_flow or 'B2F'
|
||||
|
||||
def get_base_mac(self):
|
||||
"""
|
||||
Retrieves the base MAC address for the chassis
|
||||
@ -291,3 +297,7 @@ class Chassis(ChassisBase):
|
||||
A boolean value, True if device is operating properly, False if not
|
||||
"""
|
||||
return True
|
||||
|
||||
def get_thermal_manager(self):
|
||||
from .thermal_manager import ThermalManager
|
||||
return ThermalManager
|
||||
|
@ -304,4 +304,20 @@ class Fan(FanBase):
|
||||
Returns:
|
||||
A boolean value, True if device is operating properly, False if not
|
||||
"""
|
||||
return self.get_presence() and self.get_speed() > 0
|
||||
status = 1
|
||||
if self.is_psu_fan:
|
||||
fan_fault_sysfs_name = "fan1_fault"
|
||||
fan_fault_sysfs_path = self.__search_file_by_name(
|
||||
self.psu_hwmon_path, fan_fault_sysfs_name)
|
||||
status = self._api_helper.read_one_line_file(fan_fault_sysfs_path)
|
||||
|
||||
elif self.get_presence():
|
||||
chip = self.emc2305_chip_mapping[self.fan_index]
|
||||
device = chip['device']
|
||||
fan_index = chip['index_map']
|
||||
sysfs_path = "%s%s/%s" % (
|
||||
EMC2305_PATH, device, 'fan{}_fault')
|
||||
sysfs_path = sysfs_path.format(fan_index[self.fan_tray_index])
|
||||
status = self._api_helper.read_one_line_file(sysfs_path)
|
||||
|
||||
return False if int(status) != 0 else True
|
||||
|
@ -16,46 +16,75 @@ try:
|
||||
except ImportError as e:
|
||||
raise ImportError(str(e) + "- required module not found")
|
||||
|
||||
THERMAL_INFO = {
|
||||
0: {
|
||||
"F2B_max": 50,
|
||||
"B2F_max": 55,
|
||||
"postion": "asic",
|
||||
"name": "Front-panel temp sensor 1",
|
||||
"i2c_path": "i2c-5/5-0048/hwmon/hwmon1", # u4 system-inlet
|
||||
},
|
||||
1: {
|
||||
"F2B_max": 50,
|
||||
"B2F_max": 55,
|
||||
"postion": "asic",
|
||||
"name": "Front-panel temp sensor 2",
|
||||
"i2c_path": "i2c-6/6-0049/hwmon/hwmon2", # u2 system-inlet
|
||||
},
|
||||
2: {
|
||||
"F2B_max": 70,
|
||||
"F2B_max_crit": 75,
|
||||
"B2F_max": 60,
|
||||
"B2F_max_crit": 65,
|
||||
"postion": "asic",
|
||||
"name": "ASIC temp sensor",
|
||||
"i2c_path": "i2c-7/7-004a/hwmon/hwmon3", # u44 bmc56960-on-board
|
||||
},
|
||||
3: {
|
||||
"F2B_max": 70,
|
||||
"F2B_max_crit": 75,
|
||||
"B2F_max": 70,
|
||||
"B2F_max_crit": 75,
|
||||
"postion": "cpu",
|
||||
"name": "Rear-panel temp sensor 1",
|
||||
"i2c_path": "i2c-14/14-0048/hwmon/hwmon4", # u9200 cpu-on-board
|
||||
},
|
||||
4: {
|
||||
"F2B_max": 70,
|
||||
"B2F_max": 55,
|
||||
"postion": "cpu",
|
||||
"name": "Rear-panel temp sensor 2",
|
||||
"i2c_path": "i2c-15/15-004e/hwmon/hwmon5" # u9201 system-outlet
|
||||
}
|
||||
}
|
||||
NULL_VAL = "N/A"
|
||||
I2C_ADAPTER_PATH = "/sys/class/i2c-adapter"
|
||||
|
||||
|
||||
class Thermal(ThermalBase):
|
||||
"""Platform-specific Thermal class"""
|
||||
|
||||
THERMAL_NAME_LIST = []
|
||||
I2C_ADAPTER_PATH = "/sys/class/i2c-adapter"
|
||||
SS_CONFIG_PATH = "/usr/share/sonic/device/x86_64-cel_seastone-r0/sensors.conf"
|
||||
|
||||
def __init__(self, thermal_index):
|
||||
def __init__(self, thermal_index, airflow):
|
||||
self.index = thermal_index
|
||||
self._api_helper = APIHelper()
|
||||
|
||||
# Add thermal name
|
||||
self.THERMAL_NAME_LIST.append("Front-panel temp sensor 1")
|
||||
self.THERMAL_NAME_LIST.append("Front-panel temp sensor 2")
|
||||
self.THERMAL_NAME_LIST.append("ASIC temp sensor")
|
||||
self.THERMAL_NAME_LIST.append("Rear-panel temp sensor 1")
|
||||
self.THERMAL_NAME_LIST.append("Rear-panel temp sensor 2")
|
||||
|
||||
# Set hwmon path
|
||||
i2c_path = {
|
||||
0: "i2c-5/5-0048/hwmon/hwmon1", # u4 system-inlet
|
||||
1: "i2c-6/6-0049/hwmon/hwmon2", # u2 system-inlet
|
||||
2: "i2c-7/7-004a/hwmon/hwmon3", # u44 bmc56960-on-board
|
||||
3: "i2c-14/14-0048/hwmon/hwmon4", # u9200 cpu-on-board
|
||||
4: "i2c-15/15-004e/hwmon/hwmon5" # u9201 system-outlet
|
||||
}.get(self.index, None)
|
||||
|
||||
self.hwmon_path = "{}/{}".format(self.I2C_ADAPTER_PATH, i2c_path)
|
||||
self.ss_key = self.THERMAL_NAME_LIST[self.index]
|
||||
self._airflow = airflow
|
||||
self._thermal_info = THERMAL_INFO[self.index]
|
||||
self._hwmon_path = "{}/{}".format(I2C_ADAPTER_PATH,
|
||||
self._thermal_info["i2c_path"])
|
||||
self.name = self.get_name()
|
||||
self.postion = self._thermal_info["postion"]
|
||||
self.ss_index = 1
|
||||
|
||||
def __get_temp(self, temp_file):
|
||||
temp_file_path = os.path.join(self.hwmon_path, temp_file)
|
||||
temp_file_path = os.path.join(self._hwmon_path, temp_file)
|
||||
raw_temp = self._api_helper.read_txt_file(temp_file_path)
|
||||
temp = float(raw_temp)/1000
|
||||
return float("{:.3f}".format(temp))
|
||||
|
||||
def __set_threshold(self, file_name, temperature):
|
||||
temp_file_path = os.path.join(self.hwmon_path, file_name)
|
||||
temp_file_path = os.path.join(self._hwmon_path, file_name)
|
||||
try:
|
||||
with open(temp_file_path, 'w') as fd:
|
||||
fd.write(str(temperature))
|
||||
@ -80,8 +109,17 @@ class Thermal(ThermalBase):
|
||||
A float number, the high threshold temperature of thermal in Celsius
|
||||
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
||||
"""
|
||||
temp_file = "temp{}_max".format(self.ss_index)
|
||||
return self.__get_temp(temp_file)
|
||||
max_crit_key = '{}_max'.format(self._airflow)
|
||||
return self._thermal_info.get(max_crit_key, None)
|
||||
|
||||
def get_low_threshold(self):
|
||||
"""
|
||||
Retrieves the low threshold temperature of thermal
|
||||
Returns:
|
||||
A float number, the low threshold temperature of thermal in Celsius
|
||||
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
||||
"""
|
||||
return 0.0
|
||||
|
||||
def set_high_threshold(self, temperature):
|
||||
"""
|
||||
@ -102,7 +140,7 @@ class Thermal(ThermalBase):
|
||||
f.seek(0)
|
||||
ss_found = False
|
||||
for idx, val in enumerate(content):
|
||||
if self.ss_key in val:
|
||||
if self.name in val:
|
||||
ss_found = True
|
||||
elif ss_found and temp_file in val:
|
||||
content[idx] = " set {} {}\n".format(
|
||||
@ -115,13 +153,43 @@ class Thermal(ThermalBase):
|
||||
|
||||
return is_set & file_set
|
||||
|
||||
def set_low_threshold(self, temperature):
|
||||
"""
|
||||
Sets the low threshold temperature of thermal
|
||||
Args :
|
||||
temperature: A float number up to nearest thousandth of one degree Celsius,
|
||||
e.g. 30.125
|
||||
Returns:
|
||||
A boolean, True if threshold is set successfully, False if not
|
||||
"""
|
||||
return False
|
||||
|
||||
def get_high_critical_threshold(self):
|
||||
"""
|
||||
Retrieves the high critical threshold temperature of thermal
|
||||
Returns:
|
||||
A float number, the high critical threshold temperature of thermal in Celsius
|
||||
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
||||
"""
|
||||
max_crit_key = '{}_max_crit'.format(self._airflow)
|
||||
return self._thermal_info.get(max_crit_key, None)
|
||||
|
||||
def get_low_critical_threshold(self):
|
||||
"""
|
||||
Retrieves the low critical threshold temperature of thermal
|
||||
Returns:
|
||||
A float number, the low critical threshold temperature of thermal in Celsius
|
||||
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
||||
"""
|
||||
return 0.0
|
||||
|
||||
def get_name(self):
|
||||
"""
|
||||
Retrieves the name of the thermal device
|
||||
Returns:
|
||||
string: The name of the thermal device
|
||||
"""
|
||||
return self.THERMAL_NAME_LIST[self.index]
|
||||
return self._thermal_info["name"]
|
||||
|
||||
def get_presence(self):
|
||||
"""
|
||||
@ -130,9 +198,25 @@ class Thermal(ThermalBase):
|
||||
bool: True if PSU is present, False if not
|
||||
"""
|
||||
temp_file = "temp{}_input".format(self.ss_index)
|
||||
temp_file_path = os.path.join(self.hwmon_path, temp_file)
|
||||
temp_file_path = os.path.join(self._hwmon_path, temp_file)
|
||||
return os.path.isfile(temp_file_path)
|
||||
|
||||
def get_model(self):
|
||||
"""
|
||||
Retrieves the model number (or part number) of the device
|
||||
Returns:
|
||||
string: Model/part number of device
|
||||
"""
|
||||
return NULL_VAL
|
||||
|
||||
def get_serial(self):
|
||||
"""
|
||||
Retrieves the serial number of the device
|
||||
Returns:
|
||||
string: Serial number of device
|
||||
"""
|
||||
return NULL_VAL
|
||||
|
||||
def get_status(self):
|
||||
"""
|
||||
Retrieves the operational status of the device
|
||||
@ -143,7 +227,7 @@ class Thermal(ThermalBase):
|
||||
return False
|
||||
|
||||
fault_file = "temp{}_fault".format(self.ss_index)
|
||||
fault_file_path = os.path.join(self.hwmon_path, fault_file)
|
||||
fault_file_path = os.path.join(self._hwmon_path, fault_file)
|
||||
if not os.path.isfile(fault_file_path):
|
||||
return True
|
||||
|
||||
|
@ -0,0 +1,78 @@
|
||||
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
|
||||
from .thermal_infos import ChassisInfo
|
||||
from .helper import APIHelper
|
||||
|
||||
|
||||
@thermal_json_object('thermal_control.control')
class ControlThermalAlgoAction(ThermalPolicyActionBase):
    """
    Action that starts or stops the thermal control algorithm.
    """
    # JSON field definition
    JSON_FIELD_STATUS = 'status'

    def __init__(self):
        # Default to "algorithm enabled" until a policy file says otherwise.
        self.status = True

    def load_from_json(self, json_obj):
        """
        Construct ControlThermalAlgoAction via JSON. JSON example:
            {
                "type": "thermal_control.control",
                "status": "true"
            }
        The status may be the string "true"/"false" (case-insensitive) or a
        JSON boolean.

        :param json_obj: A JSON object representing a ControlThermalAlgoAction action.
        :raises ValueError: If the status field is missing or is not true/false.
        :return:
        """
        field = ControlThermalAlgoAction.JSON_FIELD_STATUS
        if field not in json_obj:
            raise ValueError('ControlThermalAlgoAction '
                             'missing mandatory field {} in JSON policy file'.
                             format(field))

        raw_status = json_obj[field]
        if isinstance(raw_status, bool):
            # A bare JSON true/false has no .lower(); accept it directly.
            self.status = raw_status
            return

        # Anything that is not string-like falls through to the ValueError below
        # instead of failing with AttributeError.
        status_str = raw_status.lower() if hasattr(raw_status, 'lower') else ''
        if status_str == 'true':
            self.status = True
        elif status_str == 'false':
            self.status = False
        else:
            raise ValueError('Invalid {} field value, please specify true or false'.
                             format(field))

    def execute(self, thermal_info_dict):
        """
        Start the thermal control algorithm when status is True, stop it otherwise.
        Does nothing if no chassis information has been collected.

        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        if ChassisInfo.INFO_NAME in thermal_info_dict:
            chassis_info_obj = thermal_info_dict[ChassisInfo.INFO_NAME]
            chassis = chassis_info_obj.get_chassis()
            thermal_manager = chassis.get_thermal_manager()
            if self.status:
                thermal_manager.start_thermal_control_algorithm()
            else:
                thermal_manager.stop_thermal_control_algorithm()
|
||||
|
||||
|
||||
@thermal_json_object('switch.power_cycling')
class SwitchPolicyAction(ThermalPolicyActionBase):
    """
    Thermal action that invokes the platform thermal overload control script.
    """

    def execute(self, thermal_info_dict):
        """
        Read the overload position from /tmp/thermal_overload_position and pass it
        as the single argument to thermal_overload_control.sh.

        :param thermal_info_dict: A dictionary stores all thermal information (not used here).
        :return:
        """
        # NOTE(review): the position is taken verbatim from a file under /tmp and
        # formatted into a command line - confirm how run_command() treats it.
        position_file = '/tmp/thermal_overload_position'
        position = APIHelper().read_one_line_file(position_file)

        command_template = 'bash /usr/share/sonic/platform/thermal_overload_control.sh {}'
        APIHelper().run_command(command_template.format(position))
|
@ -0,0 +1,77 @@
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_condition_base import ThermalPolicyConditionBase
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
|
||||
|
||||
|
||||
class FanCondition(ThermalPolicyConditionBase):
    """Common base for conditions that evaluate the collected fan information."""

    def get_fan_info(self, thermal_info_dict):
        """
        Look up the FanInfo entry in the thermal information dictionary.

        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: The FanInfo object, or None if it is missing or of another type.
        """
        from .thermal_infos import FanInfo

        if FanInfo.INFO_NAME not in thermal_info_dict:
            return None

        fan_info = thermal_info_dict[FanInfo.INFO_NAME]
        if not isinstance(fan_info, FanInfo):
            return None
        return fan_info
|
||||
|
||||
|
||||
@thermal_json_object('fan.any.absence')
class AnyFanAbsenceCondition(FanCondition):
    """Matches when at least one fan is recorded as absent."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if any fan is absent; False otherwise or if fan info is unavailable.
        """
        fan_info = self.get_fan_info(thermal_info_dict)
        if not fan_info:
            return False
        return len(fan_info.get_absence_fans()) > 0
|
||||
|
||||
|
||||
@thermal_json_object('fan.any.fault')
class AnyFanFaultCondition(FanCondition):
    """Matches when at least one fan is recorded as faulty."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if any fan is faulty; False otherwise or if fan info is unavailable.
        """
        fan_info = self.get_fan_info(thermal_info_dict)
        if not fan_info:
            return False
        return len(fan_info.get_fault_fans()) > 0
|
||||
|
||||
|
||||
@thermal_json_object('fan.all.presence')
class AllFanPresenceCondition(FanCondition):
    """Matches when no fan is recorded as absent."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if the absent-fan set is empty; False otherwise or if fan info is unavailable.
        """
        fan_info = self.get_fan_info(thermal_info_dict)
        if not fan_info:
            return False
        return len(fan_info.get_absence_fans()) == 0
|
||||
|
||||
|
||||
@thermal_json_object('fan.all.good')
class AllFanGoodCondition(FanCondition):
    """Matches when no fan is recorded as faulty."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if the faulty-fan set is empty; False otherwise or if fan info is unavailable.
        """
        fan_info = self.get_fan_info(thermal_info_dict)
        if not fan_info:
            return False
        return len(fan_info.get_fault_fans()) == 0
|
||||
|
||||
|
||||
class ThermalCondition(ThermalPolicyConditionBase):
    """Common base for conditions that evaluate the collected thermal information."""

    def get_thermal_info(self, thermal_info_dict):
        """
        Look up the ThermalInfo entry in the thermal information dictionary.

        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: The ThermalInfo object, or None if it is missing or of another type.
        """
        from .thermal_infos import ThermalInfo

        if ThermalInfo.INFO_NAME not in thermal_info_dict:
            return None

        thermal_info = thermal_info_dict[ThermalInfo.INFO_NAME]
        if not isinstance(thermal_info, ThermalInfo):
            return None
        return thermal_info
|
||||
|
||||
|
||||
@thermal_json_object('thermal.over.high_threshold')
class ThermalOverHighThresholdCondition(ThermalCondition):
    """
    Matches when the collected thermal information reports a sensor over its
    high threshold.

    The class name now reflects the registered 'thermal.over.high_threshold'
    type; it previously shared a name with the high *critical* threshold
    condition and was shadowed by it at module level.
    """

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if over the high threshold; False otherwise or if thermal info is unavailable.
        """
        thermal_info_obj = self.get_thermal_info(thermal_info_dict)
        if thermal_info_obj:
            return thermal_info_obj.is_over_high_threshold()
        else:
            return False
|
||||
|
||||
|
||||
@thermal_json_object('thermal.over.high_critical_threshold')
class ThermalOverHighCriticalCondition(ThermalCondition):
    """Matches when the collected thermal information reports a sensor over its high critical threshold."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if over the high critical threshold; False otherwise or if thermal info is unavailable.
        """
        thermal_info = self.get_thermal_info(thermal_info_dict)
        if not thermal_info:
            return False
        return thermal_info.is_over_high_critical_threshold()
|
||||
|
||||
@thermal_json_object('thermal.all.good')
class ThermalGoodCondition(ThermalCondition):
    """Matches when the collected thermal information reports no threshold being exceeded."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if no threshold is exceeded; False if one is, or if thermal info is unavailable.
        """
        thermal_info = self.get_thermal_info(thermal_info_dict)
        if not thermal_info:
            return False
        return not thermal_info.is_over_threshold()
|
@ -0,0 +1,165 @@
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_info_base import ThermalPolicyInfoBase
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
|
||||
from .helper import APIHelper
|
||||
import time
|
||||
|
||||
|
||||
@thermal_json_object('fan_info')
class FanInfo(ThermalPolicyInfoBase):
    """
    Tracks which fans are present, absent or faulty for the thermal policy.
    """

    # Key under which this info object is looked up in the thermal info dictionary
    INFO_NAME = 'fan_info'

    def __init__(self):
        self._absence_fans = set()
        self._presence_fans = set()
        self._fault_fans = set()
        self._status_changed = False

    def collect(self, chassis):
        """
        Refresh the present/absent/faulty fan sets from the chassis.

        The changed flag is reset on every call and set again if any fan moved
        between sets during this pass.

        :param chassis: The chassis object
        :return:
        """
        self._status_changed = False
        for fan in chassis.get_all_fans():
            is_present = fan.get_presence()
            is_ok = fan.get_status()

            if is_present:
                if fan not in self._presence_fans:
                    self._presence_fans.add(fan)
                    self._status_changed = True
                    self._absence_fans.discard(fan)
            elif fan not in self._absence_fans:
                self._absence_fans.add(fan)
                self._status_changed = True
                self._presence_fans.discard(fan)

            if not is_ok:
                if fan not in self._fault_fans:
                    self._fault_fans.add(fan)
                    self._status_changed = True
            elif fan in self._fault_fans:
                self._fault_fans.remove(fan)
                self._status_changed = True

    def get_absence_fans(self):
        """
        :return: The set of fans currently recorded as absent
        """
        return self._absence_fans

    def get_presence_fans(self):
        """
        :return: The set of fans currently recorded as present
        """
        return self._presence_fans

    def get_fault_fans(self):
        """
        :return: The set of fans currently recorded as faulty
        """
        return self._fault_fans

    def is_status_changed(self):
        """
        :return: True if the most recent collect() changed any fan set, else False
        """
        return self._status_changed
|
||||
|
||||
|
||||
@thermal_json_object('thermal_info')
class ThermalInfo(ThermalPolicyInfoBase):
    """
    Thermal information needed by thermal policy
    """

    # Thermal information name
    INFO_NAME = 'thermal_info'

    def __init__(self):
        # Start from "nothing over threshold" so the is_over_* queries can be
        # called before the first collect() without raising AttributeError.
        self._over_high_threshold = False
        self._over_high_critical_threshold = False
        self._thermal_overload_position = 'cpu'

    def collect(self, chassis):
        """
        Collect thermal sensor temperature change status

        Every thermal sensor is compared against its own high and high critical
        thresholds; a threshold that is None (or otherwise falsy) is skipped.

        :param chassis: The chassis object
        :return:
        """
        self._over_high_threshold = False
        self._over_high_critical_threshold = False
        self._thermal_overload_position = 'cpu'

        num_of_thermals = chassis.get_num_thermals()
        for index in range(num_of_thermals):
            thermal = chassis.get_thermal(index)
            temp = thermal.get_temperature()
            high_threshold = thermal.get_high_threshold()
            high_critical_threshold = thermal.get_high_critical_threshold()

            if high_threshold and temp > high_threshold:
                self._over_high_threshold = True

            if high_critical_threshold and temp > high_critical_threshold:
                # If several sensors are critical, the last one checked wins.
                self._thermal_overload_position = thermal.postion
                self._over_high_critical_threshold = True

    def is_over_threshold(self):
        """
        Retrieves if the temperature is over any threshold
        :return: True if the temperature is over any threshold else False
        """
        return self._over_high_threshold or self._over_high_critical_threshold

    def is_over_high_critical_threshold(self):
        """
        Retrieves if the temperature is over high critical threshold

        Side effect: when over the critical threshold, the overload position is
        written to /tmp/thermal_overload_position and the call sleeps for one
        second before returning.

        :return: True if the temperature is over high critical threshold else False
        """
        thermal_overload_position_path = '/tmp/thermal_overload_position'
        if self._over_high_critical_threshold:
            APIHelper().write_txt_file(thermal_overload_position_path,
                                       self._thermal_overload_position)
            time.sleep(1)
        return self._over_high_critical_threshold

    def is_over_high_threshold(self):
        """
        Retrieves if the temperature is over high threshold
        :return: True if the temperature is over high threshold else False
        """
        return self._over_high_threshold
|
||||
|
||||
|
||||
@thermal_json_object('chassis_info')
class ChassisInfo(ThermalPolicyInfoBase):
    """
    Holds a reference to the platform chassis for use by thermal policy actions.
    """
    # Key under which this info object is looked up in the thermal info dictionary
    INFO_NAME = 'chassis_info'

    def __init__(self):
        # No chassis is known until collect() has been called.
        self._chassis = None

    def collect(self, chassis):
        """
        Remember the chassis object passed in.

        :param chassis: The chassis object
        :return:
        """
        self._chassis = chassis

    def get_chassis(self):
        """
        :return: The chassis object stored by the last collect(), or None if collect() has not run.
        """
        return self._chassis
|
@ -0,0 +1,46 @@
|
||||
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
|
||||
from .helper import APIHelper
|
||||
from .thermal_actions import *
|
||||
from .thermal_conditions import *
|
||||
from .thermal_infos import *
|
||||
|
||||
class ThermalManager(ThermalManagerBase):
    """Thermal manager that controls the 'fancontrol' service through APIHelper.run_command."""

    # Command template; the placeholder receives 'start' or 'stop'
    FSC_ALGORITHM_CMD = 'service fancontrol {}'

    @classmethod
    def start_thermal_control_algorithm(cls):
        """
        Start the thermal control algorithm by starting the fancontrol service.

        :return: The result of the underlying run_command call
        """
        return cls._enable_fancontrol_service(True)

    @classmethod
    def stop_thermal_control_algorithm(cls):
        """
        Stop the thermal control algorithm by stopping the fancontrol service.

        :return: The result of the underlying run_command call
        """
        return cls._enable_fancontrol_service(False)

    @classmethod
    def deinitialize(cls):
        """
        Tear down the thermal manager. This issues a start of the fancontrol service.

        :return: The result of the underlying run_command call
        """
        return cls._enable_fancontrol_service(True)

    @classmethod
    def _enable_fancontrol_service(cls, enable):
        """
        Start or stop the fancontrol service.

        Args:
            enable: Bool, True to start the service, False to stop it
        Returns:
            Whatever APIHelper.run_command returns for the service command.
        """
        if enable:
            action = 'start'
        else:
            action = 'stop'
        command = cls.FSC_ALGORITHM_CMD.format(action)
        return APIHelper().run_command(command)
|
@ -0,0 +1,16 @@
|
||||
{
|
||||
"services_to_ignore": [],
|
||||
"devices_to_ignore": [
|
||||
"asic",
|
||||
"psu.temperature",
|
||||
"PSU2 Fan",
|
||||
"PSU1 Fan"
|
||||
],
|
||||
"user_defined_checkers": [],
|
||||
"polling_interval": 60,
|
||||
"led_color": {
|
||||
"fault": "orange",
|
||||
"normal": "green",
|
||||
"booting": "orange_blink"
|
||||
}
|
||||
}
|
93
device/celestica/x86_64-cel_seastone-r0/thermal_policy.json
Normal file
93
device/celestica/x86_64-cel_seastone-r0/thermal_policy.json
Normal file
@ -0,0 +1,93 @@
|
||||
{
|
||||
"thermal_control_algorithm": {
|
||||
"run_at_boot_up": "true"
|
||||
},
|
||||
"info_types": [
|
||||
{
|
||||
"type": "chassis_info"
|
||||
},
|
||||
{
|
||||
"type": "fan_info"
|
||||
},
|
||||
{
|
||||
"type": "thermal_info"
|
||||
}
|
||||
],
|
||||
"policies": [
|
||||
{
|
||||
"name": "any fan absence",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "fan.any.absence"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "any fan broken",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "fan.any.fault"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "any thermal over threshold",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "thermal.over.high_threshold"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "false"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "temp over high critical threshold",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "thermal.over.high_critical_threshold"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "switch.power_cycling"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "all fan presence / thermal no warning",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "fan.all.presence"
|
||||
},
|
||||
{
|
||||
"type": "fan.all.good"
|
||||
},
|
||||
{
|
||||
"type": "thermal.all.good"
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"type": "thermal_control.control",
|
||||
"status": "true"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
@ -3,7 +3,6 @@ dx010/cfg/dx010-modules.conf etc/modules-load.d
|
||||
dx010/systemd/platform-modules-dx010.service lib/systemd/system
|
||||
dx010/scripts/fancontrol.sh etc/init.d
|
||||
dx010/scripts/fancontrol.service lib/systemd/system
|
||||
dx010/scripts/thermal_overload_control.sh usr/local/bin
|
||||
services/fancontrol/fancontrol usr/local/bin
|
||||
dx010/modules/sonic_platform-1.0-py2-none-any.whl usr/share/sonic/device/x86_64-cel_seastone-r0
|
||||
services/platform_api/platform_api_mgnt.sh usr/local/bin
|
||||
|
@ -6,4 +6,5 @@ systemctl start platform-modules-dx010.service
|
||||
systemctl start fancontrol.service
|
||||
|
||||
/usr/local/bin/platform_api_mgnt.sh install
|
||||
/etc/init.d/fancontrol.sh install
|
||||
|
||||
|
@ -7,75 +7,42 @@
|
||||
# Default-Start: 2 3 4 5
|
||||
# Default-Stop:
|
||||
# Short-Description: fancontrol
|
||||
# Description: fan speed regulator
|
||||
# Description: fancontrol configuration selector
|
||||
### END INIT INFO
|
||||
|
||||
. /lib/lsb/init-functions
|
||||
|
||||
[ -f /etc/default/rcS ] && . /etc/default/rcS
|
||||
PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin
|
||||
DAEMON=/usr/local/bin/fancontrol
|
||||
DESC="fan speed regulator"
|
||||
NAME="fancontrol"
|
||||
PIDFILE=/var/run/fancontrol.pid
|
||||
MAIN_CONF=/usr/share/sonic/device/x86_64-cel_seastone-r0/fancontrol
|
||||
DEVPATH=/sys/devices/pci0000:00/0000:00:13.0/i2c-*/i2c-13/13-002e
|
||||
GPIO_DIR=/sys/class/gpio
|
||||
BASE_GPIO=$(find $GPIO_DIR | grep gpiochip | grep -o '[[:digit:]]*')
|
||||
DIRGPIO_START=15
|
||||
|
||||
test -x $DAEMON || exit 0
|
||||
init() {
|
||||
DIRGPIO_START=15
|
||||
BASE_GPIO=$(find $GPIO_DIR | grep gpiochip | grep -o '[[:digit:]]*')
|
||||
FANDIR_GPIO_NUMBER=$((DIRGPIO_START + BASE_GPIO))
|
||||
FANDIR_VALUE=$(cat ${GPIO_DIR}/gpio${FANDIR_GPIO_NUMBER}/value)
|
||||
DIRGPIO_START=$((DIRGPIO_START + 1))
|
||||
FANDIR=$([ $FANDIR_VALUE = 1 ] && echo "B2F" || echo "F2B")
|
||||
CONF=${MAIN_CONF}-${FANDIR}
|
||||
echo $FANDIR > /usr/share/sonic/device/x86_64-cel_seastone-r0/fan_airflow
|
||||
}
|
||||
|
||||
for i in 1 2 3 4 5
|
||||
do
|
||||
FANFAULT=$(cat ${DEVPATH}/fan${i}_fault)
|
||||
[ $FANFAULT = 1 ] && continue
|
||||
FANDIR_GPIO_NUMBER=$((DIRGPIO_START + BASE_GPIO))
|
||||
FANDIR_VALUE=$(cat ${GPIO_DIR}/gpio${FANDIR_GPIO_NUMBER}/value)
|
||||
DIRGPIO_START=$((DIRGPIO_START+1))
|
||||
FANDIR=$([ $FANDIR_VALUE = 1 ] && echo "B2F" || echo "F2B")
|
||||
done
|
||||
CONF=${MAIN_CONF}-${FANDIR}
|
||||
install() {
|
||||
find /var/lib/docker/overlay*/ -path */sbin/fancontrol -exec cp /usr/local/bin/fancontrol {} \;
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
start)
|
||||
if [ -f $CONF ] ; then
|
||||
if $DAEMON --check $CONF 1>/dev/null 2>/dev/null ; then
|
||||
log_daemon_msg "Starting $DESC" "$NAME\n"
|
||||
start-stop-daemon --start --quiet --pidfile $PIDFILE --startas $DAEMON $CONF
|
||||
log_end_msg $?
|
||||
else
|
||||
log_failure_msg "Not starting fancontrol, broken configuration file; please re-run pwmconfig."
|
||||
fi
|
||||
else
|
||||
if [ "$VERBOSE" != no ]; then
|
||||
log_warning_msg "Not starting fancontrol; run pwmconfig first."
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
stop)
|
||||
log_daemon_msg "Stopping $DESC" "$NAME"
|
||||
start-stop-daemon --stop --quiet --pidfile $PIDFILE --oknodo --startas $DAEMON $CONF
|
||||
rm -f $PIDFILE
|
||||
log_end_msg $?
|
||||
;;
|
||||
restart)
|
||||
$0 stop
|
||||
sleep 3
|
||||
$0 start
|
||||
;;
|
||||
force-reload)
|
||||
if start-stop-daemon --stop --test --quiet --pidfile $PIDFILE --startas $DAEMON $CONF ; then
|
||||
$0 restart
|
||||
fi
|
||||
;;
|
||||
status)
|
||||
status_of_proc $DAEMON $NAME $CONF && exit 0 || exit $?
|
||||
;;
|
||||
*)
|
||||
log_success_msg "Usage: /etc/init.d/fancontrol {start|stop|restart|force-reload|status}"
|
||||
exit 1
|
||||
;;
|
||||
start)
|
||||
init
|
||||
cp $CONF $MAIN_CONF
|
||||
;;
|
||||
install)
|
||||
install
|
||||
;;
|
||||
*)
|
||||
log_success_msg "Usage: /etc/init.d/fancontrol {start} | {install}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
@ -180,42 +180,6 @@ function LoadConfig
|
||||
|
||||
}
|
||||
|
||||
function CheckFanFault()
|
||||
{
|
||||
let fancount=0
|
||||
while (( $fancount < ${#AFCFANFAULT[@]} )) # go through all fan fault.
|
||||
do
|
||||
fault=`cat ${AFCFANFAULT[$fancount]}`
|
||||
if [[ "$fault" == "1" ]]
|
||||
then
|
||||
return 1 # fan fault detected
|
||||
fi
|
||||
let fancount=$fancount+1
|
||||
done
|
||||
return 0
|
||||
}
|
||||
|
||||
function CheckTempOver()
|
||||
{
|
||||
let tempcount=0
|
||||
while (( $tempcount < ${#CSTEMP[@]} )) # go through all temp.
|
||||
do
|
||||
ctemp=`cat ${CSTEMP[$tempcount]}`
|
||||
let maxcrit="${CSMAXTEMPCRIT[$tempcount]}*1000"
|
||||
if [ $ctemp -ge $maxcrit ]
|
||||
then
|
||||
logger "Thermal overload : ${CSMAXTEMPTYPE[$tempcount]} temperature ${ctemp} > ${maxcrit}"
|
||||
if [ -f "$THERMAL_OVERLOAD_CONTROL_FILE" ]
|
||||
then
|
||||
toc_cmd="${THERMAL_OVERLOAD_CONTROL_FILE} ${CSMAXTEMPTYPE[$tempcount],,}"
|
||||
bash $toc_cmd
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
let tempcount=$tempcount+1
|
||||
done
|
||||
return 0
|
||||
}
|
||||
|
||||
function DevicePath()
|
||||
{
|
||||
@ -508,18 +472,6 @@ function UpdateFanSpeeds
|
||||
maxpwm=${AFCMAXPWM[$fcvcount]}
|
||||
let tHyst="${AFCTHYST[$fcvcount]}*1000"
|
||||
|
||||
#if some fan fault detected all pwm=100%
|
||||
CheckFanFault
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo $MAX > $pwmo
|
||||
let fcvcount=$fcvcount+1
|
||||
continue
|
||||
fi
|
||||
|
||||
#check thermal overload
|
||||
CheckTempOver
|
||||
|
||||
read tval < ${tsens}
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
@ -619,7 +571,7 @@ function UpdateFanSpeeds
|
||||
echo $minsa > $pwmo
|
||||
# Sleep while still handling signals
|
||||
sleep 1 &
|
||||
wait $!
|
||||
wait
|
||||
fi
|
||||
fi
|
||||
echo $pwmval > $pwmo # write new value to pwm output
|
||||
@ -658,5 +610,5 @@ do
|
||||
UpdateFanSpeeds
|
||||
# Sleep while still handling signals
|
||||
sleep $INTERVAL &
|
||||
wait $!
|
||||
wait
|
||||
done
|
||||
|
Loading…
Reference in New Issue
Block a user