[device/celestica]: Add thermalctld support on DX010 platform APIs (#6089)

**- Why I did it**
- The thermalctld daemon in the pmon Docker container requires the platform to provide thermal manager API support.

**- How I did it**
- Removed the old function for detecting a faulty fan.
- Removed the old function for detecting excess temperature.
- Implemented thermal_manager APIs based on ThermalManagerBase
- Implemented thermal_conditions APIs based on ThermalPolicyConditionBase
- Implemented thermal_actions APIs based on ThermalPolicyActionBase
- Implemented thermal_info APIs based on ThermalPolicyInfoBase
- Added thermal_policy.json
This commit is contained in:
Wirut Getbamrung 2021-01-16 01:20:47 +07:00 committed by GitHub
parent c9d3e25115
commit 0ca343422d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 647 additions and 145 deletions

View File

@ -9,5 +9,4 @@ MINSTOP=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/
MINPWM=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/pwm5=89 13-004d/pwm1=89 13-004d/pwm2=89 13-004d/pwm3=89 13-004d/pwm4=89 13-004d/pwm5=89
MAXPWM=13-002e/pwm1=255 13-002e/pwm2=255 13-002e/pwm3=255 13-002e/pwm4=255 13-002e/pwm5=255 13-004d/pwm1=255 13-004d/pwm2=255 13-004d/pwm3=255 13-004d/pwm4=255 13-004d/pwm5=255
THYST=13-002e/pwm1=3 13-002e/pwm2=3 13-002e/pwm3=3 13-002e/pwm4=3 13-002e/pwm5=3 13-004d/pwm1=3 13-004d/pwm2=3 13-004d/pwm3=3 13-004d/pwm4=3 13-004d/pwm5=3
MAXTEMPCRIT=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=65 /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=75
MAXTEMPTYPE=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=ASIC /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=CPU

View File

@ -9,5 +9,4 @@ MINSTOP=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/
MINPWM=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/pwm5=89 13-004d/pwm1=89 13-004d/pwm2=89 13-004d/pwm3=89 13-004d/pwm4=89 13-004d/pwm5=89
MAXPWM=13-002e/pwm1=255 13-002e/pwm2=255 13-002e/pwm3=255 13-002e/pwm4=255 13-002e/pwm5=255 13-004d/pwm1=255 13-004d/pwm2=255 13-004d/pwm3=255 13-004d/pwm4=255 13-004d/pwm5=255
THYST=13-002e/pwm1=3 13-002e/pwm2=3 13-002e/pwm3=3 13-002e/pwm4=3 13-002e/pwm5=3 13-004d/pwm1=3 13-004d/pwm2=3 13-004d/pwm3=3 13-004d/pwm4=3 13-004d/pwm5=3
MAXTEMPCRIT=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=75 /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=75
MAXTEMPTYPE=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=ASIC /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=CPU

View File

@ -74,8 +74,9 @@ class Chassis(ChassisBase):
def __initialize_thermals(self):
from sonic_platform.thermal import Thermal
airflow = self.__get_air_flow()
for index in range(0, NUM_THERMAL):
thermal = Thermal(index)
thermal = Thermal(index, airflow)
self._thermal_list.append(thermal)
def __initialize_eeprom(self):
@ -88,6 +89,11 @@ class Chassis(ChassisBase):
component = Component(index)
self._component_list.append(component)
def __get_air_flow(self):
    """
    Read the fan airflow direction recorded for this platform.

    The direction is read from the 'fan_airflow' file; its location depends
    on whether the code runs on the host or not.

    Returns:
        The content returned by the helper for that file, or 'B2F' when
        the helper returns an empty/false value.
    """
    if self.is_host:
        air_flow_path = '/usr/share/sonic/device/{}/fan_airflow'.format(
            self._api_helper.platform)
    else:
        air_flow_path = '/usr/share/sonic/platform/fan_airflow'

    air_flow = self._api_helper.read_one_line_file(air_flow_path)
    if not air_flow:
        # Fall back to back-to-front when nothing could be read.
        return 'B2F'
    return air_flow
def get_base_mac(self):
"""
Retrieves the base MAC address for the chassis
@ -291,3 +297,7 @@ class Chassis(ChassisBase):
A boolean value, True if device is operating properly, False if not
"""
return True
def get_thermal_manager(self):
    """
    Retrieves the thermal manager class of this chassis

    Returns:
        The ThermalManager class itself (not an instance).
    """
    # Imported lazily, inside the method, as in the rest of this class.
    from .thermal_manager import ThermalManager as manager_class
    return manager_class

View File

@ -304,4 +304,20 @@ class Fan(FanBase):
Returns:
A boolean value, True if device is operating properly, False if not
"""
return self.get_presence() and self.get_speed() > 0
status = 1
if self.is_psu_fan:
fan_fault_sysfs_name = "fan1_fault"
fan_fault_sysfs_path = self.__search_file_by_name(
self.psu_hwmon_path, fan_fault_sysfs_name)
status = self._api_helper.read_one_line_file(fan_fault_sysfs_path)
elif self.get_presence():
chip = self.emc2305_chip_mapping[self.fan_index]
device = chip['device']
fan_index = chip['index_map']
sysfs_path = "%s%s/%s" % (
EMC2305_PATH, device, 'fan{}_fault')
sysfs_path = sysfs_path.format(fan_index[self.fan_tray_index])
status = self._api_helper.read_one_line_file(sysfs_path)
return False if int(status) != 0 else True

View File

@ -16,46 +16,75 @@ try:
except ImportError as e:
raise ImportError(str(e) + "- required module not found")
# Static per-sensor configuration, keyed by thermal index.
#   "<airflow>_max"      - high threshold for the given airflow direction
#                          ("F2B" or "B2F"), returned by get_high_threshold().
#   "<airflow>_max_crit" - high critical threshold; only defined for some
#                          sensors, the others report None.
#   "postion"            - (sic) location tag of the sensor: "asic" or "cpu".
#                          The misspelled key is read at runtime; do not rename
#                          it without updating every reader.
#   "name"               - display name of the sensor.
#   "i2c_path"           - hwmon directory, relative to I2C_ADAPTER_PATH.
# NOTE(review): threshold values are presumably degrees Celsius, matching the
# getter docstrings - confirm against the hardware spec.
THERMAL_INFO = {
0: {
"F2B_max": 50,
"B2F_max": 55,
"postion": "asic",
"name": "Front-panel temp sensor 1",
"i2c_path": "i2c-5/5-0048/hwmon/hwmon1", # u4 system-inlet
},
1: {
"F2B_max": 50,
"B2F_max": 55,
"postion": "asic",
"name": "Front-panel temp sensor 2",
"i2c_path": "i2c-6/6-0049/hwmon/hwmon2", # u2 system-inlet
},
2: {
"F2B_max": 70,
"F2B_max_crit": 75,
"B2F_max": 60,
"B2F_max_crit": 65,
"postion": "asic",
"name": "ASIC temp sensor",
"i2c_path": "i2c-7/7-004a/hwmon/hwmon3", # u44 bmc56960-on-board
},
3: {
"F2B_max": 70,
"F2B_max_crit": 75,
"B2F_max": 70,
"B2F_max_crit": 75,
"postion": "cpu",
"name": "Rear-panel temp sensor 1",
"i2c_path": "i2c-14/14-0048/hwmon/hwmon4", # u9200 cpu-on-board
},
4: {
"F2B_max": 70,
"B2F_max": 55,
"postion": "cpu",
"name": "Rear-panel temp sensor 2",
"i2c_path": "i2c-15/15-004e/hwmon/hwmon5" # u9201 system-outlet
}
}
NULL_VAL = "N/A"
I2C_ADAPTER_PATH = "/sys/class/i2c-adapter"
class Thermal(ThermalBase):
"""Platform-specific Thermal class"""
THERMAL_NAME_LIST = []
I2C_ADAPTER_PATH = "/sys/class/i2c-adapter"
SS_CONFIG_PATH = "/usr/share/sonic/device/x86_64-cel_seastone-r0/sensors.conf"
def __init__(self, thermal_index):
def __init__(self, thermal_index, airflow):
self.index = thermal_index
self._api_helper = APIHelper()
# Add thermal name
self.THERMAL_NAME_LIST.append("Front-panel temp sensor 1")
self.THERMAL_NAME_LIST.append("Front-panel temp sensor 2")
self.THERMAL_NAME_LIST.append("ASIC temp sensor")
self.THERMAL_NAME_LIST.append("Rear-panel temp sensor 1")
self.THERMAL_NAME_LIST.append("Rear-panel temp sensor 2")
# Set hwmon path
i2c_path = {
0: "i2c-5/5-0048/hwmon/hwmon1", # u4 system-inlet
1: "i2c-6/6-0049/hwmon/hwmon2", # u2 system-inlet
2: "i2c-7/7-004a/hwmon/hwmon3", # u44 bmc56960-on-board
3: "i2c-14/14-0048/hwmon/hwmon4", # u9200 cpu-on-board
4: "i2c-15/15-004e/hwmon/hwmon5" # u9201 system-outlet
}.get(self.index, None)
self.hwmon_path = "{}/{}".format(self.I2C_ADAPTER_PATH, i2c_path)
self.ss_key = self.THERMAL_NAME_LIST[self.index]
self._airflow = airflow
self._thermal_info = THERMAL_INFO[self.index]
self._hwmon_path = "{}/{}".format(I2C_ADAPTER_PATH,
self._thermal_info["i2c_path"])
self.name = self.get_name()
self.postion = self._thermal_info["postion"]
self.ss_index = 1
def __get_temp(self, temp_file):
temp_file_path = os.path.join(self.hwmon_path, temp_file)
temp_file_path = os.path.join(self._hwmon_path, temp_file)
raw_temp = self._api_helper.read_txt_file(temp_file_path)
temp = float(raw_temp)/1000
return float("{:.3f}".format(temp))
def __set_threshold(self, file_name, temperature):
temp_file_path = os.path.join(self.hwmon_path, file_name)
temp_file_path = os.path.join(self._hwmon_path, file_name)
try:
with open(temp_file_path, 'w') as fd:
fd.write(str(temperature))
@ -80,8 +109,17 @@ class Thermal(ThermalBase):
A float number, the high threshold temperature of thermal in Celsius
up to nearest thousandth of one degree Celsius, e.g. 30.125
"""
temp_file = "temp{}_max".format(self.ss_index)
return self.__get_temp(temp_file)
max_crit_key = '{}_max'.format(self._airflow)
return self._thermal_info.get(max_crit_key, None)
def get_low_threshold(self):
    """
    Report the low threshold temperature of this thermal.

    Returns:
        float: always 0.0; no per-sensor low threshold is configured.
    """
    low_threshold = 0.0
    return low_threshold
def set_high_threshold(self, temperature):
"""
@ -102,7 +140,7 @@ class Thermal(ThermalBase):
f.seek(0)
ss_found = False
for idx, val in enumerate(content):
if self.ss_key in val:
if self.name in val:
ss_found = True
elif ss_found and temp_file in val:
content[idx] = " set {} {}\n".format(
@ -115,13 +153,43 @@ class Thermal(ThermalBase):
return is_set & file_set
def set_low_threshold(self, temperature):
    """
    Request a new low threshold temperature for this thermal.

    Args:
        temperature: A float number up to nearest thousandth of one degree
            Celsius, e.g. 30.125. The value is ignored.

    Returns:
        bool: always False, i.e. the threshold is never set.
    """
    threshold_was_set = False
    return threshold_was_set
def get_high_critical_threshold(self):
    """
    Look up the high critical threshold of this thermal for the current
    airflow direction.

    Returns:
        The '<airflow>_max_crit' entry of this sensor's info table, or None
        when the sensor has no such entry.
    """
    threshold_key = '{}_max_crit'.format(self._airflow)
    sensor_info = self._thermal_info
    if threshold_key in sensor_info:
        return sensor_info[threshold_key]
    return None
def get_low_critical_threshold(self):
    """
    Report the low critical threshold temperature of this thermal.

    Returns:
        float: always 0.0; no per-sensor low critical threshold is configured.
    """
    low_critical_threshold = 0.0
    return low_critical_threshold
def get_name(self):
"""
Retrieves the name of the thermal device
Returns:
string: The name of the thermal device
"""
return self.THERMAL_NAME_LIST[self.index]
return self._thermal_info["name"]
def get_presence(self):
"""
@ -130,9 +198,25 @@ class Thermal(ThermalBase):
bool: True if PSU is present, False if not
"""
temp_file = "temp{}_input".format(self.ss_index)
temp_file_path = os.path.join(self.hwmon_path, temp_file)
temp_file_path = os.path.join(self._hwmon_path, temp_file)
return os.path.isfile(temp_file_path)
def get_model(self):
    """
    Report the model number (or part number) of the device.

    Returns:
        string: the NULL_VAL placeholder; no model is reported for thermals.
    """
    model = NULL_VAL
    return model
def get_serial(self):
    """
    Report the serial number of the device.

    Returns:
        string: the NULL_VAL placeholder; no serial is reported for thermals.
    """
    serial = NULL_VAL
    return serial
def get_status(self):
"""
Retrieves the operational status of the device
@ -143,7 +227,7 @@ class Thermal(ThermalBase):
return False
fault_file = "temp{}_fault".format(self.ss_index)
fault_file_path = os.path.join(self.hwmon_path, fault_file)
fault_file_path = os.path.join(self._hwmon_path, fault_file)
if not os.path.isfile(fault_file_path):
return True

View File

@ -0,0 +1,78 @@
from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
from .thermal_infos import ChassisInfo
from .helper import APIHelper
@thermal_json_object('thermal_control.control')
class ControlThermalAlgoAction(ThermalPolicyActionBase):
    """
    Action that starts or stops the thermal control algorithm
    """
    # JSON field definition
    JSON_FIELD_STATUS = 'status'

    def __init__(self):
        # True: start the algorithm when executed; False: stop it.
        self.status = True

    def load_from_json(self, json_obj):
        """
        Construct ControlThermalAlgoAction via JSON. JSON example:
            {
                "type": "thermal_control.control",
                "status": "true"
            }
        :param json_obj: A JSON object representing a ControlThermalAlgoAction action.
        :raises ValueError: if the status field is missing or is neither
            true nor false (case-insensitive).
        :return:
        """
        field = ControlThermalAlgoAction.JSON_FIELD_STATUS
        if field not in json_obj:
            raise ValueError('ControlThermalAlgoAction '
                             'missing mandatory field {} in JSON policy file'.
                             format(field))

        # str() lets a native JSON boolean (true/false) be accepted as well
        # as the "true"/"false" strings used in the policy file.
        status_str = str(json_obj[field]).lower()
        if status_str == 'true':
            self.status = True
        elif status_str == 'false':
            self.status = False
        else:
            raise ValueError('Invalid {} field value, please specify true or false'.
                             format(field))

    def execute(self, thermal_info_dict):
        """
        Start or stop the thermal control algorithm, depending on self.status.
        Nothing is done when no chassis information is available.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return:
        """
        if ChassisInfo.INFO_NAME in thermal_info_dict:
            chassis_info_obj = thermal_info_dict[ChassisInfo.INFO_NAME]
            chassis = chassis_info_obj.get_chassis()
            thermal_manager = chassis.get_thermal_manager()
            if self.status:
                thermal_manager.start_thermal_control_algorithm()
            else:
                thermal_manager.stop_thermal_control_algorithm()
@thermal_json_object('switch.power_cycling')
class SwitchPolicyAction(ThermalPolicyActionBase):
    """
    Action that runs the platform thermal overload control script, passing
    it the position of the overloaded sensor.
    """

    def execute(self, thermal_info_dict):
        """
        Run thermal_overload_control.sh with the position read from
        /tmp/thermal_overload_position.
        :param thermal_info_dict: A dictionary stores all thermal information.
            Not used by this action.
        :return:
        """
        # One helper instance serves both the file read and the command.
        api_helper = APIHelper()

        thermal_overload_position_path = '/tmp/thermal_overload_position'
        thermal_overload_position = api_helper.read_one_line_file(
            thermal_overload_position_path)

        cmd = 'bash /usr/share/sonic/platform/thermal_overload_control.sh {}'.format(
            thermal_overload_position)
        api_helper.run_command(cmd)

View File

@ -0,0 +1,77 @@
from sonic_platform_base.sonic_thermal_control.thermal_condition_base import ThermalPolicyConditionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
class FanCondition(ThermalPolicyConditionBase):
    """Common base for conditions that are evaluated on fan information."""

    def get_fan_info(self, thermal_info_dict):
        """
        Fetch the FanInfo object from the collected thermal information.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: The FanInfo object, or None when it is missing or of another type.
        """
        from .thermal_infos import FanInfo

        if FanInfo.INFO_NAME not in thermal_info_dict:
            return None
        candidate = thermal_info_dict[FanInfo.INFO_NAME]
        if not isinstance(candidate, FanInfo):
            return None
        return candidate
@thermal_json_object('fan.any.absence')
class AnyFanAbsenceCondition(FanCondition):
    """Condition matching when at least one fan is absent."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if any fan is absent; False otherwise or without fan info.
        """
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_absence_fans()) > 0
@thermal_json_object('fan.any.fault')
class AnyFanFaultCondition(FanCondition):
    """Condition matching when at least one fan is faulty."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if any fan is faulty; False otherwise or without fan info.
        """
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_fault_fans()) > 0
@thermal_json_object('fan.all.presence')
class AllFanPresenceCondition(FanCondition):
    """Condition matching when no fan is absent."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if no fan is absent; False otherwise or without fan info.
        """
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_absence_fans()) == 0
@thermal_json_object('fan.all.good')
class AllFanGoodCondition(FanCondition):
    """Condition matching when no fan is faulty."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if no fan is faulty; False otherwise or without fan info.
        """
        fan_info_obj = self.get_fan_info(thermal_info_dict)
        if not fan_info_obj:
            return False
        return len(fan_info_obj.get_fault_fans()) == 0
class ThermalCondition(ThermalPolicyConditionBase):
    """Common base for conditions that are evaluated on thermal information."""

    def get_thermal_info(self, thermal_info_dict):
        """
        Fetch the ThermalInfo object from the collected thermal information.
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: The ThermalInfo object, or None when it is missing or of another type.
        """
        from .thermal_infos import ThermalInfo

        if ThermalInfo.INFO_NAME not in thermal_info_dict:
            return None
        candidate = thermal_info_dict[ThermalInfo.INFO_NAME]
        if not isinstance(candidate, ThermalInfo):
            return None
        return candidate
@thermal_json_object('thermal.over.high_threshold')
class ThermalOverHighCondition(ThermalCondition):
    """
    Condition matching when any thermal is over its high threshold.

    Named ThermalOverHighCondition so that it no longer shares a class name
    with the 'thermal.over.high_critical_threshold' condition.
    """

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if a high threshold is exceeded; False otherwise or
            without thermal info.
        """
        thermal_info_obj = self.get_thermal_info(thermal_info_dict)
        if thermal_info_obj:
            return thermal_info_obj.is_over_high_threshold()
        else:
            return False
@thermal_json_object('thermal.over.high_critical_threshold')
class ThermalOverHighCriticalCondition(ThermalCondition):
    """Condition matching when any thermal is over its high critical threshold."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if a high critical threshold is exceeded; False
            otherwise or without thermal info.
        """
        thermal_info_obj = self.get_thermal_info(thermal_info_dict)
        if not thermal_info_obj:
            return False
        return thermal_info_obj.is_over_high_critical_threshold()
@thermal_json_object('thermal.all.good')
class ThermalGoodCondition(ThermalCondition):
    """Condition matching when no thermal is over any threshold."""

    def is_match(self, thermal_info_dict):
        """
        :param thermal_info_dict: A dictionary stores all thermal information.
        :return: True if no threshold is exceeded; False otherwise or
            without thermal info.
        """
        thermal_info_obj = self.get_thermal_info(thermal_info_dict)
        if not thermal_info_obj:
            return False
        return not thermal_info_obj.is_over_threshold()

View File

@ -0,0 +1,165 @@
from sonic_platform_base.sonic_thermal_control.thermal_info_base import ThermalPolicyInfoBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
from .helper import APIHelper
import time
@thermal_json_object('fan_info')
class FanInfo(ThermalPolicyInfoBase):
    """
    Fan presence and fault bookkeeping used by the thermal policy
    """

    # Fan information name
    INFO_NAME = 'fan_info'

    def __init__(self):
        self._absence_fans = set()
        self._presence_fans = set()
        self._fault_fans = set()
        self._status_changed = False

    def collect(self, chassis):
        """
        Refresh the absent, present and faulty fan sets from the chassis and
        record whether anything changed during this call.
        :param chassis: The chassis object
        :return:
        """
        self._status_changed = False
        for fan in chassis.get_all_fans():
            presence = fan.get_presence()
            status = fan.get_status()

            # Move the fan into the set matching its presence state.
            if presence:
                current_set, stale_set = self._presence_fans, self._absence_fans
            else:
                current_set, stale_set = self._absence_fans, self._presence_fans
            if fan not in current_set:
                current_set.add(fan)
                self._status_changed = True
                stale_set.discard(fan)

            # Track transitions into and out of the faulty state.
            known_faulty = fan in self._fault_fans
            if not status and not known_faulty:
                self._fault_fans.add(fan)
                self._status_changed = True
            elif status and known_faulty:
                self._fault_fans.remove(fan)
                self._status_changed = True

    def get_absence_fans(self):
        """
        :return: The set of fans currently recorded as absent
        """
        return self._absence_fans

    def get_presence_fans(self):
        """
        :return: The set of fans currently recorded as present
        """
        return self._presence_fans

    def get_fault_fans(self):
        """
        :return: The set of fans currently recorded as faulty
        """
        return self._fault_fans

    def is_status_changed(self):
        """
        :return: True if the last collect() changed any fan set, else False
        """
        return self._status_changed
@thermal_json_object('thermal_info')
class ThermalInfo(ThermalPolicyInfoBase):
    """
    Thermal information needed by thermal policy
    """

    # Thermal information name
    INFO_NAME = 'thermal_info'

    def __init__(self):
        # Initialise the state here so the is_over_* queries are safe even
        # before the first collect() call.
        self._over_high_threshold = False
        self._over_high_critical_threshold = False
        self._thermal_overload_position = 'cpu'

    def collect(self, chassis):
        """
        Compare every thermal sensor of the chassis against its high and
        high critical thresholds and record the result.
        :param chassis: The chassis object
        :return:
        """
        self._over_high_threshold = False
        self._over_high_critical_threshold = False
        self._thermal_overload_position = 'cpu'

        num_of_thermals = chassis.get_num_thermals()
        for index in range(num_of_thermals):
            thermal = chassis.get_thermal(index)
            temp = thermal.get_temperature()
            high_threshold = thermal.get_high_threshold()
            high_critical_threshold = thermal.get_high_critical_threshold()

            # A missing (None) threshold disables the corresponding check.
            if high_threshold and temp > high_threshold:
                self._over_high_threshold = True

            if high_critical_threshold and temp > high_critical_threshold:
                # Remember where the overload happened; when several sensors
                # are over, the last one checked wins.
                self._thermal_overload_position = thermal.postion
                self._over_high_critical_threshold = True

    def is_over_threshold(self):
        """
        Retrieves if the temperature is over any threshold
        :return: True if the temperature is over any threshold else False
        """
        return self._over_high_threshold or self._over_high_critical_threshold

    def is_over_high_critical_threshold(self):
        """
        Retrieves if the temperature is over high critical threshold.

        Side effect: when it is, the overload position is written to
        /tmp/thermal_overload_position and the call sleeps for one second
        before returning.
        :return: True if the temperature is over high critical threshold else False
        """
        thermal_overload_position_path = '/tmp/thermal_overload_position'
        if self._over_high_critical_threshold:
            APIHelper().write_txt_file(thermal_overload_position_path,
                                       self._thermal_overload_position)
            time.sleep(1)
        return self._over_high_critical_threshold

    def is_over_high_threshold(self):
        """
        Retrieves if the temperature is over high threshold
        :return: True if the temperature is over high threshold else False
        """
        return self._over_high_threshold
@thermal_json_object('chassis_info')
class ChassisInfo(ThermalPolicyInfoBase):
    """
    Holder for the platform chassis object used by the thermal policy
    """

    # Chassis information name
    INFO_NAME = 'chassis_info'

    def __init__(self):
        # No chassis is known until collect() is called.
        self._chassis = None

    def collect(self, chassis):
        """
        Remember the platform chassis.
        :param chassis: The chassis object
        :return:
        """
        self._chassis = chassis

    def get_chassis(self):
        """
        :return: The chassis object stored by collect(), or None before that.
        """
        return self._chassis

View File

@ -0,0 +1,46 @@
from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
from .helper import APIHelper
from .thermal_actions import *
from .thermal_conditions import *
from .thermal_infos import *
class ThermalManager(ThermalManagerBase):
    """Thermal manager that drives the fancontrol service."""

    # Command template; the placeholder receives 'start' or 'stop'.
    FSC_ALGORITHM_CMD = 'service fancontrol {}'

    @classmethod
    def start_thermal_control_algorithm(cls):
        """
        Start the thermal control algorithm by starting the fancontrol service.

        Returns:
            The result of the helper's run_command call.
        """
        return cls._enable_fancontrol_service(True)

    @classmethod
    def stop_thermal_control_algorithm(cls):
        """
        Stop the thermal control algorithm by stopping the fancontrol service.

        Returns:
            The result of the helper's run_command call.
        """
        return cls._enable_fancontrol_service(False)

    @classmethod
    def deinitialize(cls):
        """
        Tear down the thermal manager; the fancontrol service is started.

        Returns:
            The result of the helper's run_command call.
        """
        return cls._enable_fancontrol_service(True)

    @classmethod
    def _enable_fancontrol_service(cls, enable):
        """
        Start or stop the fancontrol service.

        Args:
            enable: Bool, True to start the service, False to stop it.

        Returns:
            The result of the helper's run_command call.
        """
        if enable:
            service_action = 'start'
        else:
            service_action = 'stop'
        command = cls.FSC_ALGORITHM_CMD.format(service_action)
        return APIHelper().run_command(command)

View File

@ -0,0 +1,16 @@
{
"services_to_ignore": [],
"devices_to_ignore": [
"asic",
"psu.temperature",
"PSU2 Fan",
"PSU1 Fan"
],
"user_defined_checkers": [],
"polling_interval": 60,
"led_color": {
"fault": "orange",
"normal": "green",
"booting": "orange_blink"
}
}

View File

@ -0,0 +1,93 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "true"
},
"info_types": [
{
"type": "chassis_info"
},
{
"type": "fan_info"
},
{
"type": "thermal_info"
}
],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
}
]
},
{
"name": "any fan broken",
"conditions": [
{
"type": "fan.any.fault"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
}
]
},
{
"name": "any thermal over threshold",
"conditions": [
{
"type": "thermal.over.high_threshold"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
}
]
},
{
"name": "temp over high critical threshold",
"conditions": [
{
"type": "thermal.over.high_critical_threshold"
}
],
"actions": [
{
"type": "switch.power_cycling"
}
]
},
{
"name": "all fan presence / thermal no warning",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "fan.all.good"
},
{
"type": "thermal.all.good"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "true"
}
]
}
]
}

View File

@ -3,7 +3,6 @@ dx010/cfg/dx010-modules.conf etc/modules-load.d
dx010/systemd/platform-modules-dx010.service lib/systemd/system
dx010/scripts/fancontrol.sh etc/init.d
dx010/scripts/fancontrol.service lib/systemd/system
dx010/scripts/thermal_overload_control.sh usr/local/bin
services/fancontrol/fancontrol usr/local/bin
dx010/modules/sonic_platform-1.0-py2-none-any.whl usr/share/sonic/device/x86_64-cel_seastone-r0
services/platform_api/platform_api_mgnt.sh usr/local/bin

View File

@ -6,4 +6,5 @@ systemctl start platform-modules-dx010.service
systemctl start fancontrol.service
/usr/local/bin/platform_api_mgnt.sh install
/etc/init.d/fancontrol.sh install

View File

@ -7,75 +7,42 @@
# Default-Start: 2 3 4 5
# Default-Stop:
# Short-Description: fancontrol
# Description: fan speed regulator
# Description: fancontrol configuration selector
### END INIT INFO
. /lib/lsb/init-functions
[ -f /etc/default/rcS ] && . /etc/default/rcS
PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin
DAEMON=/usr/local/bin/fancontrol
DESC="fan speed regulator"
NAME="fancontrol"
PIDFILE=/var/run/fancontrol.pid
MAIN_CONF=/usr/share/sonic/device/x86_64-cel_seastone-r0/fancontrol
DEVPATH=/sys/devices/pci0000:00/0000:00:13.0/i2c-*/i2c-13/13-002e
GPIO_DIR=/sys/class/gpio
BASE_GPIO=$(find $GPIO_DIR | grep gpiochip | grep -o '[[:digit:]]*')
DIRGPIO_START=15
test -x $DAEMON || exit 0
# Read the fan-direction GPIO, derive the airflow direction (B2F when the
# GPIO reads 1, F2B otherwise), select the matching fancontrol configuration
# in CONF and record the direction in the fan_airflow file.
init() {
    DIRGPIO_START=15
    BASE_GPIO=$(find $GPIO_DIR | grep gpiochip | grep -o '[[:digit:]]*')
    FANDIR_GPIO_NUMBER=$((DIRGPIO_START + BASE_GPIO))
    FANDIR_VALUE=$(cat "${GPIO_DIR}/gpio${FANDIR_GPIO_NUMBER}/value")
    DIRGPIO_START=$((DIRGPIO_START + 1))
    # Quoted so that an empty value (unreadable GPIO) is a plain mismatch
    # instead of a `[` syntax error; the result is still F2B in that case.
    FANDIR=$([ "$FANDIR_VALUE" = 1 ] && echo "B2F" || echo "F2B")
    CONF=${MAIN_CONF}-${FANDIR}
    echo "$FANDIR" > /usr/share/sonic/device/x86_64-cel_seastone-r0/fan_airflow
}
for i in 1 2 3 4 5
do
FANFAULT=$(cat ${DEVPATH}/fan${i}_fault)
[ $FANFAULT = 1 ] && continue
FANDIR_GPIO_NUMBER=$((DIRGPIO_START + BASE_GPIO))
FANDIR_VALUE=$(cat ${GPIO_DIR}/gpio${FANDIR_GPIO_NUMBER}/value)
DIRGPIO_START=$((DIRGPIO_START+1))
FANDIR=$([ $FANDIR_VALUE = 1 ] && echo "B2F" || echo "F2B")
done
CONF=${MAIN_CONF}-${FANDIR}
# Copy /usr/local/bin/fancontrol over every sbin/fancontrol found under the
# docker overlay directories.
install() {
    # The -path pattern is quoted so the shell hands it to find verbatim
    # instead of attempting glob expansion against the current directory.
    find /var/lib/docker/overlay*/ -path '*/sbin/fancontrol' -exec cp /usr/local/bin/fancontrol {} \;
}
case "$1" in
start)
if [ -f $CONF ] ; then
if $DAEMON --check $CONF 1>/dev/null 2>/dev/null ; then
log_daemon_msg "Starting $DESC" "$NAME\n"
start-stop-daemon --start --quiet --pidfile $PIDFILE --startas $DAEMON $CONF
log_end_msg $?
else
log_failure_msg "Not starting fancontrol, broken configuration file; please re-run pwmconfig."
fi
else
if [ "$VERBOSE" != no ]; then
log_warning_msg "Not starting fancontrol; run pwmconfig first."
fi
fi
;;
stop)
log_daemon_msg "Stopping $DESC" "$NAME"
start-stop-daemon --stop --quiet --pidfile $PIDFILE --oknodo --startas $DAEMON $CONF
rm -f $PIDFILE
log_end_msg $?
;;
restart)
$0 stop
sleep 3
$0 start
;;
force-reload)
if start-stop-daemon --stop --test --quiet --pidfile $PIDFILE --startas $DAEMON $CONF ; then
$0 restart
fi
;;
status)
status_of_proc $DAEMON $NAME $CONF && exit 0 || exit $?
;;
*)
log_success_msg "Usage: /etc/init.d/fancontrol {start|stop|restart|force-reload|status}"
exit 1
;;
start)
init
cp $CONF $MAIN_CONF
;;
install)
install
;;
*)
log_success_msg "Usage: /etc/init.d/fancontrol {start} | {install}"
exit 1
;;
esac
exit 0

View File

@ -180,42 +180,6 @@ function LoadConfig
}
# Scan every fan-fault file listed in the AFCFANFAULT array.
# Returns 1 as soon as one file reads "1" (fault), 0 when none does.
# Uses the global variables fancount and fault as scratch state.
function CheckFanFault()
{
let fancount=0
while (( $fancount < ${#AFCFANFAULT[@]} )) # go through all fan fault.
do
fault=`cat ${AFCFANFAULT[$fancount]}`
if [[ "$fault" == "1" ]]
then
return 1 # fan fault detected
fi
let fancount=$fancount+1
done
return 0
}
# Compare every temperature file in CSTEMP against its critical limit
# (CSMAXTEMPCRIT entry multiplied by 1000).
# When a reading is greater than or equal to the limit, the event is logged
# via logger; if THERMAL_OVERLOAD_CONTROL_FILE exists it is then run with the
# lower-cased CSMAXTEMPTYPE entry as argument and the whole script exits
# with status 1. Returns 0 when the loop completes.
function CheckTempOver()
{
let tempcount=0
while (( $tempcount < ${#CSTEMP[@]} )) # go through all temp.
do
ctemp=`cat ${CSTEMP[$tempcount]}`
let maxcrit="${CSMAXTEMPCRIT[$tempcount]}*1000"
# -ge: a reading equal to the limit already counts as overload,
# although the log message below prints ">".
if [ $ctemp -ge $maxcrit ]
then
logger "Thermal overload : ${CSMAXTEMPTYPE[$tempcount]} temperature ${ctemp} > ${maxcrit}"
if [ -f "$THERMAL_OVERLOAD_CONTROL_FILE" ]
then
# ${var,,} lower-cases the sensor type before passing it on.
toc_cmd="${THERMAL_OVERLOAD_CONTROL_FILE} ${CSMAXTEMPTYPE[$tempcount],,}"
bash $toc_cmd
exit 1
fi
fi
let tempcount=$tempcount+1
done
return 0
}
function DevicePath()
{
@ -508,18 +472,6 @@ function UpdateFanSpeeds
maxpwm=${AFCMAXPWM[$fcvcount]}
let tHyst="${AFCTHYST[$fcvcount]}*1000"
#if some fan fault detected all pwm=100%
CheckFanFault
if [ $? -ne 0 ]
then
echo $MAX > $pwmo
let fcvcount=$fcvcount+1
continue
fi
#check thermal overload
CheckTempOver
read tval < ${tsens}
if [ $? -ne 0 ]
then
@ -619,7 +571,7 @@ function UpdateFanSpeeds
echo $minsa > $pwmo
# Sleep while still handling signals
sleep 1 &
wait $!
wait
fi
fi
echo $pwmval > $pwmo # write new value to pwm output
@ -658,5 +610,5 @@ do
UpdateFanSpeeds
# Sleep while still handling signals
sleep $INTERVAL &
wait $!
wait
done