sonic-buildimage/platform/barefoot/sonic-platform-modules-bfn-montara/sonic_platform/thermal.py
Andriy Kokhan 304c6c80c4
[BFN] Reworked BFN platform thermals plugin (#11723)
* [BFN] Updated platform.json for wedge100bf_65x

Signed-off-by: Andriy Kokhan <andriyx.kokhan@intel.com>

* Reworked BFN platform thermal logic

* Implemented PSU thermal APIs

* Updated platform.json for accton_wedge100bf_32x

Signed-off-by: Andriy Kokhan <andriyx.kokhan@intel.com>

* Updated BFN platform plugins initialization flow

Signed-off-by: Andriy Kokhan <andriyx.kokhan@intel.com>

Signed-off-by: Andriy Kokhan <andriyx.kokhan@intel.com>
2022-10-11 09:12:28 +08:00

281 lines
9.2 KiB
Python

try:
import subprocess
import time
import threading
from collections import namedtuple
import json
from bfn_extensions.platform_sensors import platform_sensors_get
from sonic_platform_base.thermal_base import ThermalBase
from sonic_py_common import device_info
import logging
except ImportError as e:
raise ImportError (str(e) + "- required module not found")
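'''
The sensors chip parser used by the Thermal class expects its data argument
in "sensors -A -u" format: a chip name line, then sensor label lines, each
followed by space-indented "attribute: value" lines. Example:
coretemp-isa-0000
Package id 0:
  temp1_input: 37.000
  temp1_max: 82.000
  temp1_crit: 104.000
  temp1_crit_alarm: 0.000
Core 0:
  temp2_input: 37.000
...
'''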
# Fallback threshold record for one thermal; fields are, in order:
# crit, max, min, alarm.
Threshold = namedtuple('Threshold', 'crit max min alarm')
# Thermal -> ThermalBase -> DeviceBase
class Thermal(ThermalBase):
    """Temperature sensor backed by parsed ``sensors -A -u`` style output.

    Readings for all chips are fetched together and cached at class level;
    each instance selects one sensor by ``chip`` and ``label``.  When a
    sensor does not report a usable threshold, values loaded from
    ``thermal_thresholds.json`` in the platform directory are used instead.
    """

    # Parsed readings, {chip: {sensor label: {attribute: value string}}};
    # None until the first successful refresh.
    __sensors_info = None
    # time.time() of the last successful cache refresh.
    __timestamp = 0
    # Guards the refresh of the class-level cache.
    __lock = threading.Lock()
    # Fallback thresholds keyed by thermal name.  Class-level on purpose of
    # the original code: the dict is shared (and filled) by every instance.
    _thresholds = dict()
    # Bounds used by check_in_range(): valid values are in (min, max].
    _max_temperature = 100.0
    _min_temperature = 0.0
    # A sensor-reported 'max' below this value is rejected by
    # check_high_threshold().
    _min_high_threshold_temperature = 35.0

    def __init__(self, chip, label, index = 0):
        """Create a thermal for sensor ``label`` of ``chip``.

        Args:
            chip: Chip name as it appears in the sensors output.
            label: Sensor label; compared against the lower-cased,
                dash-separated form of the labels in the sensors output.
            index: Value returned by get_position_in_parent().
        """
        self.__chip = chip
        self.__label = label
        self.__name = f"{chip}:{label}".lower().replace(' ', '-')
        # Holds at most two values: the lowest and highest reading so far.
        self.__collect_temp = []
        self.__index = index
        self.__high_threshold = None
        self.__low_threshold = None

        try:
            path = device_info.get_path_to_platform_dir() + '/' + 'thermal_thresholds.json'
            f = open(path)
        except FileNotFoundError:
            # The thresholds file is optional; run without fallback values.
            logging.warning('can not open the file')
            return
        # Close the file once parsed (it was previously left open).
        with f:
            self.__get_thresholds(f)

    @staticmethod
    def __sensors_chip_parsed(data: str):
        """Parse the text block of one chip.

        The first line is the chip name.  A line that does not start with a
        space opens a new sensor; a line that starts with a space is an
        ``attribute: value`` pair of the most recent sensor, e.g.::

            coretemp-isa-0000
            Package id 0:
              temp1_input: 37.000
              temp1_max: 82.000

        Returns:
            ``(chip, {sensor label: {attribute: value}})``; values are kept
            as strings.

        Raises:
            RuntimeError: an attribute line appears before any sensor label.
        """
        def kv(line):
            # The trailing '' guarantees a value even for a line that has
            # no text after the colon.
            k, v, *_ = [t.strip(': ') for t in line.split(':') if t] + ['']
            return k, v

        chip, *lines = data.strip().split('\n')
        chip = chip.strip(': ')

        sensors = []
        for line in lines:
            if not line.startswith(' '):
                sensors.append((line.strip(': '), {}))
                continue

            if not sensors:
                raise RuntimeError(f'invalid data to parse: {lines}')

            attr, value = kv(line)
            sensors[-1][1][attr] = value

        return chip, dict(sensors)

    @classmethod
    def __sensors_get(cls, cached=True) -> dict:
        """Return the parsed readings of all chips.

        The class-level cache is refreshed when it is older than 15 seconds
        or when ``cached`` is False.  A failed refresh is logged and the
        previous data (or an empty dict if there is none) is returned.
        """
        # 'with' releases the lock on every exit path.
        with cls.__lock:
            if not cached or time.time() > cls.__timestamp + 15:
                try:
                    data = platform_sensors_get(['-A', '-u']) or ''
                    # Fixed command, so no shell is needed to run it.
                    data += subprocess.check_output(
                        ["/usr/bin/sensors", "-A", "-u"], text=True)
                    # Chips are separated by blank lines; skip blank chunks.
                    chips = [cls.__sensors_chip_parsed(chunk)
                             for chunk in data.split('\n\n') if chunk.strip()]
                    cls.__sensors_info = dict(chips)
                    cls.__timestamp = time.time()
                except Exception as e:
                    logging.warning("Failed to update sensors cache: " + str(e))
            # Never hand out None: callers immediately call .get() on it.
            return cls.__sensors_info or {}

    @staticmethod
    def __sensor_value_get(d: dict, key_prefix, key_suffix=''):
        """Return the first value in ``d`` whose key starts with
        ``key_prefix`` and ends with ``key_suffix``, or None if none does."""
        for k, v in d.items():
            if k.startswith(key_prefix) and k.endswith(key_suffix):
                return v
        return None

    @staticmethod
    def __get_platform_json():
        """Load and return ``platform.json`` from the platform directory."""
        hwsku_path = device_info.get_path_to_platform_dir()
        platform_json_path = "/".join([hwsku_path, "platform.json"])
        # Context manager so the file handle is closed after parsing.
        with open(platform_json_path) as f:
            return json.load(f)

    @staticmethod
    def get_chassis_thermals():
        """Return ``platform.json["chassis"]["thermals"]``, or None (after
        logging the error) if it cannot be read."""
        try:
            platform_json = Thermal.__get_platform_json()
            return platform_json["chassis"]["thermals"]
        except Exception as e:
            logging.exception("Failed to collect chassis thermals: " + str(e))
            return None

    @staticmethod
    def get_psu_thermals(psu_name):
        """Return the ``thermals`` entry of the PSU named ``psu_name`` in
        ``platform.json``.

        Returns None when no PSU has that name, or (after logging the error)
        when the data cannot be read.
        """
        try:
            platform_json = Thermal.__get_platform_json()
            for psu in platform_json["chassis"]["psus"]:
                if psu["name"] == psu_name:
                    return psu["thermals"]
        except Exception as e:
            logging.exception("Failed to collect psu thermals: " + str(e))
        return None

    def __get_thresholds(self, f):
        """Load fallback thresholds from the open JSON file ``f``.

        The file's "thermals" entry is iterated as a list of
        ``{name: [crit, max, min, alarm]}`` mappings; every entry is stored
        in the shared ``_thresholds`` dict.
        """
        def_threshold_json = json.load(f)
        for entry in def_threshold_json["thermals"]:
            for key, value in entry.items():
                self._thresholds[key] = Threshold(*value)

    def check_in_range(self, temperature):
        """Return True if ``temperature`` (anything ``float()`` accepts) is
        greater than ``_min_temperature`` and at most ``_max_temperature``."""
        temp_f = float(temperature)
        return self._min_temperature < temp_f <= self._max_temperature

    def check_high_threshold(self, temperature, attr_suffix):
        """Check a sensor-reported value for the attribute ``attr_suffix``.

        Only ``'max'`` is actually checked: a value below
        ``_min_high_threshold_temperature`` is rejected, and if a fallback
        exists for this thermal its ``max`` is installed via
        set_high_threshold().  Every other case is accepted.

        Returns:
            False if the value was rejected, True otherwise.
        """
        temp_f = float(temperature)
        if attr_suffix == 'max' and temp_f < self._min_high_threshold_temperature:
            if self.__name in self._thresholds:
                self.set_high_threshold(self._thresholds[self.__name].max)
            return False
        return True

    def __get(self, attr_prefix, attr_suffix):
        """Look up one attribute of this thermal's sensor.

        ``('temp', 'input')`` returns the raw reading (a string, or None if
        the sensor or attribute is missing).  For other attributes the
        sensor value is returned if it passes check_in_range() and
        check_high_threshold(); otherwise a fallback is chosen from
        ``_thresholds`` or from the user-set high/low thresholds.
        """
        chip_data = Thermal.__sensors_get().get(self.__chip, {})

        sensor_data = {}
        for sensor, data in chip_data.items():
            if sensor.lower().replace(' ', '-') == self.__label:
                sensor_data = data
                break

        value = Thermal.__sensor_value_get(sensor_data, attr_prefix, attr_suffix)

        # Can be float value or None
        if attr_prefix == 'temp' and attr_suffix == 'input':
            return value

        if value is not None and self.check_in_range(value) and self.check_high_threshold(value, attr_suffix):
            return value
        elif self.__name in self._thresholds and attr_prefix == 'temp':
            if attr_suffix == 'crit':
                return self._thresholds[self.__name].crit
            elif attr_suffix == 'max':
                if self.__high_threshold is None:
                    return self._thresholds[self.__name].max
                else:
                    return self.__high_threshold
            elif attr_suffix == 'min':
                if self.__low_threshold is None:
                    return self._thresholds[self.__name].min
                else:
                    return self.__low_threshold
            elif attr_suffix == 'alarm':
                return self._thresholds[self.__name].alarm
            else:
                # NOTE(review): placeholder for an unknown attribute; the
                # meaning of 1.0 is not visible here - confirm.
                return 1.0
        else:
            # NOTE(review): placeholder when neither the sensor nor the
            # fallback table has a usable value; meaning of 0.05 unclear.
            return 0.05

    # ThermalBase interface methods:
    def get_temperature(self) -> float:
        """Return the current reading as a float, or None if unavailable.

        Each reading is also folded into the recorded minimum/maximum.
        """
        temp = self.__get('temp', 'input')
        if temp is None:
            return None
        self.__collect_temp.append(float(temp))
        self.__collect_temp.sort()
        # With three sorted values the middle one is neither the minimum
        # nor the maximum, so drop it.
        if len(self.__collect_temp) == 3:
            del self.__collect_temp[1]
        return float(temp)

    def get_high_threshold(self) -> float:
        """Return the high threshold: the user-set value if any, else the
        sensor/fallback 'max' value."""
        if self.__high_threshold is None:
            return float(self.__get('temp', 'max'))
        return float(self.__high_threshold)

    def get_high_critical_threshold(self) -> float:
        """Return the sensor/fallback 'crit' value."""
        return float(self.__get('temp', 'crit'))

    def get_low_critical_threshold(self) -> float:
        """Return the sensor/fallback 'alarm' value."""
        return float(self.__get('temp', 'alarm'))

    def get_model(self):
        """Return the lower-cased sensor label."""
        return f"{self.__label}".lower()

    # DeviceBase interface methods:
    def get_name(self):
        """Return ``"<chip>:<label>"`` lower-cased with spaces turned into dashes."""
        return self.__name

    def get_presence(self):
        """Always True."""
        return True

    def get_status(self):
        """Always True."""
        return True

    def is_replaceable(self):
        """Always False."""
        return False

    def get_low_threshold(self) -> float:
        """Return the low threshold: the user-set value if any, else the
        sensor/fallback 'min' value."""
        if self.__low_threshold is None:
            return float(self.__get('temp', 'min'))
        return float(self.__low_threshold)

    def get_serial(self):
        """Always 'N/A'."""
        return 'N/A'

    @staticmethod
    def __clamp_recorded(temp):
        """Limit a recorded temperature: values above 100.0 become 100.0 and
        values not above 0.0 become 0.1.  None is passed through."""
        if temp is None:
            return None
        temp = temp if temp <= 100.0 else 100.0
        temp = temp if temp > 0.0 else 0.1
        return float(temp)

    def get_minimum_recorded(self) -> float:
        """Return the lowest reading seen so far (limited to the range
        described in __clamp_recorded).

        If nothing was recorded yet a reading is taken now; None is returned
        when that reading is unavailable too.
        """
        temp = self.__collect_temp[0] if len(self.__collect_temp) > 0 else self.get_temperature()
        return Thermal.__clamp_recorded(temp)

    def get_maximum_recorded(self) -> float:
        """Return the highest reading seen so far (limited to the range
        described in __clamp_recorded).

        If nothing was recorded yet a reading is taken now; None is returned
        when that reading is unavailable too.
        """
        temp = self.__collect_temp[-1] if len(self.__collect_temp) > 0 else self.get_temperature()
        return Thermal.__clamp_recorded(temp)

    def get_position_in_parent(self):
        """Return the ``index`` given at construction."""
        return self.__index

    def set_high_threshold(self, temperature):
        """Store ``temperature`` as the high threshold if check_in_range()
        accepts it.  Returns True if stored, False otherwise."""
        if self.check_in_range(temperature):
            self.__high_threshold = temperature
            return True
        return False

    def set_low_threshold(self, temperature):
        """Store ``temperature`` as the low threshold if check_in_range()
        accepts it.  Returns True if stored, False otherwise."""
        if self.check_in_range(temperature):
            self.__low_threshold = temperature
            return True
        return False
def chassis_thermals_list_get():
    """Return a Thermal object for every chassis thermal in platform.json.

    Each entry's "name" is split on ':' into chip and label; the entry's
    position in the list becomes the thermal's index.  An empty list is
    returned when Thermal.get_chassis_thermals() yields None (it does so
    after logging a read failure), instead of failing on enumerate(None).
    """
    thermal_list = []
    thermals = Thermal.get_chassis_thermals() or []
    for index, thermal in enumerate(thermals):
        name_parts = thermal["name"].split(':')
        thermal_list.append(Thermal(name_parts[0], name_parts[1], index))
    return thermal_list
def psu_thermals_list_get(psu_name):
    """Return a Thermal object for every thermal of the PSU ``psu_name``.

    Each entry's "name" is split on ':' into chip and label; the entry's
    position in the list becomes the thermal's index.  An empty list is
    returned when Thermal.get_psu_thermals() yields None (unknown PSU name
    or a logged read failure), instead of failing on enumerate(None).
    """
    thermal_list = []
    thermals = Thermal.get_psu_thermals(psu_name) or []
    for index, thermal in enumerate(thermals):
        name_parts = thermal["name"].split(':')
        thermal_list.append(Thermal(name_parts[0], name_parts[1], index))
    return thermal_list