[Mellanox] Support PSU power threshold checking (#11863)
* Support power threshold

  Signed-off-by: Stephen Sun <stephens@nvidia.com>

* get_psu_power_warning_threshold => get_psu_power_warning_suppress_threshold

  Signed-off-by: Stephen Sun <stephens@nvidia.com>

* Fix comments

  Signed-off-by: Stephen Sun <stephens@nvidia.com>

Signed-off-by: Stephen Sun <stephens@nvidia.com>
This commit is contained in:
parent
f402e6b5c6
commit
5d457596ba
@ -216,6 +216,11 @@ class Psu(FixedPsu):
|
||||
PSU_VPD = "eeprom/psu{}_vpd"
|
||||
PSU_CURRENT_IN = "power/psu{}_curr_in"
|
||||
PSU_VOLT_IN = "power/psu{}_volt_in"
|
||||
PORT_AMBIENT_TEMP = os.path.join(PSU_PATH, "thermal/port_amb")
|
||||
FAN_AMBIENT_TEMP = os.path.join(PSU_PATH, "thermal/fan_amb")
|
||||
AMBIENT_TEMP_CRITICAL_THRESHOLD = os.path.join(PSU_PATH, "config/amb_tmp_crit_limit")
|
||||
AMBIENT_TEMP_WARNING_THRESHOLD = os.path.join(PSU_PATH, "config/amb_tmp_warn_limit")
|
||||
PSU_POWER_SLOPE = os.path.join(PSU_PATH, "config/psu_power_slope")
|
||||
|
||||
shared_led = None
|
||||
|
||||
@ -235,6 +240,8 @@ class Psu(FixedPsu):
|
||||
self.psu_power_max = self.psu_power + "_max"
|
||||
self.psu_presence = os.path.join(PSU_PATH, "thermal/psu{}_status".format(self.index))
|
||||
|
||||
self.psu_power_max_capacity = os.path.join(PSU_PATH, "config/psu{}_power_capacity".format(self.index))
|
||||
|
||||
self.psu_temp = os.path.join(PSU_PATH, 'thermal/psu{}_temp'.format(self.index))
|
||||
self.psu_temp_threshold = os.path.join(PSU_PATH, 'thermal/psu{}_temp_max'.format(self.index))
|
||||
|
||||
@ -505,6 +512,56 @@ class Psu(FixedPsu):
|
||||
return float(amperes) / 1000
|
||||
return None
|
||||
|
||||
def _get_psu_power_threshold(self, temp_threshold_path):
    """
    Derive a power threshold for this PSU from its maximum power capacity
    and the current ambient temperature.

    The ambient temperature is the lower of the fan-side and port-side
    readings. While it stays below the temperature limit read from
    `temp_threshold_path`, the threshold equals the maximum capacity.
    Otherwise the threshold is derated linearly:

        threshold = max capacity - slope * (ambient - temperature limit)

    A result that is zero or below is reported via a warning log and
    clamped to 0.

    Args:
        temp_threshold_path: path of the file holding the ambient
            temperature limit to compare against.

    Returns:
        The computed threshold divided by 1,000,000 as a float, or None
        when the PSU power is not good or the maximum capacity file does
        not exist.
    """
    # Nothing can be computed unless the PSU is powered properly.
    if not self.get_powergood_status():
        return None
    # The capacity file is absent on PSUs for which no threshold is computed.
    if not os.path.exists(self.psu_power_max_capacity):
        return None

    capacity = utils.read_int_from_file(self.psu_power_max_capacity)
    temp_limit = utils.read_int_from_file(temp_threshold_path)
    fan_side_temp = utils.read_int_from_file(Psu.FAN_AMBIENT_TEMP)
    port_side_temp = utils.read_int_from_file(Psu.PORT_AMBIENT_TEMP)
    ambient = min(fan_side_temp, port_side_temp)

    threshold = capacity
    if ambient >= temp_limit:
        # The slope is only needed (and only read) when derating applies.
        slope = utils.read_int_from_file(Psu.PSU_POWER_SLOPE)
        threshold = capacity - (ambient - temp_limit) * slope

    if threshold <= 0:
        logger.log_warning('Got negative PSU power threshold {} for {}'.format(threshold, self.get_name()))
        threshold = 0

    return float(threshold) / 1000000
|
||||
|
||||
def get_psu_power_warning_suppress_threshold(self):
    """
    Retrieve the warning suppress threshold of the power on this PSU

    The value is computed on every call and can be volatile, so callers
    should query this API each time instead of caching the result.
    On Mellanox platform, it is translated from the `warning threshold`

    Returns:
        A float number, the warning suppress threshold of the PSU in watts.
    """
    # The ambient-temperature warning limit selects which threshold is computed.
    warning_limit_path = Psu.AMBIENT_TEMP_WARNING_THRESHOLD
    return self._get_psu_power_threshold(warning_limit_path)
|
||||
|
||||
def get_psu_power_critical_threshold(self):
    """
    Retrieve the critical threshold of the power on this PSU

    The value is computed on every call and can be volatile, so callers
    should query this API each time instead of caching the result.

    Returns:
        A float number, the critical threshold of the PSU in watts.
    """
    # The ambient-temperature critical limit selects which threshold is computed.
    critical_limit_path = Psu.AMBIENT_TEMP_CRITICAL_THRESHOLD
    return self._get_psu_power_threshold(critical_limit_path)
|
||||
|
||||
|
||||
class InvalidPsuVolWA:
|
||||
"""This class is created as a workaround for a known hardware issue that the PSU voltage threshold could be a
|
||||
invalid value 127998. Once we read a voltage threshold value equal to 127998, we should do following:
|
||||
|
@ -161,3 +161,58 @@ class TestPsu:
|
||||
vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY
|
||||
assert InvalidPsuVolWA.run(psu, InvalidPsuVolWA.INVALID_VOLTAGE_VALUE, '') == 9999
|
||||
mock_run_command.assert_called_with(['sensors', '-s'])
|
||||
|
||||
@mock.patch('os.path.exists', mock.MagicMock(return_value=True))
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_psu_power_threshold(self, mock_read_int_from_file):
    """
    Check the warning-suppress and critical power thresholds of a PSU for
    three ambient temperature scenarios, with every file read mocked.
    """
    Psu.all_psus_support_power_threshold = True
    psu = Psu(0)

    # Reads that return the same value in every scenario.
    static_reads = {
        psu.psu_oper_status: 1,
        psu.psu_power_max_capacity: 100000000,
        psu.AMBIENT_TEMP_CRITICAL_THRESHOLD: 65000,
        psu.AMBIENT_TEMP_WARNING_THRESHOLD: 55000,
        psu.PSU_POWER_SLOPE: 2000
    }

    # (port ambient, fan ambient, expected warning, expected critical)
    scenarios = (
        # Ambient (min of the two) below both limits: no derating.
        (55000, 50000, 100.0, 100.0),
        # Ambient above the warning limit only: warning threshold derated.
        (65000, 60000, 90.0, 100.0),
        # Ambient above both limits: both thresholds derated.
        (70000, 75000, 70.0, 90.0),
    )

    ambient_reads = {}

    def fake_read_int(path):
        # Static values take precedence; everything else must come from
        # the scenario currently under test.
        if path in static_reads:
            return static_reads[path]
        return ambient_reads[path]

    mock_read_int_from_file.side_effect = fake_read_int

    for port_ambient, fan_ambient, expected_warning, expected_critical in scenarios:
        ambient_reads = {
            psu.PORT_AMBIENT_TEMP: port_ambient,
            psu.FAN_AMBIENT_TEMP: fan_ambient
        }
        assert psu.get_psu_power_warning_suppress_threshold() == expected_warning
        assert psu.get_psu_power_critical_threshold() == expected_critical
|
||||
|
||||
def test_psu_not_support_power_threshold(self):
    """
    Without any mocked file reads, both power threshold getters are
    expected to return None.
    """
    psu = Psu(0)

    warning_threshold = psu.get_psu_power_warning_suppress_threshold()
    assert warning_threshold is None

    critical_threshold = psu.get_psu_power_critical_threshold()
    assert critical_threshold is None
|
||||
|
Loading…
Reference in New Issue
Block a user