[Mellanox] Support PSU power threshold checking (#11863)
* Support power threshold

  Signed-off-by: Stephen Sun <stephens@nvidia.com>

* get_psu_power_warning_threshold => get_psu_power_warning_suppress_threshold

  Signed-off-by: Stephen Sun <stephens@nvidia.com>

* Fix comments

  Signed-off-by: Stephen Sun <stephens@nvidia.com>

Signed-off-by: Stephen Sun <stephens@nvidia.com>
This commit is contained in:
parent
2e0d958a42
commit
91e12d7b49
@ -216,6 +216,11 @@ class Psu(FixedPsu):
|
|||||||
PSU_VPD = "eeprom/psu{}_vpd"
|
PSU_VPD = "eeprom/psu{}_vpd"
|
||||||
PSU_CURRENT_IN = "power/psu{}_curr_in"
|
PSU_CURRENT_IN = "power/psu{}_curr_in"
|
||||||
PSU_VOLT_IN = "power/psu{}_volt_in"
|
PSU_VOLT_IN = "power/psu{}_volt_in"
|
||||||
|
PORT_AMBIENT_TEMP = os.path.join(PSU_PATH, "thermal/port_amb")
|
||||||
|
FAN_AMBIENT_TEMP = os.path.join(PSU_PATH, "thermal/fan_amb")
|
||||||
|
AMBIENT_TEMP_CRITICAL_THRESHOLD = os.path.join(PSU_PATH, "config/amb_tmp_crit_limit")
|
||||||
|
AMBIENT_TEMP_WARNING_THRESHOLD = os.path.join(PSU_PATH, "config/amb_tmp_warn_limit")
|
||||||
|
PSU_POWER_SLOPE = os.path.join(PSU_PATH, "config/psu_power_slope")
|
||||||
|
|
||||||
shared_led = None
|
shared_led = None
|
||||||
|
|
||||||
@ -235,6 +240,8 @@ class Psu(FixedPsu):
|
|||||||
self.psu_power_max = self.psu_power + "_max"
|
self.psu_power_max = self.psu_power + "_max"
|
||||||
self.psu_presence = os.path.join(PSU_PATH, "thermal/psu{}_status".format(self.index))
|
self.psu_presence = os.path.join(PSU_PATH, "thermal/psu{}_status".format(self.index))
|
||||||
|
|
||||||
|
self.psu_power_max_capacity = os.path.join(PSU_PATH, "config/psu{}_power_capacity".format(self.index))
|
||||||
|
|
||||||
self.psu_temp = os.path.join(PSU_PATH, 'thermal/psu{}_temp'.format(self.index))
|
self.psu_temp = os.path.join(PSU_PATH, 'thermal/psu{}_temp'.format(self.index))
|
||||||
self.psu_temp_threshold = os.path.join(PSU_PATH, 'thermal/psu{}_temp_max'.format(self.index))
|
self.psu_temp_threshold = os.path.join(PSU_PATH, 'thermal/psu{}_temp_max'.format(self.index))
|
||||||
|
|
||||||
@ -505,6 +512,56 @@ class Psu(FixedPsu):
|
|||||||
return float(amperes) / 1000
|
return float(amperes) / 1000
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _get_psu_power_threshold(self, temp_threshold_path):
    """
    Derive a power threshold for this PSU from its maximum power capacity and the ambient temperature.

    The ambient temperature is the smaller of the fan and port ambient readings:
        ambient = min(fan_amb, port_amb)
    While the ambient temperature is below the limit read from temp_threshold_path,
    the threshold equals the maximum capacity. Once it reaches the limit, the
    threshold is reduced linearly:
        threshold = max capacity - slope * (ambient - limit)
    A result that is zero or lower is logged as a warning and clamped to 0.

    Args:
        temp_threshold_path: path of the file holding the ambient temperature limit to compare against.

    Returns:
        A float, the computed threshold divided by 1000000, or None when the
        PSU power is not good or the max-capacity file does not exist.
    """
    # Nothing can be computed for a PSU that is not delivering good power.
    if not self.get_powergood_status():
        return None

    # The capacity file is optional; its absence means no threshold is available.
    if not os.path.exists(self.psu_power_max_capacity):
        return None

    capacity = utils.read_int_from_file(self.psu_power_max_capacity)
    temp_limit = utils.read_int_from_file(temp_threshold_path)
    fan_reading = utils.read_int_from_file(Psu.FAN_AMBIENT_TEMP)
    port_reading = utils.read_int_from_file(Psu.PORT_AMBIENT_TEMP)
    coolest = min(fan_reading, port_reading)

    if coolest >= temp_limit:
        # The slope file is only consulted when the capacity has to be reduced.
        derate_slope = utils.read_int_from_file(Psu.PSU_POWER_SLOPE)
        threshold = capacity - (coolest - temp_limit) * derate_slope
    else:
        threshold = capacity

    if threshold <= 0:
        logger.log_warning('Got negative PSU power threshold {} for {}'.format(threshold, self.get_name()))
        threshold = 0

    return float(threshold) / 1000000
|
||||||
|
|
||||||
|
def get_psu_power_warning_suppress_threshold(self):
    """
    Return the warning suppress threshold of the power on this PSU.

    The result depends on readings taken at call time, so it may differ
    between calls; callers should query it every time instead of caching it.
    On Mellanox platforms it is computed against the ambient temperature
    warning limit.

    Returns:
        A float number, the warning suppress threshold of the PSU in watts.
    """
    warning_limit_path = Psu.AMBIENT_TEMP_WARNING_THRESHOLD
    return self._get_psu_power_threshold(warning_limit_path)
|
||||||
|
|
||||||
|
def get_psu_power_critical_threshold(self):
    """
    Return the critical threshold of the power on this PSU.

    The result depends on readings taken at call time, so it may differ
    between calls; callers should query it every time instead of caching it.
    It is computed against the ambient temperature critical limit.

    Returns:
        A float number, the critical threshold of the PSU in watts.
    """
    critical_limit_path = Psu.AMBIENT_TEMP_CRITICAL_THRESHOLD
    return self._get_psu_power_threshold(critical_limit_path)
|
||||||
|
|
||||||
|
|
||||||
class InvalidPsuVolWA:
|
class InvalidPsuVolWA:
|
||||||
"""This class is created as a workaround for a known hardware issue that the PSU voltage threshold could be a
|
"""This class is created as a workaround for a known hardware issue that the PSU voltage threshold could be a
|
||||||
invalid value 127998. Once we read a voltage threshold value equal to 127998, we should do following:
|
invalid value 127998. Once we read a voltage threshold value equal to 127998, we should do following:
|
||||||
|
@ -161,3 +161,58 @@ class TestPsu:
|
|||||||
vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY
|
vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY
|
||||||
assert InvalidPsuVolWA.run(psu, InvalidPsuVolWA.INVALID_VOLTAGE_VALUE, '') == 9999
|
assert InvalidPsuVolWA.run(psu, InvalidPsuVolWA.INVALID_VOLTAGE_VALUE, '') == 9999
|
||||||
mock_run_command.assert_called_with(['sensors', '-s'])
|
mock_run_command.assert_called_with(['sensors', '-s'])
|
||||||
|
|
||||||
|
@mock.patch('os.path.exists', mock.MagicMock(return_value=True))
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_psu_power_threshold(self, mock_read_int_from_file):
    """
    Check both power threshold getters against three ambient temperature scenarios.

    Every file read is served from dictionaries keyed by file path: one with
    values shared by all scenarios and one with the ambient readings of the
    scenario currently under test. Each scenario also carries the thresholds
    the getters are expected to report for it.
    """
    Psu.all_psus_support_power_threshold = True
    psu = Psu(0)

    # Readings that do not change from one scenario to the next.
    shared_readings = {
        psu.psu_oper_status: 1,
        psu.psu_power_max_capacity: 100000000,
        psu.AMBIENT_TEMP_CRITICAL_THRESHOLD: 65000,
        psu.AMBIENT_TEMP_WARNING_THRESHOLD: 55000,
        psu.PSU_POWER_SLOPE: 2000
    }

    # Ambient temperature below both limits: no reduction at all.
    below_both_limits = {
        psu.PORT_AMBIENT_TEMP: 55000,
        psu.FAN_AMBIENT_TEMP: 50000,
        'warning_threshold': 100.0,
        'critical_threshold': 100.0
    }
    # Ambient temperature above the warning limit only.
    above_warning_limit = {
        psu.PORT_AMBIENT_TEMP: 65000,
        psu.FAN_AMBIENT_TEMP: 60000,
        'warning_threshold': 90.0,
        'critical_threshold': 100.0
    }
    # Ambient temperature above both limits.
    above_both_limits = {
        psu.PORT_AMBIENT_TEMP: 70000,
        psu.FAN_AMBIENT_TEMP: 75000,
        'warning_threshold': 70.0,
        'critical_threshold': 90.0
    }

    # Holds the scenario being exercised; refilled in place so the side
    # effect below always sees the current values.
    active_scenario = {}

    def mock_side_effect(value):
        if value in shared_readings:
            return shared_readings[value]
        return active_scenario[value]

    mock_read_int_from_file.side_effect = mock_side_effect

    for scenario in (below_both_limits, above_warning_limit, above_both_limits):
        active_scenario.clear()
        active_scenario.update(scenario)
        assert psu.get_psu_power_warning_suppress_threshold() == scenario['warning_threshold']
        assert psu.get_psu_power_critical_threshold() == scenario['critical_threshold']
|
||||||
|
|
||||||
|
def test_psu_not_support_power_threshold(self):
    """
    Check that both power threshold getters return None when this test
    installs no file mocks of its own.
    """
    psu = Psu(0)
    threshold_getters = (
        psu.get_psu_power_warning_suppress_threshold,
        psu.get_psu_power_critical_threshold,
    )
    for read_threshold in threshold_getters:
        assert read_threshold() is None
|
||||||
|
Reference in New Issue
Block a user