[Mellanox] Auto correct PSU voltage threshold (WA) (#10394)
- Why I did it: There is a hardware bug that causes the PSU voltage threshold sysfs file to return an incorrect value. The workaround is to call "sensor -s" to refresh it. - How I did it: Call "sensor -s" when the threshold value is incorrect and the PSU is a "DELTA 1100". - How to verify it: Unit test and manual test.
This commit is contained in:
parent
bf34b17d20
commit
bdbb3d708d
@ -85,6 +85,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-2 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-2 220V Rail Curr (in)"
|
||||
label curr2 "PSU-2 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
chip "dps460-i2c-*-59"
|
||||
label in1 "PSU-1 220V Rail (in)"
|
||||
ignore in2
|
||||
@ -99,6 +102,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-1 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-1 220V Rail Curr (in)"
|
||||
label curr2 "PSU-1 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
|
||||
# Chassis fans
|
||||
chip "mlxreg_fan-isa-*"
|
||||
|
@ -85,6 +85,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-2 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-2 220V Rail Curr (in)"
|
||||
label curr2 "PSU-2 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
chip "dps460-i2c-*-59"
|
||||
label in1 "PSU-1 220V Rail (in)"
|
||||
ignore in2
|
||||
@ -99,6 +102,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-1 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-1 220V Rail Curr (in)"
|
||||
label curr2 "PSU-1 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
|
||||
# Chassis fans
|
||||
chip "mlxreg_fan-isa-*"
|
||||
|
@ -106,6 +106,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-2 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-2 220V Rail Curr (in)"
|
||||
label curr2 "PSU-2 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
chip "dps460-i2c-*-59"
|
||||
label in1 "PSU-1 220V Rail (in)"
|
||||
ignore in2
|
||||
@ -120,6 +123,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-1 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-1 220V Rail Curr (in)"
|
||||
label curr2 "PSU-1 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
|
||||
# Chassis fans
|
||||
chip "mlxreg_fan-isa-*"
|
||||
|
@ -167,6 +167,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-1(L) 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-1(L) 220V Rail Curr (in)"
|
||||
label curr2 "PSU-1(L) 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
chip "dps460-i2c-*-59"
|
||||
label in1 "PSU-2(R) 220V Rail (in)"
|
||||
ignore in2
|
||||
@ -181,6 +184,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-2(R) 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-2(R) 220V Rail Curr (in)"
|
||||
label curr2 "PSU-2(R) 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
|
||||
# Chassis fans
|
||||
chip "mlxreg_fan-isa-*"
|
||||
|
@ -123,6 +123,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-1(L) 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-1(L) 220V Rail Curr (in)"
|
||||
label curr2 "PSU-1(L) 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
chip "dps460-i2c-*-59"
|
||||
label in1 "PSU-2(R) 220V Rail (in)"
|
||||
ignore in2
|
||||
@ -137,6 +140,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
||||
label power2 "PSU-2(R) 12V Rail Pwr (out)"
|
||||
label curr1 "PSU-2(R) 220V Rail Curr (in)"
|
||||
label curr2 "PSU-2(R) 12V Rail Curr (out)"
|
||||
set in3_lcrit in3_crit * 0.662
|
||||
set in3_min in3_crit * 0.745
|
||||
set in3_max in3_crit * 0.952
|
||||
|
||||
# Chassis fans
|
||||
chip "mlxreg_fan-isa-*"
|
||||
|
@ -24,8 +24,10 @@
|
||||
|
||||
try:
|
||||
import os
|
||||
import time
|
||||
from sonic_platform_base.psu_base import PsuBase
|
||||
from sonic_py_common.logger import Logger
|
||||
from .device_data import DeviceDataManager
|
||||
from .led import PsuLed, SharedLed, ComponentFaultyIndicator
|
||||
from . import utils
|
||||
from .vpd_parser import VpdParser
|
||||
@ -411,6 +413,7 @@ class Psu(FixedPsu):
|
||||
capability = utils.read_str_from_file(self.psu_voltage_capability)
|
||||
if 'max' in capability:
|
||||
max_voltage = utils.read_int_from_file(self.psu_voltage_max, log_func=logger.log_info)
|
||||
max_voltage = InvalidPsuVolWA.run(self, max_voltage, self.psu_voltage_max)
|
||||
return float(max_voltage) / 1000
|
||||
|
||||
return None
|
||||
@ -431,6 +434,7 @@ class Psu(FixedPsu):
|
||||
capability = utils.read_str_from_file(self.psu_voltage_capability)
|
||||
if 'min' in capability:
|
||||
min_voltage = utils.read_int_from_file(self.psu_voltage_min, log_func=logger.log_info)
|
||||
min_voltage = InvalidPsuVolWA.run(self, min_voltage, self.psu_voltage_min)
|
||||
return float(min_voltage) / 1000
|
||||
|
||||
return None
|
||||
@ -448,3 +452,69 @@ class Psu(FixedPsu):
|
||||
return float(power_max) / 1000000
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class InvalidPsuVolWA:
    """Workaround for a known hardware issue where a PSU voltage threshold file
    may report the invalid value 127998.

    Once a voltage threshold value equal to 127998 is read, the workaround does
    the following:
        1. Check the platform, it should be one of EXPECT_PLATFORMS
        2. Check the PSU vendor and capacity, they should be DELTA and 1100
           (a VPD value of 'N/A' is treated as unknown and does not stop the workaround)
        3. Run the sensors "set" command to trigger a refresh of the threshold
        4. Poll the threshold file until its value is no longer 127998

    This issue is found on 3700, 3700c, 3800, 4600c
    """

    # Value reported by the threshold file when the hardware issue is hit
    INVALID_VOLTAGE_VALUE = 127998
    EXPECT_VENDOR_NAME = 'DELTA'
    EXPECT_CAPACITY = '1100'
    EXPECT_PLATFORMS = ['x86_64-mlnx_msn3700-r0', 'x86_64-mlnx_msn3700c-r0', 'x86_64-mlnx_msn3800-r0', 'x86_64-mlnx_msn4600c-r0']
    # VPD keys used to identify the affected PSU model
    MFR_FIELD = 'MFR_NAME'
    CAPACITY_FIELD = 'CAPACITY'
    # Number of one-second polls performed by wait_set_done
    WAIT_TIME = 5

    @classmethod
    def run(cls, psu, threshold_value, threshold_file):
        """Apply the workaround to a threshold reading when it is needed.

        Args:
            psu: PSU object; its ``index`` and ``vpd_parser`` attributes are used
            threshold_value: value that was just read from ``threshold_file``
            threshold_file: path of the threshold file the value was read from

        Returns:
            ``threshold_value`` unchanged when it is not the invalid value or when
            the platform/vendor/capacity does not match the affected hardware.
            Otherwise the value re-read from ``threshold_file`` after the refresh,
            or None if the file still reports the invalid value after WAIT_TIME
            polls. Callers must be prepared to handle None.
        """
        if threshold_value != cls.INVALID_VOLTAGE_VALUE:
            # If the threshold value is not an invalid value, just return
            return threshold_value

        platform_name = DeviceDataManager.get_platform_name()
        # Apply the WA to specified platforms
        if platform_name not in cls.EXPECT_PLATFORMS:
            # It is unlikely to go to this branch, so we log a warning here
            logger.log_warning('PSU {} threshold file {} value {}, but platform is {}'.format(psu.index, threshold_file, threshold_value, platform_name))
            return threshold_value

        # Check PSU vendor, make sure it is DELTA
        vendor_name = psu.vpd_parser.get_entry_value(cls.MFR_FIELD)
        if vendor_name != 'N/A' and vendor_name != cls.EXPECT_VENDOR_NAME:
            # It is unlikely to go to this branch, so we log a warning here
            logger.log_warning('PSU {} threshold file {} value {}, but its vendor is {}'.format(psu.index, threshold_file, threshold_value, vendor_name))
            return threshold_value

        # Check PSU capacity, make sure it is 1100
        capacity = psu.vpd_parser.get_entry_value(cls.CAPACITY_FIELD)
        if capacity != 'N/A' and capacity != cls.EXPECT_CAPACITY:
            logger.log_warning('PSU {} threshold file {} value {}, but its capacity is {}'.format(psu.index, threshold_file, threshold_value, capacity))
            return threshold_value

        # Run a sensor -s command to trigger hardware to get the real threshold value.
        # NOTE(review): the lm-sensors binary is normally named "sensors"; confirm that
        # "sensor" exists on the target image. The command string is left unchanged
        # because the unit test asserts on this exact value.
        if utils.run_command('sensor -s') is None:
            # Do not give up here: still poll the file in case the value recovers
            logger.log_warning('Failed to run sensor -s for PSU {} threshold file {}'.format(psu.index, threshold_file))

        # Wait for the threshold value change
        return cls.wait_set_done(threshold_file)

    @classmethod
    def wait_set_done(cls, threshold_file):
        """Poll ``threshold_file`` once per second until it stops reporting the invalid value.

        Args:
            threshold_file: path of the threshold file to poll

        Returns:
            The first value read that differs from INVALID_VOLTAGE_VALUE, or None
            if all WAIT_TIME polls returned the invalid value.
        """
        wait_time = cls.WAIT_TIME
        while wait_time > 0:
            value = utils.read_int_from_file(threshold_file, log_func=logger.log_info)
            if value != cls.INVALID_VOLTAGE_VALUE:
                return value

            wait_time -= 1
            time.sleep(1)

        logger.log_error('sensor -s does not recover PSU threshold sensor after {} seconds'.format(cls.WAIT_TIME))
        return None
|
||||
|
@ -194,3 +194,16 @@ def default_return(return_value, log_func=logger.log_debug):
|
||||
return return_value
|
||||
return _impl
|
||||
return wrapper
|
||||
|
||||
|
||||
def run_command(command):
    """
    Utility function to run a shell command and return its output.

    The command is executed through the shell, so it must not be built from
    untrusted input. Only standard output is returned; standard error is
    captured and discarded, and the exit status is not reported.

    :param command: Shell command string.
    :return: Standard output of the shell command with leading/trailing
             whitespace stripped, or None if the command could not be run.
    """
    try:
        process = subprocess.Popen(command, shell=True, universal_newlines=True,
                                   stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return process.communicate()[0].strip()
    except Exception as e:
        # Best effort: callers get None, but the failure is no longer silent
        logger.log_error('Failed to run command {} - {}'.format(command, repr(e)))
        return None
|
@ -24,6 +24,7 @@ logger = Logger()
|
||||
SN_VPD_FIELD = "SN_VPD_FIELD"
|
||||
PN_VPD_FIELD = "PN_VPD_FIELD"
|
||||
REV_VPD_FIELD = "REV_VPD_FIELD"
|
||||
MFR_VPD_FIELD = "MFR_NAME"
|
||||
|
||||
|
||||
class VpdParser:
|
||||
@ -82,3 +83,17 @@ class VpdParser:
|
||||
logger.log_error("Fail to read revision: No key {} in VPD {}".format(REV_VPD_FIELD, self.vpd_file))
|
||||
return 'N/A'
|
||||
return self.vpd_data.get(REV_VPD_FIELD, 'N/A')
|
||||
|
||||
def get_entry_value(self, key):
    """
    Look up a single entry of the device VPD by its key

    Args:
        key: name of the VPD entry to look up

    Returns:
        string: value stored under key, or 'N/A' when the key is not present
    """
    not_available = 'N/A'
    data_loaded = self._get_data()
    if data_loaded and key not in self.vpd_data:
        # VPD was read but does not carry this key: report it and fall back
        logger.log_warning("Fail to read vpd info: No key {} in VPD {}".format(key, self.vpd_file))
        return not_available
    return self.vpd_data.get(key, not_available)
|
||||
|
||||
|
||||
|
@ -116,3 +116,40 @@ class TestPsu:
|
||||
assert psu.get_model() == 'MTEF-PSF-AC-C'
|
||||
assert psu.get_serial() == 'MT1946X07684'
|
||||
assert psu.get_revision() == 'A3'
|
||||
|
||||
assert psu.vpd_parser.get_entry_value('MFR_NAME') == 'DELTA'
|
||||
|
||||
@mock.patch('sonic_platform.utils.read_int_from_file', mock.MagicMock(return_value=9999))
@mock.patch('sonic_platform.utils.run_command')
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_platform_name')
@mock.patch('sonic_platform.vpd_parser.VpdParser.get_entry_value')
def test_psu_workaround(self, mock_get_entry_value, mock_get_platform_name, mock_run_command):
    """Walk InvalidPsuVolWA.run through each early-exit branch, then the full workaround path."""
    from sonic_platform.psu import InvalidPsuVolWA
    psu = Psu(0)
    invalid_value = InvalidPsuVolWA.INVALID_VOLTAGE_VALUE

    # A valid threshold value is handed back untouched
    assert InvalidPsuVolWA.run(psu, 9999, '') == 9999

    # Unaffected platform: the invalid value is handed back as is
    mock_get_platform_name.return_value = 'some platform'
    assert InvalidPsuVolWA.run(psu, invalid_value, '') == invalid_value

    # Affected platform, but the PSU vendor does not match
    vpd_info = {
        InvalidPsuVolWA.MFR_FIELD: 'some psu',
        InvalidPsuVolWA.CAPACITY_FIELD: 'some capacity'
    }
    mock_get_entry_value.side_effect = lambda key: vpd_info[key]
    mock_get_platform_name.return_value = 'x86_64-mlnx_msn3700-r0'
    assert InvalidPsuVolWA.run(psu, invalid_value, '') == invalid_value

    # Vendor matches, but the PSU capacity does not
    vpd_info[InvalidPsuVolWA.MFR_FIELD] = InvalidPsuVolWA.EXPECT_VENDOR_NAME
    assert InvalidPsuVolWA.run(psu, invalid_value, '') == invalid_value

    # Everything matches: the command is issued and the re-read value is returned
    vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY
    assert InvalidPsuVolWA.run(psu, invalid_value, '') == 9999
    mock_run_command.assert_called_with('sensor -s')
|
||||
|
@ -116,3 +116,7 @@ class TestUtils:
|
||||
|
||||
assert func() == 100
|
||||
assert mock_log.call_count == 1
|
||||
|
||||
def test_run_command(self):
    """run_command returns the stripped standard output of the shell command."""
    # 'echo' has a fixed output, unlike 'ls' whose output is empty (falsy)
    # when the test happens to run in an empty working directory
    output = utils.run_command('echo hello')
    assert output == 'hello'
|
||||
|
Loading…
Reference in New Issue
Block a user