[Mellanox] Auto correct PSU voltage threshold (WA) (#10394)
- Why I did it There is a hardware bug that causes the PSU voltage threshold sysfs node to return an incorrect value. The workaround is to call "sensors -s" to refresh it. - How I did it Call "sensors -s" when the threshold value is incorrect and the PSU is a "DELTA 1100" - How to verify it Unit test and manual test
This commit is contained in:
parent
bf34b17d20
commit
bdbb3d708d
@ -85,6 +85,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-2 12V Rail Pwr (out)"
|
label power2 "PSU-2 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-2 220V Rail Curr (in)"
|
label curr1 "PSU-2 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-2 12V Rail Curr (out)"
|
label curr2 "PSU-2 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
chip "dps460-i2c-*-59"
|
chip "dps460-i2c-*-59"
|
||||||
label in1 "PSU-1 220V Rail (in)"
|
label in1 "PSU-1 220V Rail (in)"
|
||||||
ignore in2
|
ignore in2
|
||||||
@ -99,6 +102,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-1 12V Rail Pwr (out)"
|
label power2 "PSU-1 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-1 220V Rail Curr (in)"
|
label curr1 "PSU-1 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-1 12V Rail Curr (out)"
|
label curr2 "PSU-1 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
|
|
||||||
# Chassis fans
|
# Chassis fans
|
||||||
chip "mlxreg_fan-isa-*"
|
chip "mlxreg_fan-isa-*"
|
||||||
|
@ -85,6 +85,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-2 12V Rail Pwr (out)"
|
label power2 "PSU-2 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-2 220V Rail Curr (in)"
|
label curr1 "PSU-2 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-2 12V Rail Curr (out)"
|
label curr2 "PSU-2 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
chip "dps460-i2c-*-59"
|
chip "dps460-i2c-*-59"
|
||||||
label in1 "PSU-1 220V Rail (in)"
|
label in1 "PSU-1 220V Rail (in)"
|
||||||
ignore in2
|
ignore in2
|
||||||
@ -99,6 +102,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-1 12V Rail Pwr (out)"
|
label power2 "PSU-1 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-1 220V Rail Curr (in)"
|
label curr1 "PSU-1 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-1 12V Rail Curr (out)"
|
label curr2 "PSU-1 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
|
|
||||||
# Chassis fans
|
# Chassis fans
|
||||||
chip "mlxreg_fan-isa-*"
|
chip "mlxreg_fan-isa-*"
|
||||||
|
@ -106,6 +106,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-2 12V Rail Pwr (out)"
|
label power2 "PSU-2 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-2 220V Rail Curr (in)"
|
label curr1 "PSU-2 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-2 12V Rail Curr (out)"
|
label curr2 "PSU-2 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
chip "dps460-i2c-*-59"
|
chip "dps460-i2c-*-59"
|
||||||
label in1 "PSU-1 220V Rail (in)"
|
label in1 "PSU-1 220V Rail (in)"
|
||||||
ignore in2
|
ignore in2
|
||||||
@ -120,6 +123,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-1 12V Rail Pwr (out)"
|
label power2 "PSU-1 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-1 220V Rail Curr (in)"
|
label curr1 "PSU-1 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-1 12V Rail Curr (out)"
|
label curr2 "PSU-1 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
|
|
||||||
# Chassis fans
|
# Chassis fans
|
||||||
chip "mlxreg_fan-isa-*"
|
chip "mlxreg_fan-isa-*"
|
||||||
|
@ -167,6 +167,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-1(L) 12V Rail Pwr (out)"
|
label power2 "PSU-1(L) 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-1(L) 220V Rail Curr (in)"
|
label curr1 "PSU-1(L) 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-1(L) 12V Rail Curr (out)"
|
label curr2 "PSU-1(L) 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
chip "dps460-i2c-*-59"
|
chip "dps460-i2c-*-59"
|
||||||
label in1 "PSU-2(R) 220V Rail (in)"
|
label in1 "PSU-2(R) 220V Rail (in)"
|
||||||
ignore in2
|
ignore in2
|
||||||
@ -181,6 +184,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-2(R) 12V Rail Pwr (out)"
|
label power2 "PSU-2(R) 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-2(R) 220V Rail Curr (in)"
|
label curr1 "PSU-2(R) 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-2(R) 12V Rail Curr (out)"
|
label curr2 "PSU-2(R) 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
|
|
||||||
# Chassis fans
|
# Chassis fans
|
||||||
chip "mlxreg_fan-isa-*"
|
chip "mlxreg_fan-isa-*"
|
||||||
|
@ -123,6 +123,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-1(L) 12V Rail Pwr (out)"
|
label power2 "PSU-1(L) 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-1(L) 220V Rail Curr (in)"
|
label curr1 "PSU-1(L) 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-1(L) 12V Rail Curr (out)"
|
label curr2 "PSU-1(L) 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
chip "dps460-i2c-*-59"
|
chip "dps460-i2c-*-59"
|
||||||
label in1 "PSU-2(R) 220V Rail (in)"
|
label in1 "PSU-2(R) 220V Rail (in)"
|
||||||
ignore in2
|
ignore in2
|
||||||
@ -137,6 +140,9 @@ bus "i2c-4" "i2c-1-mux (chan_id 3)"
|
|||||||
label power2 "PSU-2(R) 12V Rail Pwr (out)"
|
label power2 "PSU-2(R) 12V Rail Pwr (out)"
|
||||||
label curr1 "PSU-2(R) 220V Rail Curr (in)"
|
label curr1 "PSU-2(R) 220V Rail Curr (in)"
|
||||||
label curr2 "PSU-2(R) 12V Rail Curr (out)"
|
label curr2 "PSU-2(R) 12V Rail Curr (out)"
|
||||||
|
set in3_lcrit in3_crit * 0.662
|
||||||
|
set in3_min in3_crit * 0.745
|
||||||
|
set in3_max in3_crit * 0.952
|
||||||
|
|
||||||
# Chassis fans
|
# Chassis fans
|
||||||
chip "mlxreg_fan-isa-*"
|
chip "mlxreg_fan-isa-*"
|
||||||
|
@ -24,8 +24,10 @@
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
from sonic_platform_base.psu_base import PsuBase
|
from sonic_platform_base.psu_base import PsuBase
|
||||||
from sonic_py_common.logger import Logger
|
from sonic_py_common.logger import Logger
|
||||||
|
from .device_data import DeviceDataManager
|
||||||
from .led import PsuLed, SharedLed, ComponentFaultyIndicator
|
from .led import PsuLed, SharedLed, ComponentFaultyIndicator
|
||||||
from . import utils
|
from . import utils
|
||||||
from .vpd_parser import VpdParser
|
from .vpd_parser import VpdParser
|
||||||
@ -411,6 +413,7 @@ class Psu(FixedPsu):
|
|||||||
capability = utils.read_str_from_file(self.psu_voltage_capability)
|
capability = utils.read_str_from_file(self.psu_voltage_capability)
|
||||||
if 'max' in capability:
|
if 'max' in capability:
|
||||||
max_voltage = utils.read_int_from_file(self.psu_voltage_max, log_func=logger.log_info)
|
max_voltage = utils.read_int_from_file(self.psu_voltage_max, log_func=logger.log_info)
|
||||||
|
max_voltage = InvalidPsuVolWA.run(self, max_voltage, self.psu_voltage_max)
|
||||||
return float(max_voltage) / 1000
|
return float(max_voltage) / 1000
|
||||||
|
|
||||||
return None
|
return None
|
||||||
@ -431,6 +434,7 @@ class Psu(FixedPsu):
|
|||||||
capability = utils.read_str_from_file(self.psu_voltage_capability)
|
capability = utils.read_str_from_file(self.psu_voltage_capability)
|
||||||
if 'min' in capability:
|
if 'min' in capability:
|
||||||
min_voltage = utils.read_int_from_file(self.psu_voltage_min, log_func=logger.log_info)
|
min_voltage = utils.read_int_from_file(self.psu_voltage_min, log_func=logger.log_info)
|
||||||
|
min_voltage = InvalidPsuVolWA.run(self, min_voltage, self.psu_voltage_min)
|
||||||
return float(min_voltage) / 1000
|
return float(min_voltage) / 1000
|
||||||
|
|
||||||
return None
|
return None
|
||||||
@ -448,3 +452,69 @@ class Psu(FixedPsu):
|
|||||||
return float(power_max) / 1000000
|
return float(power_max) / 1000000
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidPsuVolWA:
    """Workaround for a known hardware issue where a PSU voltage threshold reads back as the invalid value 127998.

    Once a voltage threshold value equal to 127998 is read, ``run`` does the following:
        1. Check that the platform is one of EXPECT_PLATFORMS
        2. Check the PSU vendor and capacity, they should be DELTA and 1100. An entry that reads as 'N/A'
           is not treated as a mismatch.
        3. Call "sensors -s"
        4. Poll the threshold file until it no longer holds the invalid value, for at most WAIT_TIME attempts

    This issue is found on 3700, 3700c, 3800, 4600c
    """

    # Value reported by the threshold file when the hardware issue is hit
    INVALID_VOLTAGE_VALUE = 127998
    EXPECT_VENDOR_NAME = 'DELTA'
    EXPECT_CAPACITY = '1100'
    EXPECT_PLATFORMS = ['x86_64-mlnx_msn3700-r0', 'x86_64-mlnx_msn3700c-r0', 'x86_64-mlnx_msn3800-r0', 'x86_64-mlnx_msn4600c-r0']
    # VPD keys used to identify the PSU
    MFR_FIELD = 'MFR_NAME'
    CAPACITY_FIELD = 'CAPACITY'
    # Number of one-second polling attempts made after running the command
    WAIT_TIME = 5

    @classmethod
    def run(cls, psu, threshold_value, threshold_file):
        """Return a usable threshold value, applying the workaround when needed.

        Args:
            psu: PSU object; ``psu.index`` and ``psu.vpd_parser.get_entry_value`` are used.
            threshold_value: Value that was just read from threshold_file.
            threshold_file: Path of the threshold file, re-read after the workaround is applied.

        Returns:
            threshold_value unchanged when it is valid or when the platform/vendor/capacity
            checks do not match; otherwise the result of ``wait_set_done``, which is the
            refreshed value or None if the value did not recover.
        """
        if threshold_value != cls.INVALID_VOLTAGE_VALUE:
            # If the threshold value is not an invalid value, just return
            return threshold_value

        platform_name = DeviceDataManager.get_platform_name()
        # Apply the WA to specified platforms
        if platform_name not in cls.EXPECT_PLATFORMS:
            # It is unlikely to go to this branch, so we log a warning here
            logger.log_warning('PSU {} threshold file {} value {}, but platform is {}'.format(psu.index, threshold_file, threshold_value, platform_name))
            return threshold_value

        # Check PSU vendor, make sure it is DELTA
        vendor_name = psu.vpd_parser.get_entry_value(cls.MFR_FIELD)
        if vendor_name != 'N/A' and vendor_name != cls.EXPECT_VENDOR_NAME:
            # It is unlikely to go to this branch, so we log a warning here
            logger.log_warning('PSU {} threshold file {} value {}, but its vendor is {}'.format(psu.index, threshold_file, threshold_value, vendor_name))
            return threshold_value

        # Check PSU capacity, make sure it is 1100
        capacity = psu.vpd_parser.get_entry_value(cls.CAPACITY_FIELD)
        if capacity != 'N/A' and capacity != cls.EXPECT_CAPACITY:
            logger.log_warning('PSU {} threshold file {} value {}, but its capacity is {}'.format(psu.index, threshold_file, threshold_value, capacity))
            return threshold_value

        # Run "sensors -s" to trigger the hardware to report the real threshold value.
        # The lm-sensors binary is named "sensors"; "sensor" is not a valid command.
        utils.run_command('sensors -s')

        # Wait for the threshold value change
        return cls.wait_set_done(threshold_file)

    @classmethod
    def wait_set_done(cls, threshold_file):
        """Poll threshold_file once per second until it stops reporting the invalid value.

        Args:
            threshold_file: Path of the threshold file to read.

        Returns:
            The first value read that differs from INVALID_VOLTAGE_VALUE, or None when
            every one of the WAIT_TIME attempts still returned the invalid value.
            Callers have to be prepared for None.
        """
        wait_time = cls.WAIT_TIME
        while wait_time > 0:
            value = utils.read_int_from_file(threshold_file, log_func=logger.log_info)
            if value != cls.INVALID_VOLTAGE_VALUE:
                return value

            wait_time -= 1
            time.sleep(1)

        logger.log_error('sensors -s does not recover PSU threshold sensor after {} seconds'.format(cls.WAIT_TIME))
        return None
|
||||||
|
@ -194,3 +194,16 @@ def default_return(return_value, log_func=logger.log_debug):
|
|||||||
return return_value
|
return return_value
|
||||||
return _impl
|
return _impl
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
def run_command(command):
    """
    Run a shell command and hand back what it printed on stdout.

    The command is executed through the shell; stderr is captured and discarded.

    :param command: Shell command string.
    :return: Stdout of the command with leading/trailing whitespace stripped,
             or None if starting or communicating with the process raised.
    """
    try:
        proc = subprocess.Popen(
            command,
            shell=True,
            universal_newlines=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout_text, _ = proc.communicate()
        return stdout_text.strip()
    except Exception:
        # Best effort: any failure is reported to the caller as None
        return None
|
@ -24,6 +24,7 @@ logger = Logger()
|
|||||||
SN_VPD_FIELD = "SN_VPD_FIELD"
|
SN_VPD_FIELD = "SN_VPD_FIELD"
|
||||||
PN_VPD_FIELD = "PN_VPD_FIELD"
|
PN_VPD_FIELD = "PN_VPD_FIELD"
|
||||||
REV_VPD_FIELD = "REV_VPD_FIELD"
|
REV_VPD_FIELD = "REV_VPD_FIELD"
|
||||||
|
MFR_VPD_FIELD = "MFR_NAME"
|
||||||
|
|
||||||
|
|
||||||
class VpdParser:
|
class VpdParser:
|
||||||
@ -82,3 +83,17 @@ class VpdParser:
|
|||||||
logger.log_error("Fail to read revision: No key {} in VPD {}".format(REV_VPD_FIELD, self.vpd_file))
|
logger.log_error("Fail to read revision: No key {} in VPD {}".format(REV_VPD_FIELD, self.vpd_file))
|
||||||
return 'N/A'
|
return 'N/A'
|
||||||
return self.vpd_data.get(REV_VPD_FIELD, 'N/A')
|
return self.vpd_data.get(REV_VPD_FIELD, 'N/A')
|
||||||
|
|
||||||
|
def get_entry_value(self, key):
    """
    Look up a single entry of the device VPD by its key

    Args:
        key: Name of the VPD entry to look up

    Returns:
        string: Value stored under key, or 'N/A' when the key is not present
    """
    # Either _get_data() returned a falsy result or the key exists: a plain lookup is enough
    if not self._get_data() or key in self.vpd_data:
        return self.vpd_data.get(key, 'N/A')

    # _get_data() returned a truthy result but the key is missing: warn before falling back
    logger.log_warning("Fail to read vpd info: No key {} in VPD {}".format(key, self.vpd_file))
    return 'N/A'
|
||||||
|
|
||||||
|
|
||||||
|
@ -116,3 +116,40 @@ class TestPsu:
|
|||||||
assert psu.get_model() == 'MTEF-PSF-AC-C'
|
assert psu.get_model() == 'MTEF-PSF-AC-C'
|
||||||
assert psu.get_serial() == 'MT1946X07684'
|
assert psu.get_serial() == 'MT1946X07684'
|
||||||
assert psu.get_revision() == 'A3'
|
assert psu.get_revision() == 'A3'
|
||||||
|
|
||||||
|
assert psu.vpd_parser.get_entry_value('MFR_NAME') == 'DELTA'
|
||||||
|
|
||||||
|
@mock.patch('sonic_platform.utils.read_int_from_file', mock.MagicMock(return_value=9999))
@mock.patch('sonic_platform.utils.run_command')
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_platform_name')
@mock.patch('sonic_platform.vpd_parser.VpdParser.get_entry_value')
def test_psu_workaround(self, mock_get_entry_value, mock_get_platform_name, mock_run_command):
    """InvalidPsuVolWA.run only runs the refresh command for an invalid threshold on a matching platform/PSU."""
    from sonic_platform.psu import InvalidPsuVolWA
    psu = Psu(0)
    invalid_value = InvalidPsuVolWA.INVALID_VOLTAGE_VALUE

    # Threshold value is not InvalidPsuVolWA.INVALID_VOLTAGE_VALUE
    assert InvalidPsuVolWA.run(psu, 9999, '') == 9999
    assert mock_run_command.call_count == 0

    # Platform name is not in InvalidPsuVolWA.EXPECT_PLATFORMS
    mock_get_platform_name.return_value = 'some platform'
    assert InvalidPsuVolWA.run(psu, invalid_value, '') == invalid_value
    assert mock_run_command.call_count == 0

    # PSU vendor is not InvalidPsuVolWA.EXPECT_VENDOR_NAME
    vpd_info = {
        InvalidPsuVolWA.MFR_FIELD: 'some psu',
        InvalidPsuVolWA.CAPACITY_FIELD: 'some capacity'
    }

    def get_entry_value(key):
        return vpd_info[key]

    mock_get_entry_value.side_effect = get_entry_value
    mock_get_platform_name.return_value = 'x86_64-mlnx_msn3700-r0'
    assert InvalidPsuVolWA.run(psu, invalid_value, '') == invalid_value
    assert mock_run_command.call_count == 0

    # PSU capacity is not InvalidPsuVolWA.EXPECT_CAPACITY
    vpd_info[InvalidPsuVolWA.MFR_FIELD] = InvalidPsuVolWA.EXPECT_VENDOR_NAME
    assert InvalidPsuVolWA.run(psu, invalid_value, '') == invalid_value
    assert mock_run_command.call_count == 0

    # Normal: the command is run exactly once and the re-read value (mocked to 9999) is returned
    vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY
    assert InvalidPsuVolWA.run(psu, invalid_value, '') == 9999
    assert mock_run_command.call_count == 1
    # Pin the "-s" flag rather than the full command line, so this test does not
    # depend on the exact spelling of the binary name.
    command = mock_run_command.call_args[0][0]
    assert '-s' in command.split()
|
||||||
|
@ -116,3 +116,7 @@ class TestUtils:
|
|||||||
|
|
||||||
assert func() == 100
|
assert func() == 100
|
||||||
assert mock_log.call_count == 1
|
assert mock_log.call_count == 1
|
||||||
|
|
||||||
|
def test_run_command(self):
    """run_command returns the stdout of the shell command with surrounding whitespace stripped."""
    # Use a command with a fixed output: 'ls' prints nothing when the working
    # directory is empty, which would make a bare truthiness check fail.
    output = utils.run_command('echo sonic')
    assert output == 'sonic'
|
||||||
|
Loading…
Reference in New Issue
Block a user