[Mellanox] Adjust PSU voltage WA (#10619)

- Why I did it
InvalidPsuVolWA.run might raise an exception if the user powers off the PSU while it is running. This exception is not caught and propagates to psud, which causes psud to fail to update PSU data in the DB.

- How I did it
1. Change the log level used when the WA does not work. This can happen when the user powers off the PSU, so a warning is more appropriate than an error.
2. Change the wait time from 5 seconds to 1 second to avoid introducing too much delay in psud. 1 second is usually enough per my tests.
3. Give a default return value to the functions get_voltage_low_threshold and get_voltage_high_threshold so that exceptions do not reach psud.

- How to verify it
Manual test.
Run sonic-mgmt regression
This commit is contained in:
Junchao-Mellanox 2022-04-22 16:02:30 +08:00 committed by GitHub
parent 37debbeb38
commit af5e5c4c94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 7 deletions

View File

@ -397,6 +397,7 @@ class Psu(FixedPsu):
return None return None
@utils.default_return(None)
def get_voltage_high_threshold(self): def get_voltage_high_threshold(self):
""" """
Retrieves the high threshold PSU voltage output Retrieves the high threshold PSU voltage output
@ -414,10 +415,12 @@ class Psu(FixedPsu):
if 'max' in capability: if 'max' in capability:
max_voltage = utils.read_int_from_file(self.psu_voltage_max, log_func=logger.log_info) max_voltage = utils.read_int_from_file(self.psu_voltage_max, log_func=logger.log_info)
max_voltage = InvalidPsuVolWA.run(self, max_voltage, self.psu_voltage_max) max_voltage = InvalidPsuVolWA.run(self, max_voltage, self.psu_voltage_max)
return float(max_voltage) / 1000 if max_voltage:
return float(max_voltage) / 1000
return None return None
@utils.default_return(None)
def get_voltage_low_threshold(self): def get_voltage_low_threshold(self):
""" """
Retrieves the low threshold PSU voltage output Retrieves the low threshold PSU voltage output
@ -435,7 +438,8 @@ class Psu(FixedPsu):
if 'min' in capability: if 'min' in capability:
min_voltage = utils.read_int_from_file(self.psu_voltage_min, log_func=logger.log_info) min_voltage = utils.read_int_from_file(self.psu_voltage_min, log_func=logger.log_info)
min_voltage = InvalidPsuVolWA.run(self, min_voltage, self.psu_voltage_min) min_voltage = InvalidPsuVolWA.run(self, min_voltage, self.psu_voltage_min)
return float(min_voltage) / 1000 if min_voltage:
return float(min_voltage) / 1000
return None return None
@ -471,7 +475,7 @@ class InvalidPsuVolWA:
EXPECT_PLATFORMS = ['x86_64-mlnx_msn3700-r0', 'x86_64-mlnx_msn3700c-r0', 'x86_64-mlnx_msn3800-r0', 'x86_64-mlnx_msn4600c-r0'] EXPECT_PLATFORMS = ['x86_64-mlnx_msn3700-r0', 'x86_64-mlnx_msn3700c-r0', 'x86_64-mlnx_msn3800-r0', 'x86_64-mlnx_msn4600c-r0']
MFR_FIELD = 'MFR_NAME' MFR_FIELD = 'MFR_NAME'
CAPACITY_FIELD = 'CAPACITY' CAPACITY_FIELD = 'CAPACITY'
WAIT_TIME = 5 WAIT_TIME = 1
@classmethod @classmethod
def run(cls, psu, threshold_value, threshold_file): def run(cls, psu, threshold_value, threshold_file):
@ -499,8 +503,8 @@ class InvalidPsuVolWA:
logger.log_warning('PSU {} threshold file {} value {}, but its capacity is {}'.format(psu.index, threshold_file, threshold_value, capacity)) logger.log_warning('PSU {} threshold file {} value {}, but its capacity is {}'.format(psu.index, threshold_file, threshold_value, capacity))
return threshold_value return threshold_value
# Run a sensor -s command to triger hardware to get the real threashold value # Run a sensors -s command to triger hardware to get the real threashold value
utils.run_command('sensor -s') utils.run_command('sensors -s')
# Wait for the threshold value change # Wait for the threshold value change
return cls.wait_set_done(threshold_file) return cls.wait_set_done(threshold_file)
@ -516,5 +520,7 @@ class InvalidPsuVolWA:
wait_time -= 1 wait_time -= 1
time.sleep(1) time.sleep(1)
logger.log_error('sensor -s does not recover PSU threshold sensor after {} seconds'.format(cls.WAIT_TIME)) # It is enough to use warning here because user might power off/on the PSU which may cause threshold_file
# does not exist
logger.log_warning('sensors -s does not recover PSU threshold sensor after {} seconds'.format(cls.WAIT_TIME))
return None return None

View File

@ -152,4 +152,4 @@ class TestPsu:
# Normal # Normal
vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY
assert InvalidPsuVolWA.run(psu, InvalidPsuVolWA.INVALID_VOLTAGE_VALUE, '') == 9999 assert InvalidPsuVolWA.run(psu, InvalidPsuVolWA.INVALID_VOLTAGE_VALUE, '') == 9999
mock_run_command.assert_called_with('sensor -s') mock_run_command.assert_called_with('sensors -s')