[Mellanox] Fix issue: thermal zone threshold value 0 causes fan speed stuck at 100% (#10057)

- Why I did it
In SONiC thermal control algorithm, it compares thermal zone temperature with thermal zone threshold. Previously, a thermal zone with no thermal sensor can still get its threshold. However, a recently driver patch changes this behavior: a thermal zone with no thermal sensor will return 0 for threshold. We need to ignore such thermal zone.

- How I did it
Ignore thermal zones whose temperature is 0.

- How to verify it
Added unit test case and Manual test
This commit is contained in:
Junchao-Mellanox 2022-02-24 18:05:56 +08:00 committed by Judy Joseph
parent 00940941b2
commit ba4da1597a
2 changed files with 32 additions and 1 deletions

View File

@ -400,9 +400,14 @@ class Thermal(ThermalBase):
thermal_zone_present = False
try:
for thermal_zone_folder in glob.iglob(THERMAL_ZONE_FOLDER_WILDCARD):
current = utils.read_int_from_file(os.path.join(thermal_zone_folder, THERMAL_ZONE_TEMP_FILE))
if current == 0:
# Temperature value 0 means that this thermal zone has no
# sensor and it should be ignored in this loop
continue
thermal_zone_present = True
normal_thresh = utils.read_int_from_file(os.path.join(thermal_zone_folder, THERMAL_ZONE_NORMAL_THRESHOLD))
current = utils.read_int_from_file(os.path.join(thermal_zone_folder, THERMAL_ZONE_TEMP_FILE))
if current < normal_thresh - THERMAL_ZONE_HYSTERESIS:
continue

View File

@ -238,6 +238,32 @@ class TestThermal:
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 81000
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() is None
@mock.patch('glob.iglob', mock.MagicMock(return_value=['thermal_zone1', 'thermal_zone2']))
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_no_sensor_thermal_zone(self, mock_read_file):
from sonic_platform.thermal import Thermal, THERMAL_ZONE_TEMP_FILE, THERMAL_ZONE_HIGH_THRESHOLD, THERMAL_ZONE_NORMAL_THRESHOLD, MIN_COOLING_LEVEL_FOR_HIGH, MIN_COOLING_LEVEL_FOR_NORMAL
mock_file_content = {}
def mock_read_int_from_file(file_path, **kwargs):
return mock_file_content[file_path]
mock_read_file.side_effect = mock_read_int_from_file
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_NORMAL_THRESHOLD)] = 0
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_HIGH_THRESHOLD)] = 0
mock_file_content[os.path.join('thermal_zone1', THERMAL_ZONE_TEMP_FILE)] = 0
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_NORMAL_THRESHOLD)] = 75000
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_HIGH_THRESHOLD)] = 85000
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_TEMP_FILE)] = 24000
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() == MIN_COOLING_LEVEL_FOR_NORMAL
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_TEMP_FILE)] = 71000
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() == MIN_COOLING_LEVEL_FOR_HIGH
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_TEMP_FILE)] = 79000
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() == MIN_COOLING_LEVEL_FOR_HIGH
mock_file_content[os.path.join('thermal_zone2', THERMAL_ZONE_TEMP_FILE)] = 81000
assert Thermal.get_min_allowed_cooling_level_by_thermal_zone() is None
def test_check_module_temperature_trustable(self):
from sonic_platform.thermal import Thermal