sonic-buildimage/platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py
Junchao-Mellanox 8de7cb5988
[202311] [Mellanox] update asic and module temperature in a thread for CMIS management (#16955) (#17699)
- Why I did it
When module is totally under software control, driver cannot get module temperature/temperature threshold from firmware. In this case, sonic needs to get temperature/temperature threshold from EEPROM. In this PR, a thread thermal updater is created to update module temperature/temperature threshold while software control is enabled.

- How I did it
Query ASIC temperature from SDK sysfs and update hw-management-tc periodically
Query Module temperature from EEPROM and update hw-management-tc periodically

- How to verify it
Manual test
New Unit tests
2024-01-08 10:50:59 -08:00

129 lines
5.1 KiB
Python

#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
from unittest import mock
from sonic_platform import utils
from sonic_platform.thermal_updater import ThermalUpdater, hw_management_independent_mode_update
from sonic_platform.thermal_updater import ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD, \
ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD
mock_tc_config = """
{
"dev_parameters": {
"asic": {
"pwm_min": 20,
"pwm_max": 100,
"val_min": "!70000",
"val_max": "!105000",
"poll_time": 3
},
"module\\\\d+": {
"pwm_min": 20,
"pwm_max": 100,
"val_min": 60000,
"val_max": 80000,
"poll_time": 20
}
}
}
"""
class TestThermalUpdater:
def test_load_tc_config_non_exists(self):
updater = ThermalUpdater(None)
updater.load_tc_config()
assert updater._timer._timestamp_queue.qsize() == 2
def test_load_tc_config_mocked(self):
updater = ThermalUpdater(None)
mock_os_open = mock.mock_open(read_data=mock_tc_config)
with mock.patch('sonic_platform.utils.open', mock_os_open):
updater.load_tc_config()
assert updater._timer._timestamp_queue.qsize() == 2
@mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_asic', mock.MagicMock())
@mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_module', mock.MagicMock())
@mock.patch('sonic_platform.thermal_updater.ThermalUpdater.wait_all_sfp_ready')
@mock.patch('sonic_platform.utils.write_file')
def test_start_stop(self, mock_write, mock_wait):
mock_wait.return_value = True
mock_sfp = mock.MagicMock()
mock_sfp.sdk_index = 1
updater = ThermalUpdater([mock_sfp])
updater.start()
mock_write.assert_called_once_with('/run/hw-management/config/suspend', 0)
utils.wait_until(updater._timer.is_alive, timeout=5)
mock_write.reset_mock()
updater.stop()
assert not updater._timer.is_alive()
mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1)
mock_wait.return_value = False
mock_write.reset_mock()
updater.start()
mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1)
updater.stop()
@mock.patch('sonic_platform.thermal_updater.time.sleep', mock.MagicMock())
def test_wait_all_sfp_ready(self):
mock_sfp = mock.MagicMock()
mock_sfp.is_sw_control = mock.MagicMock(return_value=True)
updater = ThermalUpdater([mock_sfp])
assert updater.wait_all_sfp_ready()
mock_sfp.is_sw_control.side_effect = Exception('')
assert not updater.wait_all_sfp_ready()
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_update_asic(self, mock_read):
mock_read.return_value = 8
updater = ThermalUpdater(None)
assert updater.get_asic_temp() == 1000
assert updater.get_asic_temp_warning_threashold() == 1000
assert updater.get_asic_temp_critical_threashold() == 1000
updater.update_asic()
hw_management_independent_mode_update.thermal_data_set_asic.assert_called_once()
mock_read.return_value = None
assert updater.get_asic_temp() is None
assert updater.get_asic_temp_warning_threashold() == ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD
assert updater.get_asic_temp_critical_threashold() == ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD
def test_update_module(self):
mock_sfp = mock.MagicMock()
mock_sfp.sdk_index = 10
mock_sfp.get_presence = mock.MagicMock(return_value=True)
mock_sfp.get_temperature = mock.MagicMock(return_value=55.0)
mock_sfp.get_temperature_warning_threashold = mock.MagicMock(return_value=70.0)
mock_sfp.get_temperature_critical_threashold = mock.MagicMock(return_value=80.0)
updater = ThermalUpdater([mock_sfp])
updater.update_module()
hw_management_independent_mode_update.thermal_data_set_module.assert_called_once_with(0, 11, 55000, 80000, 70000, 0)
mock_sfp.get_temperature = mock.MagicMock(return_value=0.0)
hw_management_independent_mode_update.reset_mock()
updater.update_module()
hw_management_independent_mode_update.thermal_data_set_module.assert_called_once_with(0, 11, 0, 0, 0, 0)
mock_sfp.get_presence = mock.MagicMock(return_value=False)
updater.update_module()
hw_management_independent_mode_update.thermal_data_clean_module.assert_called_once_with(0, 11)