1b84f3daa5
- Why I did it When module is totally under software control, driver cannot get module temperature/temperature threshold from firmware. In this case, sonic needs to get temperature/temperature threshold from EEPROM. In this PR, a thread thermal updater is created to update module temperature/temperature threshold while software control is enabled. - How I did it Query ASIC temperature from SDK sysfs and update hw-management-tc periodically Query Module temperature from EEPROM and update hw-management-tc periodically - How to verify it Manual test New Unit tests
129 lines
5.1 KiB
Python
129 lines
5.1 KiB
Python
#
|
|
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
|
|
# Apache-2.0
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
import time
|
|
from unittest import mock
|
|
|
|
from sonic_platform import utils
|
|
from sonic_platform.thermal_updater import ThermalUpdater, hw_management_independent_mode_update
|
|
from sonic_platform.thermal_updater import ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD, \
|
|
ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD
|
|
|
|
|
|
mock_tc_config = """
|
|
{
|
|
"dev_parameters": {
|
|
"asic": {
|
|
"pwm_min": 20,
|
|
"pwm_max": 100,
|
|
"val_min": "!70000",
|
|
"val_max": "!105000",
|
|
"poll_time": 3
|
|
},
|
|
"module\\\\d+": {
|
|
"pwm_min": 20,
|
|
"pwm_max": 100,
|
|
"val_min": 60000,
|
|
"val_max": 80000,
|
|
"poll_time": 20
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
|
|
|
|
class TestThermalUpdater:
|
|
def test_load_tc_config_non_exists(self):
|
|
updater = ThermalUpdater(None)
|
|
updater.load_tc_config()
|
|
assert updater._timer._timestamp_queue.qsize() == 2
|
|
|
|
def test_load_tc_config_mocked(self):
|
|
updater = ThermalUpdater(None)
|
|
mock_os_open = mock.mock_open(read_data=mock_tc_config)
|
|
with mock.patch('sonic_platform.utils.open', mock_os_open):
|
|
updater.load_tc_config()
|
|
assert updater._timer._timestamp_queue.qsize() == 2
|
|
|
|
@mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_asic', mock.MagicMock())
|
|
@mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_module', mock.MagicMock())
|
|
@mock.patch('sonic_platform.thermal_updater.ThermalUpdater.wait_all_sfp_ready')
|
|
@mock.patch('sonic_platform.utils.write_file')
|
|
def test_start_stop(self, mock_write, mock_wait):
|
|
mock_wait.return_value = True
|
|
mock_sfp = mock.MagicMock()
|
|
mock_sfp.sdk_index = 1
|
|
updater = ThermalUpdater([mock_sfp])
|
|
updater.start()
|
|
mock_write.assert_called_once_with('/run/hw-management/config/suspend', 0)
|
|
utils.wait_until(updater._timer.is_alive, timeout=5)
|
|
|
|
mock_write.reset_mock()
|
|
updater.stop()
|
|
assert not updater._timer.is_alive()
|
|
mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1)
|
|
|
|
mock_wait.return_value = False
|
|
mock_write.reset_mock()
|
|
updater.start()
|
|
mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1)
|
|
updater.stop()
|
|
|
|
@mock.patch('sonic_platform.thermal_updater.time.sleep', mock.MagicMock())
|
|
def test_wait_all_sfp_ready(self):
|
|
mock_sfp = mock.MagicMock()
|
|
mock_sfp.is_sw_control = mock.MagicMock(return_value=True)
|
|
updater = ThermalUpdater([mock_sfp])
|
|
assert updater.wait_all_sfp_ready()
|
|
mock_sfp.is_sw_control.side_effect = Exception('')
|
|
assert not updater.wait_all_sfp_ready()
|
|
|
|
@mock.patch('sonic_platform.utils.read_int_from_file')
|
|
def test_update_asic(self, mock_read):
|
|
mock_read.return_value = 8
|
|
updater = ThermalUpdater(None)
|
|
assert updater.get_asic_temp() == 1000
|
|
assert updater.get_asic_temp_warning_threashold() == 1000
|
|
assert updater.get_asic_temp_critical_threashold() == 1000
|
|
updater.update_asic()
|
|
hw_management_independent_mode_update.thermal_data_set_asic.assert_called_once()
|
|
|
|
mock_read.return_value = None
|
|
assert updater.get_asic_temp() is None
|
|
assert updater.get_asic_temp_warning_threashold() == ASIC_DEFAULT_TEMP_WARNNING_THRESHOLD
|
|
assert updater.get_asic_temp_critical_threashold() == ASIC_DEFAULT_TEMP_CRITICAL_THRESHOLD
|
|
|
|
def test_update_module(self):
|
|
mock_sfp = mock.MagicMock()
|
|
mock_sfp.sdk_index = 10
|
|
mock_sfp.get_presence = mock.MagicMock(return_value=True)
|
|
mock_sfp.get_temperature = mock.MagicMock(return_value=55.0)
|
|
mock_sfp.get_temperature_warning_threashold = mock.MagicMock(return_value=70.0)
|
|
mock_sfp.get_temperature_critical_threashold = mock.MagicMock(return_value=80.0)
|
|
updater = ThermalUpdater([mock_sfp])
|
|
updater.update_module()
|
|
hw_management_independent_mode_update.thermal_data_set_module.assert_called_once_with(0, 11, 55000, 80000, 70000, 0)
|
|
|
|
mock_sfp.get_temperature = mock.MagicMock(return_value=0.0)
|
|
hw_management_independent_mode_update.reset_mock()
|
|
updater.update_module()
|
|
hw_management_independent_mode_update.thermal_data_set_module.assert_called_once_with(0, 11, 0, 0, 0, 0)
|
|
|
|
mock_sfp.get_presence = mock.MagicMock(return_value=False)
|
|
updater.update_module()
|
|
hw_management_independent_mode_update.thermal_data_clean_module.assert_called_once_with(0, 11)
|