8e924b9a70
- Why I did it: Optimize the thermal control policies to simplify the logic, and add more protection code to the policies to make sure they work even if the kernel algorithm does not work.
- How I did it: Remove unused thermal policies. Add a timely ASIC temperature check to the thermal policy to make sure ASIC temperature and fan speed are coordinated. The minimum allowed fan speed is now calculated as the maximum of the expected fan speeds among all policies. Move some logic from fan.py to thermal.py to make it more readable.
- How to verify it: 1. Manual test. 2. Regression.
466 lines
16 KiB
Python
466 lines
16 KiB
Python
import os
|
|
import sys
|
|
import pytest
|
|
import json
|
|
from mock import MagicMock, patch
|
|
from .mock_platform import MockChassis, MockFan, MockFanDrawer, MockPsu
|
|
|
|
test_path = os.path.dirname(os.path.abspath(__file__))
|
|
modules_path = os.path.dirname(test_path)
|
|
sys.path.insert(0, modules_path)
|
|
|
|
from sonic_platform.thermal_manager import ThermalManager
|
|
from sonic_platform.thermal_infos import FanInfo, PsuInfo
|
|
from sonic_platform.thermal import Thermal, MAX_COOLING_LEVEL
|
|
|
|
|
|
@pytest.fixture(scope='session', autouse=True)
def thermal_manager():
    """Load the test thermal policy file and return the ThermalManager class.

    The fixture is session scoped and autouse, so the policy file that sits
    next to this test module is loaded once for the whole test session.
    """
    ThermalManager.load(os.path.join(test_path, 'thermal_policy.json'))
    return ThermalManager
|
|
|
|
|
|
def test_load_policy(thermal_manager):
    """Check that the loaded policy file registered the expected entries."""
    # Thermal information collectors expected after loading the policy file.
    loaded_infos = thermal_manager._thermal_info_dict
    for info_name in ('psu_info', 'fan_info', 'chassis_info'):
        assert info_name in loaded_infos

    # Named policies expected after loading the policy file.
    loaded_policies = thermal_manager._policy_dict
    for policy_name in ('any fan absence',
                        'any psu absence',
                        'any fan broken',
                        'all fan and psu presence'):
        assert policy_name in loaded_policies

    # Global settings read from the same file.
    assert thermal_manager._fan_speed_when_suspend == 60
    assert thermal_manager._run_thermal_algorithm_at_boot_up == False
|
|
|
|
|
|
def test_fan_info():
    """Verify FanInfo tracks absent, present and faulty fans across collections."""
    chassis = MockChassis()
    chassis.make_fan_absence()
    fan_info = FanInfo()

    def collect_counts():
        # Re-collect from the chassis and report (absent, present, faulty).
        fan_info.collect(chassis)
        return (len(fan_info.get_absence_fans()),
                len(fan_info.get_presence_fans()),
                len(fan_info.get_fault_fans()))

    # Initial state: the only fan is absent.
    assert collect_counts() == (1, 0, 0)
    assert fan_info.is_status_changed()

    # Plug the fan in.
    chassis.get_all_fan_drawers()[0].get_all_fans()[0].presence = True
    assert collect_counts() == (0, 1, 0)
    assert fan_info.is_status_changed()

    # Mark the (present) fan as faulty.
    chassis.get_all_fan_drawers()[0].get_all_fans()[0].status = False
    assert collect_counts() == (0, 1, 1)
    assert fan_info.is_status_changed()
|
|
|
|
def test_psu_info():
    """Verify PsuInfo tracks absent and present PSUs across collections."""
    chassis = MockChassis()
    chassis.make_psu_absence()
    psu_info = PsuInfo()

    def collect_counts():
        # Re-collect from the chassis and report (absent, present).
        psu_info.collect(chassis)
        return (len(psu_info.get_absence_psus()),
                len(psu_info.get_presence_psus()))

    # Initial state: the only PSU is absent.
    assert collect_counts() == (1, 0)
    assert psu_info.is_status_changed()

    # Plug the PSU in.
    psus = chassis.get_all_psus()
    psus[0].presence = True
    assert collect_counts() == (0, 1)
    assert psu_info.is_status_changed()

    # Clearing powergood is expected to leave the counts alone and not to be
    # reported as a status change.
    psus[0].powergood = False
    assert collect_counts() == (0, 1)
    assert not psu_info.is_status_changed()
|
|
|
|
|
|
@patch('sonic_platform.thermal.Thermal.monitor_asic_themal_zone', MagicMock())
@patch('sonic_platform.thermal.Thermal.get_min_allowed_cooling_level_by_thermal_zone', MagicMock(return_value=2))
@patch('sonic_platform.thermal.Thermal.get_cooling_level', MagicMock(return_value=6))
@patch('sonic_platform.thermal.Thermal.set_cooling_state')
@patch('sonic_platform.thermal.Thermal.set_cooling_level')
def test_fan_policy(mock_set_cooling_level, mock_set_cooling_state, thermal_manager):
    """Run the loaded policies while a fan goes absent, present, faulty and healthy.

    After each run the most recent call to the patched cooling setters is
    checked. The expected level 6 matches the value returned by the patched
    ``get_cooling_level``.
    """
    def run_and_expect(level, check_state=True):
        # Execute the policies once, then inspect the latest setter calls.
        thermal_manager.run_policy(chassis)
        mock_set_cooling_level.assert_called_with(level)
        if check_state:
            mock_set_cooling_state.assert_called_with(level)

    chassis = MockChassis()
    chassis.make_fan_absence()
    chassis.get_all_fan_drawers()[0].get_all_fans().append(MockFan())
    chassis.platform_name = 'some_platform'

    # One fan is absent.
    run_and_expect(MAX_COOLING_LEVEL)

    # The absent fan is plugged back in.
    Thermal.expect_cooling_level = None
    fans = chassis.get_all_fan_drawers()[0].get_all_fans()
    fans[0].presence = True
    run_and_expect(6)

    # The fan becomes faulty; only the cooling level is checked here.
    Thermal.expect_cooling_level = None
    fans[0].status = False
    run_and_expect(MAX_COOLING_LEVEL, check_state=False)

    # The fan recovers.
    Thermal.expect_cooling_level = None
    fans[0].status = True
    run_and_expect(6)
|
|
|
|
|
|
@patch('sonic_platform.thermal.Thermal.monitor_asic_themal_zone', MagicMock())
@patch('sonic_platform.thermal.Thermal.get_min_allowed_cooling_level_by_thermal_zone', MagicMock(return_value=2))
@patch('sonic_platform.thermal.Thermal.get_cooling_level', MagicMock(return_value=6))
@patch('sonic_platform.thermal.Thermal.set_cooling_state')
@patch('sonic_platform.thermal.Thermal.set_cooling_level')
def test_psu_policy(mock_set_cooling_level, mock_set_cooling_state, thermal_manager):
    """Run the loaded policies while a PSU goes from absent to present.

    After each run the most recent call to the patched cooling setters is
    checked. The expected level 6 matches the value returned by the patched
    ``get_cooling_level``.
    """
    def run_and_expect(level):
        # Execute the policies once, then inspect the latest setter calls.
        thermal_manager.run_policy(chassis)
        mock_set_cooling_level.assert_called_with(level)
        mock_set_cooling_state.assert_called_with(level)

    chassis = MockChassis()
    chassis.make_psu_absence()
    chassis.platform_name = 'some_platform'

    # The PSU is absent.
    run_and_expect(MAX_COOLING_LEVEL)

    # The PSU is plugged back in.
    psus = chassis.get_all_psus()
    psus[0].presence = True
    run_and_expect(6)
|
|
|
|
|
|
def test_any_fan_absence_condition():
    """AnyFanAbsenceCondition matches while a fan is absent and stops once it is present."""
    from sonic_platform.thermal_conditions import AnyFanAbsenceCondition

    chassis = MockChassis()
    chassis.make_fan_absence()
    fan_info = FanInfo()
    fan_info.collect(chassis)

    condition = AnyFanAbsenceCondition()
    info_dict = {'fan_info': fan_info}
    assert condition.is_match(info_dict)

    # Plug the absent fan in and refresh the collected information.
    absent_fan = chassis.get_all_fan_drawers()[0].get_all_fans()[0]
    absent_fan.presence = True
    fan_info.collect(chassis)
    assert not condition.is_match(info_dict)
|
|
|
|
|
|
def test_all_fan_absence_condition():
    """AllFanAbsenceCondition matches only once every fan is absent."""
    from sonic_platform.thermal_conditions import AllFanAbsenceCondition

    chassis = MockChassis()
    chassis.make_fan_absence()

    # Add a second fan next to the absent one.
    extra_fan = MockFan()
    chassis.get_all_fan_drawers()[0].get_all_fans().append(extra_fan)

    fan_info = FanInfo()
    fan_info.collect(chassis)

    condition = AllFanAbsenceCondition()
    info_dict = {'fan_info': fan_info}
    assert not condition.is_match(info_dict)

    # Remove the second fan as well.
    extra_fan.presence = False
    fan_info.collect(chassis)
    assert condition.is_match(info_dict)
|
|
|
|
|
|
def test_all_fan_presence_condition():
    """AllFanPresenceCondition matches only once every fan is present."""
    from sonic_platform.thermal_conditions import AllFanPresenceCondition

    chassis = MockChassis()
    chassis.make_fan_absence()

    # Add a second fan next to the absent one.
    fans = chassis.get_all_fan_drawers()[0].get_all_fans()
    fans.append(MockFan())

    fan_info = FanInfo()
    fan_info.collect(chassis)

    condition = AllFanPresenceCondition()
    info_dict = {'fan_info': fan_info}
    assert not condition.is_match(info_dict)

    # Plug the absent fan in.
    fans[0].presence = True
    fan_info.collect(chassis)
    assert condition.is_match(info_dict)
|
|
|
|
def test_any_fan_fault_condition():
    """AnyFanFaultCondition matches while a fan is faulty and stops once it recovers."""
    from sonic_platform.thermal_conditions import AnyFanFaultCondition

    chassis = MockChassis()
    chassis.get_all_fan_drawers().append(MockFanDrawer())

    # Populate the first drawer with one default fan and one faulty fan.
    broken_fan = MockFan()
    broken_fan.status = False
    fans = chassis.get_all_fan_drawers()[0].get_all_fans()
    fans.append(MockFan())
    fans.append(broken_fan)

    fan_info = FanInfo()
    fan_info.collect(chassis)

    condition = AnyFanFaultCondition()
    info_dict = {'fan_info': fan_info}
    assert condition.is_match(info_dict)

    # Repair the faulty fan.
    broken_fan.status = True
    fan_info.collect(chassis)
    assert not condition.is_match(info_dict)
|
|
|
|
def test_all_fan_good_condition():
    """AllFanGoodCondition matches only once no fan is faulty."""
    from sonic_platform.thermal_conditions import AllFanGoodCondition

    chassis = MockChassis()
    chassis.get_all_fan_drawers().append(MockFanDrawer())

    # Populate the first drawer with one default fan and one faulty fan.
    broken_fan = MockFan()
    broken_fan.status = False
    fans = chassis.get_all_fan_drawers()[0].get_all_fans()
    fans.append(MockFan())
    fans.append(broken_fan)

    fan_info = FanInfo()
    fan_info.collect(chassis)

    condition = AllFanGoodCondition()
    info_dict = {'fan_info': fan_info}
    assert not condition.is_match(info_dict)

    # Repair the faulty fan.
    broken_fan.status = True
    fan_info.collect(chassis)
    assert condition.is_match(info_dict)
|
|
|
|
|
|
def test_any_psu_absence_condition():
    """AnyPsuAbsenceCondition matches while a PSU is absent and stops once it is present."""
    from sonic_platform.thermal_conditions import AnyPsuAbsenceCondition

    chassis = MockChassis()
    chassis.make_psu_absence()
    psu_info = PsuInfo()
    psu_info.collect(chassis)

    condition = AnyPsuAbsenceCondition()
    info_dict = {'psu_info': psu_info}
    assert condition.is_match(info_dict)

    # Plug the absent PSU in and refresh the collected information.
    absent_psu = chassis.get_all_psus()[0]
    absent_psu.presence = True
    psu_info.collect(chassis)
    assert not condition.is_match(info_dict)
|
|
|
|
|
|
def test_all_psu_absence_condition():
    """AllPsuAbsenceCondition matches only once every PSU is absent."""
    from sonic_platform.thermal_conditions import AllPsuAbsenceCondition

    chassis = MockChassis()
    chassis.make_psu_absence()

    # Add a second PSU next to the absent one.
    extra_psu = MockPsu()
    chassis.get_all_psus().append(extra_psu)

    psu_info = PsuInfo()
    psu_info.collect(chassis)

    condition = AllPsuAbsenceCondition()
    info_dict = {'psu_info': psu_info}
    assert not condition.is_match(info_dict)

    # Remove the second PSU as well.
    extra_psu.presence = False
    psu_info.collect(chassis)
    assert condition.is_match(info_dict)
|
|
|
|
|
|
def test_all_psu_presence_condition():
    """AllPsuPresenceCondition matches only once every PSU is present.

    This test was previously named ``test_all_fan_presence_condition``, the
    same name as the fan test defined earlier in this module. The second
    definition rebound the name, so the fan test was never collected. The
    name now reflects the PSU condition that is actually exercised.
    """
    from sonic_platform.thermal_conditions import AllPsuPresenceCondition

    chassis = MockChassis()
    chassis.make_psu_absence()

    # Add a second PSU next to the absent one.
    psu_list = chassis.get_all_psus()
    psu_list.append(MockPsu())

    psu_info = PsuInfo()
    psu_info.collect(chassis)

    condition = AllPsuPresenceCondition()
    assert not condition.is_match({'psu_info': psu_info})

    # Plug the absent PSU in; now every PSU is present.
    psu_list[0].presence = True
    psu_info.collect(chassis)
    assert condition.is_match({'psu_info': psu_info})
|
|
|
|
|
|
def test_load_set_fan_speed_action():
    """SetAllFanSpeedAction loads a valid speed and rejects invalid JSON input."""
    from sonic_platform.thermal_actions import SetAllFanSpeedAction

    action = SetAllFanSpeedAction()

    # A valid speed given as a string is expected to be stored as the integer 50.
    action.load_from_json(json.loads('{\"speed\": \"50\"}'))
    assert action.speed == 50

    # A negative speed, a speed above 100 and a missing "speed" key must all
    # be rejected with ValueError.
    rejected_inputs = (
        '{\"speed\": \"-1\"}',
        '{\"speed\": \"101\"}',
        '{\"invalid\": \"101\"}',
    )
    for raw_json in rejected_inputs:
        parsed = json.loads(raw_json)
        with pytest.raises(ValueError):
            action.load_from_json(parsed)
|
|
|
|
|
|
@patch('sonic_platform.thermal.Thermal.set_cooling_level', MagicMock())
def test_execute_set_fan_speed_action():
    """Executing SetAllFanSpeedAction with speed 20 should request cooling level 2."""
    chassis = MockChassis()
    chassis.get_all_fan_drawers().append(MockFanDrawer())

    # Two fans in the first drawer.
    drawer_fans = chassis.get_all_fan_drawers()[0].get_all_fans()
    drawer_fans.extend([MockFan(), MockFan()])

    fan_info = FanInfo()
    fan_info.collect(chassis)

    # Start from a clean class-level expectation before running the action.
    Thermal.expect_cooling_level = None
    from sonic_platform.thermal_actions import SetAllFanSpeedAction
    set_speed = SetAllFanSpeedAction()
    set_speed.speed = 20
    set_speed.execute({'fan_info': fan_info})

    assert Thermal.expect_cooling_level == 2
|
|
|
|
|
|
def test_load_duplicate_condition():
    """Loading a policy described by duplicate_condition.json must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy

    json_path = os.path.join(test_path, 'duplicate_condition.json')
    with open(json_path) as json_file:
        policy_json = json.load(json_file)

    policy = ThermalPolicy()
    with pytest.raises(Exception):
        policy.load_from_json(policy_json)
|
|
|
|
def test_load_duplicate_action():
    """Loading a policy described by duplicate_action.json must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy

    json_path = os.path.join(test_path, 'duplicate_action.json')
    with open(json_path) as json_file:
        policy_json = json.load(json_file)

    policy = ThermalPolicy()
    with pytest.raises(Exception):
        policy.load_from_json(policy_json)
|
|
|
|
def test_load_empty_condition():
    """Loading a policy described by empty_condition.json must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy

    json_path = os.path.join(test_path, 'empty_condition.json')
    with open(json_path) as json_file:
        policy_json = json.load(json_file)

    policy = ThermalPolicy()
    with pytest.raises(Exception):
        policy.load_from_json(policy_json)
|
|
|
|
def test_load_empty_action():
    """Loading a policy described by empty_action.json must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_policy import ThermalPolicy

    json_path = os.path.join(test_path, 'empty_action.json')
    with open(json_path) as json_file:
        policy_json = json.load(json_file)

    policy = ThermalPolicy()
    with pytest.raises(Exception):
        policy.load_from_json(policy_json)
|
|
|
|
def test_load_policy_with_same_conditions():
    """Loading policy_with_same_conditions.json through a bare manager must raise."""
    from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase

    class MockThermalManager(ThermalManagerBase):
        # Plain subclass so the load does not go through the ThermalManager
        # class that the session fixture already loaded.
        pass

    policy_path = os.path.join(test_path, 'policy_with_same_conditions.json')
    with pytest.raises(Exception):
        MockThermalManager.load(policy_path)
|
|
|
|
def test_dynamic_minimum_table_data():
    """Validate the minimum table of every platform in DEVICE_DATA that defines one."""
    from sonic_platform.device_data import DEVICE_DATA

    for platform, platform_data in DEVICE_DATA.items():
        # Skip platforms that carry no thermal section or no minimum table.
        if 'thermal' not in platform_data:
            continue
        if 'minimum_table' not in platform_data['thermal']:
            continue
        check_minimum_table_data(platform, platform_data['thermal']['minimum_table'])
|
|
|
|
def check_minimum_table_data(platform, minimum_table):
    """Validate one platform's dynamic minimum cooling-level table.

    Every check failure is reported as an ``AssertionError`` whose message
    names the platform, the category and the offending entry. Malformed
    category keys and range strings are reported the same way instead of
    escaping as a bare ``IndexError`` or ``ValueError``.

    Args:
        platform: Platform name, used only in failure messages.
        minimum_table: Mapping of category key to range data. A category key
            is ``'<dir>_<trust>'`` where ``<dir>`` is one of ``p2c``, ``c2p``,
            ``unk`` and ``<trust>`` is ``trust`` or ``untrust``. The range data
            maps a ``'<low>:<high>'`` string to a cooling level.

    Raises:
        AssertionError: If a category key or range string is malformed, the
            ranges (ordered by cooling level) do not start at -127 or are not
            contiguous, a cooling level is outside 10..20, or cooling levels
            are not strictly increasing.
    """
    valid_dir = ('p2c', 'c2p', 'unk')
    valid_trust_state = ('trust', 'untrust')

    for category, data in minimum_table.items():
        key_data = category.split('_')
        assert len(key_data) >= 2, \
            '{}: malformed category key {!r}'.format(platform, category)
        direction, trust_state = key_data[0], key_data[1]
        assert direction in valid_dir, \
            '{}: invalid direction in category {!r}'.format(platform, category)
        assert trust_state in valid_trust_state, \
            '{}: invalid trust state in category {!r}'.format(platform, category)

        where = '{}-{}-{}'.format(platform, direction, trust_state)

        # Walk the entries ordered by cooling level; the sort is stable, so
        # entries with equal levels keep their original order.
        entries = sorted(data.items(), key=lambda entry: entry[1])

        previous_edge = None
        previous_cooling_level = None
        for range_str, cooling_level in entries:
            item = (cooling_level, range_str)

            bounds = range_str.split(':')
            assert len(bounds) >= 2, \
                '{} error, malformed range, item={}'.format(where, item)
            try:
                low = int(bounds[0])
                high = int(bounds[1])
            except ValueError as err:
                raise AssertionError(
                    '{} error, non-integer range, item={}'.format(where, item)) from err
            assert low < high, \
                '{} error, empty range, item={}'.format(where, item)

            # The first range must start at -127; each following range must
            # begin right after the previous one ends.
            if previous_edge is None:
                assert low == -127, \
                    '{} error, first range must start at -127, item={}'.format(where, item)
            else:
                assert low - previous_edge == 1, \
                    '{} error, item={}'.format(where, item)
            previous_edge = high

            assert 10 <= cooling_level <= 20, \
                '{} error, cooling level out of range, item={}'.format(where, item)
            if previous_cooling_level is not None:
                assert cooling_level > previous_cooling_level, \
                    '{} error, cooling level not increasing, item={}'.format(where, item)
            previous_cooling_level = cooling_level
|
|
|
|
|
|
@patch('sonic_platform.thermal.Thermal.monitor_asic_themal_zone', MagicMock())
@patch('sonic_platform.thermal.Thermal.get_min_allowed_cooling_level_by_thermal_zone')
@patch('sonic_platform.thermal.Thermal.get_min_amb_temperature')
@patch('sonic_platform.thermal.Thermal.check_module_temperature_trustable')
def test_thermal_recover_policy(mock_check_trustable, mock_get_min_amb, moc_get_min_allowed):
    """Check the cooling level ThermalRecoverAction requests in several scenarios.

    The patched Thermal helpers supply the trust state, the minimum ambient
    temperature and the minimum level allowed by the thermal zone; after each
    execution the class-level ``Thermal.expect_cooling_level`` is inspected.
    """
    from sonic_platform.thermal_infos import ChassisInfo
    from sonic_platform.thermal_actions import ThermalRecoverAction

    chassis = MockChassis()
    chassis.platform_name = 'invalid'
    chassis_info = ChassisInfo()
    chassis_info._chassis = chassis
    info_dict = {ChassisInfo.INFO_NAME: chassis_info}
    recover = ThermalRecoverAction()

    # Scenario 1: platform name 'invalid', thermal zone allows level 2.
    Thermal.expect_cooling_level = None
    moc_get_min_allowed.return_value = 2
    recover.execute(info_dict)
    assert Thermal.expect_cooling_level == 6
    Thermal.last_set_cooling_level = Thermal.expect_cooling_level

    # Scenario 2: named platform, trusted module temperature, and no level
    # reported by the thermal zone -> nothing is requested.
    Thermal.expect_cooling_level = None
    chassis.platform_name = 'x86_64-mlnx_msn2700-r0'
    mock_check_trustable.return_value = 'trust'
    mock_get_min_amb.return_value = 29999
    moc_get_min_allowed.return_value = None
    recover.execute(info_dict)
    assert Thermal.expect_cooling_level is None

    # Scenario 3: same platform, thermal zone now allows level 4.
    moc_get_min_allowed.return_value = 4
    recover.execute(info_dict)
    assert Thermal.expect_cooling_level == 4
    Thermal.last_set_cooling_level = Thermal.expect_cooling_level

    # Scenario 4: untrusted module temperature with a higher ambient reading.
    mock_check_trustable.return_value = 'untrust'
    mock_get_min_amb.return_value = 31001
    recover.execute(info_dict)
    assert Thermal.expect_cooling_level == 5
|
|
|
|
|
|
@patch('sonic_platform.utils.read_int_from_file')
def test_monitor_asic_themal_zone(mock_read_int):
    """Check the cooling state requested by Thermal.monitor_asic_themal_zone.

    ``read_int_from_file`` is patched with a two-element ``side_effect`` list,
    so successive reads return those values in order.
    NOTE(review): presumably the first value is the ASIC temperature and the
    second is the threshold it is compared with -- confirm against
    Thermal.monitor_asic_themal_zone.
    """
    # First reading above the second: the maximum cooling state is expected.
    mock_read_int.side_effect = [111000, 105000]
    Thermal.monitor_asic_themal_zone()
    assert Thermal.expect_cooling_state == MAX_COOLING_LEVEL

    # Clear the recorded calls. The previous ``reset()`` was not a Mock API:
    # it only created a child mock and reset nothing.
    mock_read_int.reset_mock()

    # First reading below the second: no cooling state is expected.
    mock_read_int.side_effect = [104000, 105000]
    Thermal.monitor_asic_themal_zone()
    assert Thermal.expect_cooling_state is None
|
|
|
|
|
|
def test_set_expect_cooling_level():
    """Thermal.set_expect_cooling_level keeps the highest level requested so far."""
    # Start from a clean class-level state. Other tests in this module leave a
    # value in Thermal.expect_cooling_level, and a leftover level above 5
    # would make the first assertion depend on test execution order.
    Thermal.expect_cooling_level = None

    Thermal.set_expect_cooling_level(5)
    assert Thermal.expect_cooling_level == 5

    # A lower request must not replace the current expectation.
    Thermal.set_expect_cooling_level(3)
    assert Thermal.expect_cooling_level == 5

    # A higher request replaces it.
    Thermal.set_expect_cooling_level(10)
    assert Thermal.expect_cooling_level == 10
|