[Mellanox] Adjust log level to avoid too many thermal logs (#4631)

* Trigger thermal action log only if thermal condition changes
* Test file existence before reading file content
* Fix error when setting PSU fan speed
* Remove logs because they are printed too frequently
This commit is contained in:
Junchao-Mellanox 2020-05-27 01:45:25 +08:00 committed by GitHub
parent 767152f09b
commit f277d13cd6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 19 additions and 13 deletions

View File

@ -221,6 +221,8 @@ class Fan(FanBase):
status = True status = True
if self.is_psu_fan: if self.is_psu_fan:
if not self.get_presence():
return False
from .thermal import logger from .thermal import logger
try: try:
with open(self.psu_i2c_bus_path, 'r') as f: with open(self.psu_i2c_bus_path, 'r') as f:

View File

@ -122,6 +122,8 @@ class Psu(PsuBase):
""" """
result = 0 result = 0
try: try:
if not os.path.exists(filename):
return result
with open(filename, 'r') as fileobj: with open(filename, 'r') as fileobj:
result = int(fileobj.read().strip()) result = int(fileobj.read().strip())
except Exception as e: except Exception as e:

View File

@ -495,12 +495,15 @@ class Thermal(ThermalBase):
We usually disable the algorithm when we want to set a fix speed. E.g, when We usually disable the algorithm when we want to set a fix speed. E.g, when
a fan unit is removed from system, we will set fan speed to 100% and disable a fan unit is removed from system, we will set fan speed to 100% and disable
the algorithm to avoid it adjust the speed. the algorithm to avoid it adjust the speed.
Returns:
True if thermal algorithm status changed.
""" """
if not cls.thermal_profile: if not cls.thermal_profile:
raise Exception("Fail to get thermal profile for this switch") raise Exception("Fail to get thermal profile for this switch")
if not force and cls.thermal_algorithm_status == status: if not force and cls.thermal_algorithm_status == status:
return return False
cls.thermal_algorithm_status = status cls.thermal_algorithm_status = status
content = "enabled" if status else "disabled" content = "enabled" if status else "disabled"
@ -521,6 +524,7 @@ class Thermal(ThermalBase):
for index in range(count): for index in range(count):
cls._write_generic_file(join(THERMAL_ZONE_GEARBOX_PATH.format(start + index), THERMAL_ZONE_MODE), content) cls._write_generic_file(join(THERMAL_ZONE_GEARBOX_PATH.format(start + index), THERMAL_ZONE_MODE), content)
cls._write_generic_file(join(THERMAL_ZONE_GEARBOX_PATH.format(start + index), THERMAL_ZONE_POLICY), policy) cls._write_generic_file(join(THERMAL_ZONE_GEARBOX_PATH.format(start + index), THERMAL_ZONE_POLICY), policy)
return True
@classmethod @classmethod
def check_thermal_zone_temperature(cls): def check_thermal_zone_temperature(cls):

View File

@ -66,9 +66,6 @@ class SetAllFanSpeedAction(SetFanSpeedAction):
for psu_fan in psu.get_all_fans(): for psu_fan in psu.get_all_fans():
psu_fan.set_speed(speed) psu_fan.set_speed(speed)
logger.log_info('Updated PSU FAN speed to {}%'.format(speed))
@thermal_json_object('fan.all.check_and_set_speed') @thermal_json_object('fan.all.check_and_set_speed')
class CheckAndSetAllFanSpeedAction(SetAllFanSpeedAction): class CheckAndSetAllFanSpeedAction(SetAllFanSpeedAction):
@ -131,14 +128,17 @@ class ControlThermalAlgoAction(ThermalPolicyActionBase):
from .thermal import Thermal from .thermal import Thermal
from .thermal_conditions import UpdateCoolingLevelToMinCondition from .thermal_conditions import UpdateCoolingLevelToMinCondition
from .fan import Fan from .fan import Fan
Thermal.set_thermal_algorithm_status(self.status, False) status_changed = Thermal.set_thermal_algorithm_status(self.status, False)
if self.status:
# Check thermal zone temperature, if all thermal zone temperature
# back to normal, set it to minimum allowed speed to
# save power
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
logger.log_info('Changed thermal algorithm status to {}'.format(self.status)) # Only update cooling level if thermal algorithm status changed
if status_changed:
if self.status:
# Check thermal zone temperature, if all thermal zone temperature
# back to normal, set it to minimum allowed speed to
# save power
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
logger.log_info('Changed thermal algorithm status to {}'.format(self.status))
class ChangeMinCoolingLevelAction(ThermalPolicyActionBase): class ChangeMinCoolingLevelAction(ThermalPolicyActionBase):
@ -174,8 +174,6 @@ class ChangeMinCoolingLevelAction(ThermalPolicyActionBase):
Fan.set_cooling_level(Fan.min_cooling_level, current_cooling_level) Fan.set_cooling_level(Fan.min_cooling_level, current_cooling_level)
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict) UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
logger.log_info('Changed minimum cooling level to {}'.format(Fan.min_cooling_level))
class UpdatePsuFanSpeedAction(ThermalPolicyActionBase): class UpdatePsuFanSpeedAction(ThermalPolicyActionBase):
def execute(self, thermal_info_dict): def execute(self, thermal_info_dict):