2ffc9d572f
- Why I did it: Optimize thermal control policies to simplify the logic, and add more protection code in the policies to make sure thermal control works even if the kernel algorithm does not work. - How I did it: Reduce unused thermal policies. Add a timely ASIC temperature check in the thermal policy to make sure ASIC temperature and fan speed are coordinated. The minimum allowed fan speed is now calculated as the max of the expected fan speeds among all policies. Move some logic from fan.py to thermal.py to make it more readable. - How to verify it: 1. Manual test 2. Regression
335 lines
12 KiB
Python
335 lines
12 KiB
Python
#!/usr/bin/env python
|
|
|
|
#############################################################################
|
|
# Mellanox
|
|
#
|
|
# Module contains an implementation of SONiC Platform Base API and
|
|
# provides the FANs status which are available in the platform
|
|
#
|
|
#############################################################################
|
|
|
|
import os.path
|
|
import subprocess
|
|
|
|
try:
|
|
from sonic_platform_base.fan_base import FanBase
|
|
|
|
from .led import SharedLed, ComponentFaultyIndicator
|
|
from .utils import read_int_from_file, read_str_from_file, write_file
|
|
from .thermal import Thermal
|
|
except ImportError as e:
|
|
raise ImportError (str(e) + "- required module not found")
|
|
|
|
# Values written to an LED file to switch that LED off / on
LED_OFF = 0
LED_ON = 1

# PWM value that corresponds to 100% fan speed
PWM_MAX = 255

# hw-management directories holding the fan, LED and configuration files
FAN_PATH = "/var/run/hw-management/thermal/"
LED_PATH = "/var/run/hw-management/led/"
CONFIG_PATH = "/var/run/hw-management/config"

# fan_dir isn't supported on Spectrum 1.
# It is supported on Spectrum 2 and later switches.
FAN_DIR = "/var/run/hw-management/system/fan_dir"

# Platforms with unpluggable FANs:
# 1. don't have fanX_status and should be treated as always present
platform_with_unplugable_fan = [
    'x86_64-mlnx_msn2010-r0',
    'x86_64-mlnx_msn2100-r0',
]

# Key of the single LED shared by all fans on the platforms listed above
VIRTUAL_DRAWER_INDEX = 0
|
|
|
|
|
|
class Fan(FanBase):
    """Platform-specific Fan class

    An instance represents either a chassis fan or the fan of a PSU. Fan
    state is read from, and written to, files below the hw-management
    directories.
    """

    STATUS_LED_COLOR_ORANGE = "orange"

    # Fan drawer leds: one shared LED object per drawer index, common to
    # every Fan instance
    fan_drawer_leds = {}

    # PSU fan speed vector: hexadecimal speed percentages, looked up by
    # cooling level or by (requested speed / 10). The first seven entries
    # are all 0x3c (60), the remaining ones rise in steps of 10 up to
    # 0x64 (100).
    PSU_FAN_SPEED = ['0x3c'] * 7 + ['0x46', '0x50', '0x5a', '0x64']
|
|
|
|
def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, platform = None):
    """
    Set up the file names and LED bookkeeping for one fan

    Args:
        has_fan_dir: truthy if the fan direction file (FAN_DIR) can be used
        fan_index: 0-based fan index (PSU index for a PSU fan)
        drawer_index: 0-based index of the drawer holding the fan
        psu_fan: True if this object represents the fan of a PSU
        platform: platform name, used to detect platforms whose fans
                  cannot be unplugged
    """
    # API index is starting from 0, Mellanox platform index is starting from 1
    self.index = fan_index + 1
    self.drawer_index = drawer_index + 1

    self.is_psu_fan = psu_fan
    self.always_presence = platform in platform_with_unplugable_fan

    if self.is_psu_fan:
        self._name = 'psu_{}_fan_{}'.format(self.index, 1)
        self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index)
        # A PSU fan counts as present when its speed file exists
        self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index)
        self.fan_max_speed_path = os.path.join(FAN_PATH, "psu{}_fan_max".format(self.index))
        self.fan_min_speed_path = os.path.join(FAN_PATH, "psu{}_fan_min".format(self.index))
        # Files describing how to reach the PSU over i2c (used to set the speed)
        self.psu_i2c_bus_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_bus'.format(self.index))
        self.psu_i2c_addr_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_addr'.format(self.index))
        self.psu_i2c_command_path = os.path.join(CONFIG_PATH, 'fan_command')
    else:
        self._name = "fan{}".format(self.index)
        self.fan_speed_get_path = "fan{}_speed_get".format(self.index)
        self.fan_speed_set_path = "fan{}_speed_set".format(self.index)
        self.fan_presence_path = "fan{}_status".format(self.drawer_index)
        self.fan_max_speed_path = os.path.join(FAN_PATH, "fan{}_max".format(self.index))
        self.fan_min_speed_path = os.path.join(FAN_PATH, "fan{}_min".format(self.index))

    self.fan_status_path = "fan{}_fault".format(self.index)

    if not self.is_psu_fan: # We don't support PSU led management in 201911
        if self.always_presence: # For 2010/2100, all fans share one LED
            led_key = VIRTUAL_DRAWER_INDEX
            self.fan_green_led_path = "led_fan_green"
            self.fan_red_led_path = "led_fan_red"
            self.fan_orange_led_path = "led_fan_orange"
            self.fan_led_cap_path = "led_fan_capability"
        else:
            led_key = self.drawer_index
            self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index)
            self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index)
            self.fan_orange_led_path = "led_fan{}_orange".format(self.drawer_index)
            self.fan_led_cap_path = "led_fan{}_capability".format(self.drawer_index)

        # Fans of the same drawer share a single LED object; create it
        # only for the first fan that needs it
        if led_key not in Fan.fan_drawer_leds:
            Fan.fan_drawer_leds[led_key] = SharedLed()
        self.fault_indicator = ComponentFaultyIndicator(Fan.fan_drawer_leds[led_key])

    self.fan_dir = FAN_DIR if has_fan_dir else None
|
|
|
|
|
|
def get_direction(self):
    """
    Retrieves the fan's direction

    Returns:
        A string, either FAN_DIRECTION_INTAKE or FAN_DIRECTION_EXHAUST
        depending on fan direction. FAN_DIRECTION_NOT_APPLICABLE is
        returned for PSU fans, when no fan direction file is configured
        or when the fan is not present.

    Raises:
        RuntimeError: if the fan direction file cannot be read or parsed

    Notes:
        What Mellanox calls forward:
        Air flows from fans side to QSFP side, for example: MSN2700-CS2F
        which means intake in community
        What Mellanox calls reverse:
        Air flow from QSFP side to fans side, for example: MSN2700-CS2R
        which means exhaust in community
        According to hw-mgmt:
            1 stands for forward, in other words intake
            0 stands for reverse, in other words exhaust
    """
    if self.is_psu_fan or not self.fan_dir or not self.get_presence():
        return self.FAN_DIRECTION_NOT_APPLICABLE

    try:
        with open(self.fan_dir, 'r') as dir_file:
            direction_bits = int(dir_file.read().strip())
            # The file holds a bit mask; bit (drawer_index - 1) belongs
            # to this fan's drawer
            drawer_bit = 1 << (self.drawer_index - 1)
            if direction_bits & drawer_bit:
                return self.FAN_DIRECTION_INTAKE
            return self.FAN_DIRECTION_EXHAUST
    except (ValueError, IOError) as e:
        raise RuntimeError("Failed to read fan direction status to {}".format(repr(e)))
|
|
|
|
|
|
def get_name(self):
    """
    Retrieves the name of the fan

    Returns:
        string: the name assigned when the fan object was created
    """
    fan_name = self._name
    return fan_name
|
|
|
|
def get_status(self):
    """
    Retrieves the operational status of fan

    Returns:
        bool: True if fan is operating properly, False if not
    """
    if self.is_psu_fan:
        # No fault file is consulted for PSU fans; they always report OK
        return True

    # A fault value of 0 means the fan is healthy. The second argument is
    # presumably the value used when the file cannot be read, which would
    # make an unreadable file count as a fault - verify against
    # read_int_from_file.
    fault = read_int_from_file(os.path.join(FAN_PATH, self.fan_status_path), 1)
    return fault == 0
|
|
|
|
def get_presence(self):
    """
    Retrieves the presence status of fan

    Returns:
        bool: True if fan is present, False if not
    """
    if self.is_psu_fan:
        # A PSU fan is present as long as its file exists
        return os.path.exists(os.path.join(FAN_PATH, self.fan_presence_path))

    if self.always_presence:
        # Fans on this platform cannot be unplugged
        return True

    presence = read_int_from_file(os.path.join(FAN_PATH, self.fan_presence_path), 0)
    return presence == 1
|
|
|
|
def get_speed(self):
    """
    Retrieves the speed of fan

    Returns:
        int: percentage of the max fan speed, capped at 100. If the
             maximum speed reads as 0 the raw RPM value is returned
             instead, because no percentage can be computed.
    """
    speed_in_rpm = read_int_from_file(os.path.join(FAN_PATH, self.fan_speed_get_path))

    max_speed_in_rpm = read_int_from_file(self.fan_max_speed_path)
    if max_speed_in_rpm == 0:
        # Avoid dividing by zero; fall back to the RPM reading
        return speed_in_rpm

    # Floor division keeps the result an integer on both Python 2 and
    # Python 3 ("/" would yield a float on Python 3)
    return min(100, 100 * speed_in_rpm // max_speed_in_rpm)
|
|
|
|
def get_target_speed(self):
    """
    Retrieves the expected speed of fan

    Returns:
        int: percentage of the max fan speed
    """
    if not self.is_psu_fan:
        # Chassis fan: convert the configured PWM value into a percentage
        pwm = read_int_from_file(os.path.join(FAN_PATH, self.fan_speed_set_path))
        return int(round(pwm*100.0/PWM_MAX))

    try:
        # Get PSU fan target speed according to current system cooling level
        level = Thermal.get_cooling_level()
        return int(self.PSU_FAN_SPEED[level], 16)
    except Exception:
        # Best effort: report the measured speed if the lookup fails
        return self.get_speed()
|
|
|
|
def set_speed(self, speed):
    """
    Set fan speed to expected value

    Args:
        speed: An integer, the percentage of full fan speed to set fan to,
               in the range 0 (off) to 100 (full speed)

    Returns:
        bool: True if set success, False if fail. A speed outside of the
              0..100 range is rejected and False is returned.
    """
    # Reject out-of-range requests up front: a negative value would wrap
    # around when indexing PSU_FAN_SPEED (-10 would select full speed) and
    # a value above 100 would produce a PWM larger than PWM_MAX.
    if not 0 <= speed <= 100:
        return False

    if self.is_psu_fan:
        if not self.get_presence():
            return False
        from .thermal import logger
        try:
            bus = read_str_from_file(self.psu_i2c_bus_path, raise_exception=True)
            addr = read_str_from_file(self.psu_i2c_addr_path, raise_exception=True)
            command = read_str_from_file(self.psu_i2c_command_path, raise_exception=True)
            # PSU fans only accept the discrete speeds of PSU_FAN_SPEED,
            # selected in steps of 10%
            psu_speed = self.PSU_FAN_SPEED[int(speed / 10)]
            command = "i2cset -f -y {0} {1} {2} {3} wp".format(bus, addr, command, psu_speed)
            subprocess.check_call(command, shell = True)
            return True
        except subprocess.CalledProcessError as ce:
            logger.log_error('Failed to call command {}, return code={}, command output={}'.format(ce.cmd, ce.returncode, ce.output))
            return False
        except Exception as e:
            logger.log_error('Failed to set PSU FAN speed - {}'.format(e))
            return False

    try:
        # Chassis fan: scale the percentage to the PWM range
        pwm = int(round(PWM_MAX*speed/100.0))
        write_file(os.path.join(FAN_PATH, self.fan_speed_set_path), pwm, raise_exception=True)
    except (ValueError, IOError):
        return False

    return True
|
|
|
|
def _get_led_capability(self):
    """
    Read the list of capabilities supported by the fan LED

    Returns:
        list: the whitespace separated entries of the LED capability
              file, or None if the file cannot be read
    """
    try:
        with open(os.path.join(LED_PATH, self.fan_led_cap_path), 'r') as fan_led_cap:
            return fan_led_cap.read().split()
    except (ValueError, IOError):
        return None
|
|
|
|
def set_status_led(self, color):
    """
    Set the status LED of the fan

    Args:
        color: A string representing the color requested for this fan

    Returns:
        bool: True if set success, False if fail. Always False for PSU fans.
    """
    if self.is_psu_fan:
        return False

    # Record the color wanted by this fan, then apply whatever color the
    # shared LED reports (presumably combined over all fans sharing that
    # LED - see SharedLed / ComponentFaultyIndicator).
    self.fault_indicator.set_status(color)
    led_key = VIRTUAL_DRAWER_INDEX if self.always_presence else self.drawer_index
    return self._set_status_led(Fan.fan_drawer_leds[led_key].get_status())
|
|
|
|
def _set_status_led(self, color):
|
|
"""
|
|
Set led to expected color
|
|
|
|
Args:
|
|
color: A string representing the color with which to set the
|
|
fan module status LED
|
|
|
|
Returns:
|
|
bool: True if set success, False if fail.
|
|
"""
|
|
led_cap_list = self._get_led_capability()
|
|
if led_cap_list is None:
|
|
return False
|
|
|
|
status = False
|
|
try:
|
|
if color == 'green':
|
|
with open(os.path.join(LED_PATH, self.fan_green_led_path), 'w') as fan_led:
|
|
fan_led.write(str(LED_ON))
|
|
elif color == 'red':
|
|
# Some fan don't support red led but support orange led, in this case we set led to orange
|
|
if 'red' in led_cap_list:
|
|
led_path = os.path.join(LED_PATH, self.fan_red_led_path)
|
|
elif 'orange' in led_cap_list:
|
|
led_path = os.path.join(LED_PATH, self.fan_orange_led_path)
|
|
else:
|
|
return False
|
|
with open(led_path, 'w') as fan_led:
|
|
fan_led.write(str(LED_ON))
|
|
|
|
elif color == 'off':
|
|
with open(os.path.join(LED_PATH, self.fan_green_led_path), 'w') as fan_led:
|
|
fan_led.write(str(LED_OFF))
|
|
|
|
with open(os.path.join(LED_PATH, self.fan_red_led_path), 'w') as fan_led:
|
|
fan_led.write(str(LED_OFF))
|
|
else:
|
|
status = False
|
|
except (ValueError, IOError):
|
|
status = False
|
|
return status
|
|
|
|
def get_speed_tolerance(self):
    """
    Retrieves the speed tolerance of the fan

    Returns:
        An integer, the percentage of variance from target speed which is
        considered tolerable
    """
    # The tolerance value is fixed as 50% for all the Mellanox platform
    tolerance_percent = 50
    return tolerance_percent
|