[Accton] AS4630-54TE Support SystemHealthMonitor (#8183)

1. Implement FanDrawer-Fan hierarchy.
2. Enable thermalctld, disable pcied.
3. Implement SystemLED in Chassis.
4. Correct Fan direction.
5. Implement required Fan APIs for SystemHealthMonitoring.
6. Handle non-ASCII characters while reading PSU model/serial number.

```
Check if System-health can pass the check and display the SystemLED correctly.


///////// booting, DIAG_LED = GREEN_BLINKING /////////

root@sonic:/tmp# show system-health detail 
System is currently booting...
root@sonic:/tmp# cat /sys/class/leds/diag/brightness 
5


///////// container_checker fail, DIAG_LED = AMBER /////////

root@sonic:/sys/bus/i2c/devices# show system-health detail
System status summary

  System status LED  STATUS_LED_COLOR_AMBER
  Services:
    Status: OK
  Hardware:
    Status: Not OK
    Reasons: container_checker is not Status ok

System services and devices monitor list

Name                        Status    Type
--------------------------  --------  ----------
container_checker           Not OK    Program
sonic                       OK        System
rsyslog                     OK        Process
root-overlay                OK        Filesystem
var-log                     OK        Filesystem
routeCheck                  OK        Program
diskCheck                   OK        Program
container_memory_telemetry  OK        Program
FAN-1F                      OK        Fan
FAN-1R                      OK        Fan
FAN-2F                      OK        Fan
FAN-2R                      OK        Fan
FAN-3F                      OK        Fan
FAN-3R                      OK        Fan
PSU-1 FAN-1                 OK        Fan
PSU-2 FAN-1                 OK        Fan
PSU 1                       OK        PSU
PSU 2                       OK        PSU

System services and devices ignore list

Name             Status    Type
---------------  --------  ------
asic             Ignored   Device
psu.temperature  Ignored   Device

///////// skip container_checker, DIAG_LED = GREEN /////////

root@sonic:/sys/bus/i2c/devices# vi /usr/share/sonic/device/x86_64-accton_as4630_54te-r0/system_health_monitoring_config.json 
root@sonic:/sys/bus/i2c/devices# 
root@sonic:/sys/bus/i2c/devices# 
root@sonic:/sys/bus/i2c/devices# show system-health detail
System status summary

  System status LED  STATUS_LED_COLOR_GREEN
  Services:
    Status: OK
  Hardware:
    Status: OK

System services and devices monitor list

Name                        Status    Type
--------------------------  --------  ----------
sonic                       OK        System
rsyslog                     OK        Process
root-overlay                OK        Filesystem
var-log                     OK        Filesystem
routeCheck                  OK        Program
diskCheck                   OK        Program
container_memory_telemetry  OK        Program
FAN-1F                      OK        Fan
FAN-1R                      OK        Fan
FAN-2F                      OK        Fan
FAN-2R                      OK        Fan
FAN-3F                      OK        Fan
FAN-3R                      OK        Fan
PSU-1 FAN-1                 OK        Fan
PSU-2 FAN-1                 OK        Fan
PSU 1                       OK        PSU
PSU 2                       OK        PSU

System services and devices ignore list

Name               Status    Type
-----------------  --------  -------
container_checker  Ignored   Service
psu.temperature    Ignored   Device
asic               Ignored   Device
```


Signed-off-by: Sean Wu <sean_wu@edge-core.com>
This commit is contained in:
SeanWu 2021-07-25 07:27:34 +08:00 committed by GitHub
parent aa59bfeab7
commit fed8957b47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 333 additions and 27 deletions

View File

@ -1,5 +1,5 @@
{
"skip_ledd": true,
"skip_thermalctld": true
"skip_pcied": true
}

View File

@ -1,2 +1,2 @@
__all__ = ['chassis', 'eeprom', 'platform', 'psu', 'sfp', 'thermal', 'fan']
__all__ = [ "platform", "chassis", "sfp", "eeprom", "component", "psu", "thermal", "fan", "fan_drawer" ]
from . import platform

View File

@ -27,6 +27,13 @@ PMON_REBOOT_CAUSE_PATH = "/usr/share/sonic/platform/api_files/reboot-cause/"
REBOOT_CAUSE_FILE = "reboot-cause.txt"
PREV_REBOOT_CAUSE_FILE = "previous-reboot-cause.txt"
HOST_CHK_CMD = "docker > /dev/null 2>&1"
# sysfs node holding the current state of the "diag" (system status) LED.
SYSLED_FNODE = "/sys/class/leds/diag/brightness"
# Maps the text value read from / written to SYSLED_FNODE to the matching
# STATUS_LED_COLOR_* name. Values not listed here are reported as "UNKNOWN"
# by Chassis.get_status_led().
SYSLED_MODES = {
"0" : "STATUS_LED_COLOR_OFF",
"1" : "STATUS_LED_COLOR_GREEN",
"2" : "STATUS_LED_COLOR_AMBER",
"5" : "STATUS_LED_COLOR_GREEN_BLINK"
}
class Chassis(ChassisBase):
@ -55,12 +62,11 @@ class Chassis(ChassisBase):
self.sfp_module_initialized = True
def __initialize_fan(self):
from sonic_platform.fan import Fan
for fant_index in range(0, NUM_FAN_TRAY):
for fan_index in range(0, NUM_FAN):
fan = Fan(fant_index, fan_index)
self._fan_list.append(fan)
from sonic_platform.fan_drawer import FanDrawer
for fant_index in range(NUM_FAN_TRAY):
fandrawer = FanDrawer(fant_index)
self._fan_drawer_list.append(fandrawer)
self._fan_list.extend(fandrawer._fan_list)
def __initialize_psu(self):
from sonic_platform.psu import Psu
for index in range(0, NUM_PSU):
@ -192,3 +198,39 @@ class Chassis(ChassisBase):
sys.stderr.write("SFP index {} out of range (1-{})\n".format(
index, len(self._sfp_list)))
return sfp
def get_position_in_parent(self):
    """
    Report the 1-based physical position of this device in its parent.

    This implementation always answers -1, the value defined for the case
    where the parent-relative position cannot be determined (or where
    entPhysicalContainedIn is '0').

    Returns:
        integer: always -1
    """
    position = -1
    return position
def is_replaceable(self):
    """
    Tell the caller whether this device can be replaced.

    Returns:
        bool: always False for this device.
    """
    replaceable = False
    return replaceable
def initizalize_system_led(self):
    """
    System LED initialization hook.

    No work is performed here; the method only reports success.

    Returns:
        bool: always True
    """
    initialized = True
    return initialized
def get_status_led(self):
    """
    Read the system status LED state from SYSLED_FNODE.

    Returns:
        string: the STATUS_LED_COLOR_* name that SYSLED_MODES associates
        with the value read, or "UNKNOWN" when the value is not a key of
        SYSLED_MODES.
    """
    raw_mode = self._api_helper.read_txt_file(SYSLED_FNODE)
    if raw_mode in SYSLED_MODES:
        return SYSLED_MODES[raw_mode]
    return "UNKNOWN"
def set_status_led(self, color):
    """
    Set the system status LED to the requested color.

    Args:
        color: one of the STATUS_LED_COLOR_* names listed as values in
            SYSLED_MODES.

    Returns:
        False when color is not a value of SYSLED_MODES; otherwise the
        result of writing the matching mode to SYSLED_FNODE.
    """
    # Reverse lookup: collect the mode keys whose color name matches and
    # use the first one, in dictionary order.
    matching_modes = [
        mode for mode, name in SYSLED_MODES.items() if name == color
    ]
    if not matching_modes:
        return False
    return self._api_helper.write_txt_file(SYSLED_FNODE, matching_modes[0])

View File

@ -13,10 +13,10 @@ except ImportError as e:
raise ImportError(str(e) + "- required module not found")
PSU_FAN_MAX_RPM = 26688
CPLD_I2C_PATH = "/sys/bus/i2c/devices/3-0060/fan_"
PSU_HWMON_I2C_PATH ="/sys/bus/i2c/devices/{}-00{}/"
PSU_I2C_MAPPING = {
SPEED_TOLERANCE = 15
CPLD_FAN_I2C_PATH = "/sys/bus/i2c/devices/3-0060/fan_"
I2C_PATH ="/sys/bus/i2c/devices/{}-00{}/"
PSU_HWMON_I2C_MAPPING = {
0: {
"num": 10,
"addr": "58"
@ -27,6 +27,20 @@ PSU_I2C_MAPPING = {
},
}
# I2C bus number ("num") and device address ("addr") per PSU, keyed by the
# 0-based PSU index. Fan.__init__ substitutes both values into I2C_PATH to
# build the PSU's psu_cpld_path directory.
PSU_CPLD_I2C_MAPPING = {
0: {
"num": 10,
"addr": "50"
},
1: {
"num": 11,
"addr": "51"
},
}
# Names of the chassis fans; Fan.get_name() indexes this list with
# fan_tray_index*2 + fan_index.
FAN_NAME_LIST = ["FAN-1F", "FAN-1R", "FAN-2F", "FAN-2R",
"FAN-3F", "FAN-3R"]
class Fan(FanBase):
"""Platform-specific Fan class"""
@ -39,9 +53,14 @@ class Fan(FanBase):
if self.is_psu_fan:
self.psu_index = psu_index
self.psu_i2c_num = PSU_I2C_MAPPING[self.psu_index]['num']
self.psu_i2c_addr = PSU_I2C_MAPPING[self.psu_index]['addr']
self.psu_hwmon_path = PSU_HWMON_I2C_PATH.format(
self.psu_i2c_num = PSU_HWMON_I2C_MAPPING[self.psu_index]['num']
self.psu_i2c_addr = PSU_HWMON_I2C_MAPPING[self.psu_index]['addr']
self.psu_hwmon_path = I2C_PATH.format(
self.psu_i2c_num, self.psu_i2c_addr)
self.psu_i2c_num = PSU_CPLD_I2C_MAPPING[self.psu_index]['num']
self.psu_i2c_addr = PSU_CPLD_I2C_MAPPING[self.psu_index]['addr']
self.psu_cpld_path = I2C_PATH.format(
self.psu_i2c_num, self.psu_i2c_addr)
FanBase.__init__(self)
@ -57,10 +76,10 @@ class Fan(FanBase):
if not self.is_psu_fan:
dir_str = "{}{}{}".format(CPLD_I2C_PATH, 'direction_', self.fan_tray_index)
dir_str = "{}{}{}".format(CPLD_FAN_I2C_PATH, 'direction_', self.fan_tray_index+1)
val=self._api_helper.read_txt_file(dir_str)
if val is not None:
if val==0:#F2B
if int(val, 10)==0:#F2B
direction=self.FAN_DIRECTION_EXHAUST
else:
direction=self.FAN_DIRECTION_INTAKE
@ -99,7 +118,7 @@ class Fan(FanBase):
else:
return 0
elif self.get_presence():
speed_path = "{}{}".format(CPLD_I2C_PATH, 'duty_cycle_percentage')
speed_path = "{}{}".format(CPLD_FAN_I2C_PATH, 'duty_cycle_percentage')
speed=self._api_helper.read_txt_file(speed_path)
if speed is None:
return 0
@ -116,7 +135,7 @@ class Fan(FanBase):
0 : when PWM mode is use
pwm : when pwm mode is not use
"""
return False #Not supported
return self.get_speed()
def get_speed_tolerance(self):
"""
@ -125,7 +144,7 @@ class Fan(FanBase):
An integer, the percentage of variance from target speed which is
considered tolerable
"""
return False #Not supported
return SPEED_TOLERANCE
def set_speed(self, speed):
"""
@ -138,7 +157,7 @@ class Fan(FanBase):
"""
if not self.is_psu_fan and self.get_presence():
speed_path = "{}{}".format(CPLD_I2C_PATH, 'duty_cycle_percentage')
speed_path = "{}{}".format(CPLD_FAN_I2C_PATH, 'duty_cycle_percentage')
return self._api_helper.write_txt_file(speed_path, int(speed))
return False
@ -154,18 +173,109 @@ class Fan(FanBase):
"""
return False #Not supported
def get_status_led(self):
    """
    Derive the fan status LED color from the fan's presence.

    Returns:
        A string, one of the predefined STATUS_LED_COLOR_* strings:
        GREEN when get_presence() yields a value equal to 1, RED when it
        equals 0, and OFF when it is None or anything else.
    """
    presence = self.get_presence()
    if presence is None:
        return self.STATUS_LED_COLOR_OFF

    color_by_presence = {
        1: self.STATUS_LED_COLOR_GREEN,
        0: self.STATUS_LED_COLOR_RED,
    }
    return color_by_presence.get(presence, self.STATUS_LED_COLOR_OFF)
def get_name(self):
    """
    Build the display name of this fan.

    Returns:
        string: "PSU-<n> FAN-<m>" (both 1-based) for a PSU fan, otherwise
        the FAN_NAME_LIST entry at fan_tray_index*2 + fan_index.
    """
    if self.is_psu_fan:
        return "PSU-{} FAN-{}".format(self.psu_index+1, self.fan_index+1)
    return FAN_NAME_LIST[self.fan_tray_index*2 + self.fan_index]
def get_presence(self):
"""
Retrieves the presence of the FAN
Returns:
bool: True if FAN is present, False if not
"""
present_path = "{}{}{}".format(CPLD_I2C_PATH, 'present_', self.fan_index+1)
if self.is_psu_fan:
present_path="{}{}".format(self.psu_cpld_path, 'psu_present')
else:
present_path = "{}{}{}".format(CPLD_FAN_I2C_PATH, 'present_', self.fan_tray_index+1)
val=self._api_helper.read_txt_file(present_path)
if not self.is_psu_fan:
if val is not None:
return int(val, 10)==1
else:
return False
def get_status(self):
"""
Retrieves the operational status of the device
Returns:
A boolean value, True if device is operating properly, False if not
"""
if self.is_psu_fan:
psu_fan_path= "{}{}".format(self.psu_hwmon_path, 'psu_fan1_fault')
val=self._api_helper.read_txt_file(psu_fan_path)
if val is not None:
return int(val, 10)==1
return int(val, 10)==0
else:
return False
else:
return True
path = "{}{}{}".format(CPLD_FAN_I2C_PATH, 'fault_', self.fan_tray_index+1)
val=self._api_helper.read_txt_file(path)
if val is not None:
return int(val, 10)==0
else:
return False
def get_model(self):
    """
    Report the model (part) number of the fan.

    Returns:
        string: always the placeholder "N/A"; no model information is
        read by this implementation.
    """
    model = "N/A"
    return model
def get_serial(self):
    """
    Report the serial number of the fan.

    Returns:
        string: always the placeholder "N/A"; no serial information is
        read by this implementation.
    """
    serial = "N/A"
    return serial
def get_position_in_parent(self):
    """
    Report the 1-based physical position of this fan in its parent.

    Returns:
        integer: psu_index+1 for a PSU fan, fan_tray_index+1 for a
        chassis fan.
    """
    if self.is_psu_fan:
        return self.psu_index+1
    return self.fan_tray_index+1
def is_replaceable(self):
    """
    Tell the caller whether this fan can be replaced.

    Returns:
        bool: True for a chassis fan, False for a PSU fan.
    """
    return not self.is_psu_fan

View File

@ -0,0 +1,90 @@
########################################################################
#
# Module contains an implementation of SONiC Platform Base API and
# provides the Fan-Drawers' information available in the platform.
#
########################################################################
try:
from sonic_platform_base.fan_drawer_base import FanDrawerBase
except ImportError as e:
raise ImportError(str(e) + "- required module not found")
# Number of Fan objects created for each fan drawer (fan tray).
FANS_PER_FANTRAY = 2
class FanDrawer(FanDrawerBase):
    """Platform-specific fan drawer holding FANS_PER_FANTRAY Fan objects."""

    def __init__(self, fantray_index):
        """
        Create the drawer and populate it with its fans.

        Args:
            fantray_index: 0-based index of this fan tray.
        """
        FanDrawerBase.__init__(self)
        # The tray index is kept 0-based; user-facing values add 1.
        self.fantrayindex = fantray_index
        self.__initialize_fan_drawer()

    def __initialize_fan_drawer(self):
        """Append one Fan per slot of this tray to the drawer's fan list."""
        from sonic_platform.fan import Fan

        self._fan_list.extend(
            Fan(self.fantrayindex, slot) for slot in range(FANS_PER_FANTRAY)
        )

    def get_name(self):
        """
        Build the fan drawer name.

        Returns:
            string: "FanTray<n>" where n is the 1-based tray number.
        """
        tray_number = self.fantrayindex+1
        return "FanTray{}".format(tray_number)

    def get_presence(self):
        """
        Report the presence of the drawer, taken from its first fan.

        Returns:
            bool: True if device is present, False if not
        """
        first_fan = self._fan_list[0]
        return first_fan.get_presence()

    def get_model(self):
        """
        Report the model (part) number, taken from the drawer's first fan.

        Returns:
            string: Model/part number of device
        """
        first_fan = self._fan_list[0]
        return first_fan.get_model()

    def get_serial(self):
        """
        Report the serial number, taken from the drawer's first fan.

        Returns:
            string: Serial number of device
        """
        first_fan = self._fan_list[0]
        return first_fan.get_serial()

    def get_status(self):
        """
        Report the operational status, taken from the drawer's first fan.

        Returns:
            A boolean value, True if device is operating properly,
            False if not
        """
        first_fan = self._fan_list[0]
        return first_fan.get_status()

    def get_position_in_parent(self):
        """
        Report the 1-based physical position of this drawer in its parent.

        Returns:
            integer: the 0-based tray index plus one.
        """
        position = self.fantrayindex+1
        return position

    def is_replaceable(self):
        """
        Tell the caller whether this drawer can be replaced.

        Returns:
            bool: always True.
        """
        replaceable = True
        return replaceable

View File

@ -51,7 +51,7 @@ class APIHelper():
def read_txt_file(self, file_path):
try:
with open(file_path, 'r') as fd:
with open(file_path, 'r', errors='replace') as fd:
data = fd.read()
return data.strip()
except IOError:

View File

@ -139,8 +139,15 @@ class Psu(PsuBase):
Returns:
A string, one of the predefined STATUS_LED_COLOR_* strings above
"""
status=self.get_status()
if status is None:
return self.STATUS_LED_COLOR_OFF
return {
1: self.STATUS_LED_COLOR_GREEN,
0: self.STATUS_LED_COLOR_RED
}.get(status, self.STATUS_LED_COLOR_OFF)
return False #Controlled by HW
def get_temperature(self):
"""
@ -226,3 +233,45 @@ class Psu(PsuBase):
return int(val, 10) == 1
else:
return 0
def get_model(self):
    """
    Read the PSU model (part) number from the 'psu_model_name' node
    under cpld_path.

    Returns:
        string: the text read, or "N/A" when the read yields None.
    """
    model = self._api_helper.read_txt_file(
        "{}{}".format(self.cpld_path, 'psu_model_name'))
    return "N/A" if model is None else model
def get_serial(self):
    """
    Read the PSU serial number from the 'psu_serial_number' node under
    cpld_path.

    Returns:
        string: the text read, or "N/A" when the read yields None.
    """
    serial = self._api_helper.read_txt_file(
        "{}{}".format(self.cpld_path, 'psu_serial_number'))
    return "N/A" if serial is None else serial
def get_position_in_parent(self):
    """
    Report the 1-based physical position of this PSU in its parent.

    Returns:
        integer: the PSU's index attribute plus one.
    """
    position = self.index+1
    return position
def is_replaceable(self):
    """
    Tell the caller whether this PSU can be replaced.

    Returns:
        bool: always True.
    """
    replaceable = True
    return replaceable

View File

@ -0,0 +1,15 @@
{
"services_to_ignore": [],
"devices_to_ignore": [
"asic",
"psu.temperature"
],
"user_defined_checkers": [],
"polling_interval": 60,
"led_color": {
"fault": "STATUS_LED_COLOR_AMBER",
"normal": "STATUS_LED_COLOR_GREEN",
"booting": "STATUS_LED_COLOR_GREEN_BLINK"
}
}