[Juniper][QFX5210] Platform monitoring updates (#3899)
This commit makes a few enhancements to the EM policy implementation: a) it introduces a hysteresis algorithm to prevent fan hunting; b) it reads the ASIC temperature when deciding the fan speed. PR #3599 added a workaround for the boot problem from the secondary BIOS. However, when the SONiC image is upgraded, that workaround results in multiple UEFI entries being created for BOOTX64.EFI. To fix the problem, this changeset introduces a check to see whether a UEFI entry for BOOTX64.EFI already exists, and creates or skips the UEFI entry accordingly. Signed-off-by: Ciju Rajan K <crajank@juniper.net>
This commit is contained in:
parent
13eec88732
commit
f126258d5f
@ -1,3 +1,5 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
systemctl enable qfx5210-platform-init.service
|
systemctl enable qfx5210-platform-init.service
|
||||||
systemctl start qfx5210-platform-init.service
|
systemctl start qfx5210-platform-init.service
|
||||||
|
|
||||||
@ -18,5 +20,12 @@ if [ -f $FIRST_BOOT_FILE ]; then
|
|||||||
cp SONiC-OS/grubx64.efi BOOT/BOOTX64.EFI
|
cp SONiC-OS/grubx64.efi BOOT/BOOTX64.EFI
|
||||||
cd /tmp
|
cd /tmp
|
||||||
umount sda1
|
umount sda1
|
||||||
efibootmgr -c -L "SONiC" -l "\EFI\BOOT\BOOTX64.EFI" > /dev/null 2>&1
|
# This code block ensures that no additional entries
|
||||||
|
# are added. This is applicable during SONiC image
|
||||||
|
# upgrades.
|
||||||
|
entries=`efibootmgr -v | grep "BOOTX64"`
|
||||||
|
if [ -z "$entries" ]; then
|
||||||
|
# Creating the UEFI entry for the first time.
|
||||||
|
efibootmgr -c -L "SONiC" -l "\EFI\BOOT\BOOTX64.EFI" > /var/tmp/efi_log 2>&1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
@ -47,6 +47,7 @@ try:
|
|||||||
import traceback
|
import traceback
|
||||||
import glob
|
import glob
|
||||||
import collections
|
import collections
|
||||||
|
import StringIO
|
||||||
from tabulate import tabulate
|
from tabulate import tabulate
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise ImportError('%s - required module not found' % str(e))
|
raise ImportError('%s - required module not found' % str(e))
|
||||||
@ -54,15 +55,20 @@ except ImportError as e:
|
|||||||
# Defaults
|
# Defaults
|
||||||
VERSION = '1.0'
|
VERSION = '1.0'
|
||||||
FUNCTION_NAME = '/var/log/juniper_qfx5210_monitor'
|
FUNCTION_NAME = '/var/log/juniper_qfx5210_monitor'
|
||||||
|
verbose = False
|
||||||
|
DEBUG = False
|
||||||
|
|
||||||
global log_file
|
global log_file
|
||||||
global log_level
|
global log_level
|
||||||
|
|
||||||
|
|
||||||
global isPlatformAFI
|
global isPlatformAFI
|
||||||
|
global is80PerFlag
|
||||||
|
global is60PerFlag
|
||||||
global isFireThresholdReached
|
global isFireThresholdReached
|
||||||
FireThresholdSecsRemaining = 120
|
global isFireThresholdPrint
|
||||||
|
global PrevASICValue
|
||||||
|
global FireThresholdSecsRemaining
|
||||||
|
|
||||||
temp_policy_AFI = {
|
temp_policy_AFI = {
|
||||||
0: [[70, 0, 48000], [70, 48000, 53000], [80, 53000, 0], [80, 53000, 58000], [100, 58000, 0], ['Yellow Alarm', 64000, 70000], ['Red Alarm', 70000, 75000], ['Fire Shut Alarm', 75000, 0]],
|
0: [[70, 0, 48000], [70, 48000, 53000], [80, 53000, 0], [80, 53000, 58000], [100, 58000, 0], ['Yellow Alarm', 64000, 70000], ['Red Alarm', 70000, 75000], ['Fire Shut Alarm', 75000, 0]],
|
||||||
@ -72,6 +78,7 @@ temp_policy_AFI = {
|
|||||||
4: [[70, 0, 31000], [70, 31000, 36000], [80, 36000, 0], [80, 36000, 42000], [100, 42000, 0], ['Yellow Alarm', 48000, 55000], ['Red Alarm', 55000, 60000], ['Fire Shut Alarm', 60000, 0]],
|
4: [[70, 0, 31000], [70, 31000, 36000], [80, 36000, 0], [80, 36000, 42000], [100, 42000, 0], ['Yellow Alarm', 48000, 55000], ['Red Alarm', 55000, 60000], ['Fire Shut Alarm', 60000, 0]],
|
||||||
5: [[70, 0, 31000], [70, 31000, 36000], [80, 36000, 0], [80, 36000, 43000], [100, 43000, 0], ['Yellow Alarm', 49000, 56000], ['Red Alarm', 56000, 61000], ['Fire Shut Alarm', 61000, 0]],
|
5: [[70, 0, 31000], [70, 31000, 36000], [80, 36000, 0], [80, 36000, 43000], [100, 43000, 0], ['Yellow Alarm', 49000, 56000], ['Red Alarm', 56000, 61000], ['Fire Shut Alarm', 61000, 0]],
|
||||||
6: [[70, 0, 70000], [70, 70000, 78000], [80, 78000, 0], [80, 78000, 86000], [100, 86000, 0], ['Yellow Alarm', 91000, 96000], ['Red Alarm', 96000, 102000], ['Fire Shut Alarm', 102000, 0]],
|
6: [[70, 0, 70000], [70, 70000, 78000], [80, 78000, 0], [80, 78000, 86000], [100, 86000, 0], ['Yellow Alarm', 91000, 96000], ['Red Alarm', 96000, 102000], ['Fire Shut Alarm', 102000, 0]],
|
||||||
|
7: [[70, 0, 84000], [70, 84000, 91000], [80, 91000, 0], [80, 91000, 98000], [100, 98000, 0], ['Yellow Alarm', 103000, 108000], ['Red Alarm', 108000, 120000], ['Fire Shut Alarm', 120000, 0]],
|
||||||
}
|
}
|
||||||
|
|
||||||
temp_policy_AFO = {
|
temp_policy_AFO = {
|
||||||
@ -82,6 +89,7 @@ temp_policy_AFO = {
|
|||||||
4: [[60, 0, 39000], [60, 39000, 45000], [80, 45000, 0], [80, 45000, 52000], [100, 52000, 0], ['Yellow Alarm', 59000, 65000], ['Red Alarm', 65000, 69000], ['Fire Shut Alarm', 69000, 0]],
|
4: [[60, 0, 39000], [60, 39000, 45000], [80, 45000, 0], [80, 45000, 52000], [100, 52000, 0], ['Yellow Alarm', 59000, 65000], ['Red Alarm', 65000, 69000], ['Fire Shut Alarm', 69000, 0]],
|
||||||
5: [[60, 0, 37000], [60, 37000, 43000], [80, 43000, 0], [80, 43000, 50000], [100, 50000, 0], ['Yellow Alarm', 57000, 63000], ['Red Alarm', 63000, 67000], ['Fire Shut Alarm', 67000, 0]],
|
5: [[60, 0, 37000], [60, 37000, 43000], [80, 43000, 0], [80, 43000, 50000], [100, 50000, 0], ['Yellow Alarm', 57000, 63000], ['Red Alarm', 63000, 67000], ['Fire Shut Alarm', 67000, 0]],
|
||||||
6: [[60, 0, 70000], [60, 70000, 78000], [80, 78000, 0], [80, 78000, 86000], [100, 86000, 0], ['Yellow Alarm', 91000, 96000], ['Red Alarm', 96000, 102000], ['Fire Shut Alarm', 102000, 0]],
|
6: [[60, 0, 70000], [60, 70000, 78000], [80, 78000, 0], [80, 78000, 86000], [100, 86000, 0], ['Yellow Alarm', 91000, 96000], ['Red Alarm', 96000, 102000], ['Fire Shut Alarm', 102000, 0]],
|
||||||
|
7: [[60, 0, 84000], [60, 84000, 91000], [80, 91000, 0], [80, 91000, 98000], [100, 98000, 0], ['Yellow Alarm', 103000, 108000], ['Red Alarm', 108000, 120000], ['Fire Shut Alarm', 120000, 0]],
|
||||||
}
|
}
|
||||||
|
|
||||||
class QFX5210_FanUtil(object):
|
class QFX5210_FanUtil(object):
|
||||||
@ -97,7 +105,7 @@ class QFX5210_FanUtil(object):
|
|||||||
try:
|
try:
|
||||||
val_file = open(self.FAN_DUTY_PATH)
|
val_file = open(self.FAN_DUTY_PATH)
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
print "Error: unable to open file: %s" % str(e)
|
logging.error('get_fan_duty_cycle: unable to open file: %s', str(e))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
content = val_file.readline().rstrip()
|
content = val_file.readline().rstrip()
|
||||||
@ -110,7 +118,7 @@ class QFX5210_FanUtil(object):
|
|||||||
try:
|
try:
|
||||||
fan_file = open(self.FAN_DUTY_PATH, 'r+')
|
fan_file = open(self.FAN_DUTY_PATH, 'r+')
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
print "Error: unable to open file: %s" % str(e)
|
logging.error('set_fan_duty_cycle: unable to open file: %s', str(e))
|
||||||
return False
|
return False
|
||||||
fan_file.write(str(val))
|
fan_file.write(str(val))
|
||||||
fan_file.close()
|
fan_file.close()
|
||||||
@ -120,7 +128,8 @@ class QFX5210_ThermalUtil(object):
|
|||||||
"""QFX5210 Platform ThermalUtil class"""
|
"""QFX5210 Platform ThermalUtil class"""
|
||||||
|
|
||||||
SENSOR_NUM_ON_MAIN_BOARD = 6
|
SENSOR_NUM_ON_MAIN_BOARD = 6
|
||||||
SENSOR_CORETEMP_NUM_ON_MAIN_BOARD = 7
|
CORETEMP_INDEX_ON_MAIN_BOARD = 6
|
||||||
|
SENSOR_CORETEMP_NUM_ON_MAIN_BOARD = 8
|
||||||
CORETEMP_NUM_ON_MAIN_BOARD = 5
|
CORETEMP_NUM_ON_MAIN_BOARD = 5
|
||||||
THERMAL_NUM_RANGE = 8
|
THERMAL_NUM_RANGE = 8
|
||||||
SENSOR_NUM_1_IDX = 1
|
SENSOR_NUM_1_IDX = 1
|
||||||
@ -181,19 +190,19 @@ class QFX5210_ThermalUtil(object):
|
|||||||
try:
|
try:
|
||||||
val_file = open(filename, 'r')
|
val_file = open(filename, 'r')
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
logging.error('GET. unable to open file: %s', str(e))
|
logging.error('get_sensor_node_val: unable to open file: %s', str(e))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
content = val_file.readline().rstrip()
|
content = val_file.readline().rstrip()
|
||||||
|
|
||||||
if content == '':
|
if content == '':
|
||||||
logging.debug('GET. content is NULL. device_path:%s', device_path)
|
logging.debug('get_sensor_node_val: content is NULL. device_path:%s', device_path)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
val_file.close()
|
val_file.close()
|
||||||
except:
|
except:
|
||||||
logging.debug('GET. unable to close file. device_path:%s', device_path)
|
logging.debug('get_sensor_node_val: unable to close file. device_path:%s', device_path)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return int(content)
|
return int(content)
|
||||||
@ -208,19 +217,19 @@ class QFX5210_ThermalUtil(object):
|
|||||||
try:
|
try:
|
||||||
val_file = open(filename, 'r')
|
val_file = open(filename, 'r')
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
logging.error('GET. unable to open file: %s', str(e))
|
logging.error('get_coretemp_node_val: unable to open file: %s', str(e))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
content = val_file.readline().rstrip()
|
content = val_file.readline().rstrip()
|
||||||
|
|
||||||
if content == '':
|
if content == '':
|
||||||
logging.debug('GET. content is NULL. device_path:%s', device_path)
|
logging.debug('get_coretemp_node_val: content is NULL. device_path:%s', device_path)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
val_file.close()
|
val_file.close()
|
||||||
except:
|
except:
|
||||||
logging.debug('GET. unable to close file. device_path:%s', device_path)
|
logging.debug('get_coretemp_node_val: unable to close file. device_path:%s', device_path)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return int(content)
|
return int(content)
|
||||||
@ -243,7 +252,7 @@ class QFX5210_ThermalUtil(object):
|
|||||||
try:
|
try:
|
||||||
val_file = open(self.ALARM_LED_PATH)
|
val_file = open(self.ALARM_LED_PATH)
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
print "Error: unable to open file: %s" % str(e)
|
logging.error('get_alarm_led_brightness: unable to open file: %s', str(e))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
content = val_file.readline().rstrip()
|
content = val_file.readline().rstrip()
|
||||||
@ -260,7 +269,7 @@ class QFX5210_ThermalUtil(object):
|
|||||||
try:
|
try:
|
||||||
val_file = open(self.ALARM_LED_PATH, 'r+')
|
val_file = open(self.ALARM_LED_PATH, 'r+')
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
print "Error: unable to open file: %s" % str(e)
|
logging.error('set_alarm_led_brightness: unable to open file: %s', str(e))
|
||||||
return False
|
return False
|
||||||
val_file.write(str(val))
|
val_file.write(str(val))
|
||||||
val_file.close()
|
val_file.close()
|
||||||
@ -274,8 +283,12 @@ class QFX5210_ThermalUtil(object):
|
|||||||
def getSensorTemp(self):
|
def getSensorTemp(self):
|
||||||
sum = 0
|
sum = 0
|
||||||
global isPlatformAFI
|
global isPlatformAFI
|
||||||
|
global is80PerFlag
|
||||||
|
global is60PerFlag
|
||||||
global isFireThresholdReached
|
global isFireThresholdReached
|
||||||
global FireThresholdSecsRemaining
|
global FireThresholdSecsRemaining
|
||||||
|
global isFireThresholdPrint
|
||||||
|
global PrevASICValue
|
||||||
#AFI
|
#AFI
|
||||||
if (isPlatformAFI == True):
|
if (isPlatformAFI == True):
|
||||||
temp_policy = temp_policy_AFI
|
temp_policy = temp_policy_AFI
|
||||||
@ -294,14 +307,22 @@ class QFX5210_ThermalUtil(object):
|
|||||||
4: [0,0,0,0,0,0,0,0],
|
4: [0,0,0,0,0,0,0,0],
|
||||||
5: [0,0,0,0,0,0,0,0],
|
5: [0,0,0,0,0,0,0,0],
|
||||||
6: [0,0,0,0,0,0,0,0],
|
6: [0,0,0,0,0,0,0,0],
|
||||||
|
7: [0,0,0,0,0,0,0,0],
|
||||||
}
|
}
|
||||||
# if the Firethreshold Flag is set and 120 seconds have elapsed, invoking the "poweroff" to shutdown the box
|
# if the Firethreshold Flag is set and 120 seconds have elapsed, invoking the "poweroff" to shutdown the box
|
||||||
if (isFireThresholdReached == True):
|
if (isFireThresholdReached == True):
|
||||||
firethr = FireThresholdSecsRemaining - 20
|
firethr = FireThresholdSecsRemaining - 20
|
||||||
logging.critical('CRITICAL: Fire Threshold reached: System is going to shutdown in %s seconds', firethr)
|
if firethr == 0:
|
||||||
print "Fire Threshold reached: System is going to shutdown in %s seconds\n" % firethr
|
logging.critical('CRITICAL: Fire Threshold reached: System is going to shutdown now')
|
||||||
|
os.system("echo 'CRITICAL: Fire Threshold reached: System is going to shutdown now' > /dev/console")
|
||||||
|
else:
|
||||||
|
logging.critical('CRITICAL: Fire Threshold reached: System is going to shutdown in %s seconds', firethr)
|
||||||
|
os.system("echo 'CRITICAL: Fire Threshold reached: System is going to shutdown in %s seconds' > /dev/console" % firethr)
|
||||||
|
|
||||||
FireThresholdSecsRemaining = FireThresholdSecsRemaining - 20
|
FireThresholdSecsRemaining = FireThresholdSecsRemaining - 20
|
||||||
if (FireThresholdSecsRemaining == 20):
|
logging.critical('CRITICAL: Value of FireThresholdSecsRemaining %s seconds', FireThresholdSecsRemaining)
|
||||||
|
|
||||||
|
if (FireThresholdSecsRemaining == 0):
|
||||||
isFireThresholdReached == False
|
isFireThresholdReached == False
|
||||||
time.sleep(20)
|
time.sleep(20)
|
||||||
cmd = "poweroff"
|
cmd = "poweroff"
|
||||||
@ -310,8 +331,31 @@ class QFX5210_ThermalUtil(object):
|
|||||||
for x in range(self.SENSOR_CORETEMP_NUM_ON_MAIN_BOARD):
|
for x in range(self.SENSOR_CORETEMP_NUM_ON_MAIN_BOARD):
|
||||||
if x < self.SENSOR_NUM_ON_MAIN_BOARD:
|
if x < self.SENSOR_NUM_ON_MAIN_BOARD:
|
||||||
value = self._get_sensor_node_val(x+1)
|
value = self._get_sensor_node_val(x+1)
|
||||||
else:
|
logging.debug('Sensor value %d : %s', x, value)
|
||||||
|
elif x == self.CORETEMP_INDEX_ON_MAIN_BOARD:
|
||||||
value = self.get_coretempValue()
|
value = self.get_coretempValue()
|
||||||
|
logging.debug('Main Board CORE temp: %s', value)
|
||||||
|
else:
|
||||||
|
logging.debug('Reading ASIC Temp value using bcmcmd')
|
||||||
|
proc = subprocess.Popen("bcmcmd \"show temp\" | grep \"maximum peak temperature\" | awk '{ print $5 }' > /var/log/asic_value 2>&1 & ",shell=True)
|
||||||
|
time.sleep(2)
|
||||||
|
cmd = "kill -9 %s"%(proc.pid)
|
||||||
|
status, cmd_out = commands.getstatusoutput(cmd)
|
||||||
|
|
||||||
|
if os.stat("/var/log/asic_value").st_size == 0:
|
||||||
|
value = PrevASICValue
|
||||||
|
logging.debug('No ASIC Temp file, Prev ASIC Temp Value: %s', PrevASICValue)
|
||||||
|
else:
|
||||||
|
with open('/var/log/asic_value', 'r') as f:
|
||||||
|
value1 = f.readline()
|
||||||
|
value2 = float(value1)
|
||||||
|
value1 = value2 * 1000
|
||||||
|
value = int(value1)
|
||||||
|
PrevASICValue = value
|
||||||
|
logging.debug('Reading from ASIC Temp file: %s', value)
|
||||||
|
logging.debug('Reading from Prev ASIC Temp Value: %s', PrevASICValue)
|
||||||
|
|
||||||
|
os.system('rm /var/log/asic_value')
|
||||||
|
|
||||||
# 60% Duty Cycle for AFO and 70% Duty Cycle for AFI
|
# 60% Duty Cycle for AFO and 70% Duty Cycle for AFI
|
||||||
if value > temp_policy[x][0][1] and value <= temp_policy[x][0][2]:
|
if value > temp_policy[x][0][1] and value <= temp_policy[x][0][2]:
|
||||||
@ -350,57 +394,113 @@ class QFX5210_ThermalUtil(object):
|
|||||||
|
|
||||||
fan = QFX5210_FanUtil()
|
fan = QFX5210_FanUtil()
|
||||||
# CHECK IF ANY TEMPERATURE SENSORS HAS SET FIRE SHUTDOWN FLAG
|
# CHECK IF ANY TEMPERATURE SENSORS HAS SET FIRE SHUTDOWN FLAG
|
||||||
if SensorFlag[0][7] or SensorFlag[1][7] or SensorFlag[2][7] or SensorFlag[3][7] or SensorFlag[4][7] or SensorFlag[5][7] or SensorFlag[6][7]:
|
if SensorFlag[0][7] or SensorFlag[1][7] or SensorFlag[2][7] or SensorFlag[3][7] or SensorFlag[4][7] or SensorFlag[5][7] or SensorFlag[6][7] or SensorFlag[7][7]:
|
||||||
isFireThresholdReached = True
|
isFireThresholdReached = True
|
||||||
logging.critical('CRITICAL: Fire Threshold reached: System is going to shutdown in 120 seconds')
|
if (isFireThresholdPrint == True):
|
||||||
print "CRITICAL: Fire Threshold reached: System is going to shutdown in 120 seconds\n"
|
logging.critical('CRITICAL: Fire Threshold reached: System is going to shutdown in 120 seconds')
|
||||||
value = self.get_alarm_led_brightness()
|
os.system("echo 'CRITICAL: Fire Threshold reached: System is going to shutdown in 120 seconds' > /dev/console")
|
||||||
if ( value > 0):
|
isFireThresholdPrint = False
|
||||||
self.set_alarm_led_brightness(0)
|
|
||||||
|
|
||||||
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 'RED' ALARM FLAG, IF YES, SET THE ALARM LED TO 'RED'
|
logging.debug('Temp Sensor is set to FIRE SHUTDOWN Flag')
|
||||||
elif SensorFlag[0][6] or SensorFlag[1][6] or SensorFlag[2][6] or SensorFlag[3][6] or SensorFlag[4][6] or SensorFlag[5][6] or SensorFlag[6][6]:
|
fan.set_fan_duty_cycle(100)
|
||||||
self.set_alarm_led_brightness(2)
|
self.set_alarm_led_brightness(2)
|
||||||
|
|
||||||
|
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 'RED' ALARM FLAG, IF YES, SET THE ALARM LED TO 'RED'
|
||||||
|
elif SensorFlag[0][6] or SensorFlag[1][6] or SensorFlag[2][6] or SensorFlag[3][6] or SensorFlag[4][6] or SensorFlag[5][6] or SensorFlag[6][6] or SensorFlag[7][6]:
|
||||||
|
fan.set_fan_duty_cycle(100)
|
||||||
|
self.set_alarm_led_brightness(2)
|
||||||
|
logging.debug('Temp Sensor is set to Red Alarm Flag')
|
||||||
|
if (isFireThresholdReached == True):
|
||||||
|
logging.critical('CRITICAL: System Stabilized, not shutting down')
|
||||||
|
os.system("echo 'CRITICAL: System Stabilized, not shutting down' > /dev/console")
|
||||||
|
FireThresholdSecsRemaining = 120
|
||||||
|
isFireThresholdReached = False
|
||||||
|
|
||||||
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 'YELLOW' ALARM FLAG, IF YES, SET THE ALARM LED TO 'YELLOW'
|
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 'YELLOW' ALARM FLAG, IF YES, SET THE ALARM LED TO 'YELLOW'
|
||||||
elif SensorFlag[0][5] or SensorFlag[1][5] or SensorFlag[2][5] or SensorFlag[3][5] or SensorFlag[4][5] or SensorFlag[5][5] or SensorFlag[6][5]:
|
elif SensorFlag[0][5] or SensorFlag[1][5] or SensorFlag[2][5] or SensorFlag[3][5] or SensorFlag[4][5] or SensorFlag[5][5] or SensorFlag[6][5] or SensorFlag[7][5]:
|
||||||
|
fan.set_fan_duty_cycle(100)
|
||||||
self.set_alarm_led_brightness(1)
|
self.set_alarm_led_brightness(1)
|
||||||
|
logging.debug('Temp Sensor is set to Yellow Alarm Flag')
|
||||||
|
if (isFireThresholdReached == True):
|
||||||
|
logging.critical('CRITICAL: System Stabilized, not shutting down')
|
||||||
|
os.system("echo 'CRITICAL: System Stabilized, not shutting down' > /dev/console")
|
||||||
|
FireThresholdSecsRemaining = 120
|
||||||
|
isFireThresholdReached = False
|
||||||
|
|
||||||
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 100% DUTY CYCLE FLAG, IF YES, SET THE FAN DUTY CYCLE TO 100%
|
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 100% DUTY CYCLE FLAG, IF YES, SET THE FAN DUTY CYCLE TO 100%
|
||||||
elif SensorFlag[0][4] or SensorFlag[1][4] or SensorFlag[2][4] or SensorFlag[3][4] or SensorFlag[4][4] or SensorFlag[5][4] or SensorFlag[6][4]:
|
elif SensorFlag[0][4] or SensorFlag[1][4] or SensorFlag[2][4] or SensorFlag[3][4] or SensorFlag[4][4] or SensorFlag[5][4] or SensorFlag[6][4] or SensorFlag[7][4]:
|
||||||
fan.set_fan_duty_cycle(100)
|
fan.set_fan_duty_cycle(100)
|
||||||
value = self.get_alarm_led_brightness()
|
value = self.get_alarm_led_brightness()
|
||||||
if ( value > 0):
|
if ( value > 0):
|
||||||
self.set_alarm_led_brightness(0)
|
self.set_alarm_led_brightness(0)
|
||||||
|
is80PerFlag = False
|
||||||
|
is60PerFlag = False
|
||||||
|
logging.debug('Temp Sensor is set to 100% Duty Cycle Flag')
|
||||||
|
|
||||||
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 80% DUTY CYCLE PREV FLAG, IF YES, SET THE FAN DUTY CYCLE TO 80%
|
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 80% DUTY CYCLE PREV FLAG, IF YES, SET THE FAN DUTY CYCLE TO 80%
|
||||||
elif SensorFlag[0][3] or SensorFlag[1][3] or SensorFlag[2][3] or SensorFlag[3][3] or SensorFlag[4][3] or SensorFlag[5][3] or SensorFlag[6][3]:
|
elif SensorFlag[0][3] or SensorFlag[1][3] or SensorFlag[2][3] or SensorFlag[3][3] or SensorFlag[4][3] or SensorFlag[5][3] or SensorFlag[6][3] or SensorFlag[7][3]:
|
||||||
fan.set_fan_duty_cycle(80)
|
if (is80PerFlag == True):
|
||||||
|
fan.set_fan_duty_cycle(80)
|
||||||
|
is80PerFlag = False
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
value = self.get_alarm_led_brightness()
|
value = self.get_alarm_led_brightness()
|
||||||
if ( value > 0):
|
if ( value > 0):
|
||||||
self.set_alarm_led_brightness(0)
|
self.set_alarm_led_brightness(0)
|
||||||
|
|
||||||
|
if (isFireThresholdReached == True):
|
||||||
|
logging.critical('CRITICAL: System Stabilized, not shutting down')
|
||||||
|
os.system("echo 'CRITICAL: System Stabilized, not shutting down' > /dev/console")
|
||||||
|
FireThresholdSecsRemaining = 120
|
||||||
|
isFireThresholdReached = False
|
||||||
|
logging.debug('Temp Sensor is set to 80% Prev Duty Cycle Flag')
|
||||||
|
|
||||||
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 80% DUTY CYCLE FLAG, IF YES, SET THE FAN DUTY CYCLE TO 80%
|
# CHECK IF ANY TEMPERATURE SENSORS HAS SET 80% DUTY CYCLE FLAG, IF YES, SET THE FAN DUTY CYCLE TO 80%
|
||||||
elif SensorFlag[0][2] or SensorFlag[1][2] or SensorFlag[2][2] or SensorFlag[3][2] or SensorFlag[4][2] or SensorFlag[5][2] or SensorFlag[6][2]:
|
elif SensorFlag[0][2] or SensorFlag[1][2] or SensorFlag[2][2] or SensorFlag[3][2] or SensorFlag[4][2] or SensorFlag[5][2] or SensorFlag[6][2] or SensorFlag[7][2]:
|
||||||
fan.set_fan_duty_cycle(80)
|
fan.set_fan_duty_cycle(80)
|
||||||
value = self.get_alarm_led_brightness()
|
value = self.get_alarm_led_brightness()
|
||||||
if ( value > 0):
|
if ( value > 0):
|
||||||
self.set_alarm_led_brightness(0)
|
self.set_alarm_led_brightness(0)
|
||||||
|
is80PerFlag = True
|
||||||
|
|
||||||
|
if (isFireThresholdReached == True):
|
||||||
|
logging.critical('CRITICAL: System Stabilized, not shutting down')
|
||||||
|
os.system("echo 'CRITICAL: System Stabilized, not shutting down' > /dev/console")
|
||||||
|
FireThresholdSecsRemaining = 120
|
||||||
|
isFireThresholdReached = False
|
||||||
|
|
||||||
|
logging.debug('Temp Sensor is set to 80% Duty Cycle Flag')
|
||||||
|
|
||||||
# FOR "AFO" Platform CHECK IF ANY TEMPERATURE SENSORS HAS SET 60% DUTY CYCLE PREV FLAG, IF YES, SET THE FAN DUTY CYCLE TO 60%
|
# FOR "AFO" Platform CHECK IF ANY TEMPERATURE SENSORS HAS SET 60% DUTY CYCLE PREV FLAG, IF YES, SET THE FAN DUTY CYCLE TO 60%
|
||||||
# FOR "AFI" Platform CHECK IF ANY TEMPERATURE SENSORS HAS SET 70% DUTY CYCLE PREV FLAG, IF YES, SET THE FAN DUTY CYCLE TO 70%
|
# FOR "AFI" Platform CHECK IF ANY TEMPERATURE SENSORS HAS SET 70% DUTY CYCLE PREV FLAG, IF YES, SET THE FAN DUTY CYCLE TO 70%
|
||||||
elif SensorFlag[0][1] or SensorFlag[1][1] or SensorFlag[2][1] or SensorFlag[3][1] or SensorFlag[4][1] or SensorFlag[5][1] or SensorFlag[6][1]:
|
elif SensorFlag[0][1] or SensorFlag[1][1] or SensorFlag[2][1] or SensorFlag[3][1] or SensorFlag[4][1] or SensorFlag[5][1] or SensorFlag[6][1] or SensorFlag[7][1]:
|
||||||
if (isPlatformAFI == True):
|
if (is60PerFlag == True):
|
||||||
fan.set_fan_duty_cycle(70)
|
if (isPlatformAFI == True):
|
||||||
|
fan.set_fan_duty_cycle(70)
|
||||||
|
else:
|
||||||
|
fan.set_fan_duty_cycle(60)
|
||||||
|
|
||||||
|
is60PerFlag = False
|
||||||
|
is80PerFlag = True
|
||||||
else:
|
else:
|
||||||
fan.set_fan_duty_cycle(60)
|
pass
|
||||||
|
|
||||||
value = self.get_alarm_led_brightness()
|
value = self.get_alarm_led_brightness()
|
||||||
if ( value > 0):
|
if ( value > 0):
|
||||||
self.set_alarm_led_brightness(0)
|
self.set_alarm_led_brightness(0)
|
||||||
|
|
||||||
|
if (isFireThresholdReached == True):
|
||||||
|
logging.critical('CRITICAL: System Stabilized, not shutting down')
|
||||||
|
os.system("echo 'CRITICAL: System Stabilized, not shutting down' > /dev/console")
|
||||||
|
FireThresholdSecsRemaining = 120
|
||||||
|
isFireThresholdReached = False
|
||||||
|
|
||||||
|
logging.debug('Temp Sensor is set to 60% Prev Duty Cycle Flag')
|
||||||
|
|
||||||
# FOR "AFO" Platform CHECK IF ANY TEMPERATURE SENSORS HAS SET 60% DUTY CYCLE FLAG, IF YES, SET THE FAN DUTY CYCLE TO 60%
|
# FOR "AFO" Platform CHECK IF ANY TEMPERATURE SENSORS HAS SET 60% DUTY CYCLE FLAG, IF YES, SET THE FAN DUTY CYCLE TO 60%
|
||||||
# FOR "AFI" Platform CHECK IF ANY TEMPERATURE SENSORS HAS SET 70% DUTY CYCLE FLAG, IF YES, SET THE FAN DUTY CYCLE TO 70%
|
# FOR "AFI" Platform CHECK IF ANY TEMPERATURE SENSORS HAS SET 70% DUTY CYCLE FLAG, IF YES, SET THE FAN DUTY CYCLE TO 70%
|
||||||
elif SensorFlag[0][0] or SensorFlag[1][0] or SensorFlag[2][0] or SensorFlag[3][0] or SensorFlag[4][0] or SensorFlag[5][0] or SensorFlag[6][0]:
|
elif SensorFlag[0][0] or SensorFlag[1][0] or SensorFlag[2][0] or SensorFlag[3][0] or SensorFlag[4][0] or SensorFlag[5][0] or SensorFlag[6][0] or SensorFlag[7][0]:
|
||||||
if (isPlatformAFI == True):
|
if (isPlatformAFI == True):
|
||||||
fan.set_fan_duty_cycle(70)
|
fan.set_fan_duty_cycle(70)
|
||||||
else:
|
else:
|
||||||
@ -408,6 +508,15 @@ class QFX5210_ThermalUtil(object):
|
|||||||
value = self.get_alarm_led_brightness()
|
value = self.get_alarm_led_brightness()
|
||||||
if ( value > 0):
|
if ( value > 0):
|
||||||
self.set_alarm_led_brightness(0)
|
self.set_alarm_led_brightness(0)
|
||||||
|
is60PerFlag = True
|
||||||
|
is80PerFlag = True
|
||||||
|
|
||||||
|
if (isFireThresholdReached == True):
|
||||||
|
logging.critical('CRITICAL: System Stabilized, not shutting down')
|
||||||
|
os.system("echo 'CRITICAL: System Stabilized, not shutting down' > /dev/console")
|
||||||
|
FireThresholdSecsRemaining = 120
|
||||||
|
isFireThresholdReached = False
|
||||||
|
logging.debug('Temp Sensor is set to 60% Duty Cycle Flag')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
@ -418,13 +527,17 @@ class QFX5210_ThermalUtil(object):
|
|||||||
for y in range(self.THERMAL_NUM_RANGE):
|
for y in range(self.THERMAL_NUM_RANGE):
|
||||||
SensorFlag[x][y] = 0
|
SensorFlag[x][y] = 0
|
||||||
|
|
||||||
|
|
||||||
class device_monitor(object):
|
class device_monitor(object):
|
||||||
|
|
||||||
def __init__(self, log_file, log_level):
|
def __init__(self, log_file, log_level):
|
||||||
|
global DEBUG
|
||||||
global isPlatformAFI
|
global isPlatformAFI
|
||||||
global isFireThresholdReached
|
global isFireThresholdReached
|
||||||
|
global is80PerFlag
|
||||||
|
global is60PerFlag
|
||||||
|
global isFireThresholdPrint
|
||||||
|
global PrevASICValue
|
||||||
|
global FireThresholdSecsRemaining
|
||||||
MASTER_LED_PATH = '/sys/class/leds/master/brightness'
|
MASTER_LED_PATH = '/sys/class/leds/master/brightness'
|
||||||
SYSTEM_LED_PATH = '/sys/class/leds/system/brightness'
|
SYSTEM_LED_PATH = '/sys/class/leds/system/brightness'
|
||||||
FANTYPE_PATH = '/sys/bus/i2c/devices/17-0068/fan1_direction'
|
FANTYPE_PATH = '/sys/bus/i2c/devices/17-0068/fan1_direction'
|
||||||
@ -439,13 +552,14 @@ class device_monitor(object):
|
|||||||
datefmt='%H:%M:%S'
|
datefmt='%H:%M:%S'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if DEBUG == True:
|
||||||
# set up logging to console
|
# set up logging to console
|
||||||
if log_level == logging.DEBUG:
|
if log_level == logging.DEBUG:
|
||||||
console = logging.StreamHandler()
|
console = logging.StreamHandler()
|
||||||
console.setLevel(log_level)
|
console.setLevel(log_level)
|
||||||
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
|
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
|
||||||
console.setFormatter(formatter)
|
console.setFormatter(formatter)
|
||||||
logging.getLogger('').addHandler(console)
|
logging.getLogger('').addHandler(console)
|
||||||
|
|
||||||
import sonic_platform
|
import sonic_platform
|
||||||
platform = sonic_platform.platform.Platform()
|
platform = sonic_platform.platform.Platform()
|
||||||
@ -454,20 +568,25 @@ class device_monitor(object):
|
|||||||
|
|
||||||
# the return value of get_fan_type is AFO = 0, AFI = 1 and for error condition it is -1
|
# the return value of get_fan_type is AFO = 0, AFI = 1 and for error condition it is -1
|
||||||
# In the error condition also, we are making default platform as AFO, to continue with Energy Monitoring
|
# In the error condition also, we are making default platform as AFO, to continue with Energy Monitoring
|
||||||
if (fan_type == -1 or fan_type == 0):
|
if (int(fan_type) == -1 or int(fan_type) == 0):
|
||||||
if (fan_type == -1):
|
if (int(fan_type) == -1):
|
||||||
print "Error: unable to open sys file for fan handling, defaulting it to AFO"
|
logging.error('device_monitor: unable to open sys file for fan handling, defaulting it to AFO')
|
||||||
isPlatformAFI = False
|
isPlatformAFI = False
|
||||||
else:
|
else:
|
||||||
isPlatformAFI = True
|
isPlatformAFI = True
|
||||||
|
|
||||||
isFireThresholdReached = False
|
isFireThresholdReached = False
|
||||||
|
is80PerFlag = True
|
||||||
|
is60PerFlag = True
|
||||||
|
isFireThresholdPrint = True
|
||||||
|
FireThresholdSecsRemaining = 120
|
||||||
|
PrevASICValue = 0
|
||||||
|
|
||||||
master_led_value = 1
|
master_led_value = 1
|
||||||
try:
|
try:
|
||||||
masterLED_file = open(MASTER_LED_PATH, 'r+')
|
masterLED_file = open(MASTER_LED_PATH, 'r+')
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
print "Error: unable to open file: %s" % str(e)
|
logging.error('device_monitor: unable to open Master LED file: %s', str(e))
|
||||||
return False
|
return False
|
||||||
masterLED_file.write(str(master_led_value))
|
masterLED_file.write(str(master_led_value))
|
||||||
masterLED_file.close()
|
masterLED_file.close()
|
||||||
@ -476,7 +595,7 @@ class device_monitor(object):
|
|||||||
try:
|
try:
|
||||||
systemLED_file = open(SYSTEM_LED_PATH, 'r+')
|
systemLED_file = open(SYSTEM_LED_PATH, 'r+')
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
print "Error: unable to open file: %s" % str(e)
|
logging.error('device_monitor: unable to open System LED file: %s', str(e))
|
||||||
return False
|
return False
|
||||||
systemLED_file.write(str(system_led_value))
|
systemLED_file.write(str(system_led_value))
|
||||||
systemLED_file.close()
|
systemLED_file.close()
|
||||||
@ -490,6 +609,8 @@ def main():
|
|||||||
log_file = '%s.log' % FUNCTION_NAME
|
log_file = '%s.log' % FUNCTION_NAME
|
||||||
log_level = logging.DEBUG
|
log_level = logging.DEBUG
|
||||||
|
|
||||||
|
#Introducing sleep of 150 seconds to wait for all the docker containers to start before starting the EM policy.
|
||||||
|
time.sleep(150)
|
||||||
monitor = device_monitor(log_file, log_level)
|
monitor = device_monitor(log_file, log_level)
|
||||||
while True:
|
while True:
|
||||||
monitor.manage_device()
|
monitor.manage_device()
|
||||||
|
Loading…
Reference in New Issue
Block a user