[BFN] Update psu.py to process sigterm signal (#13350)
Why I did it: Sometimes SIGTERM processing by psud takes more than the default 10 seconds (see stopwaitsecs in http://supervisord.org/configuration.html). Because of this, the following two test cases may fail: test_pmon_psud_stop_and_start_status and test_pmon_psud_term_and_start_status.
How I did it: Updated the PSU plugin to handle the SIGTERM signal so that psud finishes its last cycle in time.
How to verify it: Run the SONiC test cases test_pmon_psud_stop_and_start_status and test_pmon_psud_term_and_start_status.
This commit is contained in:
parent
8fdbf9dce3
commit
5ff5e98437
@ -9,6 +9,8 @@ try:
|
|||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise ImportError(str(e) + "- required module not found")
|
raise ImportError(str(e) + "- required module not found")
|
||||||
|
|
||||||
|
SIGTERM_CAUGHT = False
|
||||||
|
|
||||||
def file_create(path, mode=None):
|
def file_create(path, mode=None):
|
||||||
"""
|
"""
|
||||||
Ensure that file is created with the appropriate permissions
|
Ensure that file is created with the appropriate permissions
|
||||||
@ -38,12 +40,15 @@ def cancel_on_sigterm(func):
|
|||||||
def handler(sig, frame):
|
def handler(sig, frame):
|
||||||
if sigterm_handler:
|
if sigterm_handler:
|
||||||
sigterm_handler(sig, frame)
|
sigterm_handler(sig, frame)
|
||||||
|
global SIGTERM_CAUGHT
|
||||||
|
SIGTERM_CAUGHT = True
|
||||||
raise Exception("Canceling {}() execution...".format(func.__name__))
|
raise Exception("Canceling {}() execution...".format(func.__name__))
|
||||||
|
|
||||||
sigterm_handler = signal.getsignal(signal.SIGTERM)
|
sigterm_handler = signal.getsignal(signal.SIGTERM)
|
||||||
signal.signal(signal.SIGTERM, handler)
|
signal.signal(signal.SIGTERM, handler)
|
||||||
result = None
|
result = None
|
||||||
try:
|
try:
|
||||||
|
if not SIGTERM_CAUGHT:
|
||||||
result = func(*args, **kwargs)
|
result = func(*args, **kwargs)
|
||||||
finally:
|
finally:
|
||||||
signal.signal(signal.SIGTERM, sigterm_handler)
|
signal.signal(signal.SIGTERM, sigterm_handler)
|
||||||
|
@ -28,6 +28,13 @@ class Psu(PsuBase):
|
|||||||
__sensors_info = None
|
__sensors_info = None
|
||||||
__timestamp = 0
|
__timestamp = 0
|
||||||
|
|
||||||
|
# When psud gets a termination signal, it starts processing its last cycle.
|
||||||
|
# This cycle must be as fast as possible so that psud can stop cleanly;
|
||||||
|
# otherwise it will be killed. The whole plugin must therefore track
|
||||||
|
# this signal and choose its operations based on state, where the
|
||||||
|
# state is either "termination signal received" or "no termination signal".
|
||||||
|
|
||||||
|
# State is "no termination signal"
|
||||||
sigterm = False
|
sigterm = False
|
||||||
sigterm_default_handler = None
|
sigterm_default_handler = None
|
||||||
cls_inited = False
|
cls_inited = False
|
||||||
@ -54,12 +61,15 @@ class Psu(PsuBase):
|
|||||||
if cls.sigterm_default_handler:
|
if cls.sigterm_default_handler:
|
||||||
cls.sigterm_default_handler(sig, frame)
|
cls.sigterm_default_handler(sig, frame)
|
||||||
syslog.syslog(syslog.LOG_INFO, "Canceling PSU platform API calls...")
|
syslog.syslog(syslog.LOG_INFO, "Canceling PSU platform API calls...")
|
||||||
|
# Changing state to "termination signal"
|
||||||
cls.sigterm = True
|
cls.sigterm = True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __sensors_get(cls, cached=True):
|
def __sensors_get(cls, cached=True):
|
||||||
cls.__lock.acquire()
|
cls.__lock.acquire()
|
||||||
if time.time() > cls.__timestamp + 15:
|
# Operation may take a few seconds to process, so if state is
|
||||||
|
# "termination signal", plugin doesn't perform this operation
|
||||||
|
if time.time() > cls.__timestamp + 15 and not Psu.sigterm:
|
||||||
# Update cache once per 15 seconds
|
# Update cache once per 15 seconds
|
||||||
try:
|
try:
|
||||||
cls.__sensors_info = get_psu_metrics()
|
cls.__sensors_info = get_psu_metrics()
|
||||||
@ -83,6 +93,8 @@ class Psu(PsuBase):
|
|||||||
def psu_info_get(client):
|
def psu_info_get(client):
|
||||||
return client.pltfm_mgr.pltfm_mgr_pwr_supply_info_get(self.__index)
|
return client.pltfm_mgr.pltfm_mgr_pwr_supply_info_get(self.__index)
|
||||||
|
|
||||||
|
# Operation may take a few seconds to process, so if state is
|
||||||
|
# "termination signal", plugin doesn't perform this operation
|
||||||
# Update cache once per 2 seconds
|
# Update cache once per 2 seconds
|
||||||
if self.__ts + 2 < time.time() and not Psu.sigterm:
|
if self.__ts + 2 < time.time() and not Psu.sigterm:
|
||||||
self.__info = None
|
self.__info = None
|
||||||
@ -96,6 +108,10 @@ class Psu(PsuBase):
|
|||||||
return self.__info
|
return self.__info
|
||||||
return self.__info
|
return self.__info
|
||||||
|
|
||||||
|
@cancel_on_sigterm
def get_metric_value(self, metric_name):
    """
    Looks up a single sensor metric belonging to this PSU

    Args:
        metric_name: metric key suffix without the PSU prefix,
                     e.g. "12V Output Voltage_in1_input"

    Returns:
        Whatever the module-level get_metric_value() helper returns for
        the key "PSU<index> <metric_name>" in the sensors data
    """
    # The key prefix uses a printf-style placeholder, so it has to be
    # filled in with the % operator: str.format() ignores "%d" and would
    # leave the literal text "PSU%d " in the key.
    metric_key = "PSU%d %s" % (self.__index, metric_name)
    return get_metric_value(Psu.__sensors_get(), metric_key)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_num_psus():
|
def get_num_psus():
|
||||||
"""
|
"""
|
||||||
@ -127,7 +143,7 @@ class Psu(PsuBase):
|
|||||||
A float number, the output voltage in volts,
|
A float number, the output voltage in volts,
|
||||||
e.g. 12.1
|
e.g. 12.1
|
||||||
"""
|
"""
|
||||||
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Voltage_in1_input" % self.__index)
|
return self.get_metric_value("12V Output Voltage_in1_input")
|
||||||
|
|
||||||
def get_current(self):
|
def get_current(self):
|
||||||
"""
|
"""
|
||||||
@ -136,7 +152,7 @@ class Psu(PsuBase):
|
|||||||
Returns:
|
Returns:
|
||||||
A float number, the electric current in amperes, e.g 15.4
|
A float number, the electric current in amperes, e.g 15.4
|
||||||
"""
|
"""
|
||||||
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Current_curr2_input" % self.__index)
|
return self.get_metric_value("12V Output Current_curr2_input")
|
||||||
|
|
||||||
def get_input_voltage(self):
|
def get_input_voltage(self):
|
||||||
"""
|
"""
|
||||||
@ -145,7 +161,7 @@ class Psu(PsuBase):
|
|||||||
A float number, the input voltage in volts,
|
A float number, the input voltage in volts,
|
||||||
e.g. 220
|
e.g. 220
|
||||||
"""
|
"""
|
||||||
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Voltage_in0_input" % self.__index)
|
return self.get_metric_value("Input Voltage_in0_input")
|
||||||
|
|
||||||
def get_input_current(self):
|
def get_input_current(self):
|
||||||
"""
|
"""
|
||||||
@ -153,7 +169,7 @@ class Psu(PsuBase):
|
|||||||
Returns:
|
Returns:
|
||||||
A float number, the electric current in amperes, e.g 0.8
|
A float number, the electric current in amperes, e.g 0.8
|
||||||
"""
|
"""
|
||||||
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Current_curr1_input" % self.__index)
|
return self.get_metric_value("Input Current_curr1_input")
|
||||||
|
|
||||||
def get_power(self):
|
def get_power(self):
|
||||||
"""
|
"""
|
||||||
@ -177,6 +193,9 @@ class Psu(PsuBase):
|
|||||||
return client.pltfm_mgr.pltfm_mgr_pwr_supply_present_get(self.__index)
|
return client.pltfm_mgr.pltfm_mgr_pwr_supply_present_get(self.__index)
|
||||||
|
|
||||||
status = False
|
status = False
|
||||||
|
if Psu.sigterm:
|
||||||
|
return status
|
||||||
|
|
||||||
try:
|
try:
|
||||||
status = thrift_try(psu_present_get, attempts=1)
|
status = thrift_try(psu_present_get, attempts=1)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -267,6 +286,7 @@ class Psu(PsuBase):
|
|||||||
"""
|
"""
|
||||||
return self.__index
|
return self.__index
|
||||||
|
|
||||||
|
@cancel_on_sigterm
|
||||||
def get_temperature(self):
|
def get_temperature(self):
|
||||||
"""
|
"""
|
||||||
Retrieves current temperature reading from PSU
|
Retrieves current temperature reading from PSU
|
||||||
@ -274,8 +294,11 @@ class Psu(PsuBase):
|
|||||||
A float number of current temperature in Celsius up to nearest thousandth
|
A float number of current temperature in Celsius up to nearest thousandth
|
||||||
of one degree Celsius, e.g. 30.125
|
of one degree Celsius, e.g. 30.125
|
||||||
"""
|
"""
|
||||||
|
# Operation may take a few seconds to process, so if state is
|
||||||
|
# "termination signal", plugin doesn't perform this operation
|
||||||
return self.get_thermal(0).get_temperature()
|
return self.get_thermal(0).get_temperature()
|
||||||
|
|
||||||
|
@cancel_on_sigterm
|
||||||
def get_temperature_high_threshold(self):
|
def get_temperature_high_threshold(self):
|
||||||
"""
|
"""
|
||||||
Retrieves the high threshold temperature of PSU
|
Retrieves the high threshold temperature of PSU
|
||||||
@ -283,6 +306,8 @@ class Psu(PsuBase):
|
|||||||
A float number, the high threshold temperature of PSU in Celsius
|
A float number, the high threshold temperature of PSU in Celsius
|
||||||
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
||||||
"""
|
"""
|
||||||
|
# Operation may take a few seconds to process, so if state is
|
||||||
|
# "termination signal", plugin doesn't perform this operation
|
||||||
return self.get_thermal(0).get_high_threshold()
|
return self.get_thermal(0).get_high_threshold()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
Reference in New Issue
Block a user