[BFN] Update psu.py to process sigterm signal (#13350)
Why I did it Sometime, SIGTERM processing by psud takes more then default 10sec (please see stopwaitsecs in http://supervisord.org/configuration.html). Due to this, the following two testcases may fail: test_pmon_psud_stop_and_start_status test_pmon_psud_term_and_start_status How I did it Update PSU plugin to process sigterm signal so that psud runs faster to end last cycle in time How to verify it Run SONiC CTs: test_pmon_psud_stop_and_start_status test_pmon_psud_term_and_start_status
This commit is contained in:
parent
8fdbf9dce3
commit
5ff5e98437
@ -9,6 +9,8 @@ try:
|
||||
except ImportError as e:
|
||||
raise ImportError(str(e) + "- required module not found")
|
||||
|
||||
SIGTERM_CAUGHT = False
|
||||
|
||||
def file_create(path, mode=None):
|
||||
"""
|
||||
Ensure that file is created with the appropriate permissions
|
||||
@ -38,12 +40,15 @@ def cancel_on_sigterm(func):
|
||||
def handler(sig, frame):
|
||||
if sigterm_handler:
|
||||
sigterm_handler(sig, frame)
|
||||
global SIGTERM_CAUGHT
|
||||
SIGTERM_CAUGHT = True
|
||||
raise Exception("Canceling {}() execution...".format(func.__name__))
|
||||
|
||||
sigterm_handler = signal.getsignal(signal.SIGTERM)
|
||||
signal.signal(signal.SIGTERM, handler)
|
||||
result = None
|
||||
try:
|
||||
if not SIGTERM_CAUGHT:
|
||||
result = func(*args, **kwargs)
|
||||
finally:
|
||||
signal.signal(signal.SIGTERM, sigterm_handler)
|
||||
|
@ -28,6 +28,13 @@ class Psu(PsuBase):
|
||||
__sensors_info = None
|
||||
__timestamp = 0
|
||||
|
||||
# When psud gets termination signal it starts processing last cycle.
|
||||
# This cycle must be as fast as possible to be able to stop correctly,
|
||||
# otherwise it will be killed, so the whole plugin must encounter
|
||||
# this signal to process operations based on state, where the
|
||||
# state is "termination signal got" and "no termination signal"
|
||||
|
||||
# State is "no termination signal"
|
||||
sigterm = False
|
||||
sigterm_default_handler = None
|
||||
cls_inited = False
|
||||
@ -54,12 +61,15 @@ class Psu(PsuBase):
|
||||
if cls.sigterm_default_handler:
|
||||
cls.sigterm_default_handler(sig, frame)
|
||||
syslog.syslog(syslog.LOG_INFO, "Canceling PSU platform API calls...")
|
||||
# Changing state to "termination signal"
|
||||
cls.sigterm = True
|
||||
|
||||
@classmethod
|
||||
def __sensors_get(cls, cached=True):
|
||||
cls.__lock.acquire()
|
||||
if time.time() > cls.__timestamp + 15:
|
||||
# Operation may take a few seconds to process, so if state is
|
||||
# "termination signal", plugin doesn't perform this operation
|
||||
if time.time() > cls.__timestamp + 15 and not Psu.sigterm:
|
||||
# Update cache once per 15 seconds
|
||||
try:
|
||||
cls.__sensors_info = get_psu_metrics()
|
||||
@ -83,6 +93,8 @@ class Psu(PsuBase):
|
||||
def psu_info_get(client):
|
||||
return client.pltfm_mgr.pltfm_mgr_pwr_supply_info_get(self.__index)
|
||||
|
||||
# Operation may take a few seconds to process, so if state is
|
||||
# "termination signal", plugin doesn't perform this operation
|
||||
# Update cache once per 2 seconds
|
||||
if self.__ts + 2 < time.time() and not Psu.sigterm:
|
||||
self.__info = None
|
||||
@ -96,6 +108,10 @@ class Psu(PsuBase):
|
||||
return self.__info
|
||||
return self.__info
|
||||
|
||||
@cancel_on_sigterm
|
||||
def get_metric_value(self, metric_name):
|
||||
return get_metric_value(Psu.__sensors_get(), "PSU%d ".format(self.__index) + metric_name)
|
||||
|
||||
@staticmethod
|
||||
def get_num_psus():
|
||||
"""
|
||||
@ -127,7 +143,7 @@ class Psu(PsuBase):
|
||||
A float number, the output voltage in volts,
|
||||
e.g. 12.1
|
||||
"""
|
||||
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Voltage_in1_input" % self.__index)
|
||||
return self.get_metric_value("12V Output Voltage_in1_input")
|
||||
|
||||
def get_current(self):
|
||||
"""
|
||||
@ -136,7 +152,7 @@ class Psu(PsuBase):
|
||||
Returns:
|
||||
A float number, the electric current in amperes, e.g 15.4
|
||||
"""
|
||||
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Current_curr2_input" % self.__index)
|
||||
return self.get_metric_value("12V Output Current_curr2_input")
|
||||
|
||||
def get_input_voltage(self):
|
||||
"""
|
||||
@ -145,7 +161,7 @@ class Psu(PsuBase):
|
||||
A float number, the input voltage in volts,
|
||||
e.g. 220
|
||||
"""
|
||||
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Voltage_in0_input" % self.__index)
|
||||
return self.get_metric_value("Input Voltage_in0_input")
|
||||
|
||||
def get_input_current(self):
|
||||
"""
|
||||
@ -153,7 +169,7 @@ class Psu(PsuBase):
|
||||
Returns:
|
||||
A float number, the electric current in amperes, e.g 0.8
|
||||
"""
|
||||
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Current_curr1_input" % self.__index)
|
||||
return self.get_metric_value("Input Current_curr1_input")
|
||||
|
||||
def get_power(self):
|
||||
"""
|
||||
@ -177,6 +193,9 @@ class Psu(PsuBase):
|
||||
return client.pltfm_mgr.pltfm_mgr_pwr_supply_present_get(self.__index)
|
||||
|
||||
status = False
|
||||
if Psu.sigterm:
|
||||
return status
|
||||
|
||||
try:
|
||||
status = thrift_try(psu_present_get, attempts=1)
|
||||
except Exception as e:
|
||||
@ -267,6 +286,7 @@ class Psu(PsuBase):
|
||||
"""
|
||||
return self.__index
|
||||
|
||||
@cancel_on_sigterm
|
||||
def get_temperature(self):
|
||||
"""
|
||||
Retrieves current temperature reading from PSU
|
||||
@ -274,8 +294,11 @@ class Psu(PsuBase):
|
||||
A float number of current temperature in Celsius up to nearest thousandth
|
||||
of one degree Celsius, e.g. 30.125
|
||||
"""
|
||||
# Operation may take a few seconds to process, so if state is
|
||||
# "termination signal", plugin doesn't perform this operation
|
||||
return self.get_thermal(0).get_temperature()
|
||||
|
||||
@cancel_on_sigterm
|
||||
def get_temperature_high_threshold(self):
|
||||
"""
|
||||
Retrieves the high threshold temperature of PSU
|
||||
@ -283,6 +306,8 @@ class Psu(PsuBase):
|
||||
A float number, the high threshold temperature of PSU in Celsius
|
||||
up to nearest thousandth of one degree Celsius, e.g. 30.125
|
||||
"""
|
||||
# Operation may take a few seconds to process, so if state is
|
||||
# "termination signal", plugin doesn't perform this operation
|
||||
return self.get_thermal(0).get_high_threshold()
|
||||
|
||||
@property
|
||||
|
Loading…
Reference in New Issue
Block a user