[BFN] Update psu.py to process sigterm signal (#13350)

Why I did it
Sometime, SIGTERM processing by psud takes more then default 10sec (please see stopwaitsecs in http://supervisord.org/configuration.html).

Due to this, the following two testcases may fail:

test_pmon_psud_stop_and_start_status
test_pmon_psud_term_and_start_status
How I did it
Update PSU plugin to process sigterm signal so that psud runs faster to end last cycle in time

How to verify it
Run SONiC CTs:
test_pmon_psud_stop_and_start_status
test_pmon_psud_term_and_start_status
This commit is contained in:
Dmytro Lytvynenko 2023-02-06 19:52:28 +02:00 committed by GitHub
parent 8fdbf9dce3
commit 5ff5e98437
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 6 deletions

View File

@ -9,6 +9,8 @@ try:
except ImportError as e:
raise ImportError(str(e) + "- required module not found")
SIGTERM_CAUGHT = False
def file_create(path, mode=None):
"""
Ensure that file is created with the appropriate permissions
@ -38,12 +40,15 @@ def cancel_on_sigterm(func):
def handler(sig, frame):
if sigterm_handler:
sigterm_handler(sig, frame)
global SIGTERM_CAUGHT
SIGTERM_CAUGHT = True
raise Exception("Canceling {}() execution...".format(func.__name__))
sigterm_handler = signal.getsignal(signal.SIGTERM)
signal.signal(signal.SIGTERM, handler)
result = None
try:
if not SIGTERM_CAUGHT:
result = func(*args, **kwargs)
finally:
signal.signal(signal.SIGTERM, sigterm_handler)

View File

@ -28,6 +28,13 @@ class Psu(PsuBase):
__sensors_info = None
__timestamp = 0
# When psud gets termination signal it starts processing last cycle.
# This cycle must be as fast as possible to be able to stop correctly,
# otherwise it will be killed, so the whole plugin must encounter
# this signal to process operations based on state, where the
# state is "termination signal got" and "no termination signal"
# State is "no termination signal"
sigterm = False
sigterm_default_handler = None
cls_inited = False
@ -54,12 +61,15 @@ class Psu(PsuBase):
if cls.sigterm_default_handler:
cls.sigterm_default_handler(sig, frame)
syslog.syslog(syslog.LOG_INFO, "Canceling PSU platform API calls...")
# Changing state to "termination signal"
cls.sigterm = True
@classmethod
def __sensors_get(cls, cached=True):
cls.__lock.acquire()
if time.time() > cls.__timestamp + 15:
# Operation may take a few seconds to process, so if state is
# "termination signal", plugin doesn't perform this operation
if time.time() > cls.__timestamp + 15 and not Psu.sigterm:
# Update cache once per 15 seconds
try:
cls.__sensors_info = get_psu_metrics()
@ -83,6 +93,8 @@ class Psu(PsuBase):
def psu_info_get(client):
return client.pltfm_mgr.pltfm_mgr_pwr_supply_info_get(self.__index)
# Operation may take a few seconds to process, so if state is
# "termination signal", plugin doesn't perform this operation
# Update cache once per 2 seconds
if self.__ts + 2 < time.time() and not Psu.sigterm:
self.__info = None
@ -96,6 +108,10 @@ class Psu(PsuBase):
return self.__info
return self.__info
@cancel_on_sigterm
def get_metric_value(self, metric_name):
return get_metric_value(Psu.__sensors_get(), "PSU%d ".format(self.__index) + metric_name)
@staticmethod
def get_num_psus():
"""
@ -127,7 +143,7 @@ class Psu(PsuBase):
A float number, the output voltage in volts,
e.g. 12.1
"""
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Voltage_in1_input" % self.__index)
return self.get_metric_value("12V Output Voltage_in1_input")
def get_current(self):
"""
@ -136,7 +152,7 @@ class Psu(PsuBase):
Returns:
A float number, the electric current in amperes, e.g 15.4
"""
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Current_curr2_input" % self.__index)
return self.get_metric_value("12V Output Current_curr2_input")
def get_input_voltage(self):
"""
@ -145,7 +161,7 @@ class Psu(PsuBase):
A float number, the input voltage in volts,
e.g. 220
"""
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Voltage_in0_input" % self.__index)
return self.get_metric_value("Input Voltage_in0_input")
def get_input_current(self):
"""
@ -153,7 +169,7 @@ class Psu(PsuBase):
Returns:
A float number, the electric current in amperes, e.g 0.8
"""
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Current_curr1_input" % self.__index)
return self.get_metric_value("Input Current_curr1_input")
def get_power(self):
"""
@ -177,6 +193,9 @@ class Psu(PsuBase):
return client.pltfm_mgr.pltfm_mgr_pwr_supply_present_get(self.__index)
status = False
if Psu.sigterm:
return status
try:
status = thrift_try(psu_present_get, attempts=1)
except Exception as e:
@ -267,6 +286,7 @@ class Psu(PsuBase):
"""
return self.__index
@cancel_on_sigterm
def get_temperature(self):
"""
Retrieves current temperature reading from PSU
@ -274,8 +294,11 @@ class Psu(PsuBase):
A float number of current temperature in Celsius up to nearest thousandth
of one degree Celsius, e.g. 30.125
"""
# Operation may take a few seconds to process, so if state is
# "termination signal", plugin doesn't perform this operation
return self.get_thermal(0).get_temperature()
@cancel_on_sigterm
def get_temperature_high_threshold(self):
"""
Retrieves the high threshold temperature of PSU
@ -283,6 +306,8 @@ class Psu(PsuBase):
A float number, the high threshold temperature of PSU in Celsius
up to nearest thousandth of one degree Celsius, e.g. 30.125
"""
# Operation may take a few seconds to process, so if state is
# "termination signal", plugin doesn't perform this operation
return self.get_thermal(0).get_high_threshold()
@property