[BFN] Update psu.py to process sigterm signal (#13350)

Why I did it
Sometimes SIGTERM processing by psud takes more than the default 10 seconds (see stopwaitsecs in http://supervisord.org/configuration.html).

Because of this, the following two test cases may fail:

test_pmon_psud_stop_and_start_status
test_pmon_psud_term_and_start_status
How I did it
Update the PSU plugin to handle the SIGTERM signal so that psud runs its last cycle faster and finishes it in time.

How to verify it
Run SONiC CTs:
test_pmon_psud_stop_and_start_status
test_pmon_psud_term_and_start_status
This commit is contained in:
Dmytro Lytvynenko 2023-02-06 19:52:28 +02:00 committed by GitHub
parent 8fdbf9dce3
commit 5ff5e98437
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 6 deletions

View File

@ -9,6 +9,8 @@ try:
except ImportError as e: except ImportError as e:
raise ImportError(str(e) + "- required module not found") raise ImportError(str(e) + "- required module not found")
SIGTERM_CAUGHT = False
def file_create(path, mode=None): def file_create(path, mode=None):
""" """
Ensure that file is created with the appropriate permissions Ensure that file is created with the appropriate permissions
@ -38,13 +40,16 @@ def cancel_on_sigterm(func):
def handler(sig, frame): def handler(sig, frame):
if sigterm_handler: if sigterm_handler:
sigterm_handler(sig, frame) sigterm_handler(sig, frame)
global SIGTERM_CAUGHT
SIGTERM_CAUGHT = True
raise Exception("Canceling {}() execution...".format(func.__name__)) raise Exception("Canceling {}() execution...".format(func.__name__))
sigterm_handler = signal.getsignal(signal.SIGTERM) sigterm_handler = signal.getsignal(signal.SIGTERM)
signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGTERM, handler)
result = None result = None
try: try:
result = func(*args, **kwargs) if not SIGTERM_CAUGHT:
result = func(*args, **kwargs)
finally: finally:
signal.signal(signal.SIGTERM, sigterm_handler) signal.signal(signal.SIGTERM, sigterm_handler)
return result return result

View File

@ -28,6 +28,13 @@ class Psu(PsuBase):
__sensors_info = None __sensors_info = None
__timestamp = 0 __timestamp = 0
# When psud receives a termination signal it starts processing its last cycle.
# This cycle must be as fast as possible so that psud can stop correctly;
# otherwise it will be killed. The whole plugin must therefore take this
# signal into account and perform its operations based on the state, where
# the state is either "termination signal" or "no termination signal".
# Initial state is "no termination signal".
sigterm = False sigterm = False
sigterm_default_handler = None sigterm_default_handler = None
cls_inited = False cls_inited = False
@ -54,12 +61,15 @@ class Psu(PsuBase):
if cls.sigterm_default_handler: if cls.sigterm_default_handler:
cls.sigterm_default_handler(sig, frame) cls.sigterm_default_handler(sig, frame)
syslog.syslog(syslog.LOG_INFO, "Canceling PSU platform API calls...") syslog.syslog(syslog.LOG_INFO, "Canceling PSU platform API calls...")
# Changing state to "termination signal"
cls.sigterm = True cls.sigterm = True
@classmethod @classmethod
def __sensors_get(cls, cached=True): def __sensors_get(cls, cached=True):
cls.__lock.acquire() cls.__lock.acquire()
if time.time() > cls.__timestamp + 15: # Operation may take a few seconds to process, so if state is
# "termination signal", plugin doesn't perform this operation
if time.time() > cls.__timestamp + 15 and not Psu.sigterm:
# Update cache once per 15 seconds # Update cache once per 15 seconds
try: try:
cls.__sensors_info = get_psu_metrics() cls.__sensors_info = get_psu_metrics()
@ -83,6 +93,8 @@ class Psu(PsuBase):
def psu_info_get(client): def psu_info_get(client):
return client.pltfm_mgr.pltfm_mgr_pwr_supply_info_get(self.__index) return client.pltfm_mgr.pltfm_mgr_pwr_supply_info_get(self.__index)
# Operation may take a few seconds to process, so if state is
# "termination signal", plugin doesn't perform this operation
# Update cache once per 2 seconds # Update cache once per 2 seconds
if self.__ts + 2 < time.time() and not Psu.sigterm: if self.__ts + 2 < time.time() and not Psu.sigterm:
self.__info = None self.__info = None
@ -96,6 +108,10 @@ class Psu(PsuBase):
return self.__info return self.__info
return self.__info return self.__info
@cancel_on_sigterm
def get_metric_value(self, metric_name):
    """
    Looks up a single sensor metric belonging to this PSU

    The lookup key is "PSU<index> <metric_name>", e.g.
    "PSU1 Input Voltage_in0_input", and is resolved against the
    sensors data returned by Psu.__sensors_get().

    Args:
        metric_name: metric suffix that follows the "PSU<index> " prefix,
            e.g. "12V Output Voltage_in1_input"

    Returns:
        Whatever the module-level get_metric_value() helper returns for
        that key.
        NOTE(review): the cancel_on_sigterm decorator appears to abort or
        skip the call once SIGTERM is received - confirm what callers get
        back in that case.
    """
    # "%d" is a printf-style placeholder and must be filled with the "%"
    # operator. str.format() only substitutes "{}" fields, so it would
    # leave the literal "PSU%d " in the key and the lookup would never
    # match the sensor of this PSU.
    metric_key = ("PSU%d " % self.__index) + metric_name
    return get_metric_value(Psu.__sensors_get(), metric_key)
@staticmethod @staticmethod
def get_num_psus(): def get_num_psus():
""" """
@ -127,7 +143,7 @@ class Psu(PsuBase):
A float number, the output voltage in volts, A float number, the output voltage in volts,
e.g. 12.1 e.g. 12.1
""" """
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Voltage_in1_input" % self.__index) return self.get_metric_value("12V Output Voltage_in1_input")
def get_current(self): def get_current(self):
""" """
@ -136,7 +152,7 @@ class Psu(PsuBase):
Returns: Returns:
A float number, the electric current in amperes, e.g 15.4 A float number, the electric current in amperes, e.g 15.4
""" """
return get_metric_value(Psu.__sensors_get(), "PSU%d 12V Output Current_curr2_input" % self.__index) return self.get_metric_value("12V Output Current_curr2_input")
def get_input_voltage(self): def get_input_voltage(self):
""" """
@ -145,7 +161,7 @@ class Psu(PsuBase):
A float number, the input voltage in volts, A float number, the input voltage in volts,
e.g. 220 e.g. 220
""" """
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Voltage_in0_input" % self.__index) return self.get_metric_value("Input Voltage_in0_input")
def get_input_current(self): def get_input_current(self):
""" """
@ -153,7 +169,7 @@ class Psu(PsuBase):
Returns: Returns:
A float number, the electric current in amperes, e.g 0.8 A float number, the electric current in amperes, e.g 0.8
""" """
return get_metric_value(Psu.__sensors_get(), "PSU%d Input Current_curr1_input" % self.__index) return self.get_metric_value("Input Current_curr1_input")
def get_power(self): def get_power(self):
""" """
@ -177,6 +193,9 @@ class Psu(PsuBase):
return client.pltfm_mgr.pltfm_mgr_pwr_supply_present_get(self.__index) return client.pltfm_mgr.pltfm_mgr_pwr_supply_present_get(self.__index)
status = False status = False
if Psu.sigterm:
return status
try: try:
status = thrift_try(psu_present_get, attempts=1) status = thrift_try(psu_present_get, attempts=1)
except Exception as e: except Exception as e:
@ -267,6 +286,7 @@ class Psu(PsuBase):
""" """
return self.__index return self.__index
@cancel_on_sigterm
def get_temperature(self): def get_temperature(self):
""" """
Retrieves current temperature reading from PSU Retrieves current temperature reading from PSU
@ -274,8 +294,11 @@ class Psu(PsuBase):
A float number of current temperature in Celsius up to nearest thousandth A float number of current temperature in Celsius up to nearest thousandth
of one degree Celsius, e.g. 30.125 of one degree Celsius, e.g. 30.125
""" """
# Operation may take a few seconds to process, so if state is
# "termination signal", plugin doesn't perform this operation
return self.get_thermal(0).get_temperature() return self.get_thermal(0).get_temperature()
@cancel_on_sigterm
def get_temperature_high_threshold(self): def get_temperature_high_threshold(self):
""" """
Retrieves the high threshold temperature of PSU Retrieves the high threshold temperature of PSU
@ -283,6 +306,8 @@ class Psu(PsuBase):
A float number, the high threshold temperature of PSU in Celsius A float number, the high threshold temperature of PSU in Celsius
up to nearest thousandth of one degree Celsius, e.g. 30.125 up to nearest thousandth of one degree Celsius, e.g. 30.125
""" """
# Operation may take a few seconds to process, so if state is
# "termination signal", plugin doesn't perform this operation
return self.get_thermal(0).get_high_threshold() return self.get_thermal(0).get_high_threshold()
@property @property