From 03804ad8d29eb72f22679c25d33a27478aab3542 Mon Sep 17 00:00:00 2001 From: Tomer Shalvi <116184476+tshalvi@users.noreply.github.com> Date: Tue, 17 Jan 2023 18:43:49 +0200 Subject: [PATCH] Moving multiprocessing.Manager to the correct sub-process (#13377) Why I did it There is a queue in sysmonitor.py that is created from a multiprocessing.Manager object. After a fast-reboot, the system health monitor is shut down, which causes this Manager to be shut down as well, since it is a child process of healthd. That's why I moved the creation of this Manager from the top of the file to the function Sysmonitor.system_service() (the only place it is used), to make the Manager a child process of Sysmonitor instead of Healthd. This way both the queue (the Manager) and the processes that use this queue will be child processes of the same process, and the problematic scenario of sysmonitor sending messages to a dead queue will not be possible. How I did it Removed the global definition of the manager and moved it into the system_service() function. How to verify it Perform a fast reboot and verify the traceback issue is fixed. --- src/system-health/health_checker/sysmonitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/system-health/health_checker/sysmonitor.py b/src/system-health/health_checker/sysmonitor.py index e69d289fc5..dde0b73d1b 100755 --- a/src/system-health/health_checker/sysmonitor.py +++ b/src/system-health/health_checker/sysmonitor.py @@ -19,7 +19,6 @@ spl_srv_list = ['database-chassis', 'gbsyncd'] SELECT_TIMEOUT_MSECS = 1000 QUEUE_TIMEOUT = 15 TASK_STOP_TIMEOUT = 10 -mpmgr = multiprocessing.Manager() logger = Logger(log_identifier=SYSLOG_IDENTIFIER) @@ -420,6 +419,7 @@ class Sysmonitor(ProcessTaskBase): self.state_db = swsscommon.SonicV2Connector(host='127.0.0.1') self.state_db.connect(self.state_db.STATE_DB) + mpmgr = multiprocessing.Manager() myQ = mpmgr.Queue() try: monitor_system_bus = MonitorSystemBusTask(myQ)