[supervisor] Add patch to prevent 'supervisorctl start' command from hanging if system time has rolled backward (#1311)

* Add patch to prevent 'supervisorctl start' command from hanging if system time has rolled backward

* Also add unit tests for clock rollback scenarios
This commit is contained in:
Joe LeVeque 2018-01-18 11:43:57 -08:00 committed by GitHub
parent 134707f822
commit 0fa64cc618
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 285 additions and 3 deletions

1
.gitignore vendored
View File

@ -35,6 +35,7 @@ src/sonic-device-data/src/device/
src/sonic-device-data/src/debian/
src/supervisor/*
!src/supervisor/Makefile
!src/supervisor/patch/
src/thrift/*
!src/thrift/Makefile

View File

@ -1,5 +1,9 @@
# supervisor package
SUPERVISOR = python-supervisor_3.3.2-1_all.deb
SUPERVISOR_VERSION = 3.3.2
export SUPERVISOR_VERSION
SUPERVISOR = python-supervisor_$(SUPERVISOR_VERSION)-1_all.deb
$(SUPERVISOR)_SRC_PATH = $(SRC_PATH)/supervisor
SONIC_MAKE_DEBS += $(SUPERVISOR)

View File

@ -2,7 +2,7 @@
SHELL = /bin/bash
.SHELLFLAGS += -e
MAIN_TARGET = python-supervisor_3.3.2-1_all.deb
MAIN_TARGET = python-supervisor_$(SUPERVISOR_VERSION)-1_all.deb
$(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% :
# Remove any stale files
@ -11,7 +11,15 @@ $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% :
# Clone supervisor repo
git clone https://github.com/Supervisor/supervisor.git
pushd ./supervisor
git checkout -f 3.3.2
# Reset HEAD to the commit of the proper tag
# NOTE: Using "git checkout <tag_name>" here detaches our HEAD,
# which stg doesn't like, so we use this method instead
git reset --hard $(SUPERVISOR_VERSION)
# Apply patches
stg init
stg import -s ../patch/series
# Build Python and Debian package
python setup.py --command-packages=stdeb.command bdist_deb

View File

@ -0,0 +1,83 @@
From bac496af4c2051b3c15038e06533f9094b1c3604 Mon Sep 17 00:00:00 2001
From: Joe LeVeque <jolevequ@microsoft.com>
Date: Tue, 16 Jan 2018 20:42:13 +0000
Subject: [PATCH 1/2] Prevent excessive hanging/delays in process state
transitions if the system time rolls backward
---
supervisor/process.py | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/supervisor/process.py b/supervisor/process.py
index f9ddcd9..216372f 100644
--- a/supervisor/process.py
+++ b/supervisor/process.py
@@ -357,6 +357,12 @@ class Subprocess:
""" Log a 'waiting for x to stop' message with throttling. """
if self.state == ProcessStates.STOPPING:
now = time.time()
+
+ # If the system clock has moved backward, reset
+ # self.laststopreport to current system time
+ if now < self.laststopreport:
+ self.laststopreport = now;
+
if now > (self.laststopreport + 2): # every 2 seconds
self.config.options.logger.info(
'waiting for %s to stop' % self.config.name)
@@ -604,12 +610,27 @@ class Subprocess:
# STOPPED -> STARTING
self.spawn()
elif state == ProcessStates.BACKOFF:
+ if self.delay > 0 and now < self.delay - self.backoff:
+ # The system clock appears to have moved backward
+ # Reset self.delay accordingly
+ self.delay = now + self.backoff
+
if self.backoff <= self.config.startretries:
if now > self.delay:
# BACKOFF -> STARTING
self.spawn()
if state == ProcessStates.STARTING:
+ if now < self.laststart:
+ # The system clock appears to have moved backward
+ # Reset self.laststart to current system time
+ self.laststart = now;
+
+ if self.delay > 0 and now < self.delay - self.config.startsecs:
+ # The system clock appears to have moved backward
+ # Reset self.delay accordingly
+ self.delay = now + self.config.startsecs
+
if now - self.laststart > self.config.startsecs:
# STARTING -> RUNNING if the proc has started
# successfully and it has stayed up for at least
@@ -633,6 +654,11 @@ class Subprocess:
logger.info('gave up: %s %s' % (self.config.name, msg))
elif state == ProcessStates.STOPPING:
+ if self.delay > 0 and now < self.delay - self.config.stopwaitsecs:
+ # The system clock appears to have moved backward
+ # Reset self.delay accordingly
+ self.delay = now + self.config.stopwaitsecs
+
time_left = self.delay - now
if time_left <= 0:
# kill processes which are taking too long to stop with a final
@@ -805,6 +831,12 @@ class EventListenerPool(ProcessGroupBase):
if dispatch_capable:
if self.dispatch_throttle:
now = time.time()
+
+ # If the system clock has moved backward, reset
+ # self.last_dispatch to current system time
+ if now < self.last_dispatch:
+ self.last_dispatch = now;
+
if now - self.last_dispatch < self.dispatch_throttle:
return
self.dispatch()
--
2.1.4

View File

@ -0,0 +1,184 @@
From 43b6f3ee93593f666af2c6a7af897bf3b5ee762d Mon Sep 17 00:00:00 2001
From: Joe LeVeque <jolevequ@microsoft.com>
Date: Wed, 17 Jan 2018 22:27:56 +0000
Subject: [PATCH 2/2] Add unit test cases to ensure state transitions work if
system time has rolled backward
---
supervisor/tests/test_process.py | 146 +++++++++++++++++++++++++++++++++++++++
1 file changed, 146 insertions(+)
diff --git a/supervisor/tests/test_process.py b/supervisor/tests/test_process.py
index 7f9bab2..ea16315 100644
--- a/supervisor/tests/test_process.py
+++ b/supervisor/tests/test_process.py
@@ -736,6 +736,40 @@ class SubprocessTests(unittest.TestCase):
instance.stop_report()
self.assertEqual(len(options.logger.data), 1) # throttled
+ def test_stop_report_laststopreport_in_future(self):
+ future_time = time.time() + 3600 # 1 hour into the future
+ options = DummyOptions()
+ config = DummyPConfig(options, 'test', '/test')
+ instance = self._makeOne(config)
+ instance.pid = 11
+ dispatcher = DummyDispatcher(writable=True)
+ instance.dispatchers = {'foo':dispatcher}
+ from supervisor.states import ProcessStates
+ instance.state = ProcessStates.STOPPING
+ instance.laststopreport = future_time
+
+ # This iteration of stop_report() should reset instance.laststopreport
+ # to the current time
+ instance.stop_report()
+
+ # No logging should have taken place
+ self.assertEqual(len(options.logger.data), 0)
+
+ # Ensure instance.laststopreport has rolled backward
+ self.assertTrue(instance.laststopreport < future_time)
+
+ # Sleep for 2 seconds
+ time.sleep(2)
+
+ # This iteration of stop_report() should actaully trigger the report
+ instance.stop_report()
+
+ self.assertEqual(len(options.logger.data), 1)
+ self.assertEqual(options.logger.data[0], 'waiting for test to stop')
+ self.assertNotEqual(instance.laststopreport, 0)
+ instance.stop_report()
+ self.assertEqual(len(options.logger.data), 1) # throttled
+
def test_give_up(self):
options = DummyOptions()
config = DummyPConfig(options, 'test', '/test')
@@ -1402,6 +1436,92 @@ class SubprocessTests(unittest.TestCase):
event = L[0]
self.assertEqual(event.__class__, events.ProcessStateRunningEvent)
+ def test_transition_starting_to_running_laststart_in_future(self):
+ from supervisor import events
+ L = []
+ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x))
+ from supervisor.states import ProcessStates
+
+ future_time = time.time() + 3600 # 1 hour into the future
+ options = DummyOptions()
+ test_startsecs = 2
+
+ # this should go from STARTING to RUNNING via transition()
+ pconfig = DummyPConfig(options, 'process', 'process','/bin/process',
+ startsecs=test_startsecs)
+ process = self._makeOne(pconfig)
+ process.backoff = 1
+ process.delay = 1
+ process.system_stop = False
+ process.laststart = future_time
+ process.pid = 1
+ process.stdout_buffer = 'abc'
+ process.stderr_buffer = 'def'
+ process.state = ProcessStates.STARTING
+
+ # This iteration of transition() should reset process.laststart
+ # to the current time
+ process.transition()
+
+ # Process state should still be STARTING
+ self.assertEqual(process.state, ProcessStates.STARTING)
+
+ # Ensure process.laststart has rolled backward
+ self.assertTrue(process.laststart < future_time)
+
+ # Sleep for (startsecs + 1)
+ time.sleep(test_startsecs + 1)
+
+ # This iteration of transition() should actaully trigger the state
+ # transition to RUNNING
+ process.transition()
+
+ # this implies RUNNING
+ self.assertEqual(process.backoff, 0)
+ self.assertEqual(process.delay, 0)
+ self.assertFalse(process.system_stop)
+ self.assertEqual(process.state, ProcessStates.RUNNING)
+ self.assertEqual(options.logger.data[0],
+ 'success: process entered RUNNING state, process has '
+ 'stayed up for > than {} seconds (startsecs)'.format(test_startsecs))
+ self.assertEqual(len(L), 1)
+ event = L[0]
+ self.assertEqual(event.__class__, events.ProcessStateRunningEvent)
+
+ def test_transition_backoff_to_starting_delay_in_future(self):
+ from supervisor import events
+ L = []
+ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x))
+ from supervisor.states import ProcessStates, SupervisorStates
+
+ future_time = time.time() + 3600 # 1 hour into the future
+ options = DummyOptions()
+
+ pconfig = DummyPConfig(options, 'process', 'process','/bin/process')
+ process = self._makeOne(pconfig)
+ process.laststart = 1
+ process.delay = future_time
+ process.backoff = 0
+ process.state = ProcessStates.BACKOFF
+
+ # This iteration of transition() should reset process.delay
+ # to the current time
+ process.transition()
+
+ # Process state should still be BACKOFF
+ self.assertEqual(process.state, ProcessStates.BACKOFF)
+
+ # Ensure process.delay has rolled backward
+ self.assertTrue(process.delay < future_time)
+
+ # This iteration of transition() should actaully trigger the state
+ # transition to STARTING
+ process.transition()
+
+ self.assertEqual(process.state, ProcessStates.STARTING)
+ self.assertEqual(len(L), 1)
+ self.assertEqual(L[0].__class__, events.ProcessStateStartingEvent)
+
def test_transition_backoff_to_fatal(self):
from supervisor import events
L = []
@@ -2033,6 +2153,32 @@ class EventListenerPoolTests(ProcessGroupBaseTests):
self.assertEqual(process1.listener_state, EventListenerStates.BUSY)
self.assertEqual(process1.event, event)
+ def test_transition_event_proc_running_with_dispatch_throttle_last_dispatch_in_future(self):
+ future_time = time.time() + 3600 # 1 hour into the future
+ options = DummyOptions()
+ from supervisor.states import ProcessStates
+ pconfig1 = DummyPConfig(options, 'process1', 'process1','/bin/process1')
+ process1 = DummyProcess(pconfig1, state=ProcessStates.RUNNING)
+ gconfig = DummyPGroupConfig(options, pconfigs=[pconfig1])
+ pool = self._makeOne(gconfig)
+ pool.dispatch_throttle = 5
+ pool.last_dispatch = future_time
+ pool.processes = {'process1': process1}
+ event = DummyEvent()
+ from supervisor.states import EventListenerStates
+ process1.listener_state = EventListenerStates.READY
+ class DummyGroup:
+ config = gconfig
+ process1.group = DummyGroup
+ pool._acceptEvent(event)
+ pool.transition()
+
+ self.assertEqual(process1.transitioned, True)
+ self.assertEqual(pool.event_buffer, [event]) # not popped
+
+ # Ensure pool.last_dispatch has been rolled backward
+ self.assertTrue(pool.last_dispatch < future_time)
+
def test__dispatchEvent_notready(self):
options = DummyOptions()
from supervisor.states import ProcessStates
--
2.1.4

View File

@ -0,0 +1,2 @@
0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch
0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch