[Mellanox] Fix issue: watchdogutil command does not work (#16091)
- Why I did it watchdogutil uses platform API watchdog instance to control/query watchdog status. In Nvidia watchdog status, it caches "armed" status in a object member "WatchdogImplBase.armed". This is not working for CLI infrastructure because each CLI will create a new watchdog instance, the status cached in previous instance will totally lose. Consider following commands: admin@sonic:~$ sudo watchdogutil arm -s 100 =====> watchdog instance1, armed=True Watchdog armed for 100 seconds admin@sonic:~$ sudo watchdogutil status ======> watchdog instance2, armed=False Status: Unarmed admin@sonic:~$ sudo watchdogutil disarm =======> watchdog instance3, armed=False Failed to disarm Watchdog - How I did it Use sysfs to query watchdog status - How to verify it Manual test Unit test
This commit is contained in:
parent
d42066cf8d
commit
95f317a5e2
@ -27,6 +27,7 @@ import array
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from sonic_platform_base.watchdog_base import WatchdogBase
|
from sonic_platform_base.watchdog_base import WatchdogBase
|
||||||
|
from . import utils
|
||||||
|
|
||||||
""" ioctl constants """
|
""" ioctl constants """
|
||||||
IO_WRITE = 0x40000000
|
IO_WRITE = 0x40000000
|
||||||
@ -80,16 +81,15 @@ class WatchdogImplBase(WatchdogBase):
|
|||||||
super(WatchdogImplBase, self).__init__()
|
super(WatchdogImplBase, self).__init__()
|
||||||
|
|
||||||
self.watchdog_path = wd_device_path
|
self.watchdog_path = wd_device_path
|
||||||
self.watchdog = self.open_handle()
|
self._watchdog = None
|
||||||
|
|
||||||
# Opening a watchdog descriptor starts
|
|
||||||
# watchdog timer;
|
|
||||||
# by default it should be stopped
|
|
||||||
self._disablecard()
|
|
||||||
self.armed = False
|
|
||||||
|
|
||||||
self.timeout = self._gettimeout()
|
self.timeout = self._gettimeout()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def watchdog(self):
|
||||||
|
if self._watchdog is None:
|
||||||
|
self._watchdog = self.open_handle()
|
||||||
|
return self._watchdog
|
||||||
|
|
||||||
def open_handle(self):
|
def open_handle(self):
|
||||||
return os.open(self.watchdog_path, os.O_WRONLY)
|
return os.open(self.watchdog_path, os.O_WRONLY)
|
||||||
|
|
||||||
@ -134,10 +134,7 @@ class WatchdogImplBase(WatchdogBase):
|
|||||||
@return watchdog timeout
|
@return watchdog timeout
|
||||||
"""
|
"""
|
||||||
|
|
||||||
req = array.array('I', [0])
|
return utils.read_int_from_file('/run/hw-management/watchdog/main/timeout')
|
||||||
fcntl.ioctl(self.watchdog, WDIOC_GETTIMEOUT, req, True)
|
|
||||||
|
|
||||||
return int(req[0])
|
|
||||||
|
|
||||||
def _gettimeleft(self):
|
def _gettimeleft(self):
|
||||||
"""
|
"""
|
||||||
@ -145,10 +142,7 @@ class WatchdogImplBase(WatchdogBase):
|
|||||||
@return time left in seconds
|
@return time left in seconds
|
||||||
"""
|
"""
|
||||||
|
|
||||||
req = array.array('I', [0])
|
return utils.read_int_from_file('/run/hw-management/watchdog/main/timeleft')
|
||||||
fcntl.ioctl(self.watchdog, WDIOC_GETTIMELEFT, req, True)
|
|
||||||
|
|
||||||
return int(req[0])
|
|
||||||
|
|
||||||
def arm(self, seconds):
|
def arm(self, seconds):
|
||||||
"""
|
"""
|
||||||
@ -162,11 +156,10 @@ class WatchdogImplBase(WatchdogBase):
|
|||||||
try:
|
try:
|
||||||
if self.timeout != seconds:
|
if self.timeout != seconds:
|
||||||
self.timeout = self._settimeout(seconds)
|
self.timeout = self._settimeout(seconds)
|
||||||
if self.armed:
|
if self.is_armed():
|
||||||
self._keepalive()
|
self._keepalive()
|
||||||
else:
|
else:
|
||||||
self._enablecard()
|
self._enablecard()
|
||||||
self.armed = True
|
|
||||||
ret = self.timeout
|
ret = self.timeout
|
||||||
except IOError:
|
except IOError:
|
||||||
pass
|
pass
|
||||||
@ -179,10 +172,9 @@ class WatchdogImplBase(WatchdogBase):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
disarmed = False
|
disarmed = False
|
||||||
if self.armed:
|
if self.is_armed():
|
||||||
try:
|
try:
|
||||||
self._disablecard()
|
self._disablecard()
|
||||||
self.armed = False
|
|
||||||
disarmed = True
|
disarmed = True
|
||||||
except IOError:
|
except IOError:
|
||||||
pass
|
pass
|
||||||
@ -194,7 +186,7 @@ class WatchdogImplBase(WatchdogBase):
|
|||||||
Implements is_armed WatchdogBase API
|
Implements is_armed WatchdogBase API
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return self.armed
|
return utils.read_str_from_file('/run/hw-management/watchdog/main/state') == 'active'
|
||||||
|
|
||||||
def get_remaining_time(self):
|
def get_remaining_time(self):
|
||||||
"""
|
"""
|
||||||
@ -203,7 +195,7 @@ class WatchdogImplBase(WatchdogBase):
|
|||||||
|
|
||||||
timeleft = WD_COMMON_ERROR
|
timeleft = WD_COMMON_ERROR
|
||||||
|
|
||||||
if self.armed:
|
if self.is_armed():
|
||||||
try:
|
try:
|
||||||
timeleft = self._gettimeleft()
|
timeleft = self._gettimeleft()
|
||||||
except IOError:
|
except IOError:
|
||||||
@ -216,13 +208,15 @@ class WatchdogImplBase(WatchdogBase):
|
|||||||
Close watchdog
|
Close watchdog
|
||||||
"""
|
"""
|
||||||
|
|
||||||
os.close(self.watchdog)
|
if self._watchdog is not None:
|
||||||
|
os.close(self._watchdog)
|
||||||
|
|
||||||
|
|
||||||
class WatchdogType1(WatchdogImplBase):
|
class WatchdogType1(WatchdogImplBase):
|
||||||
"""
|
"""
|
||||||
Watchdog type 1
|
Watchdog type 1
|
||||||
"""
|
"""
|
||||||
|
TIMESTAMP_FILE = '/tmp/nvidia/watchdog_timestamp'
|
||||||
|
|
||||||
def arm(self, seconds):
|
def arm(self, seconds):
|
||||||
"""
|
"""
|
||||||
@ -233,7 +227,8 @@ class WatchdogType1(WatchdogImplBase):
|
|||||||
ret = WatchdogImplBase.arm(self, seconds)
|
ret = WatchdogImplBase.arm(self, seconds)
|
||||||
# Save the watchdog arm timestamp
|
# Save the watchdog arm timestamp
|
||||||
# requiered for get_remaining_time()
|
# requiered for get_remaining_time()
|
||||||
self.arm_timestamp = time.time()
|
os.makedirs('/tmp/nvidia', exist_ok=True)
|
||||||
|
utils.write_file(self.TIMESTAMP_FILE, str(time.time()))
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
@ -246,8 +241,9 @@ class WatchdogType1(WatchdogImplBase):
|
|||||||
|
|
||||||
timeleft = WD_COMMON_ERROR
|
timeleft = WD_COMMON_ERROR
|
||||||
|
|
||||||
if self.armed:
|
if self.is_armed():
|
||||||
timeleft = int(self.timeout - (time.time() - self.arm_timestamp))
|
arm_timestamp = utils.read_float_from_file(self.TIMESTAMP_FILE)
|
||||||
|
timeleft = int(self.timeout - (time.time() - arm_timestamp))
|
||||||
|
|
||||||
return timeleft
|
return timeleft
|
||||||
|
|
||||||
|
@ -84,46 +84,58 @@ class TestWatchdog:
|
|||||||
mock_exists.return_value = test_para
|
mock_exists.return_value = test_para
|
||||||
assert is_wd_type2('') is test_para
|
assert is_wd_type2('') is test_para
|
||||||
|
|
||||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
@mock.patch('sonic_platform.utils.read_str_from_file')
|
||||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
def test_is_armed(self, mock_read):
|
||||||
def test_arm_disarm_watchdog2(self):
|
|
||||||
watchdog = WatchdogType2('watchdog2')
|
watchdog = WatchdogType2('watchdog2')
|
||||||
assert watchdog.arm(-1) == -1
|
mock_read.return_value = 'inactive'
|
||||||
assert not watchdog.is_armed()
|
assert not watchdog.is_armed()
|
||||||
watchdog.arm(10)
|
mock_read.return_value = 'active'
|
||||||
assert watchdog.is_armed()
|
assert watchdog.is_armed()
|
||||||
watchdog.arm(5)
|
|
||||||
assert watchdog.is_armed()
|
|
||||||
watchdog.disarm()
|
|
||||||
assert not watchdog.is_armed()
|
|
||||||
|
|
||||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||||
def test_arm_disarm_watchdog1(self):
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.is_armed')
|
||||||
|
def test_arm_disarm_watchdog2(self, mock_is_armed):
|
||||||
|
watchdog = WatchdogType2('watchdog2')
|
||||||
|
assert watchdog.arm(-1) == -1
|
||||||
|
mock_is_armed.return_value = False
|
||||||
|
watchdog.arm(10)
|
||||||
|
mock_is_armed.return_value = True
|
||||||
|
watchdog.arm(5)
|
||||||
|
watchdog.disarm()
|
||||||
|
|
||||||
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||||
|
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||||
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.is_armed')
|
||||||
|
def test_arm_disarm_watchdog1(self, mock_is_armed):
|
||||||
watchdog = WatchdogType1('watchdog1')
|
watchdog = WatchdogType1('watchdog1')
|
||||||
assert watchdog.arm(-1) == -1
|
assert watchdog.arm(-1) == -1
|
||||||
assert not watchdog.is_armed()
|
mock_is_armed.return_value = False
|
||||||
watchdog.arm(10)
|
watchdog.arm(10)
|
||||||
assert watchdog.is_armed()
|
mock_is_armed.return_value = True
|
||||||
watchdog.arm(5)
|
watchdog.arm(5)
|
||||||
assert watchdog.is_armed()
|
|
||||||
watchdog.disarm()
|
watchdog.disarm()
|
||||||
assert not watchdog.is_armed()
|
|
||||||
|
|
||||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase._gettimeleft', mock.MagicMock(return_value=10))
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase._gettimeleft', mock.MagicMock(return_value=10))
|
||||||
def test_get_remaining_time_watchdog2(self):
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.is_armed')
|
||||||
|
def test_get_remaining_time_watchdog2(self, mock_is_armed):
|
||||||
watchdog = WatchdogType2('watchdog2')
|
watchdog = WatchdogType2('watchdog2')
|
||||||
|
mock_is_armed.return_value = False
|
||||||
assert watchdog.get_remaining_time() == -1
|
assert watchdog.get_remaining_time() == -1
|
||||||
watchdog.arm(10)
|
watchdog.arm(10)
|
||||||
|
mock_is_armed.return_value = True
|
||||||
assert watchdog.get_remaining_time() == 10
|
assert watchdog.get_remaining_time() == 10
|
||||||
|
|
||||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase._gettimeleft', mock.MagicMock(return_value=10))
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase._gettimeleft', mock.MagicMock(return_value=10))
|
||||||
def test_get_remaining_time_watchdog1(self):
|
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.is_armed')
|
||||||
|
def test_get_remaining_time_watchdog1(self, mock_is_armed):
|
||||||
watchdog = WatchdogType1('watchdog2')
|
watchdog = WatchdogType1('watchdog2')
|
||||||
|
mock_is_armed.return_value = False
|
||||||
assert watchdog.get_remaining_time() == -1
|
assert watchdog.get_remaining_time() == -1
|
||||||
watchdog.arm(10)
|
watchdog.arm(10)
|
||||||
|
mock_is_armed.return_value = True
|
||||||
assert watchdog.get_remaining_time() > 0
|
assert watchdog.get_remaining_time() > 0
|
||||||
|
Reference in New Issue
Block a user