- Why I did it watchdogutil uses platform API watchdog instance to control/query watchdog status. In Nvidia watchdog status, it caches "armed" status in a object member "WatchdogImplBase.armed". This is not working for CLI infrastructure because each CLI will create a new watchdog instance, the status cached in previous instance will totally lose. Consider following commands: admin@sonic:~$ sudo watchdogutil arm -s 100 =====> watchdog instance1, armed=True Watchdog armed for 100 seconds admin@sonic:~$ sudo watchdogutil status ======> watchdog instance2, armed=False Status: Unarmed admin@sonic:~$ sudo watchdogutil disarm =======> watchdog instance3, armed=False Failed to disarm Watchdog - How I did it Use sysfs to query watchdog status - How to verify it Manual test Unit test Conflicts: platform/mellanox/mlnx-platform-api/sonic_platform/watchdog.py platform/mellanox/mlnx-platform-api/tests/test_watchdog.py
This commit is contained in:
parent
611449dc88
commit
d19d904f6a
@ -27,6 +27,7 @@ import array
|
||||
import time
|
||||
|
||||
from sonic_platform_base.watchdog_base import WatchdogBase
|
||||
from . import utils
|
||||
|
||||
""" ioctl constants """
|
||||
IO_WRITE = 0x40000000
|
||||
@ -80,16 +81,18 @@ class WatchdogImplBase(WatchdogBase):
|
||||
super(WatchdogImplBase, self).__init__()
|
||||
|
||||
self.watchdog_path = wd_device_path
|
||||
self.watchdog = os.open(self.watchdog_path, os.O_WRONLY)
|
||||
|
||||
# Opening a watchdog descriptor starts
|
||||
# watchdog timer;
|
||||
# by default it should be stopped
|
||||
self._disablecard()
|
||||
self.armed = False
|
||||
|
||||
self._watchdog = None
|
||||
self.timeout = self._gettimeout()
|
||||
|
||||
@property
|
||||
def watchdog(self):
|
||||
if self._watchdog is None:
|
||||
self._watchdog = self.open_handle()
|
||||
return self._watchdog
|
||||
|
||||
def open_handle(self):
|
||||
return os.open(self.watchdog_path, os.O_WRONLY)
|
||||
|
||||
def _enablecard(self):
|
||||
"""
|
||||
Turn on the watchdog timer
|
||||
@ -131,10 +134,7 @@ class WatchdogImplBase(WatchdogBase):
|
||||
@return watchdog timeout
|
||||
"""
|
||||
|
||||
req = array.array('I', [0])
|
||||
fcntl.ioctl(self.watchdog, WDIOC_GETTIMEOUT, req, True)
|
||||
|
||||
return int(req[0])
|
||||
return utils.read_int_from_file('/run/hw-management/watchdog/main/timeout')
|
||||
|
||||
def _gettimeleft(self):
|
||||
"""
|
||||
@ -142,10 +142,7 @@ class WatchdogImplBase(WatchdogBase):
|
||||
@return time left in seconds
|
||||
"""
|
||||
|
||||
req = array.array('I', [0])
|
||||
fcntl.ioctl(self.watchdog, WDIOC_GETTIMELEFT, req, True)
|
||||
|
||||
return int(req[0])
|
||||
return utils.read_int_from_file('/run/hw-management/watchdog/main/timeleft')
|
||||
|
||||
def arm(self, seconds):
|
||||
"""
|
||||
@ -159,11 +156,10 @@ class WatchdogImplBase(WatchdogBase):
|
||||
try:
|
||||
if self.timeout != seconds:
|
||||
self.timeout = self._settimeout(seconds)
|
||||
if self.armed:
|
||||
if self.is_armed():
|
||||
self._keepalive()
|
||||
else:
|
||||
self._enablecard()
|
||||
self.armed = True
|
||||
ret = self.timeout
|
||||
except IOError:
|
||||
pass
|
||||
@ -176,10 +172,9 @@ class WatchdogImplBase(WatchdogBase):
|
||||
"""
|
||||
|
||||
disarmed = False
|
||||
if self.armed:
|
||||
if self.is_armed():
|
||||
try:
|
||||
self._disablecard()
|
||||
self.armed = False
|
||||
disarmed = True
|
||||
except IOError:
|
||||
pass
|
||||
@ -191,7 +186,7 @@ class WatchdogImplBase(WatchdogBase):
|
||||
Implements is_armed WatchdogBase API
|
||||
"""
|
||||
|
||||
return self.armed
|
||||
return utils.read_str_from_file('/run/hw-management/watchdog/main/state') == 'active'
|
||||
|
||||
def get_remaining_time(self):
|
||||
"""
|
||||
@ -200,7 +195,7 @@ class WatchdogImplBase(WatchdogBase):
|
||||
|
||||
timeleft = WD_COMMON_ERROR
|
||||
|
||||
if self.armed:
|
||||
if self.is_armed():
|
||||
try:
|
||||
timeleft = self._gettimeleft()
|
||||
except IOError:
|
||||
@ -213,13 +208,15 @@ class WatchdogImplBase(WatchdogBase):
|
||||
Close watchdog
|
||||
"""
|
||||
|
||||
os.close(self.watchdog)
|
||||
if self._watchdog is not None:
|
||||
os.close(self._watchdog)
|
||||
|
||||
|
||||
class WatchdogType1(WatchdogImplBase):
|
||||
"""
|
||||
Watchdog type 1
|
||||
"""
|
||||
TIMESTAMP_FILE = '/tmp/nvidia/watchdog_timestamp'
|
||||
|
||||
def arm(self, seconds):
|
||||
"""
|
||||
@ -230,7 +227,8 @@ class WatchdogType1(WatchdogImplBase):
|
||||
ret = WatchdogImplBase.arm(self, seconds)
|
||||
# Save the watchdog arm timestamp
|
||||
# requiered for get_remaining_time()
|
||||
self.arm_timestamp = time.time()
|
||||
os.makedirs('/tmp/nvidia', exist_ok=True)
|
||||
utils.write_file(self.TIMESTAMP_FILE, str(time.time()))
|
||||
|
||||
return ret
|
||||
|
||||
@ -243,8 +241,9 @@ class WatchdogType1(WatchdogImplBase):
|
||||
|
||||
timeleft = WD_COMMON_ERROR
|
||||
|
||||
if self.armed:
|
||||
timeleft = int(self.timeout - (time.time() - self.arm_timestamp))
|
||||
if self.is_armed():
|
||||
arm_timestamp = utils.read_float_from_file(self.TIMESTAMP_FILE)
|
||||
timeleft = int(self.timeout - (time.time() - arm_timestamp))
|
||||
|
||||
return timeleft
|
||||
|
||||
|
141
platform/mellanox/mlnx-platform-api/tests/test_watchdog.py
Normal file
141
platform/mellanox/mlnx-platform-api/tests/test_watchdog.py
Normal file
@ -0,0 +1,141 @@
|
||||
#
|
||||
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
|
||||
# Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import os
|
||||
import pytest
|
||||
import sys
|
||||
if sys.version_info.major == 3:
|
||||
from unittest import mock
|
||||
else:
|
||||
import mock
|
||||
|
||||
test_path = os.path.dirname(os.path.abspath(__file__))
|
||||
modules_path = os.path.dirname(test_path)
|
||||
sys.path.insert(0, modules_path)
|
||||
|
||||
from sonic_platform.chassis import Chassis
|
||||
from sonic_platform.watchdog import get_watchdog, \
|
||||
WatchdogType2, \
|
||||
WatchdogType1, \
|
||||
is_mlnx_wd_main, \
|
||||
is_wd_type2
|
||||
|
||||
|
||||
class TestWatchdog:
|
||||
@mock.patch('sonic_platform.watchdog.is_mlnx_wd_main')
|
||||
@mock.patch('sonic_platform.watchdog.os.listdir')
|
||||
def test_get_watchdog_no_device(self, mock_listdir, mock_is_main):
|
||||
mock_listdir.return_value = []
|
||||
assert get_watchdog() is None
|
||||
|
||||
mock_listdir.return_value = ['invalid']
|
||||
mock_is_main.return_value = True
|
||||
assert get_watchdog() is None
|
||||
|
||||
mock_listdir.return_value = ['watchdog1']
|
||||
mock_is_main.return_value = False
|
||||
assert get_watchdog() is None
|
||||
|
||||
@mock.patch('sonic_platform.watchdog.is_mlnx_wd_main')
|
||||
@mock.patch('sonic_platform.watchdog.is_wd_type2')
|
||||
@mock.patch('sonic_platform.watchdog.os.listdir', mock.MagicMock(return_value=['watchdog1', 'watchdog2']))
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||
@pytest.mark.parametrize('test_para',
|
||||
[(True, WatchdogType2), (False, WatchdogType1)])
|
||||
def test_get_watchdog(self, mock_is_type2, mock_is_main, test_para):
|
||||
mock_is_main.side_effect = lambda dev: dev == 'watchdog2'
|
||||
mock_is_type2.return_value = test_para[0]
|
||||
chassis = Chassis()
|
||||
watchdog = chassis.get_watchdog()
|
||||
assert isinstance(watchdog, test_para[1])
|
||||
assert watchdog.watchdog_path == '/dev/watchdog2'
|
||||
|
||||
def test_is_mlnx_wd_main(self):
|
||||
mock_os_open = mock.mock_open(read_data='mlx-wdt-main')
|
||||
with mock.patch('sonic_platform.watchdog.open', mock_os_open):
|
||||
assert is_mlnx_wd_main('')
|
||||
|
||||
mock_os_open = mock.mock_open(read_data='invalid')
|
||||
with mock.patch('sonic_platform.watchdog.open', mock_os_open):
|
||||
assert not is_mlnx_wd_main('')
|
||||
mock_os_open.side_effect = IOError
|
||||
with mock.patch('sonic_platform.watchdog.open', mock_os_open):
|
||||
assert not is_mlnx_wd_main('')
|
||||
|
||||
@mock.patch('sonic_platform.watchdog.os.path.exists')
|
||||
@pytest.mark.parametrize('test_para',
|
||||
[True, False])
|
||||
def test_is_wd_type2(self, mock_exists, test_para):
|
||||
mock_exists.return_value = test_para
|
||||
assert is_wd_type2('') is test_para
|
||||
|
||||
@mock.patch('sonic_platform.utils.read_str_from_file')
|
||||
def test_is_armed(self, mock_read):
|
||||
watchdog = WatchdogType2('watchdog2')
|
||||
mock_read.return_value = 'inactive'
|
||||
assert not watchdog.is_armed()
|
||||
mock_read.return_value = 'active'
|
||||
assert watchdog.is_armed()
|
||||
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.is_armed')
|
||||
def test_arm_disarm_watchdog2(self, mock_is_armed):
|
||||
watchdog = WatchdogType2('watchdog2')
|
||||
assert watchdog.arm(-1) == -1
|
||||
mock_is_armed.return_value = False
|
||||
watchdog.arm(10)
|
||||
mock_is_armed.return_value = True
|
||||
watchdog.arm(5)
|
||||
watchdog.disarm()
|
||||
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.is_armed')
|
||||
def test_arm_disarm_watchdog1(self, mock_is_armed):
|
||||
watchdog = WatchdogType1('watchdog1')
|
||||
assert watchdog.arm(-1) == -1
|
||||
mock_is_armed.return_value = False
|
||||
watchdog.arm(10)
|
||||
mock_is_armed.return_value = True
|
||||
watchdog.arm(5)
|
||||
watchdog.disarm()
|
||||
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase._gettimeleft', mock.MagicMock(return_value=10))
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.is_armed')
|
||||
def test_get_remaining_time_watchdog2(self, mock_is_armed):
|
||||
watchdog = WatchdogType2('watchdog2')
|
||||
mock_is_armed.return_value = False
|
||||
assert watchdog.get_remaining_time() == -1
|
||||
watchdog.arm(10)
|
||||
mock_is_armed.return_value = True
|
||||
assert watchdog.get_remaining_time() == 10
|
||||
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.open_handle', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.fcntl.ioctl', mock.MagicMock())
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase._gettimeleft', mock.MagicMock(return_value=10))
|
||||
@mock.patch('sonic_platform.watchdog.WatchdogImplBase.is_armed')
|
||||
def test_get_remaining_time_watchdog1(self, mock_is_armed):
|
||||
watchdog = WatchdogType1('watchdog2')
|
||||
mock_is_armed.return_value = False
|
||||
assert watchdog.get_remaining_time() == -1
|
||||
watchdog.arm(10)
|
||||
mock_is_armed.return_value = True
|
||||
assert watchdog.get_remaining_time() > 0
|
Loading…
Reference in New Issue
Block a user