[Mellanox] Advance hw-mgmt to v.7.0020.4104 (#13372)
- Why I did it: Advance the hw-mgmt service to v.7.0020.4104. Add missing thermal sensors that are supported by the hw-mgmt package. Delay the system health service until after hw-mgmt has started on Mellanox platforms, in order to avoid reading some sensors before they are ready. Depends on sonic-net/sonic-linux-kernel#305. - How I did it: 1. Update the hw-mgmt version. 2. Add the missing sensors. 3. Delay the service. - How to verify it: Regression test. Signed-off-by: Stephen Sun <stephens@nvidia.com>
This commit is contained in:
parent
ead7925f7d
commit
3112997b5a
@ -67,6 +67,9 @@
|
||||
},
|
||||
{
|
||||
"name": "CPU Core 3 Temp"
|
||||
},
|
||||
{
|
||||
"name": "SODIMM 1 Temp"
|
||||
}
|
||||
],
|
||||
"sfps": [
|
||||
|
@ -67,6 +67,9 @@
|
||||
},
|
||||
{
|
||||
"name": "CPU Core 3 Temp"
|
||||
},
|
||||
{
|
||||
"name": "SODIMM 1 Temp"
|
||||
}
|
||||
],
|
||||
"sfps": [
|
||||
|
@ -114,6 +114,9 @@
|
||||
},
|
||||
{
|
||||
"name": "CPU Pack Temp"
|
||||
},
|
||||
{
|
||||
"name": "SODIMM 1 Temp"
|
||||
}
|
||||
],
|
||||
"sfps": [
|
||||
|
@ -114,6 +114,9 @@
|
||||
},
|
||||
{
|
||||
"name": "CPU Pack Temp"
|
||||
},
|
||||
{
|
||||
"name": "SODIMM 1 Temp"
|
||||
}
|
||||
],
|
||||
"sfps": [
|
||||
|
@ -101,6 +101,9 @@
|
||||
},
|
||||
{
|
||||
"name": "ASIC"
|
||||
},
|
||||
{
|
||||
"name": "SODIMM 1 Temp"
|
||||
}
|
||||
],
|
||||
"sfps": [{
|
||||
|
@ -16,7 +16,7 @@
|
||||
#
|
||||
# Mellanox HW Management
|
||||
|
||||
MLNX_HW_MANAGEMENT_VERSION = 7.0020.3006
|
||||
MLNX_HW_MANAGEMENT_VERSION = 7.0020.4104
|
||||
|
||||
export MLNX_HW_MANAGEMENT_VERSION
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
From 1a1011b6da491d35001df5a7204d4eecb2769767 Mon Sep 17 00:00:00 2001
|
||||
From 489764eb124e03087eb408dec27d769fa4f98459 Mon Sep 17 00:00:00 2001
|
||||
From: keboliu <kebol@mellanox.com>
|
||||
Date: Fri, 15 Jan 2021 14:41:16 +0800
|
||||
Subject: [PATCH] Make SONiC determine-reboot-cause service start after hw-mgmt
|
||||
service
|
||||
Subject: [PATCH 1/4] Make SONiC determine-reboot-cause service start after
|
||||
hw-mgmt service
|
||||
|
||||
Signed-off-by: Kebo Liu <kebol@nvidia.com>
|
||||
---
|
||||
@ -10,7 +10,7 @@ Signed-off-by: Kebo Liu <kebol@nvidia.com>
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/debian/hw-management.hw-management.service b/debian/hw-management.hw-management.service
|
||||
index 39a2a54..2104b87 100755
|
||||
index 8bdcaef..1c25ffb 100755
|
||||
--- a/debian/hw-management.hw-management.service
|
||||
+++ b/debian/hw-management.hw-management.service
|
||||
@@ -1,6 +1,7 @@
|
||||
@ -22,5 +22,5 @@ index 39a2a54..2104b87 100755
|
||||
[Service]
|
||||
Type=oneshot
|
||||
--
|
||||
1.9.1
|
||||
2.20.1
|
||||
|
||||
|
@ -1,59 +1,47 @@
|
||||
From 79dadd5b0d2f5e860b525c12d4d3843607b03a9f Mon Sep 17 00:00:00 2001
|
||||
From 422b64397f2f33b394d037820f0ceb4c09e3a725 Mon Sep 17 00:00:00 2001
|
||||
From: Alexander Allen <arallen@nvidia.com>
|
||||
Date: Fri, 21 Jan 2022 16:47:19 +0000
|
||||
Subject: [PATCH] Disable hw-mgmt on SimX platforms
|
||||
Subject: [PATCH 2/4] Disable hw-mgmt on SimX platforms
|
||||
|
||||
---
|
||||
usr/usr/bin/hw-management-ready.sh | 31 ++++++++++++++++--------------
|
||||
usr/usr/bin/hw-management-ready.sh | 11 +++++++----
|
||||
usr/usr/bin/hw-management.sh | 9 +++++++++
|
||||
2 files changed, 26 insertions(+), 14 deletions(-)
|
||||
2 files changed, 16 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/usr/usr/bin/hw-management-ready.sh b/usr/usr/bin/hw-management-ready.sh
|
||||
index 5a9698c..364f906 100755
|
||||
index 88672a8..7558c68 100755
|
||||
--- a/usr/usr/bin/hw-management-ready.sh
|
||||
+++ b/usr/usr/bin/hw-management-ready.sh
|
||||
@@ -51,19 +51,22 @@ if [ -d /var/run/hw-management ]; then
|
||||
@@ -51,17 +51,20 @@ if [ -d /var/run/hw-management ]; then
|
||||
rm -fr /var/run/hw-management
|
||||
fi
|
||||
|
||||
-case $board_type in
|
||||
-VMOD0014)
|
||||
- while [ ! -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]
|
||||
- do
|
||||
- sleep 1
|
||||
- done
|
||||
- ;;
|
||||
-*)
|
||||
- while [ ! -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]
|
||||
- do
|
||||
- sleep 1
|
||||
- done
|
||||
- ;;
|
||||
-esac
|
||||
+if [ -z "$(lspci -vvv | grep SimX)" ]; then
|
||||
+ case $board_type in
|
||||
+ VMOD0014)
|
||||
+ while [ ! -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]
|
||||
+ do
|
||||
+ sleep 1
|
||||
+ done
|
||||
+ ;;
|
||||
+ *)
|
||||
+ while [ ! -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]
|
||||
+ do
|
||||
+ sleep 1
|
||||
+ done
|
||||
+ ;;
|
||||
+ esac
|
||||
+ case $board_type in
|
||||
+ VMOD0014)
|
||||
if [ ! -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]; then
|
||||
timeout 180 bash -c 'until [ -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]; do sleep 0.2; done'
|
||||
fi
|
||||
;;
|
||||
-*)
|
||||
+ *)
|
||||
if [ ! -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]; then
|
||||
timeout 180 bash -c 'until [ -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]; do sleep 0.2; done'
|
||||
fi
|
||||
;;
|
||||
-esac
|
||||
+ esac
|
||||
+fi
|
||||
+
|
||||
echo "Start Chassis HW management service."
|
||||
logger -t hw-management -p daemon.notice "Start Chassis HW management service."
|
||||
diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
|
||||
index ebfabb0..c0c038e 100755
|
||||
index 1ee05b5..50d922b 100755
|
||||
--- a/usr/usr/bin/hw-management.sh
|
||||
+++ b/usr/usr/bin/hw-management.sh
|
||||
@@ -1495,6 +1495,13 @@ do_chip_down()
|
||||
@@ -2310,6 +2310,13 @@ do_chip_down()
|
||||
/usr/bin/hw-management-thermal-events.sh change hotplug_asic down %S %p
|
||||
}
|
||||
|
||||
@ -67,7 +55,7 @@ index ebfabb0..c0c038e 100755
|
||||
__usage="
|
||||
Usage: $(basename "$0") [Options]
|
||||
|
||||
@@ -1520,6 +1527,8 @@ Options:
|
||||
@@ -2335,6 +2342,8 @@ Options:
|
||||
force-reload Performs hw-management 'stop' and the 'start.
|
||||
"
|
||||
|
||||
@ -77,5 +65,5 @@ index ebfabb0..c0c038e 100755
|
||||
start)
|
||||
if [ -d /var/run/hw-management ]; then
|
||||
--
|
||||
2.17.1
|
||||
2.20.1
|
||||
|
||||
|
@ -1,14 +1,14 @@
|
||||
From 14b06a12802fc0e15116a64f419d002d0d21d695 Mon Sep 17 00:00:00 2001
|
||||
From 439639e939f896f9aee42a4dbd5216feb728220c Mon Sep 17 00:00:00 2001
|
||||
From: Alexander Allen <arallen@nvidia.com>
|
||||
Date: Thu, 17 Feb 2022 04:19:50 +0000
|
||||
Subject: [PATCH] Remove unused non-upstream kernel modules from load
|
||||
Subject: [PATCH 3/4] Remove unused non-upstream kernel modules from load
|
||||
|
||||
---
|
||||
usr/etc/modules-load.d/05-hw-management-modules.conf | 2 --
|
||||
1 file changed, 2 deletions(-)
|
||||
|
||||
diff --git a/usr/etc/modules-load.d/05-hw-management-modules.conf b/usr/etc/modules-load.d/05-hw-management-modules.conf
|
||||
index 39f621e..c0980bc 100644
|
||||
index cfcfaa4..dd3b5ca 100644
|
||||
--- a/usr/etc/modules-load.d/05-hw-management-modules.conf
|
||||
+++ b/usr/etc/modules-load.d/05-hw-management-modules.conf
|
||||
@@ -15,8 +15,6 @@ xdpe12284
|
||||
@ -21,5 +21,5 @@ index 39f621e..c0980bc 100644
|
||||
gpio-pca953x
|
||||
pmbus
|
||||
--
|
||||
2.17.1
|
||||
2.20.1
|
||||
|
||||
|
@ -0,0 +1,29 @@
|
||||
From 038bce6bf808ec9d082e96fec4184e060b3a85a9 Mon Sep 17 00:00:00 2001
|
||||
From: Stephen Sun <stephens@nvidia.com>
|
||||
Date: Mon, 28 Nov 2022 03:55:14 +0000
|
||||
Subject: [PATCH 4/4] Make system-health service start after hw-management to
|
||||
avoid failures
|
||||
|
||||
On SN2410, it can fail to read the file led_status_capability if it starts from ONIE
|
||||
|
||||
Signed-off-by: Stephen Sun <stephens@nvidia.com>
|
||||
---
|
||||
debian/hw-management.hw-management.service | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/debian/hw-management.hw-management.service b/debian/hw-management.hw-management.service
|
||||
index 1c25ffb..0fbd877 100755
|
||||
--- a/debian/hw-management.hw-management.service
|
||||
+++ b/debian/hw-management.hw-management.service
|
||||
@@ -1,7 +1,7 @@
|
||||
[Unit]
|
||||
Description=Chassis HW management service of Mellanox systems
|
||||
Documentation=man:hw-management.service(8)
|
||||
-Before=determine-reboot-cause.service
|
||||
+Before=determine-reboot-cause.service system-health.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 137109ed15be147a99b59ceb9dfa1799688dfb71
|
||||
Subproject commit 9c3f6b2d6ac4be2eda936522f369ff8920deb4e2
|
Loading…
Reference in New Issue
Block a user