sonic-buildimage/platform/mellanox/non-upstream-patches/patches/0261-mlxbf-ptm-power-and-thermal-management-debugfs-drive.patch
Kebo Liu e286869b24
[Mellanox] Update HW-MGMT package to new version V.7.0030.1011 (#16239)
- Why I did it
1. Update Mellanox HW-MGMT package to newer version V.7.0030.1011
2. Replace the SONiC PMON Thermal control algorithm with the one inside the HW-MGMT package on all Nvidia platforms
3. Support Spectrum-4 systems

- How I did it
1. Update the HW-MGMT package version number and submodule pointer
2. Remove the thermal control algorithm implementation from Mellanox platform API
3. Revise the patch to HW-MGMT package which will disable HW-MGMT from running on SIMX
4. Update the downstream kernel patch list

Signed-off-by: Kebo Liu <kebol@nvidia.com>
2023-09-06 11:32:08 +03:00

257 lines
7.4 KiB
Diff

From 1193879b92e665c100056085385ffdb4ab2715cb Mon Sep 17 00:00:00 2001
From: Jitendra Lanka <jlanka@nvidia.com>
Date: Fri, 13 Jan 2023 15:21:02 -0500
Subject: [PATCH backport 5.10 2/6] mlxbf-ptm: power and thermal management
debugfs driver
The mlxbf-ptm driver implements a debugfs interface for BlueField
device power and thermal management. It provides some parameters
that can be monitored by system software.
Change-Id: I241e1406962548cef9b33c4b3dea925e675c3c88
Signed-off-by: Jitendra Lanka <jlanka@nvidia.com>
---
drivers/platform/mellanox/Kconfig | 10 ++
drivers/platform/mellanox/Makefile | 1 +
drivers/platform/mellanox/mlxbf-ptm.c | 195 ++++++++++++++++++++++++++
3 files changed, 206 insertions(+)
create mode 100644 drivers/platform/mellanox/mlxbf-ptm.c
diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index a5231c23a..48bd61f61 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -106,6 +106,16 @@ config MLXBF_TRIO
This driver supports the TRIO PCIe root complex interface on
Mellanox BlueField SoCs.
+config MLXBF_PTM
+ tristate "BlueField Power and Thermal Management debugfs interface"
+ depends on ARM64
+ depends on DEBUG_FS
+ help
+ If you say yes to this option, support will be added for the
+ mlxbf-ptm driver. This driver provides a debugfs interface
+ to userspace with information related to power and thermal
+ management of the BlueField device.
+
source "drivers/platform/mellanox/mlxbf_pka/Kconfig"
config NVSW_SN2201
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 7c6393ebe..6aa0ab157 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_MLXBF_PMC) += mlxbf-pmc.o
obj-$(CONFIG_MLXBF_TMFIFO) += mlxbf-tmfifo.o
obj-$(CONFIG_MLXBF_TRIO) += mlx-trio.o
obj-$(CONFIG_MLXBF_LIVEFISH) += mlxbf-livefish.o
+obj-$(CONFIG_MLXBF_PTM) += mlxbf-ptm.o
obj-$(CONFIG_MLXREG_HOTPLUG) += mlxreg-hotplug.o
obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
obj-$(CONFIG_MLXREG_LC) += mlxreg-lc.o
diff --git a/drivers/platform/mellanox/mlxbf-ptm.c b/drivers/platform/mellanox/mlxbf-ptm.c
new file mode 100644
index 000000000..307ba1f33
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-ptm.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (C) 2023 NVIDIA Corporation & Affiliates.
+ *
+ * Nvidia Bluefield power and thermal debugfs driver
+ * This driver provides a debugfs interface for systems management
+ * software to monitor power and thermal actions.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/arm-smccc.h>
+
+/* SMC function IDs, passed as smc_op to smc_call1() / smc_call0() */
+#define MLNX_PTM_GET_VR0_POWER 0x82000101
+#define MLNX_PTM_GET_VR1_POWER 0x82000102
+#define MLNX_PTM_GET_THROTTLE_STATE 0x82000103
+#define MLNX_PTM_GET_DDR_THLD 0x82000104
+#define MLNX_PTM_GET_STATUS_REG 0x82000105
+#define MLNX_PTM_GET_PTHROTTLE 0x82000106
+#define MLNX_PTM_GET_TTHROTTLE 0x82000107
+#define MLNX_PTM_GET_MAX_TEMP 0x82000108
+#define MLNX_PTM_GET_PWR_EVT_CNT 0x82000109
+#define MLNX_PTM_GET_TEMP_EVT_CNT 0x8200010A
+
+#define MLNX_POWER_ERROR 300 /* VR readings above this count as 0 in total_power_show() */
+
+/*
+ * Dentry of the "monitors" debugfs directory. Set by mlxbf_ptm_init() and
+ * removed, together with everything below it, by mlxbf_ptm_exit().
+ *
+ * Kept file-local: it is only used inside this driver, and a global symbol
+ * with such a generic name could clash with other kernel code.
+ */
+static struct dentry *monitors;
+
+/*
+ * Issue the SMC identified by @smc_op with @smc_arg as its first argument.
+ * The remaining argument registers are passed as zero.
+ *
+ * Returns the a0 result register of the call, converted to int.
+ */
+static int smc_call1(unsigned int smc_op, int smc_arg)
+{
+	struct arm_smccc_res smc_res;
+	int ret;
+
+	arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &smc_res);
+	ret = smc_res.a0;
+
+	return ret;
+}
+
+/*
+ * Issue the SMC identified by @smc_op with a zero first argument.
+ *
+ * A static inline wrapper instead of a function-like macro so that the
+ * argument is type-checked. Returns whatever smc_call1() returns.
+ */
+static inline int smc_call0(unsigned int smc_op)
+{
+	return smc_call1(smc_op, 0);
+}
+
+/*
+ * Read callback behind throttling_state_fops: stores the result of the
+ * MLNX_PTM_GET_THROTTLE_STATE SMC in @val. @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int throttling_state_show(void *data, u64 *val)
+{
+	int state = smc_call0(MLNX_PTM_GET_THROTTLE_STATE);
+
+	*val = state;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(throttling_state_fops, throttling_state_show, NULL,
+			"%llu\n");
+
+/*
+ * Read callback behind pthrottling_state_fops: stores the result of the
+ * MLNX_PTM_GET_PTHROTTLE SMC in @val. @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int pthrottling_state_show(void *data, u64 *val)
+{
+	int pstate = smc_call0(MLNX_PTM_GET_PTHROTTLE);
+
+	*val = pstate;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(pthrottling_state_fops, pthrottling_state_show, NULL,
+			"%llu\n");
+
+/*
+ * Read callback behind tthrottling_state_fops: stores the result of the
+ * MLNX_PTM_GET_TTHROTTLE SMC in @val. @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int tthrottling_state_show(void *data, u64 *val)
+{
+	int tstate = smc_call0(MLNX_PTM_GET_TTHROTTLE);
+
+	*val = tstate;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(tthrottling_state_fops, tthrottling_state_show, NULL,
+			"%llu\n");
+
+/*
+ * Read callback behind core_temp_fops: stores the result of the
+ * MLNX_PTM_GET_MAX_TEMP SMC in @val. @data is unused.
+ *
+ * Always returns 0. Unlike the other attributes in this file, the value is
+ * printed as a signed decimal ("%lld").
+ */
+static int core_temp_show(void *data, u64 *val)
+{
+	int temp = smc_call0(MLNX_PTM_GET_MAX_TEMP);
+
+	*val = temp;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(core_temp_fops, core_temp_show, NULL, "%lld\n");
+
+/*
+ * Read callback behind pwr_evt_counter_fops: stores the result of the
+ * MLNX_PTM_GET_PWR_EVT_CNT SMC in @val. @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int pwr_evt_counter_show(void *data, u64 *val)
+{
+	int count = smc_call0(MLNX_PTM_GET_PWR_EVT_CNT);
+
+	*val = count;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(pwr_evt_counter_fops, pwr_evt_counter_show, NULL,
+			"%llu\n");
+
+/*
+ * Read callback behind temp_evt_counter_fops: stores the result of the
+ * MLNX_PTM_GET_TEMP_EVT_CNT SMC in @val. @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int temp_evt_counter_show(void *data, u64 *val)
+{
+	int count = smc_call0(MLNX_PTM_GET_TEMP_EVT_CNT);
+
+	*val = count;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(temp_evt_counter_fops, temp_evt_counter_show, NULL,
+			"%llu\n");
+
+/*
+ * Read callback behind vr0_power_fops: stores the result of the
+ * MLNX_PTM_GET_VR0_POWER SMC in @val without any range check.
+ * @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int vr0_power_show(void *data, u64 *val)
+{
+	int power = smc_call0(MLNX_PTM_GET_VR0_POWER);
+
+	*val = power;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(vr0_power_fops, vr0_power_show, NULL, "%llu\n");
+
+/*
+ * Read callback behind vr1_power_fops: stores the result of the
+ * MLNX_PTM_GET_VR1_POWER SMC in @val without any range check.
+ * @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int vr1_power_show(void *data, u64 *val)
+{
+	int power = smc_call0(MLNX_PTM_GET_VR1_POWER);
+
+	*val = power;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(vr1_power_fops, vr1_power_show, NULL, "%llu\n");
+
+/*
+ * Read callback behind total_power_fops: queries the VR0 and then the VR1
+ * power SMC and stores the sum of the two readings in @val. @data is unused.
+ *
+ * A reading greater than MLNX_POWER_ERROR contributes 0 to the sum. Because
+ * the int SMC result is widened to u64 before the comparison, a negative
+ * result becomes a very large value and is excluded the same way.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int total_power_show(void *data, u64 *val)
+{
+	u64 vr0_pwr = smc_call0(MLNX_PTM_GET_VR0_POWER);
+	u64 vr1_pwr = smc_call0(MLNX_PTM_GET_VR1_POWER);
+	u64 sum = 0;
+
+	if (vr0_pwr <= MLNX_POWER_ERROR)
+		sum += vr0_pwr;
+	if (vr1_pwr <= MLNX_POWER_ERROR)
+		sum += vr1_pwr;
+	*val = sum;
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(total_power_fops, total_power_show, NULL, "%llu\n");
+
+/*
+ * Read callback behind ddr_thld_fops: stores the result of the
+ * MLNX_PTM_GET_DDR_THLD SMC in @val. @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int ddr_thld_show(void *data, u64 *val)
+{
+	int thld = smc_call0(MLNX_PTM_GET_DDR_THLD);
+
+	*val = thld;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(ddr_thld_fops, ddr_thld_show, NULL, "%llu\n");
+
+/*
+ * Read callback behind error_status_fops: stores the result of the
+ * MLNX_PTM_GET_STATUS_REG SMC in @val. @data is unused.
+ *
+ * Always returns 0. The value is printed as an unsigned decimal.
+ */
+static int error_status_show(void *data, u64 *val)
+{
+	int status_reg = smc_call0(MLNX_PTM_GET_STATUS_REG);
+
+	*val = status_reg;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(error_status_fops, error_status_show, NULL,
+			"%llu\n");
+
+
+/*
+ * Module init: build the debugfs tree
+ *
+ *	mlxbf-ptm/monitors/status/<attribute>
+ *
+ * under the debugfs root, with one read-only (0444) file per attribute.
+ *
+ * The top-level "mlxbf-ptm" directory is reused if it already exists and
+ * created otherwise. Results of the debugfs calls are not checked; debugfs
+ * creation helpers accept an error-valued parent, so a failure simply leaves
+ * the tree incomplete.
+ *
+ * Always returns 0.
+ */
+static int __init mlxbf_ptm_init(void)
+{
+	/* File name -> file operations, in creation order. */
+	static const struct {
+		const char *name;
+		const struct file_operations *fops;
+	} attrs[] = {
+		{ "vr0_power",				&vr0_power_fops },
+		{ "vr1_power",				&vr1_power_fops },
+		{ "total_power",			&total_power_fops },
+		{ "ddr_temp",				&ddr_thld_fops },
+		{ "core_temp",				&core_temp_fops },
+		{ "power_throttling_event_count",	&pwr_evt_counter_fops },
+		{ "thermal_throttling_event_count",	&temp_evt_counter_fops },
+		{ "throttling_state",			&throttling_state_fops },
+		{ "power_throttling_state",		&pthrottling_state_fops },
+		{ "thermal_throttling_state",		&tthrottling_state_fops },
+		{ "error_state",			&error_status_fops },
+	};
+	struct dentry *ptm_root, *status;
+	bool root_found;
+	size_t i;
+
+	ptm_root = debugfs_lookup("mlxbf-ptm", NULL);
+	root_found = ptm_root != NULL;
+	if (!root_found)
+		ptm_root = debugfs_create_dir("mlxbf-ptm", NULL);
+
+	monitors = debugfs_create_dir("monitors", ptm_root);
+
+	/*
+	 * debugfs_lookup() hands back the dentry with an extra reference that
+	 * the caller has to release. Drop it once the child directory has been
+	 * created; without this the reference is leaked every time the
+	 * directory already exists at load time.
+	 */
+	if (root_found)
+		dput(ptm_root);
+
+	status = debugfs_create_dir("status", monitors);
+
+	for (i = 0; i < ARRAY_SIZE(attrs); i++)
+		debugfs_create_file(attrs[i].name, 0444, status, NULL,
+				    attrs[i].fops);
+
+	return 0;
+}
+
+/*
+ * Module exit: remove the "monitors" directory and everything below it.
+ * The top-level "mlxbf-ptm" directory is not removed here.
+ */
+static void __exit mlxbf_ptm_exit(void)
+{
+ debugfs_remove_recursive(monitors);
+}
+
+module_init(mlxbf_ptm_init); /* creates the debugfs tree */
+module_exit(mlxbf_ptm_exit); /* removes the "monitors" subtree */
+
+MODULE_AUTHOR("Jitendra Lanka <jlanka@nvidia.com>");
+MODULE_DESCRIPTION("Nvidia Bluefield power and thermal debugfs driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("1.0");
--
2.20.1