27f15d40e1
- Why I did it 1. Update Mellanox HW-MGMT package to newer version V.7.0030.1011 2. Replace the SONiC PMON Thermal control algorithm with the one inside the HW-MGMT package on all Nvidia platforms 3. Support Spectrum-4 systems - How I did it 1. Update the HW-MGMT package version number and submodule pointer 2. Remove the thermal control algorithm implementation from Mellanox platform API 3. Revise the patch to HW-MGMT package which will disable HW-MGMT from running on SIMX 4. Update the downstream kernel patch list Signed-off-by: Kebo Liu <kebol@nvidia.com>
100 lines
3.8 KiB
Diff
100 lines
3.8 KiB
Diff
From f2dffe0f83a05dfbf0190316f0d260f7d7ff76a8 Mon Sep 17 00:00:00 2001
|
|
From: Vadim Pasternak <vadimp@nvidia.com>
|
|
Date: Mon, 23 Jan 2023 21:38:24 +0200
|
|
Subject: [PATCH backport 5.10 181/182] Revert "mlxsw: thermal: Fix
|
|
out-of-bounds memory accesses"
|
|
|
|
This reverts commit e59d839743b50cb1d3f42a786bea48cc5621d254.
|
|
---
|
|
.../ethernet/mellanox/mlxsw/core_thermal.c | 52 +++++++++++++++++--
|
|
1 file changed, 47 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
|
|
index 529108aea3c6..88a2f63c8839 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
|
|
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
|
|
@@ -23,8 +23,16 @@
|
|
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
|
|
#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0)
|
|
#define MLXSW_THERMAL_MAX_STATE 10
|
|
-#define MLXSW_THERMAL_MIN_STATE 2
|
|
#define MLXSW_THERMAL_MAX_DUTY 255
|
|
+/* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values
|
|
+ * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10 are used for
|
|
+ * setting fan speed dynamic minimum. For example, if value is set to 14 (40%)
|
|
+ * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to
|
|
+ * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60, 70, 80, 90, 100.
|
|
+ */
|
|
+#define MLXSW_THERMAL_SPEED_MIN (MLXSW_THERMAL_MAX_STATE + 2)
|
|
+#define MLXSW_THERMAL_SPEED_MAX (MLXSW_THERMAL_MAX_STATE * 2)
|
|
+#define MLXSW_THERMAL_SPEED_MIN_LEVEL 2 /* 20% */
|
|
|
|
/* External cooling devices, allowed for binding to mlxsw thermal zones. */
|
|
static char * const mlxsw_thermal_external_allowed_cdev[] = {
|
|
@@ -656,16 +664,49 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
|
|
struct mlxsw_thermal *thermal = cdev->devdata;
|
|
struct device *dev = thermal->bus_info->dev;
|
|
char mfsc_pl[MLXSW_REG_MFSC_LEN];
|
|
+ unsigned long cur_state, i;
|
|
int idx;
|
|
+ u8 duty;
|
|
int err;
|
|
|
|
- if (state > MLXSW_THERMAL_MAX_STATE)
|
|
- return -EINVAL;
|
|
-
|
|
idx = mlxsw_get_cooling_device_idx(thermal, cdev);
|
|
if (idx < 0)
|
|
return idx;
|
|
|
|
+ /* Verify if this request is for changing allowed fan dynamical
|
|
+ * minimum. If it is - update cooling levels accordingly and update
|
|
+ * state, if current state is below the newly requested minimum state.
|
|
+ * For example, if current state is 5, and minimal state is to be
|
|
+ * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed
|
|
+ * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be
|
|
+ * overwritten.
|
|
+ */
|
|
+ if (state >= MLXSW_THERMAL_SPEED_MIN &&
|
|
+ state <= MLXSW_THERMAL_SPEED_MAX) {
|
|
+ state -= MLXSW_THERMAL_MAX_STATE;
|
|
+ for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++)
|
|
+ thermal->cooling_levels[i] = max(state, i);
|
|
+
|
|
+ mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
|
|
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
|
|
+ cur_state = mlxsw_duty_to_state(duty);
|
|
+
|
|
+ /* If current fan state is lower than requested dynamical
|
|
+ * minimum, increase fan speed up to dynamical minimum.
|
|
+ */
|
|
+ if (state < cur_state)
|
|
+ return 0;
|
|
+
|
|
+ state = cur_state;
|
|
+ }
|
|
+
|
|
+ if (state > MLXSW_THERMAL_MAX_STATE)
|
|
+ return -EINVAL;
|
|
+
|
|
/* Normalize the state to the valid speed range. */
|
|
state = thermal->cooling_levels[state];
|
|
mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
|
|
@@ -1143,7 +1184,8 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
|
|
|
|
/* Initialize cooling levels per PWM state. */
|
|
for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
|
|
- thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i);
|
|
+ thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL,
|
|
+ i);
|
|
|
|
thermal->polling_delay = bus_info->low_frequency ?
|
|
MLXSW_THERMAL_SLOW_POLL_INT :
|
|
--
|
|
2.20.1
|
|
|