Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'mlx5-add-sensor-name-in-temperature-message'

Tariq Toukan says:

====================
mlx5: Add sensor name in temperature message

This small series from Shahar adds the sensor names to the temperature
event messages, in addition to the existing bitmap indicators.
This improves human readability.

The series starts with simple refactoring and modifications. The top patch
adds the sensor names.
====================

Link: https://patch.msgid.link/20250213094641.226501-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+39 -3
+33 -3
drivers/net/ethernet/mellanox/mlx5/core/events.c
··· 6 6 #include "mlx5_core.h" 7 7 #include "lib/eq.h" 8 8 #include "lib/events.h" 9 + #include "hwmon.h" 9 10 10 11 struct mlx5_event_nb { 11 12 struct mlx5_nb nb; ··· 154 153 return NOTIFY_OK; 155 154 } 156 155 156 + #if IS_ENABLED(CONFIG_HWMON) 157 + static void print_sensor_names_in_bit_set(struct mlx5_core_dev *dev, struct mlx5_hwmon *hwmon, 158 + u64 bit_set, int bit_set_offset) 159 + { 160 + unsigned long *bit_set_ptr = (unsigned long *)&bit_set; 161 + int num_bits = sizeof(bit_set) * BITS_PER_BYTE; 162 + int i; 163 + 164 + for_each_set_bit(i, bit_set_ptr, num_bits) { 165 + const char *sensor_name = hwmon_get_sensor_name(hwmon, i + bit_set_offset); 166 + 167 + mlx5_core_warn(dev, "Sensor name[%d]: %s\n", i + bit_set_offset, sensor_name); 168 + } 169 + } 170 + #endif /* CONFIG_HWMON */ 171 + 157 172 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ 158 173 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) 159 174 { 160 175 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 161 176 struct mlx5_events *events = event_nb->ctx; 177 + struct mlx5_core_dev *dev = events->dev; 162 178 struct mlx5_eqe *eqe = data; 163 179 u64 value_lsb; 164 180 u64 value_msb; 165 181 166 182 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); 183 + /* bit 1-63 are not supported for NICs, 184 + * hence read only bit 0 (asic) from lsb. 
185 + */ 186 + value_lsb &= 0x1; 167 187 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); 168 188 169 - mlx5_core_warn(events->dev, 170 - "High temperature on sensors with bit set %llx %llx", 171 - value_msb, value_lsb); 189 + if (net_ratelimit()) { 190 + mlx5_core_warn(dev, "High temperature on sensors with bit set %#llx %#llx.\n", 191 + value_msb, value_lsb); 192 + #if IS_ENABLED(CONFIG_HWMON) 193 + if (dev->hwmon) { 194 + print_sensor_names_in_bit_set(dev, dev->hwmon, value_lsb, 0); 195 + print_sensor_names_in_bit_set(dev, dev->hwmon, value_msb, 196 + sizeof(value_lsb) * BITS_PER_BYTE); 197 + } 198 + #endif 199 + } 172 200 173 201 return NOTIFY_OK; 174 202 }
+5
drivers/net/ethernet/mellanox/mlx5/core/hwmon.c
··· 416 416 mlx5_hwmon_free(hwmon); 417 417 mdev->hwmon = NULL; 418 418 } 419 + 420 + const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel) 421 + { 422 + return hwmon->temp_channel_desc[channel].sensor_name; 423 + }
+1
drivers/net/ethernet/mellanox/mlx5/core/hwmon.h
··· 10 10 11 11 int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev); 12 12 void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev); 13 + const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel); 13 14 14 15 #else 15 16 static inline int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev)