Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'mlx5-health-syndrome'

Tariq Toukan says:

====================
mlx5: Trust lockdown health syndrome

This series introduces a new error type in the health syndrome,
specifically for trust lockdown. Additionally, it exposes the CRR bit
in the health buffer, which, when set, indicates that the error cannot
be recovered without a process involving a cold reset. We add the CRR
bit value to the health buffer info log and update it to be logged on
any syndrome.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+14 -1
+14 -1
drivers/net/ethernet/mellanox/mlx5/core/health.c
··· 96 96 return rfr_severity >> MLX5_RFR_BIT_OFFSET; 97 97 } 98 98 99 + static int mlx5_health_get_crr(u8 rfr_severity) 100 + { 101 + return (rfr_severity >> MLX5_CRR_BIT_OFFSET) & 0x01; 102 + } 103 + 99 104 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) 100 105 { 101 106 struct mlx5_core_health *health = &dev->priv.health; ··· 380 375 return "High temperature"; 381 376 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR: 382 377 return "ICM fetch PCI data poisoned error"; 378 + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_TRUST_LOCKDOWN_ERR: 379 + return "Trust lockdown error"; 383 380 default: 384 381 return "unrecognized error"; 385 382 } ··· 449 442 mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time)); 450 443 mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); 451 444 mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity)); 445 + mlx5_log(dev, severity, "crr %d\n", mlx5_health_get_crr(rfr_severity)); 452 446 mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity)); 453 447 mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index)); 454 448 mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd), 455 449 hsynd_str(ioread8(&h->synd))); 456 450 mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); 457 451 mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver)); 452 + if (mlx5_health_get_crr(rfr_severity)) 453 + mlx5_core_warn(dev, "Cold reset is required\n"); 458 454 } 459 455 460 456 static int ··· 809 799 health->prev = count; 810 800 if (health->miss_counter == MAX_MISSES) { 811 801 mlx5_core_err(dev, "device's health compromised - reached miss count\n"); 802 + health->synd = ioread8(&h->synd); 812 803 print_health_info(dev); 813 804 queue_work(health->wq, &health->report_work); 814 805 } 815 806 816 807 prev_synd = health->synd; 817 808 health->synd = ioread8(&h->synd); 818 - if (health->synd && health->synd != prev_synd) 809 + if 
(health->synd && health->synd != prev_synd) { 810 + print_health_info(dev); 819 811 queue_work(health->wq, &health->report_work); 812 + } 820 813 821 814 out: 822 815 mod_timer(&health->timer, get_next_poll_jiffies(dev));