Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

PCI/AER: Report CXL or PCIe bus type in AER trace logging

The AER service driver and aer_event tracing currently log 'PCIe Bus Error'
for all errors. Update the driver and aer_event tracing to log 'CXL Bus
Error' for CXL device errors.

This requires that AER can identify and distinguish between PCIe errors and
CXL errors.

Introduce a one-bit 'is_cxl' flag in 'struct aer_err_info', set from
pcie_is_cxl() in aer_get_device_error_info() and pci_print_aer(). Add the
aer_err_bus() helper to map the flag to the "CXL" or "PCIe" string.

Update the aer_event trace routine to accept a bus type string parameter.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260114182055.46029-15-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>

authored by

Terry Bowman and committed by
Dave Jiang
83cba5b3 da71bd36

+28 -12
+7 -1
drivers/pci/pci.h
··· 738 738 unsigned int multi_error_valid:1; 739 739 740 740 unsigned int first_error:5; 741 - unsigned int __pad2:2; 741 + unsigned int __pad2:1; 742 + unsigned int is_cxl:1; 742 743 unsigned int tlp_header_valid:1; 743 744 744 745 unsigned int status; /* COR/UNCOR Error Status */ ··· 749 748 750 749 int aer_get_device_error_info(struct aer_err_info *info, int i); 751 750 void aer_print_error(struct aer_err_info *info, int i); 751 + 752 + static inline const char *aer_err_bus(struct aer_err_info *info) 753 + { 754 + return info->is_cxl ? "CXL" : "PCIe"; 755 + } 752 756 753 757 int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, 754 758 unsigned int tlp_len, bool flit,
+13 -7
drivers/pci/pcie/aer.c
··· 870 870 struct pci_dev *dev; 871 871 int layer, agent, id; 872 872 const char *level = info->level; 873 + const char *bus_type = aer_err_bus(info); 873 874 874 875 if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES)) 875 876 return; ··· 880 879 881 880 pci_dev_aer_stats_incr(dev, info); 882 881 trace_aer_event(pci_name(dev), (info->status & ~info->mask), 883 - info->severity, info->tlp_header_valid, &info->tlp); 882 + info->severity, info->tlp_header_valid, &info->tlp, bus_type); 884 883 885 884 if (!info->ratelimit_print[i]) 886 885 return; 887 886 888 887 if (!info->status) { 889 - pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", 890 - aer_error_severity_string[info->severity]); 888 + pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", 889 + bus_type, aer_error_severity_string[info->severity]); 891 890 goto out; 892 891 } 893 892 894 893 layer = AER_GET_LAYER_ERROR(info->severity, info->status); 895 894 agent = AER_GET_AGENT(info->severity, info->status); 896 895 897 - aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", 898 - aer_error_severity_string[info->severity], 896 + aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n", 897 + bus_type, aer_error_severity_string[info->severity], 899 898 aer_error_layer[layer], aer_agent_string[agent]); 900 899 901 900 aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", ··· 929 928 void pci_print_aer(struct pci_dev *dev, int aer_severity, 930 929 struct aer_capability_regs *aer) 931 930 { 931 + const char *bus_type; 932 932 int layer, agent, tlp_header_valid = 0; 933 933 u32 status, mask; 934 934 struct aer_err_info info = { ··· 950 948 951 949 info.status = status; 952 950 info.mask = mask; 951 + info.is_cxl = pcie_is_cxl(dev); 952 + 953 + bus_type = aer_err_bus(&info); 953 954 954 955 pci_dev_aer_stats_incr(dev, &info); 955 - trace_aer_event(pci_name(dev), (status & ~mask), 956 - 
aer_severity, tlp_header_valid, &aer->header_log); 956 + trace_aer_event(pci_name(dev), (status & ~mask), aer_severity, 957 + tlp_header_valid, &aer->header_log, bus_type); 957 958 958 959 if (!aer_ratelimit(dev, info.severity)) 959 960 return; ··· 1311 1306 /* Must reset in this function */ 1312 1307 info->status = 0; 1313 1308 info->tlp_header_valid = 0; 1309 + info->is_cxl = pcie_is_cxl(dev); 1314 1310 1315 1311 /* The device might not support AER */ 1316 1312 if (!aer)
+8 -4
include/ras/ras_event.h
··· 339 339 const u32 status, 340 340 const u8 severity, 341 341 const u8 tlp_header_valid, 342 - struct pcie_tlp_log *tlp), 342 + struct pcie_tlp_log *tlp, 343 + const char *bus_type), 343 344 344 - TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), 345 + 346 + TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp, bus_type), 345 347 346 348 TP_STRUCT__entry( 347 349 __string( dev_name, dev_name ) ··· 351 349 __field( u8, severity ) 352 350 __field( u8, tlp_header_valid) 353 351 __array( u32, tlp_header, PCIE_STD_MAX_TLP_HEADERLOG) 352 + __string( bus_type, bus_type ) 354 353 ), 355 354 356 355 TP_fast_assign( 357 356 __assign_str(dev_name); 357 + __assign_str(bus_type); 358 358 __entry->status = status; 359 359 __entry->severity = severity; 360 360 __entry->tlp_header_valid = tlp_header_valid; ··· 368 364 } 369 365 ), 370 366 371 - TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n", 372 - __get_str(dev_name), 367 + TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n", 368 + __get_str(dev_name), __get_str(bus_type), 373 369 __entry->severity == AER_CORRECTABLE ? "Corrected" : 374 370 __entry->severity == AER_FATAL ? 375 371 "Fatal" : "Uncorrected, non-fatal",