Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ACPI: extlog: Trace CPER CXL Protocol Error Section

When Firmware First is enabled, the BIOS handles errors first and then
makes them available to the kernel via Common Platform Error Record
(CPER) sections (UEFI 2.11 Appendix N.2.13). Linux parses the CPER
sections via one of two similar paths, either ELOG or GHES. The errors
managed by ELOG are signaled to the BIOS by the I/O Machine Check
Architecture (I/O MCA).

Currently, ELOG and GHES show some inconsistencies in how they report to
userspace via trace events.

Therefore, make the two paths behave consistently by also tracing the
CPER CXL Protocol Error Section in the ELOG path.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Link: https://patch.msgid.link/20260114101543.85926-6-fabio.m.de.francesco@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Fabio M. De Francesco and committed by
Rafael J. Wysocki
95350eff ba8af8e1

+30 -1
+2
drivers/acpi/Kconfig
··· 494 494 tristate "Extended Error Log support" 495 495 depends on X86_MCE && X86_LOCAL_APIC && EDAC 496 496 select UEFI_CPER 497 + select ACPI_APEI 498 + select ACPI_APEI_GHES 497 499 help 498 500 Certain usages such as Predictive Failure Analysis (PFA) require 499 501 more information about the error than what can be described in
+24
drivers/acpi/acpi_extlog.c
··· 12 12 #include <linux/ratelimit.h> 13 13 #include <linux/edac.h> 14 14 #include <linux/ras.h> 15 + #include <cxl/event.h> 15 16 #include <acpi/ghes.h> 16 17 #include <asm/cpu.h> 17 18 #include <asm/mce.h> ··· 163 162 #endif 164 163 } 165 164 165 + static void 166 + extlog_cxl_cper_handle_prot_err(struct cxl_cper_sec_prot_err *prot_err, 167 + int severity) 168 + { 169 + #ifdef ACPI_APEI_PCIEAER 170 + struct cxl_cper_prot_err_work_data wd; 171 + 172 + if (cxl_cper_sec_prot_err_valid(prot_err)) 173 + return; 174 + 175 + if (cxl_cper_setup_prot_err_work_data(&wd, prot_err, severity)) 176 + return; 177 + 178 + cxl_cper_handle_prot_err(&wd); 179 + #endif 180 + } 181 + 166 182 static int extlog_print(struct notifier_block *nb, unsigned long val, 167 183 void *data) 168 184 { ··· 231 213 if (gdata->error_data_length >= sizeof(*mem)) 232 214 trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, 233 215 (u8)gdata->error_severity); 216 + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { 217 + struct cxl_cper_sec_prot_err *prot_err = 218 + acpi_hest_get_payload(gdata); 219 + 220 + extlog_cxl_cper_handle_prot_err(prot_err, 221 + gdata->error_severity); 234 222 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { 235 223 struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); 236 224
+2 -1
drivers/cxl/core/ras.c
··· 63 63 return 0; 64 64 } 65 65 66 - static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) 66 + void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) 67 67 { 68 68 unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device, 69 69 data->prot_err.agent_addr.function); ··· 104 104 else 105 105 cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap); 106 106 } 107 + EXPORT_SYMBOL_GPL(cxl_cper_handle_prot_err); 107 108 108 109 static void cxl_cper_prot_err_work_fn(struct work_struct *work) 109 110 {
+2
include/cxl/event.h
··· 340 340 } 341 341 #endif 342 342 343 + void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *wd); 344 + 343 345 #endif /* _LINUX_CXL_EVENT_H */