Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

PCI/AER: Move CXL RCH error handling to aer_cxl_rch.c

The Restricted CXL Host (RCH) AER error handling logic currently resides
in the AER driver file, aer.c. The CXL specific changes are conditionally
compiled using #ifdefs.

Improve the AER driver maintainability by separating the RCH specific logic
from the AER driver's core functionality and removing the ifdefs. Introduce
drivers/pci/pcie/aer_cxl_rch.c to hold the RCH AER logic. Conditionally
compile the file using the CONFIG_CXL_RAS Kconfig option.

Move the CXL logic into the new file but leave the CXL helper function
is_aer_internal_error() in aer.c for now, as it will be moved in a future
patch for CXL Virtual Hierarchy handling.

To maintain compilation after the move, other changes are required. Change
cxl_rch_handle_error(), cxl_rch_enable_rcec(), and is_aer_internal_error() to
be non-static in order to be accessible from the AER driver.

Update the new file with the SPDX and 2023 AMD copyright notices because
the RCH bits were initially contributed in 2023 by AMD. See
commit 0a867568bb0d ("PCI/AER: Forward RCH downstream port-detected errors to the CXL.mem dev handler")

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-12-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>

authored by

Terry Bowman and committed by
Dave Jiang
59010029 51ce56b1

+114 -101
+1
drivers/pci/pcie/Makefile
··· 8 8 9 9 obj-y += aspm.o 10 10 obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o 11 + obj-$(CONFIG_CXL_RAS) += aer_cxl_rch.o 11 12 obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o 12 13 obj-$(CONFIG_PCIE_PME) += pme.o 13 14 obj-$(CONFIG_PCIE_DPC) += dpc.o
+1 -98
drivers/pci/pcie/aer.c
··· 1150 1150 */ 1151 1151 EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core"); 1152 1152 1153 - #ifdef CONFIG_PCIEAER_CXL 1154 - static bool is_cxl_mem_dev(struct pci_dev *dev) 1155 - { 1156 - /* 1157 - * The capability, status, and control fields in Device 0, 1158 - * Function 0 DVSEC control the CXL functionality of the 1159 - * entire device (CXL 3.0, 8.1.3). 1160 - */ 1161 - if (dev->devfn != PCI_DEVFN(0, 0)) 1162 - return false; 1163 - 1164 - /* 1165 - * CXL Memory Devices must have the 502h class code set (CXL 1166 - * 3.0, 8.1.12.1). 1167 - */ 1168 - if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) 1169 - return false; 1170 - 1171 - return true; 1172 - } 1173 - 1153 + #ifdef CONFIG_CXL_RAS 1174 1154 bool is_aer_internal_error(struct aer_err_info *info) 1175 1155 { 1176 1156 if (info->severity == AER_CORRECTABLE) ··· 1158 1178 1159 1179 return info->status & PCI_ERR_UNC_INTN; 1160 1180 } 1161 - 1162 - static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) 1163 - { 1164 - struct aer_err_info *info = (struct aer_err_info *)data; 1165 - const struct pci_error_handlers *err_handler; 1166 - 1167 - if (!is_cxl_mem_dev(dev) || !pcie_aer_is_native(dev)) 1168 - return 0; 1169 - 1170 - /* Protect dev->driver */ 1171 - device_lock(&dev->dev); 1172 - 1173 - err_handler = dev->driver ? 
dev->driver->err_handler : NULL; 1174 - if (!err_handler) 1175 - goto out; 1176 - 1177 - if (info->severity == AER_CORRECTABLE) { 1178 - if (err_handler->cor_error_detected) 1179 - err_handler->cor_error_detected(dev); 1180 - } else if (err_handler->error_detected) { 1181 - if (info->severity == AER_NONFATAL) 1182 - err_handler->error_detected(dev, pci_channel_io_normal); 1183 - else if (info->severity == AER_FATAL) 1184 - err_handler->error_detected(dev, pci_channel_io_frozen); 1185 - } 1186 - out: 1187 - device_unlock(&dev->dev); 1188 - return 0; 1189 - } 1190 - 1191 - static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) 1192 - { 1193 - /* 1194 - * Internal errors of an RCEC indicate an AER error in an 1195 - * RCH's downstream port. Check and handle them in the CXL.mem 1196 - * device driver. 1197 - */ 1198 - if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && 1199 - is_aer_internal_error(info)) 1200 - pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); 1201 - } 1202 - 1203 - static int handles_cxl_error_iter(struct pci_dev *dev, void *data) 1204 - { 1205 - bool *handles_cxl = data; 1206 - 1207 - if (!*handles_cxl) 1208 - *handles_cxl = is_cxl_mem_dev(dev) && pcie_aer_is_native(dev); 1209 - 1210 - /* Non-zero terminates iteration */ 1211 - return *handles_cxl; 1212 - } 1213 - 1214 - static bool handles_cxl_errors(struct pci_dev *rcec) 1215 - { 1216 - bool handles_cxl = false; 1217 - 1218 - if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && 1219 - pcie_aer_is_native(rcec)) 1220 - pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); 1221 - 1222 - return handles_cxl; 1223 - } 1224 - 1225 - static void cxl_rch_enable_rcec(struct pci_dev *rcec) 1226 - { 1227 - if (!handles_cxl_errors(rcec)) 1228 - return; 1229 - 1230 - pci_aer_unmask_internal_errors(rcec); 1231 - pci_info(rcec, "CXL: Internal errors unmasked"); 1232 - } 1233 - 1234 - #else 1235 - static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } 1236 - static inline void 
cxl_rch_handle_error(struct pci_dev *dev, 1237 - struct aer_err_info *info) { } 1238 1181 #endif 1239 1182 1240 1183 /**
+106
drivers/pci/pcie/aer_cxl_rch.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright(c) 2023 AMD Corporation. All rights reserved. */ 3 + 4 + #include <linux/pci.h> 5 + #include <linux/aer.h> 6 + #include <linux/bitfield.h> 7 + #include "../pci.h" 8 + #include "portdrv.h" 9 + 10 + static bool is_cxl_mem_dev(struct pci_dev *dev) 11 + { 12 + /* 13 + * The capability, status, and control fields in Device 0, 14 + * Function 0 DVSEC control the CXL functionality of the 15 + * entire device (CXL 3.0, 8.1.3). 16 + */ 17 + if (dev->devfn != PCI_DEVFN(0, 0)) 18 + return false; 19 + 20 + /* 21 + * CXL Memory Devices must have the 502h class code set (CXL 22 + * 3.0, 8.1.12.1). 23 + */ 24 + if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) 25 + return false; 26 + 27 + return true; 28 + } 29 + 30 + static bool cxl_error_is_native(struct pci_dev *dev) 31 + { 32 + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); 33 + 34 + return (pcie_ports_native || host->native_aer); 35 + } 36 + 37 + static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) 38 + { 39 + struct aer_err_info *info = (struct aer_err_info *)data; 40 + const struct pci_error_handlers *err_handler; 41 + 42 + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) 43 + return 0; 44 + 45 + device_lock(&dev->dev); 46 + 47 + err_handler = dev->driver ? 
dev->driver->err_handler : NULL; 48 + if (!err_handler) 49 + goto out; 50 + 51 + if (info->severity == AER_CORRECTABLE) { 52 + if (err_handler->cor_error_detected) 53 + err_handler->cor_error_detected(dev); 54 + } else if (err_handler->error_detected) { 55 + if (info->severity == AER_NONFATAL) 56 + err_handler->error_detected(dev, pci_channel_io_normal); 57 + else if (info->severity == AER_FATAL) 58 + err_handler->error_detected(dev, pci_channel_io_frozen); 59 + } 60 + out: 61 + device_unlock(&dev->dev); 62 + return 0; 63 + } 64 + 65 + void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) 66 + { 67 + /* 68 + * Internal errors of an RCEC indicate an AER error in an 69 + * RCH's downstream port. Check and handle them in the CXL.mem 70 + * device driver. 71 + */ 72 + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && 73 + is_aer_internal_error(info)) 74 + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); 75 + } 76 + 77 + static int handles_cxl_error_iter(struct pci_dev *dev, void *data) 78 + { 79 + bool *handles_cxl = data; 80 + 81 + if (!*handles_cxl) 82 + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); 83 + 84 + /* Non-zero terminates iteration */ 85 + return *handles_cxl; 86 + } 87 + 88 + static bool handles_cxl_errors(struct pci_dev *rcec) 89 + { 90 + bool handles_cxl = false; 91 + 92 + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && 93 + pcie_aer_is_native(rcec)) 94 + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); 95 + 96 + return handles_cxl; 97 + } 98 + 99 + void cxl_rch_enable_rcec(struct pci_dev *rcec) 100 + { 101 + if (!handles_cxl_errors(rcec)) 102 + return; 103 + 104 + pci_aer_unmask_internal_errors(rcec); 105 + pci_info(rcec, "CXL: Internal errors unmasked"); 106 + }
+6 -3
drivers/pci/pcie/portdrv.h
··· 126 126 127 127 struct aer_err_info; 128 128 129 - #ifdef CONFIG_PCIEAER_CXL 129 + #ifdef CONFIG_CXL_RAS 130 130 bool is_aer_internal_error(struct aer_err_info *info); 131 + void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info); 132 + void cxl_rch_enable_rcec(struct pci_dev *rcec); 131 133 #else 132 134 static inline bool is_aer_internal_error(struct aer_err_info *info) { return false; } 133 - #endif /* CONFIG_PCIEAER_CXL */ 134 - 135 + static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { } 136 + static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } 137 + #endif /* CONFIG_CXL_RAS */ 135 138 #endif /* _PORTDRV_H_ */