Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

cxl/pci: Remove CXL VH handling in CONFIG_PCIEAER_CXL conditional blocks from core/pci.c

Create a new config option, CONFIG_CXL_RAS, and put all CXL RAS items behind
it. The option depends on CPER (ACPI_APEI_GHES), PCIe AER (PCIEAER), and
CXL_PCI to build. Move the related VH RAS code from core/pci.c to core/ras.c.

The Restricted CXL Host (RCH) RAS functions remain in core/pci.c for now, guarded by CONFIG_CXL_RAS, and will be moved in a future patch.

Cc: Robert Richter <rrichter@amd.com>
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Co-developed-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-8-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>

+233 -195
+4
drivers/cxl/Kconfig
··· 233 233 def_bool y 234 234 depends on X86_MCE && MEMORY_FAILURE 235 235 236 + config CXL_RAS 237 + def_bool y 238 + depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI 239 + 236 240 endif
+1 -1
drivers/cxl/core/Makefile
··· 14 14 cxl_core-y += hdm.o 15 15 cxl_core-y += pmu.o 16 16 cxl_core-y += cdat.o 17 - cxl_core-y += ras.o 18 17 cxl_core-$(CONFIG_TRACING) += trace.o 19 18 cxl_core-$(CONFIG_CXL_REGION) += region.o 20 19 cxl_core-$(CONFIG_CXL_MCE) += mce.o 21 20 cxl_core-$(CONFIG_CXL_FEATURES) += features.o 22 21 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o 22 + cxl_core-$(CONFIG_CXL_RAS) += ras.o
+31
drivers/cxl/core/core.h
··· 144 144 int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, 145 145 struct access_coordinate *c); 146 146 147 + #ifdef CONFIG_CXL_RAS 147 148 int cxl_ras_init(void); 148 149 void cxl_ras_exit(void); 150 + bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); 151 + void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); 152 + #else 153 + static inline int cxl_ras_init(void) 154 + { 155 + return 0; 156 + } 157 + 158 + static inline void cxl_ras_exit(void) 159 + { 160 + } 161 + 162 + static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) 163 + { 164 + return false; 165 + } 166 + static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { } 167 + #endif /* CONFIG_CXL_RAS */ 168 + 169 + /* Restricted CXL Host specific RAS functions */ 170 + #ifdef CONFIG_CXL_RAS 171 + void cxl_dport_map_rch_aer(struct cxl_dport *dport); 172 + void cxl_disable_rch_root_ints(struct cxl_dport *dport); 173 + void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); 174 + #else 175 + static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } 176 + static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } 177 + static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } 178 + #endif /* CONFIG_CXL_RAS */ 179 + 149 180 int cxl_gpf_port_setup(struct cxl_dport *dport); 150 181 151 182 struct cxl_hdm;
+4 -185
drivers/cxl/core/pci.c
··· 632 632 } 633 633 EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); 634 634 635 - static void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, 636 - void __iomem *ras_base) 637 - { 638 - void __iomem *addr; 639 - u32 status; 640 - 641 - if (!ras_base) 642 - return; 643 - 644 - addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; 645 - status = readl(addr); 646 - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { 647 - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); 648 - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); 649 - } 650 - } 651 - 652 - /* CXL spec rev3.0 8.2.4.16.1 */ 653 - static void header_log_copy(void __iomem *ras_base, u32 *log) 654 - { 655 - void __iomem *addr; 656 - u32 *log_addr; 657 - int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); 658 - 659 - addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; 660 - log_addr = log; 661 - 662 - for (i = 0; i < log_u32_size; i++) { 663 - *log_addr = readl(addr); 664 - log_addr++; 665 - addr += sizeof(u32); 666 - } 667 - } 668 - 669 - /* 670 - * Log the state of the RAS status registers and prepare them to log the 671 - * next error status. Return 1 if reset needed. 
672 - */ 673 - static bool cxl_handle_ras(struct cxl_dev_state *cxlds, 674 - void __iomem *ras_base) 675 - { 676 - u32 hl[CXL_HEADERLOG_SIZE_U32]; 677 - void __iomem *addr; 678 - u32 status; 679 - u32 fe; 680 - 681 - if (!ras_base) 682 - return false; 683 - 684 - addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; 685 - status = readl(addr); 686 - if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) 687 - return false; 688 - 689 - /* If multiple errors, log header points to first error from ctrl reg */ 690 - if (hweight32(status) > 1) { 691 - void __iomem *rcc_addr = 692 - ras_base + CXL_RAS_CAP_CONTROL_OFFSET; 693 - 694 - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, 695 - readl(rcc_addr))); 696 - } else { 697 - fe = status; 698 - } 699 - 700 - header_log_copy(ras_base, hl); 701 - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); 702 - writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); 703 - 704 - return true; 705 - } 706 - 707 - #ifdef CONFIG_PCIEAER_CXL 708 - 709 - static void cxl_dport_map_rch_aer(struct cxl_dport *dport) 635 + #ifdef CONFIG_CXL_RAS 636 + void cxl_dport_map_rch_aer(struct cxl_dport *dport) 710 637 { 711 638 resource_size_t aer_phys; 712 639 struct device *host; ··· 648 721 } 649 722 } 650 723 651 - static void cxl_dport_map_ras(struct cxl_dport *dport) 652 - { 653 - struct cxl_register_map *map = &dport->reg_map; 654 - struct device *dev = dport->dport_dev; 655 - 656 - if (!map->component_map.ras.valid) 657 - dev_dbg(dev, "RAS registers not found\n"); 658 - else if (cxl_map_component_regs(map, &dport->regs.component, 659 - BIT(CXL_CM_CAP_CAP_ID_RAS))) 660 - dev_dbg(dev, "Failed to map RAS capability.\n"); 661 - } 662 - 663 - static void cxl_disable_rch_root_ints(struct cxl_dport *dport) 724 + void cxl_disable_rch_root_ints(struct cxl_dport *dport) 664 725 { 665 726 void __iomem *aer_base = dport->regs.dport_aer; 666 727 u32 aer_cmd_mask, aer_cmd; ··· 671 756 aer_cmd &= ~aer_cmd_mask; 672 757 writel(aer_cmd, aer_base + 
PCI_ERR_ROOT_COMMAND); 673 758 } 674 - 675 - /** 676 - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport 677 - * @dport: the cxl_dport that needs to be initialized 678 - * @host: host device for devm operations 679 - */ 680 - void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) 681 - { 682 - dport->reg_map.host = host; 683 - cxl_dport_map_ras(dport); 684 - 685 - if (dport->rch) { 686 - struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); 687 - 688 - if (!host_bridge->native_aer) 689 - return; 690 - 691 - cxl_dport_map_rch_aer(dport); 692 - cxl_disable_rch_root_ints(dport); 693 - } 694 - } 695 - EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); 696 759 697 760 /* 698 761 * Copy the AER capability registers using 32 bit read accesses. ··· 720 827 return false; 721 828 } 722 829 723 - static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) 830 + void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) 724 831 { 725 832 struct pci_dev *pdev = to_pci_dev(cxlds->dev); 726 833 struct aer_capability_regs aer_regs; ··· 745 852 else 746 853 cxl_handle_ras(cxlds, dport->regs.ras); 747 854 } 748 - 749 - #else 750 - static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } 751 855 #endif 752 - 753 - void cxl_cor_error_detected(struct pci_dev *pdev) 754 - { 755 - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); 756 - struct device *dev = &cxlds->cxlmd->dev; 757 - 758 - scoped_guard(device, dev) { 759 - if (!dev->driver) { 760 - dev_warn(&pdev->dev, 761 - "%s: memdev disabled, abort error handling\n", 762 - dev_name(dev)); 763 - return; 764 - } 765 - 766 - if (cxlds->rcd) 767 - cxl_handle_rdport_errors(cxlds); 768 - 769 - cxl_handle_cor_ras(cxlds, cxlds->regs.ras); 770 - } 771 - } 772 - EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); 773 - 774 - pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, 775 - pci_channel_state_t state) 776 - { 777 - struct cxl_dev_state 
*cxlds = pci_get_drvdata(pdev); 778 - struct cxl_memdev *cxlmd = cxlds->cxlmd; 779 - struct device *dev = &cxlmd->dev; 780 - bool ue; 781 - 782 - scoped_guard(device, dev) { 783 - if (!dev->driver) { 784 - dev_warn(&pdev->dev, 785 - "%s: memdev disabled, abort error handling\n", 786 - dev_name(dev)); 787 - return PCI_ERS_RESULT_DISCONNECT; 788 - } 789 - 790 - if (cxlds->rcd) 791 - cxl_handle_rdport_errors(cxlds); 792 - /* 793 - * A frozen channel indicates an impending reset which is fatal to 794 - * CXL.mem operation, and will likely crash the system. On the off 795 - * chance the situation is recoverable dump the status of the RAS 796 - * capability registers and bounce the active state of the memdev. 797 - */ 798 - ue = cxl_handle_ras(cxlds, cxlds->regs.ras); 799 - } 800 - 801 - 802 - switch (state) { 803 - case pci_channel_io_normal: 804 - if (ue) { 805 - device_release_driver(dev); 806 - return PCI_ERS_RESULT_NEED_RESET; 807 - } 808 - return PCI_ERS_RESULT_CAN_RECOVER; 809 - case pci_channel_io_frozen: 810 - dev_warn(&pdev->dev, 811 - "%s: frozen state error detected, disable CXL.mem\n", 812 - dev_name(dev)); 813 - device_release_driver(dev); 814 - return PCI_ERS_RESULT_NEED_RESET; 815 - case pci_channel_io_perm_failure: 816 - dev_warn(&pdev->dev, 817 - "failure state error detected, request disconnect\n"); 818 - return PCI_ERS_RESULT_DISCONNECT; 819 - } 820 - return PCI_ERS_RESULT_NEED_RESET; 821 - } 822 - EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); 823 856 824 857 static int cxl_flit_size(struct pci_dev *pdev) 825 858 {
+176
drivers/cxl/core/ras.c
··· 5 5 #include <linux/aer.h> 6 6 #include <cxl/event.h> 7 7 #include <cxlmem.h> 8 + #include <cxlpci.h> 8 9 #include "trace.h" 9 10 10 11 static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, ··· 125 124 cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); 126 125 cancel_work_sync(&cxl_cper_prot_err_work); 127 126 } 127 + 128 + static void cxl_dport_map_ras(struct cxl_dport *dport) 129 + { 130 + struct cxl_register_map *map = &dport->reg_map; 131 + struct device *dev = dport->dport_dev; 132 + 133 + if (!map->component_map.ras.valid) 134 + dev_dbg(dev, "RAS registers not found\n"); 135 + else if (cxl_map_component_regs(map, &dport->regs.component, 136 + BIT(CXL_CM_CAP_CAP_ID_RAS))) 137 + dev_dbg(dev, "Failed to map RAS capability.\n"); 138 + } 139 + 140 + /** 141 + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport 142 + * @dport: the cxl_dport that needs to be initialized 143 + * @host: host device for devm operations 144 + */ 145 + void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) 146 + { 147 + dport->reg_map.host = host; 148 + cxl_dport_map_ras(dport); 149 + 150 + if (dport->rch) { 151 + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); 152 + 153 + if (!host_bridge->native_aer) 154 + return; 155 + 156 + cxl_dport_map_rch_aer(dport); 157 + cxl_disable_rch_root_ints(dport); 158 + } 159 + } 160 + EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); 161 + 162 + void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) 163 + { 164 + void __iomem *addr; 165 + u32 status; 166 + 167 + if (!ras_base) 168 + return; 169 + 170 + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; 171 + status = readl(addr); 172 + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { 173 + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); 174 + trace_cxl_aer_correctable_error(cxlds->cxlmd, status); 175 + } 176 + } 177 + 178 + /* CXL spec rev3.0 8.2.4.16.1 */ 179 + static 
void header_log_copy(void __iomem *ras_base, u32 *log) 180 + { 181 + void __iomem *addr; 182 + u32 *log_addr; 183 + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); 184 + 185 + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; 186 + log_addr = log; 187 + 188 + for (i = 0; i < log_u32_size; i++) { 189 + *log_addr = readl(addr); 190 + log_addr++; 191 + addr += sizeof(u32); 192 + } 193 + } 194 + 195 + /* 196 + * Log the state of the RAS status registers and prepare them to log the 197 + * next error status. Return 1 if reset needed. 198 + */ 199 + bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) 200 + { 201 + u32 hl[CXL_HEADERLOG_SIZE_U32]; 202 + void __iomem *addr; 203 + u32 status; 204 + u32 fe; 205 + 206 + if (!ras_base) 207 + return false; 208 + 209 + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; 210 + status = readl(addr); 211 + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) 212 + return false; 213 + 214 + /* If multiple errors, log header points to first error from ctrl reg */ 215 + if (hweight32(status) > 1) { 216 + void __iomem *rcc_addr = 217 + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; 218 + 219 + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, 220 + readl(rcc_addr))); 221 + } else { 222 + fe = status; 223 + } 224 + 225 + header_log_copy(ras_base, hl); 226 + trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); 227 + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); 228 + 229 + return true; 230 + } 231 + 232 + void cxl_cor_error_detected(struct pci_dev *pdev) 233 + { 234 + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); 235 + struct device *dev = &cxlds->cxlmd->dev; 236 + 237 + scoped_guard(device, dev) { 238 + if (!dev->driver) { 239 + dev_warn(&pdev->dev, 240 + "%s: memdev disabled, abort error handling\n", 241 + dev_name(dev)); 242 + return; 243 + } 244 + 245 + if (cxlds->rcd) 246 + cxl_handle_rdport_errors(cxlds); 247 + 248 + cxl_handle_cor_ras(cxlds, cxlds->regs.ras); 249 + } 250 + } 251 + 
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); 252 + 253 + pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, 254 + pci_channel_state_t state) 255 + { 256 + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); 257 + struct cxl_memdev *cxlmd = cxlds->cxlmd; 258 + struct device *dev = &cxlmd->dev; 259 + bool ue; 260 + 261 + scoped_guard(device, dev) { 262 + if (!dev->driver) { 263 + dev_warn(&pdev->dev, 264 + "%s: memdev disabled, abort error handling\n", 265 + dev_name(dev)); 266 + return PCI_ERS_RESULT_DISCONNECT; 267 + } 268 + 269 + if (cxlds->rcd) 270 + cxl_handle_rdport_errors(cxlds); 271 + /* 272 + * A frozen channel indicates an impending reset which is fatal to 273 + * CXL.mem operation, and will likely crash the system. On the off 274 + * chance the situation is recoverable dump the status of the RAS 275 + * capability registers and bounce the active state of the memdev. 276 + */ 277 + ue = cxl_handle_ras(cxlds, cxlds->regs.ras); 278 + } 279 + 280 + 281 + switch (state) { 282 + case pci_channel_io_normal: 283 + if (ue) { 284 + device_release_driver(dev); 285 + return PCI_ERS_RESULT_NEED_RESET; 286 + } 287 + return PCI_ERS_RESULT_CAN_RECOVER; 288 + case pci_channel_io_frozen: 289 + dev_warn(&pdev->dev, 290 + "%s: frozen state error detected, disable CXL.mem\n", 291 + dev_name(dev)); 292 + device_release_driver(dev); 293 + return PCI_ERS_RESULT_NEED_RESET; 294 + case pci_channel_io_perm_failure: 295 + dev_warn(&pdev->dev, 296 + "failure state error detected, request disconnect\n"); 297 + return PCI_ERS_RESULT_DISCONNECT; 298 + } 299 + return PCI_ERS_RESULT_NEED_RESET; 300 + } 301 + EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");
-8
drivers/cxl/cxl.h
··· 803 803 struct device *dport_dev, int port_id, 804 804 resource_size_t rcrb); 805 805 806 - #ifdef CONFIG_PCIEAER_CXL 807 - void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); 808 - void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); 809 - #else 810 - static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, 811 - struct device *host) { } 812 - #endif 813 - 814 806 struct cxl_decoder *to_cxl_decoder(struct device *dev); 815 807 struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); 816 808 struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
+16
drivers/cxl/cxlpci.h
··· 76 76 77 77 struct cxl_dev_state; 78 78 void read_cdat_data(struct cxl_port *port); 79 + 80 + #ifdef CONFIG_CXL_RAS 79 81 void cxl_cor_error_detected(struct pci_dev *pdev); 80 82 pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, 81 83 pci_channel_state_t state); 84 + void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); 85 + #else 86 + static inline void cxl_cor_error_detected(struct pci_dev *pdev) { } 87 + 88 + static inline pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, 89 + pci_channel_state_t state) 90 + { 91 + return PCI_ERS_RESULT_NONE; 92 + } 93 + 94 + static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, 95 + struct device *host) { } 96 + #endif 97 + 82 98 #endif /* __CXL_PCI_H__ */
+1 -1
tools/testing/cxl/Kbuild
··· 57 57 cxl_core-y += $(CXL_CORE_SRC)/hdm.o 58 58 cxl_core-y += $(CXL_CORE_SRC)/pmu.o 59 59 cxl_core-y += $(CXL_CORE_SRC)/cdat.o 60 - cxl_core-y += $(CXL_CORE_SRC)/ras.o 61 60 cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o 62 61 cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o 63 62 cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o 64 63 cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o 65 64 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o 65 + cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o 66 66 cxl_core-y += config_check.o 67 67 cxl_core-y += cxl_core_test.o 68 68 cxl_core-y += cxl_core_exports.o