Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'cxl-fixes-6.16-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull Compute Express Link (CXL) fixes from Dave Jiang:
"These fixes address a few issues in the CXL subsystem, including
dealing with some bugs in the CXL EDAC and RAS drivers:

- Fix return value of cxlctl_validate_set_features()

- Fix min_scrub_cycle of a region miscalculation and add additional
documentation

- Fix potential memory leak issues for CXL EDAC

- Fix CPER handler device confusion for CXL RAS

- Fix using wrong repair type to check DRAM event record"

* tag 'cxl-fixes-6.16-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
cxl/edac: Fix using wrong repair type to check dram event record
cxl/ras: Fix CPER handler device confusion
cxl/edac: Fix potential memory leak issues
cxl/Documentation: Add more description about min/max scrub cycle
cxl/edac: Fix the min_scrub_cycle of a region miscalculation
cxl: fix return value in cxlctl_validate_set_features()

+57 -26
+16
Documentation/ABI/testing/sysfs-edac-scrub
··· 49 49 (RO) Supported minimum scrub cycle duration in seconds 50 50 by the memory scrubber. 51 51 52 + Device-based scrub: returns the minimum scrub cycle 53 + supported by the memory device. 54 + 55 + Region-based scrub: returns the max of minimum scrub cycles 56 + supported by individual memory devices that back the region. 57 + 52 58 What: /sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration 53 59 Date: March 2025 54 60 KernelVersion: 6.15 ··· 62 56 Description: 63 57 (RO) Supported maximum scrub cycle duration in seconds 64 58 by the memory scrubber. 59 + 60 + Device-based scrub: returns the maximum scrub cycle supported 61 + by the memory device. 62 + 63 + Region-based scrub: returns the min of maximum scrub cycles 64 + supported by individual memory devices that back the region. 65 + 66 + If the memory device does not provide maximum scrub cycle 67 + information, return the maximum supported value of the scrub 68 + cycle field. 65 69 66 70 What: /sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration 67 71 Date: March 2025
+13 -5
drivers/cxl/core/edac.c
··· 103 103 u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle) 104 104 { 105 105 struct cxl_mailbox *cxl_mbox; 106 - u8 min_scrub_cycle = U8_MAX; 107 106 struct cxl_region_params *p; 108 107 struct cxl_memdev *cxlmd; 109 108 struct cxl_region *cxlr; 109 + u8 min_scrub_cycle = 0; 110 110 int i, ret; 111 111 112 112 if (!cxl_ps_ctx->cxlr) { ··· 133 133 if (ret) 134 134 return ret; 135 135 136 + /* 137 + * The min_scrub_cycle of a region is the max of minimum scrub 138 + * cycles supported by memdevs that back the region. 139 + */ 136 140 if (min_cycle) 137 - min_scrub_cycle = min(*min_cycle, min_scrub_cycle); 141 + min_scrub_cycle = max(*min_cycle, min_scrub_cycle); 138 142 } 139 143 140 144 if (min_cycle) ··· 1103 1099 old_rec = xa_store(&array_rec->rec_gen_media, 1104 1100 le64_to_cpu(rec->media_hdr.phys_addr), rec, 1105 1101 GFP_KERNEL); 1106 - if (xa_is_err(old_rec)) 1102 + if (xa_is_err(old_rec)) { 1103 + kfree(rec); 1107 1104 return xa_err(old_rec); 1105 + } 1108 1106 1109 1107 kfree(old_rec); 1110 1108 ··· 1133 1127 old_rec = xa_store(&array_rec->rec_dram, 1134 1128 le64_to_cpu(rec->media_hdr.phys_addr), rec, 1135 1129 GFP_KERNEL); 1136 - if (xa_is_err(old_rec)) 1130 + if (xa_is_err(old_rec)) { 1131 + kfree(rec); 1137 1132 return xa_err(old_rec); 1133 + } 1138 1134 1139 1135 kfree(old_rec); 1140 1136 ··· 1323 1315 attrbs.bank = ctx->bank; 1324 1316 break; 1325 1317 case EDAC_REPAIR_RANK_SPARING: 1326 - attrbs.repair_type = CXL_BANK_SPARING; 1318 + attrbs.repair_type = CXL_RANK_SPARING; 1327 1319 break; 1328 1320 default: 1329 1321 return NULL;
+1 -1
drivers/cxl/core/features.c
··· 544 544 u32 flags; 545 545 546 546 if (rpc_in->op_size < sizeof(uuid_t)) 547 - return ERR_PTR(-EINVAL); 547 + return false; 548 548 549 549 feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid); 550 550 if (IS_ERR(feat))
+27 -20
drivers/cxl/core/ras.c
··· 31 31 ras_cap.header_log); 32 32 } 33 33 34 - static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, 35 - struct cxl_ras_capability_regs ras_cap) 34 + static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, 35 + struct cxl_ras_capability_regs ras_cap) 36 36 { 37 37 u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; 38 - struct cxl_dev_state *cxlds; 39 38 40 - cxlds = pci_get_drvdata(pdev); 41 - if (!cxlds) 42 - return; 43 - 44 - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); 39 + trace_cxl_aer_correctable_error(cxlmd, status); 45 40 } 46 41 47 - static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, 48 - struct cxl_ras_capability_regs ras_cap) 42 + static void 43 + cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, 44 + struct cxl_ras_capability_regs ras_cap) 49 45 { 50 46 u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; 51 - struct cxl_dev_state *cxlds; 52 47 u32 fe; 53 - 54 - cxlds = pci_get_drvdata(pdev); 55 - if (!cxlds) 56 - return; 57 48 58 49 if (hweight32(status) > 1) 59 50 fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, ··· 52 61 else 53 62 fe = status; 54 63 55 - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, 64 + trace_cxl_aer_uncorrectable_error(cxlmd, status, fe, 56 65 ras_cap.header_log); 66 + } 67 + 68 + static int match_memdev_by_parent(struct device *dev, const void *uport) 69 + { 70 + if (is_cxl_memdev(dev) && dev->parent == uport) 71 + return 1; 72 + return 0; 57 73 } 58 74 59 75 static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) ··· 71 73 pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, 72 74 data->prot_err.agent_addr.bus, 73 75 devfn); 76 + struct cxl_memdev *cxlmd; 74 77 int port_type; 75 78 76 79 if (!pdev) 77 80 return; 78 - 79 - guard(device)(&pdev->dev); 80 81 81 82 port_type = pci_pcie_type(pdev); 82 83 if (port_type == PCI_EXP_TYPE_ROOT_PORT || ··· 89 92 return; 90 93 } 91 94 95 + guard(device)(&pdev->dev); 96 + if 
(!pdev->dev.driver) 97 + return; 98 + 99 + struct device *mem_dev __free(put_device) = bus_find_device( 100 + &cxl_bus_type, NULL, pdev, match_memdev_by_parent); 101 + if (!mem_dev) 102 + return; 103 + 104 + cxlmd = to_cxl_memdev(mem_dev); 92 105 if (data->severity == AER_CORRECTABLE) 93 - cxl_cper_trace_corr_prot_err(pdev, data->ras_cap); 106 + cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap); 94 107 else 95 - cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap); 108 + cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap); 96 109 } 97 110 98 111 static void cxl_cper_prot_err_work_fn(struct work_struct *work)