Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'edac_updates_for_v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:

- Add a driver for the RAS functionality on Xilinx's on-chip memory
controller

- Add support for decoding errors from the first and second level
memory on SKL-based hardware

- Add support for the memory controllers in Intel Granite Rapids and
Emerald Rapids machines

- First round of amd64_edac driver simplification and removal of
unneeded functionality

- The usual cleanups and fixes

* tag 'edac_updates_for_v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
EDAC/amd64: Shut up an -Werror,-Wsometimes-uninitialized clang false positive
EDAC/amd64: Remove early_channel_count()
EDAC/amd64: Remove PCI Function 0
EDAC/amd64: Remove PCI Function 6
EDAC/amd64: Remove scrub rate control for Family 17h and later
EDAC/amd64: Don't set up EDAC PCI control on Family 17h+
EDAC/i10nm: Add driver decoder for Sapphire Rapids server
EDAC/i10nm: Add Intel Granite Rapids server support
EDAC/i10nm: Make more configurations CPU model specific
EDAC/i10nm: Add Intel Emerald Rapids server support
EDAC/skx_common: Delete duplicated and unreachable code
EDAC/skx_common: Enable EDAC support for the "near" memory
EDAC/qcom: Add platform_device_id table for module autoloading
EDAC/zynqmp: Add EDAC support for Xilinx ZynqMP OCM
dt-bindings: edac: Add bindings for Xilinx ZynqMP OCM

+1013 -359
+45
Documentation/devicetree/bindings/memory-controllers/xlnx,zynqmp-ocmc-1.0.yaml
··· 1 + # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 + %YAML 1.2 3 + --- 4 + $id: http://devicetree.org/schemas/memory-controllers/xlnx,zynqmp-ocmc-1.0.yaml# 5 + $schema: http://devicetree.org/meta-schemas/core.yaml# 6 + 7 + title: Xilinx Zynqmp OCM(On-Chip Memory) Controller 8 + 9 + maintainers: 10 + - Shubhrajyoti Datta <shubhrajyoti.datta@amd.com> 11 + - Sai Krishna Potthuri <sai.krishna.potthuri@amd.com> 12 + 13 + description: | 14 + The OCM supports 64-bit wide ECC functionality to detect multi-bit errors 15 + and recover from a single-bit memory fault. On a write, if all bytes are 16 + being written, the ECC is generated and written into the ECC RAM along with 17 + the write-data that is written into the data RAM. If one or more bytes are 18 + not written, then the read operation results in a correctable error or 19 + uncorrectable error. 20 + 21 + properties: 22 + compatible: 23 + const: xlnx,zynqmp-ocmc-1.0 24 + 25 + reg: 26 + maxItems: 1 27 + 28 + interrupts: 29 + maxItems: 1 30 + 31 + required: 32 + - compatible 33 + - reg 34 + - interrupts 35 + 36 + additionalProperties: false 37 + 38 + examples: 39 + - | 40 + #include <dt-bindings/interrupt-controller/arm-gic.h> 41 + memory-controller@ff960000 { 42 + compatible = "xlnx,zynqmp-ocmc-1.0"; 43 + reg = <0xff960000 0x1000>; 44 + interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; 45 + };
+7
MAINTAINERS
··· 22743 22743 F: drivers/dma/xilinx/xilinx_dpdma.c 22744 22744 F: include/dt-bindings/dma/xlnx-zynqmp-dpdma.h 22745 22745 22746 + XILINX ZYNQMP OCM EDAC DRIVER 22747 + M: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com> 22748 + M: Sai Krishna Potthuri <sai.krishna.potthuri@amd.com> 22749 + S: Maintained 22750 + F: Documentation/devicetree/bindings/memory-controllers/xlnx,zynqmp-ocmc-1.0.yaml 22751 + F: drivers/edac/zynqmp_edac.c 22752 + 22746 22753 XILINX ZYNQMP PSGTR PHY DRIVER 22747 22754 M: Anurag Kumar Vulisha <anurag.kumar.vulisha@xilinx.com> 22748 22755 M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+8
drivers/edac/Kconfig
··· 542 542 Support for error detection and correction on the 543 543 SoCs with ARM DMC-520 DRAM controller. 544 544 545 + config EDAC_ZYNQMP 546 + tristate "Xilinx ZynqMP OCM Controller" 547 + depends on ARCH_ZYNQMP || COMPILE_TEST 548 + help 549 + This driver supports error detection and correction for the 550 + Xilinx ZynqMP OCM (On Chip Memory) controller. It can also be 551 + built as a module. In that case it will be called zynqmp_edac. 552 + 545 553 endif # EDAC
+1
drivers/edac/Makefile
··· 84 84 obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o 85 85 obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o 86 86 obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o 87 + obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o
+17 -200
drivers/edac/amd64_edac.c
··· 182 182 * other archs, we might not have access to the caches directly. 183 183 */ 184 184 185 - static inline void __f17h_set_scrubval(struct amd64_pvt *pvt, u32 scrubval) 186 - { 187 - /* 188 - * Fam17h supports scrub values between 0x5 and 0x14. Also, the values 189 - * are shifted down by 0x5, so scrubval 0x5 is written to the register 190 - * as 0x0, scrubval 0x6 as 0x1, etc. 191 - */ 192 - if (scrubval >= 0x5 && scrubval <= 0x14) { 193 - scrubval -= 0x5; 194 - pci_write_bits32(pvt->F6, F17H_SCR_LIMIT_ADDR, scrubval, 0xF); 195 - pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 1, 0x1); 196 - } else { 197 - pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 0, 0x1); 198 - } 199 - } 200 185 /* 201 186 * Scan the scrub rate mapping table for a close or matching bandwidth value to 202 187 * issue. If requested is too big, then use last maximum value found. ··· 214 229 215 230 scrubval = scrubrates[i].scrubval; 216 231 217 - if (pvt->umc) { 218 - __f17h_set_scrubval(pvt, scrubval); 219 - } else if (pvt->fam == 0x15 && pvt->model == 0x60) { 232 + if (pvt->fam == 0x15 && pvt->model == 0x60) { 220 233 f15h_select_dct(pvt, 0); 221 234 pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F); 222 235 f15h_select_dct(pvt, 1); ··· 254 271 int i, retval = -EINVAL; 255 272 u32 scrubval = 0; 256 273 257 - if (pvt->umc) { 258 - amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); 259 - if (scrubval & BIT(0)) { 260 - amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); 261 - scrubval &= 0xF; 262 - scrubval += 0x5; 263 - } else { 264 - scrubval = 0; 265 - } 266 - } else if (pvt->fam == 0x15) { 274 + if (pvt->fam == 0x15) { 267 275 /* Erratum #505 */ 268 276 if (pvt->model < 0x10) 269 277 f15h_select_dct(pvt, 0); ··· 1428 1454 1429 1455 debug_display_dimm_sizes_df(pvt, i); 1430 1456 } 1431 - 1432 - edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n", 1433 - pvt->dhar, dhar_base(pvt)); 1434 1457 } 1435 1458 1436 1459 /* Display and decode various NB 
registers for debug purposes. */ ··· 1462 1491 /* Only if NOT ganged does dclr1 have valid info */ 1463 1492 if (!dct_ganging_enabled(pvt)) 1464 1493 debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); 1494 + 1495 + edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); 1465 1496 } 1466 1497 1467 1498 /* Display and decode various NB registers for debug purposes. */ ··· 1473 1500 __dump_misc_regs_df(pvt); 1474 1501 else 1475 1502 __dump_misc_regs(pvt); 1476 - 1477 - edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); 1478 1503 1479 1504 amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); 1480 1505 } ··· 1701 1730 1702 1731 ddr3: 1703 1732 pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; 1704 - } 1705 - 1706 - /* Get the number of DCT channels the memory controller is using. */ 1707 - static int k8_early_channel_count(struct amd64_pvt *pvt) 1708 - { 1709 - int flag; 1710 - 1711 - if (pvt->ext_model >= K8_REV_F) 1712 - /* RevF (NPT) and later */ 1713 - flag = pvt->dclr0 & WIDTH_128; 1714 - else 1715 - /* RevE and earlier */ 1716 - flag = pvt->dclr0 & REVE_WIDTH_128; 1717 - 1718 - /* not used */ 1719 - pvt->dclr1 = 0; 1720 - 1721 - return (flag) ? 2 : 1; 1722 1733 } 1723 1734 1724 1735 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ ··· 1952 1999 WARN_ON(cs_mode > 6); 1953 2000 return 32 << cs_mode; 1954 2001 } 1955 - } 1956 - 1957 - /* 1958 - * Get the number of DCT channels in use. 
1959 - * 1960 - * Return: 1961 - * number of Memory Channels in operation 1962 - * Pass back: 1963 - * contents of the DCL0_LOW register 1964 - */ 1965 - static int f1x_early_channel_count(struct amd64_pvt *pvt) 1966 - { 1967 - int i, j, channels = 0; 1968 - 1969 - /* On F10h, if we are in 128 bit mode, then we are using 2 channels */ 1970 - if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128)) 1971 - return 2; 1972 - 1973 - /* 1974 - * Need to check if in unganged mode: In such, there are 2 channels, 1975 - * but they are not in 128 bit mode and thus the above 'dclr0' status 1976 - * bit will be OFF. 1977 - * 1978 - * Need to check DCT0[0] and DCT1[0] to see if only one of them has 1979 - * their CSEnable bit on. If so, then SINGLE DIMM case. 1980 - */ 1981 - edac_dbg(0, "Data width is not 128 bits - need more decoding\n"); 1982 - 1983 - /* 1984 - * Check DRAM Bank Address Mapping values for each DIMM to see if there 1985 - * is more than just one DIMM present in unganged mode. Need to check 1986 - * both controllers since DIMMs can be placed in either one. 1987 - */ 1988 - for (i = 0; i < 2; i++) { 1989 - u32 dbam = (i ? 
pvt->dbam1 : pvt->dbam0); 1990 - 1991 - for (j = 0; j < 4; j++) { 1992 - if (DBAM_DIMM(j, dbam) > 0) { 1993 - channels++; 1994 - break; 1995 - } 1996 - } 1997 - } 1998 - 1999 - if (channels > 2) 2000 - channels = 2; 2001 - 2002 - amd64_info("MCT channel count: %d\n", channels); 2003 - 2004 - return channels; 2005 - } 2006 - 2007 - static int f17_early_channel_count(struct amd64_pvt *pvt) 2008 - { 2009 - int i, channels = 0; 2010 - 2011 - /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ 2012 - for_each_umc(i) 2013 - channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); 2014 - 2015 - amd64_info("MCT channel count: %d\n", channels); 2016 - 2017 - return channels; 2018 2002 } 2019 2003 2020 2004 static int ddr3_cs_size(unsigned i, bool dct_width) ··· 2748 2858 .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, 2749 2859 .max_mcs = 2, 2750 2860 .ops = { 2751 - .early_channel_count = k8_early_channel_count, 2752 2861 .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, 2753 2862 .dbam_to_cs = k8_dbam_to_chip_select, 2754 2863 } ··· 2758 2869 .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, 2759 2870 .max_mcs = 2, 2760 2871 .ops = { 2761 - .early_channel_count = f1x_early_channel_count, 2762 2872 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2763 2873 .dbam_to_cs = f10_dbam_to_chip_select, 2764 2874 } ··· 2768 2880 .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, 2769 2881 .max_mcs = 2, 2770 2882 .ops = { 2771 - .early_channel_count = f1x_early_channel_count, 2772 2883 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2773 2884 .dbam_to_cs = f15_dbam_to_chip_select, 2774 2885 } ··· 2778 2891 .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, 2779 2892 .max_mcs = 2, 2780 2893 .ops = { 2781 - .early_channel_count = f1x_early_channel_count, 2782 2894 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2783 2895 .dbam_to_cs = f16_dbam_to_chip_select, 2784 2896 } ··· 2788 2902 .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, 2789 2903 .max_mcs = 2, 2790 2904 .ops = { 2791 - .early_channel_count = 
f1x_early_channel_count, 2792 2905 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2793 2906 .dbam_to_cs = f15_m60h_dbam_to_chip_select, 2794 2907 } ··· 2798 2913 .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, 2799 2914 .max_mcs = 2, 2800 2915 .ops = { 2801 - .early_channel_count = f1x_early_channel_count, 2802 2916 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2803 2917 .dbam_to_cs = f16_dbam_to_chip_select, 2804 2918 } ··· 2808 2924 .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, 2809 2925 .max_mcs = 2, 2810 2926 .ops = { 2811 - .early_channel_count = f1x_early_channel_count, 2812 2927 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2813 2928 .dbam_to_cs = f16_dbam_to_chip_select, 2814 2929 } 2815 2930 }, 2816 2931 [F17_CPUS] = { 2817 2932 .ctl_name = "F17h", 2818 - .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0, 2819 - .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6, 2820 2933 .max_mcs = 2, 2821 2934 .ops = { 2822 - .early_channel_count = f17_early_channel_count, 2823 2935 .dbam_to_cs = f17_addr_mask_to_cs_size, 2824 2936 } 2825 2937 }, 2826 2938 [F17_M10H_CPUS] = { 2827 2939 .ctl_name = "F17h_M10h", 2828 - .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, 2829 - .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, 2830 2940 .max_mcs = 2, 2831 2941 .ops = { 2832 - .early_channel_count = f17_early_channel_count, 2833 2942 .dbam_to_cs = f17_addr_mask_to_cs_size, 2834 2943 } 2835 2944 }, 2836 2945 [F17_M30H_CPUS] = { 2837 2946 .ctl_name = "F17h_M30h", 2838 - .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, 2839 - .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, 2840 2947 .max_mcs = 8, 2841 2948 .ops = { 2842 - .early_channel_count = f17_early_channel_count, 2843 2949 .dbam_to_cs = f17_addr_mask_to_cs_size, 2844 2950 } 2845 2951 }, 2846 2952 [F17_M60H_CPUS] = { 2847 2953 .ctl_name = "F17h_M60h", 2848 - .f0_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F0, 2849 - .f6_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F6, 2850 2954 .max_mcs = 2, 2851 2955 .ops = { 2852 - .early_channel_count = f17_early_channel_count, 2853 2956 .dbam_to_cs = 
f17_addr_mask_to_cs_size, 2854 2957 } 2855 2958 }, 2856 2959 [F17_M70H_CPUS] = { 2857 2960 .ctl_name = "F17h_M70h", 2858 - .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, 2859 - .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, 2860 2961 .max_mcs = 2, 2861 2962 .ops = { 2862 - .early_channel_count = f17_early_channel_count, 2863 2963 .dbam_to_cs = f17_addr_mask_to_cs_size, 2864 2964 } 2865 2965 }, 2866 2966 [F19_CPUS] = { 2867 2967 .ctl_name = "F19h", 2868 - .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0, 2869 - .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6, 2870 2968 .max_mcs = 8, 2871 2969 .ops = { 2872 - .early_channel_count = f17_early_channel_count, 2873 2970 .dbam_to_cs = f17_addr_mask_to_cs_size, 2874 2971 } 2875 2972 }, 2876 2973 [F19_M10H_CPUS] = { 2877 2974 .ctl_name = "F19h_M10h", 2878 - .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, 2879 - .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, 2880 2975 .max_mcs = 12, 2881 2976 .flags.zn_regs_v2 = 1, 2882 2977 .ops = { 2883 - .early_channel_count = f17_early_channel_count, 2884 2978 .dbam_to_cs = f17_addr_mask_to_cs_size, 2885 2979 } 2886 2980 }, 2887 2981 [F19_M50H_CPUS] = { 2888 2982 .ctl_name = "F19h_M50h", 2889 - .f0_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F0, 2890 - .f6_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F6, 2891 2983 .max_mcs = 2, 2892 2984 .ops = { 2893 - .early_channel_count = f17_early_channel_count, 2894 2985 .dbam_to_cs = f17_addr_mask_to_cs_size, 2895 2986 } 2896 2987 }, ··· 3175 3316 /* 3176 3317 * Use pvt->F3 which contains the F3 CPU PCI device to get the related 3177 3318 * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error. 3178 - * Reserve F0 and F6 on systems with a UMC. 
3179 3319 */ 3180 3320 static int 3181 3321 reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) 3182 3322 { 3183 - if (pvt->umc) { 3184 - pvt->F0 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); 3185 - if (!pvt->F0) { 3186 - edac_dbg(1, "F0 not found, device 0x%x\n", pci_id1); 3187 - return -ENODEV; 3188 - } 3189 - 3190 - pvt->F6 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3); 3191 - if (!pvt->F6) { 3192 - pci_dev_put(pvt->F0); 3193 - pvt->F0 = NULL; 3194 - 3195 - edac_dbg(1, "F6 not found: device 0x%x\n", pci_id2); 3196 - return -ENODEV; 3197 - } 3198 - 3199 - if (!pci_ctl_dev) 3200 - pci_ctl_dev = &pvt->F0->dev; 3201 - 3202 - edac_dbg(1, "F0: %s\n", pci_name(pvt->F0)); 3203 - edac_dbg(1, "F3: %s\n", pci_name(pvt->F3)); 3204 - edac_dbg(1, "F6: %s\n", pci_name(pvt->F6)); 3205 - 3323 + if (pvt->umc) 3206 3324 return 0; 3207 - } 3208 3325 3209 3326 /* Reserve the ADDRESS MAP Device */ 3210 3327 pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); ··· 3212 3377 static void free_mc_sibling_devs(struct amd64_pvt *pvt) 3213 3378 { 3214 3379 if (pvt->umc) { 3215 - pci_dev_put(pvt->F0); 3216 - pci_dev_put(pvt->F6); 3380 + return; 3217 3381 } else { 3218 3382 pci_dev_put(pvt->F1); 3219 3383 pci_dev_put(pvt->F2); ··· 3302 3468 3303 3469 if (pvt->umc) { 3304 3470 __read_mc_regs_df(pvt); 3305 - amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar); 3306 3471 3307 3472 goto skip; 3308 3473 } ··· 3524 3691 : EDAC_SECDED; 3525 3692 } 3526 3693 3527 - for (j = 0; j < pvt->channel_count; j++) { 3694 + for (j = 0; j < fam_type->max_mcs; j++) { 3528 3695 dimm = csrow->channels[j]->dimm; 3529 3696 dimm->mtype = pvt->dram_type; 3530 3697 dimm->edac_mode = edac_mode; ··· 3800 3967 mci->dev_name = pci_name(pvt->F3); 3801 3968 mci->ctl_page_to_phys = NULL; 3802 3969 3970 + if (pvt->fam >= 0x17) 3971 + return; 3972 + 3803 3973 /* memory scrubber interface */ 3804 3974 mci->set_sdram_scrub_rate = set_scrub_rate; 3805 3975 
mci->get_sdram_scrub_rate = get_scrub_rate; ··· 3928 4092 3929 4093 static int hw_info_get(struct amd64_pvt *pvt) 3930 4094 { 3931 - u16 pci_id1, pci_id2; 4095 + u16 pci_id1 = 0, pci_id2 = 0; 3932 4096 int ret; 3933 4097 3934 4098 if (pvt->fam >= 0x17) { 3935 4099 pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); 3936 4100 if (!pvt->umc) 3937 4101 return -ENOMEM; 3938 - 3939 - pci_id1 = fam_type->f0_id; 3940 - pci_id2 = fam_type->f6_id; 3941 4102 } else { 3942 4103 pci_id1 = fam_type->f1_id; 3943 4104 pci_id2 = fam_type->f2_id; ··· 3951 4118 3952 4119 static void hw_info_put(struct amd64_pvt *pvt) 3953 4120 { 3954 - if (pvt->F0 || pvt->F1) 4121 + if (pvt->F1) 3955 4122 free_mc_sibling_devs(pvt); 3956 4123 3957 4124 kfree(pvt->umc); ··· 3961 4128 { 3962 4129 struct mem_ctl_info *mci = NULL; 3963 4130 struct edac_mc_layer layers[2]; 3964 - int ret = -EINVAL; 4131 + int ret = -ENOMEM; 3965 4132 3966 - /* 3967 - * We need to determine how many memory channels there are. Then use 3968 - * that information for calculating the size of the dynamic instance 3969 - * tables in the 'mci' structure. 3970 - */ 3971 - pvt->channel_count = pvt->ops->early_channel_count(pvt); 3972 - if (pvt->channel_count < 0) 3973 - return ret; 3974 - 3975 - ret = -ENOMEM; 3976 4133 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; 3977 4134 layers[0].size = pvt->csels[0].b_cnt; 3978 4135 layers[0].is_virt_csrow = true; 3979 4136 layers[1].type = EDAC_MC_LAYER_CHANNEL; 3980 - 3981 - /* 3982 - * Always allocate two channels since we can have setups with DIMMs on 3983 - * only one channel. Also, this simplifies handling later for the price 3984 - * of a couple of KBs tops. 
3985 - */ 3986 4137 layers[1].size = fam_type->max_mcs; 3987 4138 layers[1].is_virt_csrow = false; 3988 4139 ··· 4187 4370 } 4188 4371 4189 4372 /* register stuff with EDAC MCE */ 4190 - if (boot_cpu_data.x86 >= 0x17) 4373 + if (boot_cpu_data.x86 >= 0x17) { 4191 4374 amd_register_ecc_decoder(decode_umc_error); 4192 - else 4375 + } else { 4193 4376 amd_register_ecc_decoder(decode_bus_error); 4194 - 4195 - setup_pci_device(); 4377 + setup_pci_device(); 4378 + } 4196 4379 4197 4380 #ifdef CONFIG_X86_32 4198 4381 amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
+2 -22
drivers/edac/amd64_edac.h
··· 114 114 #define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532 115 115 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581 116 116 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 117 - #define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460 118 - #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 119 - #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 120 - #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee 121 - #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 122 - #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 123 - #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F0 0x1448 124 - #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F6 0x144e 125 - #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 126 - #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 127 - #define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 128 - #define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 129 - #define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad 130 - #define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3 131 - #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F0 0x166a 132 - #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F6 0x1670 133 117 134 118 /* 135 119 * Function 1 - Address Map ··· 199 215 #define DCT_SEL_HI 0x114 200 216 201 217 #define F15H_M60H_SCRCTRL 0x1C8 202 - #define F17H_SCR_BASE_ADDR 0x48 203 - #define F17H_SCR_LIMIT_ADDR 0x4C 204 218 205 219 /* 206 220 * Function 3 - Misc Control ··· 338 356 struct low_ops *ops; 339 357 340 358 /* pci_device handles which we utilize */ 341 - struct pci_dev *F0, *F1, *F2, *F3, *F6; 359 + struct pci_dev *F1, *F2, *F3; 342 360 343 361 u16 mc_node_id; /* MC index of this MC node */ 344 362 u8 fam; /* CPU family */ ··· 346 364 u8 stepping; /* ... stepping */ 347 365 348 366 int ext_model; /* extended model value of this node */ 349 - int channel_count; 350 367 351 368 /* Raw registers */ 352 369 u32 dclr0; /* DRAM Configuration Low DCT0 reg */ ··· 465 484 * functions and per device encoding/decoding logic. 
466 485 */ 467 486 struct low_ops { 468 - int (*early_channel_count) (struct amd64_pvt *pvt); 469 487 void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, 470 488 struct err_info *); 471 489 int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, ··· 483 503 484 504 struct amd64_family_type { 485 505 const char *ctl_name; 486 - u16 f0_id, f1_id, f2_id, f6_id; 506 + u16 f1_id, f2_id; 487 507 /* Maximum number of memory controllers per die/node. */ 488 508 u8 max_mcs; 489 509 struct amd64_family_flags flags;
+369 -90
drivers/edac/i10nm_base.c
··· 13 13 #include "edac_module.h" 14 14 #include "skx_common.h" 15 15 16 - #define I10NM_REVISION "v0.0.5" 16 + #define I10NM_REVISION "v0.0.6" 17 17 #define EDAC_MOD_STR "i10nm_edac" 18 18 19 19 /* Debug macros */ ··· 22 22 23 23 #define I10NM_GET_SCK_BAR(d, reg) \ 24 24 pci_read_config_dword((d)->uracu, 0xd0, &(reg)) 25 - #define I10NM_GET_IMC_BAR(d, i, reg) \ 26 - pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) 25 + #define I10NM_GET_IMC_BAR(d, i, reg) \ 26 + pci_read_config_dword((d)->uracu, \ 27 + (res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg)) 27 28 #define I10NM_GET_SAD(d, offset, i, reg)\ 28 - pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg)) 29 + pci_read_config_dword((d)->sad_all, (offset) + (i) * \ 30 + (res_cfg->type == GNR ? 12 : 8), &(reg)) 29 31 #define I10NM_GET_HBM_IMC_BAR(d, reg) \ 30 32 pci_read_config_dword((d)->uracu, 0xd4, &(reg)) 31 33 #define I10NM_GET_CAPID3_CFG(d, reg) \ 32 - pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg)) 34 + pci_read_config_dword((d)->pcu_cr3, \ 35 + res_cfg->type == GNR ? 0x290 : 0x90, &(reg)) 36 + #define I10NM_GET_CAPID5_CFG(d, reg) \ 37 + pci_read_config_dword((d)->pcu_cr3, \ 38 + res_cfg->type == GNR ? 0x298 : 0x98, &(reg)) 33 39 #define I10NM_GET_DIMMMTR(m, i, j) \ 34 - readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \ 40 + readl((m)->mbase + ((m)->hbm_mc ? 0x80c : \ 41 + (res_cfg->type == GNR ? 0xc0c : 0x2080c)) + \ 35 42 (i) * (m)->chan_mmio_sz + (j) * 4) 36 43 #define I10NM_GET_MCDDRTCFG(m, i) \ 37 44 readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \ 38 45 (i) * (m)->chan_mmio_sz) 39 46 #define I10NM_GET_MCMTR(m, i) \ 40 - readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \ 47 + readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \ 48 + (res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \ 41 49 (i) * (m)->chan_mmio_sz) 42 50 #define I10NM_GET_AMAP(m, i) \ 43 - readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \ 51 + readl((m)->mbase + ((m)->hbm_mc ? 
0x814 : \ 52 + (res_cfg->type == GNR ? 0xc14 : 0x20814)) + \ 44 53 (i) * (m)->chan_mmio_sz) 45 54 #define I10NM_GET_REG32(m, i, offset) \ 46 55 readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) ··· 65 56 #define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \ 66 57 ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) 67 58 59 + #define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000 60 + #define I10NM_GNR_IMC_MMIO_SIZE 0x4000 68 61 #define I10NM_HBM_IMC_MMIO_SIZE 0x9000 62 + #define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24) 69 63 #define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) 70 64 #define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) 71 65 ··· 160 148 161 149 static void enable_retry_rd_err_log(bool enable) 162 150 { 151 + int i, j, imc_num, chan_num; 163 152 struct skx_imc *imc; 164 153 struct skx_dev *d; 165 - int i, j; 166 154 167 155 edac_dbg(2, "\n"); 168 156 169 - list_for_each_entry(d, i10nm_edac_list, list) 170 - for (i = 0; i < I10NM_NUM_IMC; i++) { 157 + list_for_each_entry(d, i10nm_edac_list, list) { 158 + imc_num = res_cfg->ddr_imc_num; 159 + chan_num = res_cfg->ddr_chan_num; 160 + 161 + for (i = 0; i < imc_num; i++) { 171 162 imc = &d->imc[i]; 172 163 if (!imc->mbase) 173 164 continue; 174 165 175 - for (j = 0; j < I10NM_NUM_CHANNELS; j++) { 176 - if (imc->hbm_mc) { 177 - __enable_retry_rd_err_log(imc, j, enable, 178 - res_cfg->offsets_scrub_hbm0, 179 - res_cfg->offsets_demand_hbm0, 180 - NULL); 181 - __enable_retry_rd_err_log(imc, j, enable, 182 - res_cfg->offsets_scrub_hbm1, 183 - res_cfg->offsets_demand_hbm1, 184 - NULL); 185 - } else { 186 - __enable_retry_rd_err_log(imc, j, enable, 187 - res_cfg->offsets_scrub, 188 - res_cfg->offsets_demand, 189 - res_cfg->offsets_demand2); 190 - } 166 + for (j = 0; j < chan_num; j++) 167 + __enable_retry_rd_err_log(imc, j, enable, 168 + res_cfg->offsets_scrub, 169 + res_cfg->offsets_demand, 170 + res_cfg->offsets_demand2); 171 + } 172 + 173 + imc_num += res_cfg->hbm_imc_num; 174 + chan_num = 
res_cfg->hbm_chan_num; 175 + 176 + for (; i < imc_num; i++) { 177 + imc = &d->imc[i]; 178 + if (!imc->mbase || !imc->hbm_mc) 179 + continue; 180 + 181 + for (j = 0; j < chan_num; j++) { 182 + __enable_retry_rd_err_log(imc, j, enable, 183 + res_cfg->offsets_scrub_hbm0, 184 + res_cfg->offsets_demand_hbm0, 185 + NULL); 186 + __enable_retry_rd_err_log(imc, j, enable, 187 + res_cfg->offsets_scrub_hbm1, 188 + res_cfg->offsets_demand_hbm1, 189 + NULL); 191 190 } 191 + } 192 192 } 193 193 } 194 194 ··· 335 311 return pdev; 336 312 } 337 313 314 + /** 315 + * i10nm_get_imc_num() - Get the number of present DDR memory controllers. 316 + * 317 + * @cfg : The pointer to the structure of EDAC resource configurations. 318 + * 319 + * For Granite Rapids CPUs, the number of present DDR memory controllers read 320 + * at runtime overwrites the value statically configured in @cfg->ddr_imc_num. 321 + * For other CPUs, the number of present DDR memory controllers is statically 322 + * configured in @cfg->ddr_imc_num. 323 + * 324 + * RETURNS : 0 on success, < 0 on failure. 325 + */ 326 + static int i10nm_get_imc_num(struct res_config *cfg) 327 + { 328 + int n, imc_num, chan_num = 0; 329 + struct skx_dev *d; 330 + u32 reg; 331 + 332 + list_for_each_entry(d, i10nm_edac_list, list) { 333 + d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus], 334 + res_cfg->pcu_cr3_bdf.dev, 335 + res_cfg->pcu_cr3_bdf.fun); 336 + if (!d->pcu_cr3) 337 + continue; 338 + 339 + if (I10NM_GET_CAPID5_CFG(d, reg)) 340 + continue; 341 + 342 + n = I10NM_DDR_IMC_CH_CNT(reg); 343 + 344 + if (!chan_num) { 345 + chan_num = n; 346 + edac_dbg(2, "Get DDR CH number: %d\n", chan_num); 347 + } else if (chan_num != n) { 348 + i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n); 349 + } 350 + } 351 + 352 + switch (cfg->type) { 353 + case GNR: 354 + /* 355 + * One channel per DDR memory controller for Granite Rapids CPUs. 
356 + */ 357 + imc_num = chan_num; 358 + 359 + if (!imc_num) { 360 + i10nm_printk(KERN_ERR, "Invalid DDR MC number\n"); 361 + return -ENODEV; 362 + } 363 + 364 + if (imc_num > I10NM_NUM_DDR_IMC) { 365 + i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num); 366 + return -EINVAL; 367 + } 368 + 369 + if (cfg->ddr_imc_num != imc_num) { 370 + /* 371 + * Store the number of present DDR memory controllers. 372 + */ 373 + cfg->ddr_imc_num = imc_num; 374 + edac_dbg(2, "Set DDR MC number: %d", imc_num); 375 + } 376 + 377 + return 0; 378 + default: 379 + /* 380 + * For other CPUs, the number of present DDR memory controllers 381 + * is statically pre-configured in cfg->ddr_imc_num. 382 + */ 383 + return 0; 384 + } 385 + } 386 + 338 387 static bool i10nm_check_2lm(struct res_config *cfg) 339 388 { 340 389 struct skx_dev *d; ··· 415 318 int i; 416 319 417 320 list_for_each_entry(d, i10nm_edac_list, list) { 418 - d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1], 419 - PCI_SLOT(cfg->sad_all_devfn), 420 - PCI_FUNC(cfg->sad_all_devfn)); 321 + d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus], 322 + res_cfg->sad_all_bdf.dev, 323 + res_cfg->sad_all_bdf.fun); 421 324 if (!d->sad_all) 422 325 continue; 423 326 ··· 434 337 } 435 338 436 339 /* 437 - * Check whether the error comes from DDRT by ICX/Tremont model specific error code. 438 - * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS. 340 + * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code. 341 + * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS. 439 342 */ 440 343 static bool i10nm_mscod_is_ddrt(u32 mscod) 441 344 { 442 - switch (mscod) { 443 - case 0x0106: case 0x0107: 444 - case 0x0800: case 0x0804: 445 - case 0x0806 ... 0x0808: 446 - case 0x080a ... 
0x080e: 447 - case 0x0810: case 0x0811: 448 - case 0x0816: case 0x081e: 449 - case 0x081f: 450 - return true; 345 + switch (res_cfg->type) { 346 + case I10NM: 347 + switch (mscod) { 348 + case 0x0106: case 0x0107: 349 + case 0x0800: case 0x0804: 350 + case 0x0806 ... 0x0808: 351 + case 0x080a ... 0x080e: 352 + case 0x0810: case 0x0811: 353 + case 0x0816: case 0x081e: 354 + case 0x081f: 355 + return true; 356 + } 357 + 358 + break; 359 + case SPR: 360 + switch (mscod) { 361 + case 0x0800: case 0x0804: 362 + case 0x0806 ... 0x0808: 363 + case 0x080a ... 0x080e: 364 + case 0x0810: case 0x0811: 365 + case 0x0816: case 0x081e: 366 + case 0x081f: 367 + return true; 368 + } 369 + 370 + break; 371 + default: 372 + return false; 451 373 } 452 374 453 375 return false; ··· 474 358 475 359 static bool i10nm_mc_decode_available(struct mce *mce) 476 360 { 361 + #define ICX_IMCx_CHy 0x06666000 477 362 u8 bank; 478 363 479 364 if (!decoding_via_mca || mem_cfg_2lm) ··· 488 371 489 372 switch (res_cfg->type) { 490 373 case I10NM: 491 - if (bank < 13 || bank > 26) 374 + /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */ 375 + if (!(ICX_IMCx_CHy & (1 << bank))) 492 376 return false; 493 - 494 - /* DDRT errors can't be decoded from MCA bank registers */ 495 - if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) 377 + break; 378 + case SPR: 379 + if (bank < 13 || bank > 20) 496 380 return false; 497 - 498 - if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) 499 - return false; 500 - 501 - /* Check whether one of {13,14,17,18,21,22,25,26} */ 502 - return ((bank - 13) & BIT(1)) == 0; 381 + break; 503 382 default: 504 383 return false; 505 384 } 385 + 386 + /* DDRT errors can't be decoded from MCA bank registers */ 387 + if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) 388 + return false; 389 + 390 + if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) 391 + return false; 392 + 393 + return true; 506 394 } 507 395 508 396 static bool i10nm_mc_decode(struct 
decoded_addr *res) ··· 529 407 530 408 switch (res_cfg->type) { 531 409 case I10NM: 532 - bank = m->bank - 13; 533 - res->imc = bank / 4; 534 - res->channel = bank % 2; 410 + bank = m->bank - 13; 411 + res->imc = bank / 4; 412 + res->channel = bank % 2; 413 + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; 414 + res->row = GET_BITFIELD(m->misc, 19, 39); 415 + res->bank_group = GET_BITFIELD(m->misc, 40, 41); 416 + res->bank_address = GET_BITFIELD(m->misc, 42, 43); 417 + res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; 418 + res->rank = GET_BITFIELD(m->misc, 56, 58); 419 + res->dimm = res->rank >> 2; 420 + res->rank = res->rank % 4; 421 + break; 422 + case SPR: 423 + bank = m->bank - 13; 424 + res->imc = bank / 2; 425 + res->channel = bank % 2; 426 + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; 427 + res->row = GET_BITFIELD(m->misc, 19, 36); 428 + res->bank_group = GET_BITFIELD(m->misc, 37, 38); 429 + res->bank_address = GET_BITFIELD(m->misc, 39, 40); 430 + res->bank_group |= GET_BITFIELD(m->misc, 41, 41) << 2; 431 + res->rank = GET_BITFIELD(m->misc, 57, 57); 432 + res->dimm = GET_BITFIELD(m->misc, 58, 58); 535 433 break; 536 434 default: 537 435 return false; ··· 563 421 return false; 564 422 } 565 423 566 - res->column = GET_BITFIELD(m->misc, 9, 18) << 2; 567 - res->row = GET_BITFIELD(m->misc, 19, 39); 568 - res->bank_group = GET_BITFIELD(m->misc, 40, 41); 569 - res->bank_address = GET_BITFIELD(m->misc, 42, 43); 570 - res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; 571 - res->rank = GET_BITFIELD(m->misc, 56, 58); 572 - res->dimm = res->rank >> 2; 573 - res->rank = res->rank % 4; 574 - 575 424 return true; 425 + } 426 + 427 + /** 428 + * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller. 429 + * 430 + * @d : The pointer to the structure of CPU socket EDAC device. 431 + * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1). 
432 + * @physical_idx : To store the corresponding physical index of @logical_idx. 433 + * 434 + * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure. 435 + */ 436 + static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx) 437 + { 438 + #define GNR_MAX_IMC_PCI_CNT 28 439 + 440 + struct pci_dev *mdev; 441 + int i, logical = 0; 442 + 443 + /* 444 + * Detect present memory controllers from { PCI device: 8-5, function 7-1 } 445 + */ 446 + for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) { 447 + mdev = pci_get_dev_wrapper(d->seg, 448 + d->bus[res_cfg->ddr_mdev_bdf.bus], 449 + res_cfg->ddr_mdev_bdf.dev + i / 7, 450 + res_cfg->ddr_mdev_bdf.fun + i % 7); 451 + 452 + if (mdev) { 453 + if (logical == logical_idx) { 454 + *physical_idx = i; 455 + return mdev; 456 + } 457 + 458 + pci_dev_put(mdev); 459 + logical++; 460 + } 461 + } 462 + 463 + return NULL; 464 + } 465 + 466 + /** 467 + * get_ddr_munit() - Get the resource of the i-th DDR memory controller. 468 + * 469 + * @d : The pointer to the structure of CPU socket EDAC device. 470 + * @i : The index of the CPU socket relative DDR memory controller. 471 + * @offset : To store the MMIO offset of the i-th DDR memory controller. 472 + * @size : To store the MMIO size of the i-th DDR memory controller. 473 + * 474 + * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure. 
475 + */ 476 + static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size) 477 + { 478 + struct pci_dev *mdev; 479 + int physical_idx; 480 + u32 reg; 481 + 482 + switch (res_cfg->type) { 483 + case GNR: 484 + if (I10NM_GET_IMC_BAR(d, 0, reg)) { 485 + i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n"); 486 + return NULL; 487 + } 488 + 489 + mdev = get_gnr_mdev(d, i, &physical_idx); 490 + if (!mdev) 491 + return NULL; 492 + 493 + *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) + 494 + I10NM_GNR_IMC_MMIO_OFFSET + 495 + physical_idx * I10NM_GNR_IMC_MMIO_SIZE; 496 + *size = I10NM_GNR_IMC_MMIO_SIZE; 497 + 498 + break; 499 + default: 500 + if (I10NM_GET_IMC_BAR(d, i, reg)) { 501 + i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i); 502 + return NULL; 503 + } 504 + 505 + mdev = pci_get_dev_wrapper(d->seg, 506 + d->bus[res_cfg->ddr_mdev_bdf.bus], 507 + res_cfg->ddr_mdev_bdf.dev + i, 508 + res_cfg->ddr_mdev_bdf.fun); 509 + if (!mdev) 510 + return NULL; 511 + 512 + *offset = I10NM_GET_IMC_MMIO_OFFSET(reg); 513 + *size = I10NM_GET_IMC_MMIO_SIZE(reg); 514 + } 515 + 516 + return mdev; 576 517 } 577 518 578 519 static int i10nm_get_ddr_munits(void) ··· 669 444 u64 base; 670 445 671 446 list_for_each_entry(d, i10nm_edac_list, list) { 672 - d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1); 447 + d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus], 448 + res_cfg->util_all_bdf.dev, 449 + res_cfg->util_all_bdf.fun); 673 450 if (!d->util_all) 674 451 return -ENODEV; 675 452 676 - d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1); 453 + d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus], 454 + res_cfg->uracu_bdf.dev, 455 + res_cfg->uracu_bdf.fun); 677 456 if (!d->uracu) 678 457 return -ENODEV; 679 458 ··· 690 461 edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", 691 462 j++, base, reg); 692 463 693 - for (i = 0; i < I10NM_NUM_DDR_IMC; i++) { 694 - mdev = pci_get_dev_wrapper(d->seg, d->bus[0], 
695 - 12 + i, 0); 464 + for (i = 0; i < res_cfg->ddr_imc_num; i++) { 465 + mdev = get_ddr_munit(d, i, &off, &size); 466 + 696 467 if (i == 0 && !mdev) { 697 468 i10nm_printk(KERN_ERR, "No IMC found\n"); 698 469 return -ENODEV; ··· 702 473 703 474 d->imc[i].mdev = mdev; 704 475 705 - if (I10NM_GET_IMC_BAR(d, i, reg)) { 706 - i10nm_printk(KERN_ERR, "Failed to get mc bar\n"); 707 - return -ENODEV; 708 - } 709 - 710 - off = I10NM_GET_IMC_MMIO_OFFSET(reg); 711 - size = I10NM_GET_IMC_MMIO_SIZE(reg); 712 476 edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n", 713 477 i, base + off, size, reg); 714 478 ··· 741 519 u64 base; 742 520 743 521 list_for_each_entry(d, i10nm_edac_list, list) { 744 - d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3); 745 522 if (!d->pcu_cr3) 746 523 return -ENODEV; 747 524 ··· 761 540 } 762 541 base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg); 763 542 764 - lmc = I10NM_NUM_DDR_IMC; 543 + lmc = res_cfg->ddr_imc_num; 765 544 766 - for (i = 0; i < I10NM_NUM_HBM_IMC; i++) { 767 - mdev = pci_get_dev_wrapper(d->seg, d->bus[0], 768 - 12 + i / 4, 1 + i % 4); 545 + for (i = 0; i < res_cfg->hbm_imc_num; i++) { 546 + mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus], 547 + res_cfg->hbm_mdev_bdf.dev + i / 4, 548 + res_cfg->hbm_mdev_bdf.fun + i % 4); 549 + 769 550 if (i == 0 && !mdev) { 770 551 i10nm_printk(KERN_ERR, "No hbm mc found\n"); 771 552 return -ENODEV; ··· 817 594 .type = I10NM, 818 595 .decs_did = 0x3452, 819 596 .busno_cfg_offset = 0xcc, 597 + .ddr_imc_num = 4, 598 + .ddr_chan_num = 2, 599 + .ddr_dimm_num = 2, 820 600 .ddr_chan_mmio_sz = 0x4000, 821 - .sad_all_devfn = PCI_DEVFN(29, 0), 601 + .sad_all_bdf = {1, 29, 0}, 602 + .pcu_cr3_bdf = {1, 30, 3}, 603 + .util_all_bdf = {1, 29, 1}, 604 + .uracu_bdf = {0, 0, 1}, 605 + .ddr_mdev_bdf = {0, 12, 0}, 606 + .hbm_mdev_bdf = {0, 12, 1}, 822 607 .sad_all_offset = 0x108, 823 608 .offsets_scrub = offsets_scrub_icx, 824 609 .offsets_demand = offsets_demand_icx, ··· 836 605 .type 
= I10NM, 837 606 .decs_did = 0x3452, 838 607 .busno_cfg_offset = 0xd0, 608 + .ddr_imc_num = 4, 609 + .ddr_chan_num = 2, 610 + .ddr_dimm_num = 2, 839 611 .ddr_chan_mmio_sz = 0x4000, 840 - .sad_all_devfn = PCI_DEVFN(29, 0), 612 + .sad_all_bdf = {1, 29, 0}, 613 + .pcu_cr3_bdf = {1, 30, 3}, 614 + .util_all_bdf = {1, 29, 1}, 615 + .uracu_bdf = {0, 0, 1}, 616 + .ddr_mdev_bdf = {0, 12, 0}, 617 + .hbm_mdev_bdf = {0, 12, 1}, 841 618 .sad_all_offset = 0x108, 842 619 .offsets_scrub = offsets_scrub_icx, 843 620 .offsets_demand = offsets_demand_icx, ··· 855 616 .type = SPR, 856 617 .decs_did = 0x3252, 857 618 .busno_cfg_offset = 0xd0, 619 + .ddr_imc_num = 4, 620 + .ddr_chan_num = 2, 621 + .ddr_dimm_num = 2, 622 + .hbm_imc_num = 16, 623 + .hbm_chan_num = 2, 624 + .hbm_dimm_num = 1, 858 625 .ddr_chan_mmio_sz = 0x8000, 859 626 .hbm_chan_mmio_sz = 0x4000, 860 627 .support_ddr5 = true, 861 - .sad_all_devfn = PCI_DEVFN(10, 0), 628 + .sad_all_bdf = {1, 10, 0}, 629 + .pcu_cr3_bdf = {1, 30, 3}, 630 + .util_all_bdf = {1, 29, 1}, 631 + .uracu_bdf = {0, 0, 1}, 632 + .ddr_mdev_bdf = {0, 12, 0}, 633 + .hbm_mdev_bdf = {0, 12, 1}, 862 634 .sad_all_offset = 0x300, 863 635 .offsets_scrub = offsets_scrub_spr, 864 636 .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, ··· 880 630 .offsets_demand_hbm1 = offsets_demand_spr_hbm1, 881 631 }; 882 632 633 + static struct res_config gnr_cfg = { 634 + .type = GNR, 635 + .decs_did = 0x3252, 636 + .busno_cfg_offset = 0xd0, 637 + .ddr_imc_num = 12, 638 + .ddr_chan_num = 1, 639 + .ddr_dimm_num = 2, 640 + .ddr_chan_mmio_sz = 0x4000, 641 + .support_ddr5 = true, 642 + .sad_all_bdf = {0, 13, 0}, 643 + .pcu_cr3_bdf = {0, 5, 0}, 644 + .util_all_bdf = {0, 13, 1}, 645 + .uracu_bdf = {0, 0, 1}, 646 + .ddr_mdev_bdf = {0, 5, 1}, 647 + .sad_all_offset = 0x300, 648 + }; 649 + 883 650 static const struct x86_cpu_id i10nm_cpuids[] = { 884 651 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), 885 652 
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), ··· 904 637 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), 905 638 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1), 906 639 X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), 640 + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), 641 + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), 907 642 {} 908 643 }; 909 644 MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); ··· 925 656 { 926 657 struct skx_pvt *pvt = mci->pvt_info; 927 658 struct skx_imc *imc = pvt->imc; 928 - u32 mtr, amap, mcddrtcfg; 659 + u32 mtr, amap, mcddrtcfg = 0; 929 660 struct dimm_info *dimm; 930 661 int i, j, ndimms; 931 662 ··· 935 666 936 667 ndimms = 0; 937 668 amap = I10NM_GET_AMAP(imc, i); 938 - mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); 669 + 670 + if (res_cfg->type != GNR) 671 + mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); 672 + 939 673 for (j = 0; j < imc->num_dimms; j++) { 940 674 dimm = edac_get_dimm(mci, i, j, 0); 941 675 mtr = I10NM_GET_DIMMMTR(imc, i, j); ··· 1024 752 struct skx_dev *d; 1025 753 int rc, i, off[3] = {0xd0, 0xc8, 0xcc}; 1026 754 u64 tolm, tohm; 755 + int imc_num; 1027 756 1028 757 edac_dbg(2, "\n"); 1029 758 ··· 1057 784 return -ENODEV; 1058 785 } 1059 786 787 + rc = i10nm_get_imc_num(cfg); 788 + if (rc < 0) 789 + goto fail; 790 + 1060 791 mem_cfg_2lm = i10nm_check_2lm(cfg); 1061 792 skx_set_mem_cfg(mem_cfg_2lm); 1062 793 ··· 1068 791 1069 792 if (i10nm_get_hbm_munits() && rc) 1070 793 goto fail; 794 + 795 + imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num; 1071 796 1072 797 list_for_each_entry(d, i10nm_edac_list, list) { 1073 798 rc = skx_get_src_id(d, 0xf8, &src_id); ··· 1081 802 goto fail; 1082 803 1083 804 edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id); 1084 - for (i = 0; i < 
I10NM_NUM_IMC; i++) { 805 + for (i = 0; i < imc_num; i++) { 1085 806 if (!d->imc[i].mdev) 1086 807 continue; 1087 808 ··· 1091 812 d->imc[i].node_id = node_id; 1092 813 if (d->imc[i].hbm_mc) { 1093 814 d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; 1094 - d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS; 1095 - d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS; 815 + d->imc[i].num_channels = cfg->hbm_chan_num; 816 + d->imc[i].num_dimms = cfg->hbm_dimm_num; 1096 817 } else { 1097 818 d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; 1098 - d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS; 1099 - d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS; 819 + d->imc[i].num_channels = cfg->ddr_chan_num; 820 + d->imc[i].num_dimms = cfg->ddr_dimm_num; 1100 821 } 1101 822 1102 823 rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
+7
drivers/edac/qcom_edac.c
··· 396 396 return 0; 397 397 } 398 398 399 + static const struct platform_device_id qcom_llcc_edac_id_table[] = { 400 + { .name = "qcom_llcc_edac" }, 401 + {} 402 + }; 403 + MODULE_DEVICE_TABLE(platform, qcom_llcc_edac_id_table); 404 + 399 405 static struct platform_driver qcom_llcc_edac_driver = { 400 406 .probe = qcom_llcc_edac_probe, 401 407 .remove = qcom_llcc_edac_remove, 402 408 .driver = { 403 409 .name = "qcom_llcc_edac", 404 410 }, 411 + .id_table = qcom_llcc_edac_id_table, 405 412 }; 406 413 module_platform_driver(qcom_llcc_edac_driver); 407 414
+33 -43
drivers/edac/skx_common.c
··· 560 560 tp_event = HW_EVENT_ERR_CORRECTED; 561 561 } 562 562 563 - /* 564 - * According to Intel Architecture spec vol 3B, 565 - * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" 566 - * memory errors should fit one of these masks: 567 - * 000f 0000 1mmm cccc (binary) 568 - * 000f 0010 1mmm cccc (binary) [RAM used as cache] 569 - * where: 570 - * f = Correction Report Filtering Bit. If 1, subsequent errors 571 - * won't be shown 572 - * mmm = error type 573 - * cccc = channel 574 - * If the mask doesn't match, report an error to the parsing logic 575 - */ 576 - if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) { 577 - optype = "Can't parse: it is not a mem"; 578 - } else { 579 - switch (optypenum) { 580 - case 0: 581 - optype = "generic undef request error"; 582 - break; 583 - case 1: 584 - optype = "memory read error"; 585 - break; 586 - case 2: 587 - optype = "memory write error"; 588 - break; 589 - case 3: 590 - optype = "addr/cmd error"; 591 - break; 592 - case 4: 593 - optype = "memory scrubbing error"; 594 - scrub_err = true; 595 - break; 596 - default: 597 - optype = "reserved"; 598 - break; 599 - } 563 + switch (optypenum) { 564 + case 0: 565 + optype = "generic undef request error"; 566 + break; 567 + case 1: 568 + optype = "memory read error"; 569 + break; 570 + case 2: 571 + optype = "memory write error"; 572 + break; 573 + case 3: 574 + optype = "addr/cmd error"; 575 + break; 576 + case 4: 577 + optype = "memory scrubbing error"; 578 + scrub_err = true; 579 + break; 580 + default: 581 + optype = "reserved"; 582 + break; 600 583 } 584 + 601 585 if (res->decoded_by_adxl) { 602 586 len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", 603 587 overflow ? 
" OVERFLOW" : "", ··· 616 632 if (!skx_mem_cfg_2lm) 617 633 return false; 618 634 619 - errcode = GET_BITFIELD(m->status, 0, 15); 635 + errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; 620 636 621 - if ((errcode & 0xef80) != 0x280) 622 - return false; 637 + return errcode == MCACOD_EXT_MEM_ERR; 638 + } 623 639 624 - return true; 640 + static bool skx_error_in_mem(const struct mce *m) 641 + { 642 + u32 errcode; 643 + 644 + errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; 645 + 646 + return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR); 625 647 } 626 648 627 649 int skx_mce_check_error(struct notifier_block *nb, unsigned long val, ··· 641 651 if (mce->kflags & MCE_HANDLED_CEC) 642 652 return NOTIFY_DONE; 643 653 644 - /* ignore unless this is memory related with an address */ 645 - if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) 654 + /* Ignore unless this is memory related with an address */ 655 + if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV)) 646 656 return NOTIFY_DONE; 647 657 648 658 memset(&res, 0, sizeof(res));
+57 -4
drivers/edac/skx_common.h
··· 33 33 #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ 34 34 #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ 35 35 36 - #define I10NM_NUM_DDR_IMC 4 36 + #define I10NM_NUM_DDR_IMC 12 37 37 #define I10NM_NUM_DDR_CHANNELS 2 38 38 #define I10NM_NUM_DDR_DIMMS 2 39 39 ··· 55 55 56 56 #define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) 57 57 #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ 58 + 59 + /* 60 + * According to Intel Architecture spec vol 3B, 61 + * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" 62 + * memory errors should fit one of these masks: 63 + * 000f 0000 1mmm cccc (binary) 64 + * 000f 0010 1mmm cccc (binary) [RAM used as cache] 65 + * where: 66 + * f = Correction Report Filtering Bit. If 1, subsequent errors 67 + * won't be shown 68 + * mmm = error type 69 + * cccc = channel 70 + */ 71 + #define MCACOD_MEM_ERR_MASK 0xef80 72 + /* 73 + * Errors from either the memory of the 1-level memory system or the 74 + * 2nd level memory (the slow "far" memory) of the 2-level memory system. 75 + */ 76 + #define MCACOD_MEM_CTL_ERR 0x80 77 + /* 78 + * Errors from the 1st level memory (the fast "near" memory as cache) 79 + * of the 2-level memory system. 
80 + */ 81 + #define MCACOD_EXT_MEM_ERR 0x280 58 82 59 83 /* 60 84 * Each cpu socket contains some pci devices that provide global ··· 129 105 enum type { 130 106 SKX, 131 107 I10NM, 132 - SPR 108 + SPR, 109 + GNR 133 110 }; 134 111 135 112 enum { ··· 174 149 bool decoded_by_adxl; 175 150 }; 176 151 152 + struct pci_bdf { 153 + u32 bus : 8; 154 + u32 dev : 5; 155 + u32 fun : 3; 156 + }; 157 + 177 158 struct res_config { 178 159 enum type type; 179 160 /* Configuration agent device ID */ 180 161 unsigned int decs_did; 181 162 /* Default bus number configuration register offset */ 182 163 int busno_cfg_offset; 164 + /* DDR memory controllers per socket */ 165 + int ddr_imc_num; 166 + /* DDR channels per DDR memory controller */ 167 + int ddr_chan_num; 168 + /* DDR DIMMs per DDR memory channel */ 169 + int ddr_dimm_num; 183 170 /* Per DDR channel memory-mapped I/O size */ 184 171 int ddr_chan_mmio_sz; 172 + /* HBM memory controllers per socket */ 173 + int hbm_imc_num; 174 + /* HBM channels per HBM memory controller */ 175 + int hbm_chan_num; 176 + /* HBM DIMMs per HBM memory channel */ 177 + int hbm_dimm_num; 185 178 /* Per HBM channel memory-mapped I/O size */ 186 179 int hbm_chan_mmio_sz; 187 180 bool support_ddr5; 188 - /* SAD device number and function number */ 189 - unsigned int sad_all_devfn; 181 + /* SAD device BDF */ 182 + struct pci_bdf sad_all_bdf; 183 + /* PCU device BDF */ 184 + struct pci_bdf pcu_cr3_bdf; 185 + /* UTIL device BDF */ 186 + struct pci_bdf util_all_bdf; 187 + /* URACU device BDF */ 188 + struct pci_bdf uracu_bdf; 189 + /* DDR mdev device BDF */ 190 + struct pci_bdf ddr_mdev_bdf; 191 + /* HBM mdev device BDF */ 192 + struct pci_bdf hbm_mdev_bdf; 190 193 int sad_all_offset; 191 194 /* Offsets of retry_rd_err_log registers */ 192 195 u32 *offsets_scrub;
+467
drivers/edac/zynqmp_edac.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Xilinx ZynqMP OCM ECC Driver 4 + * 5 + * Copyright (C) 2022 Advanced Micro Devices, Inc. 6 + */ 7 + 8 + #include <linux/edac.h> 9 + #include <linux/interrupt.h> 10 + #include <linux/module.h> 11 + #include <linux/of.h> 12 + #include <linux/of_platform.h> 13 + #include <linux/platform_device.h> 14 + 15 + #include "edac_module.h" 16 + 17 + #define ZYNQMP_OCM_EDAC_MSG_SIZE 256 18 + 19 + #define ZYNQMP_OCM_EDAC_STRING "zynqmp_ocm" 20 + 21 + /* Error/Interrupt registers */ 22 + #define ERR_CTRL_OFST 0x0 23 + #define OCM_ISR_OFST 0x04 24 + #define OCM_IMR_OFST 0x08 25 + #define OCM_IEN_OFST 0x0C 26 + #define OCM_IDS_OFST 0x10 27 + 28 + /* ECC control register */ 29 + #define ECC_CTRL_OFST 0x14 30 + 31 + /* Correctable error info registers */ 32 + #define CE_FFA_OFST 0x1C 33 + #define CE_FFD0_OFST 0x20 34 + #define CE_FFD1_OFST 0x24 35 + #define CE_FFD2_OFST 0x28 36 + #define CE_FFD3_OFST 0x2C 37 + #define CE_FFE_OFST 0x30 38 + 39 + /* Uncorrectable error info registers */ 40 + #define UE_FFA_OFST 0x34 41 + #define UE_FFD0_OFST 0x38 42 + #define UE_FFD1_OFST 0x3C 43 + #define UE_FFD2_OFST 0x40 44 + #define UE_FFD3_OFST 0x44 45 + #define UE_FFE_OFST 0x48 46 + 47 + /* ECC control register bit field definitions */ 48 + #define ECC_CTRL_CLR_CE_ERR 0x40 49 + #define ECC_CTRL_CLR_UE_ERR 0x80 50 + 51 + /* Fault injection data and count registers */ 52 + #define OCM_FID0_OFST 0x4C 53 + #define OCM_FID1_OFST 0x50 54 + #define OCM_FID2_OFST 0x54 55 + #define OCM_FID3_OFST 0x58 56 + #define OCM_FIC_OFST 0x74 57 + 58 + #define UE_MAX_BITPOS_LOWER 31 59 + #define UE_MIN_BITPOS_UPPER 32 60 + #define UE_MAX_BITPOS_UPPER 63 61 + 62 + /* Interrupt masks */ 63 + #define OCM_CEINTR_MASK BIT(6) 64 + #define OCM_UEINTR_MASK BIT(7) 65 + #define OCM_ECC_ENABLE_MASK BIT(0) 66 + 67 + #define OCM_FICOUNT_MASK GENMASK(23, 0) 68 + #define OCM_NUM_UE_BITPOS 2 69 + #define OCM_BASEVAL 0xFFFC0000 70 + #define EDAC_DEVICE "ZynqMP-OCM" 71 + 72 + 
/** 73 + * struct ecc_error_info - ECC error log information 74 + * @addr: Fault generated at this address 75 + * @fault_lo: Generated fault data (lower 32-bit) 76 + * @fault_hi: Generated fault data (upper 32-bit) 77 + */ 78 + struct ecc_error_info { 79 + u32 addr; 80 + u32 fault_lo; 81 + u32 fault_hi; 82 + }; 83 + 84 + /** 85 + * struct ecc_status - ECC status information to report 86 + * @ce_cnt: Correctable error count 87 + * @ue_cnt: Uncorrectable error count 88 + * @ceinfo: Correctable error log information 89 + * @ueinfo: Uncorrectable error log information 90 + */ 91 + struct ecc_status { 92 + u32 ce_cnt; 93 + u32 ue_cnt; 94 + struct ecc_error_info ceinfo; 95 + struct ecc_error_info ueinfo; 96 + }; 97 + 98 + /** 99 + * struct edac_priv - OCM private instance data 100 + * @baseaddr: Base address of the OCM 101 + * @message: Buffer for framing the event specific info 102 + * @stat: ECC status information 103 + * @ce_cnt: Correctable Error count 104 + * @ue_cnt: Uncorrectable Error count 105 + * @debugfs_dir: Directory entry for debugfs 106 + * @ce_bitpos: Bit position for Correctable Error 107 + * @ue_bitpos: Array to store UnCorrectable Error bit positions 108 + * @fault_injection_cnt: Fault Injection Counter value 109 + */ 110 + struct edac_priv { 111 + void __iomem *baseaddr; 112 + char message[ZYNQMP_OCM_EDAC_MSG_SIZE]; 113 + struct ecc_status stat; 114 + u32 ce_cnt; 115 + u32 ue_cnt; 116 + #ifdef CONFIG_EDAC_DEBUG 117 + struct dentry *debugfs_dir; 118 + u8 ce_bitpos; 119 + u8 ue_bitpos[OCM_NUM_UE_BITPOS]; 120 + u32 fault_injection_cnt; 121 + #endif 122 + }; 123 + 124 + /** 125 + * get_error_info - Get the current ECC error info 126 + * @base: Pointer to the base address of the OCM 127 + * @p: Pointer to the OCM ECC status structure 128 + * @mask: Status register mask value 129 + * 130 + * Determines there is any ECC error or not 131 + * 132 + */ 133 + static void get_error_info(void __iomem *base, struct ecc_status *p, int mask) 134 + { 135 + if (mask & 
OCM_CEINTR_MASK) { 136 + p->ce_cnt++; 137 + p->ceinfo.fault_lo = readl(base + CE_FFD0_OFST); 138 + p->ceinfo.fault_hi = readl(base + CE_FFD1_OFST); 139 + p->ceinfo.addr = (OCM_BASEVAL | readl(base + CE_FFA_OFST)); 140 + writel(ECC_CTRL_CLR_CE_ERR, base + OCM_ISR_OFST); 141 + } else if (mask & OCM_UEINTR_MASK) { 142 + p->ue_cnt++; 143 + p->ueinfo.fault_lo = readl(base + UE_FFD0_OFST); 144 + p->ueinfo.fault_hi = readl(base + UE_FFD1_OFST); 145 + p->ueinfo.addr = (OCM_BASEVAL | readl(base + UE_FFA_OFST)); 146 + writel(ECC_CTRL_CLR_UE_ERR, base + OCM_ISR_OFST); 147 + } 148 + } 149 + 150 + /** 151 + * handle_error - Handle error types CE and UE 152 + * @dci: Pointer to the EDAC device instance 153 + * @p: Pointer to the OCM ECC status structure 154 + * 155 + * Handles correctable and uncorrectable errors. 156 + */ 157 + static void handle_error(struct edac_device_ctl_info *dci, struct ecc_status *p) 158 + { 159 + struct edac_priv *priv = dci->pvt_info; 160 + struct ecc_error_info *pinf; 161 + 162 + if (p->ce_cnt) { 163 + pinf = &p->ceinfo; 164 + snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE, 165 + "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]", 166 + "CE", pinf->addr, pinf->fault_hi, pinf->fault_lo); 167 + edac_device_handle_ce(dci, 0, 0, priv->message); 168 + } 169 + 170 + if (p->ue_cnt) { 171 + pinf = &p->ueinfo; 172 + snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE, 173 + "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]", 174 + "UE", pinf->addr, pinf->fault_hi, pinf->fault_lo); 175 + edac_device_handle_ue(dci, 0, 0, priv->message); 176 + } 177 + 178 + memset(p, 0, sizeof(*p)); 179 + } 180 + 181 + /** 182 + * intr_handler - ISR routine 183 + * @irq: irq number 184 + * @dev_id: device id pointer 185 + * 186 + * Return: IRQ_NONE, if CE/UE interrupt not set or IRQ_HANDLED otherwise 187 + */ 188 + static irqreturn_t intr_handler(int irq, void *dev_id) 189 + { 190 + struct edac_device_ctl_info *dci = dev_id; 191 + struct edac_priv *priv 
= dci->pvt_info; 192 + int regval; 193 + 194 + regval = readl(priv->baseaddr + OCM_ISR_OFST); 195 + if (!(regval & (OCM_CEINTR_MASK | OCM_UEINTR_MASK))) { 196 + WARN_ONCE(1, "Unhandled IRQ%d, ISR: 0x%x", irq, regval); 197 + return IRQ_NONE; 198 + } 199 + 200 + get_error_info(priv->baseaddr, &priv->stat, regval); 201 + 202 + priv->ce_cnt += priv->stat.ce_cnt; 203 + priv->ue_cnt += priv->stat.ue_cnt; 204 + handle_error(dci, &priv->stat); 205 + 206 + return IRQ_HANDLED; 207 + } 208 + 209 + /** 210 + * get_eccstate - Return the ECC status 211 + * @base: Pointer to the OCM base address 212 + * 213 + * Get the ECC enable/disable status 214 + * 215 + * Return: ECC status 0/1. 216 + */ 217 + static bool get_eccstate(void __iomem *base) 218 + { 219 + return readl(base + ECC_CTRL_OFST) & OCM_ECC_ENABLE_MASK; 220 + } 221 + 222 + #ifdef CONFIG_EDAC_DEBUG 223 + /** 224 + * write_fault_count - write fault injection count 225 + * @priv: Pointer to the EDAC private struct 226 + * 227 + * Update the fault injection count register, once the counter reaches 228 + * zero, it injects errors 229 + */ 230 + static void write_fault_count(struct edac_priv *priv) 231 + { 232 + u32 ficount = priv->fault_injection_cnt; 233 + 234 + if (ficount & ~OCM_FICOUNT_MASK) { 235 + ficount &= OCM_FICOUNT_MASK; 236 + edac_printk(KERN_INFO, EDAC_DEVICE, 237 + "Fault injection count value truncated to %d\n", ficount); 238 + } 239 + 240 + writel(ficount, priv->baseaddr + OCM_FIC_OFST); 241 + } 242 + 243 + /* 244 + * To get the Correctable Error injected, the following steps are needed: 245 + * - Setup the optional Fault Injection Count: 246 + * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count 247 + * - Write the Correctable Error bit position value: 248 + * echo <bit_pos val> > /sys/kernel/debug/edac/ocm/inject_ce_bitpos 249 + */ 250 + static ssize_t inject_ce_write(struct file *file, const char __user *data, 251 + size_t count, loff_t *ppos) 252 + { 253 + struct edac_device_ctl_info 
*edac_dev = file->private_data; 254 + struct edac_priv *priv = edac_dev->pvt_info; 255 + int ret; 256 + 257 + if (!data) 258 + return -EFAULT; 259 + 260 + ret = kstrtou8_from_user(data, count, 0, &priv->ce_bitpos); 261 + if (ret) 262 + return ret; 263 + 264 + if (priv->ce_bitpos > UE_MAX_BITPOS_UPPER) 265 + return -EINVAL; 266 + 267 + if (priv->ce_bitpos <= UE_MAX_BITPOS_LOWER) { 268 + writel(BIT(priv->ce_bitpos), priv->baseaddr + OCM_FID0_OFST); 269 + writel(0, priv->baseaddr + OCM_FID1_OFST); 270 + } else { 271 + writel(BIT(priv->ce_bitpos - UE_MIN_BITPOS_UPPER), 272 + priv->baseaddr + OCM_FID1_OFST); 273 + writel(0, priv->baseaddr + OCM_FID0_OFST); 274 + } 275 + 276 + write_fault_count(priv); 277 + 278 + return count; 279 + } 280 + 281 + static const struct file_operations inject_ce_fops = { 282 + .open = simple_open, 283 + .write = inject_ce_write, 284 + .llseek = generic_file_llseek, 285 + }; 286 + 287 + /* 288 + * To get the Uncorrectable Error injected, the following steps are needed: 289 + * - Setup the optional Fault Injection Count: 290 + * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count 291 + * - Write the Uncorrectable Error bit position values: 292 + * echo <bit_pos0 val>,<bit_pos1 val> > /sys/kernel/debug/edac/ocm/inject_ue_bitpos 293 + */ 294 + static ssize_t inject_ue_write(struct file *file, const char __user *data, 295 + size_t count, loff_t *ppos) 296 + { 297 + struct edac_device_ctl_info *edac_dev = file->private_data; 298 + struct edac_priv *priv = edac_dev->pvt_info; 299 + char buf[6], *pbuf, *token[2]; 300 + u64 ue_bitpos; 301 + int i, ret; 302 + u8 len; 303 + 304 + if (!data) 305 + return -EFAULT; 306 + 307 + len = min_t(size_t, count, sizeof(buf)); 308 + if (copy_from_user(buf, data, len)) 309 + return -EFAULT; 310 + 311 + buf[len] = '\0'; 312 + pbuf = &buf[0]; 313 + for (i = 0; i < OCM_NUM_UE_BITPOS; i++) 314 + token[i] = strsep(&pbuf, ","); 315 + 316 + ret = kstrtou8(token[0], 0, &priv->ue_bitpos[0]); 317 + if (ret) 
318 + return ret; 319 + 320 + ret = kstrtou8(token[1], 0, &priv->ue_bitpos[1]); 321 + if (ret) 322 + return ret; 323 + 324 + if (priv->ue_bitpos[0] > UE_MAX_BITPOS_UPPER || 325 + priv->ue_bitpos[1] > UE_MAX_BITPOS_UPPER) 326 + return -EINVAL; 327 + 328 + if (priv->ue_bitpos[0] == priv->ue_bitpos[1]) { 329 + edac_printk(KERN_ERR, EDAC_DEVICE, "Bit positions should not be equal\n"); 330 + return -EINVAL; 331 + } 332 + 333 + ue_bitpos = BIT(priv->ue_bitpos[0]) | BIT(priv->ue_bitpos[1]); 334 + 335 + writel((u32)ue_bitpos, priv->baseaddr + OCM_FID0_OFST); 336 + writel((u32)(ue_bitpos >> 32), priv->baseaddr + OCM_FID1_OFST); 337 + 338 + write_fault_count(priv); 339 + 340 + return count; 341 + } 342 + 343 + static const struct file_operations inject_ue_fops = { 344 + .open = simple_open, 345 + .write = inject_ue_write, 346 + .llseek = generic_file_llseek, 347 + }; 348 + 349 + static void setup_debugfs(struct edac_device_ctl_info *edac_dev) 350 + { 351 + struct edac_priv *priv = edac_dev->pvt_info; 352 + 353 + priv->debugfs_dir = edac_debugfs_create_dir("ocm"); 354 + if (!priv->debugfs_dir) 355 + return; 356 + 357 + edac_debugfs_create_x32("inject_fault_count", 0644, priv->debugfs_dir, 358 + &priv->fault_injection_cnt); 359 + edac_debugfs_create_file("inject_ue_bitpos", 0644, priv->debugfs_dir, 360 + edac_dev, &inject_ue_fops); 361 + edac_debugfs_create_file("inject_ce_bitpos", 0644, priv->debugfs_dir, 362 + edac_dev, &inject_ce_fops); 363 + } 364 + #endif 365 + 366 + static int edac_probe(struct platform_device *pdev) 367 + { 368 + struct edac_device_ctl_info *dci; 369 + struct edac_priv *priv; 370 + void __iomem *baseaddr; 371 + struct resource *res; 372 + int irq, ret; 373 + 374 + baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res); 375 + if (IS_ERR(baseaddr)) 376 + return PTR_ERR(baseaddr); 377 + 378 + if (!get_eccstate(baseaddr)) { 379 + edac_printk(KERN_INFO, EDAC_DEVICE, "ECC not enabled\n"); 380 + return -ENXIO; 381 + } 382 + 383 + dci = 
edac_device_alloc_ctl_info(sizeof(*priv), ZYNQMP_OCM_EDAC_STRING, 384 + 1, ZYNQMP_OCM_EDAC_STRING, 1, 0, NULL, 0, 385 + edac_device_alloc_index()); 386 + if (!dci) 387 + return -ENOMEM; 388 + 389 + priv = dci->pvt_info; 390 + platform_set_drvdata(pdev, dci); 391 + dci->dev = &pdev->dev; 392 + priv->baseaddr = baseaddr; 393 + dci->mod_name = pdev->dev.driver->name; 394 + dci->ctl_name = ZYNQMP_OCM_EDAC_STRING; 395 + dci->dev_name = dev_name(&pdev->dev); 396 + 397 + irq = platform_get_irq(pdev, 0); 398 + if (irq < 0) { 399 + ret = irq; 400 + goto free_dev_ctl; 401 + } 402 + 403 + ret = devm_request_irq(&pdev->dev, irq, intr_handler, 0, 404 + dev_name(&pdev->dev), dci); 405 + if (ret) { 406 + edac_printk(KERN_ERR, EDAC_DEVICE, "Failed to request Irq\n"); 407 + goto free_dev_ctl; 408 + } 409 + 410 + /* Enable UE, CE interrupts */ 411 + writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IEN_OFST); 412 + 413 + #ifdef CONFIG_EDAC_DEBUG 414 + setup_debugfs(dci); 415 + #endif 416 + 417 + ret = edac_device_add_device(dci); 418 + if (ret) 419 + goto free_dev_ctl; 420 + 421 + return 0; 422 + 423 + free_dev_ctl: 424 + edac_device_free_ctl_info(dci); 425 + 426 + return ret; 427 + } 428 + 429 + static int edac_remove(struct platform_device *pdev) 430 + { 431 + struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); 432 + struct edac_priv *priv = dci->pvt_info; 433 + 434 + /* Disable UE, CE interrupts */ 435 + writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IDS_OFST); 436 + 437 + #ifdef CONFIG_EDAC_DEBUG 438 + debugfs_remove_recursive(priv->debugfs_dir); 439 + #endif 440 + 441 + edac_device_del_device(&pdev->dev); 442 + edac_device_free_ctl_info(dci); 443 + 444 + return 0; 445 + } 446 + 447 + static const struct of_device_id zynqmp_ocm_edac_match[] = { 448 + { .compatible = "xlnx,zynqmp-ocmc-1.0"}, 449 + { /* end of table */ } 450 + }; 451 + 452 + MODULE_DEVICE_TABLE(of, zynqmp_ocm_edac_match); 453 + 454 + static struct platform_driver 
zynqmp_ocm_edac_driver = { 455 + .driver = { 456 + .name = "zynqmp-ocm-edac", 457 + .of_match_table = zynqmp_ocm_edac_match, 458 + }, 459 + .probe = edac_probe, 460 + .remove = edac_remove, 461 + }; 462 + 463 + module_platform_driver(zynqmp_ocm_edac_driver); 464 + 465 + MODULE_AUTHOR("Advanced Micro Devices, Inc"); 466 + MODULE_DESCRIPTION("Xilinx ZynqMP OCM ECC driver"); 467 + MODULE_LICENSE("GPL");