Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull more EDAC updates from Borislav Petkov:
"The second part of the EDAC pile which contains the ADXL user and a
build fix which addresses a not-so-sensical .config but fixes
randconfig builds people do:

- skx_edac: Address translation for NVDIMMs (Tony Luck and Qiuxu Zhuo)

- ACPI_ADXL build fix"

[ I don't think "sensical" is a word, particularly when used in the
context of actually meaning "nonsensical", but I like it - Linus ]

* tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
EDAC, skx: Fix randconfig builds
EDAC, skx_edac: Add address translation for non-volatile DIMMs

+186 -13
+1
drivers/edac/Kconfig
··· 234 234 depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG 235 235 depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y 236 236 select DMI 237 + select ACPI_ADXL if ACPI 237 238 help 238 239 Support for error detection and correction the Intel 239 240 Skylake server Integrated Memory Controllers. If your
+180 -13
drivers/edac/skx_edac.c
··· 26 26 #include <linux/bitmap.h> 27 27 #include <linux/math64.h> 28 28 #include <linux/mod_devicetable.h> 29 + #include <linux/adxl.h> 29 30 #include <acpi/nfit.h> 30 31 #include <asm/cpu_device_id.h> 31 32 #include <asm/intel-family.h> ··· 36 35 #include "edac_module.h" 37 36 38 37 #define EDAC_MOD_STR "skx_edac" 38 + #define MSG_SIZE 1024 39 39 40 40 /* 41 41 * Debug macros ··· 56 54 static LIST_HEAD(skx_edac_list); 57 55 58 56 static u64 skx_tolm, skx_tohm; 57 + static char *skx_msg; 58 + static unsigned int nvdimm_count; 59 + 60 + enum { 61 + INDEX_SOCKET, 62 + INDEX_MEMCTRL, 63 + INDEX_CHANNEL, 64 + INDEX_DIMM, 65 + INDEX_MAX 66 + }; 67 + 68 + static const char * const component_names[] = { 69 + [INDEX_SOCKET] = "ProcessorSocketId", 70 + [INDEX_MEMCTRL] = "MemoryControllerId", 71 + [INDEX_CHANNEL] = "ChannelId", 72 + [INDEX_DIMM] = "DimmSlotId", 73 + }; 74 + 75 + static int component_indices[ARRAY_SIZE(component_names)]; 76 + static int adxl_component_count; 77 + static const char * const *adxl_component_names; 78 + static u64 *adxl_values; 79 + static char *adxl_msg; 59 80 60 81 #define NUM_IMC 2 /* memory controllers per socket */ 61 82 #define NUM_CHANNELS 3 /* channels per memory controller */ ··· 417 392 u32 dev_handle; 418 393 u16 flags; 419 394 u64 size = 0; 395 + 396 + nvdimm_count++; 420 397 421 398 dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, 422 399 imc->src_id, 0); ··· 968 941 } 969 942 #endif /*CONFIG_EDAC_DEBUG*/ 970 943 944 + static bool skx_adxl_decode(struct decoded_addr *res) 945 + 946 + { 947 + int i, len = 0; 948 + 949 + if (res->addr >= skx_tohm || (res->addr >= skx_tolm && 950 + res->addr < BIT_ULL(32))) { 951 + edac_dbg(0, "Address 0x%llx out of range\n", res->addr); 952 + return false; 953 + } 954 + 955 + if (adxl_decode(res->addr, adxl_values)) { 956 + edac_dbg(0, "Failed to decode 0x%llx\n", res->addr); 957 + return false; 958 + } 959 + 960 + res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; 961 + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; 962 + res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; 963 + res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; 964 + 965 + for (i = 0; i < adxl_component_count; i++) { 966 + if (adxl_values[i] == ~0x0ull) 967 + continue; 968 + 969 + len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx", 970 + adxl_component_names[i], adxl_values[i]); 971 + if (MSG_SIZE - len <= 0) 972 + break; 973 + } 974 + 975 + return true; 976 + } 977 + 971 978 static void skx_mce_output_error(struct mem_ctl_info *mci, 972 979 const struct mce *m, 973 980 struct decoded_addr *res) 974 981 { 975 982 enum hw_event_mc_err_type tp_event; 976 - char *type, *optype, msg[256]; 983 + char *type, *optype; 977 984 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); 978 985 bool overflow = GET_BITFIELD(m->status, 62, 62); 979 986 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); ··· 1068 1007 break; 1069 1008 } 1070 1009 } 1010 + if (adxl_component_count) { 1011 + snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s", 1012 + overflow ? " OVERFLOW" : "", 1013 + (uncorrected_error && recoverable) ? " recoverable" : "", 1014 + mscod, errcode, adxl_msg); 1015 + } else { 1016 + snprintf(skx_msg, MSG_SIZE, 1017 + "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", 1018 + overflow ? " OVERFLOW" : "", 1019 + (uncorrected_error && recoverable) ? " recoverable" : "", 1020 + mscod, errcode, 1021 + res->socket, res->imc, res->rank, 1022 + res->bank_group, res->bank_address, res->row, res->column); 1023 + } 1071 1024 1072 - snprintf(msg, sizeof(msg), 1073 - "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", 1074 - overflow ? " OVERFLOW" : "", 1075 - (uncorrected_error && recoverable) ? " recoverable" : "", 1076 - mscod, errcode, 1077 - res->socket, res->imc, res->rank, 1078 - res->bank_group, res->bank_address, res->row, res->column); 1079 - 1080 - edac_dbg(0, "%s\n", msg); 1025 + edac_dbg(0, "%s\n", skx_msg); 1081 1026 1082 1027 /* Call the helper to output message */ 1083 1028 edac_mc_handle_error(tp_event, mci, core_err_cnt, 1084 1029 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, 1085 1030 res->channel, res->dimm, -1, 1086 - optype, msg); 1031 + optype, skx_msg); 1032 + } 1033 + 1034 + static struct mem_ctl_info *get_mci(int src_id, int lmc) 1035 + { 1036 + struct skx_dev *d; 1037 + 1038 + if (lmc > NUM_IMC - 1) { 1039 + skx_printk(KERN_ERR, "Bad lmc %d\n", lmc); 1040 + return NULL; 1041 + } 1042 + 1043 + list_for_each_entry(d, &skx_edac_list, list) { 1044 + if (d->imc[0].src_id == src_id) 1045 + return d->imc[lmc].mci; 1046 + } 1047 + 1048 + skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc); 1049 + 1050 + return NULL; 1087 1051 } 1088 1052 1089 1053 static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, ··· 1126 1040 if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) 1127 1041 return NOTIFY_DONE; 1128 1042 1043 + memset(&res, 0, sizeof(res)); 1129 1044 res.addr = mce->addr; 1130 - if (!skx_decode(&res)) 1045 + 1046 + if (adxl_component_count) { 1047 + if (!skx_adxl_decode(&res)) 1048 + return NOTIFY_DONE; 1049 + 1050 + mci = get_mci(res.socket, res.imc); 1051 + } else { 1052 + if (!skx_decode(&res)) 1053 + return NOTIFY_DONE; 1054 + 1055 + mci = res.dev->imc[res.imc].mci; 1056 + } 1057 + 1058 + if (!mci) 1131 1059 return NOTIFY_DONE; 1132 - mci = res.dev->imc[res.imc].mci; 1133 1060 1134 1061 if (mce->mcgstatus & MCG_STATUS_MCIP) 1135 1062 type = "Exception"; ··· 1191 1092 1192 1093 kfree(d); 1193 1094 } 1095 + } 1096 + 1097 + static void __init skx_adxl_get(void) 1098 + { 1099 + const char * const *names; 1100 + int i, j; 1101 + 1102 + names = adxl_get_component_names(); 1103 + if (!names) { 1104 + skx_printk(KERN_NOTICE, "No firmware support for address translation."); 1105 + skx_printk(KERN_CONT, " Only decoding DDR4 address!\n"); 1106 + return; 1107 + } 1108 + 1109 + for (i = 0; i < INDEX_MAX; i++) { 1110 + for (j = 0; names[j]; j++) { 1111 + if (!strcmp(component_names[i], names[j])) { 1112 + component_indices[i] = j; 1113 + break; 1114 + } 1115 + } 1116 + 1117 + if (!names[j]) 1118 + goto err; 1119 + } 1120 + 1121 + adxl_component_names = names; 1122 + while (*names++) 1123 + adxl_component_count++; 1124 + 1125 + adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values), 1126 + GFP_KERNEL); 1127 + if (!adxl_values) { 1128 + adxl_component_count = 0; 1129 + return; 1130 + } 1131 + 1132 + adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL); 1133 + if (!adxl_msg) { 1134 + adxl_component_count = 0; 1135 + kfree(adxl_values); 1136 + } 1137 + 1138 + return; 1139 + err: 1140 + skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ", 1141 + component_names[i]); 1142 + for (j = 0; names[j]; j++) 1143 + skx_printk(KERN_CONT, "%s ", names[j]); 1144 + skx_printk(KERN_CONT, "\n"); 1145 + } 1146 + 1147 + static void __exit skx_adxl_put(void) 1148 + { 1149 + kfree(adxl_values); 1150 + kfree(adxl_msg); 1194 1151 } 1195 1152 1196 1153 /* ··· 1313 1158 } 1314 1159 } 1315 1160 1161 + skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL); 1162 + if (!skx_msg) { 1163 + rc = -ENOMEM; 1164 + goto fail; 1165 + } 1166 + 1167 + if (nvdimm_count) 1168 + skx_adxl_get(); 1169 + 1316 1170 /* Ensure that the OPSTATE is set correctly for POLL or NMI */ 1317 1171 opstate_init(); 1318 1172 ··· 1340 1176 edac_dbg(2, "\n"); 1341 1177 mce_unregister_decode_chain(&skx_mce_dec); 1342 1178 skx_remove(); 1179 + if (nvdimm_count) 1180 + skx_adxl_put(); 1181 + kfree(skx_msg); 1343 1182 teardown_skx_debug(); 1344 1183 } 1345 1184
+5
include/linux/adxl.h
··· 7 7 #ifndef _LINUX_ADXL_H 8 8 #define _LINUX_ADXL_H 9 9 10 + #ifdef CONFIG_ACPI_ADXL 10 11 const char * const *adxl_get_component_names(void); 11 12 int adxl_decode(u64 addr, u64 component_values[]); 13 + #else 14 + static inline const char * const *adxl_get_component_names(void) { return NULL; } 15 + static inline int adxl_decode(u64 addr, u64 component_values[]) { return -EOPNOTSUPP; } 16 + #endif 12 17 13 18 #endif /* _LINUX_ADXL_H */