Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ras_core_for_v6.3_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

- Add support for reporting more bits of the physical address on error,
on newer AMD CPUs

- Mask out bits which don't belong to the address of the error being
reported

* tag 'ras_core_for_v6.3_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mce: Mask out non-address bits from machine check bank
x86/mce: Add support for Extended Physical Address MCA changes
x86/mce: Define a function to extract ErrorAddr from MCA_ADDR

+62 -31
+3
arch/x86/include/asm/mce.h
··· 88 88 #define MCI_MISC_ADDR_MEM 3 /* memory address */ 89 89 #define MCI_MISC_ADDR_GENERIC 7 /* generic */ 90 90 91 + /* MCi_ADDR register defines */ 92 + #define MCI_ADDR_PHYSADDR GENMASK_ULL(boot_cpu_data.x86_phys_bits - 1, 0) 93 + 91 94 /* CTL2 register defines */ 92 95 #define MCI_CTL2_CMCI_EN BIT_ULL(30) 93 96 #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
+3 -9
arch/x86/kernel/cpu/mce/amd.c
··· 306 306 if ((low & BIT(5)) && !((high >> 5) & 0x3)) 307 307 high |= BIT(5); 308 308 309 + this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8)); 310 + 309 311 wrmsr(smca_config, low, high); 310 312 } 311 313 ··· 738 736 if (m.status & MCI_STATUS_ADDRV) { 739 737 m.addr = addr; 740 738 741 - /* 742 - * Extract [55:<lsb>] where lsb is the least significant 743 - * *valid* bit of the address bits. 744 - */ 745 - if (mce_flags.smca) { 746 - u8 lsb = (m.addr >> 56) & 0x3f; 747 - 748 - m.addr &= GENMASK_ULL(55, lsb); 749 - } 739 + smca_extract_err_addr(&m); 750 740 } 751 741 752 742 if (mce_flags.smca) {
+11 -21
arch/x86/kernel/cpu/mce/core.c
··· 67 67 68 68 DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); 69 69 70 - struct mce_bank { 71 - u64 ctl; /* subevents to enable */ 72 - 73 - __u64 init : 1, /* initialise bank? */ 74 - __reserved_1 : 63; 75 - }; 76 - static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); 70 + DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); 77 71 78 72 #define ATTR_LEN 16 79 73 /* One object for each MCE bank, shared by all CPUs */ ··· 573 579 mce->severity != MCE_DEFERRED_SEVERITY) 574 580 return NOTIFY_DONE; 575 581 576 - pfn = mce->addr >> PAGE_SHIFT; 582 + pfn = (mce->addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT; 577 583 if (!memory_failure(pfn, 0)) { 578 584 set_mce_nospec(pfn); 579 585 mce->kflags |= MCE_HANDLED_UC; ··· 627 633 m->addr <<= shift; 628 634 } 629 635 630 - /* 631 - * Extract [55:<lsb>] where lsb is the least significant 632 - * *valid* bit of the address bits. 633 - */ 634 - if (mce_flags.smca) { 635 - u8 lsb = (m->addr >> 56) & 0x3f; 636 - 637 - m->addr &= GENMASK_ULL(55, lsb); 638 - } 636 + smca_extract_err_addr(m); 639 637 } 640 638 641 639 if (mce_flags.smca) { ··· 1294 1308 { 1295 1309 struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); 1296 1310 int flags = MF_ACTION_REQUIRED; 1311 + unsigned long pfn; 1297 1312 int ret; 1298 1313 1299 1314 p->mce_count = 0; ··· 1303 1316 if (!p->mce_ripv) 1304 1317 flags |= MF_MUST_KILL; 1305 1318 1306 - ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags); 1319 + pfn = (p->mce_addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT; 1320 + ret = memory_failure(pfn, flags); 1307 1321 if (!ret) { 1308 - set_mce_nospec(p->mce_addr >> PAGE_SHIFT); 1322 + set_mce_nospec(pfn); 1309 1323 sync_core(); 1310 1324 return; 1311 1325 } ··· 1328 1340 static void kill_me_never(struct callback_head *cb) 1329 1341 { 1330 1342 struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); 1343 + unsigned long pfn; 1331 1344 1332 1345 p->mce_count = 0; 1333 
1346 pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr); 1334 - if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0)) 1335 - set_mce_nospec(p->mce_addr >> PAGE_SHIFT); 1347 + pfn = (p->mce_addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT; 1348 + if (!memory_failure(pfn, 0)) 1349 + set_mce_nospec(pfn); 1336 1350 } 1337 1351 1338 1352 static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
+44
arch/x86/kernel/cpu/mce/internal.h
··· 177 177 178 178 extern struct mce_vendor_flags mce_flags; 179 179 180 + struct mce_bank { 181 + /* subevents to enable */ 182 + u64 ctl; 183 + 184 + /* initialise bank? */ 185 + __u64 init : 1, 186 + 187 + /* 188 + * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: When set, this bit indicates 189 + * the LSB field is found in MCA_STATUS and not in MCA_ADDR. 190 + */ 191 + lsb_in_status : 1, 192 + 193 + __reserved_1 : 62; 194 + }; 195 + 196 + DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); 197 + 180 198 enum mca_msr { 181 199 MCA_CTL, 182 200 MCA_STATUS, ··· 207 189 208 190 #ifdef CONFIG_X86_MCE_AMD 209 191 extern bool amd_filter_mce(struct mce *m); 192 + 193 + /* 194 + * If MCA_CONFIG[McaLsbInStatusSupported] is set, the LSB field is read from 195 + * MCA_STATUS[29:24] and ErrAddr is MCA_ADDR[56:lsb]; else the LSB field is in MCA_ADDR[61:56] and ErrAddr is MCA_ADDR[55:lsb]. 196 + */ 197 + static __always_inline void smca_extract_err_addr(struct mce *m) 198 + { 199 + u8 lsb; 200 + 201 + if (!mce_flags.smca) 202 + return; 203 + 204 + if (this_cpu_ptr(mce_banks_array)[m->bank].lsb_in_status) { 205 + lsb = (m->status >> 24) & 0x3f; 206 + 207 + m->addr &= GENMASK_ULL(56, lsb); 208 + 209 + return; 210 + } 211 + 212 + lsb = (m->addr >> 56) & 0x3f; 213 + 214 + m->addr &= GENMASK_ULL(55, lsb); 215 + } 216 + 210 217 #else 211 218 static inline bool amd_filter_mce(struct mce *m) { return false; } 219 + static inline void smca_extract_err_addr(struct mce *m) { } 212 220 #endif 213 221 214 222 #ifdef CONFIG_X86_ANCIENT_MCE
+1 -1
drivers/edac/skx_common.c
··· 657 657 658 658 memset(&res, 0, sizeof(res)); 659 659 res.mce = mce; 660 - res.addr = mce->addr; 660 + res.addr = mce->addr & MCI_ADDR_PHYSADDR; 661 661 662 662 /* Try driver decoder first */ 663 663 if (!(driver_decode && driver_decode(&res))) {