Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86/mce: Add wrapper for struct mce to export vendor specific info

Currently, exporting new additional machine check error information
involves adding new fields for the same at the end of the struct mce.
This additional information can then be consumed through mcelog or
tracepoint.

However, as new MSRs are being added (and will be added in the future)
by CPU vendors on their newer CPUs with additional machine check error
information to be exported, the size of struct mce will balloon on some
CPUs, unnecessarily, since those fields are vendor-specific. Moreover,
different CPU vendors may export the additional information in varying
sizes.

The problem particularly intensifies since struct mce is exposed to
userspace as part of UAPI. Its bloating through vendor-specific data
should be avoided to limit the information being sent out to userspace.

Add a new structure mce_hw_err to wrap the existing struct mce. The same
will prevent its ballooning since vendor-specific data, if any, can now be
exported through a union within the wrapper structure and through
__dynamic_array in mce_record tracepoint.

Furthermore, new internal kernel fields can be added to the wrapper
struct without impacting the user space API.

[ bp: Restore reverse x-mas tree order of function vars declarations. ]

Suggested-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Link: https://lore.kernel.org/r/20241022194158.110073-2-avadhut.naik@amd.com

authored by

Avadhut Naik and committed by
Borislav Petkov (AMD)
750fd239 754269cc

+202 -161
+12 -2
arch/x86/include/asm/mce.h
··· 187 187 MCE_PRIO_HIGHEST = MCE_PRIO_CEC 188 188 }; 189 189 190 + /** 191 + * struct mce_hw_err - Hardware Error Record. 192 + * @m: Machine Check record. 193 + */ 194 + struct mce_hw_err { 195 + struct mce m; 196 + }; 197 + 198 + #define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m) 199 + 190 200 struct notifier_block; 191 201 extern void mce_register_decode_chain(struct notifier_block *nb); 192 202 extern void mce_unregister_decode_chain(struct notifier_block *nb); ··· 231 221 u64 lapic_id) { return -EINVAL; } 232 222 #endif 233 223 234 - void mce_prep_record(struct mce *m); 235 - void mce_log(struct mce *m); 224 + void mce_prep_record(struct mce_hw_err *err); 225 + void mce_log(struct mce_hw_err *err); 236 226 DECLARE_PER_CPU(struct device *, mce_device); 237 227 238 228 /* Maximum number of MCA banks per CPU. */
+14 -13
arch/x86/kernel/cpu/mce/amd.c
··· 778 778 779 779 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) 780 780 { 781 - struct mce m; 781 + struct mce_hw_err err; 782 + struct mce *m = &err.m; 782 783 783 - mce_prep_record(&m); 784 + mce_prep_record(&err); 784 785 785 - m.status = status; 786 - m.misc = misc; 787 - m.bank = bank; 788 - m.tsc = rdtsc(); 786 + m->status = status; 787 + m->misc = misc; 788 + m->bank = bank; 789 + m->tsc = rdtsc(); 789 790 790 - if (m.status & MCI_STATUS_ADDRV) { 791 - m.addr = addr; 791 + if (m->status & MCI_STATUS_ADDRV) { 792 + m->addr = addr; 792 793 793 - smca_extract_err_addr(&m); 794 + smca_extract_err_addr(m); 794 795 } 795 796 796 797 if (mce_flags.smca) { 797 - rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid); 798 + rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid); 798 799 799 - if (m.status & MCI_STATUS_SYNDV) 800 - rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); 800 + if (m->status & MCI_STATUS_SYNDV) 801 + rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd); 801 802 } 802 803 803 - mce_log(&m); 804 + mce_log(&err); 804 805 } 805 806 806 807 DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
+25 -20
arch/x86/kernel/cpu/mce/apei.c
··· 28 28 29 29 void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) 30 30 { 31 - struct mce m; 31 + struct mce_hw_err err; 32 + struct mce *m; 32 33 int lsb; 33 34 34 35 if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) ··· 45 44 else 46 45 lsb = PAGE_SHIFT; 47 46 48 - mce_prep_record(&m); 49 - m.bank = -1; 47 + mce_prep_record(&err); 48 + m = &err.m; 49 + m->bank = -1; 50 50 /* Fake a memory read error with unknown channel */ 51 - m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f; 52 - m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb; 51 + m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f; 52 + m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb; 53 53 54 54 if (severity >= GHES_SEV_RECOVERABLE) 55 - m.status |= MCI_STATUS_UC; 55 + m->status |= MCI_STATUS_UC; 56 56 57 57 if (severity >= GHES_SEV_PANIC) { 58 - m.status |= MCI_STATUS_PCC; 59 - m.tsc = rdtsc(); 58 + m->status |= MCI_STATUS_PCC; 59 + m->tsc = rdtsc(); 60 60 } 61 61 62 - m.addr = mem_err->physical_addr; 63 - mce_log(&m); 62 + m->addr = mem_err->physical_addr; 63 + mce_log(&err); 64 64 } 65 65 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); 66 66 ··· 69 67 { 70 68 const u64 *i_mce = ((const u64 *) (ctx_info + 1)); 71 69 bool apicid_found = false; 70 + struct mce_hw_err err; 72 71 unsigned int cpu; 73 - struct mce m; 72 + struct mce *m; 74 73 75 74 if (!boot_cpu_has(X86_FEATURE_SMCA)) 76 75 return -EINVAL; ··· 111 108 if (!apicid_found) 112 109 return -EINVAL; 113 110 114 - mce_prep_record_common(&m); 115 - mce_prep_record_per_cpu(cpu, &m); 111 + m = &err.m; 112 + memset(&err, 0, sizeof(struct mce_hw_err)); 113 + mce_prep_record_common(m); 114 + mce_prep_record_per_cpu(cpu, m); 116 115 117 - m.bank = (ctx_info->msr_addr >> 4) & 0xFF; 118 - m.status = *i_mce; 119 - m.addr = *(i_mce + 1); 120 - m.misc = *(i_mce + 2); 116 + m->bank = (ctx_info->msr_addr >> 4) & 0xFF; 117 + m->status = *i_mce; 118 + m->addr = *(i_mce + 1); 
119 + m->misc = *(i_mce + 2); 121 120 /* Skipping MCA_CONFIG */ 122 - m.ipid = *(i_mce + 4); 123 - m.synd = *(i_mce + 5); 121 + m->ipid = *(i_mce + 4); 122 + m->synd = *(i_mce + 5); 124 123 125 - mce_log(&m); 124 + mce_log(&err); 126 125 127 126 return 0; 128 127 }
+115 -92
arch/x86/kernel/cpu/mce/core.c
··· 88 88 .monarch_timeout = -1 89 89 }; 90 90 91 - static DEFINE_PER_CPU(struct mce, mces_seen); 91 + static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen); 92 92 static unsigned long mce_need_notify; 93 93 94 94 /* ··· 119 119 120 120 void mce_prep_record_common(struct mce *m) 121 121 { 122 - memset(m, 0, sizeof(struct mce)); 123 - 124 122 m->cpuid = cpuid_eax(1); 125 123 m->cpuvendor = boot_cpu_data.x86_vendor; 126 124 m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP); ··· 136 138 m->socketid = topology_physical_package_id(cpu); 137 139 } 138 140 139 - /* Do initial initialization of a struct mce */ 140 - void mce_prep_record(struct mce *m) 141 + /* Do initial initialization of struct mce_hw_err */ 142 + void mce_prep_record(struct mce_hw_err *err) 141 143 { 144 + struct mce *m = &err->m; 145 + 146 + memset(err, 0, sizeof(struct mce_hw_err)); 142 147 mce_prep_record_common(m); 143 148 mce_prep_record_per_cpu(smp_processor_id(), m); 144 149 } ··· 149 148 DEFINE_PER_CPU(struct mce, injectm); 150 149 EXPORT_PER_CPU_SYMBOL_GPL(injectm); 151 150 152 - void mce_log(struct mce *m) 151 + void mce_log(struct mce_hw_err *err) 153 152 { 154 - if (!mce_gen_pool_add(m)) 153 + if (!mce_gen_pool_add(err)) 155 154 irq_work_queue(&mce_irq_work); 156 155 } 157 156 EXPORT_SYMBOL_GPL(mce_log); ··· 172 171 } 173 172 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); 174 173 175 - static void __print_mce(struct mce *m) 174 + static void __print_mce(struct mce_hw_err *err) 176 175 { 176 + struct mce *m = &err->m; 177 + 177 178 pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n", 178 179 m->extcpu, 179 180 (m->mcgstatus & MCG_STATUS_MCIP ? 
" Exception" : ""), ··· 217 214 m->microcode); 218 215 } 219 216 220 - static void print_mce(struct mce *m) 217 + static void print_mce(struct mce_hw_err *err) 221 218 { 222 - __print_mce(m); 219 + struct mce *m = &err->m; 220 + 221 + __print_mce(err); 223 222 224 223 if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON) 225 224 pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); ··· 256 251 return NULL; 257 252 } 258 253 259 - static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) 254 + static noinstr void mce_panic(const char *msg, struct mce_hw_err *final, char *exp) 260 255 { 261 256 struct llist_node *pending; 262 257 struct mce_evt_llist *l; ··· 287 282 pending = mce_gen_pool_prepare_records(); 288 283 /* First print corrected ones that are still unlogged */ 289 284 llist_for_each_entry(l, pending, llnode) { 290 - struct mce *m = &l->mce; 285 + struct mce_hw_err *err = &l->err; 286 + struct mce *m = &err->m; 291 287 if (!(m->status & MCI_STATUS_UC)) { 292 - print_mce(m); 288 + print_mce(err); 293 289 if (!apei_err) 294 290 apei_err = apei_write_mce(m); 295 291 } 296 292 } 297 293 /* Now print uncorrected but with the final one last */ 298 294 llist_for_each_entry(l, pending, llnode) { 299 - struct mce *m = &l->mce; 295 + struct mce_hw_err *err = &l->err; 296 + struct mce *m = &err->m; 300 297 if (!(m->status & MCI_STATUS_UC)) 301 298 continue; 302 - if (!final || mce_cmp(m, final)) { 303 - print_mce(m); 299 + if (!final || mce_cmp(m, &final->m)) { 300 + print_mce(err); 304 301 if (!apei_err) 305 302 apei_err = apei_write_mce(m); 306 303 } ··· 310 303 if (final) { 311 304 print_mce(final); 312 305 if (!apei_err) 313 - apei_err = apei_write_mce(final); 306 + apei_err = apei_write_mce(&final->m); 314 307 } 315 308 if (exp) 316 309 pr_emerg(HW_ERR "Machine check: %s\n", exp); 317 310 318 - memmsg = mce_dump_aux_info(final); 311 + memmsg = mce_dump_aux_info(&final->m); 319 312 if (memmsg) 320 313 
pr_emerg(HW_ERR "Machine check: %s\n", memmsg); 321 314 ··· 330 323 * panic. 331 324 */ 332 325 if (kexec_crash_loaded()) { 333 - if (final && (final->status & MCI_STATUS_ADDRV)) { 326 + if (final && (final->m.status & MCI_STATUS_ADDRV)) { 334 327 struct page *p; 335 - p = pfn_to_online_page(final->addr >> PAGE_SHIFT); 328 + p = pfn_to_online_page(final->m.addr >> PAGE_SHIFT); 336 329 if (p) 337 330 SetPageHWPoison(p); 338 331 } ··· 452 445 * check into our "mce" struct so that we can use it later to assess 453 446 * the severity of the problem as we read per-bank specific details. 454 447 */ 455 - static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs) 448 + static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs) 456 449 { 450 + struct mce *m; 457 451 /* 458 452 * Enable instrumentation around mce_prep_record() which calls external 459 453 * facilities. 460 454 */ 461 455 instrumentation_begin(); 462 - mce_prep_record(m); 456 + mce_prep_record(err); 463 457 instrumentation_end(); 464 458 459 + m = &err->m; 465 460 m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 466 461 if (regs) { 467 462 /* ··· 583 574 static int mce_early_notifier(struct notifier_block *nb, unsigned long val, 584 575 void *data) 585 576 { 586 - struct mce *m = (struct mce *)data; 577 + struct mce_hw_err *err = to_mce_hw_err(data); 587 578 588 - if (!m) 579 + if (!err) 589 580 return NOTIFY_DONE; 590 581 591 582 /* Emit the trace record: */ 592 - trace_mce_record(m); 583 + trace_mce_record(err); 593 584 594 585 set_bit(0, &mce_need_notify); 595 586 ··· 633 624 static int mce_default_notifier(struct notifier_block *nb, unsigned long val, 634 625 void *data) 635 626 { 636 - struct mce *m = (struct mce *)data; 627 + struct mce_hw_err *err = to_mce_hw_err(data); 637 628 638 - if (!m) 629 + if (!err) 639 630 return NOTIFY_DONE; 640 631 641 - if (mca_cfg.print_all || !m->kflags) 642 - __print_mce(m); 632 + if (mca_cfg.print_all || !(err->m.kflags)) 633 + 
__print_mce(err); 643 634 644 635 return NOTIFY_DONE; 645 636 } ··· 653 644 /* 654 645 * Read ADDR and MISC registers. 655 646 */ 656 - static noinstr void mce_read_aux(struct mce *m, int i) 647 + static noinstr void mce_read_aux(struct mce_hw_err *err, int i) 657 648 { 649 + struct mce *m = &err->m; 650 + 658 651 if (m->status & MCI_STATUS_MISCV) 659 652 m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC)); 660 653 ··· 703 692 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) 704 693 { 705 694 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); 706 - struct mce m; 695 + struct mce_hw_err err; 696 + struct mce *m; 707 697 int i; 708 698 709 699 this_cpu_inc(mce_poll_count); 710 700 711 - mce_gather_info(&m, NULL); 701 + mce_gather_info(&err, NULL); 702 + m = &err.m; 712 703 713 704 if (flags & MCP_TIMESTAMP) 714 - m.tsc = rdtsc(); 705 + m->tsc = rdtsc(); 715 706 716 707 for (i = 0; i < this_cpu_read(mce_num_banks); i++) { 717 708 if (!mce_banks[i].ctl || !test_bit(i, *b)) 718 709 continue; 719 710 720 - m.misc = 0; 721 - m.addr = 0; 722 - m.bank = i; 711 + m->misc = 0; 712 + m->addr = 0; 713 + m->bank = i; 723 714 724 715 barrier(); 725 - m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); 716 + m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); 726 717 727 718 /* 728 719 * Update storm tracking here, before checking for the ··· 734 721 * storm status. 735 722 */ 736 723 if (!mca_cfg.cmci_disabled) 737 - mce_track_storm(&m); 724 + mce_track_storm(m); 738 725 739 726 /* If this entry is not valid, ignore it */ 740 - if (!(m.status & MCI_STATUS_VAL)) 727 + if (!(m->status & MCI_STATUS_VAL)) 741 728 continue; 742 729 743 730 /* 744 731 * If we are logging everything (at CPU online) or this 745 732 * is a corrected error, then we must log it. 746 733 */ 747 - if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC)) 734 + if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC)) 748 735 goto log_it; 749 736 750 737 /* ··· 754 741 * everything else. 
755 742 */ 756 743 if (!mca_cfg.ser) { 757 - if (m.status & MCI_STATUS_UC) 744 + if (m->status & MCI_STATUS_UC) 758 745 continue; 759 746 goto log_it; 760 747 } 761 748 762 749 /* Log "not enabled" (speculative) errors */ 763 - if (!(m.status & MCI_STATUS_EN)) 750 + if (!(m->status & MCI_STATUS_EN)) 764 751 goto log_it; 765 752 766 753 /* 767 754 * Log UCNA (SDM: 15.6.3 "UCR Error Classification") 768 755 * UC == 1 && PCC == 0 && S == 0 769 756 */ 770 - if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S)) 757 + if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S)) 771 758 goto log_it; 772 759 773 760 /* ··· 781 768 if (flags & MCP_DONTLOG) 782 769 goto clear_it; 783 770 784 - mce_read_aux(&m, i); 785 - m.severity = mce_severity(&m, NULL, NULL, false); 771 + mce_read_aux(&err, i); 772 + m->severity = mce_severity(m, NULL, NULL, false); 786 773 /* 787 774 * Don't get the IP here because it's unlikely to 788 775 * have anything to do with the actual error location. 789 776 */ 790 777 791 - if (mca_cfg.dont_log_ce && !mce_usable_address(&m)) 778 + if (mca_cfg.dont_log_ce && !mce_usable_address(m)) 792 779 goto clear_it; 793 780 794 781 if (flags & MCP_QUEUE_LOG) 795 - mce_gen_pool_add(&m); 782 + mce_gen_pool_add(&err); 796 783 else 797 - mce_log(&m); 784 + mce_log(&err); 798 785 799 786 clear_it: 800 787 /* ··· 918 905 * Do a quick check if any of the events requires a panic. 919 906 * This decides if we keep the events around or clear them. 
920 907 */ 921 - static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, 908 + static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, unsigned long *validp, 922 909 struct pt_regs *regs) 923 910 { 911 + struct mce *m = &err->m; 924 912 char *tmp = *msg; 925 913 int i; 926 914 ··· 939 925 940 926 m->bank = i; 941 927 if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) { 942 - mce_read_aux(m, i); 928 + mce_read_aux(err, i); 943 929 *msg = tmp; 944 930 return 1; 945 931 } ··· 1030 1016 */ 1031 1017 static void mce_reign(void) 1032 1018 { 1033 - int cpu; 1019 + struct mce_hw_err *err = NULL; 1034 1020 struct mce *m = NULL; 1035 1021 int global_worst = 0; 1036 1022 char *msg = NULL; 1023 + int cpu; 1037 1024 1038 1025 /* 1039 1026 * This CPU is the Monarch and the other CPUs have run ··· 1042 1027 * Grade the severity of the errors of all the CPUs. 1043 1028 */ 1044 1029 for_each_possible_cpu(cpu) { 1045 - struct mce *mtmp = &per_cpu(mces_seen, cpu); 1030 + struct mce_hw_err *etmp = &per_cpu(hw_errs_seen, cpu); 1031 + struct mce *mtmp = &etmp->m; 1046 1032 1047 1033 if (mtmp->severity > global_worst) { 1048 1034 global_worst = mtmp->severity; 1049 - m = &per_cpu(mces_seen, cpu); 1035 + err = &per_cpu(hw_errs_seen, cpu); 1036 + m = &err->m; 1050 1037 } 1051 1038 } 1052 1039 ··· 1060 1043 if (m && global_worst >= MCE_PANIC_SEVERITY) { 1061 1044 /* call mce_severity() to get "msg" for panic */ 1062 1045 mce_severity(m, NULL, &msg, true); 1063 - mce_panic("Fatal machine check", m, msg); 1046 + mce_panic("Fatal machine check", err, msg); 1064 1047 } 1065 1048 1066 1049 /* ··· 1077 1060 mce_panic("Fatal machine check from unknown source", NULL, NULL); 1078 1061 1079 1062 /* 1080 - * Now clear all the mces_seen so that they don't reappear on 1063 + * Now clear all the hw_errs_seen so that they don't reappear on 1081 1064 * the next mce. 
1082 1065 */ 1083 1066 for_each_possible_cpu(cpu) 1084 - memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); 1067 + memset(&per_cpu(hw_errs_seen, cpu), 0, sizeof(struct mce_hw_err)); 1085 1068 } 1086 1069 1087 1070 static atomic_t global_nwo; ··· 1285 1268 } 1286 1269 1287 1270 static __always_inline int 1288 - __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final, 1289 - unsigned long *toclear, unsigned long *valid_banks, int no_way_out, 1290 - int *worst) 1271 + __mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs, 1272 + struct mce_hw_err *final, unsigned long *toclear, 1273 + unsigned long *valid_banks, int no_way_out, int *worst) 1291 1274 { 1292 1275 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); 1293 1276 struct mca_config *cfg = &mca_cfg; 1294 1277 int severity, i, taint = 0; 1278 + struct mce *m = &err->m; 1295 1279 1296 1280 for (i = 0; i < this_cpu_read(mce_num_banks); i++) { 1297 1281 arch___clear_bit(i, toclear); ··· 1337 1319 if (severity == MCE_NO_SEVERITY) 1338 1320 continue; 1339 1321 1340 - mce_read_aux(m, i); 1322 + mce_read_aux(err, i); 1341 1323 1342 1324 /* assuming valid severity level != 0 */ 1343 1325 m->severity = severity; ··· 1347 1329 * done in #MC context, where instrumentation is disabled. 
1348 1330 */ 1349 1331 instrumentation_begin(); 1350 - mce_log(m); 1332 + mce_log(err); 1351 1333 instrumentation_end(); 1352 1334 1353 1335 if (severity > *worst) { 1354 - *final = *m; 1336 + *final = *err; 1355 1337 *worst = severity; 1356 1338 } 1357 1339 } 1358 1340 1359 1341 /* mce_clear_state will clear *final, save locally for use later */ 1360 - *m = *final; 1342 + *err = *final; 1361 1343 1362 1344 return taint; 1363 1345 } ··· 1417 1399 set_mce_nospec(pfn); 1418 1400 } 1419 1401 1420 - static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *)) 1402 + static void queue_task_work(struct mce_hw_err *err, char *msg, void (*func)(struct callback_head *)) 1421 1403 { 1422 1404 int count = ++current->mce_count; 1405 + struct mce *m = &err->m; 1423 1406 1424 1407 /* First call, save all the details */ 1425 1408 if (count == 1) { ··· 1433 1414 1434 1415 /* Ten is likely overkill. Don't expect more than two faults before task_work() */ 1435 1416 if (count > 10) 1436 - mce_panic("Too many consecutive machine checks while accessing user data", m, msg); 1417 + mce_panic("Too many consecutive machine checks while accessing user data", 1418 + err, msg); 1437 1419 1438 1420 /* Second or later call, make sure page address matches the one from first call */ 1439 1421 if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT)) 1440 - mce_panic("Consecutive machine checks to different user pages", m, msg); 1422 + mce_panic("Consecutive machine checks to different user pages", err, msg); 1441 1423 1442 1424 /* Do not call task_work_add() more than once */ 1443 1425 if (count > 1) ··· 1487 1467 int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0; 1488 1468 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 }; 1489 1469 DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 }; 1490 - struct mce m, *final; 1470 + struct mce_hw_err *final; 1471 + struct mce_hw_err err; 1491 1472 char *msg = NULL; 1473 + struct mce *m; 1492 
1474 1493 1475 if (unlikely(mce_flags.p5)) 1494 1476 return pentium_machine_check(regs); ··· 1528 1506 1529 1507 this_cpu_inc(mce_exception_count); 1530 1508 1531 - mce_gather_info(&m, regs); 1532 - m.tsc = rdtsc(); 1509 + mce_gather_info(&err, regs); 1510 + m = &err.m; 1511 + m->tsc = rdtsc(); 1533 1512 1534 - final = this_cpu_ptr(&mces_seen); 1535 - *final = m; 1513 + final = this_cpu_ptr(&hw_errs_seen); 1514 + *final = err; 1536 1515 1537 - no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs); 1516 + no_way_out = mce_no_way_out(&err, &msg, valid_banks, regs); 1538 1517 1539 1518 barrier(); 1540 1519 ··· 1544 1521 * Assume the worst for now, but if we find the 1545 1522 * severity is MCE_AR_SEVERITY we have other options. 1546 1523 */ 1547 - if (!(m.mcgstatus & MCG_STATUS_RIPV)) 1524 + if (!(m->mcgstatus & MCG_STATUS_RIPV)) 1548 1525 kill_current_task = 1; 1549 1526 /* 1550 1527 * Check if this MCE is signaled to only this logical processor, 1551 1528 * on Intel, Zhaoxin only. 1552 1529 */ 1553 - if (m.cpuvendor == X86_VENDOR_INTEL || 1554 - m.cpuvendor == X86_VENDOR_ZHAOXIN) 1555 - lmce = m.mcgstatus & MCG_STATUS_LMCES; 1530 + if (m->cpuvendor == X86_VENDOR_INTEL || 1531 + m->cpuvendor == X86_VENDOR_ZHAOXIN) 1532 + lmce = m->mcgstatus & MCG_STATUS_LMCES; 1556 1533 1557 1534 /* 1558 1535 * Local machine check may already know that we have to panic. 
··· 1563 1540 */ 1564 1541 if (lmce) { 1565 1542 if (no_way_out) 1566 - mce_panic("Fatal local machine check", &m, msg); 1543 + mce_panic("Fatal local machine check", &err, msg); 1567 1544 } else { 1568 1545 order = mce_start(&no_way_out); 1569 1546 } 1570 1547 1571 - taint = __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst); 1548 + taint = __mc_scan_banks(&err, regs, final, toclear, valid_banks, no_way_out, &worst); 1572 1549 1573 1550 if (!no_way_out) 1574 1551 mce_clear_state(toclear); ··· 1583 1560 no_way_out = worst >= MCE_PANIC_SEVERITY; 1584 1561 1585 1562 if (no_way_out) 1586 - mce_panic("Fatal machine check on current CPU", &m, msg); 1563 + mce_panic("Fatal machine check on current CPU", &err, msg); 1587 1564 } 1588 1565 } else { 1589 1566 /* ··· 1595 1572 * make sure we have the right "msg". 1596 1573 */ 1597 1574 if (worst >= MCE_PANIC_SEVERITY) { 1598 - mce_severity(&m, regs, &msg, true); 1599 - mce_panic("Local fatal machine check!", &m, msg); 1575 + mce_severity(m, regs, &msg, true); 1576 + mce_panic("Local fatal machine check!", &err, msg); 1600 1577 } 1601 1578 } 1602 1579 ··· 1614 1591 goto out; 1615 1592 1616 1593 /* Fault was in user mode and we need to take some action */ 1617 - if ((m.cs & 3) == 3) { 1594 + if ((m->cs & 3) == 3) { 1618 1595 /* If this triggers there is no way to recover. Die hard. 
*/ 1619 1596 BUG_ON(!on_thread_stack() || !user_mode(regs)); 1620 1597 1621 - if (!mce_usable_address(&m)) 1622 - queue_task_work(&m, msg, kill_me_now); 1598 + if (!mce_usable_address(m)) 1599 + queue_task_work(&err, msg, kill_me_now); 1623 1600 else 1624 - queue_task_work(&m, msg, kill_me_maybe); 1601 + queue_task_work(&err, msg, kill_me_maybe); 1625 1602 1626 - } else if (m.mcgstatus & MCG_STATUS_SEAM_NR) { 1603 + } else if (m->mcgstatus & MCG_STATUS_SEAM_NR) { 1627 1604 /* 1628 1605 * Saved RIP on stack makes it look like the machine check 1629 1606 * was taken in the kernel on the instruction following ··· 1635 1612 * not occur there. Mark the page as poisoned so it won't 1636 1613 * be added to free list when the guest is terminated. 1637 1614 */ 1638 - if (mce_usable_address(&m)) { 1639 - struct page *p = pfn_to_online_page(m.addr >> PAGE_SHIFT); 1615 + if (mce_usable_address(m)) { 1616 + struct page *p = pfn_to_online_page(m->addr >> PAGE_SHIFT); 1640 1617 1641 1618 if (p) 1642 1619 SetPageHWPoison(p); ··· 1651 1628 * corresponding exception handler which would do that is the 1652 1629 * proper one. 1653 1630 */ 1654 - if (m.kflags & MCE_IN_KERNEL_RECOV) { 1631 + if (m->kflags & MCE_IN_KERNEL_RECOV) { 1655 1632 if (!fixup_exception(regs, X86_TRAP_MC, 0, 0)) 1656 - mce_panic("Failed kernel mode recovery", &m, msg); 1633 + mce_panic("Failed kernel mode recovery", &err, msg); 1657 1634 } 1658 1635 1659 - if (m.kflags & MCE_IN_KERNEL_COPYIN) 1660 - queue_task_work(&m, msg, kill_me_never); 1636 + if (m->kflags & MCE_IN_KERNEL_COPYIN) 1637 + queue_task_work(&err, msg, kill_me_never); 1661 1638 } 1662 1639 1663 1640 out:
+9 -9
arch/x86/kernel/cpu/mce/genpool.c
··· 31 31 */ 32 32 static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l) 33 33 { 34 + struct mce_hw_err *err1, *err2; 34 35 struct mce_evt_llist *node; 35 - struct mce *m1, *m2; 36 36 37 - m1 = &t->mce; 37 + err1 = &t->err; 38 38 39 39 llist_for_each_entry(node, &l->llnode, llnode) { 40 - m2 = &node->mce; 40 + err2 = &node->err; 41 41 42 - if (!mce_cmp(m1, m2)) 42 + if (!mce_cmp(&err1->m, &err2->m)) 43 43 return true; 44 44 } 45 45 return false; ··· 73 73 74 74 void mce_gen_pool_process(struct work_struct *__unused) 75 75 { 76 - struct llist_node *head; 77 76 struct mce_evt_llist *node, *tmp; 77 + struct llist_node *head; 78 78 struct mce *mce; 79 79 80 80 head = llist_del_all(&mce_event_llist); ··· 83 83 84 84 head = llist_reverse_order(head); 85 85 llist_for_each_entry_safe(node, tmp, head, llnode) { 86 - mce = &node->mce; 86 + mce = &node->err.m; 87 87 blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); 88 88 gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); 89 89 } ··· 94 94 return llist_empty(&mce_event_llist); 95 95 } 96 96 97 - int mce_gen_pool_add(struct mce *mce) 97 + int mce_gen_pool_add(struct mce_hw_err *err) 98 98 { 99 99 struct mce_evt_llist *node; 100 100 101 - if (filter_mce(mce)) 101 + if (filter_mce(&err->m)) 102 102 return -EINVAL; 103 103 104 104 if (!mce_evt_pool) ··· 110 110 return -ENOMEM; 111 111 } 112 112 113 - memcpy(&node->mce, mce, sizeof(*mce)); 113 + memcpy(&node->err, err, sizeof(*err)); 114 114 llist_add(&node->llnode, &mce_event_llist); 115 115 116 116 return 0;
+4 -2
arch/x86/kernel/cpu/mce/inject.c
··· 502 502 503 503 static void do_inject(void) 504 504 { 505 - u64 mcg_status = 0; 506 505 unsigned int cpu = i_mce.extcpu; 506 + struct mce_hw_err err; 507 + u64 mcg_status = 0; 507 508 u8 b = i_mce.bank; 508 509 509 510 i_mce.tsc = rdtsc_ordered(); ··· 518 517 i_mce.status |= MCI_STATUS_SYNDV; 519 518 520 519 if (inj_type == SW_INJ) { 521 - mce_log(&i_mce); 520 + err.m = i_mce; 521 + mce_log(&err); 522 522 return; 523 523 } 524 524
+2 -2
arch/x86/kernel/cpu/mce/internal.h
··· 26 26 27 27 struct mce_evt_llist { 28 28 struct llist_node llnode; 29 - struct mce mce; 29 + struct mce_hw_err err; 30 30 }; 31 31 32 32 void mce_gen_pool_process(struct work_struct *__unused); 33 33 bool mce_gen_pool_empty(void); 34 - int mce_gen_pool_add(struct mce *mce); 34 + int mce_gen_pool_add(struct mce_hw_err *err); 35 35 int mce_gen_pool_init(void); 36 36 struct llist_node *mce_gen_pool_prepare_records(void); 37 37
+21 -21
include/trace/events/mce.h
··· 19 19 20 20 TRACE_EVENT(mce_record, 21 21 22 - TP_PROTO(struct mce *m), 22 + TP_PROTO(struct mce_hw_err *err), 23 23 24 - TP_ARGS(m), 24 + TP_ARGS(err), 25 25 26 26 TP_STRUCT__entry( 27 27 __field( u64, mcgcap ) ··· 46 46 ), 47 47 48 48 TP_fast_assign( 49 - __entry->mcgcap = m->mcgcap; 50 - __entry->mcgstatus = m->mcgstatus; 51 - __entry->status = m->status; 52 - __entry->addr = m->addr; 53 - __entry->misc = m->misc; 54 - __entry->synd = m->synd; 55 - __entry->ipid = m->ipid; 56 - __entry->ip = m->ip; 57 - __entry->tsc = m->tsc; 58 - __entry->ppin = m->ppin; 59 - __entry->walltime = m->time; 60 - __entry->cpu = m->extcpu; 61 - __entry->cpuid = m->cpuid; 62 - __entry->apicid = m->apicid; 63 - __entry->socketid = m->socketid; 64 - __entry->cs = m->cs; 65 - __entry->bank = m->bank; 66 - __entry->cpuvendor = m->cpuvendor; 67 - __entry->microcode = m->microcode; 49 + __entry->mcgcap = err->m.mcgcap; 50 + __entry->mcgstatus = err->m.mcgstatus; 51 + __entry->status = err->m.status; 52 + __entry->addr = err->m.addr; 53 + __entry->misc = err->m.misc; 54 + __entry->synd = err->m.synd; 55 + __entry->ipid = err->m.ipid; 56 + __entry->ip = err->m.ip; 57 + __entry->tsc = err->m.tsc; 58 + __entry->ppin = err->m.ppin; 59 + __entry->walltime = err->m.time; 60 + __entry->cpu = err->m.extcpu; 61 + __entry->cpuid = err->m.cpuid; 62 + __entry->apicid = err->m.apicid; 63 + __entry->socketid = err->m.socketid; 64 + __entry->cs = err->m.cs; 65 + __entry->bank = err->m.bank; 66 + __entry->cpuvendor = err->m.cpuvendor; 67 + __entry->microcode = err->m.microcode; 68 68 ), 69 69 70 70 TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x",