Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ras_core_for_v6.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS updates from Borislav Petkov:

- The second part of the AMD MCA interrupts rework after the
last-minute show-stopper from the last merge window was sorted out.
After this, the AMD MCA deferred errors, thresholding and corrected
errors interrupt handlers use common MCA code and are tightly
integrated into the core MCA code, thereby getting rid of
considerable duplication. This culminates in allowing CMCI error
thresholding storms to be detected on AMD too, using the common
infrastructure.

- Add support for two new MCA bank bits on AMD Zen6 which denote
whether the logged error address is a system physical address. When
it is, the address does not need to be translated before further
error recovery can be done.

* tag 'ras_core_for_v6.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mce: Handle AMD threshold interrupt storms
x86/mce: Do not clear bank's poll bit in mce_poll_banks on AMD SMCA systems
x86/mce: Add support for physical address valid bit
x86/mce: Save and use APEI corrected threshold limit
x86/mce/amd: Define threshold restart function for banks
x86/mce/amd: Remove redundant reset_block()
x86/mce/amd: Support SMCA Corrected Error Interrupt
x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems
x86/mce: Unify AMD DFR handler with MCA Polling
x86/mce: Unify AMD THR handler with MCA Polling

+210 -216
+14
arch/x86/include/asm/mce.h
··· 48 48 49 49 /* AMD-specific bits */ 50 50 #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ 51 + #define MCI_STATUS_PADDRV BIT_ULL(54) /* Valid System Physical Address */ 51 52 #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ 52 53 #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ 53 54 #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ ··· 63 62 */ 64 63 #define MCI_CONFIG_MCAX 0x1 65 64 #define MCI_CONFIG_FRUTEXT BIT_ULL(9) 65 + #define MCI_CONFIG_PADDRV BIT_ULL(11) 66 66 #define MCI_IPID_MCATYPE 0xFFFF0000 67 67 #define MCI_IPID_HWID 0xFFF 68 68 ··· 166 164 * treat it like a fault taken in user mode. 167 165 */ 168 166 #define MCE_IN_KERNEL_COPYIN BIT_ULL(7) 167 + 168 + /* 169 + * Indicates that handler should check and clear Deferred error registers 170 + * rather than common ones. 171 + */ 172 + #define MCE_CHECK_DFR_REGS BIT_ULL(8) 169 173 170 174 /* 171 175 * This structure contains all data related to the MCE log. Also ··· 309 301 310 302 /* Disable CMCI/polling for MCA bank claimed by firmware */ 311 303 extern void mce_disable_bank(int bank); 304 + 305 + #ifdef CONFIG_X86_MCE_THRESHOLD 306 + void mce_save_apei_thr_limit(u32 thr_limit); 307 + #else 308 + static inline void mce_save_apei_thr_limit(u32 thr_limit) { } 309 + #endif /* CONFIG_X86_MCE_THRESHOLD */ 312 310 313 311 /* 314 312 * Exception handler
+2
arch/x86/kernel/acpi/apei.c
··· 19 19 if (!cmc->enabled) 20 20 return 0; 21 21 22 + mce_save_apei_thr_limit(cmc->notify.error_threshold_value); 23 + 22 24 /* 23 25 * We expect HEST to provide a list of MC banks that report errors 24 26 * in firmware first mode. Otherwise, return non-zero value to
+142 -214
arch/x86/kernel/cpu/mce/amd.c
··· 43 43 /* Deferred error settings */ 44 44 #define MSR_CU_DEF_ERR 0xC0000410 45 45 #define MASK_DEF_LVTOFF 0x000000F0 46 - #define MASK_DEF_INT_TYPE 0x00000006 47 - #define DEF_LVT_OFF 0x2 48 - #define DEF_INT_TYPE_APIC 0x2 49 46 50 47 /* Scalable MCA: */ 51 48 ··· 50 53 #define SMCA_THR_LVT_OFF 0xF000 51 54 52 55 static bool thresholding_irq_en; 56 + 57 + struct mce_amd_cpu_data { 58 + mce_banks_t thr_intr_banks; 59 + mce_banks_t dfr_intr_banks; 60 + 61 + u32 thr_intr_en: 1, 62 + dfr_intr_en: 1, 63 + __resv: 30; 64 + }; 65 + 66 + static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data); 53 67 54 68 static const char * const th_names[] = { 55 69 "load_store", ··· 87 79 const struct smca_hwid *hwid; 88 80 u32 id; /* Value of MCA_IPID[InstanceId]. */ 89 81 u8 sysfs_id; /* Value used for sysfs name. */ 82 + u64 paddrv :1, /* Physical Address Valid bit in MCA_CONFIG */ 83 + __reserved :63; 90 84 }; 91 85 92 86 static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks); ··· 274 264 275 265 static void smca_configure(unsigned int bank, unsigned int cpu) 276 266 { 267 + struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data); 277 268 u8 *bank_counts = this_cpu_ptr(smca_bank_counts); 278 269 const struct smca_hwid *s_hwid; 279 270 unsigned int i, hwid_mcatype; ··· 305 294 * APIC based interrupt. First, check that no interrupt has been 306 295 * set. 307 296 */ 308 - if ((low & BIT(5)) && !((high >> 5) & 0x3)) 297 + if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) { 298 + __set_bit(bank, data->dfr_intr_banks); 309 299 high |= BIT(5); 300 + } 301 + 302 + /* 303 + * SMCA Corrected Error Interrupt 304 + * 305 + * MCA_CONFIG[IntPresent] is bit 10, and tells us if the bank can 306 + * send an MCA Thresholding interrupt without the OS initializing 307 + * this feature. This can be used if the threshold limit is managed 308 + * by the platform. 309 + * 310 + * MCA_CONFIG[IntEn] is bit 40 (8 in the high portion of the MSR). 
311 + * The OS should set this to inform the platform that the OS is ready 312 + * to handle the MCA Thresholding interrupt. 313 + */ 314 + if ((low & BIT(10)) && data->thr_intr_en) { 315 + __set_bit(bank, data->thr_intr_banks); 316 + high |= BIT(8); 317 + } 310 318 311 319 this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8)); 320 + 321 + if (low & MCI_CONFIG_PADDRV) 322 + this_cpu_ptr(smca_banks)[bank].paddrv = 1; 312 323 313 324 wrmsr(smca_config, low, high); 314 325 } ··· 401 368 { 402 369 int msr = (hi & MASK_LVTOFF_HI) >> 20; 403 370 371 + /* 372 + * On SMCA CPUs, LVT offset is programmed at a different MSR, and 373 + * the BIOS provides the value. The original field where LVT offset 374 + * was set is reserved. Return early here: 375 + */ 376 + if (mce_flags.smca) 377 + return false; 378 + 404 379 if (apic < 0) { 405 380 pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " 406 381 "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, ··· 417 376 } 418 377 419 378 if (apic != msr) { 420 - /* 421 - * On SMCA CPUs, LVT offset is programmed at a different MSR, and 422 - * the BIOS provides the value. The original field where LVT offset 423 - * was set is reserved. 
Return early here: 424 - */ 425 - if (mce_flags.smca) 426 - return false; 427 - 428 379 pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " 429 380 "for bank %d, block %d (MSR%08X=0x%x%08x)\n", 430 381 b->cpu, apic, b->bank, b->block, b->address, hi, lo); ··· 476 443 wrmsr(tr->b->address, lo, hi); 477 444 } 478 445 446 + static void threshold_restart_bank(unsigned int bank, bool intr_en) 447 + { 448 + struct threshold_bank **thr_banks = this_cpu_read(threshold_banks); 449 + struct threshold_block *block, *tmp; 450 + struct thresh_restart tr; 451 + 452 + if (!thr_banks || !thr_banks[bank]) 453 + return; 454 + 455 + memset(&tr, 0, sizeof(tr)); 456 + 457 + list_for_each_entry_safe(block, tmp, &thr_banks[bank]->miscj, miscj) { 458 + tr.b = block; 459 + tr.b->interrupt_enable = intr_en; 460 + threshold_restart_block(&tr); 461 + } 462 + } 463 + 464 + /* Try to use the threshold limit reported through APEI. */ 465 + static u16 get_thr_limit(void) 466 + { 467 + u32 thr_limit = mce_get_apei_thr_limit(); 468 + 469 + /* Fallback to old default if APEI limit is not available. 
*/ 470 + if (!thr_limit) 471 + return THRESHOLD_MAX; 472 + 473 + return min(thr_limit, THRESHOLD_MAX); 474 + } 475 + 479 476 static void mce_threshold_block_init(struct threshold_block *b, int offset) 480 477 { 481 478 struct thresh_restart tr = { ··· 514 451 .lvt_off = offset, 515 452 }; 516 453 517 - b->threshold_limit = THRESHOLD_MAX; 454 + b->threshold_limit = get_thr_limit(); 518 455 threshold_restart_block(&tr); 519 456 }; 520 457 ··· 525 462 return new; 526 463 527 464 return reserved; 528 - } 529 - 530 - static int setup_APIC_deferred_error(int reserved, int new) 531 - { 532 - if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR, 533 - APIC_EILVT_MSG_FIX, 0)) 534 - return new; 535 - 536 - return reserved; 537 - } 538 - 539 - static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) 540 - { 541 - u32 low = 0, high = 0; 542 - int def_offset = -1, def_new; 543 - 544 - if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high)) 545 - return; 546 - 547 - def_new = (low & MASK_DEF_LVTOFF) >> 4; 548 - if (!(low & MASK_DEF_LVTOFF)) { 549 - pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n"); 550 - def_new = DEF_LVT_OFF; 551 - low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4); 552 - } 553 - 554 - def_offset = setup_APIC_deferred_error(def_offset, def_new); 555 - if ((def_offset == def_new) && 556 - (deferred_error_int_vector != amd_deferred_error_interrupt)) 557 - deferred_error_int_vector = amd_deferred_error_interrupt; 558 - 559 - if (!mce_flags.smca) 560 - low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; 561 - 562 - wrmsr(MSR_CU_DEF_ERR, low, high); 563 465 } 564 466 565 467 static u32 get_block_address(u32 current_addr, u32 low, u32 high, ··· 562 534 return addr; 563 535 } 564 536 565 - static int 566 - prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, 567 - int offset, u32 misc_high) 537 + static int prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, 538 + int 
offset, u32 misc_high) 568 539 { 569 540 unsigned int cpu = smp_processor_id(); 570 - u32 smca_low, smca_high; 571 541 struct threshold_block b; 572 542 int new; 573 543 ··· 582 556 if (!b.interrupt_capable) 583 557 goto done; 584 558 559 + __set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks); 585 560 b.interrupt_enable = 1; 586 561 587 - if (!mce_flags.smca) { 588 - new = (misc_high & MASK_LVTOFF_HI) >> 20; 589 - goto set_offset; 590 - } 562 + if (mce_flags.smca) 563 + goto done; 591 564 592 - /* Gather LVT offset for thresholding: */ 593 - if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) 594 - goto out; 595 - 596 - new = (smca_low & SMCA_THR_LVT_OFF) >> 12; 597 - 598 - set_offset: 565 + new = (misc_high & MASK_LVTOFF_HI) >> 20; 599 566 offset = setup_APIC_mce_threshold(offset, new); 600 567 if (offset == new) 601 568 thresholding_irq_en = true; ··· 596 577 done: 597 578 mce_threshold_block_init(&b, offset); 598 579 599 - out: 600 580 return offset; 601 581 } 602 582 ··· 686 668 mce_banks[0].ctl = 0; 687 669 } 688 670 671 + /* 672 + * Enable the APIC LVT interrupt vectors once per-CPU. This should be done before hardware is 673 + * ready to send interrupts. 674 + * 675 + * Individual error sources are enabled later during per-bank init. 
676 + */ 677 + static void smca_enable_interrupt_vectors(void) 678 + { 679 + struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data); 680 + u64 mca_intr_cfg, offset; 681 + 682 + if (!mce_flags.smca || !mce_flags.succor) 683 + return; 684 + 685 + if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg)) 686 + return; 687 + 688 + offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12; 689 + if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0)) 690 + data->thr_intr_en = 1; 691 + 692 + offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4; 693 + if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0)) 694 + data->dfr_intr_en = 1; 695 + } 696 + 689 697 /* cpu init entry point, called from mce.c with preempt off */ 690 698 void mce_amd_feature_init(struct cpuinfo_x86 *c) 691 699 { ··· 723 679 724 680 mce_flags.amd_threshold = 1; 725 681 682 + smca_enable_interrupt_vectors(); 683 + 726 684 for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { 727 - if (mce_flags.smca) 685 + if (mce_flags.smca) { 728 686 smca_configure(bank, cpu); 687 + 688 + if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en) 689 + continue; 690 + } 729 691 730 692 disable_err_thresholding(c, bank); 731 693 ··· 753 703 offset = prepare_threshold_block(bank, block, address, offset, high); 754 704 } 755 705 } 756 - 757 - if (mce_flags.succor) 758 - deferred_error_interrupt_enable(c); 759 706 } 760 707 761 708 void smca_bsp_init(void) ··· 795 748 } 796 749 797 750 /* 798 - * AMD systems do not have an explicit indicator that the value in MCA_ADDR is 799 - * a system physical address. Therefore, individual cases need to be detected. 800 - * Future cases and checks will be added as needed. 751 + * Some AMD systems have an explicit indicator that the value in MCA_ADDR is a 752 + * system physical address. Individual cases though, need to be detected for 753 + * other systems. Future cases will be added as needed. 
801 754 * 802 755 * 1) General case 803 756 * a) Assume address is not usable. ··· 811 764 * a) Reported in legacy bank 4 with extended error code (XEC) 8. 812 765 * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore, 813 766 * this bit should not be checked. 767 + * 4) MCI_STATUS_PADDRVAL is set 768 + * a) Will provide a valid system physical address. 814 769 * 815 770 * NOTE: SMCA UMC memory errors fall into case #1. 816 771 */ ··· 826 777 return false; 827 778 } 828 779 780 + if (this_cpu_ptr(smca_banks)[m->bank].paddrv) 781 + return m->status & MCI_STATUS_PADDRV; 782 + 829 783 /* Check poison bit for all other bank types. */ 830 784 if (m->status & MCI_STATUS_POISON) 831 785 return true; 832 786 833 787 /* Assume address is not usable for all others. */ 834 788 return false; 835 - } 836 - 837 - static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) 838 - { 839 - struct mce_hw_err err; 840 - struct mce *m = &err.m; 841 - 842 - mce_prep_record(&err); 843 - 844 - m->status = status; 845 - m->misc = misc; 846 - m->bank = bank; 847 - m->tsc = rdtsc(); 848 - 849 - if (m->status & MCI_STATUS_ADDRV) { 850 - m->addr = addr; 851 - 852 - smca_extract_err_addr(m); 853 - } 854 - 855 - if (mce_flags.smca) { 856 - rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid); 857 - 858 - if (m->status & MCI_STATUS_SYNDV) { 859 - rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd); 860 - rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1); 861 - rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2); 862 - } 863 - } 864 - 865 - mce_log(&err); 866 789 } 867 790 868 791 DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) ··· 846 825 apic_eoi(); 847 826 } 848 827 849 - /* 850 - * Returns true if the logged error is deferred. False, otherwise. 
851 - */ 852 - static inline bool 853 - _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) 854 - { 855 - u64 status, addr = 0; 856 - 857 - rdmsrq(msr_stat, status); 858 - if (!(status & MCI_STATUS_VAL)) 859 - return false; 860 - 861 - if (status & MCI_STATUS_ADDRV) 862 - rdmsrq(msr_addr, addr); 863 - 864 - __log_error(bank, status, addr, misc); 865 - 866 - wrmsrq(msr_stat, 0); 867 - 868 - return status & MCI_STATUS_DEFERRED; 869 - } 870 - 871 - static bool _log_error_deferred(unsigned int bank, u32 misc) 872 - { 873 - if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), 874 - mca_msr_reg(bank, MCA_ADDR), misc)) 875 - return false; 876 - 877 - /* 878 - * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers. 879 - * Return true here to avoid accessing these registers. 880 - */ 881 - if (!mce_flags.smca) 882 - return true; 883 - 884 - /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */ 885 - wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); 886 - return true; 887 - } 888 - 889 - /* 890 - * We have three scenarios for checking for Deferred errors: 891 - * 892 - * 1) Non-SMCA systems check MCA_STATUS and log error if found. 893 - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also 894 - * clear MCA_DESTAT. 895 - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and 896 - * log it. 897 - */ 898 - static void log_error_deferred(unsigned int bank) 899 - { 900 - if (_log_error_deferred(bank, 0)) 901 - return; 902 - 903 - /* 904 - * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check 905 - * for a valid error. 
906 - */ 907 - _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank), 908 - MSR_AMD64_SMCA_MCx_DEADDR(bank), 0); 909 - } 910 - 911 828 /* APIC interrupt handler for deferred errors */ 912 829 static void amd_deferred_error_interrupt(void) 913 830 { 914 - unsigned int bank; 915 - 916 - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) 917 - log_error_deferred(bank); 831 + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks); 918 832 } 919 833 920 - static void log_error_thresholding(unsigned int bank, u64 misc) 834 + void mce_amd_handle_storm(unsigned int bank, bool on) 921 835 { 922 - _log_error_deferred(bank, misc); 836 + threshold_restart_bank(bank, on); 923 837 } 924 838 925 - static void log_and_reset_block(struct threshold_block *block) 839 + static void amd_reset_thr_limit(unsigned int bank) 926 840 { 927 - struct thresh_restart tr; 928 - u32 low = 0, high = 0; 929 - 930 - if (!block) 931 - return; 932 - 933 - if (rdmsr_safe(block->address, &low, &high)) 934 - return; 935 - 936 - if (!(high & MASK_OVERFLOW_HI)) 937 - return; 938 - 939 - /* Log the MCE which caused the threshold event. */ 940 - log_error_thresholding(block->bank, ((u64)high << 32) | low); 941 - 942 - /* Reset threshold block after logging error. */ 943 - memset(&tr, 0, sizeof(tr)); 944 - tr.b = block; 945 - threshold_restart_block(&tr); 841 + threshold_restart_bank(bank, true); 946 842 } 947 843 948 844 /* ··· 868 930 */ 869 931 static void amd_threshold_interrupt(void) 870 932 { 871 - struct threshold_bank **bp = this_cpu_read(threshold_banks), *thr_bank; 872 - unsigned int bank, cpu = smp_processor_id(); 873 - struct threshold_block *block, *tmp; 874 - 875 - /* 876 - * Validate that the threshold bank has been initialized already. The 877 - * handler is installed at boot time, but on a hotplug event the 878 - * interrupt might fire before the data has been initialized. 
879 - */ 880 - if (!bp) 881 - return; 882 - 883 - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { 884 - if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank))) 885 - continue; 886 - 887 - thr_bank = bp[bank]; 888 - if (!thr_bank) 889 - continue; 890 - 891 - list_for_each_entry_safe(block, tmp, &thr_bank->miscj, miscj) 892 - log_and_reset_block(block); 893 - } 933 + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->thr_intr_banks); 894 934 } 895 935 896 936 void amd_clear_bank(struct mce *m) 897 937 { 938 + amd_reset_thr_limit(m->bank); 939 + 940 + /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */ 941 + if (m->status & MCI_STATUS_DEFERRED) 942 + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); 943 + 944 + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ 945 + if (m->kflags & MCE_CHECK_DFR_REGS) 946 + return; 947 + 898 948 mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); 899 949 } 900 950 ··· 1098 1172 b->address = address; 1099 1173 b->interrupt_enable = 0; 1100 1174 b->interrupt_capable = lvt_interrupt_supported(bank, high); 1101 - b->threshold_limit = THRESHOLD_MAX; 1175 + b->threshold_limit = get_thr_limit(); 1102 1176 1103 1177 if (b->interrupt_capable) { 1104 1178 default_attrs[2] = &interrupt_enable.attr; ··· 1108 1182 } 1109 1183 1110 1184 list_add(&b->miscj, &tb->miscj); 1185 + 1186 + mce_threshold_block_init(b, (high & MASK_LVTOFF_HI) >> 20); 1111 1187 1112 1188 err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b)); 1113 1189 if (err)
+30 -1
arch/x86/kernel/cpu/mce/core.c
··· 687 687 m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC)); 688 688 689 689 if (m->status & MCI_STATUS_ADDRV) { 690 - m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); 690 + if (m->kflags & MCE_CHECK_DFR_REGS) 691 + m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i)); 692 + else 693 + m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); 691 694 692 695 /* 693 696 * Mask the reported address by the reported granularity. ··· 718 715 DEFINE_PER_CPU(unsigned, mce_poll_count); 719 716 720 717 /* 718 + * We have three scenarios for checking for Deferred errors: 719 + * 720 + * 1) Non-SMCA systems check MCA_STATUS and log error if found. 721 + * 2) SMCA systems check MCA_STATUS. If error is found then log it and also 722 + * clear MCA_DESTAT. 723 + * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and 724 + * log it. 725 + */ 726 + static bool smca_should_log_poll_error(struct mce *m) 727 + { 728 + if (m->status & MCI_STATUS_VAL) 729 + return true; 730 + 731 + m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank)); 732 + if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) { 733 + m->kflags |= MCE_CHECK_DFR_REGS; 734 + return true; 735 + } 736 + 737 + return false; 738 + } 739 + 740 + /* 721 741 * Newer Intel systems that support software error 722 742 * recovery need to make additional checks. Other 723 743 * CPUs should skip over uncorrected errors, but log ··· 765 739 static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err) 766 740 { 767 741 struct mce *m = &err->m; 742 + 743 + if (mce_flags.smca) 744 + return smca_should_log_poll_error(m); 768 745 769 746 /* If this entry is not valid, ignore it. */ 770 747 if (!(m->status & MCI_STATUS_VAL))
+4
arch/x86/kernel/cpu/mce/internal.h
··· 67 67 void mce_inherit_storm(unsigned int bank); 68 68 bool mce_get_storm_mode(void); 69 69 void mce_set_storm_mode(bool storm); 70 + u32 mce_get_apei_thr_limit(void); 70 71 #else 71 72 static inline void cmci_storm_begin(unsigned int bank) {} 72 73 static inline void cmci_storm_end(unsigned int bank) {} ··· 75 74 static inline void mce_inherit_storm(unsigned int bank) {} 76 75 static inline bool mce_get_storm_mode(void) { return false; } 77 76 static inline void mce_set_storm_mode(bool storm) {} 77 + static inline u32 mce_get_apei_thr_limit(void) { return 0; } 78 78 #endif 79 79 80 80 /* ··· 269 267 #ifdef CONFIG_X86_MCE_AMD 270 268 void mce_threshold_create_device(unsigned int cpu); 271 269 void mce_threshold_remove_device(unsigned int cpu); 270 + void mce_amd_handle_storm(unsigned int bank, bool on); 272 271 extern bool amd_filter_mce(struct mce *m); 273 272 bool amd_mce_usable_address(struct mce *m); 274 273 void amd_clear_bank(struct mce *m); ··· 302 299 #else 303 300 static inline void mce_threshold_create_device(unsigned int cpu) { } 304 301 static inline void mce_threshold_remove_device(unsigned int cpu) { } 302 + static inline void mce_amd_handle_storm(unsigned int bank, bool on) { } 305 303 static inline bool amd_filter_mce(struct mce *m) { return false; } 306 304 static inline bool amd_mce_usable_address(struct mce *m) { return false; } 307 305 static inline void amd_clear_bank(struct mce *m) { }
+18 -1
arch/x86/kernel/cpu/mce/threshold.c
··· 13 13 14 14 #include "internal.h" 15 15 16 + static u32 mce_apei_thr_limit; 17 + 18 + void mce_save_apei_thr_limit(u32 thr_limit) 19 + { 20 + mce_apei_thr_limit = thr_limit; 21 + pr_info("HEST corrected error threshold limit: %u\n", thr_limit); 22 + } 23 + 24 + u32 mce_get_apei_thr_limit(void) 25 + { 26 + return mce_apei_thr_limit; 27 + } 28 + 16 29 static void default_threshold_interrupt(void) 17 30 { 18 31 pr_err("Unexpected threshold interrupt at vector %x\n", ··· 76 63 case X86_VENDOR_INTEL: 77 64 mce_intel_handle_storm(bank, on); 78 65 break; 66 + case X86_VENDOR_AMD: 67 + mce_amd_handle_storm(bank, on); 68 + break; 79 69 } 80 70 } 81 71 ··· 101 85 { 102 86 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 103 87 104 - __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); 88 + if (!mce_flags.amd_threshold) 89 + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); 105 90 storm->banks[bank].history = 0; 106 91 storm->banks[bank].in_storm_mode = false; 107 92