Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ras_core_for_v6.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS updates from Borislav Petkov:

- Unify and refactor the MCA arch side and better separate code

- Clean up and simplify the AMD RAS side, unify code, drop unused stuff

* tag 'ras_core_for_v6.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mce: Add a clear_bank() helper
x86/mce: Move machine_check_poll() status checks to helper functions
x86/mce: Separate global and per-CPU quirks
x86/mce: Do 'UNKNOWN' vendor check early
x86/mce: Define BSP-only SMCA init
x86/mce: Define BSP-only init
x86/mce: Set CR4.MCE last during init
x86/mce: Remove __mcheck_cpu_init_early()
x86/mce: Cleanup bank processing on init
x86/mce/amd: Put list_head in threshold_bank
x86/mce/amd: Remove smca_banks_map
x86/mce/amd: Remove return value for mce_threshold_{create,remove}_device()
x86/mce/amd: Rename threshold restart function

+238 -283
+3 -8
arch/x86/include/asm/mce.h
··· 241 241 242 242 #ifdef CONFIG_X86_MCE 243 243 int mcheck_init(void); 244 + void mca_bsp_init(struct cpuinfo_x86 *c); 244 245 void mcheck_cpu_init(struct cpuinfo_x86 *c); 245 246 void mcheck_cpu_clear(struct cpuinfo_x86 *c); 246 247 int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, 247 248 u64 lapic_id); 248 249 #else 249 250 static inline int mcheck_init(void) { return 0; } 251 + static inline void mca_bsp_init(struct cpuinfo_x86 *c) {} 250 252 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} 251 253 static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} 252 254 static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, ··· 292 290 enum mcp_flags { 293 291 MCP_TIMESTAMP = BIT(0), /* log time stamp */ 294 292 MCP_UC = BIT(1), /* log uncorrected errors */ 295 - MCP_DONTLOG = BIT(2), /* only clear, don't log */ 296 - MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ 293 + MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */ 297 294 }; 298 295 299 296 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); ··· 372 371 373 372 extern bool amd_mce_is_memory_error(struct mce *m); 374 373 375 - extern int mce_threshold_create_device(unsigned int cpu); 376 - extern int mce_threshold_remove_device(unsigned int cpu); 377 - 378 374 void mce_amd_feature_init(struct cpuinfo_x86 *c); 379 375 enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); 380 376 #else 381 - 382 - static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; 383 - static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; 384 377 static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; 385 378 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } 386 379 #endif
+1
arch/x86/kernel/cpu/common.c
··· 1808 1808 setup_clear_cpu_cap(X86_FEATURE_LA57); 1809 1809 1810 1810 detect_nopl(); 1811 + mca_bsp_init(c); 1811 1812 } 1812 1813 1813 1814 void __init init_cpu_devs(void)
+73 -90
arch/x86/kernel/cpu/mce/amd.c
··· 241 241 242 242 struct threshold_bank { 243 243 struct kobject *kobj; 244 - struct threshold_block *blocks; 244 + /* List of threshold blocks within this MCA bank. */ 245 + struct list_head miscj; 245 246 }; 246 247 247 248 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); ··· 253 252 */ 254 253 static DEFINE_PER_CPU(u64, bank_map); 255 254 256 - /* Map of banks that have more than MCA_MISC0 available. */ 257 - static DEFINE_PER_CPU(u64, smca_misc_banks_map); 258 - 259 255 static void amd_threshold_interrupt(void); 260 256 static void amd_deferred_error_interrupt(void); 261 257 ··· 261 263 pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR); 262 264 } 263 265 void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; 264 - 265 - static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) 266 - { 267 - u32 low, high; 268 - 269 - /* 270 - * For SMCA enabled processors, BLKPTR field of the first MISC register 271 - * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). 272 - */ 273 - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) 274 - return; 275 - 276 - if (!(low & MCI_CONFIG_MCAX)) 277 - return; 278 - 279 - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high)) 280 - return; 281 - 282 - if (low & MASK_BLKPTR_LO) 283 - per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank); 284 - 285 - } 286 266 287 267 static void smca_configure(unsigned int bank, unsigned int cpu) 288 268 { ··· 301 325 302 326 wrmsr(smca_config, low, high); 303 327 } 304 - 305 - smca_set_misc_banks_map(bank, cpu); 306 328 307 329 if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { 308 330 pr_warn("Failed to read MCA_IPID for bank %d\n", bank); ··· 393 419 return true; 394 420 }; 395 421 396 - /* Reprogram MCx_MISC MSR behind this threshold bank. */ 397 - static void threshold_restart_bank(void *_tr) 422 + /* Reprogram MCx_MISC MSR behind this threshold block. 
*/ 423 + static void threshold_restart_block(void *_tr) 398 424 { 399 425 struct thresh_restart *tr = _tr; 400 426 u32 hi, lo; ··· 452 478 }; 453 479 454 480 b->threshold_limit = THRESHOLD_MAX; 455 - threshold_restart_bank(&tr); 481 + threshold_restart_block(&tr); 456 482 }; 457 483 458 484 static int setup_APIC_mce_threshold(int reserved, int new) ··· 499 525 wrmsr(MSR_CU_DEF_ERR, low, high); 500 526 } 501 527 502 - static u32 smca_get_block_address(unsigned int bank, unsigned int block, 503 - unsigned int cpu) 504 - { 505 - if (!block) 506 - return MSR_AMD64_SMCA_MCx_MISC(bank); 507 - 508 - if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank))) 509 - return 0; 510 - 511 - return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); 512 - } 513 - 514 528 static u32 get_block_address(u32 current_addr, u32 low, u32 high, 515 529 unsigned int bank, unsigned int block, 516 530 unsigned int cpu) ··· 508 546 if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) 509 547 return addr; 510 548 511 - if (mce_flags.smca) 512 - return smca_get_block_address(bank, block, cpu); 549 + if (mce_flags.smca) { 550 + if (!block) 551 + return MSR_AMD64_SMCA_MCx_MISC(bank); 552 + 553 + if (!(low & MASK_BLKPTR_LO)) 554 + return 0; 555 + 556 + return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); 557 + } 513 558 514 559 /* Fall back to method we used for older processors: */ 515 560 switch (block) { ··· 646 677 wrmsrq(MSR_K7_HWCR, hwcr); 647 678 } 648 679 680 + static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c) 681 + { 682 + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); 683 + 684 + /* This should be disabled by the BIOS, but isn't always */ 685 + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { 686 + /* 687 + * disable GART TBL walk error reporting, which 688 + * trips off incorrectly with the IOMMU & 3ware 689 + * & Cerberus: 690 + */ 691 + clear_bit(10, (unsigned long *)&mce_banks[4].ctl); 692 + } 693 + 694 + /* 695 + * Various K7s with broken bank 0 around. 
Always disable 696 + * by default. 697 + */ 698 + if (c->x86 == 6 && this_cpu_read(mce_num_banks)) 699 + mce_banks[0].ctl = 0; 700 + } 701 + 649 702 /* cpu init entry point, called from mce.c with preempt off */ 650 703 void mce_amd_feature_init(struct cpuinfo_x86 *c) 651 704 { ··· 675 684 u32 low = 0, high = 0, address = 0; 676 685 int offset = -1; 677 686 687 + amd_apply_cpu_quirks(c); 688 + 689 + mce_flags.amd_threshold = 1; 678 690 679 691 for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { 680 692 if (mce_flags.smca) ··· 706 712 707 713 if (mce_flags.succor) 708 714 deferred_error_interrupt_enable(c); 715 + } 716 + 717 + void smca_bsp_init(void) 718 + { 719 + mce_threshold_vector = amd_threshold_interrupt; 720 + deferred_error_int_vector = amd_deferred_error_interrupt; 709 721 } 710 722 711 723 /* ··· 921 921 /* Reset threshold block after logging error. */ 922 922 memset(&tr, 0, sizeof(tr)); 923 923 tr.b = block; 924 - threshold_restart_bank(&tr); 924 + threshold_restart_block(&tr); 925 925 } 926 926 927 927 /* ··· 930 930 */ 931 931 static void amd_threshold_interrupt(void) 932 932 { 933 - struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; 934 - struct threshold_bank **bp = this_cpu_read(threshold_banks); 933 + struct threshold_bank **bp = this_cpu_read(threshold_banks), *thr_bank; 935 934 unsigned int bank, cpu = smp_processor_id(); 935 + struct threshold_block *block, *tmp; 936 936 937 937 /* 938 938 * Validate that the threshold bank has been initialized already. The ··· 946 946 if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank))) 947 947 continue; 948 948 949 - first_block = bp[bank]->blocks; 950 - if (!first_block) 949 + thr_bank = bp[bank]; 950 + if (!thr_bank) 951 951 continue; 952 952 953 - /* 954 - * The first block is also the head of the list. Check it first 955 - * before iterating over the rest. 
956 - */ 957 - log_and_reset_block(first_block); 958 - list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj) 953 + list_for_each_entry_safe(block, tmp, &thr_bank->miscj, miscj) 959 954 log_and_reset_block(block); 960 955 } 956 + } 957 + 958 + void amd_clear_bank(struct mce *m) 959 + { 960 + mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); 961 961 } 962 962 963 963 /* ··· 995 995 memset(&tr, 0, sizeof(tr)); 996 996 tr.b = b; 997 997 998 - if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1)) 998 + if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1)) 999 999 return -ENODEV; 1000 1000 1001 1001 return size; ··· 1020 1020 b->threshold_limit = new; 1021 1021 tr.b = b; 1022 1022 1023 - if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1)) 1023 + if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1)) 1024 1024 return -ENODEV; 1025 1025 1026 1026 return size; ··· 1181 1181 default_attrs[2] = NULL; 1182 1182 } 1183 1183 1184 - INIT_LIST_HEAD(&b->miscj); 1185 - 1186 - /* This is safe as @tb is not visible yet */ 1187 - if (tb->blocks) 1188 - list_add(&b->miscj, &tb->blocks->miscj); 1189 - else 1190 - tb->blocks = b; 1184 + list_add(&b->miscj, &tb->miscj); 1191 1185 1192 1186 err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b)); 1193 1187 if (err) ··· 1232 1238 goto out_free; 1233 1239 } 1234 1240 1241 + INIT_LIST_HEAD(&b->miscj); 1242 + 1235 1243 err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC)); 1236 1244 if (err) 1237 1245 goto out_kobj; ··· 1254 1258 kfree(to_block(kobj)); 1255 1259 } 1256 1260 1257 - static void deallocate_threshold_blocks(struct threshold_bank *bank) 1261 + static void threshold_remove_bank(struct threshold_bank *bank) 1258 1262 { 1259 1263 struct threshold_block *pos, *tmp; 1260 1264 1261 - list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) { 1265 + list_for_each_entry_safe(pos, tmp, &bank->miscj, 
miscj) { 1262 1266 list_del(&pos->miscj); 1263 1267 kobject_put(&pos->kobj); 1264 1268 } 1265 1269 1266 - kobject_put(&bank->blocks->kobj); 1267 - } 1268 - 1269 - static void threshold_remove_bank(struct threshold_bank *bank) 1270 - { 1271 - if (!bank->blocks) 1272 - goto out_free; 1273 - 1274 - deallocate_threshold_blocks(bank); 1275 - 1276 - out_free: 1277 1270 kobject_put(bank->kobj); 1278 1271 kfree(bank); 1279 1272 } ··· 1281 1296 kfree(bp); 1282 1297 } 1283 1298 1284 - int mce_threshold_remove_device(unsigned int cpu) 1299 + void mce_threshold_remove_device(unsigned int cpu) 1285 1300 { 1286 1301 struct threshold_bank **bp = this_cpu_read(threshold_banks); 1287 1302 1288 1303 if (!bp) 1289 - return 0; 1304 + return; 1290 1305 1291 1306 /* 1292 1307 * Clear the pointer before cleaning up, so that the interrupt won't ··· 1295 1310 this_cpu_write(threshold_banks, NULL); 1296 1311 1297 1312 __threshold_remove_device(bp); 1298 - return 0; 1313 + return; 1299 1314 } 1300 1315 1301 1316 /** ··· 1309 1324 * thread running on @cpu. The callback is invoked on all CPUs which are 1310 1325 * online when the callback is installed or during a real hotplug event. 
1311 1326 */ 1312 - int mce_threshold_create_device(unsigned int cpu) 1327 + void mce_threshold_create_device(unsigned int cpu) 1313 1328 { 1314 1329 unsigned int numbanks, bank; 1315 1330 struct threshold_bank **bp; 1316 - int err; 1317 1331 1318 1332 if (!mce_flags.amd_threshold) 1319 - return 0; 1333 + return; 1320 1334 1321 1335 bp = this_cpu_read(threshold_banks); 1322 1336 if (bp) 1323 - return 0; 1337 + return; 1324 1338 1325 1339 numbanks = this_cpu_read(mce_num_banks); 1326 1340 bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL); 1327 1341 if (!bp) 1328 - return -ENOMEM; 1342 + return; 1329 1343 1330 1344 for (bank = 0; bank < numbanks; ++bank) { 1331 1345 if (!(this_cpu_read(bank_map) & BIT_ULL(bank))) 1332 1346 continue; 1333 - err = threshold_create_bank(bp, cpu, bank); 1334 - if (err) { 1347 + if (threshold_create_bank(bp, cpu, bank)) { 1335 1348 __threshold_remove_device(bp); 1336 - return err; 1349 + return; 1337 1350 } 1338 1351 } 1339 1352 this_cpu_write(threshold_banks, bp); 1340 1353 1341 1354 if (thresholding_irq_en) 1342 1355 mce_threshold_vector = amd_threshold_interrupt; 1343 - return 0; 1356 + return; 1344 1357 }
+134 -185
arch/x86/kernel/cpu/mce/core.c
··· 423 423 return EAX_EDX_VAL(val, low, high); 424 424 } 425 425 426 - static noinstr void mce_wrmsrq(u32 msr, u64 v) 426 + noinstr void mce_wrmsrq(u32 msr, u64 v) 427 427 { 428 428 u32 low, high; 429 429 ··· 715 715 DEFINE_PER_CPU(unsigned, mce_poll_count); 716 716 717 717 /* 718 + * Newer Intel systems that support software error 719 + * recovery need to make additional checks. Other 720 + * CPUs should skip over uncorrected errors, but log 721 + * everything else. 722 + */ 723 + static bool ser_should_log_poll_error(struct mce *m) 724 + { 725 + /* Log "not enabled" (speculative) errors */ 726 + if (!(m->status & MCI_STATUS_EN)) 727 + return true; 728 + 729 + /* 730 + * Log UCNA (SDM: 15.6.3 "UCR Error Classification") 731 + * UC == 1 && PCC == 0 && S == 0 732 + */ 733 + if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S)) 734 + return true; 735 + 736 + return false; 737 + } 738 + 739 + static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err) 740 + { 741 + struct mce *m = &err->m; 742 + 743 + /* If this entry is not valid, ignore it. */ 744 + if (!(m->status & MCI_STATUS_VAL)) 745 + return false; 746 + 747 + /* 748 + * If we are logging everything (at CPU online) or this 749 + * is a corrected error, then we must log it. 750 + */ 751 + if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC)) 752 + return true; 753 + 754 + if (mca_cfg.ser) 755 + return ser_should_log_poll_error(m); 756 + 757 + if (m->status & MCI_STATUS_UC) 758 + return false; 759 + 760 + return true; 761 + } 762 + 763 + static void clear_bank(struct mce *m) 764 + { 765 + if (m->cpuvendor == X86_VENDOR_AMD) 766 + return amd_clear_bank(m); 767 + 768 + mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); 769 + } 770 + 771 + /* 718 772 * Poll for corrected events or events that happened before reset. 719 773 * Those are just logged through /dev/mcelog. 
720 774 * ··· 819 765 if (!mca_cfg.cmci_disabled) 820 766 mce_track_storm(m); 821 767 822 - /* If this entry is not valid, ignore it */ 823 - if (!(m->status & MCI_STATUS_VAL)) 768 + /* Verify that the error should be logged based on hardware conditions. */ 769 + if (!should_log_poll_error(flags, &err)) 824 770 continue; 825 - 826 - /* 827 - * If we are logging everything (at CPU online) or this 828 - * is a corrected error, then we must log it. 829 - */ 830 - if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC)) 831 - goto log_it; 832 - 833 - /* 834 - * Newer Intel systems that support software error 835 - * recovery need to make additional checks. Other 836 - * CPUs should skip over uncorrected errors, but log 837 - * everything else. 838 - */ 839 - if (!mca_cfg.ser) { 840 - if (m->status & MCI_STATUS_UC) 841 - continue; 842 - goto log_it; 843 - } 844 - 845 - /* Log "not enabled" (speculative) errors */ 846 - if (!(m->status & MCI_STATUS_EN)) 847 - goto log_it; 848 - 849 - /* 850 - * Log UCNA (SDM: 15.6.3 "UCR Error Classification") 851 - * UC == 1 && PCC == 0 && S == 0 852 - */ 853 - if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S)) 854 - goto log_it; 855 - 856 - /* 857 - * Skip anything else. Presumption is that our read of this 858 - * bank is racing with a machine check. Leave the log alone 859 - * for do_machine_check() to deal with it. 860 - */ 861 - continue; 862 - 863 - log_it: 864 - if (flags & MCP_DONTLOG) 865 - goto clear_it; 866 771 867 772 mce_read_aux(&err, i); 868 773 m->severity = mce_severity(m, NULL, NULL, false); ··· 839 826 mce_log(&err); 840 827 841 828 clear_it: 842 - /* 843 - * Clear state for this bank. 
844 - */ 845 - mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0); 829 + clear_bank(m); 846 830 } 847 831 848 832 /* ··· 1820 1810 struct mce_bank *b = &mce_banks[i]; 1821 1811 1822 1812 /* 1823 - * Init them all, __mcheck_cpu_apply_quirks() is going to apply 1824 - * the required vendor quirks before 1825 - * __mcheck_cpu_init_clear_banks() does the final bank setup. 1813 + * Init them all by default. 1814 + * 1815 + * The required vendor quirks will be applied before 1816 + * __mcheck_cpu_init_prepare_banks() does the final bank setup. 1826 1817 */ 1827 1818 b->ctl = -1ULL; 1828 1819 b->init = true; ··· 1851 1840 this_cpu_write(mce_num_banks, b); 1852 1841 1853 1842 __mcheck_cpu_mce_banks_init(); 1854 - 1855 - /* Use accurate RIP reporting if available. */ 1856 - if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) 1857 - mca_cfg.rip_msr = MSR_IA32_MCG_EIP; 1858 - 1859 - if (cap & MCG_SER_P) 1860 - mca_cfg.ser = 1; 1861 1843 } 1862 1844 1863 1845 static void __mcheck_cpu_init_generic(void) 1864 1846 { 1865 - enum mcp_flags m_fl = 0; 1866 - mce_banks_t all_banks; 1867 1847 u64 cap; 1868 - 1869 - if (!mca_cfg.bootlog) 1870 - m_fl = MCP_DONTLOG; 1871 - 1872 - /* 1873 - * Log the machine checks left over from the previous reset. Log them 1874 - * only, do not start processing them. That will happen in mcheck_late_init() 1875 - * when all consumers have been registered on the notifier chain. 
1876 - */ 1877 - bitmap_fill(all_banks, MAX_NR_BANKS); 1878 - machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks); 1879 - 1880 - cr4_set_bits(X86_CR4_MCE); 1881 1848 1882 1849 rdmsrq(MSR_IA32_MCG_CAP, cap); 1883 1850 if (cap & MCG_CTL_P) 1884 1851 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 1885 1852 } 1886 1853 1887 - static void __mcheck_cpu_init_clear_banks(void) 1888 - { 1889 - struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); 1890 - int i; 1891 - 1892 - for (i = 0; i < this_cpu_read(mce_num_banks); i++) { 1893 - struct mce_bank *b = &mce_banks[i]; 1894 - 1895 - if (!b->init) 1896 - continue; 1897 - wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl); 1898 - wrmsrq(mca_msr_reg(i, MCA_STATUS), 0); 1899 - } 1900 - } 1901 - 1902 - /* 1903 - * Do a final check to see if there are any unused/RAZ banks. 1904 - * 1905 - * This must be done after the banks have been initialized and any quirks have 1906 - * been applied. 1907 - * 1908 - * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs. 1909 - * Otherwise, a user who disables a bank will not be able to re-enable it 1910 - * without a system reboot. 1911 - */ 1912 - static void __mcheck_cpu_check_banks(void) 1854 + static void __mcheck_cpu_init_prepare_banks(void) 1913 1855 { 1914 1856 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); 1915 1857 u64 msrval; 1916 1858 int i; 1917 1859 1860 + /* 1861 + * Log the machine checks left over from the previous reset. Log them 1862 + * only, do not start processing them. That will happen in mcheck_late_init() 1863 + * when all consumers have been registered on the notifier chain. 
1864 + */ 1865 + if (mca_cfg.bootlog) { 1866 + mce_banks_t all_banks; 1867 + 1868 + bitmap_fill(all_banks, MAX_NR_BANKS); 1869 + machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks); 1870 + } 1871 + 1918 1872 for (i = 0; i < this_cpu_read(mce_num_banks); i++) { 1919 1873 struct mce_bank *b = &mce_banks[i]; 1920 1874 1921 1875 if (!b->init) 1922 1876 continue; 1877 + 1878 + wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl); 1879 + wrmsrq(mca_msr_reg(i, MCA_STATUS), 0); 1923 1880 1924 1881 rdmsrq(mca_msr_reg(i, MCA_CTL), msrval); 1925 1882 b->init = !!msrval; 1926 1883 } 1927 1884 } 1928 1885 1929 - static void apply_quirks_amd(struct cpuinfo_x86 *c) 1886 + static void amd_apply_global_quirks(struct cpuinfo_x86 *c) 1930 1887 { 1931 - struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); 1932 - 1933 - /* This should be disabled by the BIOS, but isn't always */ 1934 - if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { 1935 - /* 1936 - * disable GART TBL walk error reporting, which 1937 - * trips off incorrectly with the IOMMU & 3ware 1938 - * & Cerberus: 1939 - */ 1940 - clear_bit(10, (unsigned long *)&mce_banks[4].ctl); 1941 - } 1942 - 1943 1888 if (c->x86 < 0x11 && mca_cfg.bootlog < 0) { 1944 1889 /* 1945 1890 * Lots of broken BIOS around that don't clear them ··· 1903 1936 */ 1904 1937 mca_cfg.bootlog = 0; 1905 1938 } 1906 - 1907 - /* 1908 - * Various K7s with broken bank 0 around. Always disable 1909 - * by default. 1910 - */ 1911 - if (c->x86 == 6 && this_cpu_read(mce_num_banks)) 1912 - mce_banks[0].ctl = 0; 1913 1939 1914 1940 /* 1915 1941 * overflow_recov is supported for F15h Models 00h-0fh ··· 1915 1955 mce_flags.zen_ifu_quirk = 1; 1916 1956 } 1917 1957 1918 - static void apply_quirks_intel(struct cpuinfo_x86 *c) 1958 + static void intel_apply_global_quirks(struct cpuinfo_x86 *c) 1919 1959 { 1920 - struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); 1921 - 1922 1960 /* Older CPUs (prior to family 6) don't need quirks. 
*/ 1923 1961 if (c->x86_vfm < INTEL_PENTIUM_PRO) 1924 1962 return; 1925 - 1926 - /* 1927 - * SDM documents that on family 6 bank 0 should not be written 1928 - * because it aliases to another special BIOS controlled 1929 - * register. 1930 - * But it's not aliased anymore on model 0x1a+ 1931 - * Don't ignore bank 0 completely because there could be a 1932 - * valid event later, merely don't write CTL0. 1933 - */ 1934 - if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks)) 1935 - mce_banks[0].init = false; 1936 1963 1937 1964 /* 1938 1965 * All newer Intel systems support MCE broadcasting. Enable ··· 1946 1999 mce_flags.skx_repmov_quirk = 1; 1947 2000 } 1948 2001 1949 - static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c) 2002 + static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c) 1950 2003 { 1951 2004 /* 1952 2005 * All newer Zhaoxin CPUs support MCE broadcasting. Enable ··· 1956 2009 if (mca_cfg.monarch_timeout < 0) 1957 2010 mca_cfg.monarch_timeout = USEC_PER_SEC; 1958 2011 } 1959 - } 1960 - 1961 - /* Add per CPU specific workarounds here */ 1962 - static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) 1963 - { 1964 - struct mca_config *cfg = &mca_cfg; 1965 - 1966 - switch (c->x86_vendor) { 1967 - case X86_VENDOR_UNKNOWN: 1968 - pr_info("unknown CPU type - not enabling MCE support\n"); 1969 - return false; 1970 - case X86_VENDOR_AMD: 1971 - apply_quirks_amd(c); 1972 - break; 1973 - case X86_VENDOR_INTEL: 1974 - apply_quirks_intel(c); 1975 - break; 1976 - case X86_VENDOR_ZHAOXIN: 1977 - apply_quirks_zhaoxin(c); 1978 - break; 1979 - } 1980 - 1981 - if (cfg->monarch_timeout < 0) 1982 - cfg->monarch_timeout = 0; 1983 - if (cfg->bootlog != 0) 1984 - cfg->panic_timeout = 30; 1985 - 1986 - return true; 1987 2012 } 1988 2013 1989 2014 static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) ··· 1977 2058 } 1978 2059 1979 2060 return false; 1980 - } 1981 - 1982 - /* 1983 - * Init basic CPU features needed for early decoding of MCEs. 
1984 - */ 1985 - static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c) 1986 - { 1987 - if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) { 1988 - mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV); 1989 - mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR); 1990 - mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA); 1991 - mce_flags.amd_threshold = 1; 1992 - } 1993 2061 } 1994 2062 1995 2063 static void mce_centaur_feature_init(struct cpuinfo_x86 *c) ··· 2187 2281 } 2188 2282 #endif 2189 2283 2284 + void mca_bsp_init(struct cpuinfo_x86 *c) 2285 + { 2286 + u64 cap; 2287 + 2288 + if (!mce_available(c)) 2289 + return; 2290 + 2291 + if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 2292 + mca_cfg.disabled = 1; 2293 + pr_info("unknown CPU type - not enabling MCE support\n"); 2294 + return; 2295 + } 2296 + 2297 + mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV); 2298 + mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR); 2299 + mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA); 2300 + 2301 + if (mce_flags.smca) 2302 + smca_bsp_init(); 2303 + 2304 + rdmsrq(MSR_IA32_MCG_CAP, cap); 2305 + 2306 + /* Use accurate RIP reporting if available. */ 2307 + if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) 2308 + mca_cfg.rip_msr = MSR_IA32_MCG_EIP; 2309 + 2310 + if (cap & MCG_SER_P) 2311 + mca_cfg.ser = 1; 2312 + 2313 + switch (c->x86_vendor) { 2314 + case X86_VENDOR_AMD: 2315 + amd_apply_global_quirks(c); 2316 + break; 2317 + case X86_VENDOR_INTEL: 2318 + intel_apply_global_quirks(c); 2319 + break; 2320 + case X86_VENDOR_ZHAOXIN: 2321 + zhaoxin_apply_global_quirks(c); 2322 + break; 2323 + } 2324 + 2325 + if (mca_cfg.monarch_timeout < 0) 2326 + mca_cfg.monarch_timeout = 0; 2327 + if (mca_cfg.bootlog != 0) 2328 + mca_cfg.panic_timeout = 30; 2329 + } 2330 + 2190 2331 /* 2191 2332 * Called for each booted CPU to set up machine checks. 
2192 2333 * Must be called with preempt off: ··· 2251 2298 2252 2299 __mcheck_cpu_cap_init(); 2253 2300 2254 - if (!__mcheck_cpu_apply_quirks(c)) { 2255 - mca_cfg.disabled = 1; 2256 - return; 2257 - } 2258 - 2259 2301 if (!mce_gen_pool_init()) { 2260 2302 mca_cfg.disabled = 1; 2261 2303 pr_emerg("Couldn't allocate MCE records pool!\n"); ··· 2259 2311 2260 2312 mca_cfg.initialized = 1; 2261 2313 2262 - __mcheck_cpu_init_early(c); 2263 2314 __mcheck_cpu_init_generic(); 2264 2315 __mcheck_cpu_init_vendor(c); 2265 - __mcheck_cpu_init_clear_banks(); 2266 - __mcheck_cpu_check_banks(); 2316 + __mcheck_cpu_init_prepare_banks(); 2267 2317 __mcheck_cpu_setup_timer(); 2318 + cr4_set_bits(X86_CR4_MCE); 2268 2319 } 2269 2320 2270 2321 /* ··· 2430 2483 { 2431 2484 __mcheck_cpu_init_generic(); 2432 2485 __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); 2433 - __mcheck_cpu_init_clear_banks(); 2486 + __mcheck_cpu_init_prepare_banks(); 2487 + cr4_set_bits(X86_CR4_MCE); 2434 2488 } 2435 2489 2436 2490 static struct syscore_ops mce_syscore_ops = { ··· 2449 2501 if (!mce_available(raw_cpu_ptr(&cpu_info))) 2450 2502 return; 2451 2503 __mcheck_cpu_init_generic(); 2452 - __mcheck_cpu_init_clear_banks(); 2504 + __mcheck_cpu_init_prepare_banks(); 2453 2505 __mcheck_cpu_init_timer(); 2506 + cr4_set_bits(X86_CR4_MCE); 2454 2507 } 2455 2508 2456 2509 /* Reinit MCEs after user configuration changes */
+18
arch/x86/kernel/cpu/mce/intel.c
··· 468 468 } 469 469 } 470 470 471 + static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c) 472 + { 473 + /* 474 + * SDM documents that on family 6 bank 0 should not be written 475 + * because it aliases to another special BIOS controlled 476 + * register. 477 + * But it's not aliased anymore on model 0x1a+ 478 + * Don't ignore bank 0 completely because there could be a 479 + * valid event later, merely don't write CTL0. 480 + * 481 + * Older CPUs (prior to family 6) can't reach this point and already 482 + * return early due to the check of __mcheck_cpu_ancient_init(). 483 + */ 484 + if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks)) 485 + this_cpu_ptr(mce_banks_array)[0].init = false; 486 + } 487 + 471 488 void mce_intel_feature_init(struct cpuinfo_x86 *c) 472 489 { 490 + intel_apply_cpu_quirks(c); 473 491 intel_init_cmci(); 474 492 intel_init_lmce(); 475 493 intel_imc_init(c);
+9
arch/x86/kernel/cpu/mce/internal.h
··· 265 265 void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m); 266 266 267 267 #ifdef CONFIG_X86_MCE_AMD 268 + void mce_threshold_create_device(unsigned int cpu); 269 + void mce_threshold_remove_device(unsigned int cpu); 268 270 extern bool amd_filter_mce(struct mce *m); 269 271 bool amd_mce_usable_address(struct mce *m); 272 + void amd_clear_bank(struct mce *m); 270 273 271 274 /* 272 275 * If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits ··· 295 292 m->addr &= GENMASK_ULL(55, lsb); 296 293 } 297 294 295 + void smca_bsp_init(void); 298 296 #else 297 + static inline void mce_threshold_create_device(unsigned int cpu) { } 298 + static inline void mce_threshold_remove_device(unsigned int cpu) { } 299 299 static inline bool amd_filter_mce(struct mce *m) { return false; } 300 300 static inline bool amd_mce_usable_address(struct mce *m) { return false; } 301 + static inline void amd_clear_bank(struct mce *m) { } 301 302 static inline void smca_extract_err_addr(struct mce *m) { } 303 + static inline void smca_bsp_init(void) { } 302 304 #endif 303 305 304 306 #ifdef CONFIG_X86_ANCIENT_MCE ··· 321 313 #endif 322 314 323 315 noinstr u64 mce_rdmsrq(u32 msr); 316 + noinstr void mce_wrmsrq(u32 msr, u64 v); 324 317 325 318 static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg) 326 319 {