Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86_urgent_for_v5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
"A bunch of x86/urgent stuff accumulated for the last two weeks so
lemme unload it to you.

It should be all totally risk-free, of course. :-)

- Fix out-of-spec hardware (1st gen Hygon) which does not implement
MSR_AMD64_SEV even though the spec clearly states so, and check
CPUID bits first.

- Send only one signal to a task when it is a SEGV_PKUERR si_code
type.

- Do away with all the wankery of reserving X amount of memory in the
first megabyte to prevent BIOS corrupting it and simply and
unconditionally reserve the whole first megabyte.

- Make alternatives NOP optimization work at an arbitrary position
within the patched sequence because the compiler can put
single-byte NOPs for alignment anywhere in the sequence (32-bit
retpoline), vs our previous assumption that the NOPs are only
appended.

- Force-disable ENQCMD[S] instructions support and remove
update_pasid() because of insufficient protection against FPU state
modification in an interrupt context, among other xstate horrors
which are being addressed at the moment. This one limits the
fallout until proper enablement.

- Use cpu_feature_enabled() in the idxd driver so that it can be
build-time disabled through the defines in disabled-features.h.

- Fix LVT thermal setup for SMI delivery mode by making sure the APIC
LVT value is read before APIC initialization so that softlockups
during boot do not happen at least on one machine.

- Mark all legacy interrupts as legacy vectors when the IO-APIC is
disabled and when all legacy interrupts are routed through the PIC"

* tag 'x86_urgent_for_v5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/sev: Check SME/SEV support in CPUID first
x86/fault: Don't send SIGSEGV twice on SEGV_PKUERR
x86/setup: Always reserve the first 1M of RAM
x86/alternative: Optimize single-byte NOPs at an arbitrary position
x86/cpufeatures: Force disable X86_FEATURE_ENQCMD and remove update_pasid()
dmaengine: idxd: Use cpu_feature_enabled()
x86/thermal: Fix LVT thermal setup for SMI delivery mode
x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing

+145 -126
+1
arch/x86/include/asm/apic.h
··· 174 174 extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); 175 175 extern void lapic_assign_system_vectors(void); 176 176 extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); 177 + extern void lapic_update_legacy_vectors(void); 177 178 extern void lapic_online(void); 178 179 extern void lapic_offline(void); 179 180 extern bool apic_needs_pit(void);
+2 -5
arch/x86/include/asm/disabled-features.h
··· 56 56 # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) 57 57 #endif 58 58 59 - #ifdef CONFIG_IOMMU_SUPPORT 60 - # define DISABLE_ENQCMD 0 61 - #else 62 - # define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) 63 - #endif 59 + /* Force disable because it's broken beyond repair */ 60 + #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) 64 61 65 62 #ifdef CONFIG_X86_SGX 66 63 # define DISABLE_SGX 0
+1 -5
arch/x86/include/asm/fpu/api.h
··· 106 106 */ 107 107 #define PASID_DISABLED 0 108 108 109 - #ifdef CONFIG_IOMMU_SUPPORT 110 - /* Update current's PASID MSR/state by mm's PASID. */ 111 - void update_pasid(void); 112 - #else 113 109 static inline void update_pasid(void) { } 114 - #endif 110 + 115 111 #endif /* _ASM_X86_FPU_API_H */
-7
arch/x86/include/asm/fpu/internal.h
··· 584 584 pkru_val = pk->pkru; 585 585 } 586 586 __write_pkru(pkru_val); 587 - 588 - /* 589 - * Expensive PASID MSR write will be avoided in update_pasid() because 590 - * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated 591 - * unless it's different from mm->pasid to reduce overhead. 592 - */ 593 - update_pasid(); 594 587 } 595 588 596 589 #endif /* _ASM_X86_FPU_INTERNAL_H */
+3 -1
arch/x86/include/asm/thermal.h
··· 3 3 #define _ASM_X86_THERMAL_H 4 4 5 5 #ifdef CONFIG_X86_THERMAL_VECTOR 6 + void therm_lvt_init(void); 6 7 void intel_init_thermal(struct cpuinfo_x86 *c); 7 8 bool x86_thermal_enabled(void); 8 9 void intel_thermal_interrupt(void); 9 10 #else 10 - static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } 11 + static inline void therm_lvt_init(void) { } 12 + static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } 11 13 #endif 12 14 13 15 #endif /* _ASM_X86_THERMAL_H */
+46 -18
arch/x86/kernel/alternative.c
··· 183 183 } 184 184 185 185 /* 186 + * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90) 187 + * 188 + * @instr: instruction byte stream 189 + * @instrlen: length of the above 190 + * @off: offset within @instr where the first NOP has been detected 191 + * 192 + * Return: number of NOPs found (and replaced). 193 + */ 194 + static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) 195 + { 196 + unsigned long flags; 197 + int i = off, nnops; 198 + 199 + while (i < instrlen) { 200 + if (instr[i] != 0x90) 201 + break; 202 + 203 + i++; 204 + } 205 + 206 + nnops = i - off; 207 + 208 + if (nnops <= 1) 209 + return nnops; 210 + 211 + local_irq_save(flags); 212 + add_nops(instr + off, nnops); 213 + local_irq_restore(flags); 214 + 215 + DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); 216 + 217 + return nnops; 218 + } 219 + 220 + /* 186 221 * "noinline" to cause control flow change and thus invalidate I$ and 187 222 * cause refetch after modification. 188 223 */ 189 224 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) 190 225 { 191 - unsigned long flags; 192 226 struct insn insn; 193 - int nop, i = 0; 227 + int i = 0; 194 228 195 229 /* 196 - * Jump over the non-NOP insns, the remaining bytes must be single-byte 197 - * NOPs, optimize them. 230 + * Jump over the non-NOP insns and optimize single-byte NOPs into bigger 231 + * ones. 198 232 */ 199 233 for (;;) { 200 234 if (insn_decode_kernel(&insn, &instr[i])) 201 235 return; 202 236 237 + /* 238 + * See if this and any potentially following NOPs can be 239 + * optimized. 
240 + */ 203 241 if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) 204 - break; 242 + i += optimize_nops_range(instr, a->instrlen, i); 243 + else 244 + i += insn.length; 205 245 206 - if ((i += insn.length) >= a->instrlen) 246 + if (i >= a->instrlen) 207 247 return; 208 248 } 209 - 210 - for (nop = i; i < a->instrlen; i++) { 211 - if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i])) 212 - return; 213 - } 214 - 215 - local_irq_save(flags); 216 - add_nops(instr + nop, i - nop); 217 - local_irq_restore(flags); 218 - 219 - DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", 220 - instr, nop, a->instrlen); 221 249 } 222 250 223 251 /*
+1
arch/x86/kernel/apic/apic.c
··· 2604 2604 end_local_APIC_setup(); 2605 2605 irq_remap_enable_fault_handling(); 2606 2606 setup_IO_APIC(); 2607 + lapic_update_legacy_vectors(); 2607 2608 } 2608 2609 2609 2610 #ifdef CONFIG_UP_LATE_INIT
+20
arch/x86/kernel/apic/vector.c
··· 738 738 irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); 739 739 } 740 740 741 + void __init lapic_update_legacy_vectors(void) 742 + { 743 + unsigned int i; 744 + 745 + if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0) 746 + return; 747 + 748 + /* 749 + * If the IO/APIC is disabled via config, kernel command line or 750 + * lack of enumeration then all legacy interrupts are routed 751 + * through the PIC. Make sure that they are marked as legacy 752 + * vectors. PIC_CASCADE_IR has already been marked in 753 + * lapic_assign_system_vectors(). 754 + */ 755 + for (i = 0; i < nr_legacy_irqs(); i++) { 756 + if (i != PIC_CASCADE_IR) 757 + lapic_assign_legacy_vector(i, true); 758 + } 759 + } 760 + 741 761 void __init lapic_assign_system_vectors(void) 742 762 { 743 763 unsigned int i, vector = 0;
-57
arch/x86/kernel/fpu/xstate.c
··· 1402 1402 return 0; 1403 1403 } 1404 1404 #endif /* CONFIG_PROC_PID_ARCH_STATUS */ 1405 - 1406 - #ifdef CONFIG_IOMMU_SUPPORT 1407 - void update_pasid(void) 1408 - { 1409 - u64 pasid_state; 1410 - u32 pasid; 1411 - 1412 - if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) 1413 - return; 1414 - 1415 - if (!current->mm) 1416 - return; 1417 - 1418 - pasid = READ_ONCE(current->mm->pasid); 1419 - /* Set the valid bit in the PASID MSR/state only for valid pasid. */ 1420 - pasid_state = pasid == PASID_DISABLED ? 1421 - pasid : pasid | MSR_IA32_PASID_VALID; 1422 - 1423 - /* 1424 - * No need to hold fregs_lock() since the task's fpstate won't 1425 - * be changed by others (e.g. ptrace) while the task is being 1426 - * switched to or is in IPI. 1427 - */ 1428 - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { 1429 - /* The MSR is active and can be directly updated. */ 1430 - wrmsrl(MSR_IA32_PASID, pasid_state); 1431 - } else { 1432 - struct fpu *fpu = &current->thread.fpu; 1433 - struct ia32_pasid_state *ppasid_state; 1434 - struct xregs_state *xsave; 1435 - 1436 - /* 1437 - * The CPU's xstate registers are not currently active. Just 1438 - * update the PASID state in the memory buffer here. The 1439 - * PASID MSR will be loaded when returning to user mode. 1440 - */ 1441 - xsave = &fpu->state.xsave; 1442 - xsave->header.xfeatures |= XFEATURE_MASK_PASID; 1443 - ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID); 1444 - /* 1445 - * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state 1446 - * won't be NULL and no need to check its value. 1447 - * 1448 - * Only update the task's PASID state when it's different 1449 - * from the mm's pasid. 1450 - */ 1451 - if (ppasid_state->pasid != pasid_state) { 1452 - /* 1453 - * Invalid fpregs so that state restoring will pick up 1454 - * the PASID state. 1455 - */ 1456 - __fpu_invalidate_fpregs_state(fpu); 1457 - ppasid_state->pasid = pasid_state; 1458 - } 1459 - } 1460 - } 1461 - #endif /* CONFIG_IOMMU_SUPPORT */
+30 -14
arch/x86/kernel/setup.c
··· 44 44 #include <asm/pci-direct.h> 45 45 #include <asm/prom.h> 46 46 #include <asm/proto.h> 47 + #include <asm/thermal.h> 47 48 #include <asm/unwind.h> 48 49 #include <asm/vsyscall.h> 49 50 #include <linux/vmalloc.h> ··· 638 637 * them from accessing certain memory ranges, namely anything below 639 638 * 1M and in the pages listed in bad_pages[] above. 640 639 * 641 - * To avoid these pages being ever accessed by SNB gfx devices 642 - * reserve all memory below the 1 MB mark and bad_pages that have 643 - * not already been reserved at boot time. 640 + * To avoid these pages being ever accessed by SNB gfx devices reserve 641 + * bad_pages that have not already been reserved at boot time. 642 + * All memory below the 1 MB mark is anyway reserved later during 643 + * setup_arch(), so there is no need to reserve it here. 644 644 */ 645 - memblock_reserve(0, 1<<20); 646 645 647 646 for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { 648 647 if (memblock_reserve(bad_pages[i], PAGE_SIZE)) ··· 734 733 * The first 4Kb of memory is a BIOS owned area, but generally it is 735 734 * not listed as such in the E820 table. 736 735 * 737 - * Reserve the first memory page and typically some additional 738 - * memory (64KiB by default) since some BIOSes are known to corrupt 739 - * low memory. See the Kconfig help text for X86_RESERVE_LOW. 736 + * Reserve the first 64K of memory since some BIOSes are known to 737 + * corrupt low memory. After the real mode trampoline is allocated the 738 + * rest of the memory below 640k is reserved. 740 739 * 741 740 * In addition, make sure page 0 is always reserved because on 742 741 * systems with L1TF its contents can be leaked to user processes. 
743 742 */ 744 - memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); 743 + memblock_reserve(0, SZ_64K); 745 744 746 745 early_reserve_initrd(); 747 746 ··· 752 751 753 752 reserve_ibft_region(); 754 753 reserve_bios_regions(); 754 + trim_snb_memory(); 755 755 } 756 756 757 757 /* ··· 1083 1081 (max_pfn_mapped<<PAGE_SHIFT) - 1); 1084 1082 #endif 1085 1083 1086 - reserve_real_mode(); 1087 - 1088 1084 /* 1089 - * Reserving memory causing GPU hangs on Sandy Bridge integrated 1090 - * graphics devices should be done after we allocated memory under 1091 - * 1M for the real mode trampoline. 1085 + * Find free memory for the real mode trampoline and place it 1086 + * there. 1087 + * If there is not enough free memory under 1M, on EFI-enabled 1088 + * systems there will be an additional attempt to reclaim the memory 1089 + * for the real mode trampoline at efi_free_boot_services(). 1090 + * 1091 + * Unconditionally reserve the entire first 1M of RAM because 1092 + * BIOSes are known to corrupt low memory and several 1093 + * hundred kilobytes are not worth complex detection what memory gets 1094 + * clobbered. Moreover, on machines with SandyBridge graphics or in 1095 + * setups that use crashkernel the entire 1M is reserved anyway. 1092 1096 */ 1093 - trim_snb_memory(); 1097 + reserve_real_mode(); 1094 1098 1095 1099 init_mem_mapping(); 1096 1100 ··· 1233 1225 x86_init.oem.banner(); 1234 1226 1235 1227 x86_init.timers.wallclock_init(); 1228 + 1229 + /* 1230 + * This needs to run before setup_local_APIC() which soft-disables the 1231 + * local APIC temporarily and that masks the thermal LVT interrupt, 1232 + * leading to softlockups on machines which have configured SMI 1233 + * interrupt delivery. 1234 + */ 1235 + therm_lvt_init(); 1236 1236 1237 1237 mcheck_init(); 1238 1238
+2 -2
arch/x86/mm/fault.c
··· 836 836 837 837 if (si_code == SEGV_PKUERR) 838 838 force_sig_pkuerr((void __user *)address, pkey); 839 - 840 - force_sig_fault(SIGSEGV, si_code, (void __user *)address); 839 + else 840 + force_sig_fault(SIGSEGV, si_code, (void __user *)address); 841 841 842 842 local_irq_disable(); 843 843 }
+6 -5
arch/x86/mm/mem_encrypt_identity.c
··· 504 504 #define AMD_SME_BIT BIT(0) 505 505 #define AMD_SEV_BIT BIT(1) 506 506 507 - /* Check the SEV MSR whether SEV or SME is enabled */ 508 - sev_status = __rdmsr(MSR_AMD64_SEV); 509 - feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; 510 - 511 507 /* 512 508 * Check for the SME/SEV feature: 513 509 * CPUID Fn8000_001F[EAX] ··· 515 519 eax = 0x8000001f; 516 520 ecx = 0; 517 521 native_cpuid(&eax, &ebx, &ecx, &edx); 518 - if (!(eax & feature_mask)) 522 + /* Check whether SEV or SME is supported */ 523 + if (!(eax & (AMD_SEV_BIT | AMD_SME_BIT))) 519 524 return; 520 525 521 526 me_mask = 1UL << (ebx & 0x3f); 527 + 528 + /* Check the SEV MSR whether SEV or SME is enabled */ 529 + sev_status = __rdmsr(MSR_AMD64_SEV); 530 + feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; 522 531 523 532 /* Check if memory encryption is enabled */ 524 533 if (feature_mask == AMD_SME_BIT) {
+12
arch/x86/platform/efi/quirks.c
··· 450 450 size -= rm_size; 451 451 } 452 452 453 + /* 454 + * Don't free memory under 1M for two reasons: 455 + * - BIOS might clobber it 456 + * - Crash kernel needs it to be reserved 457 + */ 458 + if (start + size < SZ_1M) 459 + continue; 460 + if (start < SZ_1M) { 461 + size -= (SZ_1M - start); 462 + start = SZ_1M; 463 + } 464 + 453 465 memblock_free_late(start, size); 454 466 } 455 467
+8 -6
arch/x86/realmode/init.c
··· 29 29 30 30 /* Has to be under 1M so we can execute real-mode AP code. */ 31 31 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); 32 - if (!mem) { 32 + if (!mem) 33 33 pr_info("No sub-1M memory is available for the trampoline\n"); 34 - return; 35 - } 34 + else 35 + set_real_mode_mem(mem); 36 36 37 - memblock_reserve(mem, size); 38 - set_real_mode_mem(mem); 39 - crash_reserve_low_1M(); 37 + /* 38 + * Unconditionally reserve the entire first 1M, see comment in 39 + * setup_arch(). 40 + */ 41 + memblock_reserve(0, SZ_1M); 40 42 } 41 43 42 44 static void sme_sev_setup_real_mode(struct trampoline_header *th)
+2 -2
drivers/dma/idxd/init.c
··· 745 745 * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in 746 746 * enumerating the device. We can not utilize it. 747 747 */ 748 - if (!boot_cpu_has(X86_FEATURE_MOVDIR64B)) { 748 + if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) { 749 749 pr_warn("idxd driver failed to load without MOVDIR64B.\n"); 750 750 return -ENODEV; 751 751 } 752 752 753 - if (!boot_cpu_has(X86_FEATURE_ENQCMD)) 753 + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) 754 754 pr_warn("Platform does not have ENQCMD(S) support.\n"); 755 755 else 756 756 support_enqcmd = true;
+11 -4
drivers/thermal/intel/therm_throt.c
··· 621 621 return atomic_read(&therm_throt_en); 622 622 } 623 623 624 + void __init therm_lvt_init(void) 625 + { 626 + /* 627 + * This function is only called on boot CPU. Save the init thermal 628 + * LVT value on BSP and use that value to restore APs' thermal LVT 629 + * entry BIOS programmed later 630 + */ 631 + if (intel_thermal_supported(&boot_cpu_data)) 632 + lvtthmr_init = apic_read(APIC_LVTTHMR); 633 + } 634 + 624 635 void intel_init_thermal(struct cpuinfo_x86 *c) 625 636 { 626 637 unsigned int cpu = smp_processor_id(); ··· 640 629 641 630 if (!intel_thermal_supported(c)) 642 631 return; 643 - 644 - /* On the BSP? */ 645 - if (c == &boot_cpu_data) 646 - lvtthmr_init = apic_read(APIC_LVTTHMR); 647 632 648 633 /* 649 634 * First check if its enabled already, in which case there might