Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
"I hope this is the last batch of x86/urgent updates for this round:

- Remove superfluous EFI PGD range checks, which caused build-time
assertion failures with certain kernel configs and LLVM.

- Disable setting breakpoints on facilities involved in #DB exception
handling to avoid infinite loops.

- Add extra serialization to non-serializing MSRs (IA32_TSC_DEADLINE
and x2APIC MSRs) to adhere to the SDM's recommendation and avoid any
theoretical issues.

- Re-add the fallback EPB MSR read to turbostat so that it works on
older kernels which don't have the corresponding EPB sysfs file.

- Add Alder Lake to the list of CPUs which support split lock.

- Fix %dr6 register handling in order to be able to set watchpoints
with gdb again.

- Disable CET instrumentation in the kernel so that gcc doesn't add
ENDBR64 to kernel code and thus confuse tracing"

* tag 'x86_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/efi: Remove EFI PGD build time checks
x86/debug: Prevent data breakpoints on cpu_dr7
x86/debug: Prevent data breakpoints on __per_cpu_offset
x86/apic: Add extra serialization for non-serializing MSRs
tools/power/turbostat: Fallback to an MSR read for EPB
x86/split_lock: Enable the split lock feature on another Alder Lake CPU
x86/debug: Fix DR6 handling
x86/build: Disable CET instrumentation in the kernel

+87 -64
-6
Makefile
··· 949 949 # change __FILE__ to the relative path from the srctree 950 950 KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) 951 951 952 - # ensure -fcf-protection is disabled when using retpoline as it is 953 - # incompatible with -mindirect-branch=thunk-extern 954 - ifdef CONFIG_RETPOLINE 955 - KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) 956 - endif 957 - 958 952 # include additional Makefiles when needed 959 953 include-y := scripts/Makefile.extrawarn 960 954 include-$(CONFIG_KASAN) += scripts/Makefile.kasan
+3
arch/x86/Makefile
··· 120 120 121 121 KBUILD_CFLAGS += -mno-red-zone 122 122 KBUILD_CFLAGS += -mcmodel=kernel 123 + 124 + # Intel CET isn't enabled in the kernel 125 + KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) 123 126 endif 124 127 125 128 ifdef CONFIG_X86_X32
-10
arch/x86/include/asm/apic.h
··· 197 197 #endif /* !CONFIG_X86_LOCAL_APIC */ 198 198 199 199 #ifdef CONFIG_X86_X2APIC 200 - /* 201 - * Make previous memory operations globally visible before 202 - * sending the IPI through x2apic wrmsr. We need a serializing instruction or 203 - * mfence for this. 204 - */ 205 - static inline void x2apic_wrmsr_fence(void) 206 - { 207 - asm volatile("mfence" : : : "memory"); 208 - } 209 - 210 200 static inline void native_apic_msr_write(u32 reg, u32 v) 211 201 { 212 202 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
+18
arch/x86/include/asm/barrier.h
··· 84 84 85 85 #include <asm-generic/barrier.h> 86 86 87 + /* 88 + * Make previous memory operations globally visible before 89 + * a WRMSR. 90 + * 91 + * MFENCE makes writes visible, but only affects load/store 92 + * instructions. WRMSR is unfortunately not a load/store 93 + * instruction and is unaffected by MFENCE. The LFENCE ensures 94 + * that the WRMSR is not reordered. 95 + * 96 + * Most WRMSRs are full serializing instructions themselves and 97 + * do not require this barrier. This is only required for the 98 + * IA32_TSC_DEADLINE and X2APIC MSRs. 99 + */ 100 + static inline void weak_wrmsr_fence(void) 101 + { 102 + asm volatile("mfence; lfence" : : : "memory"); 103 + } 104 + 87 105 #endif /* _ASM_X86_BARRIER_H */
+4
arch/x86/kernel/apic/apic.c
··· 41 41 #include <asm/perf_event.h> 42 42 #include <asm/x86_init.h> 43 43 #include <linux/atomic.h> 44 + #include <asm/barrier.h> 44 45 #include <asm/mpspec.h> 45 46 #include <asm/i8259.h> 46 47 #include <asm/proto.h> ··· 477 476 struct clock_event_device *evt) 478 477 { 479 478 u64 tsc; 479 + 480 + /* This MSR is special and need a special fence: */ 481 + weak_wrmsr_fence(); 480 482 481 483 tsc = rdtsc(); 482 484 wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+4 -2
arch/x86/kernel/apic/x2apic_cluster.c
··· 29 29 { 30 30 u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); 31 31 32 - x2apic_wrmsr_fence(); 32 + /* x2apic MSRs are special and need a special fence: */ 33 + weak_wrmsr_fence(); 33 34 __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); 34 35 } 35 36 ··· 42 41 unsigned long flags; 43 42 u32 dest; 44 43 45 - x2apic_wrmsr_fence(); 44 + /* x2apic MSRs are special and need a special fence: */ 45 + weak_wrmsr_fence(); 46 46 local_irq_save(flags); 47 47 48 48 tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
+6 -3
arch/x86/kernel/apic/x2apic_phys.c
··· 43 43 { 44 44 u32 dest = per_cpu(x86_cpu_to_apicid, cpu); 45 45 46 - x2apic_wrmsr_fence(); 46 + /* x2apic MSRs are special and need a special fence: */ 47 + weak_wrmsr_fence(); 47 48 __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); 48 49 } 49 50 ··· 55 54 unsigned long this_cpu; 56 55 unsigned long flags; 57 56 58 - x2apic_wrmsr_fence(); 57 + /* x2apic MSRs are special and need a special fence: */ 58 + weak_wrmsr_fence(); 59 59 60 60 local_irq_save(flags); 61 61 ··· 127 125 { 128 126 unsigned long cfg = __prepare_ICR(which, vector, 0); 129 127 130 - x2apic_wrmsr_fence(); 128 + /* x2apic MSRs are special and need a special fence: */ 129 + weak_wrmsr_fence(); 131 130 native_x2apic_icr_write(cfg, 0); 132 131 } 133 132
+1
arch/x86/kernel/cpu/intel.c
··· 1159 1159 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), 1160 1160 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1), 1161 1161 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1), 1162 + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1), 1162 1163 {} 1163 1164 }; 1164 1165
+42 -23
arch/x86/kernel/hw_breakpoint.c
··· 269 269 CPU_ENTRY_AREA_TOTAL_SIZE)) 270 270 return true; 271 271 272 + /* 273 + * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU 274 + * GSBASE value via __per_cpu_offset or pcpu_unit_offsets. 275 + */ 276 + #ifdef CONFIG_SMP 277 + if (within_area(addr, end, (unsigned long)__per_cpu_offset, 278 + sizeof(unsigned long) * nr_cpu_ids)) 279 + return true; 280 + #else 281 + if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets, 282 + sizeof(pcpu_unit_offsets))) 283 + return true; 284 + #endif 285 + 272 286 for_each_possible_cpu(cpu) { 273 287 /* The original rw GDT is being used after load_direct_gdt() */ 274 288 if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu), ··· 306 292 if (within_area(addr, end, 307 293 (unsigned long)&per_cpu(cpu_tlbstate, cpu), 308 294 sizeof(struct tlb_state))) 295 + return true; 296 + 297 + /* 298 + * When in guest (X86_FEATURE_HYPERVISOR), local_db_save() 299 + * will read per-cpu cpu_dr7 before clear dr7 register. 300 + */ 301 + if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu), 302 + sizeof(cpu_dr7))) 309 303 return true; 310 304 } 311 305 ··· 513 491 struct perf_event *bp; 514 492 unsigned long *dr6_p; 515 493 unsigned long dr6; 494 + bool bpx; 516 495 517 496 /* The DR6 value is pointed by args->err */ 518 497 dr6_p = (unsigned long *)ERR_PTR(args->err); 519 498 dr6 = *dr6_p; 520 - 521 - /* If it's a single step, TRAP bits are random */ 522 - if (dr6 & DR_STEP) 523 - return NOTIFY_DONE; 524 499 525 500 /* Do an early return if no trap bits are set in DR6 */ 526 501 if ((dr6 & DR_TRAP_BITS) == 0) ··· 528 509 if (likely(!(dr6 & (DR_TRAP0 << i)))) 529 510 continue; 530 511 531 - /* 532 - * The counter may be concurrently released but that can only 533 - * occur from a call_rcu() path. We can then safely fetch 534 - * the breakpoint, use its callback, touch its counter 535 - * while we are in an rcu_read_lock() path. 
536 - */ 537 - rcu_read_lock(); 538 - 539 512 bp = this_cpu_read(bp_per_reg[i]); 513 + if (!bp) 514 + continue; 515 + 516 + bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE; 517 + 518 + /* 519 + * TF and data breakpoints are traps and can be merged, however 520 + * instruction breakpoints are faults and will be raised 521 + * separately. 522 + * 523 + * However DR6 can indicate both TF and instruction 524 + * breakpoints. In that case take TF as that has precedence and 525 + * delay the instruction breakpoint for the next exception. 526 + */ 527 + if (bpx && (dr6 & DR_STEP)) 528 + continue; 529 + 540 530 /* 541 531 * Reset the 'i'th TRAP bit in dr6 to denote completion of 542 532 * exception handling 543 533 */ 544 534 (*dr6_p) &= ~(DR_TRAP0 << i); 545 - /* 546 - * bp can be NULL due to lazy debug register switching 547 - * or due to concurrent perf counter removing. 548 - */ 549 - if (!bp) { 550 - rcu_read_unlock(); 551 - break; 552 - } 553 535 554 536 perf_bp_event(bp, args->regs); 555 537 ··· 558 538 * Set up resume flag to avoid breakpoint recursion when 559 539 * returning back to origin. 560 540 */ 561 - if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) 541 + if (bpx) 562 542 args->regs->flags |= X86_EFLAGS_RF; 563 - 564 - rcu_read_unlock(); 565 543 } 544 + 566 545 /* 567 546 * Further processing in do_debug() is needed for a) user-space 568 547 * breakpoints (to generate signals) and b) when the system has
-19
arch/x86/platform/efi/efi_64.c
··· 115 115 pud_t *pud_k, *pud_efi; 116 116 pgd_t *efi_pgd = efi_mm.pgd; 117 117 118 - /* 119 - * We can share all PGD entries apart from the one entry that 120 - * covers the EFI runtime mapping space. 121 - * 122 - * Make sure the EFI runtime region mappings are guaranteed to 123 - * only span a single PGD entry and that the entry also maps 124 - * other important kernel regions. 125 - */ 126 - MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); 127 - MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != 128 - (EFI_VA_END & PGDIR_MASK)); 129 - 130 118 pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); 131 119 pgd_k = pgd_offset_k(PAGE_OFFSET); 132 120 133 121 num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); 134 122 memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); 135 - 136 - /* 137 - * As with PGDs, we share all P4D entries apart from the one entry 138 - * that covers the EFI runtime mapping space. 139 - */ 140 - BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END)); 141 - BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK)); 142 123 143 124 pgd_efi = efi_pgd + pgd_index(EFI_VA_END); 144 125 pgd_k = pgd_offset_k(EFI_VA_END);
+9 -1
tools/power/x86/turbostat/turbostat.c
··· 1834 1834 int get_epb(int cpu) 1835 1835 { 1836 1836 char path[128 + PATH_BYTES]; 1837 + unsigned long long msr; 1837 1838 int ret, epb = -1; 1838 1839 FILE *fp; 1839 1840 1840 1841 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); 1841 1842 1842 - fp = fopen_or_die(path, "r"); 1843 + fp = fopen(path, "r"); 1844 + if (!fp) 1845 + goto msr_fallback; 1843 1846 1844 1847 ret = fscanf(fp, "%d", &epb); 1845 1848 if (ret != 1) ··· 1851 1848 fclose(fp); 1852 1849 1853 1850 return epb; 1851 + 1852 + msr_fallback: 1853 + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); 1854 + 1855 + return msr & 0xf; 1854 1856 } 1855 1857 1856 1858 void get_apic_id(struct thread_data *t)