Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
"A handful of x86 fixes:

- a syscall ABI fix, fixing an Android breakage
- a Xen PV guest fix for an RTC device init problem that caused a
non-working console
- a Xen guest syscall stack frame fix
- an MCE hotplug CPU crash fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/numachip: Fix NumaConnect2 MMCFG PCI access
x86/entry: Restore traditional SYSENTER calling convention
x86/entry: Fix some comments
x86/paravirt: Prevent rtc_cmos platform device init on PV guests
x86/xen: Avoid fast syscall path for Xen PV guests
x86/mce: Ensure offline CPUs don't participate in rendezvous process

+97 -33
+3 -3
arch/x86/entry/common.c
··· 421 421 regs->ip = landing_pad; 422 422 423 423 /* 424 - * Fetch ECX from where the vDSO stashed it. 424 + * Fetch EBP from where the vDSO stashed it. 425 425 * 426 426 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention! 427 427 */ ··· 432 432 * Micro-optimization: the pointer we're following is explicitly 433 433 * 32 bits, so it can't be out of range. 434 434 */ 435 - __get_user(*(u32 *)&regs->cx, 435 + __get_user(*(u32 *)&regs->bp, 436 436 (u32 __user __force *)(unsigned long)(u32)regs->sp) 437 437 #else 438 - get_user(*(u32 *)&regs->cx, 438 + get_user(*(u32 *)&regs->bp, 439 439 (u32 __user __force *)(unsigned long)(u32)regs->sp) 440 440 #endif 441 441 ) {
+4 -3
arch/x86/entry/entry_32.S
··· 292 292 movl TSS_sysenter_sp0(%esp), %esp 293 293 sysenter_past_esp: 294 294 pushl $__USER_DS /* pt_regs->ss */ 295 - pushl %ecx /* pt_regs->cx */ 295 + pushl %ebp /* pt_regs->sp (stashed in bp) */ 296 296 pushfl /* pt_regs->flags (except IF = 0) */ 297 297 orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ 298 298 pushl $__USER_CS /* pt_regs->cs */ ··· 308 308 309 309 movl %esp, %eax 310 310 call do_fast_syscall_32 311 - testl %eax, %eax 312 - jz .Lsyscall_32_done 311 + /* XEN PV guests always use IRET path */ 312 + ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ 313 + "jmp .Lsyscall_32_done", X86_FEATURE_XENPV 313 314 314 315 /* Opportunistic SYSEXIT */ 315 316 TRACE_IRQS_ON /* User mode traces as IRQs on. */
+11 -9
arch/x86/entry/entry_64_compat.S
··· 63 63 64 64 /* Construct struct pt_regs on stack */ 65 65 pushq $__USER32_DS /* pt_regs->ss */ 66 - pushq %rcx /* pt_regs->sp */ 66 + pushq %rbp /* pt_regs->sp (stashed in bp) */ 67 67 68 68 /* 69 69 * Push flags. This is nasty. First, interrupts are currently ··· 82 82 pushq %rdi /* pt_regs->di */ 83 83 pushq %rsi /* pt_regs->si */ 84 84 pushq %rdx /* pt_regs->dx */ 85 - pushq %rcx /* pt_regs->cx (will be overwritten) */ 85 + pushq %rcx /* pt_regs->cx */ 86 86 pushq $-ENOSYS /* pt_regs->ax */ 87 87 pushq %r8 /* pt_regs->r8 = 0 */ 88 88 pushq %r8 /* pt_regs->r9 = 0 */ 89 89 pushq %r8 /* pt_regs->r10 = 0 */ 90 90 pushq %r8 /* pt_regs->r11 = 0 */ 91 91 pushq %rbx /* pt_regs->rbx */ 92 - pushq %rbp /* pt_regs->rbp */ 92 + pushq %rbp /* pt_regs->rbp (will be overwritten) */ 93 93 pushq %r8 /* pt_regs->r12 = 0 */ 94 94 pushq %r8 /* pt_regs->r13 = 0 */ 95 95 pushq %r8 /* pt_regs->r14 = 0 */ ··· 121 121 122 122 movq %rsp, %rdi 123 123 call do_fast_syscall_32 124 - testl %eax, %eax 125 - jz .Lsyscall_32_done 124 + /* XEN PV guests always use IRET path */ 125 + ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ 126 + "jmp .Lsyscall_32_done", X86_FEATURE_XENPV 126 127 jmp sysret32_from_system_call 127 128 128 129 sysenter_fix_flags: ··· 179 178 pushq %rdi /* pt_regs->di */ 180 179 pushq %rsi /* pt_regs->si */ 181 180 pushq %rdx /* pt_regs->dx */ 182 - pushq %rcx /* pt_regs->cx (will be overwritten) */ 181 + pushq %rbp /* pt_regs->cx (stashed in bp) */ 183 182 pushq $-ENOSYS /* pt_regs->ax */ 184 183 xorq %r8,%r8 185 184 pushq %r8 /* pt_regs->r8 = 0 */ ··· 187 186 pushq %r8 /* pt_regs->r10 = 0 */ 188 187 pushq %r8 /* pt_regs->r11 = 0 */ 189 188 pushq %rbx /* pt_regs->rbx */ 190 - pushq %rbp /* pt_regs->rbp */ 189 + pushq %rbp /* pt_regs->rbp (will be overwritten) */ 191 190 pushq %r8 /* pt_regs->r12 = 0 */ 192 191 pushq %r8 /* pt_regs->r13 = 0 */ 193 192 pushq %r8 /* pt_regs->r14 = 0 */ ··· 201 200 202 201 movq %rsp, %rdi 203 202 call do_fast_syscall_32 204 - testl 
%eax, %eax 205 - jz .Lsyscall_32_done 203 + /* XEN PV guests always use IRET path */ 204 + ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ 205 + "jmp .Lsyscall_32_done", X86_FEATURE_XENPV 206 206 207 207 /* Opportunistic SYSRET */ 208 208 sysret32_from_system_call:
+44 -12
arch/x86/entry/vdso/vdso32/system_call.S
··· 1 1 /* 2 - * Code for the vDSO. This version uses the old int $0x80 method. 2 + * AT_SYSINFO entry point 3 3 */ 4 4 5 5 #include <asm/dwarf2.h> ··· 21 21 /* 22 22 * Reshuffle regs so that all of any of the entry instructions 23 23 * will preserve enough state. 24 + * 25 + * A really nice entry sequence would be: 26 + * pushl %edx 27 + * pushl %ecx 28 + * movl %esp, %ecx 29 + * 30 + * Unfortunately, naughty Android versions between July and December 31 + * 2015 actually hardcode the traditional Linux SYSENTER entry 32 + * sequence. That is severely broken for a number of reasons (ask 33 + * anyone with an AMD CPU, for example). Nonetheless, we try to keep 34 + * it working approximately as well as it ever worked. 35 + * 36 + * This link may elucidate some of the history: 37 + * https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7 38 + * personally, I find it hard to understand what's going on there. 39 + * 40 + * Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE. 41 + * Execute an indirect call to the address in the AT_SYSINFO auxv 42 + * entry. That is the ONLY correct way to make a fast 32-bit system 43 + * call on Linux. (Open-coding int $0x80 is also fine, but it's 44 + * slow.) 24 45 */ 25 - pushl %edx 26 - CFI_ADJUST_CFA_OFFSET 4 27 - CFI_REL_OFFSET edx, 0 28 46 pushl %ecx 29 47 CFI_ADJUST_CFA_OFFSET 4 30 48 CFI_REL_OFFSET ecx, 0 31 - movl %esp, %ecx 49 + pushl %edx 50 + CFI_ADJUST_CFA_OFFSET 4 51 + CFI_REL_OFFSET edx, 0 52 + pushl %ebp 53 + CFI_ADJUST_CFA_OFFSET 4 54 + CFI_REL_OFFSET ebp, 0 55 + 56 + #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter" 57 + #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall" 32 58 33 59 #ifdef CONFIG_X86_64 34 60 /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. 
*/ 35 - ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \ 36 - "syscall", X86_FEATURE_SYSCALL32 61 + ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \ 62 + SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 37 63 #else 38 - ALTERNATIVE "", "sysenter", X86_FEATURE_SEP 64 + ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP 39 65 #endif 40 66 41 67 /* Enter using int $0x80 */ 42 - movl (%esp), %ecx 43 68 int $0x80 44 69 GLOBAL(int80_landing_pad) 45 70 46 - /* Restore ECX and EDX in case they were clobbered. */ 47 - popl %ecx 48 - CFI_RESTORE ecx 71 + /* 72 + * Restore EDX and ECX in case they were clobbered. EBP is not 73 + * clobbered (the kernel restores it), but it's cleaner and 74 + * probably faster to pop it than to adjust ESP using addl. 75 + */ 76 + popl %ebp 77 + CFI_RESTORE ebp 49 78 CFI_ADJUST_CFA_OFFSET -4 50 79 popl %edx 51 80 CFI_RESTORE edx 81 + CFI_ADJUST_CFA_OFFSET -4 82 + popl %ecx 83 + CFI_RESTORE ecx 52 84 CFI_ADJUST_CFA_OFFSET -4 53 85 ret 54 86 CFI_ENDPROC
+1
arch/x86/include/asm/cpufeature.h
··· 216 216 #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ 217 217 #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ 218 218 #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ 219 + #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ 219 220 220 221 221 222 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+6
arch/x86/include/asm/paravirt.h
··· 19 19 return pv_info.paravirt_enabled; 20 20 } 21 21 22 + static inline int paravirt_has_feature(unsigned int feature) 23 + { 24 + WARN_ON_ONCE(!pv_info.paravirt_enabled); 25 + return (pv_info.features & feature); 26 + } 27 + 22 28 static inline void load_sp0(struct tss_struct *tss, 23 29 struct thread_struct *thread) 24 30 {
+5
arch/x86/include/asm/paravirt_types.h
··· 70 70 #endif 71 71 72 72 int paravirt_enabled; 73 + unsigned int features; /* valid only if paravirt_enabled is set */ 73 74 const char *name; 74 75 }; 76 + 77 + #define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x) 78 + /* Supported features */ 79 + #define PV_SUPPORTED_RTC (1<<0) 75 80 76 81 struct pv_init_ops { 77 82 /*
+1
arch/x86/include/asm/processor.h
··· 472 472 #else 473 473 #define __cpuid native_cpuid 474 474 #define paravirt_enabled() 0 475 + #define paravirt_has(x) 0 475 476 476 477 static inline void load_sp0(struct tss_struct *tss, 477 478 struct thread_struct *thread)
+1 -4
arch/x86/kernel/apic/apic_numachip.c
··· 193 193 case 1: 194 194 init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE); 195 195 numachip_apic_icr_write = numachip1_apic_icr_write; 196 - x86_init.pci.arch_init = pci_numachip_init; 197 196 break; 198 197 case 2: 199 198 init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE); 200 199 numachip_apic_icr_write = numachip2_apic_icr_write; 201 - 202 - /* Use MCFG config cycles rather than locked CF8 cycles */ 203 - raw_pci_ops = &pci_mmcfg; 204 200 break; 205 201 default: 206 202 return 0; 207 203 } 208 204 209 205 x86_cpuinit.fixup_cpu_id = fixup_cpu_id; 206 + x86_init.pci.arch_init = pci_numachip_init; 210 207 211 208 return 0; 212 209 }
+11
arch/x86/kernel/cpu/mcheck/mce.c
··· 999 999 int flags = MF_ACTION_REQUIRED; 1000 1000 int lmce = 0; 1001 1001 1002 + /* If this CPU is offline, just bail out. */ 1003 + if (cpu_is_offline(smp_processor_id())) { 1004 + u64 mcgstatus; 1005 + 1006 + mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 1007 + if (mcgstatus & MCG_STATUS_RIPV) { 1008 + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); 1009 + return; 1010 + } 1011 + } 1012 + 1002 1013 ist_enter(regs); 1003 1014 1004 1015 this_cpu_inc(mce_exception_count);
+3
arch/x86/kernel/rtc.c
··· 200 200 } 201 201 #endif 202 202 203 + if (paravirt_enabled() && !paravirt_has(RTC)) 204 + return -ENODEV; 205 + 203 206 platform_device_register(&rtc_device); 204 207 dev_info(&rtc_device.dev, 205 208 "registered platform RTC device (no PNP device found)\n");
+1
arch/x86/lguest/boot.c
··· 1414 1414 pv_info.kernel_rpl = 1; 1415 1415 /* Everyone except Xen runs with this set. */ 1416 1416 pv_info.shared_kernel_pmd = 1; 1417 + pv_info.features = 0; 1417 1418 1418 1419 /* 1419 1420 * We set up all the lguest overrides for sensitive operations. These
+6 -2
arch/x86/xen/enlighten.c
··· 1192 1192 #ifdef CONFIG_X86_64 1193 1193 .extra_user_64bit_cs = FLAT_USER_CS64, 1194 1194 #endif 1195 - 1195 + .features = 0, 1196 1196 .name = "Xen", 1197 1197 }; 1198 1198 ··· 1535 1535 1536 1536 /* Install Xen paravirt ops */ 1537 1537 pv_info = xen_info; 1538 + if (xen_initial_domain()) 1539 + pv_info.features |= PV_SUPPORTED_RTC; 1538 1540 pv_init_ops = xen_init_ops; 1539 1541 pv_apic_ops = xen_apic_ops; 1540 1542 if (!xen_pvh_domain()) { ··· 1888 1886 1889 1887 static void xen_set_cpu_features(struct cpuinfo_x86 *c) 1890 1888 { 1891 - if (xen_pv_domain()) 1889 + if (xen_pv_domain()) { 1892 1890 clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); 1891 + set_cpu_cap(c, X86_FEATURE_XENPV); 1892 + } 1893 1893 } 1894 1894 1895 1895 const struct hypervisor_x86 x86_hyper_xen = {