Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'x86-traps' (trap handling from Andy Lutomirski)

Merge x86-64 iret fixes from Andy Lutomirski:
"This addresses the following issues:

- an unrecoverable double-fault triggerable with modify_ldt.
- invalid stack usage in espfix64 failed IRET recovery from IST context.
- invalid stack usage in non-espfix64 failed IRET recovery from IST context.

It also makes a good but IMO scary change: non-espfix64 failed IRET
will now report the correct error. Hopefully nothing depended on the
old incorrect behavior, but maybe Wine will get confused in some
obscure corner case"

* emailed patches from Andy Lutomirski <luto@amacapital.net>:
x86_64, traps: Rework bad_iret
x86_64, traps: Stop using IST for #SS
x86_64, traps: Fix the espfix64 #DF fixup and rewrite it in C

+82 -84
-1
arch/x86/include/asm/page_32_types.h
··· 20 20 #define THREAD_SIZE_ORDER 1 21 21 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) 22 22 23 - #define STACKFAULT_STACK 0 24 23 #define DOUBLEFAULT_STACK 1 25 24 #define NMI_STACK 0 26 25 #define DEBUG_STACK 0
+5 -6
arch/x86/include/asm/page_64_types.h
··· 14 14 #define IRQ_STACK_ORDER 2 15 15 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) 16 16 17 - #define STACKFAULT_STACK 1 18 - #define DOUBLEFAULT_STACK 2 19 - #define NMI_STACK 3 20 - #define DEBUG_STACK 4 21 - #define MCE_STACK 5 22 - #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ 17 + #define DOUBLEFAULT_STACK 1 18 + #define NMI_STACK 2 19 + #define DEBUG_STACK 3 20 + #define MCE_STACK 4 21 + #define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ 23 22 24 23 #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) 25 24 #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+1
arch/x86/include/asm/traps.h
··· 39 39 40 40 #ifdef CONFIG_TRACING 41 41 asmlinkage void trace_page_fault(void); 42 + #define trace_stack_segment stack_segment 42 43 #define trace_divide_error divide_error 43 44 #define trace_bounds bounds 44 45 #define trace_invalid_op invalid_op
-1
arch/x86/kernel/dumpstack_64.c
··· 24 24 [ DEBUG_STACK-1 ] = "#DB", 25 25 [ NMI_STACK-1 ] = "NMI", 26 26 [ DOUBLEFAULT_STACK-1 ] = "#DF", 27 - [ STACKFAULT_STACK-1 ] = "#SS", 28 27 [ MCE_STACK-1 ] = "#MC", 29 28 #if DEBUG_STKSZ > EXCEPTION_STKSZ 30 29 [ N_EXCEPTION_STACKS ...
+22 -59
arch/x86/kernel/entry_64.S
··· 828 828 jnz native_irq_return_ldt 829 829 #endif 830 830 831 + .global native_irq_return_iret 831 832 native_irq_return_iret: 833 + /* 834 + * This may fault. Non-paranoid faults on return to userspace are 835 + * handled by fixup_bad_iret. These include #SS, #GP, and #NP. 836 + * Double-faults due to espfix64 are handled in do_double_fault. 837 + * Other faults here are fatal. 838 + */ 832 839 iretq 833 - _ASM_EXTABLE(native_irq_return_iret, bad_iret) 834 840 835 841 #ifdef CONFIG_X86_ESPFIX64 836 842 native_irq_return_ldt: ··· 863 857 popq_cfi %rax 864 858 jmp native_irq_return_iret 865 859 #endif 866 - 867 - .section .fixup,"ax" 868 - bad_iret: 869 - /* 870 - * The iret traps when the %cs or %ss being restored is bogus. 871 - * We've lost the original trap vector and error code. 872 - * #GPF is the most likely one to get for an invalid selector. 873 - * So pretend we completed the iret and took the #GPF in user mode. 874 - * 875 - * We are now running with the kernel GS after exception recovery. 876 - * But error_entry expects us to have user GS to match the user %cs, 877 - * so swap back. 878 - */ 879 - pushq $0 880 - 881 - SWAPGS 882 - jmp general_protection 883 - 884 - .previous 885 860 886 861 /* edi: workmask, edx: work */ 887 862 retint_careful: ··· 908 921 #endif 909 922 CFI_ENDPROC 910 923 END(common_interrupt) 911 - 912 - /* 913 - * If IRET takes a fault on the espfix stack, then we 914 - * end up promoting it to a doublefault. In that case, 915 - * modify the stack to make it look like we just entered 916 - * the #GP handler from user space, similar to bad_iret. 917 - */ 918 - #ifdef CONFIG_X86_ESPFIX64 919 - ALIGN 920 - __do_double_fault: 921 - XCPT_FRAME 1 RDI+8 922 - movq RSP(%rdi),%rax /* Trap on the espfix stack? 
*/ 923 - sarq $PGDIR_SHIFT,%rax 924 - cmpl $ESPFIX_PGD_ENTRY,%eax 925 - jne do_double_fault /* No, just deliver the fault */ 926 - cmpl $__KERNEL_CS,CS(%rdi) 927 - jne do_double_fault 928 - movq RIP(%rdi),%rax 929 - cmpq $native_irq_return_iret,%rax 930 - jne do_double_fault /* This shouldn't happen... */ 931 - movq PER_CPU_VAR(kernel_stack),%rax 932 - subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ 933 - movq %rax,RSP(%rdi) 934 - movq $0,(%rax) /* Missing (lost) #GP error code */ 935 - movq $general_protection,RIP(%rdi) 936 - retq 937 - CFI_ENDPROC 938 - END(__do_double_fault) 939 - #else 940 - # define __do_double_fault do_double_fault 941 - #endif 942 924 943 925 /* 944 926 * APIC interrupts. ··· 1080 1124 idtentry bounds do_bounds has_error_code=0 1081 1125 idtentry invalid_op do_invalid_op has_error_code=0 1082 1126 idtentry device_not_available do_device_not_available has_error_code=0 1083 - idtentry double_fault __do_double_fault has_error_code=1 paranoid=1 1127 + idtentry double_fault do_double_fault has_error_code=1 paranoid=1 1084 1128 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 1085 1129 idtentry invalid_TSS do_invalid_TSS has_error_code=1 1086 1130 idtentry segment_not_present do_segment_not_present has_error_code=1 ··· 1245 1289 1246 1290 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1247 1291 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1248 - idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 1292 + idtentry stack_segment do_stack_segment has_error_code=1 1249 1293 #ifdef CONFIG_XEN 1250 1294 idtentry xen_debug do_debug has_error_code=0 1251 1295 idtentry xen_int3 do_int3 has_error_code=0 ··· 1355 1399 1356 1400 /* 1357 1401 * There are two places in the kernel that can potentially fault with 1358 - * usergs. Handle them here. 
The exception handlers after iret run with 1359 - * kernel gs again, so don't set the user space flag. B stepping K8s 1360 - * sometimes report an truncated RIP for IRET exceptions returning to 1361 - * compat mode. Check for these here too. 1402 + * usergs. Handle them here. B stepping K8s sometimes report a 1403 + * truncated RIP for IRET exceptions returning to compat mode. Check 1404 + * for these here too. 1362 1405 */ 1363 1406 error_kernelspace: 1364 1407 CFI_REL_OFFSET rcx, RCX+8 1365 1408 incl %ebx 1366 1409 leaq native_irq_return_iret(%rip),%rcx 1367 1410 cmpq %rcx,RIP+8(%rsp) 1368 - je error_swapgs 1411 + je error_bad_iret 1369 1412 movl %ecx,%eax /* zero extend */ 1370 1413 cmpq %rax,RIP+8(%rsp) 1371 1414 je bstep_iret ··· 1375 1420 bstep_iret: 1376 1421 /* Fix truncated RIP */ 1377 1422 movq %rcx,RIP+8(%rsp) 1378 - jmp error_swapgs 1423 + /* fall through */ 1424 + 1425 + error_bad_iret: 1426 + SWAPGS 1427 + mov %rsp,%rdi 1428 + call fixup_bad_iret 1429 + mov %rax,%rsp 1430 + decl %ebx /* Return to usergs */ 1431 + jmp error_sti 1379 1432 CFI_ENDPROC 1380 1433 END(error_entry) 1381 1434
+54 -17
arch/x86/kernel/traps.c
··· 233 233 DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) 234 234 DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) 235 235 DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) 236 - #ifdef CONFIG_X86_32 237 236 DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) 238 - #endif 239 237 DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) 240 238 241 239 #ifdef CONFIG_X86_64 242 240 /* Runs on IST stack */ 243 - dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) 244 - { 245 - enum ctx_state prev_state; 246 - 247 - prev_state = exception_enter(); 248 - if (notify_die(DIE_TRAP, "stack segment", regs, error_code, 249 - X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { 250 - preempt_conditional_sti(regs); 251 - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); 252 - preempt_conditional_cli(regs); 253 - } 254 - exception_exit(prev_state); 255 - } 256 - 257 241 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) 258 242 { 259 243 static const char str[] = "double fault"; 260 244 struct task_struct *tsk = current; 245 + 246 + #ifdef CONFIG_X86_ESPFIX64 247 + extern unsigned char native_irq_return_iret[]; 248 + 249 + /* 250 + * If IRET takes a non-IST fault on the espfix64 stack, then we 251 + * end up promoting it to a doublefault. In that case, modify 252 + * the stack to make it look like we just entered the #GP 253 + * handler from user space, similar to bad_iret. 254 + */ 255 + if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && 256 + regs->cs == __KERNEL_CS && 257 + regs->ip == (unsigned long)native_irq_return_iret) 258 + { 259 + struct pt_regs *normal_regs = task_pt_regs(current); 260 + 261 + /* Fake a #GP(0) from userspace. 
*/ 262 + memmove(&normal_regs->ip, (void *)regs->sp, 5*8); 263 + normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ 264 + regs->ip = (unsigned long)general_protection; 265 + regs->sp = (unsigned long)&normal_regs->orig_ax; 266 + return; 267 + } 268 + #endif 261 269 262 270 exception_enter(); 263 271 /* Return not checked because double check cannot be ignored */ ··· 407 399 return regs; 408 400 } 409 401 NOKPROBE_SYMBOL(sync_regs); 402 + 403 + struct bad_iret_stack { 404 + void *error_entry_ret; 405 + struct pt_regs regs; 406 + }; 407 + 408 + asmlinkage __visible 409 + struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) 410 + { 411 + /* 412 + * This is called from entry_64.S early in handling a fault 413 + * caused by a bad iret to user mode. To handle the fault 414 + * correctly, we want move our stack frame to task_pt_regs 415 + * and we want to pretend that the exception came from the 416 + * iret target. 417 + */ 418 + struct bad_iret_stack *new_stack = 419 + container_of(task_pt_regs(current), 420 + struct bad_iret_stack, regs); 421 + 422 + /* Copy the IRET target to the new stack. */ 423 + memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); 424 + 425 + /* Copy the remainder of the stack from the current stack. */ 426 + memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); 427 + 428 + BUG_ON(!user_mode_vm(&new_stack->regs)); 429 + return new_stack; 430 + } 410 431 #endif 411 432 412 433 /* ··· 815 778 set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); 816 779 set_intr_gate(X86_TRAP_TS, invalid_TSS); 817 780 set_intr_gate(X86_TRAP_NP, segment_not_present); 818 - set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); 781 + set_intr_gate(X86_TRAP_SS, stack_segment); 819 782 set_intr_gate(X86_TRAP_GP, general_protection); 820 783 set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); 821 784 set_intr_gate(X86_TRAP_MF, coprocessor_error);