Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 page table isolation fixes from Thomas Gleixner:
"A couple of urgent fixes for PTI:

- Fix a PTE mismatch between the user and kernel visible mappings of
the cpu entry area (they differ in the GLB bit), which causes a TLB
mismatch MCE on older AMD K8 machines

- Fix the misplaced CR3 switch in the SYSCALL compat entry code which
causes access to unmapped kernel memory resulting in double faults.

- Fix the section mismatch of the cpu_tss_rw percpu storage caused by
using a different mechanism for declaration and definition.

- Two fixes for dumpstack which help to decode entry stack issues
better

- Enable PTI by default in Kconfig. We should have done that earlier,
but it slipped through the cracks.

- Exclude AMD from the PTI enforcement. Not necessarily a fix, but if
AMD is so confident that they are not affected, then we should not
burden users with the overhead"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/process: Define cpu_tss_rw in same section as declaration
x86/pti: Switch to kernel CR3 at early in entry_SYSCALL_compat()
x86/dumpstack: Print registers for first stack frame
x86/dumpstack: Fix partial register dumps
x86/pti: Make sure the user/kernel PTEs match
x86/cpu, x86/pti: Do not enable PTI on AMD processors
x86/pti: Enable PTI by default

+48 -25
+6 -7
arch/x86/entry/entry_64_compat.S
··· 190 190 /* Interrupts are off on entry. */ 191 191 swapgs 192 192 193 - /* Stash user ESP and switch to the kernel stack. */ 193 + /* Stash user ESP */ 194 194 movl %esp, %r8d 195 + 196 + /* Use %rsp as scratch reg. User ESP is stashed in r8 */ 197 + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp 198 + 199 + /* Switch to the kernel stack */ 195 200 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 196 201 197 202 /* Construct struct pt_regs on stack */ ··· 223 218 pushq $0 /* pt_regs->r13 = 0 */ 224 219 pushq $0 /* pt_regs->r14 = 0 */ 225 220 pushq $0 /* pt_regs->r15 = 0 */ 226 - 227 - /* 228 - * We just saved %rdi so it is safe to clobber. It is not 229 - * preserved during the C calls inside TRACE_IRQS_OFF anyway. 230 - */ 231 - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi 232 221 233 222 /* 234 223 * User mode is traced as though IRQs are on, and SYSENTER
+13 -4
arch/x86/include/asm/unwind.h
··· 56 56 57 57 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) 58 58 /* 59 - * WARNING: The entire pt_regs may not be safe to dereference. In some cases, 60 - * only the iret frame registers are accessible. Use with caution! 59 + * If 'partial' returns true, only the iret frame registers are valid. 61 60 */ 62 - static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 61 + static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, 62 + bool *partial) 63 63 { 64 64 if (unwind_done(state)) 65 65 return NULL; 66 66 67 + if (partial) { 68 + #ifdef CONFIG_UNWINDER_ORC 69 + *partial = !state->full_regs; 70 + #else 71 + *partial = false; 72 + #endif 73 + } 74 + 67 75 return state->regs; 68 76 } 69 77 #else 70 - static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 78 + static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, 79 + bool *partial) 71 80 { 72 81 return NULL; 73 82 }
+2 -2
arch/x86/kernel/cpu/common.c
··· 923 923 924 924 setup_force_cpu_cap(X86_FEATURE_ALWAYS); 925 925 926 - /* Assume for now that ALL x86 CPUs are insecure */ 927 - setup_force_cpu_bug(X86_BUG_CPU_INSECURE); 926 + if (c->x86_vendor != X86_VENDOR_AMD) 927 + setup_force_cpu_bug(X86_BUG_CPU_INSECURE); 928 928 929 929 fpu__init_system(c); 930 930
+22 -9
arch/x86/kernel/dumpstack.c
··· 76 76 regs->sp, regs->flags); 77 77 } 78 78 79 - static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) 79 + static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, 80 + bool partial) 80 81 { 81 - if (on_stack(info, regs, sizeof(*regs))) 82 + /* 83 + * These on_stack() checks aren't strictly necessary: the unwind code 84 + * has already validated the 'regs' pointer. The checks are done for 85 + * ordering reasons: if the registers are on the next stack, we don't 86 + * want to print them out yet. Otherwise they'll be shown as part of 87 + * the wrong stack. Later, when show_trace_log_lvl() switches to the 88 + * next stack, this function will be called again with the same regs so 89 + * they can be printed in the right context. 90 + */ 91 + if (!partial && on_stack(info, regs, sizeof(*regs))) { 82 92 __show_regs(regs, 0); 83 - else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, 84 - IRET_FRAME_SIZE)) { 93 + 94 + } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, 95 + IRET_FRAME_SIZE)) { 85 96 /* 86 97 * When an interrupt or exception occurs in entry code, the 87 98 * full pt_regs might not have been saved yet. In that case ··· 109 98 struct stack_info stack_info = {0}; 110 99 unsigned long visit_mask = 0; 111 100 int graph_idx = 0; 101 + bool partial; 112 102 113 103 printk("%sCall Trace:\n", log_lvl); 114 104 115 105 unwind_start(&state, task, regs, stack); 116 106 stack = stack ? : get_stack_pointer(task, regs); 107 + regs = unwind_get_entry_regs(&state, &partial); 117 108 118 109 /* 119 110 * Iterate through the stacks, starting with the current stack pointer. 
··· 133 120 * - hardirq stack 134 121 * - entry stack 135 122 */ 136 - for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { 123 + for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { 137 124 const char *stack_name; 138 125 139 126 if (get_stack_info(stack, task, &stack_info, &visit_mask)) { ··· 153 140 printk("%s <%s>\n", log_lvl, stack_name); 154 141 155 142 if (regs) 156 - show_regs_safe(&stack_info, regs); 143 + show_regs_if_on_stack(&stack_info, regs, partial); 157 144 158 145 /* 159 146 * Scan the stack, printing any text addresses we find. At the ··· 177 164 178 165 /* 179 166 * Don't print regs->ip again if it was already printed 180 - * by show_regs_safe() below. 167 + * by show_regs_if_on_stack(). 181 168 */ 182 169 if (regs && stack == &regs->ip) 183 170 goto next; ··· 212 199 unwind_next_frame(&state); 213 200 214 201 /* if the frame has entry regs, print them */ 215 - regs = unwind_get_entry_regs(&state); 202 + regs = unwind_get_entry_regs(&state, &partial); 216 203 if (regs) 217 - show_regs_safe(&stack_info, regs); 204 + show_regs_if_on_stack(&stack_info, regs, partial); 218 205 } 219 206 220 207 if (stack_name)
+1 -1
arch/x86/kernel/process.c
··· 47 47 * section. Since TSS's are completely CPU-local, we want them 48 48 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 49 49 */ 50 - __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { 50 + __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { 51 51 .x86_tss = { 52 52 /* 53 53 * .sp0 is only used when entering ring 0 from a lower
+1 -1
arch/x86/kernel/stacktrace.c
··· 102 102 for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); 103 103 unwind_next_frame(&state)) { 104 104 105 - regs = unwind_get_entry_regs(&state); 105 + regs = unwind_get_entry_regs(&state, NULL); 106 106 if (regs) { 107 107 /* 108 108 * Kernel mode registers on the stack indicate an
+2 -1
arch/x86/mm/pti.c
··· 367 367 static void __init pti_clone_entry_text(void) 368 368 { 369 369 pti_clone_pmds((unsigned long) __entry_text_start, 370 - (unsigned long) __irqentry_text_end, _PAGE_RW); 370 + (unsigned long) __irqentry_text_end, 371 + _PAGE_RW | _PAGE_GLOBAL); 371 372 } 372 373 373 374 /*
+1
security/Kconfig
··· 56 56 57 57 config PAGE_TABLE_ISOLATION 58 58 bool "Remove the kernel mapping in user mode" 59 + default y 59 60 depends on X86_64 && !UML 60 61 help 61 62 This feature reduces the number of hardware side channels by