Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86_urgent_for_6.5_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 CFI fixes from Peter Zijlstra:
"Fix kCFI/FineIBT weaknesses

The primary bug Alyssa noticed was that, with FineIBT enabled, function
prologues have a spurious ENDBR instruction:

__cfi_foo:
endbr64
subl $hash, %r10d
jz 1f
ud2
nop
1:
foo:
endbr64 <--- *sadface*

This means that any indirect call that fails to target the __cfi
symbol and instead targets (the regular old) foo+0, will succeed due
to that second ENDBR.

Fixing this led to the discovery of a single indirect call that was
still doing this: ret_from_fork(). Since that's an assembly stub, the
compiler would not generate the proper kCFI indirect call magic and it
would not get patched.

Brian came up with the most comprehensive fix -- convert the thing to
C with only a very thin asm wrapper. This ensures the kernel thread
bootstrap is a proper kCFI call.

While discussing all this, Kees noted that kCFI hashes could/should be
poisoned to seal all functions whose address is never taken, further
limiting the valid kCFI targets -- much like we already do for IBT.

So what was a 'simple' observation and fix cascaded into a bunch of
inter-related CFI infrastructure fixes"

* tag 'x86_urgent_for_6.5_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/cfi: Only define poison_cfi() if CONFIG_X86_KERNEL_IBT=y
x86/fineibt: Poison ENDBR at +0
x86: Rewrite ret_from_fork() in C
x86/32: Remove schedule_tail_wrapper()
x86/cfi: Extend ENDBR sealing to kCFI
x86/alternative: Rename apply_ibt_endbr()
x86/cfi: Extend {JMP,CAKK}_NOSPEC comment

+120 -75
+1 -1
arch/um/kernel/um_arch.c
··· 437 437 os_check_bugs(); 438 438 } 439 439 440 - void apply_ibt_endbr(s32 *start, s32 *end) 440 + void apply_seal_endbr(s32 *start, s32 *end) 441 441 { 442 442 } 443 443
+13 -40
arch/x86/entry/entry_32.S
··· 720 720 .popsection 721 721 722 722 /* 723 - * The unwinder expects the last frame on the stack to always be at the same 724 - * offset from the end of the page, which allows it to validate the stack. 725 - * Calling schedule_tail() directly would break that convention because its an 726 - * asmlinkage function so its argument has to be pushed on the stack. This 727 - * wrapper creates a proper "end of stack" frame header before the call. 728 - */ 729 - .pushsection .text, "ax" 730 - SYM_FUNC_START(schedule_tail_wrapper) 731 - FRAME_BEGIN 732 - 733 - pushl %eax 734 - call schedule_tail 735 - popl %eax 736 - 737 - FRAME_END 738 - RET 739 - SYM_FUNC_END(schedule_tail_wrapper) 740 - .popsection 741 - 742 - /* 743 723 * A newly forked process directly context switches into this address. 744 724 * 745 725 * eax: prev task we switched from ··· 727 747 * edi: kernel thread arg 728 748 */ 729 749 .pushsection .text, "ax" 730 - SYM_CODE_START(ret_from_fork) 731 - call schedule_tail_wrapper 750 + SYM_CODE_START(ret_from_fork_asm) 751 + movl %esp, %edx /* regs */ 732 752 733 - testl %ebx, %ebx 734 - jnz 1f /* kernel threads are uncommon */ 753 + /* return address for the stack unwinder */ 754 + pushl $.Lsyscall_32_done 735 755 736 - 2: 737 - /* When we fork, we trace the syscall return in the child, too. */ 738 - movl %esp, %eax 739 - call syscall_exit_to_user_mode 740 - jmp .Lsyscall_32_done 756 + FRAME_BEGIN 757 + /* prev already in EAX */ 758 + movl %ebx, %ecx /* fn */ 759 + pushl %edi /* fn_arg */ 760 + call ret_from_fork 761 + addl $4, %esp 762 + FRAME_END 741 763 742 - /* kernel thread */ 743 - 1: movl %edi, %eax 744 - CALL_NOSPEC ebx 745 - /* 746 - * A kernel thread is allowed to return here after successfully 747 - * calling kernel_execve(). Exit to userspace to complete the execve() 748 - * syscall. 
749 - */ 750 - movl $0, PT_EAX(%esp) 751 - jmp 2b 752 - SYM_CODE_END(ret_from_fork) 764 + RET 765 + SYM_CODE_END(ret_from_fork_asm) 753 766 .popsection 754 767 755 768 SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
+8 -25
arch/x86/entry/entry_64.S
··· 284 284 * r12: kernel thread arg 285 285 */ 286 286 .pushsection .text, "ax" 287 - __FUNC_ALIGN 288 - SYM_CODE_START_NOALIGN(ret_from_fork) 289 - UNWIND_HINT_END_OF_STACK 287 + SYM_CODE_START(ret_from_fork_asm) 288 + UNWIND_HINT_REGS 290 289 ANNOTATE_NOENDBR // copy_thread 291 290 CALL_DEPTH_ACCOUNT 292 - movq %rax, %rdi 293 - call schedule_tail /* rdi: 'prev' task parameter */ 294 291 295 - testq %rbx, %rbx /* from kernel_thread? */ 296 - jnz 1f /* kernel threads are uncommon */ 292 + movq %rax, %rdi /* prev */ 293 + movq %rsp, %rsi /* regs */ 294 + movq %rbx, %rdx /* fn */ 295 + movq %r12, %rcx /* fn_arg */ 296 + call ret_from_fork 297 297 298 - 2: 299 - UNWIND_HINT_REGS 300 - movq %rsp, %rdi 301 - call syscall_exit_to_user_mode /* returns with IRQs disabled */ 302 298 jmp swapgs_restore_regs_and_return_to_usermode 303 - 304 - 1: 305 - /* kernel thread */ 306 - UNWIND_HINT_END_OF_STACK 307 - movq %r12, %rdi 308 - CALL_NOSPEC rbx 309 - /* 310 - * A kernel thread is allowed to return here after successfully 311 - * calling kernel_execve(). Exit to userspace to complete the execve() 312 - * syscall. 313 - */ 314 - movq $0, RAX(%rsp) 315 - jmp 2b 316 - SYM_CODE_END(ret_from_fork) 299 + SYM_CODE_END(ret_from_fork_asm) 317 300 .popsection 318 301 319 302 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+1 -1
arch/x86/include/asm/alternative.h
··· 96 96 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); 97 97 extern void apply_retpolines(s32 *start, s32 *end); 98 98 extern void apply_returns(s32 *start, s32 *end); 99 - extern void apply_ibt_endbr(s32 *start, s32 *end); 99 + extern void apply_seal_endbr(s32 *start, s32 *end); 100 100 extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, 101 101 s32 *start_cfi, s32 *end_cfi); 102 102
+1 -1
arch/x86/include/asm/ibt.h
··· 34 34 /* 35 35 * Create a dummy function pointer reference to prevent objtool from marking 36 36 * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by 37 - * apply_ibt_endbr()). 37 + * apply_seal_endbr()). 38 38 */ 39 39 #define IBT_NOSEAL(fname) \ 40 40 ".pushsection .discard.ibt_endbr_noseal\n\t" \
+4
arch/x86/include/asm/nospec-branch.h
··· 234 234 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple 235 235 * indirect jmp/call which may be susceptible to the Spectre variant 2 236 236 * attack. 237 + * 238 + * NOTE: these do not take kCFI into account and are thus not comparable to C 239 + * indirect calls, take care when using. The target of these should be an ENDBR 240 + * instruction irrespective of kCFI. 237 241 */ 238 242 .macro JMP_NOSPEC reg:req 239 243 #ifdef CONFIG_RETPOLINE
+3 -1
arch/x86/include/asm/switch_to.h
··· 12 12 __visible struct task_struct *__switch_to(struct task_struct *prev, 13 13 struct task_struct *next); 14 14 15 - asmlinkage void ret_from_fork(void); 15 + asmlinkage void ret_from_fork_asm(void); 16 + __visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, 17 + int (*fn)(void *), void *fn_arg); 16 18 17 19 /* 18 20 * This is the structure pointed to by thread.sp for an inactive task. The
+67 -4
arch/x86/kernel/alternative.c
··· 778 778 779 779 #ifdef CONFIG_X86_KERNEL_IBT 780 780 781 + static void poison_cfi(void *addr); 782 + 781 783 static void __init_or_module poison_endbr(void *addr, bool warn) 782 784 { 783 785 u32 endbr, poison = gen_endbr_poison(); ··· 804 802 805 803 /* 806 804 * Generated by: objtool --ibt 805 + * 806 + * Seal the functions for indirect calls by clobbering the ENDBR instructions 807 + * and the kCFI hash value. 807 808 */ 808 - void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) 809 + void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) 809 810 { 810 811 s32 *s; 811 812 ··· 817 812 818 813 poison_endbr(addr, true); 819 814 if (IS_ENABLED(CONFIG_FINEIBT)) 820 - poison_endbr(addr - 16, false); 815 + poison_cfi(addr - 16); 821 816 } 822 817 } 823 818 824 819 #else 825 820 826 - void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { } 821 + void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } 827 822 828 823 #endif /* CONFIG_X86_KERNEL_IBT */ 829 824 ··· 1068 1063 return 0; 1069 1064 } 1070 1065 1066 + static void cfi_rewrite_endbr(s32 *start, s32 *end) 1067 + { 1068 + s32 *s; 1069 + 1070 + for (s = start; s < end; s++) { 1071 + void *addr = (void *)s + *s; 1072 + 1073 + poison_endbr(addr+16, false); 1074 + } 1075 + } 1076 + 1071 1077 /* .retpoline_sites */ 1072 1078 static int cfi_rand_callers(s32 *start, s32 *end) 1073 1079 { ··· 1173 1157 return; 1174 1158 1175 1159 case CFI_FINEIBT: 1160 + /* place the FineIBT preamble at func()-16 */ 1176 1161 ret = cfi_rewrite_preamble(start_cfi, end_cfi); 1177 1162 if (ret) 1178 1163 goto err; 1179 1164 1165 + /* rewrite the callers to target func()-16 */ 1180 1166 ret = cfi_rewrite_callers(start_retpoline, end_retpoline); 1181 1167 if (ret) 1182 1168 goto err; 1169 + 1170 + /* now that nobody targets func()+0, remove ENDBR there */ 1171 + cfi_rewrite_endbr(start_cfi, end_cfi); 1183 1172 1184 1173 if (builtin) 1185 1174 pr_info("Using FineIBT CFI\n"); ··· 1198 1177 
pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n"); 1199 1178 } 1200 1179 1180 + static inline void poison_hash(void *addr) 1181 + { 1182 + *(u32 *)addr = 0; 1183 + } 1184 + 1185 + static void poison_cfi(void *addr) 1186 + { 1187 + switch (cfi_mode) { 1188 + case CFI_FINEIBT: 1189 + /* 1190 + * __cfi_\func: 1191 + * osp nopl (%rax) 1192 + * subl $0, %r10d 1193 + * jz 1f 1194 + * ud2 1195 + * 1: nop 1196 + */ 1197 + poison_endbr(addr, false); 1198 + poison_hash(addr + fineibt_preamble_hash); 1199 + break; 1200 + 1201 + case CFI_KCFI: 1202 + /* 1203 + * __cfi_\func: 1204 + * movl $0, %eax 1205 + * .skip 11, 0x90 1206 + */ 1207 + poison_hash(addr + 1); 1208 + break; 1209 + 1210 + default: 1211 + break; 1212 + } 1213 + } 1214 + 1201 1215 #else 1202 1216 1203 1217 static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, 1204 1218 s32 *start_cfi, s32 *end_cfi, bool builtin) 1205 1219 { 1206 1220 } 1221 + 1222 + #ifdef CONFIG_X86_KERNEL_IBT 1223 + static void poison_cfi(void *addr) { } 1224 + #endif 1207 1225 1208 1226 #endif 1209 1227 ··· 1625 1565 */ 1626 1566 callthunks_patch_builtin_calls(); 1627 1567 1628 - apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); 1568 + /* 1569 + * Seal all functions that do not have their address taken. 1570 + */ 1571 + apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); 1629 1572 1630 1573 #ifdef CONFIG_SMP 1631 1574 /* Patch to UP if other cpus not imminent. */
+1 -1
arch/x86/kernel/module.c
··· 358 358 } 359 359 if (ibt_endbr) { 360 360 void *iseg = (void *)ibt_endbr->sh_addr; 361 - apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); 361 + apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size); 362 362 } 363 363 if (locks) { 364 364 void *lseg = (void *)locks->sh_addr;
+21 -1
arch/x86/kernel/process.c
··· 28 28 #include <linux/static_call.h> 29 29 #include <trace/events/power.h> 30 30 #include <linux/hw_breakpoint.h> 31 + #include <linux/entry-common.h> 31 32 #include <asm/cpu.h> 32 33 #include <asm/apic.h> 33 34 #include <linux/uaccess.h> ··· 135 134 return do_set_thread_area_64(p, ARCH_SET_FS, tls); 136 135 } 137 136 137 + __visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, 138 + int (*fn)(void *), void *fn_arg) 139 + { 140 + schedule_tail(prev); 141 + 142 + /* Is this a kernel thread? */ 143 + if (unlikely(fn)) { 144 + fn(fn_arg); 145 + /* 146 + * A kernel thread is allowed to return here after successfully 147 + * calling kernel_execve(). Exit to userspace to complete the 148 + * execve() syscall. 149 + */ 150 + regs->ax = 0; 151 + } 152 + 153 + syscall_exit_to_user_mode(regs); 154 + } 155 + 138 156 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) 139 157 { 140 158 unsigned long clone_flags = args->flags; ··· 169 149 frame = &fork_frame->frame; 170 150 171 151 frame->bp = encode_frame_pointer(childregs); 172 - frame->ret_addr = (unsigned long) ret_from_fork; 152 + frame->ret_addr = (unsigned long) ret_from_fork_asm; 173 153 p->thread.sp = (unsigned long) fork_frame; 174 154 p->thread.io_bitmap = NULL; 175 155 p->thread.iopl_warn = 0;