Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'x86_core_for_v6.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull more x86 updates from Borislav Petkov:

- Remove a bunch of asm implementing condition-flags testing in KVM's
emulator in favor of int3_emulate_jcc(), which is written in C

- Replace KVM fastops with C-based stubs, avoiding problems with the
fastop infra: the latter doesn't adhere to the C ABI due to its
special calling convention and, more importantly, bypasses compiler
control-flow integrity checking because it is written in asm

- Remove wrongly used static branches and other ugliness accumulated
over time in Hyper-V's hypercall implementation, replacing them with
a proper static call to the correct hypervisor call variant

- Add some fixes and modifications to allow running FRED-enabled
kernels in KVM even on non-FRED hardware

- Add kCFI improvements such as validating indirect calls, and prepare
for enabling kCFI with GCC. Add cmdline parameter documentation and
other code cleanups

- Adopt the single-byte 0xd6 insn (UDB) as the official single-byte
undefined-opcode (#UD) instruction, as agreed upon by both x86 vendors
(a short illustration follows this list)

- Other smaller cleanups and touchups all over the place
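
As a quick illustration (my sketch, not part of the pull request): 0xd6
has no valid encoding in 64-bit mode, so executing it raises #UD, and the
#UD handler can classify it by its single opcode byte (BUG_UDB below):

    /* Hypothetical snippet: trigger the agreed-upon single-byte #UD opcode. */
    static void hit_udb(void)
    {
            asm volatile (".byte 0xd6");    /* UDB -> #UD, reported as BUG_UDB */
    }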

* tag 'x86_core_for_v6.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
x86,retpoline: Optimize patch_retpoline()
x86,ibt: Use UDB instead of 0xEA
x86/cfi: Remove __noinitretpoline and __noretpoline
x86/cfi: Add "debug" option to "cfi=" bootparam
x86/cfi: Standardize on common "CFI:" prefix for CFI reports
x86/cfi: Document the "cfi=" bootparam options
x86/traps: Clarify KCFI instruction layout
compiler_types.h: Move __nocfi out of compiler-specific header
objtool: Validate kCFI calls
x86/fred: KVM: VMX: Always use FRED for IRQs when CONFIG_X86_FRED=y
x86/fred: Play nice with invoking asm_fred_entry_from_kvm() on non-FRED hardware
x86/fred: Install system vector handlers even if FRED isn't fully enabled
x86/hyperv: Use direct call to hypercall-page
x86/hyperv: Clean up hv_do_hypercall()
KVM: x86: Remove fastops
KVM: x86: Convert em_salc() to C
KVM: x86: Introduce EM_ASM_3WCL
KVM: x86: Introduce EM_ASM_1SRC2
KVM: x86: Introduce EM_ASM_2CL
KVM: x86: Introduce EM_ASM_2W
...

+707 -644
+18
Documentation/admin-guide/kernel-parameters.txt
··· 608 608 ccw_timeout_log [S390] 609 609 See Documentation/arch/s390/common_io.rst for details. 610 610 611 + cfi= [X86-64] Set Control Flow Integrity checking features 612 + when CONFIG_FINEIBT is enabled. 613 + Format: feature[,feature...] 614 + Default: auto 615 + 616 + auto: Use FineIBT if IBT available, otherwise kCFI. 617 + Under FineIBT, enable "paranoid" mode when 618 + FRED is not available. 619 + off: Turn off CFI checking. 620 + kcfi: Use kCFI (disable FineIBT). 621 + fineibt: Use FineIBT (even if IBT not available). 622 + norand: Do not re-randomize CFI hashes. 623 + paranoid: Add caller hash checking under FineIBT. 624 + bhi: Enable register poisoning to stop speculation 625 + across FineIBT. (Disabled by default.) 626 + warn: Do not enforce CFI checking: warn only. 627 + debug: Report CFI initialization details. 628 + 611 629 cgroup_disable= [KNL] Disable a particular controller or optional feature 612 630 Format: {name of the controller(s) or feature(s) to disable} 613 631 The effects of cgroup_disable=foo are:
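
For example (illustrative, combining only options documented above), a
FineIBT-capable machine could be booted with:

    cfi=fineibt,paranoid,debug

which forces FineIBT, adds caller hash checking, and reports initialization
details; cfi=kcfi,norand would instead select kCFI without hash
re-randomization.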
+5 -6
arch/x86/entry/calling.h
··· 99 99 .endif 100 100 .endm 101 101 102 - .macro CLEAR_REGS clear_bp=1 102 + .macro CLEAR_REGS clear_callee=1 103 103 /* 104 104 * Sanitize registers of values that a speculation attack might 105 105 * otherwise want to exploit. The lower registers are likely clobbered ··· 113 113 xorl %r9d, %r9d /* nospec r9 */ 114 114 xorl %r10d, %r10d /* nospec r10 */ 115 115 xorl %r11d, %r11d /* nospec r11 */ 116 + .if \clear_callee 116 117 xorl %ebx, %ebx /* nospec rbx */ 117 - .if \clear_bp 118 118 xorl %ebp, %ebp /* nospec rbp */ 119 - .endif 120 119 xorl %r12d, %r12d /* nospec r12 */ 121 120 xorl %r13d, %r13d /* nospec r13 */ 122 121 xorl %r14d, %r14d /* nospec r14 */ 123 122 xorl %r15d, %r15d /* nospec r15 */ 124 - 123 + .endif 125 124 .endm 126 125 127 - .macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_bp=1 unwind_hint=1 126 + .macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_callee=1 unwind_hint=1 128 127 PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint 129 - CLEAR_REGS clear_bp=\clear_bp 128 + CLEAR_REGS clear_callee=\clear_callee 130 129 .endm 131 130 132 131 .macro POP_REGS pop_rdi=1
+27 -8
arch/x86/entry/entry_64_fred.S
··· 111 111 push %rax /* Return RIP */ 112 112 push $0 /* Error code, 0 for IRQ/NMI */ 113 113 114 - PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0 114 + PUSH_AND_CLEAR_REGS clear_callee=0 unwind_hint=0 115 + 115 116 movq %rsp, %rdi /* %rdi -> pt_regs */ 116 - call __fred_entry_from_kvm /* Call the C entry point */ 117 - POP_REGS 118 - ERETS 119 - 1: 120 117 /* 121 - * Objtool doesn't understand what ERETS does, this hint tells it that 122 - * yes, we'll reach here and with what stack state. A save/restore pair 123 - * isn't strictly needed, but it's the simplest form. 118 + * At this point: {rdi, rsi, rdx, rcx, r8, r9}, {r10, r11}, {rax, rdx} 119 + * are clobbered, which corresponds to: arguments, extra caller-saved 120 + * and return. All registers a C function is allowed to clobber. 121 + * 122 + * Notably, the callee-saved registers: {rbx, r12, r13, r14, r15} 123 + * are untouched, with the exception of rbp, which carries the stack 124 + * frame and will be restored before exit. 125 + * 126 + * Further calling another C function will not alter this state. 127 + */ 128 + call __fred_entry_from_kvm /* Call the C entry point */ 129 + 130 + /* 131 + * When FRED, use ERETS to potentially clear NMIs, otherwise simply 132 + * restore the stack pointer. 133 + */ 134 + ALTERNATIVE "nop; nop; mov %rbp, %rsp", \ 135 + __stringify(add $C_PTREGS_SIZE, %rsp; ERETS), \ 136 + X86_FEATURE_FRED 137 + 138 + 1: /* 139 + * Objtool doesn't understand ERETS, and the cfi register state is 140 + * different from initial_func_cfi due to PUSH_REGS. Tell it the state 141 + * is similar to where UNWIND_HINT_SAVE is. 124 142 */ 125 143 UNWIND_HINT_RESTORE 144 + 126 145 pop %rbp 127 146 RET 128 147
+44 -25
arch/x86/hyperv/hv_init.c
··· 17 17 #include <asm/desc.h> 18 18 #include <asm/e820/api.h> 19 19 #include <asm/sev.h> 20 - #include <asm/ibt.h> 21 20 #include <asm/hypervisor.h> 22 21 #include <hyperv/hvhdk.h> 23 22 #include <asm/mshyperv.h> ··· 36 37 #include <linux/export.h> 37 38 38 39 void *hv_hypercall_pg; 40 + 41 + #ifdef CONFIG_X86_64 42 + static u64 __hv_hyperfail(u64 control, u64 param1, u64 param2) 43 + { 44 + return U64_MAX; 45 + } 46 + 47 + DEFINE_STATIC_CALL(__hv_hypercall, __hv_hyperfail); 48 + 49 + u64 hv_std_hypercall(u64 control, u64 param1, u64 param2) 50 + { 51 + u64 hv_status; 52 + 53 + register u64 __r8 asm("r8") = param2; 54 + asm volatile ("call " STATIC_CALL_TRAMP_STR(__hv_hypercall) 55 + : "=a" (hv_status), ASM_CALL_CONSTRAINT, 56 + "+c" (control), "+d" (param1), "+r" (__r8) 57 + : : "cc", "memory", "r9", "r10", "r11"); 58 + 59 + return hv_status; 60 + } 61 + 62 + typedef u64 (*hv_hypercall_f)(u64 control, u64 param1, u64 param2); 63 + 64 + static inline void hv_set_hypercall_pg(void *ptr) 65 + { 66 + hv_hypercall_pg = ptr; 67 + 68 + if (!ptr) 69 + ptr = &__hv_hyperfail; 70 + static_call_update(__hv_hypercall, (hv_hypercall_f)ptr); 71 + } 72 + #else 73 + static inline void hv_set_hypercall_pg(void *ptr) 74 + { 75 + hv_hypercall_pg = ptr; 76 + } 39 77 EXPORT_SYMBOL_GPL(hv_hypercall_pg); 78 + #endif 40 79 41 80 union hv_ghcb * __percpu *hv_ghcb_pg; 42 81 ··· 367 330 * pointer is restored on resume. 368 331 */ 369 332 hv_hypercall_pg_saved = hv_hypercall_pg; 370 - hv_hypercall_pg = NULL; 333 + hv_set_hypercall_pg(NULL); 371 334 372 335 /* Disable the hypercall page in the hypervisor */ 373 336 rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); ··· 393 356 vmalloc_to_pfn(hv_hypercall_pg_saved); 394 357 wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 395 358 396 - hv_hypercall_pg = hv_hypercall_pg_saved; 359 + hv_set_hypercall_pg(hv_hypercall_pg_saved); 397 360 hv_hypercall_pg_saved = NULL; 398 361 399 362 /* ··· 513 476 if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) 514 477 goto skip_hypercall_pg_init; 515 478 516 - hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, 517 - VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, 479 + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, MODULES_VADDR, 480 + MODULES_END, GFP_KERNEL, PAGE_KERNEL_ROX, 518 481 VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, 519 482 __builtin_return_address(0)); 520 483 if (hv_hypercall_pg == NULL) ··· 552 515 wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 553 516 } 554 517 555 - skip_hypercall_pg_init: 556 - /* 557 - * Some versions of Hyper-V that provide IBT in guest VMs have a bug 558 - * in that there's no ENDBR64 instruction at the entry to the 559 - * hypercall page. Because hypercalls are invoked via an indirect call 560 - * to the hypercall page, all hypercall attempts fail when IBT is 561 - * enabled, and Linux panics. For such buggy versions, disable IBT. 562 - * 563 - * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall 564 - * page, so if future Linux kernel versions enable IBT for 32-bit 565 - * builds, additional hypercall page hackery will be required here 566 - * to provide an ENDBR32. 
567 - */ 568 - #ifdef CONFIG_X86_KERNEL_IBT 569 - if (cpu_feature_enabled(X86_FEATURE_IBT) && 570 - *(u32 *)hv_hypercall_pg != gen_endbr()) { 571 - setup_clear_cpu_cap(X86_FEATURE_IBT); 572 - pr_warn("Disabling IBT because of Hyper-V bug\n"); 573 - } 574 - #endif 518 + hv_set_hypercall_pg(hv_hypercall_pg); 575 519 520 + skip_hypercall_pg_init: 576 521 /* 577 522 * hyperv_init() is called before LAPIC is initialized: see 578 523 * apic_intr_mode_init() -> x86_platform.apic_post_init() and
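
The hunk above is the stock static-call API; a minimal self-contained
sketch of the same shape (hypothetical names, mirroring __hv_hypercall):

    #include <linux/static_call.h>

    static u64 fail_hc(u64 control, u64 p1, u64 p2)
    {
            return U64_MAX;         /* fail until a real target is installed */
    }

    DEFINE_STATIC_CALL(my_hypercall, fail_hc);

    static void select_backend(u64 (*hc)(u64, u64, u64))
    {
            static_call_update(my_hypercall, hc);   /* patch the call site(s) */
    }

    static u64 do_hc(u64 control, u64 p1, u64 p2)
    {
            /* Patched direct call: no indirect branch, no retpoline. */
            return static_call(my_hypercall)(control, p1, p2);
    }

Unlike the old CALL_NOSPEC through the hv_hypercall_pg pointer, the patched
direct call needs no retpoline and removes an indirect branch from the hot
path.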
+15
arch/x86/hyperv/ivm.c
··· 385 385 return ret; 386 386 } 387 387 388 + u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) 389 + { 390 + u64 hv_status; 391 + 392 + register u64 __r8 asm("r8") = param2; 393 + asm volatile("vmmcall" 394 + : "=a" (hv_status), ASM_CALL_CONSTRAINT, 395 + "+c" (control), "+d" (param1), "+r" (__r8) 396 + : : "cc", "memory", "r9", "r10", "r11"); 397 + 398 + return hv_status; 399 + } 400 + 388 401 #else 389 402 static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} 390 403 static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} 404 + u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; } 391 405 #endif /* CONFIG_AMD_MEM_ENCRYPT */ 392 406 393 407 #ifdef CONFIG_INTEL_TDX_GUEST ··· 451 437 #else 452 438 static inline void hv_tdx_msr_write(u64 msr, u64 value) {} 453 439 static inline void hv_tdx_msr_read(u64 msr, u64 *value) {} 440 + u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; } 454 441 #endif /* CONFIG_INTEL_TDX_GUEST */ 455 442 456 443 #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
+7 -2
arch/x86/include/asm/bug.h
··· 5 5 #include <linux/stringify.h> 6 6 #include <linux/instrumentation.h> 7 7 #include <linux/objtool.h> 8 + #include <asm/asm.h> 8 9 9 10 /* 10 11 * Despite that some emulators terminate on UD2, we use it for WARN(). 11 12 */ 12 - #define ASM_UD2 ".byte 0x0f, 0x0b" 13 + #define ASM_UD2 _ASM_BYTES(0x0f, 0x0b) 13 14 #define INSN_UD2 0x0b0f 14 15 #define LEN_UD2 2 16 + 17 + #define ASM_UDB _ASM_BYTES(0xd6) 18 + #define INSN_UDB 0xd6 19 + #define LEN_UDB 1 15 20 16 21 /* 17 22 * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit. ··· 31 26 #define BUG_UD2 0xfffe 32 27 #define BUG_UD1 0xfffd 33 28 #define BUG_UD1_UBSAN 0xfffc 34 - #define BUG_EA 0xffea 29 + #define BUG_UDB 0xffd6 35 30 #define BUG_LOCK 0xfff0 36 31 37 32 #ifdef CONFIG_GENERIC_BUG
+6 -8
arch/x86/include/asm/cfi.h
··· 71 71 * 72 72 * __cfi_foo: 73 73 * endbr64 74 - * subl 0x12345678, %r10d 75 - * jz foo 76 - * ud2 77 - * nop 74 + * subl 0x12345678, %eax 75 + * jne.32,pn foo+3 78 76 * foo: 79 - * osp nop3 # was endbr64 77 + * nopl -42(%rax) # was endbr64 80 78 * ... code here ... 81 79 * ret 82 80 * ··· 84 86 * indirect caller: 85 87 * lea foo(%rip), %r11 86 88 * ... 87 - * movl $0x12345678, %r10d 88 - * subl $16, %r11 89 - * nop4 89 + * movl $0x12345678, %eax 90 + * lea -0x10(%r11), %r11 91 + * nop5 90 92 * call *%r11 91 93 * 92 94 */
+3 -7
arch/x86/include/asm/ibt.h
··· 59 59 static __always_inline __attribute_const__ u32 gen_endbr_poison(void) 60 60 { 61 61 /* 62 - * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it 63 - * will be unique to (former) ENDBR sites. 62 + * 4 byte NOP that isn't NOP4, such that it will be unique to (former) 63 + * ENDBR sites. Additionally it carries UDB as immediate. 64 64 */ 65 - return 0x001f0f66; /* osp nopl (%rax) */ 65 + return 0xd6401f0f; /* nopl -42(%rax) */ 66 66 } 67 67 68 68 static inline bool __is_endbr(u32 val) 69 69 { 70 70 if (val == gen_endbr_poison()) 71 - return true; 72 - 73 - /* See cfi_fineibt_bhi_preamble() */ 74 - if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5) 75 71 return true; 76 72 77 73 val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
+2 -7
arch/x86/include/asm/idtentry.h
··· 460 460 #endif 461 461 462 462 void idt_install_sysvec(unsigned int n, const void *function); 463 - 464 - #ifdef CONFIG_X86_FRED 465 463 void fred_install_sysvec(unsigned int vector, const idtentry_t function); 466 - #else 467 - static inline void fred_install_sysvec(unsigned int vector, const idtentry_t function) { } 468 - #endif 469 464 470 465 #define sysvec_install(vector, function) { \ 471 - if (cpu_feature_enabled(X86_FEATURE_FRED)) \ 466 + if (IS_ENABLED(CONFIG_X86_FRED)) \ 472 467 fred_install_sysvec(vector, function); \ 473 - else \ 468 + if (!cpu_feature_enabled(X86_FEATURE_FRED)) \ 474 469 idt_install_sysvec(vector, asm_##function); \ 475 470 } 476 471
+41 -96
arch/x86/include/asm/mshyperv.h
··· 6 6 #include <linux/nmi.h> 7 7 #include <linux/msi.h> 8 8 #include <linux/io.h> 9 + #include <linux/static_call.h> 9 10 #include <asm/nospec-branch.h> 10 11 #include <asm/paravirt.h> 11 12 #include <asm/msr.h> ··· 40 39 return 0; 41 40 } 42 41 43 - #if IS_ENABLED(CONFIG_HYPERV) 44 - extern bool hyperv_paravisor_present; 42 + extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); 43 + extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2); 44 + extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2); 45 45 46 + #if IS_ENABLED(CONFIG_HYPERV) 46 47 extern void *hv_hypercall_pg; 47 48 48 49 extern union hv_ghcb * __percpu *hv_ghcb_pg; 49 50 50 51 bool hv_isolation_type_snp(void); 51 52 bool hv_isolation_type_tdx(void); 52 - u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); 53 + 54 + #ifdef CONFIG_X86_64 55 + DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall); 56 + #endif 53 57 54 58 /* 55 59 * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA ··· 71 65 { 72 66 u64 input_address = input ? virt_to_phys(input) : 0; 73 67 u64 output_address = output ? virt_to_phys(output) : 0; 74 - u64 hv_status; 75 68 76 69 #ifdef CONFIG_X86_64 77 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) 78 - return hv_tdx_hypercall(control, input_address, output_address); 79 - 80 - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { 81 - __asm__ __volatile__("mov %[output_address], %%r8\n" 82 - "vmmcall" 83 - : "=a" (hv_status), ASM_CALL_CONSTRAINT, 84 - "+c" (control), "+d" (input_address) 85 - : [output_address] "r" (output_address) 86 - : "cc", "memory", "r8", "r9", "r10", "r11"); 87 - return hv_status; 88 - } 89 - 90 - if (!hv_hypercall_pg) 91 - return U64_MAX; 92 - 93 - __asm__ __volatile__("mov %[output_address], %%r8\n" 94 - CALL_NOSPEC 95 - : "=a" (hv_status), ASM_CALL_CONSTRAINT, 96 - "+c" (control), "+d" (input_address) 97 - : [output_address] "r" (output_address), 98 - THUNK_TARGET(hv_hypercall_pg) 99 - : "cc", "memory", "r8", "r9", "r10", "r11"); 70 + return static_call_mod(hv_hypercall)(control, input_address, output_address); 100 71 #else 101 72 u32 input_address_hi = upper_32_bits(input_address); 102 73 u32 input_address_lo = lower_32_bits(input_address); 103 74 u32 output_address_hi = upper_32_bits(output_address); 104 75 u32 output_address_lo = lower_32_bits(output_address); 76 + u64 hv_status; 105 77 106 78 if (!hv_hypercall_pg) 107 79 return U64_MAX; ··· 92 108 "D"(output_address_hi), "S"(output_address_lo), 93 109 THUNK_TARGET(hv_hypercall_pg) 94 110 : "cc", "memory"); 95 - #endif /* !x86_64 */ 96 111 return hv_status; 112 + #endif /* !x86_64 */ 97 113 } 98 114 99 115 /* Fast hypercall with 8 bytes of input and no output */ 100 116 static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) 101 117 { 118 + #ifdef CONFIG_X86_64 119 + return static_call_mod(hv_hypercall)(control, input1, 0); 120 + #else 121 + u32 input1_hi = upper_32_bits(input1); 122 + u32 input1_lo = lower_32_bits(input1); 102 123 u64 hv_status; 103 124 104 - #ifdef CONFIG_X86_64 105 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) 106 - return hv_tdx_hypercall(control, input1, 0); 107 - 108 - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { 109 - __asm__ __volatile__( 110 - "vmmcall" 111 - : "=a" (hv_status), ASM_CALL_CONSTRAINT, 112 - "+c" (control), "+d" (input1) 113 - :: "cc", "r8", "r9", "r10", "r11"); 114 - } else { 115 - __asm__ __volatile__(CALL_NOSPEC 116 - : "=a" (hv_status), ASM_CALL_CONSTRAINT, 117 - "+c" (control), "+d" 
(input1) 118 - : THUNK_TARGET(hv_hypercall_pg) 119 - : "cc", "r8", "r9", "r10", "r11"); 120 - } 121 - #else 122 - { 123 - u32 input1_hi = upper_32_bits(input1); 124 - u32 input1_lo = lower_32_bits(input1); 125 - 126 - __asm__ __volatile__ (CALL_NOSPEC 127 - : "=A"(hv_status), 128 - "+c"(input1_lo), 129 - ASM_CALL_CONSTRAINT 130 - : "A" (control), 131 - "b" (input1_hi), 132 - THUNK_TARGET(hv_hypercall_pg) 133 - : "cc", "edi", "esi"); 134 - } 135 - #endif 125 + __asm__ __volatile__ (CALL_NOSPEC 126 + : "=A"(hv_status), 127 + "+c"(input1_lo), 128 + ASM_CALL_CONSTRAINT 129 + : "A" (control), 130 + "b" (input1_hi), 131 + THUNK_TARGET(hv_hypercall_pg) 132 + : "cc", "edi", "esi"); 136 133 return hv_status; 134 + #endif 137 135 } 138 136 139 137 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) ··· 128 162 /* Fast hypercall with 16 bytes of input */ 129 163 static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) 130 164 { 165 + #ifdef CONFIG_X86_64 166 + return static_call_mod(hv_hypercall)(control, input1, input2); 167 + #else 168 + u32 input1_hi = upper_32_bits(input1); 169 + u32 input1_lo = lower_32_bits(input1); 170 + u32 input2_hi = upper_32_bits(input2); 171 + u32 input2_lo = lower_32_bits(input2); 131 172 u64 hv_status; 132 173 133 - #ifdef CONFIG_X86_64 134 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) 135 - return hv_tdx_hypercall(control, input1, input2); 136 - 137 - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { 138 - __asm__ __volatile__("mov %[input2], %%r8\n" 139 - "vmmcall" 140 - : "=a" (hv_status), ASM_CALL_CONSTRAINT, 141 - "+c" (control), "+d" (input1) 142 - : [input2] "r" (input2) 143 - : "cc", "r8", "r9", "r10", "r11"); 144 - } else { 145 - __asm__ __volatile__("mov %[input2], %%r8\n" 146 - CALL_NOSPEC 147 - : "=a" (hv_status), ASM_CALL_CONSTRAINT, 148 - "+c" (control), "+d" (input1) 149 - : [input2] "r" (input2), 150 - THUNK_TARGET(hv_hypercall_pg) 151 - : "cc", "r8", "r9", "r10", "r11"); 152 - } 153 - #else 154 - { 155 - u32 input1_hi = upper_32_bits(input1); 156 - u32 input1_lo = lower_32_bits(input1); 157 - u32 input2_hi = upper_32_bits(input2); 158 - u32 input2_lo = lower_32_bits(input2); 159 - 160 - __asm__ __volatile__ (CALL_NOSPEC 161 - : "=A"(hv_status), 162 - "+c"(input1_lo), ASM_CALL_CONSTRAINT 163 - : "A" (control), "b" (input1_hi), 164 - "D"(input2_hi), "S"(input2_lo), 165 - THUNK_TARGET(hv_hypercall_pg) 166 - : "cc"); 167 - } 168 - #endif 174 + __asm__ __volatile__ (CALL_NOSPEC 175 + : "=A"(hv_status), 176 + "+c"(input1_lo), ASM_CALL_CONSTRAINT 177 + : "A" (control), "b" (input1_hi), 178 + "D"(input2_hi), "S"(input2_lo), 179 + THUNK_TARGET(hv_hypercall_pg) 180 + : "cc"); 169 181 return hv_status; 182 + #endif 170 183 } 171 184 172 185 static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
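
One detail worth noting (my annotation): hv_do_hypercall() uses
static_call_mod() rather than static_call(), so callers inlined into
modules always go through the out-of-line trampoline; that trampoline is
exactly what mshyperv.c exports with EXPORT_STATIC_CALL_TRAMP_GPL, letting
modules make the call without gaining the ability to update it:

    /* Core kernel: owns the key and may retarget it. */
    DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
    EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);     /* trampoline only */

    /* Header code built into modules: call via the trampoline. */
    return static_call_mod(hv_hypercall)(control, input_address, output_address);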
+13 -7
arch/x86/include/asm/text-patching.h
··· 178 178 } 179 179 180 180 static __always_inline 181 - void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) 181 + bool __emulate_cc(unsigned long flags, u8 cc) 182 182 { 183 - static const unsigned long jcc_mask[6] = { 183 + static const unsigned long cc_mask[6] = { 184 184 [0] = X86_EFLAGS_OF, 185 185 [1] = X86_EFLAGS_CF, 186 186 [2] = X86_EFLAGS_ZF, ··· 193 193 bool match; 194 194 195 195 if (cc < 0xc) { 196 - match = regs->flags & jcc_mask[cc >> 1]; 196 + match = flags & cc_mask[cc >> 1]; 197 197 } else { 198 - match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ 199 - ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); 198 + match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ 199 + ((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); 200 200 if (cc >= 0xe) 201 - match = match || (regs->flags & X86_EFLAGS_ZF); 201 + match = match || (flags & X86_EFLAGS_ZF); 202 202 } 203 203 204 - if ((match && !invert) || (!match && invert)) 204 + return (match && !invert) || (!match && invert); 205 + } 206 + 207 + static __always_inline 208 + void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) 209 + { 210 + if (__emulate_cc(regs->flags, cc)) 205 211 ip += disp; 206 212 207 213 int3_emulate_jmp(regs, ip);
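
Factoring __emulate_cc() out of int3_emulate_jcc() is what lets KVM's
emulator evaluate condition codes without an asm trampoline (see the
emulate.c hunk below). A toy use, assuming saved flags in pt_regs:

    /* Hypothetical helper: would a JNE (opcode 0x75, low nibble 0x5) be taken? */
    static bool would_take_jne(struct pt_regs *regs)
    {
            return __emulate_cc(regs->flags, 0x75 & 0xf);
    }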
+176 -116
arch/x86/kernel/alternative.c
··· 147 147 /* 148 148 * When ITS uses indirect branch thunk the fineibt_paranoid 149 149 * caller sequence doesn't fit in the caller site. So put the 150 - * remaining part of the sequence (<ea> + JNE) into the ITS 150 + * remaining part of the sequence (UDB + JNE) into the ITS 151 151 * thunk. 152 152 */ 153 - bytes[i++] = 0xea; /* invalid instruction */ 153 + bytes[i++] = 0xd6; /* UDB */ 154 154 bytes[i++] = 0x75; /* JNE */ 155 155 bytes[i++] = 0xfd; 156 156 ··· 163 163 reg -= 8; 164 164 } 165 165 bytes[i++] = 0xff; 166 - bytes[i++] = 0xe0 + reg; /* jmp *reg */ 166 + bytes[i++] = 0xe0 + reg; /* JMP *reg */ 167 167 bytes[i++] = 0xcc; 168 168 169 169 return thunk + offset; ··· 713 713 #if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL) 714 714 715 715 /* 716 - * CALL/JMP *%\reg 716 + * [CS]{,3} CALL/JMP *%\reg [INT3]* 717 717 */ 718 - static int emit_indirect(int op, int reg, u8 *bytes) 718 + static int emit_indirect(int op, int reg, u8 *bytes, int len) 719 719 { 720 + int cs = 0, bp = 0; 720 721 int i = 0; 721 722 u8 modrm; 723 + 724 + /* 725 + * Set @len to the excess bytes after writing the instruction. 726 + */ 727 + len -= 2 + (reg >= 8); 728 + WARN_ON_ONCE(len < 0); 722 729 723 730 switch (op) { 724 731 case CALL_INSN_OPCODE: 725 732 modrm = 0x10; /* Reg = 2; CALL r/m */ 733 + /* 734 + * Additional NOP is better than prefix decode penalty. 735 + */ 736 + if (len <= 3) 737 + cs = len; 726 738 break; 727 739 728 740 case JMP32_INSN_OPCODE: 729 741 modrm = 0x20; /* Reg = 4; JMP r/m */ 742 + bp = len; 730 743 break; 731 744 732 745 default: 733 746 WARN_ON_ONCE(1); 734 747 return -1; 735 748 } 749 + 750 + while (cs--) 751 + bytes[i++] = 0x2e; /* CS-prefix */ 736 752 737 753 if (reg >= 8) { 738 754 bytes[i++] = 0x41; /* REX.B prefix */ ··· 760 744 761 745 bytes[i++] = 0xff; /* opcode */ 762 746 bytes[i++] = modrm; 747 + 748 + while (bp--) 749 + bytes[i++] = 0xcc; /* INT3 */ 763 750 764 751 return i; 765 752 } ··· 937 918 return emit_its_trampoline(addr, insn, reg, bytes); 938 919 #endif 939 920 940 - ret = emit_indirect(op, reg, bytes + i); 921 + ret = emit_indirect(op, reg, bytes + i, insn->length - i); 941 922 if (ret < 0) 942 923 return ret; 943 924 i += ret; 944 - 945 - /* 946 - * The compiler is supposed to EMIT an INT3 after every unconditional 947 - * JMP instruction due to AMD BTC. However, if the compiler is too old 948 - * or MITIGATION_SLS isn't enabled, we still need an INT3 after 949 - * indirect JMPs even on Intel. 
950 - */ 951 - if (op == JMP32_INSN_OPCODE && i < insn->length) 952 - bytes[i++] = INT3_INSN_OPCODE; 953 925 954 926 for (; i < insn->length;) 955 927 bytes[i++] = BYTES_NOP1; ··· 980 970 case JMP32_INSN_OPCODE: 981 971 /* Check for cfi_paranoid + ITS */ 982 972 dest = addr + insn.length + insn.immediate.value; 983 - if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) { 973 + if (dest[-1] == 0xd6 && (dest[0] & 0xf0) == 0x70) { 984 974 WARN_ON_ONCE(cfi_mode != CFI_FINEIBT); 985 975 continue; 986 976 } ··· 1187 1177 #endif 1188 1178 1189 1179 enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT; 1180 + static bool cfi_debug __ro_after_init; 1190 1181 1191 1182 #ifdef CONFIG_FINEIBT_BHI 1192 1183 bool cfi_bhi __ro_after_init = false; ··· 1270 1259 } else if (!strcmp(str, "off")) { 1271 1260 cfi_mode = CFI_OFF; 1272 1261 cfi_rand = false; 1262 + } else if (!strcmp(str, "debug")) { 1263 + cfi_debug = true; 1273 1264 } else if (!strcmp(str, "kcfi")) { 1274 1265 cfi_mode = CFI_KCFI; 1275 1266 } else if (!strcmp(str, "fineibt")) { ··· 1279 1266 } else if (!strcmp(str, "norand")) { 1280 1267 cfi_rand = false; 1281 1268 } else if (!strcmp(str, "warn")) { 1282 - pr_alert("CFI mismatch non-fatal!\n"); 1269 + pr_alert("CFI: mismatch non-fatal!\n"); 1283 1270 cfi_warn = true; 1284 1271 } else if (!strcmp(str, "paranoid")) { 1285 1272 if (cfi_mode == CFI_FINEIBT) { 1286 1273 cfi_paranoid = true; 1287 1274 } else { 1288 - pr_err("Ignoring paranoid; depends on fineibt.\n"); 1275 + pr_err("CFI: ignoring paranoid; depends on fineibt.\n"); 1289 1276 } 1290 1277 } else if (!strcmp(str, "bhi")) { 1291 1278 #ifdef CONFIG_FINEIBT_BHI 1292 1279 if (cfi_mode == CFI_FINEIBT) { 1293 1280 cfi_bhi = true; 1294 1281 } else { 1295 - pr_err("Ignoring bhi; depends on fineibt.\n"); 1282 + pr_err("CFI: ignoring bhi; depends on fineibt.\n"); 1296 1283 } 1297 1284 #else 1298 - pr_err("Ignoring bhi; depends on FINEIBT_BHI=y.\n"); 1285 + pr_err("CFI: ignoring bhi; depends on FINEIBT_BHI=y.\n"); 1299 1286 #endif 1300 1287 } else { 1301 - pr_err("Ignoring unknown cfi option (%s).", str); 1288 + pr_err("CFI: Ignoring unknown option (%s).", str); 1302 1289 } 1303 1290 1304 1291 str = next; ··· 1313 1300 * 1314 1301 * __cfi_\func: __cfi_\func: 1315 1302 * movl $0x12345678,%eax // 5 endbr64 // 4 1316 - * nop subl $0x12345678,%r10d // 7 1317 - * nop jne __cfi_\func+6 // 2 1318 - * nop nop3 // 3 1303 + * nop subl $0x12345678,%eax // 5 1304 + * nop jne.d32,pn \func+3 // 7 1319 1305 * nop 1320 1306 * nop 1321 1307 * nop ··· 1323 1311 * nop 1324 1312 * nop 1325 1313 * nop 1314 + * nop 1315 + * \func: \func: 1316 + * endbr64 nopl -42(%rax) 1326 1317 * 1327 1318 * 1328 1319 * caller: caller: 1329 - * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6 1320 + * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%eax // 5 1330 1321 * addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4 1331 - * je 1f // 2 nop4 // 4 1322 + * je 1f // 2 nop5 // 5 1332 1323 * ud2 // 2 1333 1324 * 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6 1334 1325 * 1326 + * 1327 + * Notably, the FineIBT sequences are crafted such that branches are presumed 1328 + * non-taken. This is based on Agner Fog's optimization manual, which states: 1329 + * 1330 + * "Make conditional jumps most often not taken: The efficiency and throughput 1331 + * for not-taken branches is better than for taken branches on most 1332 + * processors. 
Therefore, it is good to place the most frequent branch first" 1335 1333 */ 1336 1334 1337 1335 /* 1338 1336 * <fineibt_preamble_start>: 1339 1337 * 0: f3 0f 1e fa endbr64 1340 - * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d 1341 - * b: 75 f9 jne 6 <fineibt_preamble_start+0x6> 1342 - * d: 0f 1f 00 nopl (%rax) 1338 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax 1339 + * 9: 2e 0f 85 03 00 00 00 jne,pn 13 <fineibt_preamble_start+0x13> 1340 + * 10: 0f 1f 40 d6 nopl -0x2a(%rax) 1343 1341 * 1344 - * Note that the JNE target is the 0xEA byte inside the SUB, this decodes as 1345 - * (bad) on x86_64 and raises #UD. 1342 + * Note that the JNE target is the 0xD6 byte inside the NOPL, this decodes as 1343 + * UDB on x86_64 and raises #UD. 1346 1344 */ 1347 1345 asm( ".pushsection .rodata \n" 1348 1346 "fineibt_preamble_start: \n" 1349 1347 " endbr64 \n" 1350 - " subl $0x12345678, %r10d \n" 1348 + " subl $0x12345678, %eax \n" 1351 1349 "fineibt_preamble_bhi: \n" 1352 - " jne fineibt_preamble_start+6 \n" 1353 - ASM_NOP3 1350 + " cs jne.d32 fineibt_preamble_start+0x13 \n" 1351 + "#fineibt_func: \n" 1352 + " nopl -42(%rax) \n" 1354 1353 "fineibt_preamble_end: \n" 1355 1354 ".popsection\n" 1356 1355 ); ··· 1372 1349 1373 1350 #define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start) 1374 1351 #define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start) 1375 - #define fineibt_preamble_ud 6 1376 - #define fineibt_preamble_hash 7 1352 + #define fineibt_preamble_ud 0x13 1353 + #define fineibt_preamble_hash 5 1377 1354 1378 1355 /* 1379 1356 * <fineibt_caller_start>: 1380 - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 1381 - * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11 1382 - * a: 0f 1f 40 00 nopl 0x0(%rax) 1357 + * 0: b8 78 56 34 12 mov $0x12345678, %eax 1358 + * 5: 4d 8d 5b f0 lea -0x10(%r11), %r11 1359 + * 9: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 1383 1360 */ 1384 1361 asm( ".pushsection .rodata \n" 1385 1362 "fineibt_caller_start: \n" 1386 - " movl $0x12345678, %r10d \n" 1363 + " movl $0x12345678, %eax \n" 1387 1364 " lea -0x10(%r11), %r11 \n" 1388 - ASM_NOP4 1365 + ASM_NOP5 1389 1366 "fineibt_caller_end: \n" 1390 1367 ".popsection \n" 1391 1368 ); ··· 1394 1371 extern u8 fineibt_caller_end[]; 1395 1372 1396 1373 #define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start) 1397 - #define fineibt_caller_hash 2 1374 + #define fineibt_caller_hash 1 1398 1375 1399 1376 #define fineibt_caller_jmp (fineibt_caller_size - 2) 1400 1377 ··· 1411 1388 * of adding a load. 
1412 1389 * 1413 1390 * <fineibt_paranoid_start>: 1414 - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 1415 - * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d 1416 - * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 1391 + * 0: b8 78 56 34 12 mov $0x12345678, %eax 1392 + * 5: 41 3b 43 f5 cmp -0x11(%r11), %eax 1393 + * 9: 2e 4d 8d 5b <f0> cs lea -0x10(%r11), %r11 1417 1394 * e: 75 fd jne d <fineibt_paranoid_start+0xd> 1418 1395 * 10: 41 ff d3 call *%r11 1419 1396 * 13: 90 nop ··· 1425 1402 */ 1426 1403 asm( ".pushsection .rodata \n" 1427 1404 "fineibt_paranoid_start: \n" 1428 - " movl $0x12345678, %r10d \n" 1429 - " cmpl -9(%r11), %r10d \n" 1430 - " lea -0x10(%r11), %r11 \n" 1405 + " mov $0x12345678, %eax \n" 1406 + " cmpl -11(%r11), %eax \n" 1407 + " cs lea -0x10(%r11), %r11 \n" 1408 + "#fineibt_caller_size: \n" 1431 1409 " jne fineibt_paranoid_start+0xd \n" 1432 1410 "fineibt_paranoid_ind: \n" 1433 - " call *%r11 \n" 1434 - " nop \n" 1411 + " cs call *%r11 \n" 1435 1412 "fineibt_paranoid_end: \n" 1436 1413 ".popsection \n" 1437 1414 ); ··· 1543 1520 return 0; 1544 1521 } 1545 1522 1523 + /* 1524 + * Inline the bhi-arity 1 case: 1525 + * 1526 + * __cfi_foo: 1527 + * 0: f3 0f 1e fa endbr64 1528 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax 1529 + * 9: 49 0f 45 fa cmovne %rax, %rdi 1530 + * d: 2e 75 03 jne,pn foo+0x3 1531 + * 1532 + * foo: 1533 + * 10: 0f 1f 40 <d6> nopl -42(%rax) 1534 + * 1535 + * Notably, this scheme is incompatible with permissive CFI 1536 + * because the CMOVcc is unconditional and RDI will have been 1537 + * clobbered. 1538 + */ 1539 + asm( ".pushsection .rodata \n" 1540 + "fineibt_bhi1_start: \n" 1541 + " cmovne %rax, %rdi \n" 1542 + " cs jne fineibt_bhi1_func + 0x3 \n" 1543 + "fineibt_bhi1_func: \n" 1544 + " nopl -42(%rax) \n" 1545 + "fineibt_bhi1_end: \n" 1546 + ".popsection \n" 1547 + ); 1548 + 1549 + extern u8 fineibt_bhi1_start[]; 1550 + extern u8 fineibt_bhi1_end[]; 1551 + 1552 + #define fineibt_bhi1_size (fineibt_bhi1_end - fineibt_bhi1_start) 1553 + 1546 1554 static void cfi_fineibt_bhi_preamble(void *addr, int arity) 1547 1555 { 1556 + u8 bytes[MAX_INSN_SIZE]; 1557 + 1548 1558 if (!arity) 1549 1559 return; 1550 1560 1551 1561 if (!cfi_warn && arity == 1) { 1552 - /* 1553 - * Crazy scheme to allow arity-1 inline: 1554 - * 1555 - * __cfi_foo: 1556 - * 0: f3 0f 1e fa endbr64 1557 - * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d 1558 - * b: 49 0f 45 fa cmovne %r10, %rdi 1559 - * f: 75 f5 jne __cfi_foo+6 1560 - * 11: 0f 1f 00 nopl (%rax) 1561 - * 1562 - * Code that direct calls to foo()+0, decodes the tail end as: 1563 - * 1564 - * foo: 1565 - * 0: f5 cmc 1566 - * 1: 0f 1f 00 nopl (%rax) 1567 - * 1568 - * which clobbers CF, but does not affect anything ABI 1569 - * wise. 1570 - * 1571 - * Notably, this scheme is incompatible with permissive CFI 1572 - * because the CMOVcc is unconditional and RDI will have been 1573 - * clobbered. 
1574 - */ 1575 - const u8 magic[9] = { 1576 - 0x49, 0x0f, 0x45, 0xfa, 1577 - 0x75, 0xf5, 1578 - BYTES_NOP3, 1579 - }; 1580 - 1581 - text_poke_early(addr + fineibt_preamble_bhi, magic, 9); 1582 - 1562 + text_poke_early(addr + fineibt_preamble_bhi, 1563 + fineibt_bhi1_start, fineibt_bhi1_size); 1583 1564 return; 1584 1565 } 1585 1566 1586 - text_poke_early(addr + fineibt_preamble_bhi, 1587 - text_gen_insn(CALL_INSN_OPCODE, 1588 - addr + fineibt_preamble_bhi, 1589 - __bhi_args[arity]), 1590 - CALL_INSN_SIZE); 1567 + /* 1568 + * Replace the bytes at fineibt_preamble_bhi with a CALL instruction 1569 + * that lines up exactly with the end of the preamble, such that the 1570 + * return address will be foo+0. 1571 + * 1572 + * __cfi_foo: 1573 + * 0: f3 0f 1e fa endbr64 1574 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax 1575 + * 9: 2e 2e e8 DD DD DD DD cs cs call __bhi_args[arity] 1576 + */ 1577 + bytes[0] = 0x2e; 1578 + bytes[1] = 0x2e; 1579 + __text_gen_insn(bytes + 2, CALL_INSN_OPCODE, 1580 + addr + fineibt_preamble_bhi + 2, 1581 + __bhi_args[arity], CALL_INSN_SIZE); 1582 + 1583 + text_poke_early(addr + fineibt_preamble_bhi, bytes, 7); 1591 1584 } 1592 1585 1593 1586 static int cfi_rewrite_preamble(s32 *start, s32 *end) ··· 1694 1655 { 1695 1656 s32 *s; 1696 1657 1697 - BUG_ON(fineibt_paranoid_size != 20); 1698 - 1699 1658 for (s = start; s < end; s++) { 1700 1659 void *addr = (void *)s + *s; 1701 1660 struct insn insn; ··· 1733 1696 emit_paranoid_trampoline(addr + fineibt_caller_size, 1734 1697 &insn, 11, bytes + fineibt_caller_size); 1735 1698 } else { 1736 - ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind); 1737 - if (WARN_ON_ONCE(ret != 3)) 1699 + int len = fineibt_paranoid_size - fineibt_paranoid_ind; 1700 + ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind, len); 1701 + if (WARN_ON_ONCE(ret != len)) 1738 1702 continue; 1739 1703 } 1740 1704 ··· 1745 1707 return 0; 1746 1708 } 1747 1709 1710 + #define pr_cfi_debug(X...) if (cfi_debug) pr_info(X) 1711 + 1712 + #define FINEIBT_WARN(_f, _v) \ 1713 + WARN_ONCE((_f) != (_v), "FineIBT: " #_f " %ld != %d\n", _f, _v) 1714 + 1748 1715 static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, 1749 1716 s32 *start_cfi, s32 *end_cfi, bool builtin) 1750 1717 { 1751 1718 int ret; 1752 1719 1753 - if (WARN_ONCE(fineibt_preamble_size != 16, 1754 - "FineIBT preamble wrong size: %ld", fineibt_preamble_size)) 1720 + if (FINEIBT_WARN(fineibt_preamble_size, 20) || 1721 + FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) || 1722 + FINEIBT_WARN(fineibt_caller_size, 14) || 1723 + FINEIBT_WARN(fineibt_paranoid_size, 20)) 1755 1724 return; 1756 1725 1757 1726 if (cfi_mode == CFI_AUTO) { ··· 1779 1734 * rewrite them. This disables all CFI. If this succeeds but any of the 1780 1735 * later stages fails, we're without CFI. 
1781 1736 */ 1737 + pr_cfi_debug("CFI: disabling all indirect call checking\n"); 1782 1738 ret = cfi_disable_callers(start_retpoline, end_retpoline); 1783 1739 if (ret) 1784 1740 goto err; ··· 1790 1744 cfi_bpf_hash = cfi_rehash(cfi_bpf_hash); 1791 1745 cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash); 1792 1746 } 1747 + pr_cfi_debug("CFI: cfi_seed: 0x%08x\n", cfi_seed); 1793 1748 1749 + pr_cfi_debug("CFI: rehashing all preambles\n"); 1794 1750 ret = cfi_rand_preamble(start_cfi, end_cfi); 1795 1751 if (ret) 1796 1752 goto err; 1797 1753 1754 + pr_cfi_debug("CFI: rehashing all indirect calls\n"); 1798 1755 ret = cfi_rand_callers(start_retpoline, end_retpoline); 1799 1756 if (ret) 1800 1757 goto err; 1758 + } else { 1759 + pr_cfi_debug("CFI: rehashing disabled\n"); 1801 1760 } 1802 1761 1803 1762 switch (cfi_mode) { 1804 1763 case CFI_OFF: 1805 1764 if (builtin) 1806 - pr_info("Disabling CFI\n"); 1765 + pr_info("CFI: disabled\n"); 1807 1766 return; 1808 1767 1809 1768 case CFI_KCFI: 1769 + pr_cfi_debug("CFI: re-enabling all indirect call checking\n"); 1810 1770 ret = cfi_enable_callers(start_retpoline, end_retpoline); 1811 1771 if (ret) 1812 1772 goto err; 1813 1773 1814 1774 if (builtin) 1815 - pr_info("Using kCFI\n"); 1775 + pr_info("CFI: Using %sretpoline kCFI\n", 1776 + cfi_rand ? "rehashed " : ""); 1816 1777 return; 1817 1778 1818 1779 case CFI_FINEIBT: 1780 + pr_cfi_debug("CFI: adding FineIBT to all preambles\n"); 1819 1781 /* place the FineIBT preamble at func()-16 */ 1820 1782 ret = cfi_rewrite_preamble(start_cfi, end_cfi); 1821 1783 if (ret) 1822 1784 goto err; 1823 1785 1824 1786 /* rewrite the callers to target func()-16 */ 1787 + pr_cfi_debug("CFI: rewriting indirect call sites to use FineIBT\n"); 1825 1788 ret = cfi_rewrite_callers(start_retpoline, end_retpoline); 1826 1789 if (ret) 1827 1790 goto err; 1828 1791 1829 1792 /* now that nobody targets func()+0, remove ENDBR there */ 1793 + pr_cfi_debug("CFI: removing old endbr insns\n"); 1830 1794 cfi_rewrite_endbr(start_cfi, end_cfi); 1831 1795 1832 1796 if (builtin) { ··· 1879 1823 1880 1824 /* 1881 1825 * __cfi_\func: 1882 - * osp nopl (%rax) 1883 - * subl $0, %r10d 1884 - * jz 1f 1885 - * ud2 1886 - * 1: nop 1826 + * nopl -42(%rax) 1827 + * sub $0, %eax 1828 + * jne \func+3 1829 + * \func: 1830 + * nopl -42(%rax) 1887 1831 */ 1888 1832 poison_endbr(addr); 1889 1833 poison_hash(addr + fineibt_preamble_hash); ··· 1909 1853 } 1910 1854 } 1911 1855 1856 + #define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE) 1857 + 1912 1858 /* 1913 - * When regs->ip points to a 0xEA byte in the FineIBT preamble, 1859 + * When regs->ip points to a 0xD6 byte in the FineIBT preamble, 1914 1860 * return true and fill out target and type. 1915 1861 * 1916 1862 * We check the preamble by checking for the ENDBR instruction relative to the 1917 - * 0xEA instruction. 1863 + * UDB instruction. 
1918 1864 */ 1919 1865 static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type) 1920 1866 { ··· 1926 1868 if (!exact_endbr((void *)addr)) 1927 1869 return false; 1928 1870 1929 - *target = addr + fineibt_preamble_size; 1871 + *target = addr + fineibt_prefix_size; 1930 1872 1931 1873 __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); 1932 - *type = (u32)regs->r10 + hash; 1874 + *type = (u32)regs->ax + hash; 1933 1875 1934 1876 /* 1935 1877 * Since regs->ip points to the middle of an instruction; it cannot ··· 1967 1909 __get_kernel_nofault(&addr, regs->sp, unsigned long, Efault); 1968 1910 *target = addr; 1969 1911 1970 - addr -= fineibt_preamble_size; 1912 + addr -= fineibt_prefix_size; 1971 1913 if (!exact_endbr((void *)addr)) 1972 1914 return false; 1973 1915 1974 1916 __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); 1975 - *type = (u32)regs->r10 + hash; 1917 + *type = (u32)regs->ax + hash; 1976 1918 1977 1919 /* 1978 1920 * The UD2 sites are constructed with a RET immediately following, ··· 1989 1931 u32 thunk; 1990 1932 1991 1933 __get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault); 1992 - return (thunk & 0x00FFFFFF) == 0xfd75ea; 1934 + return (thunk & 0x00FFFFFF) == 0xfd75d6; 1993 1935 1994 1936 Efault: 1995 1937 return false; ··· 1997 1939 1998 1940 /* 1999 1941 * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[] 2000 - * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS 2001 - * thunk. 1942 + * sequence, or to UDB + Jcc.d8 for cfi_paranoid + ITS thunk. 2002 1943 */ 2003 1944 static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type) 2004 1945 { ··· 2007 1950 return false; 2008 1951 2009 1952 if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) { 2010 - *target = regs->r11 + fineibt_preamble_size; 2011 - *type = regs->r10; 1953 + *target = regs->r11 + fineibt_prefix_size; 1954 + *type = regs->ax; 2012 1955 2013 1956 /* 2014 1957 * Since the trapping instruction is the exact, but LOCK prefixed, ··· 2020 1963 /* 2021 1964 * The cfi_paranoid + ITS thunk combination results in: 2022 1965 * 2023 - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 2024 - * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d 2025 - * a: 4d 8d 5b f0 lea -0x10(%r11), %r11 1966 + * 0: b8 78 56 34 12 mov $0x12345678, %eax 1967 + * 5: 41 3b 43 f7 cmp -11(%r11), %eax 1968 + * a: 2e 3d 8d 5b f0 cs lea -0x10(%r11), %r11 2026 1969 * e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11 2027 1970 * 2028 1971 * Where the paranoid_thunk looks like: 2029 1972 * 2030 - * 1d: <ea> (bad) 1973 + * 1d: <d6> udb 2031 1974 * __x86_indirect_paranoid_thunk_r11: 2032 1975 * 1e: 75 fd jne 1d 2033 1976 * __x86_indirect_its_thunk_r11: ··· 2036 1979 * 2037 1980 */ 2038 1981 if (is_paranoid_thunk(regs->ip)) { 2039 - *target = regs->r11 + fineibt_preamble_size; 2040 - *type = regs->r10; 1982 + *target = regs->r11 + fineibt_prefix_size; 1983 + *type = regs->ax; 2041 1984 2042 1985 regs->ip = *target; 2043 1986 return true; ··· 2062 2005 static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, 2063 2006 s32 *start_cfi, s32 *end_cfi, bool builtin) 2064 2007 { 2008 + if (IS_ENABLED(CONFIG_CFI) && builtin) 2009 + pr_info("CFI: Using standard kCFI\n"); 2065 2010 } 2066 2011 2067 2012 #ifdef CONFIG_X86_KERNEL_IBT ··· 2380 2321 2381 2322 __apply_fineibt(__retpoline_sites, __retpoline_sites_end, 2382 2323 __cfi_sites, __cfi_sites_end, true); 2324 + cfi_debug = false; 2383 
2325 2384 2326 /* 2385 2327 * Rewrite the retpolines, must be done before alternatives since
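
For concreteness, my reading of the new emit_indirect() above (a sketch,
not authoritative): excess slot bytes become CS prefixes for CALL but
trailing INT3s for JMP, keeping the speculation stop after jumps:

    /* CALL *%r11 in a 6-byte slot: 2e 2e 2e 41 ff d3  (cs cs cs call *%r11)  */
    /* JMP  *%r11 in a 5-byte slot: 41 ff e3 cc cc     (jmp *%r11; int3 x2)   */
    /* A CALL with more than 3 excess bytes falls back to NOP padding.        */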
+1
arch/x86/kernel/asm-offsets.c
··· 102 102 103 103 BLANK(); 104 104 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); 105 + OFFSET(C_PTREGS_SIZE, pt_regs, orig_ax); 105 106 106 107 /* TLB state for the entry code */ 107 108 OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
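
A gloss (mine): OFFSET(C_PTREGS_SIZE, pt_regs, orig_ax) emits the offset of
orig_ax into asm-offsets.h, i.e. the size of just the 15 pushed GPRs, which
is what the FRED-on-KVM exit path adds back to %rsp before ERETS:

    /* Generated into asm-offsets.h, roughly: */
    #define C_PTREGS_SIZE 120       /* offsetof(struct pt_regs, orig_ax) == 15*8 */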
+1 -1
arch/x86/kernel/cfi.c
··· 27 27 * for indirect call checks: 28 28 * 29 29 *   movl -<id>, %r10d ; 6 bytes 30 - * addl -4(%reg), %r10d ; 4 bytes 30 + * addl -<pos>(%reg), %r10d; 4 bytes 31 31 * je .Ltmp1 ; 2 bytes 32 32 * ud2 ; <- regs->ip 33 33 * .Ltmp1:
+13 -6
arch/x86/kernel/cpu/mshyperv.c
··· 38 38 bool hv_nested; 39 39 struct ms_hyperv_info ms_hyperv; 40 40 41 - /* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */ 42 - bool hyperv_paravisor_present __ro_after_init; 43 - EXPORT_SYMBOL_GPL(hyperv_paravisor_present); 44 - 45 41 #if IS_ENABLED(CONFIG_HYPERV) 46 42 static inline unsigned int hv_get_nested_msr(unsigned int reg) 47 43 { ··· 284 288 old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; 285 289 x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; 286 290 } 291 + 292 + #ifdef CONFIG_X86_64 293 + DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall); 294 + EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall); 295 + #define hypercall_update(hc) static_call_update(hv_hypercall, hc) 296 + #endif 287 297 #endif /* CONFIG_HYPERV */ 298 + 299 + #ifndef hypercall_update 300 + #define hypercall_update(hc) (void)hc 301 + #endif 288 302 289 303 static uint32_t __init ms_hyperv_platform(void) 290 304 { ··· 490 484 ms_hyperv.shared_gpa_boundary = 491 485 BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); 492 486 493 - hyperv_paravisor_present = !!ms_hyperv.paravisor_present; 494 - 495 487 pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", 496 488 ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); 497 489 498 490 499 491 if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { 500 492 static_branch_enable(&isolation_type_snp); 493 + if (!ms_hyperv.paravisor_present) 494 + hypercall_update(hv_snp_hypercall); 501 495 } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { 502 496 static_branch_enable(&isolation_type_tdx); 503 497 ··· 505 499 ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; 506 500 507 501 if (!ms_hyperv.paravisor_present) { 502 + hypercall_update(hv_tdx_hypercall); 508 503 /* 509 504 * Mark the Hyper-V TSC page feature as disabled 510 505 * in a TDX VM without paravisor so that the
+4 -2
arch/x86/kernel/irqinit.c
··· 97 97 /* Execute any quirks before the call gates are initialised: */ 98 98 x86_init.irqs.pre_vector_init(); 99 99 100 - if (cpu_feature_enabled(X86_FEATURE_FRED)) 100 + /* FRED's IRQ path may be used even if FRED isn't fully enabled. */ 101 + if (IS_ENABLED(CONFIG_X86_FRED)) 101 102 fred_complete_exception_setup(); 102 - else 103 + 104 + if (!cpu_feature_enabled(X86_FEATURE_FRED)) 103 105 idt_setup_apic_and_irq_gates(); 104 106 105 107 lapic_assign_system_vectors();
+4
arch/x86/kernel/machine_kexec_64.c
··· 479 479 480 480 __ftrace_enabled_restore(save_ftrace_enabled); 481 481 } 482 + /* 483 + * Handover to the next kernel, no CFI concern. 484 + */ 485 + ANNOTATE_NOCFI_SYM(machine_kexec); 482 486 483 487 /* arch-dependent functionality related to kexec file-based syscall */ 484 488
+4 -4
arch/x86/kernel/traps.c
··· 97 97 * Check for UD1 or UD2, accounting for Address Size Override Prefixes. 98 98 * If it's a UD1, further decode to determine its use: 99 99 * 100 - * FineIBT: ea (bad) 100 + * FineIBT: d6 udb 101 101 * FineIBT: f0 75 f9 lock jne . - 6 102 102 * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax 103 103 * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax ··· 130 130 WARN_ON_ONCE(!lock); 131 131 return BUG_LOCK; 132 132 133 - case 0xea: 133 + case 0xd6: 134 134 *len = addr - start; 135 - return BUG_EA; 135 + return BUG_UDB; 136 136 137 137 case OPCODE_ESCAPE: 138 138 break; ··· 341 341 } 342 342 fallthrough; 343 343 344 - case BUG_EA: 344 + case BUG_UDB: 345 345 case BUG_LOCK: 346 346 if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { 347 347 handled = true;
+1
arch/x86/kvm/Kconfig
··· 96 96 config KVM_INTEL 97 97 tristate "KVM for Intel (and compatible) processors support" 98 98 depends on KVM && IA32_FEAT_CTL 99 + select X86_FRED if X86_64 99 100 help 100 101 Provides support for KVM on processors equipped with Intel's VT 101 102 extensions, a.k.a. Virtual Machine Extensions (VMX).
+210 -296
arch/x86/kvm/emulate.c
··· 26 26 #include <asm/debugreg.h> 27 27 #include <asm/nospec-branch.h> 28 28 #include <asm/ibt.h> 29 + #include <asm/text-patching.h> 29 30 30 31 #include "x86.h" 31 32 #include "tss.h" ··· 167 166 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */ 168 167 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */ 169 168 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ 170 - #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ 171 169 #define NoWrite ((u64)1 << 45) /* No writeback */ 172 170 #define SrcWrite ((u64)1 << 46) /* Write back src operand */ 173 171 #define NoMod ((u64)1 << 47) /* Mod field is ignored */ ··· 203 203 const struct escape *esc; 204 204 const struct instr_dual *idual; 205 205 const struct mode_dual *mdual; 206 - void (*fastop)(struct fastop *fake); 207 206 } u; 208 207 int (*check_perm)(struct x86_emulate_ctxt *ctxt); 209 208 }; ··· 266 267 X86_EFLAGS_PF|X86_EFLAGS_CF) 267 268 268 269 #ifdef CONFIG_X86_64 269 - #define ON64(x) x 270 + #define ON64(x...) x 270 271 #else 271 - #define ON64(x) 272 + #define ON64(x...) 272 273 #endif 273 274 274 - /* 275 - * fastop functions have a special calling convention: 276 - * 277 - * dst: rax (in/out) 278 - * src: rdx (in/out) 279 - * src2: rcx (in) 280 - * flags: rflags (in/out) 281 - * ex: rsi (in:fastop pointer, out:zero if exception) 282 - * 283 - * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for 284 - * different operand sizes can be reached by calculation, rather than a jump 285 - * table (which would be bigger than the code). 286 - * 287 - * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR 288 - * and 1 for the straight line speculation INT3, leaves 7 bytes for the 289 - * body of the function. Currently none is larger than 4. 
290 - */ 291 - static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); 275 + #define EM_ASM_START(op) \ 276 + static int em_##op(struct x86_emulate_ctxt *ctxt) \ 277 + { \ 278 + unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; \ 279 + int bytes = 1, ok = 1; \ 280 + if (!(ctxt->d & ByteOp)) \ 281 + bytes = ctxt->dst.bytes; \ 282 + switch (bytes) { 292 283 293 - #define FASTOP_SIZE 16 284 + #define __EM_ASM(str) \ 285 + asm("push %[flags]; popf \n\t" \ 286 + "10: " str \ 287 + "pushf; pop %[flags] \n\t" \ 288 + "11: \n\t" \ 289 + : "+a" (ctxt->dst.val), \ 290 + "+d" (ctxt->src.val), \ 291 + [flags] "+D" (flags), \ 292 + "+S" (ok) \ 293 + : "c" (ctxt->src2.val)) 294 294 295 - #define __FOP_FUNC(name) \ 296 - ".align " __stringify(FASTOP_SIZE) " \n\t" \ 297 - ".type " name ", @function \n\t" \ 298 - name ":\n\t" \ 299 - ASM_ENDBR \ 300 - IBT_NOSEAL(name) 295 + #define __EM_ASM_1(op, dst) \ 296 + __EM_ASM(#op " %%" #dst " \n\t") 301 297 302 - #define FOP_FUNC(name) \ 303 - __FOP_FUNC(#name) 298 + #define __EM_ASM_1_EX(op, dst) \ 299 + __EM_ASM(#op " %%" #dst " \n\t" \ 300 + _ASM_EXTABLE_TYPE_REG(10b, 11f, EX_TYPE_ZERO_REG, %%esi)) 304 301 305 - #define __FOP_RET(name) \ 306 - "11: " ASM_RET \ 307 - ".size " name ", .-" name "\n\t" 302 + #define __EM_ASM_2(op, dst, src) \ 303 + __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") 308 304 309 - #define FOP_RET(name) \ 310 - __FOP_RET(#name) 305 + #define __EM_ASM_3(op, dst, src, src2) \ 306 + __EM_ASM(#op " %%" #src2 ", %%" #src ", %%" #dst " \n\t") 311 307 312 - #define __FOP_START(op, align) \ 313 - extern void em_##op(struct fastop *fake); \ 314 - asm(".pushsection .text, \"ax\" \n\t" \ 315 - ".global em_" #op " \n\t" \ 316 - ".align " __stringify(align) " \n\t" \ 317 - "em_" #op ":\n\t" 308 + #define EM_ASM_END \ 309 + } \ 310 + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); \ 311 + return !ok ? 
emulate_de(ctxt) : X86EMUL_CONTINUE; \ 312 + } 318 313 319 - #define FOP_START(op) __FOP_START(op, FASTOP_SIZE) 314 + /* 1-operand, using "a" (dst) */ 315 + #define EM_ASM_1(op) \ 316 + EM_ASM_START(op) \ 317 + case 1: __EM_ASM_1(op##b, al); break; \ 318 + case 2: __EM_ASM_1(op##w, ax); break; \ 319 + case 4: __EM_ASM_1(op##l, eax); break; \ 320 + ON64(case 8: __EM_ASM_1(op##q, rax); break;) \ 321 + EM_ASM_END 320 322 321 - #define FOP_END \ 322 - ".popsection") 323 + /* 1-operand, using "c" (src2) */ 324 + #define EM_ASM_1SRC2(op, name) \ 325 + EM_ASM_START(name) \ 326 + case 1: __EM_ASM_1(op##b, cl); break; \ 327 + case 2: __EM_ASM_1(op##w, cx); break; \ 328 + case 4: __EM_ASM_1(op##l, ecx); break; \ 329 + ON64(case 8: __EM_ASM_1(op##q, rcx); break;) \ 330 + EM_ASM_END 323 331 324 - #define __FOPNOP(name) \ 325 - __FOP_FUNC(name) \ 326 - __FOP_RET(name) 332 + /* 1-operand, using "c" (src2) with exception */ 333 + #define EM_ASM_1SRC2EX(op, name) \ 334 + EM_ASM_START(name) \ 335 + case 1: __EM_ASM_1_EX(op##b, cl); break; \ 336 + case 2: __EM_ASM_1_EX(op##w, cx); break; \ 337 + case 4: __EM_ASM_1_EX(op##l, ecx); break; \ 338 + ON64(case 8: __EM_ASM_1_EX(op##q, rcx); break;) \ 339 + EM_ASM_END 327 340 328 - #define FOPNOP() \ 329 - __FOPNOP(__stringify(__UNIQUE_ID(nop))) 341 + /* 2-operand, using "a" (dst), "d" (src) */ 342 + #define EM_ASM_2(op) \ 343 + EM_ASM_START(op) \ 344 + case 1: __EM_ASM_2(op##b, al, dl); break; \ 345 + case 2: __EM_ASM_2(op##w, ax, dx); break; \ 346 + case 4: __EM_ASM_2(op##l, eax, edx); break; \ 347 + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ 348 + EM_ASM_END 330 349 331 - #define FOP1E(op, dst) \ 332 - __FOP_FUNC(#op "_" #dst) \ 333 - "10: " #op " %" #dst " \n\t" \ 334 - __FOP_RET(#op "_" #dst) 350 + /* 2-operand, reversed */ 351 + #define EM_ASM_2R(op, name) \ 352 + EM_ASM_START(name) \ 353 + case 1: __EM_ASM_2(op##b, dl, al); break; \ 354 + case 2: __EM_ASM_2(op##w, dx, ax); break; \ 355 + case 4: __EM_ASM_2(op##l, edx, eax); break; \ 356 + ON64(case 8: __EM_ASM_2(op##q, rdx, rax); break;) \ 357 + EM_ASM_END 335 358 336 - #define FOP1EEX(op, dst) \ 337 - FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi) 359 + /* 2-operand, word only (no byte op) */ 360 + #define EM_ASM_2W(op) \ 361 + EM_ASM_START(op) \ 362 + case 1: break; \ 363 + case 2: __EM_ASM_2(op##w, ax, dx); break; \ 364 + case 4: __EM_ASM_2(op##l, eax, edx); break; \ 365 + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ 366 + EM_ASM_END 338 367 339 - #define FASTOP1(op) \ 340 - FOP_START(op) \ 341 - FOP1E(op##b, al) \ 342 - FOP1E(op##w, ax) \ 343 - FOP1E(op##l, eax) \ 344 - ON64(FOP1E(op##q, rax)) \ 345 - FOP_END 368 + /* 2-operand, using "a" (dst) and CL (src2) */ 369 + #define EM_ASM_2CL(op) \ 370 + EM_ASM_START(op) \ 371 + case 1: __EM_ASM_2(op##b, al, cl); break; \ 372 + case 2: __EM_ASM_2(op##w, ax, cl); break; \ 373 + case 4: __EM_ASM_2(op##l, eax, cl); break; \ 374 + ON64(case 8: __EM_ASM_2(op##q, rax, cl); break;) \ 375 + EM_ASM_END 346 376 347 - /* 1-operand, using src2 (for MUL/DIV r/m) */ 348 - #define FASTOP1SRC2(op, name) \ 349 - FOP_START(name) \ 350 - FOP1E(op, cl) \ 351 - FOP1E(op, cx) \ 352 - FOP1E(op, ecx) \ 353 - ON64(FOP1E(op, rcx)) \ 354 - FOP_END 377 + /* 3-operand, using "a" (dst), "d" (src) and CL (src2) */ 378 + #define EM_ASM_3WCL(op) \ 379 + EM_ASM_START(op) \ 380 + case 1: break; \ 381 + case 2: __EM_ASM_3(op##w, ax, dx, cl); break; \ 382 + case 4: __EM_ASM_3(op##l, eax, edx, cl); break; \ 383 + ON64(case 8: __EM_ASM_3(op##q, rax, rdx, cl); 
break;) \ 384 + EM_ASM_END 355 385 356 - /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */ 357 - #define FASTOP1SRC2EX(op, name) \ 358 - FOP_START(name) \ 359 - FOP1EEX(op, cl) \ 360 - FOP1EEX(op, cx) \ 361 - FOP1EEX(op, ecx) \ 362 - ON64(FOP1EEX(op, rcx)) \ 363 - FOP_END 364 - 365 - #define FOP2E(op, dst, src) \ 366 - __FOP_FUNC(#op "_" #dst "_" #src) \ 367 - #op " %" #src ", %" #dst " \n\t" \ 368 - __FOP_RET(#op "_" #dst "_" #src) 369 - 370 - #define FASTOP2(op) \ 371 - FOP_START(op) \ 372 - FOP2E(op##b, al, dl) \ 373 - FOP2E(op##w, ax, dx) \ 374 - FOP2E(op##l, eax, edx) \ 375 - ON64(FOP2E(op##q, rax, rdx)) \ 376 - FOP_END 377 - 378 - /* 2 operand, word only */ 379 - #define FASTOP2W(op) \ 380 - FOP_START(op) \ 381 - FOPNOP() \ 382 - FOP2E(op##w, ax, dx) \ 383 - FOP2E(op##l, eax, edx) \ 384 - ON64(FOP2E(op##q, rax, rdx)) \ 385 - FOP_END 386 - 387 - /* 2 operand, src is CL */ 388 - #define FASTOP2CL(op) \ 389 - FOP_START(op) \ 390 - FOP2E(op##b, al, cl) \ 391 - FOP2E(op##w, ax, cl) \ 392 - FOP2E(op##l, eax, cl) \ 393 - ON64(FOP2E(op##q, rax, cl)) \ 394 - FOP_END 395 - 396 - /* 2 operand, src and dest are reversed */ 397 - #define FASTOP2R(op, name) \ 398 - FOP_START(name) \ 399 - FOP2E(op##b, dl, al) \ 400 - FOP2E(op##w, dx, ax) \ 401 - FOP2E(op##l, edx, eax) \ 402 - ON64(FOP2E(op##q, rdx, rax)) \ 403 - FOP_END 404 - 405 - #define FOP3E(op, dst, src, src2) \ 406 - __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \ 407 - #op " %" #src2 ", %" #src ", %" #dst " \n\t"\ 408 - __FOP_RET(#op "_" #dst "_" #src "_" #src2) 409 - 410 - /* 3-operand, word-only, src2=cl */ 411 - #define FASTOP3WCL(op) \ 412 - FOP_START(op) \ 413 - FOPNOP() \ 414 - FOP3E(op##w, ax, dx, cl) \ 415 - FOP3E(op##l, eax, edx, cl) \ 416 - ON64(FOP3E(op##q, rax, rdx, cl)) \ 417 - FOP_END 418 - 419 - /* Special case for SETcc - 1 instruction per cc */ 420 - #define FOP_SETCC(op) \ 421 - FOP_FUNC(op) \ 422 - #op " %al \n\t" \ 423 - FOP_RET(op) 424 - 425 - FOP_START(setcc) 426 - FOP_SETCC(seto) 427 - FOP_SETCC(setno) 428 - FOP_SETCC(setc) 429 - FOP_SETCC(setnc) 430 - FOP_SETCC(setz) 431 - FOP_SETCC(setnz) 432 - FOP_SETCC(setbe) 433 - FOP_SETCC(setnbe) 434 - FOP_SETCC(sets) 435 - FOP_SETCC(setns) 436 - FOP_SETCC(setp) 437 - FOP_SETCC(setnp) 438 - FOP_SETCC(setl) 439 - FOP_SETCC(setnl) 440 - FOP_SETCC(setle) 441 - FOP_SETCC(setnle) 442 - FOP_END; 443 - 444 - FOP_START(salc) 445 - FOP_FUNC(salc) 446 - "pushf; sbb %al, %al; popf \n\t" 447 - FOP_RET(salc) 448 - FOP_END; 386 + static int em_salc(struct x86_emulate_ctxt *ctxt) 387 + { 388 + /* 389 + * Set AL 0xFF if CF is set, or 0x00 when clear. 390 + */ 391 + ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF); 392 + return X86EMUL_CONTINUE; 393 + } 449 394 450 395 /* 451 396 * XXX: inoutclob user must know where the argument is being expanded. 
··· 950 1007 return rc; 951 1008 } 952 1009 953 - FASTOP2(add); 954 - FASTOP2(or); 955 - FASTOP2(adc); 956 - FASTOP2(sbb); 957 - FASTOP2(and); 958 - FASTOP2(sub); 959 - FASTOP2(xor); 960 - FASTOP2(cmp); 961 - FASTOP2(test); 1010 + EM_ASM_2(add); 1011 + EM_ASM_2(or); 1012 + EM_ASM_2(adc); 1013 + EM_ASM_2(sbb); 1014 + EM_ASM_2(and); 1015 + EM_ASM_2(sub); 1016 + EM_ASM_2(xor); 1017 + EM_ASM_2(cmp); 1018 + EM_ASM_2(test); 1019 + EM_ASM_2(xadd); 962 1020 963 - FASTOP1SRC2(mul, mul_ex); 964 - FASTOP1SRC2(imul, imul_ex); 965 - FASTOP1SRC2EX(div, div_ex); 966 - FASTOP1SRC2EX(idiv, idiv_ex); 1021 + EM_ASM_1SRC2(mul, mul_ex); 1022 + EM_ASM_1SRC2(imul, imul_ex); 1023 + EM_ASM_1SRC2EX(div, div_ex); 1024 + EM_ASM_1SRC2EX(idiv, idiv_ex); 967 1025 968 - FASTOP3WCL(shld); 969 - FASTOP3WCL(shrd); 1026 + EM_ASM_3WCL(shld); 1027 + EM_ASM_3WCL(shrd); 970 1028 971 - FASTOP2W(imul); 1029 + EM_ASM_2W(imul); 972 1030 973 - FASTOP1(not); 974 - FASTOP1(neg); 975 - FASTOP1(inc); 976 - FASTOP1(dec); 1031 + EM_ASM_1(not); 1032 + EM_ASM_1(neg); 1033 + EM_ASM_1(inc); 1034 + EM_ASM_1(dec); 977 1035 978 - FASTOP2CL(rol); 979 - FASTOP2CL(ror); 980 - FASTOP2CL(rcl); 981 - FASTOP2CL(rcr); 982 - FASTOP2CL(shl); 983 - FASTOP2CL(shr); 984 - FASTOP2CL(sar); 1036 + EM_ASM_2CL(rol); 1037 + EM_ASM_2CL(ror); 1038 + EM_ASM_2CL(rcl); 1039 + EM_ASM_2CL(rcr); 1040 + EM_ASM_2CL(shl); 1041 + EM_ASM_2CL(shr); 1042 + EM_ASM_2CL(sar); 985 1043 986 - FASTOP2W(bsf); 987 - FASTOP2W(bsr); 988 - FASTOP2W(bt); 989 - FASTOP2W(bts); 990 - FASTOP2W(btr); 991 - FASTOP2W(btc); 1044 + EM_ASM_2W(bsf); 1045 + EM_ASM_2W(bsr); 1046 + EM_ASM_2W(bt); 1047 + EM_ASM_2W(bts); 1048 + EM_ASM_2W(btr); 1049 + EM_ASM_2W(btc); 992 1050 993 - FASTOP2(xadd); 994 - 995 - FASTOP2R(cmp, cmp_r); 1051 + EM_ASM_2R(cmp, cmp_r); 996 1052 997 1053 static int em_bsf_c(struct x86_emulate_ctxt *ctxt) 998 1054 { 999 1055 /* If src is zero, do not writeback, but update flags */ 1000 1056 if (ctxt->src.val == 0) 1001 1057 ctxt->dst.type = OP_NONE; 1002 - return fastop(ctxt, em_bsf); 1058 + return em_bsf(ctxt); 1003 1059 } 1004 1060 1005 1061 static int em_bsr_c(struct x86_emulate_ctxt *ctxt) ··· 1006 1064 /* If src is zero, do not writeback, but update flags */ 1007 1065 if (ctxt->src.val == 0) 1008 1066 ctxt->dst.type = OP_NONE; 1009 - return fastop(ctxt, em_bsr); 1067 + return em_bsr(ctxt); 1010 1068 } 1011 1069 1012 1070 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) 1013 1071 { 1014 - u8 rc; 1015 - void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf); 1016 - 1017 - flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; 1018 - asm("push %[flags]; popf; " CALL_NOSPEC 1019 - : "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags)); 1020 - return rc; 1072 + return __emulate_cc(flags, condition & 0xf); 1021 1073 } 1022 1074 1023 1075 static void fetch_register_operand(struct operand *op) ··· 2261 2325 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX); 2262 2326 ctxt->src.orig_val = ctxt->src.val; 2263 2327 ctxt->src.val = ctxt->dst.orig_val; 2264 - fastop(ctxt, em_cmp); 2328 + em_cmp(ctxt); 2265 2329 2266 2330 if (ctxt->eflags & X86_EFLAGS_ZF) { 2267 2331 /* Success: write back to memory; no update of EAX */ ··· 3026 3090 ctxt->src.type = OP_IMM; 3027 3091 ctxt->src.val = 0; 3028 3092 ctxt->src.bytes = 1; 3029 - fastop(ctxt, em_or); 3093 + em_or(ctxt); 3030 3094 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); 3031 3095 if (cf) 3032 3096 ctxt->eflags |= X86_EFLAGS_CF; ··· 3052 3116 ctxt->src.type = OP_IMM; 3053 3117 
ctxt->src.val = 0; 3054 3118 ctxt->src.bytes = 1; 3055 - fastop(ctxt, em_or); 3119 + em_or(ctxt); 3056 3120 3057 3121 return X86EMUL_CONTINUE; 3058 3122 } ··· 3070 3134 ctxt->src.type = OP_IMM; 3071 3135 ctxt->src.val = 0; 3072 3136 ctxt->src.bytes = 1; 3073 - fastop(ctxt, em_or); 3137 + em_or(ctxt); 3074 3138 3075 3139 return X86EMUL_CONTINUE; 3076 3140 } ··· 3161 3225 static int em_imul_3op(struct x86_emulate_ctxt *ctxt) 3162 3226 { 3163 3227 ctxt->dst.val = ctxt->src2.val; 3164 - return fastop(ctxt, em_imul); 3228 + return em_imul(ctxt); 3165 3229 } 3166 3230 3167 3231 static int em_cwd(struct x86_emulate_ctxt *ctxt) ··· 3940 4004 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) } 3941 4005 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } 3942 4006 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } 3943 - #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } 3944 4007 #define II(_f, _e, _i) \ 3945 4008 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i } 3946 4009 #define IIP(_f, _e, _i, _p) \ ··· 3954 4019 #define I2bvIP(_f, _e, _i, _p) \ 3955 4020 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) 3956 4021 3957 - #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \ 3958 - F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ 3959 - F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 4022 + #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ 4023 + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ 4024 + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 3960 4025 3961 4026 static const struct opcode group7_rm0[] = { 3962 4027 N, ··· 3994 4059 }; 3995 4060 3996 4061 static const struct opcode group1[] = { 3997 - F(Lock, em_add), 3998 - F(Lock | PageTable, em_or), 3999 - F(Lock, em_adc), 4000 - F(Lock, em_sbb), 4001 - F(Lock | PageTable, em_and), 4002 - F(Lock, em_sub), 4003 - F(Lock, em_xor), 4004 - F(NoWrite, em_cmp), 4062 + I(Lock, em_add), 4063 + I(Lock | PageTable, em_or), 4064 + I(Lock, em_adc), 4065 + I(Lock, em_sbb), 4066 + I(Lock | PageTable, em_and), 4067 + I(Lock, em_sub), 4068 + I(Lock, em_xor), 4069 + I(NoWrite, em_cmp), 4005 4070 }; 4006 4071 4007 4072 static const struct opcode group1A[] = { ··· 4009 4074 }; 4010 4075 4011 4076 static const struct opcode group2[] = { 4012 - F(DstMem | ModRM, em_rol), 4013 - F(DstMem | ModRM, em_ror), 4014 - F(DstMem | ModRM, em_rcl), 4015 - F(DstMem | ModRM, em_rcr), 4016 - F(DstMem | ModRM, em_shl), 4017 - F(DstMem | ModRM, em_shr), 4018 - F(DstMem | ModRM, em_shl), 4019 - F(DstMem | ModRM, em_sar), 4077 + I(DstMem | ModRM, em_rol), 4078 + I(DstMem | ModRM, em_ror), 4079 + I(DstMem | ModRM, em_rcl), 4080 + I(DstMem | ModRM, em_rcr), 4081 + I(DstMem | ModRM, em_shl), 4082 + I(DstMem | ModRM, em_shr), 4083 + I(DstMem | ModRM, em_shl), 4084 + I(DstMem | ModRM, em_sar), 4020 4085 }; 4021 4086 4022 4087 static const struct opcode group3[] = { 4023 - F(DstMem | SrcImm | NoWrite, em_test), 4024 - F(DstMem | SrcImm | NoWrite, em_test), 4025 - F(DstMem | SrcNone | Lock, em_not), 4026 - F(DstMem | SrcNone | Lock, em_neg), 4027 - F(DstXacc | Src2Mem, em_mul_ex), 4028 - F(DstXacc | Src2Mem, em_imul_ex), 4029 - F(DstXacc | Src2Mem, em_div_ex), 4030 - F(DstXacc | Src2Mem, em_idiv_ex), 4088 + I(DstMem | SrcImm | NoWrite, em_test), 4089 + I(DstMem | SrcImm | NoWrite, em_test), 4090 + I(DstMem | SrcNone | Lock, em_not), 4091 + I(DstMem | SrcNone | Lock, em_neg), 4092 + I(DstXacc | Src2Mem, em_mul_ex), 4093 + I(DstXacc | Src2Mem, em_imul_ex), 4094 + I(DstXacc | 
Src2Mem, em_div_ex), 4095 + I(DstXacc | Src2Mem, em_idiv_ex), 4031 4096 }; 4032 4097 4033 4098 static const struct opcode group4[] = { 4034 - F(ByteOp | DstMem | SrcNone | Lock, em_inc), 4035 - F(ByteOp | DstMem | SrcNone | Lock, em_dec), 4099 + I(ByteOp | DstMem | SrcNone | Lock, em_inc), 4100 + I(ByteOp | DstMem | SrcNone | Lock, em_dec), 4036 4101 N, N, N, N, N, N, 4037 4102 }; 4038 4103 4039 4104 static const struct opcode group5[] = { 4040 - F(DstMem | SrcNone | Lock, em_inc), 4041 - F(DstMem | SrcNone | Lock, em_dec), 4105 + I(DstMem | SrcNone | Lock, em_inc), 4106 + I(DstMem | SrcNone | Lock, em_dec), 4042 4107 I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs), 4043 4108 I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far), 4044 4109 I(SrcMem | NearBranch | IsBranch, em_jmp_abs), ··· 4074 4139 4075 4140 static const struct opcode group8[] = { 4076 4141 N, N, N, N, 4077 - F(DstMem | SrcImmByte | NoWrite, em_bt), 4078 - F(DstMem | SrcImmByte | Lock | PageTable, em_bts), 4079 - F(DstMem | SrcImmByte | Lock, em_btr), 4080 - F(DstMem | SrcImmByte | Lock | PageTable, em_btc), 4142 + I(DstMem | SrcImmByte | NoWrite, em_bt), 4143 + I(DstMem | SrcImmByte | Lock | PageTable, em_bts), 4144 + I(DstMem | SrcImmByte | Lock, em_btr), 4145 + I(DstMem | SrcImmByte | Lock | PageTable, em_btc), 4081 4146 }; 4082 4147 4083 4148 /* ··· 4214 4279 4215 4280 static const struct opcode opcode_table[256] = { 4216 4281 /* 0x00 - 0x07 */ 4217 - F6ALU(Lock, em_add), 4282 + I6ALU(Lock, em_add), 4218 4283 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), 4219 4284 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), 4220 4285 /* 0x08 - 0x0F */ 4221 - F6ALU(Lock | PageTable, em_or), 4286 + I6ALU(Lock | PageTable, em_or), 4222 4287 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), 4223 4288 N, 4224 4289 /* 0x10 - 0x17 */ 4225 - F6ALU(Lock, em_adc), 4290 + I6ALU(Lock, em_adc), 4226 4291 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), 4227 4292 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), 4228 4293 /* 0x18 - 0x1F */ 4229 - F6ALU(Lock, em_sbb), 4294 + I6ALU(Lock, em_sbb), 4230 4295 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), 4231 4296 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), 4232 4297 /* 0x20 - 0x27 */ 4233 - F6ALU(Lock | PageTable, em_and), N, N, 4298 + I6ALU(Lock | PageTable, em_and), N, N, 4234 4299 /* 0x28 - 0x2F */ 4235 - F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), 4300 + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), 4236 4301 /* 0x30 - 0x37 */ 4237 - F6ALU(Lock, em_xor), N, N, 4302 + I6ALU(Lock, em_xor), N, N, 4238 4303 /* 0x38 - 0x3F */ 4239 - F6ALU(NoWrite, em_cmp), N, N, 4304 + I6ALU(NoWrite, em_cmp), N, N, 4240 4305 /* 0x40 - 0x4F */ 4241 - X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)), 4306 + X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)), 4242 4307 /* 0x50 - 0x57 */ 4243 4308 X8(I(SrcReg | Stack, em_push)), 4244 4309 /* 0x58 - 0x5F */ ··· 4262 4327 G(DstMem | SrcImm, group1), 4263 4328 G(ByteOp | DstMem | SrcImm | No64, group1), 4264 4329 G(DstMem | SrcImmByte, group1), 4265 - F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), 4330 + I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), 4266 4331 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), 4267 4332 /* 0x88 - 0x8F */ 4268 4333 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), ··· 4283 4348 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 4284 4349 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), 4285 4350 I2bv(SrcSI | DstDI | Mov | 
String | TwoMemOp, em_mov), 4286 - F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), 4351 + I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), 4287 4352 /* 0xA8 - 0xAF */ 4288 - F2bv(DstAcc | SrcImm | NoWrite, em_test), 4353 + I2bv(DstAcc | SrcImm | NoWrite, em_test), 4289 4354 I2bv(SrcAcc | DstDI | Mov | String, em_mov), 4290 4355 I2bv(SrcSI | DstAcc | Mov | String, em_mov), 4291 - F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), 4356 + I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), 4292 4357 /* 0xB0 - 0xB7 */ 4293 4358 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), 4294 4359 /* 0xB8 - 0xBF */ ··· 4313 4378 G(Src2CL | ByteOp, group2), G(Src2CL, group2), 4314 4379 I(DstAcc | SrcImmUByte | No64, em_aam), 4315 4380 I(DstAcc | SrcImmUByte | No64, em_aad), 4316 - F(DstAcc | ByteOp | No64, em_salc), 4381 + I(DstAcc | ByteOp | No64, em_salc), 4317 4382 I(DstAcc | SrcXLat | ByteOp, em_mov), 4318 4383 /* 0xD8 - 0xDF */ 4319 4384 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, ··· 4398 4463 /* 0xA0 - 0xA7 */ 4399 4464 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), 4400 4465 II(ImplicitOps, em_cpuid, cpuid), 4401 - F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), 4402 - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), 4403 - F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, 4466 + I(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), 4467 + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), 4468 + I(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, 4404 4469 /* 0xA8 - 0xAF */ 4405 4470 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), 4406 4471 II(EmulateOnUD | ImplicitOps, em_rsm, rsm), 4407 - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), 4408 - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), 4409 - F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), 4410 - GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), 4472 + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), 4473 + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), 4474 + I(DstMem | SrcReg | Src2CL | ModRM, em_shrd), 4475 + GD(0, &group15), I(DstReg | SrcMem | ModRM, em_imul), 4411 4476 /* 0xB0 - 0xB7 */ 4412 4477 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg), 4413 4478 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), 4414 - F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), 4479 + I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), 4415 4480 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), 4416 4481 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), 4417 4482 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4418 4483 /* 0xB8 - 0xBF */ 4419 4484 N, N, 4420 4485 G(BitOp, group8), 4421 - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), 4486 + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), 4422 4487 I(DstReg | SrcMem | ModRM, em_bsf_c), 4423 4488 I(DstReg | SrcMem | ModRM, em_bsr_c), 4424 4489 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4425 4490 /* 0xC0 - 0xC7 */ 4426 - F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), 4491 + I2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), 4427 4492 N, ID(0, &instr_dual_0f_c3), 4428 4493 N, N, N, GD(0, &group9), 4429 4494 /* 0xC8 - 0xCF */ ··· 5133 5198 kvm_read_mmx_reg(op->addr.mm, &op->mm_val); 5134 5199 } 5135 5200 5136 - static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) 5137 - { 5138 - ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; 5139 - 5140 
- if (!(ctxt->d & ByteOp)) 5141 - fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; 5142 - 5143 - asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" 5144 - : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), 5145 - [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT 5146 - : "c"(ctxt->src2.val)); 5147 - 5148 - ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); 5149 - if (!fop) /* exception is returned in fop variable */ 5150 - return emulate_de(ctxt); 5151 - return X86EMUL_CONTINUE; 5152 - } 5153 - 5154 5201 void init_decode_cache(struct x86_emulate_ctxt *ctxt) 5155 5202 { 5156 5203 /* Clear fields that are set conditionally but read without a guard. */ ··· 5296 5379 ctxt->eflags &= ~X86_EFLAGS_RF; 5297 5380 5298 5381 if (ctxt->execute) { 5299 - if (ctxt->d & Fastop) 5300 - rc = fastop(ctxt, ctxt->fop); 5301 - else 5302 - rc = ctxt->execute(ctxt); 5382 + rc = ctxt->execute(ctxt); 5303 5383 if (rc != X86EMUL_CONTINUE) 5304 5384 goto done; 5305 5385 goto writeback;
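The conversion above replaces the fastop stubs (bare asm blobs laid out FASTOP_SIZE apart and reached through an indirect CALL_NOSPEC, see the fastop() helper removed just above) with ordinary C functions that switch on the operand width. The EM_ASM_START/__EM_ASM_*/EM_ASM_END bodies are only partially visible in this hunk, so the following is a rough sketch of the shape EM_ASM_2(add) takes after expansion, assuming the flags marshalling mirrors the removed fastop() helper; it is not the literal kernel code:

    /* Rough sketch of EM_ASM_2(add) after expansion; 4-byte case shown. */
    static int em_add(struct x86_emulate_ctxt *ctxt)
    {
            unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

            switch (ctxt->dst.bytes) {
            case 4:
                    asm("push %[flags]; popf; addl %%edx, %%eax; pushf; pop %[flags]"
                        : "+a" (ctxt->dst.val), [flags] "+r" (flags)
                        : "d" (ctxt->src.val));
                    break;
            /* cases 1, 2 and (on 64-bit) 8 differ only in suffix and registers */
            }

            ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
            return X86EMUL_CONTINUE;
    }

Because each emulation stub is now a normal C function with the standard calling convention, it carries a kCFI type hash like any other indirect-call target, which the asm fastop trampolines could not.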
+4
arch/x86/kvm/vmx/vmenter.S
··· 361 361 362 362 .section .text, "ax" 363 363 364 + #ifndef CONFIG_X86_FRED 365 + 364 366 SYM_FUNC_START(vmx_do_interrupt_irqoff) 365 367 VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 366 368 SYM_FUNC_END(vmx_do_interrupt_irqoff) 369 + 370 + #endif
+7 -1
arch/x86/kvm/vmx/vmx.c
··· 7021 7021 "unexpected VM-Exit interrupt info: 0x%x", intr_info)) 7022 7022 return; 7023 7023 7024 + /* 7025 + * Invoke the kernel's IRQ handler for the vector. Use the FRED path 7026 + * when it's available even if FRED isn't fully enabled, e.g. even if 7027 + * FRED isn't supported in hardware, in order to avoid the indirect 7028 + * CALL in the non-FRED path. 7029 + */ 7024 7030 kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); 7025 - if (cpu_feature_enabled(X86_FEATURE_FRED)) 7031 + if (IS_ENABLED(CONFIG_X86_FRED)) 7026 7032 fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector); 7027 7033 else 7028 7034 vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
+29 -29
arch/x86/lib/bhi.S
··· 5 5 #include <asm/nospec-branch.h>
6 6 
7 7 /*
8 - * Notably, the FineIBT preamble calling these will have ZF set and r10 zero.
8 + * Notably, the FineIBT preamble calling these will have ZF set and eax zero.
9 9 *
10 10 * The very last element is in fact larger than 32 bytes, but since it's the
11 11 * last element, this does not matter,
··· 36 36 ANNOTATE_NOENDBR
37 37 UNWIND_HINT_FUNC
38 38 jne .Lud_1
39 - cmovne %r10, %rdi
39 + cmovne %rax, %rdi
40 40 ANNOTATE_UNRET_SAFE
41 41 ret
42 42 int3
··· 53 53 ANNOTATE_NOENDBR
54 54 UNWIND_HINT_FUNC
55 55 jne .Lud_1
56 - cmovne %r10, %rdi
57 - cmovne %r10, %rsi
56 + cmovne %rax, %rdi
57 + cmovne %rax, %rsi
58 58 ANNOTATE_UNRET_SAFE
59 59 ret
60 60 int3
··· 64 64 ANNOTATE_NOENDBR
65 65 UNWIND_HINT_FUNC
66 66 jne .Lud_1
67 - cmovne %r10, %rdi
68 - cmovne %r10, %rsi
69 - cmovne %r10, %rdx
67 + cmovne %rax, %rdi
68 + cmovne %rax, %rsi
69 + cmovne %rax, %rdx
70 70 ANNOTATE_UNRET_SAFE
71 71 ret
72 72 int3
··· 76 76 ANNOTATE_NOENDBR
77 77 UNWIND_HINT_FUNC
78 78 jne .Lud_2
79 - cmovne %r10, %rdi
80 - cmovne %r10, %rsi
81 - cmovne %r10, %rdx
82 - cmovne %r10, %rcx
79 + cmovne %rax, %rdi
80 + cmovne %rax, %rsi
81 + cmovne %rax, %rdx
82 + cmovne %rax, %rcx
83 83 ANNOTATE_UNRET_SAFE
84 84 ret
85 85 int3
··· 89 89 ANNOTATE_NOENDBR
90 90 UNWIND_HINT_FUNC
91 91 jne .Lud_2
92 - cmovne %r10, %rdi
93 - cmovne %r10, %rsi
94 - cmovne %r10, %rdx
95 - cmovne %r10, %rcx
96 - cmovne %r10, %r8
92 + cmovne %rax, %rdi
93 + cmovne %rax, %rsi
94 + cmovne %rax, %rdx
95 + cmovne %rax, %rcx
96 + cmovne %rax, %r8
97 97 ANNOTATE_UNRET_SAFE
98 98 ret
99 99 int3
··· 110 110 ANNOTATE_NOENDBR
111 111 UNWIND_HINT_FUNC
112 112 jne .Lud_2
113 - cmovne %r10, %rdi
114 - cmovne %r10, %rsi
115 - cmovne %r10, %rdx
116 - cmovne %r10, %rcx
117 - cmovne %r10, %r8
118 - cmovne %r10, %r9
113 + cmovne %rax, %rdi
114 + cmovne %rax, %rsi
115 + cmovne %rax, %rdx
116 + cmovne %rax, %rcx
117 + cmovne %rax, %r8
118 + cmovne %rax, %r9
119 119 ANNOTATE_UNRET_SAFE
120 120 ret
121 121 int3
··· 125 125 ANNOTATE_NOENDBR
126 126 UNWIND_HINT_FUNC
127 127 jne .Lud_2
128 - cmovne %r10, %rdi
129 - cmovne %r10, %rsi
130 - cmovne %r10, %rdx
131 - cmovne %r10, %rcx
132 - cmovne %r10, %r8
133 - cmovne %r10, %r9
134 - cmovne %r10, %rsp
128 + cmovne %rax, %rdi
129 + cmovne %rax, %rsi
130 + cmovne %rax, %rdx
131 + cmovne %rax, %rcx
132 + cmovne %rax, %r8
133 + cmovne %rax, %r9
134 + cmovne %rax, %rsp
135 135 ANNOTATE_UNRET_SAFE
136 136 ret
137 137 int3
+2 -2
arch/x86/lib/retpoline.S
··· 134 134 .macro ITS_THUNK reg 135 135 136 136 /* 137 - * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b) 137 + * If CFI paranoid is used then the ITS thunk starts with opcodes (1: udb; jne 1b) 138 138 * that complete the fineibt_paranoid caller sequence. 139 139 */ 140 - 1: .byte 0xea 140 + 1: ASM_UDB 141 141 SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL) 142 142 UNWIND_HINT_UNDEFINED 143 143 ANNOTATE_NOENDBR
+3 -3
arch/x86/net/bpf_jit_comp.c
··· 420 420 u8 *prog = *pprog; 421 421 422 422 EMIT_ENDBR(); 423 - EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */ 423 + EMIT1_off32(0x2d, hash); /* subl $hash, %eax */ 424 424 if (cfi_bhi) { 425 + EMIT2(0x2e, 0x2e); /* cs cs */ 425 426 emit_call(&prog, __bhi_args[arity], ip + 11); 426 427 } else { 427 - EMIT2(0x75, 0xf9); /* jne.d8 .-7 */ 428 - EMIT3(0x0f, 0x1f, 0x00); /* nop3 */ 428 + EMIT3_off32(0x2e, 0x0f, 0x85, 3); /* jne.d32,pn 3 */ 429 429 } 430 430 EMIT_ENDBR_POISON(); 431 431
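For readers decoding the opcodes: the EMIT*() calls above produce the following machine code, shown here as an illustrative byte array (the hash value is hypothetical; real hashes are derived per function type):

    /* Illustrative decode of the emitted kCFI preamble bytes. */
    static const u8 kcfi_preamble[] = {
            0x2d, 0x78, 0x56, 0x34, 0x12,                   /* subl $0x12345678, %eax */
            0x2e, 0x0f, 0x85, 0x03, 0x00, 0x00, 0x00,      /* jne.d32,pn +3 */
    };

The 0x2e bytes are CS segment-override prefixes: on the jne they serve as the "predict not taken" hint, and in the cfi_bhi case two of them prefix the emitted call.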
+4
arch/x86/platform/efi/efi_stub_64.S
··· 11 11 #include <asm/nospec-branch.h>
12 12 
13 13 SYM_FUNC_START(__efi_call)
14 + /*
15 + * The EFI code doesn't have any CFI; annotate away the CFI violation.
16 + */
17 + ANNOTATE_NOCFI_SYM
14 18 pushq %rbp
15 19 movq %rsp, %rbp
16 20 and $~0xf, %rsp
+5
drivers/misc/lkdtm/perms.c
··· 9 9 #include <linux/vmalloc.h> 10 10 #include <linux/mman.h> 11 11 #include <linux/uaccess.h> 12 + #include <linux/objtool.h> 12 13 #include <asm/cacheflush.h> 13 14 #include <asm/sections.h> 14 15 ··· 87 86 func(); 88 87 pr_err("FAIL: func returned\n"); 89 88 } 89 + /* 90 + * Explicitly doing the wrong thing for testing. 91 + */ 92 + ANNOTATE_NOCFI_SYM(execute_location); 90 93 91 94 static void execute_user_location(void *dst) 92 95 {
-5
include/linux/compiler-clang.h
··· 115 115 # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) 116 116 #endif 117 117 118 - #if __has_feature(kcfi) 119 - /* Disable CFI checking inside a function. */ 120 - #define __nocfi __attribute__((__no_sanitize__("kcfi"))) 121 - #endif 122 - 123 118 /* 124 119 * Turn individual warnings and errors on and off locally, depending 125 120 * on version.
-4
include/linux/compiler-gcc.h
··· 35 35 (typeof(ptr)) (__ptr + (off)); \ 36 36 }) 37 37 38 - #ifdef CONFIG_MITIGATION_RETPOLINE 39 - #define __noretpoline __attribute__((__indirect_branch__("keep"))) 40 - #endif 41 - 42 38 #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) 43 39 #define __latent_entropy __attribute__((latent_entropy)) 44 40 #endif
+3 -1
include/linux/compiler_types.h
··· 455 455 # define __noscs 456 456 #endif 457 457 458 - #ifndef __nocfi 458 + #if defined(CONFIG_CFI) 459 + # define __nocfi __attribute__((__no_sanitize__("kcfi"))) 460 + #else 459 461 # define __nocfi 460 462 #endif 461 463
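Centralizing the definition here means __nocfi now applies to any CONFIG_CFI build rather than being Clang-only. The attribute suppresses kCFI instrumentation only within the function it marks; a hypothetical illustration:

    /* Hypothetical: kCFI checks are suppressed only inside the function
     * carrying the attribute; other functions are unaffected. */
    static void __nocfi unchecked_call(void (*cb)(void))
    {
            cb();           /* no kCFI hash check emitted here */
    }

    static void checked_call(void (*cb)(void))
    {
            cb();           /* normal kCFI check emitted here */
    }

Note that with the objtool change later in this series, such unchecked indirect calls now warn unless the symbol is also marked with ANNOTATE_NOCFI_SYM().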
-8
include/linux/init.h
··· 7 7 #include <linux/stringify.h> 8 8 #include <linux/types.h> 9 9 10 - /* Built-in __init functions needn't be compiled with retpoline */ 11 - #if defined(__noretpoline) && !defined(MODULE) 12 - #define __noinitretpoline __noretpoline 13 - #else 14 - #define __noinitretpoline 15 - #endif 16 - 17 10 /* These macros are used to mark some functions or 18 11 * initialized data (doesn't apply to uninitialized data) 19 12 * as `initialization' functions. The kernel can take this ··· 43 50 /* These are for everybody (although not all archs will actually 44 51 discard it in modules) */ 45 52 #define __init __section(".init.text") __cold __latent_entropy \ 46 - __noinitretpoline \ 47 53 __no_kstack_erase 48 54 #define __initdata __section(".init.data") 49 55 #define __initconst __section(".init.rodata")
+10
include/linux/objtool.h
··· 184 184 * WARN using UD2.
185 185 */
186 186 #define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE)
187 + /*
188 + * This should not normally be used; it annotates away CFI violations.
189 + * There are a few valid use cases, like the kexec handover to the next
190 + * kernel image, where there is no security concern.
191 + *
192 + * A few real issues, such as calls into EFI code we can't control, are
193 + * also annotated away.
194 + */
195 + #define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI))
187 196 
188 197 #else
189 198 #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR
··· 203 194 #define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL
204 195 #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN
205 196 #define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE
197 + #define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI
206 198 #endif
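The C form takes the symbol name and wraps the annotation in asm(); the bare form added to the #else branch is for .S files, as in the __efi_call hunk earlier. A hypothetical pairing with __nocfi, so that the compiler emits no check and objtool does not warn about its absence:

    /* Hypothetical: hand control to code that carries no kCFI hash. */
    static void __nocfi image_handover(unsigned long entry)
    {
            ((void (*)(void))entry)();
    }
    ANNOTATE_NOCFI_SYM(image_handover);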
+1
include/linux/objtool_types.h
··· 65 65 #define ANNOTYPE_IGNORE_ALTS 6 66 66 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 67 67 #define ANNOTYPE_REACHABLE 8 68 + #define ANNOTYPE_NOCFI 9 68 69 69 70 #endif /* _LINUX_OBJTOOL_TYPES_H */
+1
tools/include/linux/objtool_types.h
··· 65 65 #define ANNOTYPE_IGNORE_ALTS 6 66 66 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 67 67 #define ANNOTYPE_REACHABLE 8 68 + #define ANNOTYPE_NOCFI 9 68 69 69 70 #endif /* _LINUX_OBJTOOL_TYPES_H */
+42
tools/objtool/check.c
··· 2392 2392 2393 2393 static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) 2394 2394 { 2395 + struct symbol *sym; 2396 + 2395 2397 switch (type) { 2396 2398 case ANNOTYPE_NOENDBR: 2397 2399 /* early */ ··· 2433 2431 2434 2432 case ANNOTYPE_REACHABLE: 2435 2433 insn->dead_end = false; 2434 + break; 2435 + 2436 + case ANNOTYPE_NOCFI: 2437 + sym = insn->sym; 2438 + if (!sym) { 2439 + ERROR_INSN(insn, "dodgy NOCFI annotation"); 2440 + return -1; 2441 + } 2442 + insn->sym->nocfi = 1; 2436 2443 break; 2437 2444 2438 2445 default: ··· 4003 3992 WARN_INSN(insn, "indirect %s found in MITIGATION_RETPOLINE build", 4004 3993 insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); 4005 3994 warnings++; 3995 + } 3996 + 3997 + if (!opts.cfi) 3998 + return warnings; 3999 + 4000 + /* 4001 + * kCFI call sites look like: 4002 + * 4003 + * movl $(-0x12345678), %r10d 4004 + * addl -4(%r11), %r10d 4005 + * jz 1f 4006 + * ud2 4007 + * 1: cs call __x86_indirect_thunk_r11 4008 + * 4009 + * Verify all indirect calls are kCFI adorned by checking for the 4010 + * UD2. Notably, doing __nocfi calls to regular (cfi) functions is 4011 + * broken. 4012 + */ 4013 + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { 4014 + struct symbol *sym = insn->sym; 4015 + 4016 + if (sym && (sym->type == STT_NOTYPE || 4017 + sym->type == STT_FUNC) && !sym->nocfi) { 4018 + struct instruction *prev = 4019 + prev_insn_same_sym(file, insn); 4020 + 4021 + if (!prev || prev->type != INSN_BUG) { 4022 + WARN_INSN(insn, "no-cfi indirect call!"); 4023 + warnings++; 4024 + } 4025 + } 4006 4026 } 4007 4027 4008 4028 return warnings;
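The comment block above documents the exact code sequence objtool now insists on; that sequence is simply what the compiler generates for a plain C indirect call under kCFI. An illustrative source-level view (hash and names hypothetical):

    int (*handler)(int);    /* kCFI hash derived from the int (int) prototype */

    int dispatch(int v)
    {
            /*
             * Under kCFI this compiles to the sequence quoted above:
             *   movl $(-hash), %r10d
             *   addl -4(%r11), %r10d
             *   jz   1f
             *   ud2                  <- the instruction objtool looks for
             * 1:  cs call __x86_indirect_thunk_r11
             */
            return handler(v);
    }

A call like this passes the new check; a __nocfi caller emits no UD2 and must be allowlisted via ANNOTATE_NOCFI_SYM, otherwise objtool reports "no-cfi indirect call!".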
+1
tools/objtool/include/objtool/elf.h
··· 70 70 u8 local_label : 1; 71 71 u8 frame_pointer : 1; 72 72 u8 ignore : 1; 73 + u8 nocfi : 1; 73 74 struct list_head pv_target; 74 75 struct reloc *relocs; 75 76 struct section *group_sec;