Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86/decompressor: Avoid the need for a stack in the 32-bit trampoline

The 32-bit trampoline no longer uses the stack for anything except
performing a far return back to long mode, and preserving the caller's
stack pointer value. Currently, the trampoline stack is placed in the
same page that carries the trampoline code, which means this page must
be mapped writable and executable, and the stack is therefore executable
as well.

Replace the far return with a far jump, so that the return address can
be pre-calculated and patched into the code before it is called. This
removes the need for a 32-bit addressable stack entirely. A later patch
will take advantage of this by removing writable permissions from (and
adding executable permissions to) the trampoline code page when booting
via the EFI stub.

Note that the value of RSP still needs to be preserved explicitly across
the switch into 32-bit mode, as the register may get truncated to 32
bits.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-12-ardb@kernel.org

Authored by Ard Biesheuvel and committed by Borislav Petkov (AMD)
bd328aa0 918a7a04

+40 -21
+27 -18
arch/x86/boot/compressed/head_64.S
··· 540 540 * trampoline memory. A non-zero second argument (ESI) means that the 541 541 * trampoline needs to enable 5-level paging. 542 542 */ 543 + .section ".rodata", "a", @progbits 543 544 SYM_CODE_START(trampoline_32bit_src) 544 545 /* 545 546 * Preserve live 64-bit registers on the stack: this is necessary ··· 551 550 pushq %rbp 552 551 pushq %rbx 553 552 554 - /* Set up 32-bit addressable stack and push the old RSP value */ 555 - leaq (TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx 556 - movq %rsp, (%rbx) 557 - movq %rbx, %rsp 558 - 559 - /* Take the address of the trampoline exit code */ 560 - leaq .Lret(%rip), %rbx 553 + /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ 554 + movq %rsp, %rbx 555 + shrq $32, %rbx 561 556 562 557 /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ 563 558 pushq $__KERNEL32_CS ··· 561 564 pushq %rax 562 565 lretq 563 566 567 + /* 568 + * The 32-bit code below will do a far jump back to long mode and end 569 + * up here after reconfiguring the number of paging levels. First, the 570 + * stack pointer needs to be restored to its full 64-bit value before 571 + * the callee save register contents can be popped from the stack. 572 + */ 564 573 .Lret: 574 + shlq $32, %rbx 575 + orq %rbx, %rsp 576 + 565 577 /* Restore the preserved 64-bit registers */ 566 - movq (%rsp), %rsp 567 578 popq %rbx 568 579 popq %rbp 569 580 popq %r15 ··· 579 574 580 575 .code32 581 576 0: 582 - /* Set up data and stack segments */ 583 - movl $__KERNEL_DS, %eax 584 - movl %eax, %ds 585 - movl %eax, %ss 586 - 587 577 /* Disable paging */ 588 578 movl %cr0, %eax 589 579 btrl $X86_CR0_PG_BIT, %eax ··· 633 633 1: 634 634 movl %eax, %cr4 635 635 636 - /* Prepare the stack for far return to Long Mode */ 637 - pushl $__KERNEL_CS 638 - pushl %ebx 639 - 640 636 /* Enable paging again. 
*/ 641 637 movl %cr0, %eax 642 638 btsl $X86_CR0_PG_BIT, %eax 643 639 movl %eax, %cr0 644 640 645 - lret 641 + /* 642 + * Return to the 64-bit calling code using LJMP rather than LRET, to 643 + * avoid the need for a 32-bit addressable stack. The destination 644 + * address will be adjusted after the template code is copied into a 645 + * 32-bit addressable buffer. 646 + */ 647 + .Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) 646 648 SYM_CODE_END(trampoline_32bit_src) 649 + 650 + /* 651 + * This symbol is placed right after trampoline_32bit_src() so its address can 652 + * be used to infer the size of the trampoline code. 653 + */ 654 + SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) 647 655 648 656 /* 649 657 * The trampoline code has a size limit. ··· 660 652 */ 661 653 .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE 662 654 655 + .text 663 656 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) 664 657 /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 665 658 1:
+2 -2
arch/x86/boot/compressed/pgtable.h
··· 8 8 #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE 9 9 #define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 10 10 11 - #define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE 12 - 13 11 #ifndef __ASSEMBLER__ 14 12 15 13 extern unsigned long *trampoline_32bit; 16 14 17 15 extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); 16 + 17 + extern const u16 trampoline_ljmp_imm_offset; 18 18 19 19 #endif /* __ASSEMBLER__ */ 20 20 #endif /* BOOT_COMPRESSED_PAGETABLE_H */
+11 -1
arch/x86/boot/compressed/pgtable_64.c
··· 109 109 struct paging_config paging_prepare(void *rmode) 110 110 { 111 111 struct paging_config paging_config = {}; 112 + void *tramp_code; 112 113 113 114 /* Initialize boot_params. Required for cmdline_find_option_bool(). */ 114 115 boot_params = rmode; ··· 149 148 memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); 150 149 151 150 /* Copy trampoline code in place */ 152 - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), 151 + tramp_code = memcpy(trampoline_32bit + 152 + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), 153 153 &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); 154 + 155 + /* 156 + * Avoid the need for a stack in the 32-bit trampoline code, by using 157 + * LJMP rather than LRET to return back to long mode. LJMP takes an 158 + * immediate absolute address, which needs to be adjusted based on the 159 + * placement of the trampoline. 160 + */ 161 + *(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code; 154 162 155 163 /* 156 164 * The code below prepares page table in trampoline memory.