Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86/efistub: Perform 4/5 level paging switch from the stub

In preparation for updating the EFI stub boot flow to avoid the bare
metal decompressor code altogether, implement the support code for
switching between 4 and 5 levels of paging before jumping to the kernel
proper.

Reuse the newly refactored trampoline that the bare metal decompressor
uses, but rely on EFI APIs to allocate 32-bit addressable memory and
remap it with the appropriate permissions. Given that the bare metal
decompressor will no longer call into the trampoline if the number of
paging levels is already set correctly, it is no longer necessary to remove
NX restrictions from the memory range where this trampoline may end up.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-17-ardb@kernel.org

authored by

Ard Biesheuvel and committed by
Borislav Petkov (AMD)
cb1c9e02 03dda951

+130 -26
+1
drivers/firmware/efi/libstub/Makefile
··· 88 88 lib-$(CONFIG_ARM) += arm32-stub.o 89 89 lib-$(CONFIG_ARM64) += arm64.o arm64-stub.o smbios.o 90 90 lib-$(CONFIG_X86) += x86-stub.o 91 + lib-$(CONFIG_X86_64) += x86-5lvl.o 91 92 lib-$(CONFIG_RISCV) += riscv.o riscv-stub.o 92 93 lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o 93 94
+2
drivers/firmware/efi/libstub/efi-stub-helper.c
··· 73 73 efi_loglevel = CONSOLE_LOGLEVEL_QUIET; 74 74 } else if (!strcmp(param, "noinitrd")) { 75 75 efi_noinitrd = true; 76 + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { 77 + efi_no5lvl = true; 76 78 } else if (!strcmp(param, "efi") && val) { 77 79 efi_nochunk = parse_option_str(val, "nochunk"); 78 80 efi_novamap |= parse_option_str(val, "novamap");
+1
drivers/firmware/efi/libstub/efistub.h
··· 33 33 #define EFI_ALLOC_LIMIT ULONG_MAX 34 34 #endif 35 35 36 + extern bool efi_no5lvl; 36 37 extern bool efi_nochunk; 37 38 extern bool efi_nokaslr; 38 39 extern int efi_loglevel;
+95
drivers/firmware/efi/libstub/x86-5lvl.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <linux/efi.h> 3 + 4 + #include <asm/boot.h> 5 + #include <asm/desc.h> 6 + #include <asm/efi.h> 7 + 8 + #include "efistub.h" 9 + #include "x86-stub.h" 10 + 11 + bool efi_no5lvl; 12 + 13 + static void (*la57_toggle)(void *cr3); 14 + 15 + static const struct desc_struct gdt[] = { 16 + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), 17 + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), 18 + }; 19 + 20 + /* 21 + * Enabling (or disabling) 5 level paging is tricky, because it can only be 22 + * done from 32-bit mode with paging disabled. This means not only that the 23 + * code itself must be running from 32-bit addressable physical memory, but 24 + * also that the root page table must be 32-bit addressable, as programming 25 + * a 64-bit value into CR3 when running in 32-bit mode is not supported. 26 + */ 27 + efi_status_t efi_setup_5level_paging(void) 28 + { 29 + u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src; 30 + efi_status_t status; 31 + u8 *la57_code; 32 + 33 + if (!efi_is_64bit()) 34 + return EFI_SUCCESS; 35 + 36 + /* check for 5 level paging support */ 37 + if (native_cpuid_eax(0) < 7 || 38 + !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) 39 + return EFI_SUCCESS; 40 + 41 + /* allocate some 32-bit addressable memory for code and a page table */ 42 + status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code, 43 + U32_MAX); 44 + if (status != EFI_SUCCESS) 45 + return status; 46 + 47 + la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size); 48 + memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size); 49 + 50 + /* 51 + * To avoid the need to allocate a 32-bit addressable stack, the 52 + * trampoline uses a LJMP instruction to switch back to long mode. 53 + * LJMP takes an absolute destination address, which needs to be 54 + * fixed up at runtime. 
55 + */ 56 + *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code; 57 + 58 + efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE); 59 + 60 + return EFI_SUCCESS; 61 + } 62 + 63 + void efi_5level_switch(void) 64 + { 65 + bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl; 66 + bool have_la57 = native_read_cr4() & X86_CR4_LA57; 67 + bool need_toggle = want_la57 ^ have_la57; 68 + u64 *pgt = (void *)la57_toggle + PAGE_SIZE; 69 + u64 *cr3 = (u64 *)__native_read_cr3(); 70 + u64 *new_cr3; 71 + 72 + if (!la57_toggle || !need_toggle) 73 + return; 74 + 75 + if (!have_la57) { 76 + /* 77 + * 5 level paging will be enabled, so a root level page needs 78 + * to be allocated from the 32-bit addressable physical region, 79 + * with its first entry referring to the existing hierarchy. 80 + */ 81 + new_cr3 = memset(pgt, 0, PAGE_SIZE); 82 + new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC; 83 + } else { 84 + /* take the new root table pointer from the current entry #0 */ 85 + new_cr3 = (u64 *)(cr3[0] & PAGE_MASK); 86 + 87 + /* copy the new root table if it is not 32-bit addressable */ 88 + if ((u64)new_cr3 > U32_MAX) 89 + new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE); 90 + } 91 + 92 + native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt }); 93 + 94 + la57_toggle(new_cr3); 95 + }
+14 -26
drivers/firmware/efi/libstub/x86-stub.c
··· 17 17 #include <asm/boot.h> 18 18 19 19 #include "efistub.h" 20 + #include "x86-stub.h" 20 21 21 22 /* Maximum physical address for 64-bit kernel with 4-level paging */ 22 23 #define MAXMEM_X86_64_4LEVEL (1ull << 46) ··· 224 223 } 225 224 } 226 225 227 - static void 228 - adjust_memory_range_protection(unsigned long start, unsigned long size) 226 + void efi_adjust_memory_range_protection(unsigned long start, 227 + unsigned long size) 229 228 { 230 229 efi_status_t status; 231 230 efi_gcd_memory_space_desc_t desc; ··· 279 278 } 280 279 } 281 280 282 - /* 283 - * Trampoline takes 2 pages and can be loaded in first megabyte of memory 284 - * with its end placed between 128k and 640k where BIOS might start. 285 - * (see arch/x86/boot/compressed/pgtable_64.c) 286 - * 287 - * We cannot find exact trampoline placement since memory map 288 - * can be modified by UEFI, and it can alter the computed address. 289 - */ 290 - 291 - #define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) 292 - #define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) 293 - 294 281 extern const u8 startup_32[], startup_64[]; 295 282 296 283 static void 297 284 setup_memory_protection(unsigned long image_base, unsigned long image_size) 298 285 { 299 - /* 300 - * Allow execution of possible trampoline used 301 - * for switching between 4- and 5-level page tables 302 - * and relocated kernel image. 303 - */ 304 - 305 - adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, 306 - TRAMPOLINE_PLACEMENT_SIZE); 307 - 308 286 #ifdef CONFIG_64BIT 309 287 if (image_base != (unsigned long)startup_32) 310 - adjust_memory_range_protection(image_base, image_size); 288 + efi_adjust_memory_range_protection(image_base, image_size); 311 289 #else 312 290 /* 313 291 * Clear protection flags on a whole range of possible ··· 296 316 * need to remove possible protection on relocated image 297 317 * itself disregarding further relocations. 
298 318 */ 299 - adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, 300 - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); 319 + efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, 320 + KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); 301 321 #endif 302 322 } 303 323 ··· 819 839 efi_dxe_table = NULL; 820 840 } 821 841 842 + status = efi_setup_5level_paging(); 843 + if (status != EFI_SUCCESS) { 844 + efi_err("efi_setup_5level_paging() failed!\n"); 845 + goto fail; 846 + } 847 + 822 848 /* 823 849 * If the kernel isn't already loaded at a suitable address, 824 850 * relocate it. ··· 944 958 efi_err("exit_boot() failed!\n"); 945 959 goto fail; 946 960 } 961 + 962 + efi_5level_switch(); 947 963 948 964 if (IS_ENABLED(CONFIG_X86_64)) 949 965 bzimage_addr += startup_64 - startup_32;
+17
drivers/firmware/efi/libstub/x86-stub.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + 3 + #include <linux/efi.h> 4 + 5 + extern void trampoline_32bit_src(void *, bool); 6 + extern const u16 trampoline_ljmp_imm_offset; 7 + 8 + void efi_adjust_memory_range_protection(unsigned long start, 9 + unsigned long size); 10 + 11 + #ifdef CONFIG_X86_64 12 + efi_status_t efi_setup_5level_paging(void); 13 + void efi_5level_switch(void); 14 + #else 15 + static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; } 16 + static inline void efi_5level_switch(void) {} 17 + #endif