Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86-boot-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot code updates from Ingo Molnar:

- Memblock setup and other early boot code cleanups (Mike Rapoport)

- Export e820_table_kexec[] to sysfs (Dave Young)

- Baby steps of adding relocate_kernel() debugging support (David
Woodhouse)

- Replace open-coded parity calculation with parity8() (Kuan-Wei Chiu)

- Move the LA57 trampoline to separate source file (Ard Biesheuvel)

- Misc micro-optimizations (Uros Bizjak)

- Drop obsolete E820_TYPE_RESERVED_KERN and related code (Mike
Rapoport)

* tag 'x86-boot-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/kexec: Add relocate_kernel() debugging support: Load a GDT
x86/boot: Move the LA57 trampoline to separate source file
x86/boot: Do not test if AC and ID eflags are changeable on x86_64
x86/bootflag: Replace open-coded parity calculation with parity8()
x86/bootflag: Micro-optimize sbf_write()
x86/boot: Add missing has_cpuflag() prototype
x86/kexec: Export e820_table_kexec[] to sysfs
x86/boot: Change some static bootflag functions to bool
x86/e820: Drop obsolete E820_TYPE_RESERVED_KERN and related code
x86/boot: Split parsing of boot_params into the parse_boot_params() helper function
x86/boot: Split kernel resources setup into the setup_kernel_resources() helper function
x86/boot: Move setting of memblock parameters to e820__memblock_setup()

+266 -306
+1
arch/x86/boot/compressed/Makefile
··· 98 98 vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o 99 99 vmlinux-objs-y += $(obj)/pgtable_64.o 100 100 vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o 101 + vmlinux-objs-y += $(obj)/la57toggle.o 101 102 endif 102 103 103 104 vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
-103
arch/x86/boot/compressed/head_64.S
··· 483 483 jmp *%rax 484 484 SYM_FUNC_END(.Lrelocated) 485 485 486 - /* 487 - * This is the 32-bit trampoline that will be copied over to low memory. It 488 - * will be called using the ordinary 64-bit calling convention from code 489 - * running in 64-bit mode. 490 - * 491 - * Return address is at the top of the stack (might be above 4G). 492 - * The first argument (EDI) contains the address of the temporary PGD level 493 - * page table in 32-bit addressable memory which will be programmed into 494 - * register CR3. 495 - */ 496 - .section ".rodata", "a", @progbits 497 - SYM_CODE_START(trampoline_32bit_src) 498 - /* 499 - * Preserve callee save 64-bit registers on the stack: this is 500 - * necessary because the architecture does not guarantee that GPRs will 501 - * retain their full 64-bit values across a 32-bit mode switch. 502 - */ 503 - pushq %r15 504 - pushq %r14 505 - pushq %r13 506 - pushq %r12 507 - pushq %rbp 508 - pushq %rbx 509 - 510 - /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ 511 - movq %rsp, %rbx 512 - shrq $32, %rbx 513 - 514 - /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ 515 - pushq $__KERNEL32_CS 516 - leaq 0f(%rip), %rax 517 - pushq %rax 518 - lretq 519 - 520 - /* 521 - * The 32-bit code below will do a far jump back to long mode and end 522 - * up here after reconfiguring the number of paging levels. First, the 523 - * stack pointer needs to be restored to its full 64-bit value before 524 - * the callee save register contents can be popped from the stack. 525 - */ 526 - .Lret: 527 - shlq $32, %rbx 528 - orq %rbx, %rsp 529 - 530 - /* Restore the preserved 64-bit registers */ 531 - popq %rbx 532 - popq %rbp 533 - popq %r12 534 - popq %r13 535 - popq %r14 536 - popq %r15 537 - retq 538 - 539 486 .code32 540 - 0: 541 - /* Disable paging */ 542 - movl %cr0, %eax 543 - btrl $X86_CR0_PG_BIT, %eax 544 - movl %eax, %cr0 545 - 546 - /* Point CR3 to the trampoline's new top level page table */ 547 - movl %edi, %cr3 548 - 549 - /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ 550 - movl $MSR_EFER, %ecx 551 - rdmsr 552 - btsl $_EFER_LME, %eax 553 - /* Avoid writing EFER if no change was made (for TDX guest) */ 554 - jc 1f 555 - wrmsr 556 - 1: 557 - /* Toggle CR4.LA57 */ 558 - movl %cr4, %eax 559 - btcl $X86_CR4_LA57_BIT, %eax 560 - movl %eax, %cr4 561 - 562 - /* Enable paging again. */ 563 - movl %cr0, %eax 564 - btsl $X86_CR0_PG_BIT, %eax 565 - movl %eax, %cr0 566 - 567 - /* 568 - * Return to the 64-bit calling code using LJMP rather than LRET, to 569 - * avoid the need for a 32-bit addressable stack. The destination 570 - * address will be adjusted after the template code is copied into a 571 - * 32-bit addressable buffer. 572 - */ 573 - .Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) 574 - SYM_CODE_END(trampoline_32bit_src) 575 - 576 - /* 577 - * This symbol is placed right after trampoline_32bit_src() so its address can 578 - * be used to infer the size of the trampoline code. 579 - */ 580 - SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) 581 - 582 - /* 583 - * The trampoline code has a size limit. 584 - * Make sure we fail to compile if the trampoline code grows 585 - * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. 586 - */ 587 - .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE 588 - 589 - .text 590 487 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) 591 488 /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 592 489 1:
+112
arch/x86/boot/compressed/la57toggle.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #include <linux/linkage.h> 4 + #include <asm/segment.h> 5 + #include <asm/boot.h> 6 + #include <asm/msr.h> 7 + #include <asm/processor-flags.h> 8 + #include "pgtable.h" 9 + 10 + /* 11 + * This is the 32-bit trampoline that will be copied over to low memory. It 12 + * will be called using the ordinary 64-bit calling convention from code 13 + * running in 64-bit mode. 14 + * 15 + * Return address is at the top of the stack (might be above 4G). 16 + * The first argument (EDI) contains the address of the temporary PGD level 17 + * page table in 32-bit addressable memory which will be programmed into 18 + * register CR3. 19 + */ 20 + 21 + .section ".rodata", "a", @progbits 22 + SYM_CODE_START(trampoline_32bit_src) 23 + /* 24 + * Preserve callee save 64-bit registers on the stack: this is 25 + * necessary because the architecture does not guarantee that GPRs will 26 + * retain their full 64-bit values across a 32-bit mode switch. 27 + */ 28 + pushq %r15 29 + pushq %r14 30 + pushq %r13 31 + pushq %r12 32 + pushq %rbp 33 + pushq %rbx 34 + 35 + /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ 36 + movq %rsp, %rbx 37 + shrq $32, %rbx 38 + 39 + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ 40 + pushq $__KERNEL32_CS 41 + leaq 0f(%rip), %rax 42 + pushq %rax 43 + lretq 44 + 45 + /* 46 + * The 32-bit code below will do a far jump back to long mode and end 47 + * up here after reconfiguring the number of paging levels. First, the 48 + * stack pointer needs to be restored to its full 64-bit value before 49 + * the callee save register contents can be popped from the stack. 50 + */ 51 + .Lret: 52 + shlq $32, %rbx 53 + orq %rbx, %rsp 54 + 55 + /* Restore the preserved 64-bit registers */ 56 + popq %rbx 57 + popq %rbp 58 + popq %r12 59 + popq %r13 60 + popq %r14 61 + popq %r15 62 + retq 63 + 64 + .code32 65 + 0: 66 + /* Disable paging */ 67 + movl %cr0, %eax 68 + btrl $X86_CR0_PG_BIT, %eax 69 + movl %eax, %cr0 70 + 71 + /* Point CR3 to the trampoline's new top level page table */ 72 + movl %edi, %cr3 73 + 74 + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ 75 + movl $MSR_EFER, %ecx 76 + rdmsr 77 + btsl $_EFER_LME, %eax 78 + /* Avoid writing EFER if no change was made (for TDX guest) */ 79 + jc 1f 80 + wrmsr 81 + 1: 82 + /* Toggle CR4.LA57 */ 83 + movl %cr4, %eax 84 + btcl $X86_CR4_LA57_BIT, %eax 85 + movl %eax, %cr4 86 + 87 + /* Enable paging again. */ 88 + movl %cr0, %eax 89 + btsl $X86_CR0_PG_BIT, %eax 90 + movl %eax, %cr0 91 + 92 + /* 93 + * Return to the 64-bit calling code using LJMP rather than LRET, to 94 + * avoid the need for a 32-bit addressable stack. The destination 95 + * address will be adjusted after the template code is copied into a 96 + * 32-bit addressable buffer. 97 + */ 98 + .Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) 99 + SYM_CODE_END(trampoline_32bit_src) 100 + 101 + /* 102 + * This symbol is placed right after trampoline_32bit_src() so its address can 103 + * be used to infer the size of the trampoline code. 104 + */ 105 + SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) 106 + 107 + /* 108 + * The trampoline code has a size limit. 109 + * Make sure we fail to compile if the trampoline code grows 110 + * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. 111 + */ 112 + .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+9 -17
arch/x86/boot/cpuflags.c
··· 28 28 return fsw == 0 && (fcw & 0x103f) == 0x003f; 29 29 } 30 30 31 + #ifdef CONFIG_X86_32 31 32 /* 32 33 * For building the 16-bit code we want to explicitly specify 32-bit 33 34 * push/pop operations, rather than just saying 'pushf' or 'popf' and 34 - * letting the compiler choose. But this is also included from the 35 - * compressed/ directory where it may be 64-bit code, and thus needs 36 - * to be 'pushfq' or 'popfq' in that case. 35 + * letting the compiler choose. 37 36 */ 38 - #ifdef __x86_64__ 39 - #define PUSHF "pushfq" 40 - #define POPF "popfq" 41 - #else 42 - #define PUSHF "pushfl" 43 - #define POPF "popfl" 44 - #endif 45 - 46 - int has_eflag(unsigned long mask) 37 + bool has_eflag(unsigned long mask) 47 38 { 48 39 unsigned long f0, f1; 49 40 50 - asm volatile(PUSHF " \n\t" 51 - PUSHF " \n\t" 41 + asm volatile("pushfl \n\t" 42 + "pushfl \n\t" 52 43 "pop %0 \n\t" 53 44 "mov %0,%1 \n\t" 54 45 "xor %2,%1 \n\t" 55 46 "push %1 \n\t" 56 - POPF " \n\t" 57 - PUSHF " \n\t" 47 + "popfl \n\t" 48 + "pushfl \n\t" 58 49 "pop %1 \n\t" 59 - POPF 50 + "popfl" 60 51 : "=&r" (f0), "=&r" (f1) 61 52 : "ri" (mask)); 62 53 63 54 return !!((f0^f1) & mask); 64 55 } 56 + #endif 65 57 66 58 void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d) 67 59 {
+6 -1
arch/x86/boot/cpuflags.h
··· 15 15 extern struct cpu_features cpu; 16 16 extern u32 cpu_vendor[3]; 17 17 18 - int has_eflag(unsigned long mask); 18 + #ifdef CONFIG_X86_32 19 + bool has_eflag(unsigned long mask); 20 + #else 21 + static inline bool has_eflag(unsigned long mask) { return true; } 22 + #endif 19 23 void get_cpuflags(void); 20 24 void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d); 25 + bool has_cpuflag(int flag); 21 26 22 27 #endif
-1
arch/x86/include/asm/e820/api.h
··· 29 29 extern u64 e820__memblock_alloc_reserved(u64 size, u64 align); 30 30 extern void e820__memblock_setup(void); 31 31 32 - extern void e820__reserve_setup_data(void); 33 32 extern void e820__finish_early_params(void); 34 33 extern void e820__reserve_resources(void); 35 34 extern void e820__reserve_resources_late(void);
-9
arch/x86/include/asm/e820/types.h
··· 35 35 * marking it with the IORES_DESC_SOFT_RESERVED designation. 36 36 */ 37 37 E820_TYPE_SOFT_RESERVED = 0xefffffff, 38 - 39 - /* 40 - * Reserved RAM used by the kernel itself if 41 - * CONFIG_INTEL_TXT=y is enabled, memory of this type 42 - * will be included in the S3 integrity calculation 43 - * and so should not include any memory that the BIOS 44 - * might alter over the S3 transition: 45 - */ 46 - E820_TYPE_RESERVED_KERN = 128, 47 38 }; 48 39 49 40 /*
+8 -21
arch/x86/kernel/bootflag.c
··· 8 8 #include <linux/string.h> 9 9 #include <linux/spinlock.h> 10 10 #include <linux/acpi.h> 11 + #include <linux/bitops.h> 11 12 #include <asm/io.h> 12 13 13 14 #include <linux/mc146818rtc.h> ··· 21 20 22 21 int sbf_port __initdata = -1; /* set via acpi_boot_init() */ 23 22 24 - static int __init parity(u8 v) 25 - { 26 - int x = 0; 27 - int i; 28 - 29 - for (i = 0; i < 8; i++) { 30 - x ^= (v & 1); 31 - v >>= 1; 32 - } 33 - 34 - return x; 35 - } 36 - 37 23 static void __init sbf_write(u8 v) 38 24 { 39 25 unsigned long flags; 40 26 41 27 if (sbf_port != -1) { 42 - v &= ~SBF_PARITY; 43 - if (!parity(v)) 44 - v |= SBF_PARITY; 28 + if (!parity8(v)) 29 + v ^= SBF_PARITY; 45 30 46 31 printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", 47 32 sbf_port, v); ··· 53 66 return v; 54 67 } 55 68 56 - static int __init sbf_value_valid(u8 v) 69 + static bool __init sbf_value_valid(u8 v) 57 70 { 58 71 if (v & SBF_RESERVED) /* Reserved bits */ 59 - return 0; 60 - if (!parity(v)) 61 - return 0; 72 + return false; 73 + if (!parity8(v)) 74 + return false; 62 75 63 - return 1; 76 + return true; 64 77 } 65 78 66 79 static int __init sbf_init(void)
+43 -72
arch/x86/kernel/e820.c
··· 28 28 * the first 128 E820 memory entries in boot_params.e820_table and the remaining 29 29 * (if any) entries of the SETUP_E820_EXT nodes. We use this to: 30 30 * 31 - * - inform the user about the firmware's notion of memory layout 32 - * via /sys/firmware/memmap 33 - * 34 31 * - the hibernation code uses it to generate a kernel-independent CRC32 35 32 * checksum of the physical memory layout of a system. 36 33 * 37 34 * - 'e820_table_kexec': a slightly modified (by the kernel) firmware version 38 35 * passed to us by the bootloader - the major difference between 39 - * e820_table_firmware[] and this one is that, the latter marks the setup_data 40 - * list created by the EFI boot stub as reserved, so that kexec can reuse the 41 - * setup_data information in the second kernel. Besides, e820_table_kexec[] 42 - * might also be modified by the kexec itself to fake a mptable. 36 + * e820_table_firmware[] and this one is that e820_table_kexec[] 37 + * might be modified by the kexec itself to fake an mptable. 43 38 * We use this to: 44 39 * 45 40 * - kexec, which is a bootloader in disguise, uses the original E820 46 41 * layout to pass to the kexec-ed kernel. This way the original kernel 47 42 * can have a restricted E820 map while the kexec()-ed kexec-kernel 48 43 * can have access to full memory - etc. 44 + * 45 + * Export the memory layout via /sys/firmware/memmap. kexec-tools uses 46 + * the entries to create an E820 table for the kexec kernel. 47 + * 48 + * kexec_file_load in-kernel code uses the table for the kexec kernel. 49 49 * 50 50 * - 'e820_table': this is the main E820 table that is massaged by the 51 51 * low level x86 platform code, or modified by boot parameters, before ··· 187 187 static void __init e820_print_type(enum e820_type type) 188 188 { 189 189 switch (type) { 190 - case E820_TYPE_RAM: /* Fall through: */ 191 - case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break; 190 + case E820_TYPE_RAM: pr_cont("usable"); break; 192 191 case E820_TYPE_RESERVED: pr_cont("reserved"); break; 193 192 case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved"); break; 194 193 case E820_TYPE_ACPI: pr_cont("ACPI data"); break; ··· 763 764 764 765 pfn = PFN_DOWN(entry->addr + entry->size); 765 766 766 - if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) 767 + if (entry->type != E820_TYPE_RAM) 767 768 register_nosave_region(PFN_UP(entry->addr), pfn); 768 769 769 770 if (pfn >= limit_pfn) ··· 990 991 early_param("memmap", parse_memmap_opt); 991 992 992 993 /* 993 - * Reserve all entries from the bootloader's extensible data nodes list, 994 - * because if present we are going to use it later on to fetch e820 995 - * entries from it: 996 - */ 997 - void __init e820__reserve_setup_data(void) 998 - { 999 - struct setup_indirect *indirect; 1000 - struct setup_data *data; 1001 - u64 pa_data, pa_next; 1002 - u32 len; 1003 - 1004 - pa_data = boot_params.hdr.setup_data; 1005 - if (!pa_data) 1006 - return; 1007 - 1008 - while (pa_data) { 1009 - data = early_memremap(pa_data, sizeof(*data)); 1010 - if (!data) { 1011 - pr_warn("e820: failed to memremap setup_data entry\n"); 1012 - return; 1013 - } 1014 - 1015 - len = sizeof(*data); 1016 - pa_next = data->next; 1017 - 1018 - e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); 1019 - 1020 - if (data->type == SETUP_INDIRECT) { 1021 - len += data->len; 1022 - early_memunmap(data, sizeof(*data)); 1023 - data = early_memremap(pa_data, len); 1024 - if (!data) { 1025 - pr_warn("e820: failed to memremap indirect setup_data\n"); 1026 - return; 1027 - } 1028 - 1029 - indirect = (struct setup_indirect *)data->data; 1030 - 1031 - if (indirect->type != SETUP_INDIRECT) 1032 - e820__range_update(indirect->addr, indirect->len, 1033 - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); 1034 - } 1035 - 1036 - pa_data = pa_next; 1037 - early_memunmap(data, len); 1038 - } 1039 - 1040 - e820__update_table(e820_table); 1041 - 1042 - pr_info("extended physical RAM map:\n"); 1043 - e820__print_table("reserve setup_data"); 1044 - } 1045 - 1046 - /* 1047 994 * Called after parse_early_param(), after early parameters (such as mem=) 1048 995 * have been processed, in which case we already have an E820 table filled in 1049 996 * via the parameter callback function(s), but it's not sorted and printed yet: ··· 1008 1063 static const char *__init e820_type_to_string(struct e820_entry *entry) 1009 1064 { 1010 1065 switch (entry->type) { 1011 - case E820_TYPE_RESERVED_KERN: /* Fall-through: */ 1012 1066 case E820_TYPE_RAM: return "System RAM"; 1013 1067 case E820_TYPE_ACPI: return "ACPI Tables"; 1014 1068 case E820_TYPE_NVS: return "ACPI Non-volatile Storage"; ··· 1023 1079 static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry) 1024 1080 { 1025 1081 switch (entry->type) { 1026 - case E820_TYPE_RESERVED_KERN: /* Fall-through: */ 1027 1082 case E820_TYPE_RAM: return IORESOURCE_SYSTEM_RAM; 1028 1083 case E820_TYPE_ACPI: /* Fall-through: */ 1029 1084 case E820_TYPE_NVS: /* Fall-through: */ ··· 1044 1101 case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; 1045 1102 case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; 1046 1103 case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED; 1047 - case E820_TYPE_RESERVED_KERN: /* Fall-through: */ 1048 1104 case E820_TYPE_RAM: /* Fall-through: */ 1049 1105 case E820_TYPE_UNUSABLE: /* Fall-through: */ 1050 1106 default: return IORES_DESC_NONE; ··· 1066 1124 case E820_TYPE_PRAM: 1067 1125 case E820_TYPE_PMEM: 1068 1126 return false; 1069 - case E820_TYPE_RESERVED_KERN: 1070 1127 case E820_TYPE_RAM: 1071 1128 case E820_TYPE_ACPI: 1072 1129 case E820_TYPE_NVS: ··· 1117 1176 res++; 1118 1177 } 1119 1178 1120 - /* Expose the bootloader-provided memory layout to the sysfs. */ 1121 - for (i = 0; i < e820_table_firmware->nr_entries; i++) { 1122 - struct e820_entry *entry = e820_table_firmware->entries + i; 1179 + /* Expose the kexec e820 table to the sysfs. */ 1180 + for (i = 0; i < e820_table_kexec->nr_entries; i++) { 1181 + struct e820_entry *entry = e820_table_kexec->entries + i; 1123 1182 1124 1183 firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry)); 1125 1184 } ··· 1243 1302 int i; 1244 1303 u64 end; 1245 1304 1305 + #ifdef CONFIG_MEMORY_HOTPLUG 1306 + /* 1307 + * Memory used by the kernel cannot be hot-removed because Linux 1308 + * cannot migrate the kernel pages. When memory hotplug is 1309 + * enabled, we should prevent memblock from allocating memory 1310 + * for the kernel. 1311 + * 1312 + * ACPI SRAT records all hotpluggable memory ranges. But before 1313 + * SRAT is parsed, we don't know about it. 1314 + * 1315 + * The kernel image is loaded into memory at very early time. We 1316 + * cannot prevent this anyway. So on NUMA system, we set any 1317 + * node the kernel resides in as un-hotpluggable. 1318 + * 1319 + * Since on modern servers, one node could have double-digit 1320 + * gigabytes memory, we can assume the memory around the kernel 1321 + * image is also un-hotpluggable. So before SRAT is parsed, just 1322 + * allocate memory near the kernel image to try the best to keep 1323 + * the kernel away from hotpluggable memory. 1324 + */ 1325 + if (movable_node_is_enabled()) 1326 + memblock_set_bottom_up(true); 1327 + #endif 1328 + 1329 + /* 1330 + * At this point only the first megabyte is mapped for sure, the 1331 + * rest of the memory cannot be used for memblock resizing 1332 + */ 1333 + memblock_set_current_limit(ISA_END_ADDRESS); 1334 + 1246 1335 /* 1247 1336 * The bootstrap memblock region count maximum is 128 entries 1248 1337 * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries ··· 1294 1323 if (entry->type == E820_TYPE_SOFT_RESERVED) 1295 1324 memblock_reserve(entry->addr, entry->size); 1296 1325 1297 - if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) 1326 + if (entry->type != E820_TYPE_RAM) 1298 1327 continue; 1299 1328 1300 1329 memblock_add(entry->addr, entry->size);
+23
arch/x86/kernel/relocate_kernel_64.S
··· 40 40 SYM_DATA(kexec_pa_swap_page, .quad 0) 41 41 SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0) 42 42 43 + .balign 16 44 + SYM_DATA_START_LOCAL(kexec_debug_gdt) 45 + .word kexec_debug_gdt_end - kexec_debug_gdt - 1 46 + .long 0 47 + .word 0 48 + .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ 49 + .quad 0x00af9a000000ffff /* __KERNEL_CS */ 50 + .quad 0x00cf92000000ffff /* __KERNEL_DS */ 51 + SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end) 52 + 43 53 .section .text..relocate_kernel,"ax"; 44 54 .code64 45 55 SYM_CODE_START_NOALIGN(relocate_kernel) ··· 125 115 126 116 /* store the start address on the stack */ 127 117 pushq %rdx 118 + 119 + /* Create a GDTR (16 bits limit, 64 bits addr) on stack */ 120 + leaq kexec_debug_gdt(%rip), %rax 121 + pushq %rax 122 + pushw (%rax) 123 + 124 + /* Load the GDT, put the stack back */ 125 + lgdt (%rsp) 126 + addq $10, %rsp 127 + 128 + /* Test that we can load segments */ 129 + movq %ds, %rax 130 + movq %rax, %ds 128 131 129 132 /* 130 133 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
+63 -71
arch/x86/kernel/setup.c
··· 495 495 } 496 496 } 497 497 498 + /* 499 + * Translate the fields of 'struct boot_param' into global variables 500 + * representing these parameters. 501 + */ 502 + static void __init parse_boot_params(void) 503 + { 504 + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); 505 + screen_info = boot_params.screen_info; 506 + edid_info = boot_params.edid_info; 507 + #ifdef CONFIG_X86_32 508 + apm_info.bios = boot_params.apm_bios_info; 509 + ist_info = boot_params.ist_info; 510 + #endif 511 + saved_video_mode = boot_params.hdr.vid_mode; 512 + bootloader_type = boot_params.hdr.type_of_loader; 513 + if ((bootloader_type >> 4) == 0xe) { 514 + bootloader_type &= 0xf; 515 + bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4; 516 + } 517 + bootloader_version = bootloader_type & 0xf; 518 + bootloader_version |= boot_params.hdr.ext_loader_ver << 4; 519 + 520 + #ifdef CONFIG_BLK_DEV_RAM 521 + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; 522 + #endif 523 + #ifdef CONFIG_EFI 524 + if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 525 + EFI32_LOADER_SIGNATURE, 4)) { 526 + set_bit(EFI_BOOT, &efi.flags); 527 + } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 528 + EFI64_LOADER_SIGNATURE, 4)) { 529 + set_bit(EFI_BOOT, &efi.flags); 530 + set_bit(EFI_64BIT, &efi.flags); 531 + } 532 + #endif 533 + 534 + if (!boot_params.hdr.root_flags) 535 + root_mountflags &= ~MS_RDONLY; 536 + } 537 + 498 538 static void __init memblock_x86_reserve_range_setup_data(void) 499 539 { 500 540 struct setup_indirect *indirect; ··· 631 591 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) 632 592 request_resource(&ioport_resource, &standard_io_resources[i]); 633 593 594 + } 595 + 596 + static void __init setup_kernel_resources(void) 597 + { 598 + code_resource.start = __pa_symbol(_text); 599 + code_resource.end = __pa_symbol(_etext)-1; 600 + rodata_resource.start = __pa_symbol(__start_rodata); 601 + rodata_resource.end = __pa_symbol(__end_rodata)-1; 602 + data_resource.start = __pa_symbol(_sdata); 603 + data_resource.end = __pa_symbol(_edata)-1; 604 + bss_resource.start = __pa_symbol(__bss_start); 605 + bss_resource.end = __pa_symbol(__bss_stop)-1; 606 + 607 + insert_resource(&iomem_resource, &code_resource); 608 + insert_resource(&iomem_resource, &rodata_resource); 609 + insert_resource(&iomem_resource, &data_resource); 610 + insert_resource(&iomem_resource, &bss_resource); 634 611 } 635 612 636 613 static bool __init snb_gfx_workaround_needed(void) ··· 912 855 913 856 setup_olpc_ofw_pgd(); 914 857 915 - ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); 916 - screen_info = boot_params.screen_info; 917 - edid_info = boot_params.edid_info; 918 - #ifdef CONFIG_X86_32 919 - apm_info.bios = boot_params.apm_bios_info; 920 - ist_info = boot_params.ist_info; 921 - #endif 922 - saved_video_mode = boot_params.hdr.vid_mode; 923 - bootloader_type = boot_params.hdr.type_of_loader; 924 - if ((bootloader_type >> 4) == 0xe) { 925 - bootloader_type &= 0xf; 926 - bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4; 927 - } 928 - bootloader_version = bootloader_type & 0xf; 929 - bootloader_version |= boot_params.hdr.ext_loader_ver << 4; 930 - 931 - #ifdef CONFIG_BLK_DEV_RAM 932 - rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; 933 - #endif 934 - #ifdef CONFIG_EFI 935 - if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 936 - EFI32_LOADER_SIGNATURE, 4)) { 937 - set_bit(EFI_BOOT, &efi.flags); 938 - } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 939 - EFI64_LOADER_SIGNATURE, 4)) { 940 - set_bit(EFI_BOOT, &efi.flags); 941 - set_bit(EFI_64BIT, &efi.flags); 942 - } 943 - #endif 858 + parse_boot_params(); 944 859 945 860 x86_init.oem.arch_setup(); 946 861 ··· 936 907 937 908 copy_edd(); 938 909 939 - if (!boot_params.hdr.root_flags) 940 - root_mountflags &= ~MS_RDONLY; 941 910 setup_initial_init_mm(_text, _etext, _edata, (void *)_brk_end); 942 - 943 - code_resource.start = __pa_symbol(_text); 944 - code_resource.end = __pa_symbol(_etext)-1; 945 - rodata_resource.start = __pa_symbol(__start_rodata); 946 - rodata_resource.end = __pa_symbol(__end_rodata)-1; 947 - data_resource.start = __pa_symbol(_sdata); 948 - data_resource.end = __pa_symbol(_edata)-1; 949 - bss_resource.start = __pa_symbol(__bss_start); 950 - bss_resource.end = __pa_symbol(__bss_stop)-1; 951 911 952 912 /* 953 913 * x86_configure_nx() is called before parse_early_param() to detect ··· 950 932 if (efi_enabled(EFI_BOOT)) 951 933 efi_memblock_x86_reserve_range(); 952 934 953 - #ifdef CONFIG_MEMORY_HOTPLUG 954 - /* 955 - * Memory used by the kernel cannot be hot-removed because Linux 956 - * cannot migrate the kernel pages. When memory hotplug is 957 - * enabled, we should prevent memblock from allocating memory 958 - * for the kernel. 959 - * 960 - * ACPI SRAT records all hotpluggable memory ranges. But before 961 - * SRAT is parsed, we don't know about it. 962 - * 963 - * The kernel image is loaded into memory at very early time. We 964 - * cannot prevent this anyway. So on NUMA system, we set any 965 - * node the kernel resides in as un-hotpluggable. 966 - * 967 - * Since on modern servers, one node could have double-digit 968 - * gigabytes memory, we can assume the memory around the kernel 969 - * image is also un-hotpluggable. So before SRAT is parsed, just 970 - * allocate memory near the kernel image to try the best to keep 971 - * the kernel away from hotpluggable memory. 972 - */ 973 - if (movable_node_is_enabled()) 974 - memblock_set_bottom_up(true); 975 - #endif 976 - 977 935 x86_report_nx(); 978 936 979 937 apic_setup_apic_calls(); ··· 961 967 setup_clear_cpu_cap(X86_FEATURE_APIC); 962 968 } 963 969 964 - e820__reserve_setup_data(); 965 970 e820__finish_early_params(); 966 971 967 972 if (efi_enabled(EFI_BOOT)) ··· 980 987 tsc_early_init(); 981 988 x86_init.resources.probe_roms(); 982 989 983 - /* after parse_early_param, so could debug it */ 984 - insert_resource(&iomem_resource, &code_resource); 985 - insert_resource(&iomem_resource, &rodata_resource); 986 - insert_resource(&iomem_resource, &data_resource); 987 - insert_resource(&iomem_resource, &bss_resource); 990 + /* 991 + * Add resources for kernel text and data to the iomem_resource. 992 + * Do it after parse_early_param, so it can be debugged. 993 + */ 994 + setup_kernel_resources(); 988 995 989 996 e820_add_kernel_range(); 990 997 trim_bios_range(); ··· 1049 1056 1050 1057 cleanup_highmap(); 1051 1058 1052 - memblock_set_current_limit(ISA_END_ADDRESS); 1053 1059 e820__memblock_setup(); 1054 1060 1055 1061 /*
+1 -2
arch/x86/kernel/tboot.c
··· 200 200 tboot->num_mac_regions = 0; 201 201 202 202 for (i = 0; i < e820_table->nr_entries; i++) { 203 - if ((e820_table->entries[i].type != E820_TYPE_RAM) 204 - && (e820_table->entries[i].type != E820_TYPE_RESERVED_KERN)) 203 + if (e820_table->entries[i].type != E820_TYPE_RAM) 205 204 continue; 206 205 207 206 add_mac_region(e820_table->entries[i].addr, e820_table->entries[i].size);
-8
arch/x86/mm/init_64.c
··· 469 469 !e820__mapped_any(paddr & PAGE_MASK, paddr_next, 470 470 E820_TYPE_RAM) && 471 471 !e820__mapped_any(paddr & PAGE_MASK, paddr_next, 472 - E820_TYPE_RESERVED_KERN) && 473 - !e820__mapped_any(paddr & PAGE_MASK, paddr_next, 474 472 E820_TYPE_ACPI)) 475 473 set_pte_init(pte, __pte(0), init); 476 474 continue; ··· 523 525 if (!after_bootmem && 524 526 !e820__mapped_any(paddr & PMD_MASK, paddr_next, 525 527 E820_TYPE_RAM) && 526 - !e820__mapped_any(paddr & PMD_MASK, paddr_next, 527 - E820_TYPE_RESERVED_KERN) && 528 528 !e820__mapped_any(paddr & PMD_MASK, paddr_next, 529 529 E820_TYPE_ACPI)) 530 530 set_pmd_init(pmd, __pmd(0), init); ··· 611 615 !e820__mapped_any(paddr & PUD_MASK, paddr_next, 612 616 E820_TYPE_RAM) && 613 617 !e820__mapped_any(paddr & PUD_MASK, paddr_next, 614 - E820_TYPE_RESERVED_KERN) && 615 - !e820__mapped_any(paddr & PUD_MASK, paddr_next, 616 618 E820_TYPE_ACPI)) 617 619 set_pud_init(pud, __pud(0), init); 618 620 continue; ··· 697 703 if (!after_bootmem && 698 704 !e820__mapped_any(paddr & P4D_MASK, paddr_next, 699 705 E820_TYPE_RAM) && 700 - !e820__mapped_any(paddr & P4D_MASK, paddr_next, 701 - E820_TYPE_RESERVED_KERN) && 702 706 !e820__mapped_any(paddr & P4D_MASK, paddr_next, 703 707 E820_TYPE_ACPI)) 704 708 set_p4d_init(p4d, __p4d(0), init);
-1
arch/x86/virt/svm/sev.c
··· 198 198 pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa); 199 199 e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); 200 200 e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); 201 - e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); 202 201 if (!memblock_is_region_reserved(pa, PMD_SIZE)) 203 202 memblock_reserve(pa, PMD_SIZE); 204 203 }