Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86/percpu/64: Use relative percpu offsets

The percpu section is currently linked at absolute address 0, because
older compilers hard-coded the stack protector canary value at a fixed
offset from the start of the GS segment. Now that the canary is a
normal percpu variable, the percpu section does not need to be linked
at a specific address.

x86-64 will now calculate the percpu offsets as the delta between the
initial percpu address and the dynamically allocated memory, like other
architectures. Note that GSBASE is limited to the canonical address
width (48 or 57 bits, sign-extended). As long as the kernel text,
modules, and the dynamically allocated percpu memory are all in the
negative address space, the delta will not overflow this limit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250123190747.745588-9-brgerst@gmail.com

authored by

Brian Gerst and committed by
Ingo Molnar
9d7de2aa 80d47def

+27 -65
+5 -1
arch/x86/include/asm/processor.h
··· 431 431 432 432 static inline unsigned long cpu_kernelmode_gs_base(int cpu) 433 433 { 434 - return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); 434 + #ifdef CONFIG_SMP 435 + return per_cpu_offset(cpu); 436 + #else 437 + return 0; 438 + #endif 435 439 } 436 440 437 441 extern asmlinkage void entry_SYSCALL32_ignore(void);
+9 -10
arch/x86/kernel/head_64.S
··· 61 61 /* Set up the stack for verify_cpu() */ 62 62 leaq __top_init_kernel_stack(%rip), %rsp 63 63 64 - /* Setup GSBASE to allow stack canary access for C code */ 64 + /* 65 + * Set up GSBASE. 66 + * Note that on SMP the boot CPU uses the init data section until 67 + * the per-CPU areas are set up. 68 + */ 65 69 movl $MSR_GS_BASE, %ecx 66 - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx 67 - movl %edx, %eax 68 - shrq $32, %rdx 70 + xorl %eax, %eax 71 + xorl %edx, %edx 69 72 wrmsr 70 73 71 74 call startup_64_setup_gdt_idt ··· 362 359 movl %eax,%fs 363 360 movl %eax,%gs 364 361 365 - /* Set up %gs. 366 - * 367 - * The base of %gs always points to fixed_percpu_data. 362 + /* 363 + * Set up GSBASE. 368 364 * Note that, on SMP, the boot cpu uses init data section until 369 365 * the per cpu areas are set up. 370 366 */ 371 367 movl $MSR_GS_BASE,%ecx 372 - #ifndef CONFIG_SMP 373 - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx 374 - #endif 375 368 movl %edx, %eax 376 369 shrq $32, %rdx 377 370 wrmsr
+2 -10
arch/x86/kernel/setup_percpu.c
··· 23 23 #include <asm/cpumask.h> 24 24 #include <asm/cpu.h> 25 25 26 - #ifdef CONFIG_X86_64 27 - #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) 28 - #else 29 - #define BOOT_PERCPU_OFFSET 0 30 - #endif 31 - 32 - DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; 26 + DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); 33 27 EXPORT_PER_CPU_SYMBOL(this_cpu_off); 34 28 35 - unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = { 36 - [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, 37 - }; 29 + unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init; 38 30 EXPORT_SYMBOL(__per_cpu_offset); 39 31 40 32 /*
+1 -28
arch/x86/kernel/vmlinux.lds.S
··· 112 112 PHDRS { 113 113 text PT_LOAD FLAGS(5); /* R_E */ 114 114 data PT_LOAD FLAGS(6); /* RW_ */ 115 - #ifdef CONFIG_X86_64 116 - #ifdef CONFIG_SMP 117 - percpu PT_LOAD FLAGS(6); /* RW_ */ 118 - #endif 119 - init PT_LOAD FLAGS(7); /* RWE */ 120 - #endif 121 115 note PT_NOTE FLAGS(0); /* ___ */ 122 116 } 123 117 ··· 210 216 __init_begin = .; /* paired with __init_end */ 211 217 } 212 218 213 - #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) 214 - /* 215 - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the 216 - * output PHDR, so the next output section - .init.text - should 217 - * start another segment - init. 218 - */ 219 - PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) 220 - ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START, 221 - "per-CPU data too large - increase CONFIG_PHYSICAL_START") 222 - #endif 223 - 224 219 INIT_TEXT_SECTION(PAGE_SIZE) 225 - #ifdef CONFIG_X86_64 226 - :init 227 - #endif 228 220 229 221 /* 230 222 * Section for code used exclusively before alternatives are run. All ··· 327 347 EXIT_DATA 328 348 } 329 349 330 - #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) 331 350 PERCPU_SECTION(INTERNODE_CACHE_BYTES) 332 - #endif 333 351 334 352 RUNTIME_CONST_VARIABLES 335 353 RUNTIME_CONST(ptr, USER_PTR_MAX) ··· 475 497 * Per-cpu symbols which need to be offset from __per_cpu_load 476 498 * for the boot processor. 477 499 */ 478 - #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load 500 + #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) 479 501 INIT_PER_CPU(gdt_page); 480 502 INIT_PER_CPU(fixed_percpu_data); 481 503 INIT_PER_CPU(irq_stack_backing_store); 482 - 483 - #ifdef CONFIG_SMP 484 - . = ASSERT((fixed_percpu_data == 0), 485 - "fixed_percpu_data is not at start of per-cpu area"); 486 - #endif 487 504 488 505 #ifdef CONFIG_MITIGATION_UNRET_ENTRY 489 506 . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
+2 -3
arch/x86/platform/pvh/head.S
··· 179 179 * the per-CPU areas are set up. 180 180 */ 181 181 movl $MSR_GS_BASE,%ecx 182 - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx 183 - movq %edx, %eax 184 - shrq $32, %rdx 182 + xorl %eax, %eax 183 + xorl %edx, %edx 185 184 wrmsr 186 185 187 186 /* Call xen_prepare_pvh() via the kernel virtual mapping */
+3 -7
arch/x86/tools/relocs.c
··· 835 835 */ 836 836 static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) 837 837 { 838 - int shndx = sym_index(sym); 839 - 840 - return (shndx == per_cpu_shndx) && 841 - strcmp(symname, "__init_begin") && 842 - strcmp(symname, "__per_cpu_load") && 843 - strncmp(symname, "init_per_cpu_", 13); 838 + return 0; 844 839 } 845 840 846 841 ··· 1057 1062 1058 1063 static void sort_relocs(struct relocs *r) 1059 1064 { 1060 - qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs); 1065 + if (r->count) 1066 + qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs); 1061 1067 } 1062 1068 1063 1069 static int write32(uint32_t v, FILE *f)
+4 -5
arch/x86/xen/xen-head.S
··· 31 31 32 32 leaq __top_init_kernel_stack(%rip), %rsp 33 33 34 - /* Set up %gs. 35 - * 36 - * The base of %gs always points to fixed_percpu_data. 34 + /* 35 + * Set up GSBASE. 37 36 * Note that, on SMP, the boot cpu uses init data section until 38 37 * the per cpu areas are set up. 39 38 */ 40 39 movl $MSR_GS_BASE,%ecx 41 - movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax 42 - cdq 40 + xorl %eax, %eax 41 + xorl %edx, %edx 43 42 wrmsr 44 43 45 44 mov %rsi, %rdi
+1 -1
init/Kconfig
··· 1872 1872 config KALLSYMS_ABSOLUTE_PERCPU 1873 1873 bool 1874 1874 depends on KALLSYMS 1875 - default X86_64 && SMP 1875 + default n 1876 1876 1877 1877 # end of the "standard kernel features (expert users)" menu 1878 1878