Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86/vdso: Switch to generic storage implementation

The generic storage implementation provides the same features as the
custom one. However, it can be shared between architectures, making
maintenance easier.

This switch also moves the random state data out of the time data page.
The currently used hardcoded __VDSO_RND_DATA_OFFSET does not take into
account changes to the time data page layout.

Co-developed-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-15-13a4669dfc8c@linutronix.de

authored by

Thomas Weißschuh and committed by
Thomas Gleixner
dafde296 223970df

+19 -178
+1
arch/x86/Kconfig
··· 178 178 select GENERIC_SMP_IDLE_THREAD 179 179 select GENERIC_TIME_VSYSCALL 180 180 select GENERIC_GETTIMEOFDAY 181 + select GENERIC_VDSO_DATA_STORE 181 182 select GENERIC_VDSO_TIME_NS 182 183 select GENERIC_VDSO_OVERFLOW_PROTECT 183 184 select GUP_GET_PXX_LOW_HIGH if X86_PAE
+7 -7
arch/x86/entry/vdso/vdso-layout.lds.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 #include <asm/vdso.h> 3 3 #include <asm/vdso/vsyscall.h> 4 + #include <vdso/datapage.h> 4 5 5 6 /* 6 7 * Linker script for vDSO. This is an ELF shared object prelinked to ··· 18 17 * segment. 19 18 */ 20 19 21 - vvar_start = . - __VVAR_PAGES * PAGE_SIZE; 22 - vvar_page = vvar_start; 20 + VDSO_VVAR_SYMS 23 21 24 - vdso_rng_data = vvar_page + __VDSO_RND_DATA_OFFSET; 25 - 26 - timens_page = vvar_start + PAGE_SIZE; 27 - 28 - vclock_pages = VDSO_VCLOCK_PAGES_START(vvar_start); 22 + vclock_pages = VDSO_VCLOCK_PAGES_START(vdso_u_data); 29 23 pvclock_page = vclock_pages + VDSO_PAGE_PVCLOCK_OFFSET * PAGE_SIZE; 30 24 hvclock_page = vclock_pages + VDSO_PAGE_HVCLOCK_OFFSET * PAGE_SIZE; 25 + 26 + /* For compatibility with vdso2c */ 27 + vvar_page = vdso_u_data; 28 + vvar_start = vdso_u_data; 31 29 32 30 . = SIZEOF_HEADERS; 33 31
+6 -117
arch/x86/entry/vdso/vma.c
··· 14 14 #include <linux/elf.h> 15 15 #include <linux/cpu.h> 16 16 #include <linux/ptrace.h> 17 - #include <linux/time_namespace.h> 17 + #include <linux/vdso_datastore.h> 18 18 19 19 #include <asm/pvclock.h> 20 20 #include <asm/vgtod.h> ··· 27 27 #include <asm/vdso/vsyscall.h> 28 28 #include <clocksource/hyperv_timer.h> 29 29 30 - struct vdso_data *arch_get_vdso_data(void *vvar_page) 31 - { 32 - return (struct vdso_data *)vvar_page; 33 - } 34 - 35 - static union vdso_data_store vdso_data_store __page_aligned_data; 36 - struct vdso_data *vdso_data = vdso_data_store.data; 30 + static_assert(VDSO_NR_PAGES + VDSO_NR_VCLOCK_PAGES == __VDSO_PAGES); 37 31 38 32 unsigned int vclocks_used __read_mostly; 39 33 ··· 48 54 return 0; 49 55 } 50 56 51 - static const struct vm_special_mapping vvar_mapping; 52 57 struct linux_binprm; 53 58 54 59 static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, ··· 91 98 return 0; 92 99 } 93 100 94 - #ifdef CONFIG_TIME_NS 95 - /* 96 - * The vvar page layout depends on whether a task belongs to the root or 97 - * non-root time namespace. Whenever a task changes its namespace, the VVAR 98 - * page tables are cleared and then they will re-faulted with a 99 - * corresponding layout. 100 - * See also the comment near timens_setup_vdso_data() for details. 
101 - */ 102 - int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) 103 - { 104 - struct mm_struct *mm = task->mm; 105 - struct vm_area_struct *vma; 106 - VMA_ITERATOR(vmi, mm, 0); 107 - 108 - mmap_read_lock(mm); 109 - for_each_vma(vmi, vma) { 110 - if (vma_is_special_mapping(vma, &vvar_mapping)) 111 - zap_vma_pages(vma); 112 - } 113 - mmap_read_unlock(mm); 114 - 115 - return 0; 116 - } 117 - #endif 118 - 119 - static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, 120 - struct vm_area_struct *vma, struct vm_fault *vmf) 121 - { 122 - const struct vdso_image *image = vma->vm_mm->context.vdso_image; 123 - unsigned long pfn; 124 - long sym_offset; 125 - 126 - if (!image) 127 - return VM_FAULT_SIGBUS; 128 - 129 - sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) + 130 - image->sym_vvar_start; 131 - 132 - /* 133 - * Sanity check: a symbol offset of zero means that the page 134 - * does not exist for this vdso image, not that the page is at 135 - * offset zero relative to the text mapping. This should be 136 - * impossible here, because sym_offset should only be zero for 137 - * the page past the end of the vvar mapping. 138 - */ 139 - if (sym_offset == 0) 140 - return VM_FAULT_SIGBUS; 141 - 142 - if (sym_offset == image->sym_vvar_page) { 143 - struct page *timens_page = find_timens_vvar_page(vma); 144 - 145 - pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT; 146 - 147 - /* 148 - * If a task belongs to a time namespace then a namespace 149 - * specific VVAR is mapped with the sym_vvar_page offset and 150 - * the real VVAR page is mapped with the sym_timens_page 151 - * offset. 152 - * See also the comment near timens_setup_vdso_data(). 153 - */ 154 - if (timens_page) { 155 - unsigned long addr; 156 - vm_fault_t err; 157 - 158 - /* 159 - * Optimization: inside time namespace pre-fault 160 - * VVAR page too. As on timens page there are only 161 - * offsets for clocks on VVAR, it'll be faulted 162 - * shortly by VDSO code. 
163 - */ 164 - addr = vmf->address + (image->sym_timens_page - sym_offset); 165 - err = vmf_insert_pfn(vma, addr, pfn); 166 - if (unlikely(err & VM_FAULT_ERROR)) 167 - return err; 168 - 169 - pfn = page_to_pfn(timens_page); 170 - } 171 - 172 - return vmf_insert_pfn(vma, vmf->address, pfn); 173 - 174 - } else if (sym_offset == image->sym_timens_page) { 175 - struct page *timens_page = find_timens_vvar_page(vma); 176 - 177 - if (!timens_page) 178 - return VM_FAULT_SIGBUS; 179 - 180 - pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT; 181 - return vmf_insert_pfn(vma, vmf->address, pfn); 182 - } 183 - 184 - return VM_FAULT_SIGBUS; 185 - } 186 - 187 101 static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm, 188 102 struct vm_area_struct *vma, struct vm_fault *vmf) 189 103 { ··· 112 212 case VDSO_PAGE_HVCLOCK_OFFSET: 113 213 { 114 214 unsigned long pfn = hv_get_tsc_pfn(); 115 - 116 215 if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) 117 216 return vmf_insert_pfn(vma, vmf->address, pfn); 118 217 break; ··· 126 227 .name = "[vdso]", 127 228 .fault = vdso_fault, 128 229 .mremap = vdso_mremap, 129 - }; 130 - static const struct vm_special_mapping vvar_mapping = { 131 - .name = "[vvar]", 132 - .fault = vvar_fault, 133 230 }; 134 231 static const struct vm_special_mapping vvar_vclock_mapping = { 135 232 .name = "[vvar_vclock]", ··· 148 253 return -EINTR; 149 254 150 255 addr = get_unmapped_area(NULL, addr, 151 - image->size - image->sym_vvar_start, 0, 0); 256 + image->size + __VDSO_PAGES * PAGE_SIZE, 0, 0); 152 257 if (IS_ERR_VALUE(addr)) { 153 258 ret = addr; 154 259 goto up_fail; 155 260 } 156 261 157 - text_start = addr - image->sym_vvar_start; 262 + text_start = addr + __VDSO_PAGES * PAGE_SIZE; 158 263 159 264 /* 160 265 * MAYWRITE to allow gdb to COW and set breakpoints ··· 171 276 goto up_fail; 172 277 } 173 278 174 - vma = _install_special_mapping(mm, 175 - addr, 176 - (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE, 177 - 
VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| 178 - VM_PFNMAP, 179 - &vvar_mapping); 180 - 279 + vma = vdso_install_vvar_mapping(mm, addr); 181 280 if (IS_ERR(vma)) { 182 281 ret = PTR_ERR(vma); 183 282 do_munmap(mm, text_start, image->size, NULL); ··· 216 327 */ 217 328 for_each_vma(vmi, vma) { 218 329 if (vma_is_special_mapping(vma, &vdso_mapping) || 219 - vma_is_special_mapping(vma, &vvar_mapping) || 330 + vma_is_special_mapping(vma, &vdso_vvar_mapping) || 220 331 vma_is_special_mapping(vma, &vvar_vclock_mapping)) { 221 332 mmap_write_unlock(mm); 222 333 return -EEXIST;
-10
arch/x86/include/asm/vdso/getrandom.h
··· 27 27 return ret; 28 28 } 29 29 30 - extern struct vdso_rng_data vdso_rng_data 31 - __attribute__((visibility("hidden"))); 32 - 33 - static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) 34 - { 35 - if (IS_ENABLED(CONFIG_TIME_NS) && __arch_get_vdso_data()->clock_mode == VDSO_CLOCKMODE_TIMENS) 36 - return (void *)&vdso_rng_data + ((void *)&timens_page - (void *)__arch_get_vdso_data()); 37 - return &vdso_rng_data; 38 - } 39 - 40 30 #endif /* !__ASSEMBLY__ */ 41 31 42 32 #endif /* __ASM_VDSO_GETRANDOM_H */
+3 -22
arch/x86/include/asm/vdso/gettimeofday.h
··· 19 19 #include <asm/pvclock.h> 20 20 #include <clocksource/hyperv_timer.h> 21 21 22 - extern struct vdso_data vvar_page 23 - __attribute__((visibility("hidden"))); 24 - 25 - extern struct vdso_data timens_page 26 - __attribute__((visibility("hidden"))); 27 - 28 22 #define VDSO_HAS_TIME 1 29 23 30 24 #define VDSO_HAS_CLOCK_GETRES 1 ··· 51 57 #ifdef CONFIG_HYPERV_TIMER 52 58 extern struct ms_hyperv_tsc_page hvclock_page 53 59 __attribute__((visibility("hidden"))); 54 - #endif 55 - 56 - #ifdef CONFIG_TIME_NS 57 - static __always_inline 58 - const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) 59 - { 60 - return &timens_page; 61 - } 62 60 #endif 63 61 64 62 #ifndef BUILD_VDSO32 ··· 236 250 #endif 237 251 238 252 static inline u64 __arch_get_hw_counter(s32 clock_mode, 239 - const struct vdso_data *vd) 253 + const struct vdso_time_data *vd) 240 254 { 241 255 if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) 242 256 return (u64)rdtsc_ordered() & S64_MAX; ··· 261 275 return U64_MAX; 262 276 } 263 277 264 - static __always_inline const struct vdso_data *__arch_get_vdso_data(void) 265 - { 266 - return &vvar_page; 267 - } 268 - 269 - static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) 278 + static inline bool arch_vdso_clocksource_ok(const struct vdso_time_data *vd) 270 279 { 271 280 return true; 272 281 } ··· 300 319 * declares everything with the MSB/Sign-bit set as invalid. Therefore the 301 320 * effective mask is S64_MAX. 302 321 */ 303 - static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) 322 + static __always_inline u64 vdso_calc_ns(const struct vdso_time_data *vd, u64 cycles, u64 base) 304 323 { 305 324 u64 delta = cycles - vd->cycle_last; 306 325
+2 -22
arch/x86/include/asm/vdso/vsyscall.h
··· 2 2 #ifndef __ASM_VDSO_VSYSCALL_H 3 3 #define __ASM_VDSO_VSYSCALL_H 4 4 5 - #define __VDSO_RND_DATA_OFFSET 640 6 - #define __VVAR_PAGES 4 5 + #define __VDSO_PAGES 6 7 6 8 7 #define VDSO_NR_VCLOCK_PAGES 2 9 - #define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE) 8 + #define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VDSO_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE) 10 9 #define VDSO_PAGE_PVCLOCK_OFFSET 0 11 10 #define VDSO_PAGE_HVCLOCK_OFFSET 1 12 11 ··· 13 14 14 15 #include <vdso/datapage.h> 15 16 #include <asm/vgtod.h> 16 - 17 - extern struct vdso_data *vdso_data; 18 - 19 - /* 20 - * Update the vDSO data page to keep in sync with kernel timekeeping. 21 - */ 22 - static __always_inline 23 - struct vdso_data *__x86_get_k_vdso_data(void) 24 - { 25 - return vdso_data; 26 - } 27 - #define __arch_get_k_vdso_data __x86_get_k_vdso_data 28 - 29 - static __always_inline 30 - struct vdso_rng_data *__x86_get_k_vdso_rng_data(void) 31 - { 32 - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; 33 - } 34 - #define __arch_get_k_vdso_rng_data __x86_get_k_vdso_rng_data 35 17 36 18 /* The asm-generic header needs to be included after the definitions above */ 37 19 #include <asm-generic/vdso/vsyscall.h>