Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

vdso/timens: Move functions to new file

In preparation for untangling time namespaces and the vDSO, move
the glue functions between those subsystems into a new file.

While at it, switch the mutex lock and mmap_read_lock() in the vDSO
namespace code to guard().

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260326-vdso-timens-decoupling-v2-1-c82693a7775f@linutronix.de

authored by

Thomas Weißschuh and committed by
Thomas Gleixner
5dc9cf83 bed0053a

+166 -154
+2
MAINTAINERS
··· 10768 10768 T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso 10769 10769 F: include/asm-generic/vdso/vsyscall.h 10770 10770 F: include/vdso/ 10771 + F: kernel/time/namespace_vdso.c 10771 10772 F: kernel/time/vsyscall.c 10772 10773 F: lib/vdso/ 10773 10774 F: tools/testing/selftests/vDSO/ ··· 21001 21000 F: kernel/time/itimer.c 21002 21001 F: kernel/time/posix-* 21003 21002 F: kernel/time/namespace.c 21003 + F: kernel/time/namespace_vdso.c 21004 21004 21005 21005 POWER MANAGEMENT CORE 21006 21006 M: "Rafael J. Wysocki" <rafael@kernel.org>
-8
include/linux/time_namespace.h
··· 38 38 return container_of(ns, struct time_namespace, ns); 39 39 } 40 40 void __init time_ns_init(void); 41 - extern int vdso_join_timens(struct task_struct *task, 42 - struct time_namespace *ns); 43 41 extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); 44 42 45 43 static inline struct time_namespace *get_time_ns(struct time_namespace *ns) ··· 113 115 #else 114 116 static inline void __init time_ns_init(void) 115 117 { 116 - } 117 - 118 - static inline int vdso_join_timens(struct task_struct *task, 119 - struct time_namespace *ns) 120 - { 121 - return 0; 122 118 } 123 119 124 120 static inline void timens_commit(struct task_struct *tsk,
+1 -1
kernel/time/Makefile
··· 29 29 obj-$(CONFIG_GENERIC_GETTIMEOFDAY) += vsyscall.o 30 30 obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o 31 31 obj-$(CONFIG_TEST_UDELAY) += test_udelay.o 32 - obj-$(CONFIG_TIME_NS) += namespace.o 32 + obj-$(CONFIG_TIME_NS) += namespace.o namespace_vdso.o 33 33 obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) += clocksource-wdtest.o 34 34 obj-$(CONFIG_TIME_KUNIT_TEST) += time_test.o
+4 -120
kernel/time/namespace.c
··· 19 19 #include <linux/err.h> 20 20 #include <linux/mm.h> 21 21 22 - #include <vdso/datapage.h> 22 + #include "namespace_internal.h" 23 23 24 24 ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim, 25 25 struct timens_offsets *ns_offsets) ··· 138 138 return clone_time_ns(user_ns, old_ns); 139 139 } 140 140 141 - static struct timens_offset offset_from_ts(struct timespec64 off) 142 - { 143 - struct timens_offset ret; 144 - 145 - ret.sec = off.tv_sec; 146 - ret.nsec = off.tv_nsec; 147 - 148 - return ret; 149 - } 150 - 151 - /* 152 - * A time namespace VVAR page has the same layout as the VVAR page which 153 - * contains the system wide VDSO data. 154 - * 155 - * For a normal task the VVAR pages are installed in the normal ordering: 156 - * VVAR 157 - * PVCLOCK 158 - * HVCLOCK 159 - * TIMENS <- Not really required 160 - * 161 - * Now for a timens task the pages are installed in the following order: 162 - * TIMENS 163 - * PVCLOCK 164 - * HVCLOCK 165 - * VVAR 166 - * 167 - * The check for vdso_clock->clock_mode is in the unlikely path of 168 - * the seq begin magic. So for the non-timens case most of the time 169 - * 'seq' is even, so the branch is not taken. 170 - * 171 - * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check 172 - * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the 173 - * update to finish and for 'seq' to become even anyway. 174 - * 175 - * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which 176 - * enforces the time namespace handling path. 
177 - */ 178 - static void timens_setup_vdso_clock_data(struct vdso_clock *vc, 179 - struct time_namespace *ns) 180 - { 181 - struct timens_offset *offset = vc->offset; 182 - struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); 183 - struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); 184 - 185 - vc->seq = 1; 186 - vc->clock_mode = VDSO_CLOCKMODE_TIMENS; 187 - offset[CLOCK_MONOTONIC] = monotonic; 188 - offset[CLOCK_MONOTONIC_RAW] = monotonic; 189 - offset[CLOCK_MONOTONIC_COARSE] = monotonic; 190 - offset[CLOCK_BOOTTIME] = boottime; 191 - offset[CLOCK_BOOTTIME_ALARM] = boottime; 192 - } 193 - 194 - struct page *find_timens_vvar_page(struct vm_area_struct *vma) 195 - { 196 - if (likely(vma->vm_mm == current->mm)) 197 - return current->nsproxy->time_ns->vvar_page; 198 - 199 - /* 200 - * VM_PFNMAP | VM_IO protect .fault() handler from being called 201 - * through interfaces like /proc/$pid/mem or 202 - * process_vm_{readv,writev}() as long as there's no .access() 203 - * in special_mapping_vmops(). 204 - * For more details check_vma_flags() and __access_remote_vm() 205 - */ 206 - 207 - WARN(1, "vvar_page accessed remotely"); 208 - 209 - return NULL; 210 - } 211 - 212 - /* 213 - * Protects possibly multiple offsets writers racing each other 214 - * and tasks entering the namespace. 215 - */ 216 - static DEFINE_MUTEX(offset_lock); 217 - 218 - static void timens_set_vvar_page(struct task_struct *task, 219 - struct time_namespace *ns) 220 - { 221 - struct vdso_time_data *vdata; 222 - struct vdso_clock *vc; 223 - unsigned int i; 224 - 225 - if (ns == &init_time_ns) 226 - return; 227 - 228 - /* Fast-path, taken by every task in namespace except the first. */ 229 - if (likely(ns->frozen_offsets)) 230 - return; 231 - 232 - mutex_lock(&offset_lock); 233 - /* Nothing to-do: vvar_page has been already initialized. 
*/ 234 - if (ns->frozen_offsets) 235 - goto out; 236 - 237 - ns->frozen_offsets = true; 238 - vdata = page_address(ns->vvar_page); 239 - vc = vdata->clock_data; 240 - 241 - for (i = 0; i < CS_BASES; i++) 242 - timens_setup_vdso_clock_data(&vc[i], ns); 243 - 244 - if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) { 245 - for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++) 246 - timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns); 247 - } 248 - 249 - out: 250 - mutex_unlock(&offset_lock); 251 - } 141 + DEFINE_MUTEX(timens_offset_lock); 252 142 253 143 void free_time_ns(struct time_namespace *ns) 254 144 { ··· 186 296 static void timens_put(struct ns_common *ns) 187 297 { 188 298 put_time_ns(to_time_ns(ns)); 189 - } 190 - 191 - void timens_commit(struct task_struct *tsk, struct time_namespace *ns) 192 - { 193 - timens_set_vvar_page(tsk, ns); 194 - vdso_join_timens(tsk, ns); 195 299 } 196 300 197 301 static int timens_install(struct nsset *nsset, struct ns_common *new) ··· 312 428 goto out; 313 429 } 314 430 315 - mutex_lock(&offset_lock); 431 + mutex_lock(&timens_offset_lock); 316 432 if (time_ns->frozen_offsets) { 317 433 err = -EACCES; 318 434 goto out_unlock; ··· 337 453 } 338 454 339 455 out_unlock: 340 - mutex_unlock(&offset_lock); 456 + mutex_unlock(&timens_offset_lock); 341 457 out: 342 458 put_time_ns(time_ns); 343 459
+13
kernel/time/namespace_internal.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _TIME_NAMESPACE_INTERNAL_H 3 + #define _TIME_NAMESPACE_INTERNAL_H 4 + 5 + #include <linux/mutex.h> 6 + 7 + /* 8 + * Protects possibly multiple offsets writers racing each other 9 + * and tasks entering the namespace. 10 + */ 11 + extern struct mutex timens_offset_lock; 12 + 13 + #endif /* _TIME_NAMESPACE_INTERNAL_H */
+146
kernel/time/namespace_vdso.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Author: Andrei Vagin <avagin@openvz.org> 4 + * Author: Dmitry Safonov <dima@arista.com> 5 + */ 6 + 7 + #include <linux/cleanup.h> 8 + #include <linux/mm.h> 9 + #include <linux/time_namespace.h> 10 + #include <linux/time.h> 11 + #include <linux/vdso_datastore.h> 12 + 13 + #include <vdso/clocksource.h> 14 + #include <vdso/datapage.h> 15 + 16 + #include "namespace_internal.h" 17 + 18 + static struct timens_offset offset_from_ts(struct timespec64 off) 19 + { 20 + struct timens_offset ret; 21 + 22 + ret.sec = off.tv_sec; 23 + ret.nsec = off.tv_nsec; 24 + 25 + return ret; 26 + } 27 + 28 + /* 29 + * A time namespace VVAR page has the same layout as the VVAR page which 30 + * contains the system wide VDSO data. 31 + * 32 + * For a normal task the VVAR pages are installed in the normal ordering: 33 + * VVAR 34 + * PVCLOCK 35 + * HVCLOCK 36 + * TIMENS <- Not really required 37 + * 38 + * Now for a timens task the pages are installed in the following order: 39 + * TIMENS 40 + * PVCLOCK 41 + * HVCLOCK 42 + * VVAR 43 + * 44 + * The check for vdso_clock->clock_mode is in the unlikely path of 45 + * the seq begin magic. So for the non-timens case most of the time 46 + * 'seq' is even, so the branch is not taken. 47 + * 48 + * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check 49 + * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the 50 + * update to finish and for 'seq' to become even anyway. 51 + * 52 + * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which 53 + * enforces the time namespace handling path. 
54 + */ 55 + static void timens_setup_vdso_clock_data(struct vdso_clock *vc, 56 + struct time_namespace *ns) 57 + { 58 + struct timens_offset *offset = vc->offset; 59 + struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); 60 + struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); 61 + 62 + vc->seq = 1; 63 + vc->clock_mode = VDSO_CLOCKMODE_TIMENS; 64 + offset[CLOCK_MONOTONIC] = monotonic; 65 + offset[CLOCK_MONOTONIC_RAW] = monotonic; 66 + offset[CLOCK_MONOTONIC_COARSE] = monotonic; 67 + offset[CLOCK_BOOTTIME] = boottime; 68 + offset[CLOCK_BOOTTIME_ALARM] = boottime; 69 + } 70 + 71 + struct page *find_timens_vvar_page(struct vm_area_struct *vma) 72 + { 73 + if (likely(vma->vm_mm == current->mm)) 74 + return current->nsproxy->time_ns->vvar_page; 75 + 76 + /* 77 + * VM_PFNMAP | VM_IO protect .fault() handler from being called 78 + * through interfaces like /proc/$pid/mem or 79 + * process_vm_{readv,writev}() as long as there's no .access() 80 + * in special_mapping_vmops(). 81 + * For more details check_vma_flags() and __access_remote_vm() 82 + */ 83 + 84 + WARN(1, "vvar_page accessed remotely"); 85 + 86 + return NULL; 87 + } 88 + 89 + static void timens_set_vvar_page(struct task_struct *task, 90 + struct time_namespace *ns) 91 + { 92 + struct vdso_time_data *vdata; 93 + struct vdso_clock *vc; 94 + unsigned int i; 95 + 96 + if (ns == &init_time_ns) 97 + return; 98 + 99 + /* Fast-path, taken by every task in namespace except the first. */ 100 + if (likely(ns->frozen_offsets)) 101 + return; 102 + 103 + guard(mutex)(&timens_offset_lock); 104 + /* Nothing to-do: vvar_page has been already initialized. 
*/ 105 + if (ns->frozen_offsets) 106 + return; 107 + 108 + ns->frozen_offsets = true; 109 + vdata = page_address(ns->vvar_page); 110 + vc = vdata->clock_data; 111 + 112 + for (i = 0; i < CS_BASES; i++) 113 + timens_setup_vdso_clock_data(&vc[i], ns); 114 + 115 + if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) { 116 + for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++) 117 + timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns); 118 + } 119 + } 120 + 121 + /* 122 + * The vvar page layout depends on whether a task belongs to the root or 123 + * non-root time namespace. Whenever a task changes its namespace, the VVAR 124 + * page tables are cleared and then they will be re-faulted with a 125 + * corresponding layout. 126 + * See also the comment near timens_setup_vdso_clock_data() for details. 127 + */ 128 + static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) 129 + { 130 + struct mm_struct *mm = task->mm; 131 + struct vm_area_struct *vma; 132 + VMA_ITERATOR(vmi, mm, 0); 133 + 134 + guard(mmap_read_lock)(mm); 135 + for_each_vma(vmi, vma) { 136 + if (vma_is_special_mapping(vma, &vdso_vvar_mapping)) 137 + zap_vma_pages(vma); 138 + } 139 + return 0; 140 + } 141 + 142 + void timens_commit(struct task_struct *tsk, struct time_namespace *ns) 143 + { 144 + timens_set_vvar_page(tsk, ns); 145 + vdso_join_timens(tsk, ns); 146 + }
-25
lib/vdso/datastore.c
··· 132 132 VM_MIXEDMAP | VM_SEALED_SYSMAP, 133 133 &vdso_vvar_mapping); 134 134 } 135 - 136 - #ifdef CONFIG_TIME_NS 137 - /* 138 - * The vvar page layout depends on whether a task belongs to the root or 139 - * non-root time namespace. Whenever a task changes its namespace, the VVAR 140 - * page tables are cleared and then they will be re-faulted with a 141 - * corresponding layout. 142 - * See also the comment near timens_setup_vdso_clock_data() for details. 143 - */ 144 - int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) 145 - { 146 - struct mm_struct *mm = task->mm; 147 - struct vm_area_struct *vma; 148 - VMA_ITERATOR(vmi, mm, 0); 149 - 150 - mmap_read_lock(mm); 151 - for_each_vma(vmi, vma) { 152 - if (vma_is_special_mapping(vma, &vdso_vvar_mapping)) 153 - zap_vma_pages(vma); 154 - } 155 - mmap_read_unlock(mm); 156 - 157 - return 0; 158 - } 159 - #endif