Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at ee9dce44362b2d8132c32964656ab6dff7dfbc6a 468 lines 14 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 */ 2/* 3 * include/linux/userfaultfd_k.h 4 * 5 * Copyright (C) 2015 Red Hat, Inc. 6 * 7 */ 8 9#ifndef _LINUX_USERFAULTFD_K_H 10#define _LINUX_USERFAULTFD_K_H 11 12#ifdef CONFIG_USERFAULTFD 13 14#include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */ 15 16#include <linux/fcntl.h> 17#include <linux/mm.h> 18#include <linux/swap.h> 19#include <linux/leafops.h> 20#include <asm-generic/pgtable_uffd.h> 21#include <linux/hugetlb_inline.h> 22 23/* The set of all possible UFFD-related VM flags. */ 24#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR) 25 26#define __VMA_UFFD_FLAGS mk_vma_flags(VMA_UFFD_MISSING_BIT, VMA_UFFD_WP_BIT, \ 27 VMA_UFFD_MINOR_BIT) 28 29/* 30 * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining 31 * new flags, since they might collide with O_* ones. We want 32 * to re-use O_* flags that couldn't possibly have a meaning 33 * from userfaultfd, in order to leave a free define-space for 34 * shared O_* flags. 35 */ 36#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) 37 38/* 39 * Start with fault_pending_wqh and fault_wqh so they're more likely 40 * to be in the same cacheline. 41 * 42 * Locking order: 43 * fd_wqh.lock 44 * fault_pending_wqh.lock 45 * fault_wqh.lock 46 * event_wqh.lock 47 * 48 * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, 49 * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's 50 * also taken in IRQ context. 51 */ 52struct userfaultfd_ctx { 53 /* waitqueue head for the pending (i.e. not read) userfaults */ 54 wait_queue_head_t fault_pending_wqh; 55 /* waitqueue head for the userfaults */ 56 wait_queue_head_t fault_wqh; 57 /* waitqueue head for the pseudo fd to wakeup poll/read */ 58 wait_queue_head_t fd_wqh; 59 /* waitqueue head for events */ 60 wait_queue_head_t event_wqh; 61 /* a refile sequence protected by fault_pending_wqh lock */ 62 seqcount_spinlock_t refile_seq; 63 /* pseudo fd refcounting */ 64 refcount_t refcount; 65 /* userfaultfd syscall flags */ 66 unsigned int flags; 67 /* features requested from the userspace */ 68 unsigned int features; 69 /* released */ 70 bool released; 71 /* 72 * Prevents userfaultfd operations (fill/move/wp) from happening while 73 * some non-cooperative event(s) is taking place. Increments are done 74 * in write-mode. Whereas, userfaultfd operations, which includes 75 * reading mmap_changing, is done under read-mode. 76 */ 77 struct rw_semaphore map_changing_lock; 78 /* memory mappings are changing because of non-cooperative event */ 79 atomic_t mmap_changing; 80 /* mm with one ore more vmas attached to this userfaultfd_ctx */ 81 struct mm_struct *mm; 82}; 83 84extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); 85 86/* VMA userfaultfd operations */ 87struct vm_uffd_ops { 88 /* Checks if a VMA can support userfaultfd */ 89 bool (*can_userfault)(struct vm_area_struct *vma, vm_flags_t vm_flags); 90 /* 91 * Called to resolve UFFDIO_CONTINUE request. 92 * Should return the folio found at pgoff in the VMA's pagecache if it 93 * exists or ERR_PTR otherwise. 94 * The returned folio is locked and with reference held. 95 */ 96 struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t pgoff); 97 /* 98 * Called during resolution of UFFDIO_COPY request. 99 * Should allocate and return a folio or NULL if allocation fails. 100 */ 101 struct folio *(*alloc_folio)(struct vm_area_struct *vma, 102 unsigned long addr); 103 /* 104 * Called during resolution of UFFDIO_COPY request. 105 * Should only be called with a folio returned by alloc_folio() above. 106 * The folio will be set to locked. 107 * Returns 0 on success, error code on failure. 108 */ 109 int (*filemap_add)(struct folio *folio, struct vm_area_struct *vma, 110 unsigned long addr); 111 /* 112 * Called during resolution of UFFDIO_COPY request on the error 113 * handling path. 114 * Should revert the operation of ->filemap_add(). 115 */ 116 void (*filemap_remove)(struct folio *folio, struct vm_area_struct *vma); 117}; 118 119/* A combined operation mode + behavior flags. */ 120typedef unsigned int __bitwise uffd_flags_t; 121 122/* Mutually exclusive modes of operation. */ 123enum mfill_atomic_mode { 124 MFILL_ATOMIC_COPY, 125 MFILL_ATOMIC_ZEROPAGE, 126 MFILL_ATOMIC_CONTINUE, 127 MFILL_ATOMIC_POISON, 128 NR_MFILL_ATOMIC_MODES, 129}; 130 131#define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1) 132#define MFILL_ATOMIC_BIT(nr) BIT(MFILL_ATOMIC_MODE_BITS + (nr)) 133#define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr)) 134#define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1)) 135 136static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected) 137{ 138 return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected); 139} 140 141static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode) 142{ 143 flags &= ~MFILL_ATOMIC_MODE_MASK; 144 return flags | ((__force uffd_flags_t) mode); 145} 146 147/* Flags controlling behavior. These behavior changes are mode-independent. */ 148#define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0) 149 150extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start, 151 unsigned long src_start, unsigned long len, 152 uffd_flags_t flags); 153extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx, 154 unsigned long dst_start, 155 unsigned long len); 156extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start, 157 unsigned long len, uffd_flags_t flags); 158extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start, 159 unsigned long len, uffd_flags_t flags); 160extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start, 161 unsigned long len, bool enable_wp); 162extern long uffd_wp_range(struct vm_area_struct *vma, 163 unsigned long start, unsigned long len, bool enable_wp); 164 165/* move_pages */ 166void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2); 167void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2); 168ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, 169 unsigned long src_start, unsigned long len, __u64 flags); 170int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval, 171 struct vm_area_struct *dst_vma, 172 struct vm_area_struct *src_vma, 173 unsigned long dst_addr, unsigned long src_addr); 174 175/* mm helpers */ 176static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 177 struct vm_userfaultfd_ctx vm_ctx) 178{ 179 return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; 180} 181 182/* 183 * Never enable huge pmd sharing on some uffd registered vmas: 184 * 185 * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry. 186 * 187 * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for 188 * VMAs which share huge pmds. (If you have two mappings to the same 189 * underlying pages, and fault in the non-UFFD-registered one with a write, 190 * with huge pmd sharing this would *also* setup the second UFFD-registered 191 * mapping, and we'd not get minor faults.) 192 */ 193static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) 194{ 195 return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); 196} 197 198/* 199 * Don't do fault around for either WP or MINOR registered uffd range. For 200 * MINOR registered range, fault around will be a total disaster and ptes can 201 * be installed without notifications; for WP it should mostly be fine as long 202 * as the fault around checks for pte_none() before the installation, however 203 * to be super safe we just forbid it. 204 */ 205static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) 206{ 207 return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); 208} 209 210static inline bool userfaultfd_missing(struct vm_area_struct *vma) 211{ 212 return vma->vm_flags & VM_UFFD_MISSING; 213} 214 215static inline bool userfaultfd_wp(struct vm_area_struct *vma) 216{ 217 return vma->vm_flags & VM_UFFD_WP; 218} 219 220static inline bool userfaultfd_minor(struct vm_area_struct *vma) 221{ 222 return vma->vm_flags & VM_UFFD_MINOR; 223} 224 225static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, 226 pte_t pte) 227{ 228 return userfaultfd_wp(vma) && pte_uffd_wp(pte); 229} 230 231static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, 232 pmd_t pmd) 233{ 234 return userfaultfd_wp(vma) && pmd_uffd_wp(pmd); 235} 236 237static inline bool userfaultfd_armed(struct vm_area_struct *vma) 238{ 239 return vma->vm_flags & __VM_UFFD_FLAGS; 240} 241 242bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags, 243 bool wp_async); 244 245static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) 246{ 247 struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx; 248 249 return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0; 250} 251 252extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); 253extern void dup_userfaultfd_complete(struct list_head *); 254void dup_userfaultfd_fail(struct list_head *); 255 256extern void mremap_userfaultfd_prep(struct vm_area_struct *, 257 struct vm_userfaultfd_ctx *); 258extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, 259 unsigned long from, unsigned long to, 260 unsigned long len); 261void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *); 262 263extern bool userfaultfd_remove(struct vm_area_struct *vma, 264 unsigned long start, 265 unsigned long end); 266 267extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, 268 unsigned long start, unsigned long end, struct list_head *uf); 269extern void userfaultfd_unmap_complete(struct mm_struct *mm, 270 struct list_head *uf); 271extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); 272extern bool userfaultfd_wp_async(struct vm_area_struct *vma); 273 274void userfaultfd_reset_ctx(struct vm_area_struct *vma); 275 276struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, 277 struct vm_area_struct *prev, 278 struct vm_area_struct *vma, 279 unsigned long start, 280 unsigned long end); 281 282int userfaultfd_register_range(struct userfaultfd_ctx *ctx, 283 struct vm_area_struct *vma, 284 vm_flags_t vm_flags, 285 unsigned long start, unsigned long end, 286 bool wp_async); 287 288void userfaultfd_release_new(struct userfaultfd_ctx *ctx); 289 290void userfaultfd_release_all(struct mm_struct *mm, 291 struct userfaultfd_ctx *ctx); 292 293static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) 294{ 295 /* Only wr-protect mode uses pte markers */ 296 if (!userfaultfd_wp(vma)) 297 return false; 298 299 /* File-based uffd-wp always need markers */ 300 if (!vma_is_anonymous(vma)) 301 return true; 302 303 /* 304 * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED 305 * enabled (to apply markers on zero pages). 306 */ 307 return userfaultfd_wp_unpopulated(vma); 308} 309 310/* 311 * Returns true if this is a swap pte and was uffd-wp wr-protected in either 312 * forms (pte marker or a normal swap pte), false otherwise. 313 */ 314static inline bool pte_swp_uffd_wp_any(pte_t pte) 315{ 316 if (!uffd_supports_wp_marker()) 317 return false; 318 319 if (pte_present(pte)) 320 return false; 321 322 if (pte_swp_uffd_wp(pte)) 323 return true; 324 325 if (pte_is_uffd_wp_marker(pte)) 326 return true; 327 328 return false; 329} 330#else /* CONFIG_USERFAULTFD */ 331 332/* mm helpers */ 333static inline vm_fault_t handle_userfault(struct vm_fault *vmf, 334 unsigned long reason) 335{ 336 return VM_FAULT_SIGBUS; 337} 338 339static inline long uffd_wp_range(struct vm_area_struct *vma, 340 unsigned long start, unsigned long len, 341 bool enable_wp) 342{ 343 return false; 344} 345 346static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 347 struct vm_userfaultfd_ctx vm_ctx) 348{ 349 return true; 350} 351 352static inline bool userfaultfd_missing(struct vm_area_struct *vma) 353{ 354 return false; 355} 356 357static inline bool userfaultfd_wp(struct vm_area_struct *vma) 358{ 359 return false; 360} 361 362static inline bool userfaultfd_minor(struct vm_area_struct *vma) 363{ 364 return false; 365} 366 367static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, 368 pte_t pte) 369{ 370 return false; 371} 372 373static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, 374 pmd_t pmd) 375{ 376 return false; 377} 378 379 380static inline bool userfaultfd_armed(struct vm_area_struct *vma) 381{ 382 return false; 383} 384 385static inline int dup_userfaultfd(struct vm_area_struct *vma, 386 struct list_head *l) 387{ 388 return 0; 389} 390 391static inline void dup_userfaultfd_complete(struct list_head *l) 392{ 393} 394 395static inline void dup_userfaultfd_fail(struct list_head *l) 396{ 397} 398 399static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, 400 struct vm_userfaultfd_ctx *ctx) 401{ 402} 403 404static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, 405 unsigned long from, 406 unsigned long to, 407 unsigned long len) 408{ 409} 410 411static inline void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *ctx) 412{ 413} 414 415static inline bool userfaultfd_remove(struct vm_area_struct *vma, 416 unsigned long start, 417 unsigned long end) 418{ 419 return true; 420} 421 422static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, 423 unsigned long start, unsigned long end, 424 struct list_head *uf) 425{ 426 return 0; 427} 428 429static inline void userfaultfd_unmap_complete(struct mm_struct *mm, 430 struct list_head *uf) 431{ 432} 433 434static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) 435{ 436 return false; 437} 438 439static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) 440{ 441 return false; 442} 443 444static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) 445{ 446 return false; 447} 448 449static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) 450{ 451 return false; 452} 453 454static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) 455{ 456 return false; 457} 458 459/* 460 * Returns true if this is a swap pte and was uffd-wp wr-protected in either 461 * forms (pte marker or a normal swap pte), false otherwise. 462 */ 463static inline bool pte_swp_uffd_wp_any(pte_t pte) 464{ 465 return false; 466} 467#endif /* CONFIG_USERFAULTFD */ 468#endif /* _LINUX_USERFAULTFD_K_H */