Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

shmem, userfaultfd: use a VMA callback to handle UFFDIO_CONTINUE

When userspace resolves a page fault in a shmem VMA with UFFDIO_CONTINUE,
it needs to get a folio that already exists in the pagecache backing that
VMA.

Instead of using shmem_get_folio() for that, add a get_folio_noalloc()
method to 'struct vm_uffd_ops' that returns a folio if it exists in
the VMA's pagecache at the given pgoff.

Implement the get_folio_noalloc() method for shmem and slightly refactor
userfaultfd's mfill_get_vma() and mfill_atomic_pte_continue() to support
this new API.

Link: https://lore.kernel.org/20260402041156.1377214-9-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: James Houghton <jthoughton@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Mike Rapoport (Microsoft) and committed by
Andrew Morton
dfc4d771 0f48947c

+39 -17
+7
include/linux/userfaultfd_k.h
··· 87 87 struct vm_uffd_ops { 88 88 /* Checks if a VMA can support userfaultfd */ 89 89 bool (*can_userfault)(struct vm_area_struct *vma, vm_flags_t vm_flags); 90 + /* 91 + * Called to resolve UFFDIO_CONTINUE request. 92 + * Should return the folio found at pgoff in the VMA's pagecache if it 93 + * exists or ERR_PTR otherwise. 94 + * The returned folio is locked and with reference held. 95 + */ 96 + struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t pgoff); 90 97 }; 91 98 92 99 /* A combined operation mode + behavior flags. */
+14 -1
mm/shmem.c
··· 3289 3289 return ret; 3290 3290 } 3291 3291 3292 + static struct folio *shmem_get_folio_noalloc(struct inode *inode, pgoff_t pgoff) 3293 + { 3294 + struct folio *folio; 3295 + int err; 3296 + 3297 + err = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC); 3298 + if (err) 3299 + return ERR_PTR(err); 3300 + 3301 + return folio; 3302 + } 3303 + 3292 3304 static bool shmem_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags) 3293 3305 { 3294 3306 return true; 3295 3307 } 3296 3308 3297 3309 static const struct vm_uffd_ops shmem_uffd_ops = { 3298 - .can_userfault = shmem_can_userfault, 3310 + .can_userfault = shmem_can_userfault, 3311 + .get_folio_noalloc = shmem_get_folio_noalloc, 3299 3312 }; 3300 3313 #endif /* CONFIG_USERFAULTFD */ 3301 3314
+18 -16
mm/userfaultfd.c
··· 191 191 struct userfaultfd_ctx *ctx = state->ctx; 192 192 uffd_flags_t flags = state->flags; 193 193 struct vm_area_struct *dst_vma; 194 + const struct vm_uffd_ops *ops; 194 195 int err; 195 196 196 197 /* ··· 233 232 if (is_vm_hugetlb_page(dst_vma)) 234 233 return 0; 235 234 236 - if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) 235 + ops = vma_uffd_ops(dst_vma); 236 + if (!ops) 237 237 goto out_unlock; 238 - if (!vma_is_shmem(dst_vma) && 239 - uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) 238 + 239 + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE) && 240 + !ops->get_folio_noalloc) 240 241 goto out_unlock; 241 242 242 243 return 0; ··· 578 575 static int mfill_atomic_pte_continue(struct mfill_state *state) 579 576 { 580 577 struct vm_area_struct *dst_vma = state->vma; 578 + const struct vm_uffd_ops *ops = vma_uffd_ops(dst_vma); 581 579 unsigned long dst_addr = state->dst_addr; 582 580 pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); 583 581 struct inode *inode = file_inode(dst_vma->vm_file); ··· 588 584 struct page *page; 589 585 int ret; 590 586 591 - ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC); 592 - /* Our caller expects us to return -EFAULT if we failed to find folio */ 593 - if (ret == -ENOENT) 594 - ret = -EFAULT; 595 - if (ret) 596 - goto out; 597 - if (!folio) { 598 - ret = -EFAULT; 599 - goto out; 587 + if (!ops) { 588 + VM_WARN_ONCE(1, "UFFDIO_CONTINUE for unsupported VMA"); 589 + return -EOPNOTSUPP; 600 590 } 591 + 592 + folio = ops->get_folio_noalloc(inode, pgoff); 593 + /* Our caller expects us to return -EFAULT if we failed to find folio */ 594 + if (IS_ERR_OR_NULL(folio)) 595 + return -EFAULT; 601 596 602 597 page = folio_file_page(folio, pgoff); 603 598 if (PageHWPoison(page)) { ··· 610 607 goto out_release; 611 608 612 609 folio_unlock(folio); 613 - ret = 0; 614 - out: 615 - return ret; 610 + return 0; 611 + 616 612 out_release: 617 613 folio_unlock(folio); 618 614 folio_put(folio); 619 - goto out; 615 + 
return ret; 620 616 } 621 617 622 618 /* Handles UFFDIO_POISON for all non-hugetlb VMAs. */