Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

userfaultfd: introduce vm_uffd_ops->alloc_folio()

and use it to refactor mfill_atomic_pte_zeroed_folio() and
mfill_atomic_pte_copy().

mfill_atomic_pte_zeroed_folio() and mfill_atomic_pte_copy() perform
almost identical actions:
* allocate a folio
* update folio contents (either copy from userspace or fill with zeros)
* update page tables with the new folio

Split out a __mfill_atomic_pte() helper that handles both cases and uses the
newly introduced vm_uffd_ops->alloc_folio() to allocate the folio.

Pass the ops structure from the callers to __mfill_atomic_pte() to later
allow using anon_uffd_ops for MAP_PRIVATE mappings of file-backed VMAs.

Note that the new ops method is called alloc_folio() rather than
folio_alloc() to avoid a clash with the alloc_tag macro folio_alloc().

Link: https://lore.kernel.org/20260402041156.1377214-10-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: James Houghton <jthoughton@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Mike Rapoport (Microsoft) and committed by
Andrew Morton
ad9ac308 dfc4d771

+54 -44
+6
include/linux/userfaultfd_k.h
··· 94 94 * The returned folio is locked and with reference held. 95 95 */ 96 96 struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t pgoff); 97 + /* 98 + * Called during resolution of UFFDIO_COPY request. 99 + * Should allocate and return a folio or NULL if allocation fails. 100 + */ 101 + struct folio *(*alloc_folio)(struct vm_area_struct *vma, 102 + unsigned long addr); 97 103 }; 98 104 99 105 /* A combined operation mode + behavior flags. */
+48 -44
mm/userfaultfd.c
··· 42 42 return true; 43 43 } 44 44 45 + static struct folio *anon_alloc_folio(struct vm_area_struct *vma, 46 + unsigned long addr) 47 + { 48 + struct folio *folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, 49 + addr); 50 + 51 + if (!folio) 52 + return NULL; 53 + 54 + if (mem_cgroup_charge(folio, vma->vm_mm, GFP_KERNEL)) { 55 + folio_put(folio); 56 + return NULL; 57 + } 58 + 59 + return folio; 60 + } 61 + 45 62 static const struct vm_uffd_ops anon_uffd_ops = { 46 63 .can_userfault = anon_can_userfault, 64 + .alloc_folio = anon_alloc_folio, 47 65 }; 48 66 49 67 static const struct vm_uffd_ops *vma_uffd_ops(struct vm_area_struct *vma) ··· 474 456 return 0; 475 457 } 476 458 477 - static int mfill_atomic_pte_copy(struct mfill_state *state) 459 + static int __mfill_atomic_pte(struct mfill_state *state, 460 + const struct vm_uffd_ops *ops) 478 461 { 479 462 unsigned long dst_addr = state->dst_addr; 480 463 unsigned long src_addr = state->src_addr; ··· 483 464 struct folio *folio; 484 465 int ret; 485 466 486 - folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, state->vma, dst_addr); 467 + folio = ops->alloc_folio(state->vma, state->dst_addr); 487 468 if (!folio) 488 469 return -ENOMEM; 489 470 490 - ret = -ENOMEM; 491 - if (mem_cgroup_charge(folio, state->vma->vm_mm, GFP_KERNEL)) 492 - goto out_release; 493 - 494 - ret = mfill_copy_folio_locked(folio, src_addr); 495 - if (unlikely(ret)) { 471 + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) { 472 + ret = mfill_copy_folio_locked(folio, src_addr); 496 473 /* 497 474 * Fallback to copy_from_user outside mmap_lock. 498 475 * If retry is successful, mfill_copy_folio_locked() returns ··· 496 481 * If there was an error, we must mfill_put_vma() anyway and it 497 482 * will take care of unlocking if needed. 
498 483 */ 499 - ret = mfill_copy_folio_retry(state, folio); 500 - if (ret) 501 - goto out_release; 484 + if (unlikely(ret)) { 485 + ret = mfill_copy_folio_retry(state, folio); 486 + if (ret) 487 + goto err_folio_put; 488 + } 489 + } else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) { 490 + clear_user_highpage(&folio->page, state->dst_addr); 491 + } else { 492 + VM_WARN_ONCE(1, "Unknown UFFDIO operation, flags: %x", flags); 502 493 } 503 494 504 495 /* ··· 517 496 ret = mfill_atomic_install_pte(state->pmd, state->vma, dst_addr, 518 497 &folio->page, true, flags); 519 498 if (ret) 520 - goto out_release; 521 - out: 522 - return ret; 523 - out_release: 499 + goto err_folio_put; 500 + 501 + return 0; 502 + 503 + err_folio_put: 504 + folio_put(folio); 524 505 /* Don't return -ENOENT so that our caller won't retry */ 525 506 if (ret == -ENOENT) 526 507 ret = -EFAULT; 527 - folio_put(folio); 528 - goto out; 508 + return ret; 529 509 } 530 510 531 - static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd, 532 - struct vm_area_struct *dst_vma, 533 - unsigned long dst_addr) 511 + static int mfill_atomic_pte_copy(struct mfill_state *state) 534 512 { 535 - struct folio *folio; 536 - int ret = -ENOMEM; 513 + const struct vm_uffd_ops *ops = vma_uffd_ops(state->vma); 537 514 538 - folio = vma_alloc_zeroed_movable_folio(dst_vma, dst_addr); 539 - if (!folio) 540 - return ret; 515 + return __mfill_atomic_pte(state, ops); 516 + } 541 517 542 - if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL)) 543 - goto out_put; 518 + static int mfill_atomic_pte_zeroed_folio(struct mfill_state *state) 519 + { 520 + const struct vm_uffd_ops *ops = vma_uffd_ops(state->vma); 544 521 545 - /* 546 - * The memory barrier inside __folio_mark_uptodate makes sure that 547 - * zeroing out the folio become visible before mapping the page 548 - * using set_pte_at(). See do_anonymous_page(). 
549 - */ 550 - __folio_mark_uptodate(folio); 551 - 552 - ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, 553 - &folio->page, true, 0); 554 - if (ret) 555 - goto out_put; 556 - 557 - return 0; 558 - out_put: 559 - folio_put(folio); 560 - return ret; 522 + return __mfill_atomic_pte(state, ops); 561 523 } 562 524 563 525 static int mfill_atomic_pte_zeropage(struct mfill_state *state) ··· 553 549 int ret; 554 550 555 551 if (mm_forbids_zeropage(dst_vma->vm_mm)) 556 - return mfill_atomic_pte_zeroed_folio(dst_pmd, dst_vma, dst_addr); 552 + return mfill_atomic_pte_zeroed_folio(state); 557 553 558 554 _dst_pte = pte_mkspecial(pfn_pte(zero_pfn(dst_addr), 559 555 dst_vma->vm_page_prot));