Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

userfaultfd: introduce struct mfill_state

mfill_atomic() passes a lot of parameters down to its callees.

Aggregate them all into a struct mfill_state and pass this structure to
the functions that implement the various UFFDIO_* commands.

Tracking the state in a structure will allow moving the code that retries
the data copy for UFFDIO_COPY into mfill_atomic_pte_copy() and will make
the loop in mfill_atomic() identical for all UFFDIO operations on
PTE-mapped memory.

The mfill_state definition is deliberately local to mm/userfaultfd.c,
hence shmem_mfill_atomic_pte() is not updated.

[harry.yoo@oracle.com: properly initialize mfill_state.len to fix
folio_add_new_anon_rmap() WARN]
Link: https://lore.kernel.org/abehBY7QakYF9bK4@hyeyoo
Link: https://lore.kernel.org/20260402041156.1377214-3-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Mike Rapoport (Microsoft) and committed by
Andrew Morton
db0062d2 c0620487

+81 -66
+81 -66
mm/userfaultfd.c
··· 20 20 #include "internal.h" 21 21 #include "swap.h" 22 22 23 + struct mfill_state { 24 + struct userfaultfd_ctx *ctx; 25 + unsigned long src_start; 26 + unsigned long dst_start; 27 + unsigned long len; 28 + uffd_flags_t flags; 29 + 30 + struct vm_area_struct *vma; 31 + unsigned long src_addr; 32 + unsigned long dst_addr; 33 + struct folio *folio; 34 + pmd_t *pmd; 35 + }; 36 + 23 37 static __always_inline 24 38 bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end) 25 39 { ··· 286 272 return ret; 287 273 } 288 274 289 - static int mfill_atomic_pte_copy(pmd_t *dst_pmd, 290 - struct vm_area_struct *dst_vma, 291 - unsigned long dst_addr, 292 - unsigned long src_addr, 293 - uffd_flags_t flags, 294 - struct folio **foliop) 275 + static int mfill_atomic_pte_copy(struct mfill_state *state) 295 276 { 296 - int ret; 277 + struct vm_area_struct *dst_vma = state->vma; 278 + unsigned long dst_addr = state->dst_addr; 279 + unsigned long src_addr = state->src_addr; 280 + uffd_flags_t flags = state->flags; 281 + pmd_t *dst_pmd = state->pmd; 297 282 struct folio *folio; 283 + int ret; 298 284 299 - if (!*foliop) { 285 + if (!state->folio) { 300 286 ret = -ENOMEM; 301 287 folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma, 302 288 dst_addr); ··· 308 294 /* fallback to copy_from_user outside mmap_lock */ 309 295 if (unlikely(ret)) { 310 296 ret = -ENOENT; 311 - *foliop = folio; 297 + state->folio = folio; 312 298 /* don't free the page */ 313 299 goto out; 314 300 } 315 301 } else { 316 - folio = *foliop; 317 - *foliop = NULL; 302 + folio = state->folio; 303 + state->folio = NULL; 318 304 } 319 305 320 306 /* ··· 371 357 return ret; 372 358 } 373 359 374 - static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd, 375 - struct vm_area_struct *dst_vma, 376 - unsigned long dst_addr) 360 + static int mfill_atomic_pte_zeropage(struct mfill_state *state) 377 361 { 362 + struct vm_area_struct *dst_vma = state->vma; 363 + unsigned long dst_addr = state->dst_addr; 
364 + pmd_t *dst_pmd = state->pmd; 378 365 pte_t _dst_pte, *dst_pte; 379 366 spinlock_t *ptl; 380 367 int ret; ··· 407 392 } 408 393 409 394 /* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */ 410 - static int mfill_atomic_pte_continue(pmd_t *dst_pmd, 411 - struct vm_area_struct *dst_vma, 412 - unsigned long dst_addr, 413 - uffd_flags_t flags) 395 + static int mfill_atomic_pte_continue(struct mfill_state *state) 414 396 { 415 - struct inode *inode = file_inode(dst_vma->vm_file); 397 + struct vm_area_struct *dst_vma = state->vma; 398 + unsigned long dst_addr = state->dst_addr; 416 399 pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); 400 + struct inode *inode = file_inode(dst_vma->vm_file); 401 + uffd_flags_t flags = state->flags; 402 + pmd_t *dst_pmd = state->pmd; 417 403 struct folio *folio; 418 404 struct page *page; 419 405 int ret; ··· 452 436 } 453 437 454 438 /* Handles UFFDIO_POISON for all non-hugetlb VMAs. */ 455 - static int mfill_atomic_pte_poison(pmd_t *dst_pmd, 456 - struct vm_area_struct *dst_vma, 457 - unsigned long dst_addr, 458 - uffd_flags_t flags) 439 + static int mfill_atomic_pte_poison(struct mfill_state *state) 459 440 { 460 - int ret; 441 + struct vm_area_struct *dst_vma = state->vma; 461 442 struct mm_struct *dst_mm = dst_vma->vm_mm; 443 + unsigned long dst_addr = state->dst_addr; 444 + pmd_t *dst_pmd = state->pmd; 462 445 pte_t _dst_pte, *dst_pte; 463 446 spinlock_t *ptl; 447 + int ret; 464 448 465 449 _dst_pte = make_pte_marker(PTE_MARKER_POISONED); 466 450 ret = -EAGAIN; ··· 684 668 uffd_flags_t flags); 685 669 #endif /* CONFIG_HUGETLB_PAGE */ 686 670 687 - static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd, 688 - struct vm_area_struct *dst_vma, 689 - unsigned long dst_addr, 690 - unsigned long src_addr, 691 - uffd_flags_t flags, 692 - struct folio **foliop) 671 + static __always_inline ssize_t mfill_atomic_pte(struct mfill_state *state) 693 672 { 673 + struct vm_area_struct *dst_vma = state->vma; 674 + 
unsigned long src_addr = state->src_addr; 675 + unsigned long dst_addr = state->dst_addr; 676 + struct folio **foliop = &state->folio; 677 + uffd_flags_t flags = state->flags; 678 + pmd_t *dst_pmd = state->pmd; 694 679 ssize_t err; 695 680 696 - if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) { 697 - return mfill_atomic_pte_continue(dst_pmd, dst_vma, 698 - dst_addr, flags); 699 - } else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) { 700 - return mfill_atomic_pte_poison(dst_pmd, dst_vma, 701 - dst_addr, flags); 702 - } 681 + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) 682 + return mfill_atomic_pte_continue(state); 683 + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) 684 + return mfill_atomic_pte_poison(state); 703 685 704 686 /* 705 687 * The normal page fault path for a shmem will invoke the ··· 711 697 */ 712 698 if (!(dst_vma->vm_flags & VM_SHARED)) { 713 699 if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) 714 - err = mfill_atomic_pte_copy(dst_pmd, dst_vma, 715 - dst_addr, src_addr, 716 - flags, foliop); 700 + err = mfill_atomic_pte_copy(state); 717 701 else 718 - err = mfill_atomic_pte_zeropage(dst_pmd, 719 - dst_vma, dst_addr); 702 + err = mfill_atomic_pte_zeropage(state); 720 703 } else { 721 704 err = shmem_mfill_atomic_pte(dst_pmd, dst_vma, 722 705 dst_addr, src_addr, ··· 729 718 unsigned long len, 730 719 uffd_flags_t flags) 731 720 { 721 + struct mfill_state state = (struct mfill_state){ 722 + .ctx = ctx, 723 + .dst_start = dst_start, 724 + .src_start = src_start, 725 + .flags = flags, 726 + .len = len, 727 + .src_addr = src_start, 728 + .dst_addr = dst_start, 729 + }; 732 730 struct mm_struct *dst_mm = ctx->mm; 733 731 struct vm_area_struct *dst_vma; 732 + long copied = 0; 734 733 ssize_t err; 735 734 pmd_t *dst_pmd; 736 - unsigned long src_addr, dst_addr; 737 - long copied; 738 - struct folio *folio; 739 735 740 736 /* 741 737 * Sanitize the command parameters: ··· 754 736 VM_WARN_ON_ONCE(src_start + len <= src_start); 755 
737 VM_WARN_ON_ONCE(dst_start + len <= dst_start); 756 738 757 - src_addr = src_start; 758 - dst_addr = dst_start; 759 - copied = 0; 760 - folio = NULL; 761 739 retry: 762 740 /* 763 741 * Make sure the vma is not shared, that the dst range is ··· 764 750 err = PTR_ERR(dst_vma); 765 751 goto out; 766 752 } 753 + state.vma = dst_vma; 767 754 768 755 /* 769 756 * If memory mappings are changing because of non-cooperative ··· 805 790 uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) 806 791 goto out_unlock; 807 792 808 - while (src_addr < src_start + len) { 793 + while (state.src_addr < src_start + len) { 794 + VM_WARN_ON_ONCE(state.dst_addr >= dst_start + len); 795 + 809 796 pmd_t dst_pmdval; 810 797 811 - VM_WARN_ON_ONCE(dst_addr >= dst_start + len); 812 - 813 - dst_pmd = mm_alloc_pmd(dst_mm, dst_addr); 798 + dst_pmd = mm_alloc_pmd(dst_mm, state.dst_addr); 814 799 if (unlikely(!dst_pmd)) { 815 800 err = -ENOMEM; 816 801 break; ··· 842 827 * tables under us; pte_offset_map_lock() will deal with that. 
843 828 */ 844 829 845 - err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, 846 - src_addr, flags, &folio); 830 + state.pmd = dst_pmd; 831 + err = mfill_atomic_pte(&state); 847 832 cond_resched(); 848 833 849 834 if (unlikely(err == -ENOENT)) { 850 835 void *kaddr; 851 836 852 837 up_read(&ctx->map_changing_lock); 853 - uffd_mfill_unlock(dst_vma); 854 - VM_WARN_ON_ONCE(!folio); 838 + uffd_mfill_unlock(state.vma); 839 + VM_WARN_ON_ONCE(!state.folio); 855 840 856 - kaddr = kmap_local_folio(folio, 0); 841 + kaddr = kmap_local_folio(state.folio, 0); 857 842 err = copy_from_user(kaddr, 858 - (const void __user *) src_addr, 843 + (const void __user *)state.src_addr, 859 844 PAGE_SIZE); 860 845 kunmap_local(kaddr); 861 846 if (unlikely(err)) { 862 847 err = -EFAULT; 863 848 goto out; 864 849 } 865 - flush_dcache_folio(folio); 850 + flush_dcache_folio(state.folio); 866 851 goto retry; 867 852 } else 868 - VM_WARN_ON_ONCE(folio); 853 + VM_WARN_ON_ONCE(state.folio); 869 854 870 855 if (!err) { 871 - dst_addr += PAGE_SIZE; 872 - src_addr += PAGE_SIZE; 856 + state.dst_addr += PAGE_SIZE; 857 + state.src_addr += PAGE_SIZE; 873 858 copied += PAGE_SIZE; 874 859 875 860 if (fatal_signal_pending(current)) ··· 881 866 882 867 out_unlock: 883 868 up_read(&ctx->map_changing_lock); 884 - uffd_mfill_unlock(dst_vma); 869 + uffd_mfill_unlock(state.vma); 885 870 out: 886 - if (folio) 887 - folio_put(folio); 871 + if (state.folio) 872 + folio_put(state.folio); 888 873 VM_WARN_ON_ONCE(copied < 0); 889 874 VM_WARN_ON_ONCE(err > 0); 890 875 VM_WARN_ON_ONCE(!copied && !err);