Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

userfaultfd: introduce mfill_get_vma() and mfill_put_vma()

Split the code that finds, locks and verifies the VMA out of mfill_atomic()
into a helper function.

This function will be used later during the refactoring of
mfill_atomic_pte_copy().

Add a counterpart mfill_put_vma() helper that unlocks the VMA and releases
map_changing_lock.

[avagin@google.com: fix lock leak in mfill_get_vma()]
Link: https://lore.kernel.org/20260316173829.1126728-1-avagin@google.com
Link: https://lore.kernel.org/20260402041156.1377214-5-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Andrei Vagin <avagin@google.com>
Reviewed-by: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Mike Rapoport (Microsoft) and committed by
Andrew Morton
b8c03b7f e2e0b826

+75 -50
+75 -50
mm/userfaultfd.c
··· 157 157 } 158 158 #endif 159 159 160 + static void mfill_put_vma(struct mfill_state *state) 161 + { 162 + if (!state->vma) 163 + return; 164 + 165 + up_read(&state->ctx->map_changing_lock); 166 + uffd_mfill_unlock(state->vma); 167 + state->vma = NULL; 168 + } 169 + 170 + static int mfill_get_vma(struct mfill_state *state) 171 + { 172 + struct userfaultfd_ctx *ctx = state->ctx; 173 + uffd_flags_t flags = state->flags; 174 + struct vm_area_struct *dst_vma; 175 + int err; 176 + 177 + /* 178 + * Make sure the vma is not shared, that the dst range is 179 + * both valid and fully within a single existing vma. 180 + */ 181 + dst_vma = uffd_mfill_lock(ctx->mm, state->dst_start, state->len); 182 + if (IS_ERR(dst_vma)) 183 + return PTR_ERR(dst_vma); 184 + 185 + /* 186 + * If memory mappings are changing because of non-cooperative 187 + * operation (e.g. mremap) running in parallel, bail out and 188 + * request the user to retry later 189 + */ 190 + down_read(&ctx->map_changing_lock); 191 + state->vma = dst_vma; 192 + err = -EAGAIN; 193 + if (atomic_read(&ctx->mmap_changing)) 194 + goto out_unlock; 195 + 196 + err = -EINVAL; 197 + 198 + /* 199 + * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but 200 + * it will overwrite vm_ops, so vma_is_anonymous must return false. 201 + */ 202 + if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) && 203 + dst_vma->vm_flags & VM_SHARED)) 204 + goto out_unlock; 205 + 206 + /* 207 + * validate 'mode' now that we know the dst_vma: don't allow 208 + * a wrprotect copy if the userfaultfd didn't register as WP. 
209 + */ 210 + if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP)) 211 + goto out_unlock; 212 + 213 + if (is_vm_hugetlb_page(dst_vma)) 214 + return 0; 215 + 216 + if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) 217 + goto out_unlock; 218 + if (!vma_is_shmem(dst_vma) && 219 + uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) 220 + goto out_unlock; 221 + 222 + return 0; 223 + 224 + out_unlock: 225 + mfill_put_vma(state); 226 + return err; 227 + } 228 + 160 229 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) 161 230 { 162 231 pgd_t *pgd; ··· 836 767 .src_addr = src_start, 837 768 .dst_addr = dst_start, 838 769 }; 839 - struct mm_struct *dst_mm = ctx->mm; 840 - struct vm_area_struct *dst_vma; 841 770 long copied = 0; 842 771 ssize_t err; 843 772 ··· 850 783 VM_WARN_ON_ONCE(dst_start + len <= dst_start); 851 784 852 785 retry: 853 - /* 854 - * Make sure the vma is not shared, that the dst range is 855 - * both valid and fully within a single existing vma. 856 - */ 857 - dst_vma = uffd_mfill_lock(dst_mm, dst_start, len); 858 - if (IS_ERR(dst_vma)) { 859 - err = PTR_ERR(dst_vma); 786 + err = mfill_get_vma(&state); 787 + if (err) 860 788 goto out; 861 - } 862 - state.vma = dst_vma; 863 - 864 - /* 865 - * If memory mappings are changing because of non-cooperative 866 - * operation (e.g. mremap) running in parallel, bail out and 867 - * request the user to retry later 868 - */ 869 - down_read(&ctx->map_changing_lock); 870 - err = -EAGAIN; 871 - if (atomic_read(&ctx->mmap_changing)) 872 - goto out_unlock; 873 - 874 - err = -EINVAL; 875 - /* 876 - * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but 877 - * it will overwrite vm_ops, so vma_is_anonymous must return false. 
878 - */ 879 - if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) && 880 - dst_vma->vm_flags & VM_SHARED)) 881 - goto out_unlock; 882 - 883 - /* 884 - * validate 'mode' now that we know the dst_vma: don't allow 885 - * a wrprotect copy if the userfaultfd didn't register as WP. 886 - */ 887 - if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP)) 888 - goto out_unlock; 889 789 890 790 /* 891 791 * If this is a HUGETLB vma, pass off to appropriate routine 892 792 */ 893 - if (is_vm_hugetlb_page(dst_vma)) 894 - return mfill_atomic_hugetlb(ctx, dst_vma, dst_start, 793 + if (is_vm_hugetlb_page(state.vma)) 794 + return mfill_atomic_hugetlb(ctx, state.vma, dst_start, 895 795 src_start, len, flags); 896 - 897 - if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) 898 - goto out_unlock; 899 - if (!vma_is_shmem(dst_vma) && 900 - uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) 901 - goto out_unlock; 902 796 903 797 while (state.src_addr < src_start + len) { 904 798 VM_WARN_ON_ONCE(state.dst_addr >= dst_start + len); ··· 879 851 if (unlikely(err == -ENOENT)) { 880 852 void *kaddr; 881 853 882 - up_read(&ctx->map_changing_lock); 883 - uffd_mfill_unlock(state.vma); 854 + mfill_put_vma(&state); 884 855 VM_WARN_ON_ONCE(!state.folio); 885 856 886 857 kaddr = kmap_local_folio(state.folio, 0); ··· 908 881 break; 909 882 } 910 883 911 - out_unlock: 912 - up_read(&ctx->map_changing_lock); 913 - uffd_mfill_unlock(state.vma); 884 + mfill_put_vma(&state); 914 885 out: 915 886 if (state.folio) 916 887 folio_put(state.folio);