Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/gup: remove pXX_devmap usage from get_user_pages()

GUP uses pXX_devmap() calls to see if it needs to get a reference on the
associated pgmap data structure to ensure the pages won't go away.
However, it is the driver's responsibility to ensure that pages which are
mapped (i.e. discoverable by GUP) are not offlined or removed from the
memmap, so there is no need to hold a reference on the pgmap data structure
to ensure this.

Furthermore, mappings with PFN_DEV are no longer created, hence this is
effectively dead code anyway and can be removed.

Link: https://lkml.kernel.org/r/708b2be76876659ec5261fe5d059b07268b98b36.1750323463.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Björn Töpel <bjorn@kernel.org>
Cc: Björn Töpel <bjorn@rivosinc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Deepak Gupta <debug@rivosinc.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Inki Dae <m.szyprowski@samsung.com>
Cc: John Groves <john@groves.net>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Alistair Popple and committed by
Andrew Morton
fd2825b0 4b1d3145

+5 -198
-3
include/linux/huge_mm.h
··· 473 473 return folio_order(folio) >= HPAGE_PMD_ORDER; 474 474 } 475 475 476 - struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, 477 - pmd_t *pmd, int flags, struct dev_pagemap **pgmap); 478 - 479 476 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); 480 477 481 478 extern struct folio *huge_zero_folio;
+5 -155
mm/gup.c
··· 679 679 return NULL; 680 680 681 681 pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT; 682 - 683 - if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) && 684 - pud_devmap(pud)) { 685 - /* 686 - * device mapped pages can only be returned if the caller 687 - * will manage the page reference count. 688 - * 689 - * At least one of FOLL_GET | FOLL_PIN must be set, so 690 - * assert that here: 691 - */ 692 - if (!(flags & (FOLL_GET | FOLL_PIN))) 693 - return ERR_PTR(-EEXIST); 694 - 695 - if (flags & FOLL_TOUCH) 696 - touch_pud(vma, addr, pudp, flags & FOLL_WRITE); 697 - 698 - ctx->pgmap = get_dev_pagemap(pfn, ctx->pgmap); 699 - if (!ctx->pgmap) 700 - return ERR_PTR(-EFAULT); 701 - } 702 - 703 682 page = pfn_to_page(pfn); 704 683 705 - if (!pud_devmap(pud) && !pud_write(pud) && 706 - gup_must_unshare(vma, flags, page)) 684 + if (!pud_write(pud) && gup_must_unshare(vma, flags, page)) 707 685 return ERR_PTR(-EMLINK); 708 686 709 687 ret = try_grab_folio(page_folio(page), 1, flags); ··· 835 857 page = vm_normal_page(vma, address, pte); 836 858 837 859 /* 838 - * We only care about anon pages in can_follow_write_pte() and don't 839 - * have to worry about pte_devmap() because they are never anon. 860 + * We only care about anon pages in can_follow_write_pte(). 840 861 */ 841 862 if ((flags & FOLL_WRITE) && 842 863 !can_follow_write_pte(pte, page, vma, flags)) { ··· 843 866 goto out; 844 867 } 845 868 846 - if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) { 847 - /* 848 - * Only return device mapping pages in the FOLL_GET or FOLL_PIN 849 - * case since they are only valid while holding the pgmap 850 - * reference. 
851 - */ 852 - *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap); 853 - if (*pgmap) 854 - page = pte_page(pte); 855 - else 856 - goto no_page; 857 - } else if (unlikely(!page)) { 869 + if (unlikely(!page)) { 858 870 if (flags & FOLL_DUMP) { 859 871 /* Avoid special (like zero) pages in core dumps */ 860 872 page = ERR_PTR(-EFAULT); ··· 925 959 return no_page_table(vma, flags, address); 926 960 if (!pmd_present(pmdval)) 927 961 return no_page_table(vma, flags, address); 928 - if (pmd_devmap(pmdval)) { 929 - ptl = pmd_lock(mm, pmd); 930 - page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap); 931 - spin_unlock(ptl); 932 - if (page) 933 - return page; 934 - return no_page_table(vma, flags, address); 935 - } 936 962 if (likely(!pmd_leaf(pmdval))) 937 963 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); 938 964 ··· 2854 2896 int *nr) 2855 2897 { 2856 2898 struct dev_pagemap *pgmap = NULL; 2857 - int nr_start = *nr, ret = 0; 2899 + int ret = 0; 2858 2900 pte_t *ptep, *ptem; 2859 2901 2860 2902 ptem = ptep = pte_offset_map(&pmd, addr); ··· 2878 2920 if (!pte_access_permitted(pte, flags & FOLL_WRITE)) 2879 2921 goto pte_unmap; 2880 2922 2881 - if (pte_devmap(pte)) { 2882 - if (unlikely(flags & FOLL_LONGTERM)) 2883 - goto pte_unmap; 2884 - 2885 - pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); 2886 - if (unlikely(!pgmap)) { 2887 - gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); 2888 - goto pte_unmap; 2889 - } 2890 - } else if (pte_special(pte)) 2923 + if (pte_special(pte)) 2891 2924 goto pte_unmap; 2892 2925 2893 2926 /* If it's not marked as special it must have a valid memmap. 
*/ ··· 2950 3001 } 2951 3002 #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ 2952 3003 2953 - #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) 2954 - static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr, 2955 - unsigned long end, unsigned int flags, struct page **pages, int *nr) 2956 - { 2957 - int nr_start = *nr; 2958 - struct dev_pagemap *pgmap = NULL; 2959 - 2960 - do { 2961 - struct folio *folio; 2962 - struct page *page = pfn_to_page(pfn); 2963 - 2964 - pgmap = get_dev_pagemap(pfn, pgmap); 2965 - if (unlikely(!pgmap)) { 2966 - gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); 2967 - break; 2968 - } 2969 - 2970 - folio = try_grab_folio_fast(page, 1, flags); 2971 - if (!folio) { 2972 - gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); 2973 - break; 2974 - } 2975 - folio_set_referenced(folio); 2976 - pages[*nr] = page; 2977 - (*nr)++; 2978 - pfn++; 2979 - } while (addr += PAGE_SIZE, addr != end); 2980 - 2981 - put_dev_pagemap(pgmap); 2982 - return addr == end; 2983 - } 2984 - 2985 - static int gup_fast_devmap_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, 2986 - unsigned long end, unsigned int flags, struct page **pages, 2987 - int *nr) 2988 - { 2989 - unsigned long fault_pfn; 2990 - int nr_start = *nr; 2991 - 2992 - fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); 2993 - if (!gup_fast_devmap_leaf(fault_pfn, addr, end, flags, pages, nr)) 2994 - return 0; 2995 - 2996 - if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { 2997 - gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); 2998 - return 0; 2999 - } 3000 - return 1; 3001 - } 3002 - 3003 - static int gup_fast_devmap_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, 3004 - unsigned long end, unsigned int flags, struct page **pages, 3005 - int *nr) 3006 - { 3007 - unsigned long fault_pfn; 3008 - int nr_start = *nr; 3009 - 3010 - fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); 3011 - if (!gup_fast_devmap_leaf(fault_pfn, 
addr, end, flags, pages, nr)) 3012 - return 0; 3013 - 3014 - if (unlikely(pud_val(orig) != pud_val(*pudp))) { 3015 - gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); 3016 - return 0; 3017 - } 3018 - return 1; 3019 - } 3020 - #else 3021 - static int gup_fast_devmap_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, 3022 - unsigned long end, unsigned int flags, struct page **pages, 3023 - int *nr) 3024 - { 3025 - BUILD_BUG(); 3026 - return 0; 3027 - } 3028 - 3029 - static int gup_fast_devmap_pud_leaf(pud_t pud, pud_t *pudp, unsigned long addr, 3030 - unsigned long end, unsigned int flags, struct page **pages, 3031 - int *nr) 3032 - { 3033 - BUILD_BUG(); 3034 - return 0; 3035 - } 3036 - #endif 3037 - 3038 3004 static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, 3039 3005 unsigned long end, unsigned int flags, struct page **pages, 3040 3006 int *nr) ··· 2963 3099 2964 3100 if (pmd_special(orig)) 2965 3101 return 0; 2966 - 2967 - if (pmd_devmap(orig)) { 2968 - if (unlikely(flags & FOLL_LONGTERM)) 2969 - return 0; 2970 - return gup_fast_devmap_pmd_leaf(orig, pmdp, addr, end, flags, 2971 - pages, nr); 2972 - } 2973 3102 2974 3103 page = pmd_page(orig); 2975 3104 refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); ··· 3003 3146 3004 3147 if (pud_special(orig)) 3005 3148 return 0; 3006 - 3007 - if (pud_devmap(orig)) { 3008 - if (unlikely(flags & FOLL_LONGTERM)) 3009 - return 0; 3010 - return gup_fast_devmap_pud_leaf(orig, pudp, addr, end, flags, 3011 - pages, nr); 3012 - } 3013 3149 3014 3150 page = pud_page(orig); 3015 3151 refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);
-40
mm/huge_memory.c
··· 1672 1672 update_mmu_cache_pmd(vma, addr, pmd); 1673 1673 } 1674 1674 1675 - struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, 1676 - pmd_t *pmd, int flags, struct dev_pagemap **pgmap) 1677 - { 1678 - unsigned long pfn = pmd_pfn(*pmd); 1679 - struct mm_struct *mm = vma->vm_mm; 1680 - struct page *page; 1681 - int ret; 1682 - 1683 - assert_spin_locked(pmd_lockptr(mm, pmd)); 1684 - 1685 - if (flags & FOLL_WRITE && !pmd_write(*pmd)) 1686 - return NULL; 1687 - 1688 - if (pmd_present(*pmd) && pmd_devmap(*pmd)) 1689 - /* pass */; 1690 - else 1691 - return NULL; 1692 - 1693 - if (flags & FOLL_TOUCH) 1694 - touch_pmd(vma, addr, pmd, flags & FOLL_WRITE); 1695 - 1696 - /* 1697 - * device mapped pages can only be returned if the 1698 - * caller will manage the page reference count. 1699 - */ 1700 - if (!(flags & (FOLL_GET | FOLL_PIN))) 1701 - return ERR_PTR(-EEXIST); 1702 - 1703 - pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT; 1704 - *pgmap = get_dev_pagemap(pfn, *pgmap); 1705 - if (!*pgmap) 1706 - return ERR_PTR(-EFAULT); 1707 - page = pfn_to_page(pfn); 1708 - ret = try_grab_folio(page_folio(page), 1, flags); 1709 - if (ret) 1710 - page = ERR_PTR(ret); 1711 - 1712 - return page; 1713 - } 1714 - 1715 1675 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, 1716 1676 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, 1717 1677 struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)