Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/rmap: extend rmap and migration support to device-private entries

Add device-private THP support to the reverse mapping infrastructure, enabling
proper handling during migration and walk operations.

The key changes are:
- add_migration_pmd()/remove_migration_pmd(): Handle device-private
entries during folio migration and splitting
- page_vma_mapped_walk(): Recognize device-private THP entries during
VMA traversal operations

This change supports folio splitting and migration operations on
device-private entries.

[balbirs@nvidia.com: fix override of entry in remove_migration_pmd]
Link: https://lkml.kernel.org/r/20251114012153.2634497-2-balbirs@nvidia.com
[balbirs@nvidia.com: follow pattern used in remove_migration_pte()]
Link: https://lkml.kernel.org/r/20251115002835.3515194-1-balbirs@nvidia.com
Link: https://lkml.kernel.org/r/20251001065707.920170-5-balbirs@nvidia.com
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Balbir Singh and committed by
Andrew Morton
65edfda6 368076f5

+71 -10
+17 -3
mm/damon/ops-common.c
··· 75 75 void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr) 76 76 { 77 77 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 78 - struct folio *folio = damon_get_folio(pmd_pfn(pmdp_get(pmd))); 78 + pmd_t pmdval = pmdp_get(pmd); 79 + struct folio *folio; 80 + bool young = false; 81 + unsigned long pfn; 79 82 83 + if (likely(pmd_present(pmdval))) 84 + pfn = pmd_pfn(pmdval); 85 + else 86 + pfn = swp_offset_pfn(pmd_to_swp_entry(pmdval)); 87 + 88 + folio = damon_get_folio(pfn); 80 89 if (!folio) 81 90 return; 82 91 83 - if (pmdp_clear_young_notify(vma, addr, pmd)) 92 + if (likely(pmd_present(pmdval))) 93 + young |= pmdp_clear_young_notify(vma, addr, pmd); 94 + young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE); 95 + if (young) 84 96 folio_set_young(folio); 85 97 86 98 folio_set_idle(folio); ··· 211 199 mmu_notifier_test_young(vma->vm_mm, addr); 212 200 } else { 213 201 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 214 - *accessed = pmd_young(pmdp_get(pvmw.pmd)) || 202 + pmd_t pmd = pmdp_get(pvmw.pmd); 203 + 204 + *accessed = (pmd_present(pmd) && pmd_young(pmd)) || 215 205 !folio_test_idle(folio) || 216 206 mmu_notifier_test_young(vma->vm_mm, addr); 217 207 #else
+22 -1
mm/huge_memory.c
··· 4583 4583 return 0; 4584 4584 4585 4585 flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); 4586 - pmdval = pmdp_invalidate(vma, address, pvmw->pmd); 4586 + if (unlikely(!pmd_present(*pvmw->pmd))) 4587 + pmdval = pmdp_huge_get_and_clear(vma->vm_mm, address, pvmw->pmd); 4588 + else 4589 + pmdval = pmdp_invalidate(vma, address, pvmw->pmd); 4587 4590 4588 4591 /* See folio_try_share_anon_rmap_pmd(): invalidate PMD first. */ 4589 4592 anon_exclusive = folio_test_anon(folio) && PageAnonExclusive(page); ··· 4636 4633 entry = pmd_to_swp_entry(*pvmw->pmd); 4637 4634 folio_get(folio); 4638 4635 pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot)); 4636 + 4639 4637 if (pmd_swp_soft_dirty(*pvmw->pmd)) 4640 4638 pmde = pmd_mksoft_dirty(pmde); 4641 4639 if (is_writable_migration_entry(entry)) ··· 4648 4644 /* NOTE: this may contain setting soft-dirty on some archs */ 4649 4645 if (folio_test_dirty(folio) && is_migration_entry_dirty(entry)) 4650 4646 pmde = pmd_mkdirty(pmde); 4647 + 4648 + if (folio_is_device_private(folio)) { 4649 + swp_entry_t entry; 4650 + 4651 + if (pmd_write(pmde)) 4652 + entry = make_writable_device_private_entry( 4653 + page_to_pfn(new)); 4654 + else 4655 + entry = make_readable_device_private_entry( 4656 + page_to_pfn(new)); 4657 + pmde = swp_entry_to_pmd(entry); 4658 + 4659 + if (pmd_swp_soft_dirty(*pvmw->pmd)) 4660 + pmde = pmd_swp_mksoft_dirty(pmde); 4661 + if (pmd_swp_uffd_wp(*pvmw->pmd)) 4662 + pmde = pmd_swp_mkuffd_wp(pmde); 4663 + } 4651 4664 4652 4665 if (folio_test_anon(folio)) { 4653 4666 rmap_t rmap_flags = RMAP_NONE;
+5 -2
mm/page_idle.c
··· 71 71 referenced |= ptep_test_and_clear_young(vma, addr, pvmw.pte); 72 72 referenced |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE); 73 73 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 74 - if (pmdp_clear_young_notify(vma, addr, pvmw.pmd)) 75 - referenced = true; 74 + pmd_t pmdval = pmdp_get(pvmw.pmd); 75 + 76 + if (likely(pmd_present(pmdval))) 77 + referenced |= pmdp_clear_young_notify(vma, addr, pvmw.pmd); 78 + referenced |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PMD_SIZE); 76 79 } else { 77 80 /* unexpected pmd-mapped page? */ 78 81 WARN_ON_ONCE(1);
+7
mm/page_vma_mapped.c
··· 277 277 * cannot return prematurely, while zap_huge_pmd() has 278 278 * cleared *pmd but not decremented compound_mapcount(). 279 279 */ 280 + swp_entry_t entry = pmd_to_swp_entry(pmde); 281 + 282 + if (is_device_private_entry(entry)) { 283 + pvmw->ptl = pmd_lock(mm, pvmw->pmd); 284 + return true; 285 + } 286 + 280 287 if ((pvmw->flags & PVMW_SYNC) && 281 288 thp_vma_suitable_order(vma, pvmw->address, 282 289 PMD_ORDER) &&
+20 -4
mm/rmap.c
··· 1022 1022 } else { 1023 1023 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1024 1024 pmd_t *pmd = pvmw->pmd; 1025 - pmd_t entry; 1025 + pmd_t entry = pmdp_get(pmd); 1026 1026 1027 - if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) 1027 + /* 1028 + * Please see the comment above (!pte_present). 1029 + * A non present PMD is not writable from a CPU 1030 + * perspective. 1031 + */ 1032 + if (!pmd_present(entry)) 1033 + continue; 1034 + if (!pmd_dirty(entry) && !pmd_write(entry)) 1028 1035 continue; 1029 1036 1030 1037 flush_cache_range(vma, address, ··· 2326 2319 while (page_vma_mapped_walk(&pvmw)) { 2327 2320 /* PMD-mapped THP migration entry */ 2328 2321 if (!pvmw.pte) { 2322 + __maybe_unused unsigned long pfn; 2323 + __maybe_unused pmd_t pmdval; 2324 + 2329 2325 if (flags & TTU_SPLIT_HUGE_PMD) { 2330 2326 split_huge_pmd_locked(vma, pvmw.address, 2331 2327 pvmw.pmd, true); ··· 2337 2327 break; 2338 2328 } 2339 2329 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION 2340 - subpage = folio_page(folio, 2341 - pmd_pfn(*pvmw.pmd) - folio_pfn(folio)); 2330 + pmdval = pmdp_get(pvmw.pmd); 2331 + if (likely(pmd_present(pmdval))) 2332 + pfn = pmd_pfn(pmdval); 2333 + else 2334 + pfn = swp_offset_pfn(pmd_to_swp_entry(pmdval)); 2335 + 2336 + subpage = folio_page(folio, pfn - folio_pfn(folio)); 2337 + 2342 2338 VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) || 2343 2339 !folio_test_pmd_mappable(folio), folio); 2344 2340