Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/huge_memory: add device-private THP support to PMD operations

Extend core huge page management functions to handle device-private THP
entries. This enables proper handling of large device-private folios in
fundamental MM operations.

The following functions have been updated:

- copy_huge_pmd(): Handle device-private entries during fork/clone
- zap_huge_pmd(): Properly free device-private THP during munmap
- change_huge_pmd(): Support protection changes on device-private THP
- __pte_offset_map(): Add device-private entry awareness

Link: https://lkml.kernel.org/r/20251001065707.920170-4-balbirs@nvidia.com
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Balbir Singh and committed by
Andrew Morton
368076f5 3a5a0655

+80 -10
+32
include/linux/swapops.h
··· 594 594 } 595 595 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ 596 596 597 + #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) 598 + 599 + /** 600 + * is_pmd_device_private_entry() - Check if PMD contains a device private swap entry 601 + * @pmd: The PMD to check 602 + * 603 + * Returns true if the PMD contains a swap entry that represents a device private 604 + * page mapping. This is used for zone device private pages that have been 605 + * swapped out but still need special handling during various memory management 606 + * operations. 607 + * 608 + * Return: 1 if PMD contains device private entry, 0 otherwise 609 + */ 610 + static inline int is_pmd_device_private_entry(pmd_t pmd) 611 + { 612 + return is_swap_pmd(pmd) && is_device_private_entry(pmd_to_swp_entry(pmd)); 613 + } 614 + 615 + #else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ 616 + 617 + static inline int is_pmd_device_private_entry(pmd_t pmd) 618 + { 619 + return 0; 620 + } 621 + 622 + #endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ 623 + 597 624 static inline int non_swap_entry(swp_entry_t entry) 598 625 { 599 626 return swp_type(entry) >= MAX_SWAPFILES; 627 + } 628 + 629 + static inline int is_pmd_non_present_folio_entry(pmd_t pmd) 630 + { 631 + return is_pmd_migration_entry(pmd) || is_pmd_device_private_entry(pmd); 600 632 } 601 633 602 634 #endif /* CONFIG_MMU */
+47 -9
mm/huge_memory.c
··· 1704 1704 if (unlikely(is_swap_pmd(pmd))) { 1705 1705 swp_entry_t entry = pmd_to_swp_entry(pmd); 1706 1706 1707 - VM_BUG_ON(!is_pmd_migration_entry(pmd)); 1708 - if (!is_readable_migration_entry(entry)) { 1709 - entry = make_readable_migration_entry( 1710 - swp_offset(entry)); 1707 + VM_WARN_ON(!is_pmd_non_present_folio_entry(pmd)); 1708 + 1709 + if (is_writable_migration_entry(entry) || 1710 + is_readable_exclusive_migration_entry(entry)) { 1711 + entry = make_readable_migration_entry(swp_offset(entry)); 1711 1712 pmd = swp_entry_to_pmd(entry); 1712 1713 if (pmd_swp_soft_dirty(*src_pmd)) 1713 1714 pmd = pmd_swp_mksoft_dirty(pmd); 1714 1715 if (pmd_swp_uffd_wp(*src_pmd)) 1715 1716 pmd = pmd_swp_mkuffd_wp(pmd); 1716 1717 set_pmd_at(src_mm, addr, src_pmd, pmd); 1718 + } else if (is_device_private_entry(entry)) { 1719 + /* 1720 + * For device private entries, since there are no 1721 + * read exclusive entries, writable = !readable 1722 + */ 1723 + if (is_writable_device_private_entry(entry)) { 1724 + entry = make_readable_device_private_entry(swp_offset(entry)); 1725 + pmd = swp_entry_to_pmd(entry); 1726 + 1727 + if (pmd_swp_soft_dirty(*src_pmd)) 1728 + pmd = pmd_swp_mksoft_dirty(pmd); 1729 + if (pmd_swp_uffd_wp(*src_pmd)) 1730 + pmd = pmd_swp_mkuffd_wp(pmd); 1731 + set_pmd_at(src_mm, addr, src_pmd, pmd); 1732 + } 1733 + 1734 + src_folio = pfn_swap_entry_folio(entry); 1735 + VM_WARN_ON(!folio_test_large(src_folio)); 1736 + 1737 + folio_get(src_folio); 1738 + /* 1739 + * folio_try_dup_anon_rmap_pmd does not fail for 1740 + * device private entries. 1741 + */ 1742 + folio_try_dup_anon_rmap_pmd(src_folio, &src_folio->page, 1743 + dst_vma, src_vma); 1717 1744 } 1745 + 1718 1746 add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); 1719 1747 mm_inc_nr_ptes(dst_mm); 1720 1748 pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); ··· 2240 2212 folio_remove_rmap_pmd(folio, page, vma); 2241 2213 WARN_ON_ONCE(folio_mapcount(folio) < 0); 2242 2214 VM_BUG_ON_PAGE(!PageHead(page), page); 2243 - } else if (thp_migration_supported()) { 2215 + } else if (is_pmd_non_present_folio_entry(orig_pmd)) { 2244 2216 swp_entry_t entry; 2245 2217 2246 - VM_BUG_ON(!is_pmd_migration_entry(orig_pmd)); 2247 2218 entry = pmd_to_swp_entry(orig_pmd); 2248 2219 folio = pfn_swap_entry_folio(entry); 2249 2220 flush_needed = 0; 2250 - } else 2251 - WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!"); 2221 + 2222 + if (!thp_migration_supported()) 2223 + WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!"); 2224 + } 2252 2225 2253 2226 if (folio_test_anon(folio)) { 2254 2227 zap_deposited_table(tlb->mm, pmd); ··· 2267 2238 if (flush_needed && pmd_young(orig_pmd) && 2268 2239 likely(vma_has_recency(vma))) 2269 2240 folio_mark_accessed(folio); 2241 + } 2242 + 2243 + if (folio_is_device_private(folio)) { 2244 + folio_remove_rmap_pmd(folio, &folio->page, vma); 2245 + WARN_ON_ONCE(folio_mapcount(folio) < 0); 2246 + folio_put(folio); 2270 2247 } 2271 2248 2272 2249 spin_unlock(ptl); ··· 2403 2368 struct folio *folio = pfn_swap_entry_folio(entry); 2404 2369 pmd_t newpmd; 2405 2370 2406 - VM_BUG_ON(!is_pmd_migration_entry(*pmd)); 2371 + VM_WARN_ON(!is_pmd_non_present_folio_entry(*pmd)); 2407 2372 if (is_writable_migration_entry(entry)) { 2408 2373 /* 2409 2374 * A protection check is difficult so ··· 2416 2381 newpmd = swp_entry_to_pmd(entry); 2417 2382 if (pmd_swp_soft_dirty(*pmd)) 2418 2383 newpmd = pmd_swp_mksoft_dirty(newpmd); 2384 + } else if (is_writable_device_private_entry(entry)) { 2385 + entry = make_readable_device_private_entry(swp_offset(entry)); 2386 + newpmd = swp_entry_to_pmd(entry); 2419 2387 } else { 2420 2388 newpmd = *pmd; 2421 2389 }
+1 -1
mm/pgtable-generic.c
··· 292 292 293 293 if (pmdvalp) 294 294 *pmdvalp = pmdval; 295 - if (unlikely(pmd_none(pmdval) || is_pmd_migration_entry(pmdval))) 295 + if (unlikely(pmd_none(pmdval) || !pmd_present(pmdval))) 296 296 goto nomap; 297 297 if (unlikely(pmd_trans_huge(pmdval))) 298 298 goto nomap;