Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: mprotect: convert to folio_can_map_prot_numa()

The prot_numa_skip() name is misleading: besides checking whether to skip
prot NUMA, it also updates the folio access time.  Rename it to
folio_can_map_prot_numa() and clean it up a bit by dropping the ret
variable and returning the value directly instead of using goto style.

Add a new helper, vma_is_single_threaded_private(), to check whether a VMA
is a single-threaded private VMA, and make folio_can_map_prot_numa() a
non-static function so that both can be reused in change_huge_pmd().
Since folio_can_map_prot_numa() will be shared by different paths, move it
near change_prot_numa() in mempolicy.c.

Link: https://lkml.kernel.org/r/20251023113737.3572790-4-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Kefeng Wang and committed by
Andrew Morton
ca43034c 6e97624d

+86 -62
+20
mm/internal.h
··· 1378 1378 1379 1379 void __vunmap_range_noflush(unsigned long start, unsigned long end); 1380 1380 1381 + static inline bool vma_is_single_threaded_private(struct vm_area_struct *vma) 1382 + { 1383 + if (vma->vm_flags & VM_SHARED) 1384 + return false; 1385 + 1386 + return atomic_read(&vma->vm_mm->mm_users) == 1; 1387 + } 1388 + 1389 + #ifdef CONFIG_NUMA_BALANCING 1390 + bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma, 1391 + bool is_private_single_threaded); 1392 + 1393 + #else 1394 + static inline bool folio_can_map_prot_numa(struct folio *folio, 1395 + struct vm_area_struct *vma, bool is_private_single_threaded) 1396 + { 1397 + return false; 1398 + } 1399 + #endif 1400 + 1381 1401 int numa_migrate_check(struct folio *folio, struct vm_fault *vmf, 1382 1402 unsigned long addr, int *flags, bool writable, 1383 1403 int *last_cpupid);
+61
mm/mempolicy.c
··· 85 85 #include <linux/sched.h> 86 86 #include <linux/sched/mm.h> 87 87 #include <linux/sched/numa_balancing.h> 88 + #include <linux/sched/sysctl.h> 88 89 #include <linux/sched/task.h> 89 90 #include <linux/nodemask.h> 90 91 #include <linux/cpuset.h> ··· 100 99 #include <linux/swap.h> 101 100 #include <linux/seq_file.h> 102 101 #include <linux/proc_fs.h> 102 + #include <linux/memory-tiers.h> 103 103 #include <linux/migrate.h> 104 104 #include <linux/ksm.h> 105 105 #include <linux/rmap.h> ··· 805 803 } 806 804 807 805 #ifdef CONFIG_NUMA_BALANCING 806 + /** 807 + * folio_can_map_prot_numa() - check whether the folio can map prot numa 808 + * @folio: The folio whose mapping considered for being made NUMA hintable 809 + * @vma: The VMA that the folio belongs to. 810 + * @is_private_single_threaded: Is this a single-threaded private VMA or not 811 + * 812 + * This function checks to see if the folio actually indicates that 813 + * we need to make the mapping one which causes a NUMA hinting fault, 814 + * as there are cases where it's simply unnecessary, and the folio's 815 + * access time is adjusted for memory tiering if prot numa needed. 816 + * 817 + * Return: True if the mapping of the folio needs to be changed, false otherwise. 818 + */ 819 + bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma, 820 + bool is_private_single_threaded) 821 + { 822 + int nid; 823 + 824 + if (!folio || folio_is_zone_device(folio) || folio_test_ksm(folio)) 825 + return false; 826 + 827 + /* Also skip shared copy-on-write folios */ 828 + if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio)) 829 + return false; 830 + 831 + /* Folios are pinned and can't be migrated */ 832 + if (folio_maybe_dma_pinned(folio)) 833 + return false; 834 + 835 + /* 836 + * While migration can move some dirty folios, 837 + * it cannot move them all from MIGRATE_ASYNC 838 + * context. 
839 + */ 840 + if (folio_is_file_lru(folio) && folio_test_dirty(folio)) 841 + return false; 842 + 843 + /* 844 + * Don't mess with PTEs if folio is already on the node 845 + * a single-threaded process is running on. 846 + */ 847 + nid = folio_nid(folio); 848 + if (is_private_single_threaded && (nid == numa_node_id())) 849 + return false; 850 + 851 + /* 852 + * Skip scanning top tier node if normal numa 853 + * balancing is disabled 854 + */ 855 + if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && 856 + node_is_toptier(nid)) 857 + return false; 858 + 859 + if (folio_use_access_time(folio)) 860 + folio_xchg_access_time(folio, jiffies_to_msecs(jiffies)); 861 + 862 + return true; 863 + } 864 + 808 865 /* 809 866 * This is used to mark a range of virtual addresses to be inaccessible. 810 867 * These are later cleared by a NUMA hinting fault. Depending on these
+5 -62
mm/mprotect.c
··· 29 29 #include <linux/uaccess.h> 30 30 #include <linux/mm_inline.h> 31 31 #include <linux/pgtable.h> 32 - #include <linux/sched/sysctl.h> 33 32 #include <linux/userfaultfd_k.h> 34 - #include <linux/memory-tiers.h> 35 33 #include <uapi/linux/mman.h> 36 34 #include <asm/cacheflush.h> 37 35 #include <asm/mmu_context.h> ··· 114 116 return 1; 115 117 116 118 return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr_ptes, flags); 117 - } 118 - 119 - static bool prot_numa_skip(struct vm_area_struct *vma, int target_node, 120 - struct folio *folio) 121 - { 122 - bool ret = true; 123 - bool toptier; 124 - int nid; 125 - 126 - if (!folio) 127 - goto skip; 128 - 129 - if (folio_is_zone_device(folio) || folio_test_ksm(folio)) 130 - goto skip; 131 - 132 - /* Also skip shared copy-on-write folios */ 133 - if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio)) 134 - goto skip; 135 - 136 - /* Folios are pinned and can't be migrated */ 137 - if (folio_maybe_dma_pinned(folio)) 138 - goto skip; 139 - 140 - /* 141 - * While migration can move some dirty pages, 142 - * it cannot move them all from MIGRATE_ASYNC 143 - * context. 144 - */ 145 - if (folio_is_file_lru(folio) && folio_test_dirty(folio)) 146 - goto skip; 147 - 148 - /* 149 - * Don't mess with PTEs if page is already on the node 150 - * a single-threaded process is running on. 
151 - */ 152 - nid = folio_nid(folio); 153 - if (target_node == nid) 154 - goto skip; 155 - 156 - toptier = node_is_toptier(nid); 157 - 158 - /* 159 - * Skip scanning top tier node if normal numa 160 - * balancing is disabled 161 - */ 162 - if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier) 163 - goto skip; 164 - 165 - ret = false; 166 - if (folio_use_access_time(folio)) 167 - folio_xchg_access_time(folio, jiffies_to_msecs(jiffies)); 168 - 169 - skip: 170 - return ret; 171 119 } 172 120 173 121 /* Set nr_ptes number of ptes, starting from idx */ ··· 218 274 pte_t *pte, oldpte; 219 275 spinlock_t *ptl; 220 276 long pages = 0; 221 - int target_node = NUMA_NO_NODE; 277 + bool is_private_single_threaded; 222 278 bool prot_numa = cp_flags & MM_CP_PROT_NUMA; 223 279 bool uffd_wp = cp_flags & MM_CP_UFFD_WP; 224 280 bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; ··· 229 285 if (!pte) 230 286 return -EAGAIN; 231 287 232 - /* Get target node for single threaded private VMAs */ 233 - if (prot_numa && !(vma->vm_flags & VM_SHARED) && 234 - atomic_read(&vma->vm_mm->mm_users) == 1) 235 - target_node = numa_node_id(); 288 + if (prot_numa) 289 + is_private_single_threaded = vma_is_single_threaded_private(vma); 236 290 237 291 flush_tlb_batched_pending(vma->vm_mm); 238 292 arch_enter_lazy_mmu_mode(); ··· 257 315 * pages. See similar comment in change_huge_pmd. 258 316 */ 259 317 if (prot_numa && 260 - prot_numa_skip(vma, target_node, folio)) { 318 + !folio_can_map_prot_numa(folio, vma, 319 + is_private_single_threaded)) { 261 320 262 321 /* determine batch to skip */ 263 322 nr_ptes = mprotect_folio_pte_batch(folio,