Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/vma: use unmap_desc in exit_mmap() and vms_clear_ptes()

Convert vms_clear_ptes() to pass an unmap_desc to unmap_vmas() instead of
a large argument list. UNMAP_STATE() cannot be used here because the vma
iterator in the vms does not point to the correct maple state
(mas_detach), and tree_end would be set incorrectly. Setting up the
descriptor fields manually avoids initializing the struct incorrectly and
doing extra work to get the correct pagetable range.

exit_mmap() also calls unmap_vmas() with many arguments. Using the
unmap_all_init() function to set the unmap descriptor for all vmas makes
this a bit easier to read.

An update to the vma test code is necessary to ensure that testing
continues to function.

No functional changes intended.

Link: https://lkml.kernel.org/r/20260121164946.2093480-10-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: SeongJae Park <sj@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Liam R. Howlett and committed by Andrew Morton
0df5a8d3 5b6626a7

+53 -25
-4
include/linux/mm.h
··· 2625 2625 zap_page_range_single(vma, vma->vm_start, 2626 2626 vma->vm_end - vma->vm_start, NULL); 2627 2627 } 2628 - void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, 2629 - struct vm_area_struct *start_vma, unsigned long start, 2630 - unsigned long end, unsigned long tree_end); 2631 - 2632 2628 struct mmu_notifier_range; 2633 2629 2634 2630 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+3
mm/internal.h
··· 197 197 } 198 198 } 199 199 200 + /* unmap_vmas is in mm/memory.c */ 201 + void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap); 202 + 200 203 #ifdef CONFIG_MMU 201 204 202 205 static inline void get_anon_vma(struct anon_vma *anon_vma)
+8 -12
mm/memory.c
··· 2144 2144 /** 2145 2145 * unmap_vmas - unmap a range of memory covered by a list of vma's 2146 2146 * @tlb: address of the caller's struct mmu_gather 2147 - * @mas: the maple state 2148 - * @vma: the starting vma 2149 - * @start_addr: virtual address at which to start unmapping 2150 - * @end_addr: virtual address at which to end unmapping 2151 - * @tree_end: The maximum index to check 2147 + * @unmap: The unmap_desc 2152 2148 * 2153 2149 * Unmap all pages in the vma list. 2154 2150 * ··· 2157 2161 * ensure that any thus-far unmapped pages are flushed before unmap_vmas() 2158 2162 * drops the lock and schedules. 2159 2163 */ 2160 - void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, 2161 - struct vm_area_struct *vma, unsigned long start_addr, 2162 - unsigned long end_addr, unsigned long tree_end) 2164 + void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) 2163 2165 { 2166 + struct vm_area_struct *vma; 2164 2167 struct mmu_notifier_range range; 2165 2168 struct zap_details details = { 2166 2169 .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, ··· 2167 2172 .even_cows = true, 2168 2173 }; 2169 2174 2175 + vma = unmap->first; 2170 2176 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, 2171 - start_addr, end_addr); 2177 + unmap->vma_start, unmap->vma_end); 2172 2178 mmu_notifier_invalidate_range_start(&range); 2173 2179 do { 2174 - unsigned long start = start_addr; 2175 - unsigned long end = end_addr; 2180 + unsigned long start = unmap->vma_start; 2181 + unsigned long end = unmap->vma_end; 2176 2182 hugetlb_zap_begin(vma, &start, &end); 2177 2183 unmap_single_vma(tlb, vma, start, end, &details); 2178 2184 hugetlb_zap_end(vma, &details); 2179 - vma = mas_find(mas, tree_end - 1); 2185 + vma = mas_find(unmap->mas, unmap->tree_end - 1); 2180 2186 } while (vma); 2181 2187 mmu_notifier_invalidate_range_end(&range); 2182 2188 }
+3 -1
mm/mmap.c
··· 1277 1277 struct vm_area_struct *vma; 1278 1278 unsigned long nr_accounted = 0; 1279 1279 VMA_ITERATOR(vmi, mm, 0); 1280 + struct unmap_desc unmap; 1280 1281 1281 1282 /* mm's last user has gone, and its about to be pulled down */ 1282 1283 mmu_notifier_release(mm); ··· 1293 1292 goto destroy; 1294 1293 } 1295 1294 1295 + unmap_all_init(&unmap, &vmi, vma); 1296 1296 flush_cache_mm(mm); 1297 1297 tlb_gather_mmu_fullmm(&tlb, mm); 1298 1298 /* update_hiwater_rss(mm) here? but nobody should be looking */ 1299 1299 /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */ 1300 - unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX); 1300 + unmap_vmas(&tlb, &unmap); 1301 1301 mmap_read_unlock(mm); 1302 1302 1303 1303 /*
+22 -5
mm/vma.c
··· 480 480 481 481 tlb_gather_mmu(&tlb, mm); 482 482 update_hiwater_rss(mm); 483 - unmap_vmas(&tlb, mas, unmap->first, unmap->vma_start, unmap->vma_end, 484 - unmap->vma_end); 483 + unmap_vmas(&tlb, unmap); 485 484 mas_set(mas, unmap->tree_reset); 486 485 free_pgtables(&tlb, mas, unmap->first, unmap->pg_start, unmap->pg_end, 487 486 unmap->tree_end, unmap->mm_wr_locked); ··· 1256 1257 struct ma_state *mas_detach, bool mm_wr_locked) 1257 1258 { 1258 1259 struct mmu_gather tlb; 1260 + struct unmap_desc unmap = { 1261 + .mas = mas_detach, 1262 + .first = vms->vma, 1263 + /* start and end may be different if there is no prev or next vma. */ 1264 + .pg_start = vms->unmap_start, 1265 + .pg_end = vms->unmap_end, 1266 + .vma_start = vms->start, 1267 + .vma_end = vms->end, 1268 + /* 1269 + * The tree limits and reset differ from the normal case since it's a 1270 + * side-tree 1271 + */ 1272 + .tree_reset = 1, 1273 + .tree_end = vms->vma_count, 1274 + /* 1275 + * We can free page tables without write-locking mmap_lock because VMAs 1276 + * were isolated before we downgraded mmap_lock. 1277 + */ 1278 + .mm_wr_locked = mm_wr_locked, 1279 + }; 1259 1280 1260 1281 if (!vms->clear_ptes) /* Nothing to do */ 1261 1282 return; ··· 1287 1268 mas_set(mas_detach, 1); 1288 1269 tlb_gather_mmu(&tlb, vms->vma->vm_mm); 1289 1270 update_hiwater_rss(vms->vma->vm_mm); 1290 - unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, 1291 - vms->vma_count); 1292 - 1271 + unmap_vmas(&tlb, &unmap); 1293 1272 mas_set(mas_detach, 1); 1294 1273 /* start and end may be different if there is no prev or next vma. */ 1295 1274 free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start,
+14
mm/vma.h
··· 167 167 bool mm_wr_locked; /* If the mmap write lock is held */ 168 168 }; 169 169 170 + static inline void unmap_all_init(struct unmap_desc *unmap, 171 + struct vma_iterator *vmi, struct vm_area_struct *vma) 172 + { 173 + unmap->mas = &vmi->mas; 174 + unmap->first = vma; 175 + unmap->pg_start = FIRST_USER_ADDRESS; 176 + unmap->pg_end = USER_PGTABLES_CEILING; 177 + unmap->vma_start = 0; 178 + unmap->vma_end = ULONG_MAX; 179 + unmap->tree_end = ULONG_MAX; 180 + unmap->tree_reset = vma->vm_end; 181 + unmap->mm_wr_locked = false; 182 + } 183 + 170 184 #define UNMAP_STATE(name, _vmi, _vma, _vma_start, _vma_end, _prev, _next) \ 171 185 struct unmap_desc name = { \ 172 186 .mas = &(_vmi)->mas, \
+3 -3
tools/testing/vma/vma_internal.h
··· 1131 1131 { 1132 1132 } 1133 1133 1134 - static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, 1135 - struct vm_area_struct *vma, unsigned long start_addr, 1136 - unsigned long end_addr, unsigned long tree_end) 1134 + struct unmap_desc; 1135 + 1136 + static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) 1137 1137 { 1138 1138 } 1139 1139