Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: isolate mmap internal logic to mm/vma.c

In previous commits we effected improvements to the mmap() logic in
mmap_region() and its newly introduced internal implementation function
__mmap_region().

However, as these changes are intended to be backported, we kept the delta
as small as possible and made as few changes as possible to the newly
introduced mm/vma.* files.

Take the opportunity to move this logic to mm/vma.c. This not only
isolates it, but also makes it available for later userland testing, which
can help us catch such logic errors far earlier.

Link: https://lkml.kernel.org/r/93fc2c3aa37dd30590b7e4ee067dfd832007bf7e.1729858176.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Lorenzo Stoakes and committed by
Andrew Morton
52956b0d c14f8046

+329 -330
-234
mm/mmap.c
··· 577 577 } 578 578 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 579 579 580 - /* 581 - * We account for memory if it's a private writeable mapping, 582 - * not hugepages and VM_NORESERVE wasn't set. 583 - */ 584 - static inline bool accountable_mapping(struct file *file, vm_flags_t vm_flags) 585 - { 586 - /* 587 - * hugetlb has its own accounting separate from the core VM 588 - * VM_HUGETLB may not be set yet so we cannot check for that flag. 589 - */ 590 - if (file && is_file_hugepages(file)) 591 - return false; 592 - 593 - return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; 594 - } 595 - 596 580 /** 597 581 * unmapped_area() - Find an area between the low_limit and the high_limit with 598 582 * the correct alignment and offset, all from @info. Note: current->mm is used ··· 1344 1360 VMA_ITERATOR(vmi, mm, start); 1345 1361 1346 1362 return do_vmi_munmap(&vmi, mm, start, len, uf, false); 1347 - } 1348 - 1349 - static unsigned long __mmap_region(struct file *file, unsigned long addr, 1350 - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, 1351 - struct list_head *uf) 1352 - { 1353 - struct mm_struct *mm = current->mm; 1354 - struct vm_area_struct *vma = NULL; 1355 - pgoff_t pglen = PHYS_PFN(len); 1356 - unsigned long charged = 0; 1357 - struct vma_munmap_struct vms; 1358 - struct ma_state mas_detach; 1359 - struct maple_tree mt_detach; 1360 - unsigned long end = addr + len; 1361 - int error; 1362 - VMA_ITERATOR(vmi, mm, addr); 1363 - VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff); 1364 - 1365 - vmg.file = file; 1366 - /* Find the first overlapping VMA */ 1367 - vma = vma_find(&vmi, end); 1368 - init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false); 1369 - if (vma) { 1370 - mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); 1371 - mt_on_stack(mt_detach); 1372 - mas_init(&mas_detach, &mt_detach, /* addr = */ 0); 1373 - /* Prepare to unmap any existing mapping in the area */ 1374 - error = 
vms_gather_munmap_vmas(&vms, &mas_detach); 1375 - if (error) 1376 - goto gather_failed; 1377 - 1378 - vmg.next = vms.next; 1379 - vmg.prev = vms.prev; 1380 - vma = NULL; 1381 - } else { 1382 - vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev); 1383 - } 1384 - 1385 - /* Check against address space limit. */ 1386 - if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) { 1387 - error = -ENOMEM; 1388 - goto abort_munmap; 1389 - } 1390 - 1391 - /* 1392 - * Private writable mapping: check memory availability 1393 - */ 1394 - if (accountable_mapping(file, vm_flags)) { 1395 - charged = pglen; 1396 - charged -= vms.nr_accounted; 1397 - if (charged) { 1398 - error = security_vm_enough_memory_mm(mm, charged); 1399 - if (error) 1400 - goto abort_munmap; 1401 - } 1402 - 1403 - vms.nr_accounted = 0; 1404 - vm_flags |= VM_ACCOUNT; 1405 - vmg.flags = vm_flags; 1406 - } 1407 - 1408 - /* 1409 - * clear PTEs while the vma is still in the tree so that rmap 1410 - * cannot race with the freeing later in the truncate scenario. 1411 - * This is also needed for mmap_file(), which is why vm_ops 1412 - * close function is called. 1413 - */ 1414 - vms_clean_up_area(&vms, &mas_detach); 1415 - vma = vma_merge_new_range(&vmg); 1416 - if (vma) 1417 - goto expanded; 1418 - /* 1419 - * Determine the object being mapped and call the appropriate 1420 - * specific mapper. the address has already been validated, but 1421 - * not unmapped, but the maps are removed from the list. 
1422 - */ 1423 - vma = vm_area_alloc(mm); 1424 - if (!vma) { 1425 - error = -ENOMEM; 1426 - goto unacct_error; 1427 - } 1428 - 1429 - vma_iter_config(&vmi, addr, end); 1430 - vma_set_range(vma, addr, end, pgoff); 1431 - vm_flags_init(vma, vm_flags); 1432 - vma->vm_page_prot = vm_get_page_prot(vm_flags); 1433 - 1434 - if (vma_iter_prealloc(&vmi, vma)) { 1435 - error = -ENOMEM; 1436 - goto free_vma; 1437 - } 1438 - 1439 - if (file) { 1440 - vma->vm_file = get_file(file); 1441 - error = mmap_file(file, vma); 1442 - if (error) 1443 - goto unmap_and_free_file_vma; 1444 - 1445 - /* Drivers cannot alter the address of the VMA. */ 1446 - WARN_ON_ONCE(addr != vma->vm_start); 1447 - /* 1448 - * Drivers should not permit writability when previously it was 1449 - * disallowed. 1450 - */ 1451 - VM_WARN_ON_ONCE(vm_flags != vma->vm_flags && 1452 - !(vm_flags & VM_MAYWRITE) && 1453 - (vma->vm_flags & VM_MAYWRITE)); 1454 - 1455 - vma_iter_config(&vmi, addr, end); 1456 - /* 1457 - * If vm_flags changed after mmap_file(), we should try merge 1458 - * vma again as we may succeed this time. 1459 - */ 1460 - if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) { 1461 - struct vm_area_struct *merge; 1462 - 1463 - vmg.flags = vma->vm_flags; 1464 - /* If this fails, state is reset ready for a reattempt. */ 1465 - merge = vma_merge_new_range(&vmg); 1466 - 1467 - if (merge) { 1468 - /* 1469 - * ->mmap() can change vma->vm_file and fput 1470 - * the original file. So fput the vma->vm_file 1471 - * here or we would add an extra fput for file 1472 - * and cause general protection fault 1473 - * ultimately. 1474 - */ 1475 - fput(vma->vm_file); 1476 - vm_area_free(vma); 1477 - vma = merge; 1478 - /* Update vm_flags to pick up the change. 
*/ 1479 - vm_flags = vma->vm_flags; 1480 - goto file_expanded; 1481 - } 1482 - vma_iter_config(&vmi, addr, end); 1483 - } 1484 - 1485 - vm_flags = vma->vm_flags; 1486 - } else if (vm_flags & VM_SHARED) { 1487 - error = shmem_zero_setup(vma); 1488 - if (error) 1489 - goto free_iter_vma; 1490 - } else { 1491 - vma_set_anonymous(vma); 1492 - } 1493 - 1494 - #ifdef CONFIG_SPARC64 1495 - /* TODO: Fix SPARC ADI! */ 1496 - WARN_ON_ONCE(!arch_validate_flags(vm_flags)); 1497 - #endif 1498 - 1499 - /* Lock the VMA since it is modified after insertion into VMA tree */ 1500 - vma_start_write(vma); 1501 - vma_iter_store(&vmi, vma); 1502 - mm->map_count++; 1503 - vma_link_file(vma); 1504 - 1505 - /* 1506 - * vma_merge_new_range() calls khugepaged_enter_vma() too, the below 1507 - * call covers the non-merge case. 1508 - */ 1509 - khugepaged_enter_vma(vma, vma->vm_flags); 1510 - 1511 - file_expanded: 1512 - file = vma->vm_file; 1513 - ksm_add_vma(vma); 1514 - expanded: 1515 - perf_event_mmap(vma); 1516 - 1517 - /* Unmap any existing mapping in the area */ 1518 - vms_complete_munmap_vmas(&vms, &mas_detach); 1519 - 1520 - vm_stat_account(mm, vm_flags, pglen); 1521 - if (vm_flags & VM_LOCKED) { 1522 - if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || 1523 - is_vm_hugetlb_page(vma) || 1524 - vma == get_gate_vma(current->mm)) 1525 - vm_flags_clear(vma, VM_LOCKED_MASK); 1526 - else 1527 - mm->locked_vm += pglen; 1528 - } 1529 - 1530 - if (file) 1531 - uprobe_mmap(vma); 1532 - 1533 - /* 1534 - * New (or expanded) vma always get soft dirty status. 1535 - * Otherwise user-space soft-dirty page tracker won't 1536 - * be able to distinguish situation when vma area unmapped, 1537 - * then new mapped in-place (which must be aimed as 1538 - * a completely new data area). 
1539 - */ 1540 - vm_flags_set(vma, VM_SOFTDIRTY); 1541 - 1542 - vma_set_page_prot(vma); 1543 - 1544 - return addr; 1545 - 1546 - unmap_and_free_file_vma: 1547 - fput(vma->vm_file); 1548 - vma->vm_file = NULL; 1549 - 1550 - vma_iter_set(&vmi, vma->vm_end); 1551 - /* Undo any partial mapping done by a device driver. */ 1552 - unmap_region(&vmi.mas, vma, vmg.prev, vmg.next); 1553 - free_iter_vma: 1554 - vma_iter_free(&vmi); 1555 - free_vma: 1556 - vm_area_free(vma); 1557 - unacct_error: 1558 - if (charged) 1559 - vm_unacct_memory(charged); 1560 - 1561 - abort_munmap: 1562 - vms_abort_munmap_vmas(&vms, &mas_detach); 1563 - gather_failed: 1564 - return error; 1565 1363 } 1566 1364 1567 1365 unsigned long mmap_region(struct file *file, unsigned long addr,
+320 -3
mm/vma.c
··· 1103 1103 vms->clear_ptes = false; 1104 1104 } 1105 1105 1106 - void vms_clean_up_area(struct vma_munmap_struct *vms, 1106 + static void vms_clean_up_area(struct vma_munmap_struct *vms, 1107 1107 struct ma_state *mas_detach) 1108 1108 { 1109 1109 struct vm_area_struct *vma; ··· 1126 1126 * used for the munmap() and may downgrade the lock - if requested. Everything 1127 1127 * needed to be done once the vma maple tree is updated. 1128 1128 */ 1129 - void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, 1129 + static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, 1130 1130 struct ma_state *mas_detach) 1131 1131 { 1132 1132 struct vm_area_struct *vma; ··· 1168 1168 } 1169 1169 1170 1170 /* 1171 + * reattach_vmas() - Undo any munmap work and free resources 1172 + * @mas_detach: The maple state with the detached maple tree 1173 + * 1174 + * Reattach any detached vmas and free up the maple tree used to track the vmas. 1175 + */ 1176 + static void reattach_vmas(struct ma_state *mas_detach) 1177 + { 1178 + struct vm_area_struct *vma; 1179 + 1180 + mas_set(mas_detach, 0); 1181 + mas_for_each(mas_detach, vma, ULONG_MAX) 1182 + vma_mark_detached(vma, false); 1183 + 1184 + __mt_destroy(mas_detach->tree); 1185 + } 1186 + 1187 + /* 1171 1188 * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree 1172 1189 * for removal at a later date. Handles splitting first and last if necessary 1173 1190 * and marking the vmas as isolated. 
··· 1194 1177 * 1195 1178 * Return: 0 on success, error otherwise 1196 1179 */ 1197 - int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, 1180 + static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, 1198 1181 struct ma_state *mas_detach) 1199 1182 { 1200 1183 struct vm_area_struct *next = NULL; ··· 1330 1313 start_split_failed: 1331 1314 map_count_exceeded: 1332 1315 return error; 1316 + } 1317 + 1318 + /* 1319 + * init_vma_munmap() - Initializer wrapper for vma_munmap_struct 1320 + * @vms: The vma munmap struct 1321 + * @vmi: The vma iterator 1322 + * @vma: The first vm_area_struct to munmap 1323 + * @start: The aligned start address to munmap 1324 + * @end: The aligned end address to munmap 1325 + * @uf: The userfaultfd list_head 1326 + * @unlock: Unlock after the operation. Only unlocked on success 1327 + */ 1328 + static void init_vma_munmap(struct vma_munmap_struct *vms, 1329 + struct vma_iterator *vmi, struct vm_area_struct *vma, 1330 + unsigned long start, unsigned long end, struct list_head *uf, 1331 + bool unlock) 1332 + { 1333 + vms->vmi = vmi; 1334 + vms->vma = vma; 1335 + if (vma) { 1336 + vms->start = start; 1337 + vms->end = end; 1338 + } else { 1339 + vms->start = vms->end = 0; 1340 + } 1341 + vms->unlock = unlock; 1342 + vms->uf = uf; 1343 + vms->vma_count = 0; 1344 + vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0; 1345 + vms->exec_vm = vms->stack_vm = vms->data_vm = 0; 1346 + vms->unmap_start = FIRST_USER_ADDRESS; 1347 + vms->unmap_end = USER_PGTABLES_CEILING; 1348 + vms->clear_ptes = false; 1333 1349 } 1334 1350 1335 1351 /* ··· 2118 2068 } 2119 2069 2120 2070 mutex_unlock(&mm_all_locks_mutex); 2071 + } 2072 + 2073 + /* 2074 + * We account for memory if it's a private writeable mapping, 2075 + * not hugepages and VM_NORESERVE wasn't set. 
2076 + */ 2077 + static bool accountable_mapping(struct file *file, vm_flags_t vm_flags) 2078 + { 2079 + /* 2080 + * hugetlb has its own accounting separate from the core VM 2081 + * VM_HUGETLB may not be set yet so we cannot check for that flag. 2082 + */ 2083 + if (file && is_file_hugepages(file)) 2084 + return false; 2085 + 2086 + return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; 2087 + } 2088 + 2089 + /* 2090 + * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap() 2091 + * operation. 2092 + * @vms: The vma unmap structure 2093 + * @mas_detach: The maple state with the detached maple tree 2094 + * 2095 + * Reattach any detached vmas, free up the maple tree used to track the vmas. 2096 + * If that's not possible because the ptes are cleared (and vm_ops->closed() may 2097 + * have been called), then a NULL is written over the vmas and the vmas are 2098 + * removed (munmap() completed). 2099 + */ 2100 + static void vms_abort_munmap_vmas(struct vma_munmap_struct *vms, 2101 + struct ma_state *mas_detach) 2102 + { 2103 + struct ma_state *mas = &vms->vmi->mas; 2104 + 2105 + if (!vms->nr_pages) 2106 + return; 2107 + 2108 + if (vms->clear_ptes) 2109 + return reattach_vmas(mas_detach); 2110 + 2111 + /* 2112 + * Aborting cannot just call the vm_ops open() because they are often 2113 + * not symmetrical and state data has been lost. Resort to the old 2114 + * failure method of leaving a gap where the MAP_FIXED mapping failed. 
2115 + */ 2116 + mas_set_range(mas, vms->start, vms->end - 1); 2117 + mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL); 2118 + /* Clean up the insertion of the unfortunate gap */ 2119 + vms_complete_munmap_vmas(vms, mas_detach); 2120 + } 2121 + 2122 + unsigned long __mmap_region(struct file *file, unsigned long addr, 2123 + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, 2124 + struct list_head *uf) 2125 + { 2126 + struct mm_struct *mm = current->mm; 2127 + struct vm_area_struct *vma = NULL; 2128 + pgoff_t pglen = PHYS_PFN(len); 2129 + unsigned long charged = 0; 2130 + struct vma_munmap_struct vms; 2131 + struct ma_state mas_detach; 2132 + struct maple_tree mt_detach; 2133 + unsigned long end = addr + len; 2134 + int error; 2135 + VMA_ITERATOR(vmi, mm, addr); 2136 + VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff); 2137 + 2138 + vmg.file = file; 2139 + /* Find the first overlapping VMA */ 2140 + vma = vma_find(&vmi, end); 2141 + init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false); 2142 + if (vma) { 2143 + mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); 2144 + mt_on_stack(mt_detach); 2145 + mas_init(&mas_detach, &mt_detach, /* addr = */ 0); 2146 + /* Prepare to unmap any existing mapping in the area */ 2147 + error = vms_gather_munmap_vmas(&vms, &mas_detach); 2148 + if (error) 2149 + goto gather_failed; 2150 + 2151 + vmg.next = vms.next; 2152 + vmg.prev = vms.prev; 2153 + vma = NULL; 2154 + } else { 2155 + vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev); 2156 + } 2157 + 2158 + /* Check against address space limit. 
*/ 2159 + if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) { 2160 + error = -ENOMEM; 2161 + goto abort_munmap; 2162 + } 2163 + 2164 + /* 2165 + * Private writable mapping: check memory availability 2166 + */ 2167 + if (accountable_mapping(file, vm_flags)) { 2168 + charged = pglen; 2169 + charged -= vms.nr_accounted; 2170 + if (charged) { 2171 + error = security_vm_enough_memory_mm(mm, charged); 2172 + if (error) 2173 + goto abort_munmap; 2174 + } 2175 + 2176 + vms.nr_accounted = 0; 2177 + vm_flags |= VM_ACCOUNT; 2178 + vmg.flags = vm_flags; 2179 + } 2180 + 2181 + /* 2182 + * clear PTEs while the vma is still in the tree so that rmap 2183 + * cannot race with the freeing later in the truncate scenario. 2184 + * This is also needed for mmap_file(), which is why vm_ops 2185 + * close function is called. 2186 + */ 2187 + vms_clean_up_area(&vms, &mas_detach); 2188 + vma = vma_merge_new_range(&vmg); 2189 + if (vma) 2190 + goto expanded; 2191 + /* 2192 + * Determine the object being mapped and call the appropriate 2193 + * specific mapper. the address has already been validated, but 2194 + * not unmapped, but the maps are removed from the list. 2195 + */ 2196 + vma = vm_area_alloc(mm); 2197 + if (!vma) { 2198 + error = -ENOMEM; 2199 + goto unacct_error; 2200 + } 2201 + 2202 + vma_iter_config(&vmi, addr, end); 2203 + vma_set_range(vma, addr, end, pgoff); 2204 + vm_flags_init(vma, vm_flags); 2205 + vma->vm_page_prot = vm_get_page_prot(vm_flags); 2206 + 2207 + if (vma_iter_prealloc(&vmi, vma)) { 2208 + error = -ENOMEM; 2209 + goto free_vma; 2210 + } 2211 + 2212 + if (file) { 2213 + vma->vm_file = get_file(file); 2214 + error = mmap_file(file, vma); 2215 + if (error) 2216 + goto unmap_and_free_file_vma; 2217 + 2218 + /* Drivers cannot alter the address of the VMA. */ 2219 + WARN_ON_ONCE(addr != vma->vm_start); 2220 + /* 2221 + * Drivers should not permit writability when previously it was 2222 + * disallowed. 
2223 + */ 2224 + VM_WARN_ON_ONCE(vm_flags != vma->vm_flags && 2225 + !(vm_flags & VM_MAYWRITE) && 2226 + (vma->vm_flags & VM_MAYWRITE)); 2227 + 2228 + vma_iter_config(&vmi, addr, end); 2229 + /* 2230 + * If vm_flags changed after mmap_file(), we should try merge 2231 + * vma again as we may succeed this time. 2232 + */ 2233 + if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) { 2234 + struct vm_area_struct *merge; 2235 + 2236 + vmg.flags = vma->vm_flags; 2237 + /* If this fails, state is reset ready for a reattempt. */ 2238 + merge = vma_merge_new_range(&vmg); 2239 + 2240 + if (merge) { 2241 + /* 2242 + * ->mmap() can change vma->vm_file and fput 2243 + * the original file. So fput the vma->vm_file 2244 + * here or we would add an extra fput for file 2245 + * and cause general protection fault 2246 + * ultimately. 2247 + */ 2248 + fput(vma->vm_file); 2249 + vm_area_free(vma); 2250 + vma = merge; 2251 + /* Update vm_flags to pick up the change. */ 2252 + vm_flags = vma->vm_flags; 2253 + goto file_expanded; 2254 + } 2255 + vma_iter_config(&vmi, addr, end); 2256 + } 2257 + 2258 + vm_flags = vma->vm_flags; 2259 + } else if (vm_flags & VM_SHARED) { 2260 + error = shmem_zero_setup(vma); 2261 + if (error) 2262 + goto free_iter_vma; 2263 + } else { 2264 + vma_set_anonymous(vma); 2265 + } 2266 + 2267 + #ifdef CONFIG_SPARC64 2268 + /* TODO: Fix SPARC ADI! */ 2269 + WARN_ON_ONCE(!arch_validate_flags(vm_flags)); 2270 + #endif 2271 + 2272 + /* Lock the VMA since it is modified after insertion into VMA tree */ 2273 + vma_start_write(vma); 2274 + vma_iter_store(&vmi, vma); 2275 + mm->map_count++; 2276 + vma_link_file(vma); 2277 + 2278 + /* 2279 + * vma_merge_new_range() calls khugepaged_enter_vma() too, the below 2280 + * call covers the non-merge case. 
2281 + */ 2282 + khugepaged_enter_vma(vma, vma->vm_flags); 2283 + 2284 + file_expanded: 2285 + file = vma->vm_file; 2286 + ksm_add_vma(vma); 2287 + expanded: 2288 + perf_event_mmap(vma); 2289 + 2290 + /* Unmap any existing mapping in the area */ 2291 + vms_complete_munmap_vmas(&vms, &mas_detach); 2292 + 2293 + vm_stat_account(mm, vm_flags, pglen); 2294 + if (vm_flags & VM_LOCKED) { 2295 + if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || 2296 + is_vm_hugetlb_page(vma) || 2297 + vma == get_gate_vma(current->mm)) 2298 + vm_flags_clear(vma, VM_LOCKED_MASK); 2299 + else 2300 + mm->locked_vm += pglen; 2301 + } 2302 + 2303 + if (file) 2304 + uprobe_mmap(vma); 2305 + 2306 + /* 2307 + * New (or expanded) vma always get soft dirty status. 2308 + * Otherwise user-space soft-dirty page tracker won't 2309 + * be able to distinguish situation when vma area unmapped, 2310 + * then new mapped in-place (which must be aimed as 2311 + * a completely new data area). 2312 + */ 2313 + vm_flags_set(vma, VM_SOFTDIRTY); 2314 + 2315 + vma_set_page_prot(vma); 2316 + 2317 + return addr; 2318 + 2319 + unmap_and_free_file_vma: 2320 + fput(vma->vm_file); 2321 + vma->vm_file = NULL; 2322 + 2323 + vma_iter_set(&vmi, vma->vm_end); 2324 + /* Undo any partial mapping done by a device driver. */ 2325 + unmap_region(&vmi.mas, vma, vmg.prev, vmg.next); 2326 + free_iter_vma: 2327 + vma_iter_free(&vmi); 2328 + free_vma: 2329 + vm_area_free(vma); 2330 + unacct_error: 2331 + if (charged) 2332 + vm_unacct_memory(charged); 2333 + 2334 + abort_munmap: 2335 + vms_abort_munmap_vmas(&vms, &mas_detach); 2336 + gather_failed: 2337 + return error; 2121 2338 }
+4 -93
mm/vma.h
··· 165 165 return 0; 166 166 } 167 167 168 - #ifdef CONFIG_MMU 169 - /* 170 - * init_vma_munmap() - Initializer wrapper for vma_munmap_struct 171 - * @vms: The vma munmap struct 172 - * @vmi: The vma iterator 173 - * @vma: The first vm_area_struct to munmap 174 - * @start: The aligned start address to munmap 175 - * @end: The aligned end address to munmap 176 - * @uf: The userfaultfd list_head 177 - * @unlock: Unlock after the operation. Only unlocked on success 178 - */ 179 - static inline void init_vma_munmap(struct vma_munmap_struct *vms, 180 - struct vma_iterator *vmi, struct vm_area_struct *vma, 181 - unsigned long start, unsigned long end, struct list_head *uf, 182 - bool unlock) 183 - { 184 - vms->vmi = vmi; 185 - vms->vma = vma; 186 - if (vma) { 187 - vms->start = start; 188 - vms->end = end; 189 - } else { 190 - vms->start = vms->end = 0; 191 - } 192 - vms->unlock = unlock; 193 - vms->uf = uf; 194 - vms->vma_count = 0; 195 - vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0; 196 - vms->exec_vm = vms->stack_vm = vms->data_vm = 0; 197 - vms->unmap_start = FIRST_USER_ADDRESS; 198 - vms->unmap_end = USER_PGTABLES_CEILING; 199 - vms->clear_ptes = false; 200 - } 201 - #endif 202 - 203 - int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, 204 - struct ma_state *mas_detach); 205 - 206 - void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, 207 - struct ma_state *mas_detach); 208 - 209 - void vms_clean_up_area(struct vma_munmap_struct *vms, 210 - struct ma_state *mas_detach); 211 - 212 - /* 213 - * reattach_vmas() - Undo any munmap work and free resources 214 - * @mas_detach: The maple state with the detached maple tree 215 - * 216 - * Reattach any detached vmas and free up the maple tree used to track the vmas. 
217 - */ 218 - static inline void reattach_vmas(struct ma_state *mas_detach) 219 - { 220 - struct vm_area_struct *vma; 221 - 222 - mas_set(mas_detach, 0); 223 - mas_for_each(mas_detach, vma, ULONG_MAX) 224 - vma_mark_detached(vma, false); 225 - 226 - __mt_destroy(mas_detach->tree); 227 - } 228 - 229 - /* 230 - * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap() 231 - * operation. 232 - * @vms: The vma unmap structure 233 - * @mas_detach: The maple state with the detached maple tree 234 - * 235 - * Reattach any detached vmas, free up the maple tree used to track the vmas. 236 - * If that's not possible because the ptes are cleared (and vm_ops->closed() may 237 - * have been called), then a NULL is written over the vmas and the vmas are 238 - * removed (munmap() completed). 239 - */ 240 - static inline void vms_abort_munmap_vmas(struct vma_munmap_struct *vms, 241 - struct ma_state *mas_detach) 242 - { 243 - struct ma_state *mas = &vms->vmi->mas; 244 - if (!vms->nr_pages) 245 - return; 246 - 247 - if (vms->clear_ptes) 248 - return reattach_vmas(mas_detach); 249 - 250 - /* 251 - * Aborting cannot just call the vm_ops open() because they are often 252 - * not symmetrical and state data has been lost. Resort to the old 253 - * failure method of leaving a gap where the MAP_FIXED mapping failed. 
254 - */ 255 - mas_set_range(mas, vms->start, vms->end - 1); 256 - mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL); 257 - /* Clean up the insertion of the unfortunate gap */ 258 - vms_complete_munmap_vmas(vms, mas_detach); 259 - } 260 - 261 168 int 262 169 do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, 263 170 struct mm_struct *mm, unsigned long start, ··· 242 335 243 336 int mm_take_all_locks(struct mm_struct *mm); 244 337 void mm_drop_all_locks(struct mm_struct *mm); 338 + 339 + unsigned long __mmap_region(struct file *file, unsigned long addr, 340 + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, 341 + struct list_head *uf); 245 342 246 343 static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) 247 344 {
+5
mm/vma_internal.h
··· 17 17 #include <linux/file.h> 18 18 #include <linux/fs.h> 19 19 #include <linux/huge_mm.h> 20 + #include <linux/hugetlb.h> 20 21 #include <linux/hugetlb_inline.h> 21 22 #include <linux/kernel.h> 23 + #include <linux/ksm.h> 22 24 #include <linux/khugepaged.h> 23 25 #include <linux/list.h> 24 26 #include <linux/maple_tree.h> ··· 34 32 #include <linux/mmu_context.h> 35 33 #include <linux/mutex.h> 36 34 #include <linux/pagemap.h> 35 + #include <linux/perf_event.h> 37 36 #include <linux/pfn.h> 38 37 #include <linux/rcupdate.h> 39 38 #include <linux/rmap.h> 40 39 #include <linux/rwsem.h> 41 40 #include <linux/sched/signal.h> 41 + #include <linux/security.h> 42 + #include <linux/shmem_fs.h> 42 43 #include <linux/swap.h> 43 44 #include <linux/uprobes.h> 44 45 #include <linux/userfaultfd_k.h>