Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'mm-hotfixes-stable-2022-12-02' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc hotfixes from Andrew Morton:
"15 hotfixes, 11 marked cc:stable.

Only three or four of the latter address post-6.0 issues, which is
hopefully a sign that things are converging"

* tag 'mm-hotfixes-stable-2022-12-02' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
revert "kbuild: fix -Wimplicit-function-declaration in license_is_gpl_compatible"
Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled
drm/amdgpu: temporarily disable broken Clang builds due to blown stack-frame
mm/khugepaged: invoke MMU notifiers in shmem/file collapse paths
mm/khugepaged: fix GUP-fast interaction by sending IPI
mm/khugepaged: take the right locks for page table retraction
mm: migrate: fix THP's mapcount on isolation
mm: introduce arch_has_hw_nonleaf_pmd_young()
mm: add dummy pmd_young() for architectures not having it
mm/damon/sysfs: fix wrong empty schemes assumption under online tuning in damon_sysfs_set_schemes()
tools/vm/slabinfo-gnuplot: use "grep -E" instead of "egrep"
nilfs2: fix NULL pointer dereference in nilfs_palloc_commit_free_entry()
hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing
madvise: use zap_page_range_single for madvise dontneed
mm: replace VM_WARN_ON to pr_warn if the node is offline with __GFP_THISNODE

+245 -71
+1
arch/loongarch/include/asm/pgtable.h
··· 490 490 return pmd; 491 491 } 492 492 493 + #define pmd_young pmd_young 493 494 static inline int pmd_young(pmd_t pmd) 494 495 { 495 496 return !!(pmd_val(pmd) & _PAGE_ACCESSED);
+1
arch/mips/include/asm/pgtable.h
··· 622 622 return pmd; 623 623 } 624 624 625 + #define pmd_young pmd_young 625 626 static inline int pmd_young(pmd_t pmd) 626 627 { 627 628 return !!(pmd_val(pmd) & _PAGE_ACCESSED);
+1
arch/riscv/include/asm/pgtable.h
··· 600 600 return pte_dirty(pmd_pte(pmd)); 601 601 } 602 602 603 + #define pmd_young pmd_young 603 604 static inline int pmd_young(pmd_t pmd) 604 605 { 605 606 return pte_young(pmd_pte(pmd));
+1
arch/s390/include/asm/pgtable.h
··· 763 763 return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; 764 764 } 765 765 766 + #define pmd_young pmd_young 766 767 static inline int pmd_young(pmd_t pmd) 767 768 { 768 769 return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+1
arch/sparc/include/asm/pgtable_64.h
··· 693 693 return pte_dirty(pte); 694 694 } 695 695 696 + #define pmd_young pmd_young 696 697 static inline unsigned long pmd_young(pmd_t pmd) 697 698 { 698 699 pte_t pte = __pte(pmd_val(pmd));
+9
arch/x86/include/asm/pgtable.h
··· 139 139 return pmd_flags(pmd) & _PAGE_DIRTY; 140 140 } 141 141 142 + #define pmd_young pmd_young 142 143 static inline int pmd_young(pmd_t pmd) 143 144 { 144 145 return pmd_flags(pmd) & _PAGE_ACCESSED; ··· 1438 1437 { 1439 1438 return true; 1440 1439 } 1440 + 1441 + #ifdef CONFIG_XEN_PV 1442 + #define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young 1443 + static inline bool arch_has_hw_nonleaf_pmd_young(void) 1444 + { 1445 + return !cpu_feature_enabled(X86_FEATURE_XENPV); 1446 + } 1447 + #endif 1441 1448 1442 1449 #ifdef CONFIG_PAGE_TABLE_CHECK 1443 1450 static inline bool pte_user_accessible_page(pte_t pte)
+7
drivers/gpu/drm/amd/display/Kconfig
··· 5 5 config DRM_AMD_DC 6 6 bool "AMD DC - Enable new display engine" 7 7 default y 8 + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 8 9 select SND_HDA_COMPONENT if SND_HDA_CORE 9 10 select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) 10 11 help 11 12 Choose this option if you want to use the new display engine 12 13 support for AMDGPU. This adds required support for Vega and 13 14 Raven ASICs. 15 + 16 + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) 17 + architectures built with Clang (all released versions), whereby the stack 18 + frame gets blown up to well over 5k. This would cause an immediate kernel 19 + panic on most architectures. We'll revert this when the following bug report 20 + has been resolved: https://github.com/llvm/llvm-project/issues/41896. 14 21 15 22 config DRM_AMD_DC_DCN 16 23 def_bool n
+7
fs/nilfs2/dat.c
··· 111 111 kunmap_atomic(kaddr); 112 112 113 113 nilfs_dat_commit_entry(dat, req); 114 + 115 + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { 116 + nilfs_error(dat->i_sb, 117 + "state inconsistency probably due to duplicate use of vblocknr = %llu", 118 + (unsigned long long)req->pr_entry_nr); 119 + return; 120 + } 114 121 nilfs_palloc_commit_free_entry(dat, req); 115 122 } 116 123
+4
include/asm-generic/tlb.h
··· 222 222 #define tlb_needs_table_invalidate() (true) 223 223 #endif 224 224 225 + void tlb_remove_table_sync_one(void); 226 + 225 227 #else 226 228 227 229 #ifdef tlb_needs_table_invalidate 228 230 #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE 229 231 #endif 232 + 233 + static inline void tlb_remove_table_sync_one(void) { } 230 234 231 235 #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ 232 236
+16 -2
include/linux/gfp.h
··· 210 210 return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); 211 211 } 212 212 213 + static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) 214 + { 215 + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); 216 + 217 + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) 218 + return; 219 + 220 + if (node_online(this_node)) 221 + return; 222 + 223 + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); 224 + dump_stack(); 225 + } 226 + 213 227 /* 214 228 * Allocate pages, preferring the node given as nid. The node must be valid and 215 229 * online. For more general interface, see alloc_pages_node(). ··· 232 218 __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) 233 219 { 234 220 VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); 235 - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); 221 + warn_if_node_offline(nid, gfp_mask); 236 222 237 223 return __alloc_pages(gfp_mask, order, nid, NULL); 238 224 } ··· 241 227 struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) 242 228 { 243 229 VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); 244 - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); 230 + warn_if_node_offline(nid, gfp); 245 231 246 232 return __folio_alloc(gfp, order, nid, NULL); 247 233 }
-2
include/linux/license.h
··· 2 2 #ifndef __LICENSE_H 3 3 #define __LICENSE_H 4 4 5 - #include <linux/string.h> 6 - 7 5 static inline int license_is_gpl_compatible(const char *license) 8 6 { 9 7 return (strcmp(license, "GPL") == 0
+21 -8
include/linux/mm.h
··· 1852 1852 __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); 1853 1853 } 1854 1854 1855 + /* 1856 + * Parameter block passed down to zap_pte_range in exceptional cases. 1857 + */ 1858 + struct zap_details { 1859 + struct folio *single_folio; /* Locked folio to be unmapped */ 1860 + bool even_cows; /* Zap COWed private pages too? */ 1861 + zap_flags_t zap_flags; /* Extra flags for zapping */ 1862 + }; 1863 + 1864 + /* 1865 + * Whether to drop the pte markers, for example, the uffd-wp information for 1866 + * file-backed memory. This should only be specified when we will completely 1867 + * drop the page in the mm, either by truncation or unmapping of the vma. By 1868 + * default, the flag is not set. 1869 + */ 1870 + #define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) 1871 + /* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ 1872 + #define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) 1873 + 1855 1874 #ifdef CONFIG_MMU 1856 1875 extern bool can_do_mlock(void); 1857 1876 #else ··· 1888 1869 unsigned long size); 1889 1870 void zap_page_range(struct vm_area_struct *vma, unsigned long address, 1890 1871 unsigned long size); 1872 + void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, 1873 + unsigned long size, struct zap_details *details); 1891 1874 void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, 1892 1875 struct vm_area_struct *start_vma, unsigned long start, 1893 1876 unsigned long end); ··· 3487 3466 return 0; 3488 3467 } 3489 3468 #endif 3490 - 3491 - /* 3492 - * Whether to drop the pte markers, for example, the uffd-wp information for 3493 - * file-backed memory. This should only be specified when we will completely 3494 - * drop the page in the mm, either by truncation or unmapping of the vma. By 3495 - * default, the flag is not set. 3496 - */ 3497 - #define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) 3498 3469 3499 3470 #endif /* _LINUX_MM_H */
+18
include/linux/pgtable.h
··· 165 165 return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); 166 166 } 167 167 168 + #ifndef pmd_young 169 + static inline int pmd_young(pmd_t pmd) 170 + { 171 + return 0; 172 + } 173 + #endif 174 + 168 175 #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS 169 176 extern int ptep_set_access_flags(struct vm_area_struct *vma, 170 177 unsigned long address, pte_t *ptep, ··· 265 258 return 0; 266 259 } 267 260 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 261 + #endif 262 + 263 + #ifndef arch_has_hw_nonleaf_pmd_young 264 + /* 265 + * Return whether the accessed bit in non-leaf PMD entries is supported on the 266 + * local CPU. 267 + */ 268 + static inline bool arch_has_hw_nonleaf_pmd_young(void) 269 + { 270 + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); 271 + } 268 272 #endif 269 273 270 274 #ifndef arch_has_hw_pte_young
+1
lib/Kconfig.debug
··· 399 399 default 2048 if GCC_PLUGIN_LATENT_ENTROPY 400 400 default 2048 if PARISC 401 401 default 1536 if (!64BIT && XTENSA) 402 + default 1280 if KASAN && !64BIT 402 403 default 1024 if !64BIT 403 404 default 2048 if 64BIT 404 405 help
+16 -16
mm/compaction.c
··· 985 985 } 986 986 987 987 /* 988 - * Migration will fail if an anonymous page is pinned in memory, 989 - * so avoid taking lru_lock and isolating it unnecessarily in an 990 - * admittedly racy check. 991 - */ 992 - mapping = page_mapping(page); 993 - if (!mapping && page_count(page) > page_mapcount(page)) 994 - goto isolate_fail; 995 - 996 - /* 997 - * Only allow to migrate anonymous pages in GFP_NOFS context 998 - * because those do not depend on fs locks. 999 - */ 1000 - if (!(cc->gfp_mask & __GFP_FS) && mapping) 1001 - goto isolate_fail; 1002 - 1003 - /* 1004 988 * Be careful not to clear PageLRU until after we're 1005 989 * sure the page is not being freed elsewhere -- the 1006 990 * page release code relies on it. 1007 991 */ 1008 992 if (unlikely(!get_page_unless_zero(page))) 1009 993 goto isolate_fail; 994 + 995 + /* 996 + * Migration will fail if an anonymous page is pinned in memory, 997 + * so avoid taking lru_lock and isolating it unnecessarily in an 998 + * admittedly racy check. 999 + */ 1000 + mapping = page_mapping(page); 1001 + if (!mapping && (page_count(page) - 1) > total_mapcount(page)) 1002 + goto isolate_fail_put; 1003 + 1004 + /* 1005 + * Only allow to migrate anonymous pages in GFP_NOFS context 1006 + * because those do not depend on fs locks. 1007 + */ 1008 + if (!(cc->gfp_mask & __GFP_FS) && mapping) 1009 + goto isolate_fail_put; 1010 1010 1011 1011 /* Only take pages on LRU: a check now makes later tests safe */ 1012 1012 if (!PageLRU(page))
+44 -2
mm/damon/sysfs.c
··· 2283 2283 &wmarks); 2284 2284 } 2285 2285 2286 + static void damon_sysfs_update_scheme(struct damos *scheme, 2287 + struct damon_sysfs_scheme *sysfs_scheme) 2288 + { 2289 + struct damon_sysfs_access_pattern *access_pattern = 2290 + sysfs_scheme->access_pattern; 2291 + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; 2292 + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; 2293 + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; 2294 + 2295 + scheme->pattern.min_sz_region = access_pattern->sz->min; 2296 + scheme->pattern.max_sz_region = access_pattern->sz->max; 2297 + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; 2298 + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; 2299 + scheme->pattern.min_age_region = access_pattern->age->min; 2300 + scheme->pattern.max_age_region = access_pattern->age->max; 2301 + 2302 + scheme->action = sysfs_scheme->action; 2303 + 2304 + scheme->quota.ms = sysfs_quotas->ms; 2305 + scheme->quota.sz = sysfs_quotas->sz; 2306 + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; 2307 + scheme->quota.weight_sz = sysfs_weights->sz; 2308 + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; 2309 + scheme->quota.weight_age = sysfs_weights->age; 2310 + 2311 + scheme->wmarks.metric = sysfs_wmarks->metric; 2312 + scheme->wmarks.interval = sysfs_wmarks->interval_us; 2313 + scheme->wmarks.high = sysfs_wmarks->high; 2314 + scheme->wmarks.mid = sysfs_wmarks->mid; 2315 + scheme->wmarks.low = sysfs_wmarks->low; 2316 + } 2317 + 2286 2318 static int damon_sysfs_set_schemes(struct damon_ctx *ctx, 2287 2319 struct damon_sysfs_schemes *sysfs_schemes) 2288 2320 { 2289 - int i; 2321 + struct damos *scheme, *next; 2322 + int i = 0; 2290 2323 2291 - for (i = 0; i < sysfs_schemes->nr; i++) { 2324 + damon_for_each_scheme_safe(scheme, next, ctx) { 2325 + if (i < sysfs_schemes->nr) 2326 + damon_sysfs_update_scheme(scheme, 2327 + 
sysfs_schemes->schemes_arr[i]); 2328 + else 2329 + damon_destroy_scheme(scheme); 2330 + i++; 2331 + } 2332 + 2333 + for (; i < sysfs_schemes->nr; i++) { 2292 2334 struct damos *scheme, *next; 2293 2335 2294 2336 scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
+16 -11
mm/hugetlb.c
··· 5206 5206 5207 5207 __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); 5208 5208 5209 - /* 5210 - * Unlock and free the vma lock before releasing i_mmap_rwsem. When 5211 - * the vma_lock is freed, this makes the vma ineligible for pmd 5212 - * sharing. And, i_mmap_rwsem is required to set up pmd sharing. 5213 - * This is important as page tables for this unmapped range will 5214 - * be asynchronously deleted. If the page tables are shared, there 5215 - * will be issues when accessed by someone else. 5216 - */ 5217 - __hugetlb_vma_unlock_write_free(vma); 5218 - 5219 - i_mmap_unlock_write(vma->vm_file->f_mapping); 5209 + if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ 5210 + /* 5211 + * Unlock and free the vma lock before releasing i_mmap_rwsem. 5212 + * When the vma_lock is freed, this makes the vma ineligible 5213 + * for pmd sharing. And, i_mmap_rwsem is required to set up 5214 + * pmd sharing. This is important as page tables for this 5215 + * unmapped range will be asynchronously deleted. If the page 5216 + * tables are shared, there will be issues when accessed by 5217 + * someone else. 5218 + */ 5219 + __hugetlb_vma_unlock_write_free(vma); 5220 + i_mmap_unlock_write(vma->vm_file->f_mapping); 5221 + } else { 5222 + i_mmap_unlock_write(vma->vm_file->f_mapping); 5223 + hugetlb_vma_unlock_write(vma); 5224 + } 5220 5225 } 5221 5226 5222 5227 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
+58 -4
mm/khugepaged.c
··· 1051 1051 _pmd = pmdp_collapse_flush(vma, address, pmd); 1052 1052 spin_unlock(pmd_ptl); 1053 1053 mmu_notifier_invalidate_range_end(&range); 1054 + tlb_remove_table_sync_one(); 1054 1055 1055 1056 spin_lock(pte_ptl); 1056 1057 result = __collapse_huge_page_isolate(vma, address, pte, cc, ··· 1380 1379 return SCAN_SUCCEED; 1381 1380 } 1382 1381 1382 + /* 1383 + * A note about locking: 1384 + * Trying to take the page table spinlocks would be useless here because those 1385 + * are only used to synchronize: 1386 + * 1387 + * - modifying terminal entries (ones that point to a data page, not to another 1388 + * page table) 1389 + * - installing *new* non-terminal entries 1390 + * 1391 + * Instead, we need roughly the same kind of protection as free_pgtables() or 1392 + * mm_take_all_locks() (but only for a single VMA): 1393 + * The mmap lock together with this VMA's rmap locks covers all paths towards 1394 + * the page table entries we're messing with here, except for hardware page 1395 + * table walks and lockless_pages_from_mm(). 1396 + */ 1383 1397 static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, 1384 1398 unsigned long addr, pmd_t *pmdp) 1385 1399 { 1386 - spinlock_t *ptl; 1387 1400 pmd_t pmd; 1401 + struct mmu_notifier_range range; 1388 1402 1389 1403 mmap_assert_write_locked(mm); 1390 - ptl = pmd_lock(vma->vm_mm, pmdp); 1404 + if (vma->vm_file) 1405 + lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem); 1406 + /* 1407 + * All anon_vmas attached to the VMA have the same root and are 1408 + * therefore locked by the same lock. 
1409 + */ 1410 + if (vma->anon_vma) 1411 + lockdep_assert_held_write(&vma->anon_vma->root->rwsem); 1412 + 1413 + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr, 1414 + addr + HPAGE_PMD_SIZE); 1415 + mmu_notifier_invalidate_range_start(&range); 1391 1416 pmd = pmdp_collapse_flush(vma, addr, pmdp); 1392 - spin_unlock(ptl); 1417 + tlb_remove_table_sync_one(); 1418 + mmu_notifier_invalidate_range_end(&range); 1393 1419 mm_dec_nr_ptes(mm); 1394 1420 page_table_check_pte_clear_range(mm, addr, pmd); 1395 1421 pte_free(mm, pmd_pgtable(pmd)); ··· 1467 1439 if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) 1468 1440 return SCAN_VMA_CHECK; 1469 1441 1442 + /* 1443 + * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings 1444 + * that got written to. Without this, we'd have to also lock the 1445 + * anon_vma if one exists. 1446 + */ 1447 + if (vma->anon_vma) 1448 + return SCAN_VMA_CHECK; 1449 + 1470 1450 /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ 1471 1451 if (userfaultfd_wp(vma)) 1472 1452 return SCAN_PTE_UFFD_WP; ··· 1508 1472 goto drop_hpage; 1509 1473 } 1510 1474 1475 + /* 1476 + * We need to lock the mapping so that from here on, only GUP-fast and 1477 + * hardware page walks can access the parts of the page tables that 1478 + * we're operating on. 1479 + * See collapse_and_free_pmd(). 1480 + */ 1481 + i_mmap_lock_write(vma->vm_file->f_mapping); 1482 + 1483 + /* 1484 + * This spinlock should be unnecessary: Nobody else should be accessing 1485 + * the page tables under spinlock protection here, only 1486 + * lockless_pages_from_mm() and the hardware page walker can access page 1487 + * tables while all the high-level locks are held in write mode. 
1488 + */ 1511 1489 start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); 1512 1490 result = SCAN_FAIL; 1513 1491 ··· 1576 1526 /* step 4: remove pte entries */ 1577 1527 collapse_and_free_pmd(mm, vma, haddr, pmd); 1578 1528 1529 + i_mmap_unlock_write(vma->vm_file->f_mapping); 1530 + 1579 1531 maybe_install_pmd: 1580 1532 /* step 5: install pmd entry */ 1581 1533 result = install_pmd ··· 1591 1539 1592 1540 abort: 1593 1541 pte_unmap_unlock(start_pte, ptl); 1542 + i_mmap_unlock_write(vma->vm_file->f_mapping); 1594 1543 goto drop_hpage; 1595 1544 } 1596 1545 ··· 1648 1595 * An alternative would be drop the check, but check that page 1649 1596 * table is clear before calling pmdp_collapse_flush() under 1650 1597 * ptl. It has higher chance to recover THP for the VMA, but 1651 - * has higher cost too. 1598 + * has higher cost too. It would also probably require locking 1599 + * the anon_vma. 1652 1600 */ 1653 1601 if (vma->anon_vma) { 1654 1602 result = SCAN_PAGE_ANON;
+3 -3
mm/madvise.c
··· 772 772 * Application no longer needs these pages. If the pages are dirty, 773 773 * it's OK to just throw them away. The app will be more careful about 774 774 * data it wants to keep. Be sure to free swap resources too. The 775 - * zap_page_range call sets things up for shrink_active_list to actually free 776 - * these pages later if no one else has touched them in the meantime, 775 + * zap_page_range_single call sets things up for shrink_active_list to actually 776 + * free these pages later if no one else has touched them in the meantime, 777 777 * although we could add these pages to a global reuse list for 778 778 * shrink_active_list to pick up before reclaiming other pages. 779 779 * ··· 790 790 static long madvise_dontneed_single_vma(struct vm_area_struct *vma, 791 791 unsigned long start, unsigned long end) 792 792 { 793 - zap_page_range(vma, start, end - start); 793 + zap_page_range_single(vma, start, end - start, NULL); 794 794 return 0; 795 795 } 796 796
+12 -13
mm/memory.c
··· 1341 1341 return ret; 1342 1342 } 1343 1343 1344 - /* 1345 - * Parameter block passed down to zap_pte_range in exceptional cases. 1346 - */ 1347 - struct zap_details { 1348 - struct folio *single_folio; /* Locked folio to be unmapped */ 1349 - bool even_cows; /* Zap COWed private pages too? */ 1350 - zap_flags_t zap_flags; /* Extra flags for zapping */ 1351 - }; 1352 - 1353 1344 /* Whether we should zap all COWed (private) pages too */ 1354 1345 static inline bool should_zap_cows(struct zap_details *details) 1355 1346 { ··· 1711 1720 { 1712 1721 struct mmu_notifier_range range; 1713 1722 struct zap_details details = { 1714 - .zap_flags = ZAP_FLAG_DROP_MARKER, 1723 + .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, 1715 1724 /* Careful - we need to zap private pages too! */ 1716 1725 .even_cows = true, 1717 1726 }; ··· 1765 1774 * 1766 1775 * The range must fit into one VMA. 1767 1776 */ 1768 - static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, 1777 + void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, 1769 1778 unsigned long size, struct zap_details *details) 1770 1779 { 1780 + const unsigned long end = address + size; 1771 1781 struct mmu_notifier_range range; 1772 1782 struct mmu_gather tlb; 1773 1783 1774 1784 lru_add_drain(); 1775 1785 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, 1776 - address, address + size); 1786 + address, end); 1787 + if (is_vm_hugetlb_page(vma)) 1788 + adjust_range_if_pmd_sharing_possible(vma, &range.start, 1789 + &range.end); 1777 1790 tlb_gather_mmu(&tlb, vma->vm_mm); 1778 1791 update_hiwater_rss(vma->vm_mm); 1779 1792 mmu_notifier_invalidate_range_start(&range); 1780 - unmap_single_vma(&tlb, vma, address, range.end, details); 1793 + /* 1794 + * unmap 'address-end' not 'range.start-range.end' as range 1795 + * could have been expanded for hugetlb pmd sharing. 
1796 + */ 1797 + unmap_single_vma(&tlb, vma, address, end, details); 1781 1798 mmu_notifier_invalidate_range_end(&range); 1782 1799 tlb_finish_mmu(&tlb); 1783 1800 }
+1 -3
mm/mmu_gather.c
··· 153 153 /* Simply deliver the interrupt */ 154 154 } 155 155 156 - static void tlb_remove_table_sync_one(void) 156 + void tlb_remove_table_sync_one(void) 157 157 { 158 158 /* 159 159 * This isn't an RCU grace period and hence the page-tables cannot be ··· 176 176 } 177 177 178 178 #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ 179 - 180 - static void tlb_remove_table_sync_one(void) { } 181 179 182 180 static void tlb_remove_table_free(struct mmu_table_batch *batch) 183 181 {
+5 -5
mm/vmscan.c
··· 3987 3987 goto next; 3988 3988 3989 3989 if (!pmd_trans_huge(pmd[i])) { 3990 - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && 3990 + if (arch_has_hw_nonleaf_pmd_young() && 3991 3991 get_cap(LRU_GEN_NONLEAF_YOUNG)) 3992 3992 pmdp_test_and_clear_young(vma, addr, pmd + i); 3993 3993 goto next; ··· 4085 4085 #endif 4086 4086 walk->mm_stats[MM_NONLEAF_TOTAL]++; 4087 4087 4088 - #ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG 4089 - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { 4088 + if (arch_has_hw_nonleaf_pmd_young() && 4089 + get_cap(LRU_GEN_NONLEAF_YOUNG)) { 4090 4090 if (!pmd_young(val)) 4091 4091 continue; 4092 4092 4093 4093 walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); 4094 4094 } 4095 - #endif 4095 + 4096 4096 if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) 4097 4097 continue; 4098 4098 ··· 5392 5392 if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) 5393 5393 caps |= BIT(LRU_GEN_MM_WALK); 5394 5394 5395 - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) 5395 + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) 5396 5396 caps |= BIT(LRU_GEN_NONLEAF_YOUNG); 5397 5397 5398 5398 return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
+2 -2
tools/vm/slabinfo-gnuplot.sh
··· 150 150 let lines=3 151 151 out=`basename "$in"`"-slabs-by-loss" 152 152 `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ 153 - egrep -iv '\-\-|Name|Slabs'\ 153 + grep -E -iv '\-\-|Name|Slabs'\ 154 154 | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` 155 155 if [ $? -eq 0 ]; then 156 156 do_slabs_plotting "$out" ··· 159 159 let lines=3 160 160 out=`basename "$in"`"-slabs-by-size" 161 161 `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ 162 - egrep -iv '\-\-|Name|Slabs'\ 162 + grep -E -iv '\-\-|Name|Slabs'\ 163 163 | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` 164 164 if [ $? -eq 0 ]; then 165 165 do_slabs_plotting "$out"