Merge tag 'mm-hotfixes-stable-2025-03-17-20-09' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

+9 -1

fs/proc/generic.c

··· 559 559 return p; 560 560 } 561 561 562 - static inline void pde_set_flags(struct proc_dir_entry *pde) 562 + static void pde_set_flags(struct proc_dir_entry *pde) 563 563 { 564 564 if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT) 565 565 pde->flags |= PROC_ENTRY_PERMANENT; 566 + if (pde->proc_ops->proc_read_iter) 567 + pde->flags |= PROC_ENTRY_proc_read_iter; 568 + #ifdef CONFIG_COMPAT 569 + if (pde->proc_ops->proc_compat_ioctl) 570 + pde->flags |= PROC_ENTRY_proc_compat_ioctl; 571 + #endif 566 572 } 567 573 568 574 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, ··· 632 626 p->proc_ops = &proc_seq_ops; 633 627 p->seq_ops = ops; 634 628 p->state_size = state_size; 629 + pde_set_flags(p); 635 630 return proc_register(parent, p); 636 631 } 637 632 EXPORT_SYMBOL(proc_create_seq_private); ··· 663 656 return NULL; 664 657 p->proc_ops = &proc_single_ops; 665 658 p->single_show = show; 659 + pde_set_flags(p); 666 660 return proc_register(parent, p); 667 661 } 668 662 EXPORT_SYMBOL(proc_create_single_data);

+3 -3

fs/proc/inode.c

··· 656 656 657 657 if (S_ISREG(inode->i_mode)) { 658 658 inode->i_op = de->proc_iops; 659 - if (de->proc_ops->proc_read_iter) 659 + if (pde_has_proc_read_iter(de)) 660 660 inode->i_fop = &proc_iter_file_ops; 661 661 else 662 662 inode->i_fop = &proc_reg_file_ops; 663 663 #ifdef CONFIG_COMPAT 664 - if (de->proc_ops->proc_compat_ioctl) { 665 - if (de->proc_ops->proc_read_iter) 664 + if (pde_has_proc_compat_ioctl(de)) { 665 + if (pde_has_proc_read_iter(de)) 666 666 inode->i_fop = &proc_iter_file_ops_compat; 667 667 else 668 668 inode->i_fop = &proc_reg_file_ops_compat;

+14

fs/proc/internal.h

··· 85 85 pde->flags |= PROC_ENTRY_PERMANENT; 86 86 } 87 87 88 + static inline bool pde_has_proc_read_iter(const struct proc_dir_entry *pde) 89 + { 90 + return pde->flags & PROC_ENTRY_proc_read_iter; 91 + } 92 + 93 + static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde) 94 + { 95 + #ifdef CONFIG_COMPAT 96 + return pde->flags & PROC_ENTRY_proc_compat_ioctl; 97 + #else 98 + return false; 99 + #endif 100 + } 101 + 88 102 extern struct kmem_cache *proc_dir_entry_cache; 89 103 void pde_free(struct proc_dir_entry *pde); 90 104

+1 -1

fs/squashfs/cache.c

··· 198 198 { 199 199 int i, j; 200 200 201 - if (cache == NULL) 201 + if (IS_ERR(cache) || cache == NULL) 202 202 return; 203 203 204 204 for (i = 0; i < cache->entries; i++) {

+5

include/linux/damon.h

··· 470 470 unsigned long next_apply_sis; 471 471 /* informs if ongoing DAMOS walk for this scheme is finished */ 472 472 bool walk_completed; 473 + /* 474 + * If the current region in the filtering stage is allowed by core 475 + * layer-handled filters. If true, operations layer allows it, too. 476 + */ 477 + bool core_filters_allowed; 473 478 /* public: */ 474 479 struct damos_quota quota; 475 480 struct damos_watermarks wmarks;

+7 -1

include/linux/mm.h

··· 1458 1458 1459 1459 static inline void get_page(struct page *page) 1460 1460 { 1461 - folio_get(page_folio(page)); 1461 + struct folio *folio = page_folio(page); 1462 + if (WARN_ON_ONCE(folio_test_slab(folio))) 1463 + return; 1464 + folio_get(folio); 1462 1465 } 1463 1466 1464 1467 static inline __must_check bool try_get_page(struct page *page) ··· 1554 1551 static inline void put_page(struct page *page) 1555 1552 { 1556 1553 struct folio *folio = page_folio(page); 1554 + 1555 + if (folio_test_slab(folio)) 1556 + return; 1557 1557 1558 1558 /* 1559 1559 * For some devmap managed pages we need to catch refcount transition

+5 -2

include/linux/proc_fs.h

··· 20 20 * If in doubt, ignore this flag. 21 21 */ 22 22 #ifdef MODULE 23 - PROC_ENTRY_PERMANENT = 0U, 23 + PROC_ENTRY_PERMANENT = 0U, 24 24 #else 25 - PROC_ENTRY_PERMANENT = 1U << 0, 25 + PROC_ENTRY_PERMANENT = 1U << 0, 26 26 #endif 27 + 28 + PROC_ENTRY_proc_read_iter = 1U << 1, 29 + PROC_ENTRY_proc_compat_ioctl = 1U << 2, 27 30 }; 28 31 29 32 struct proc_ops {

+2 -2

include/linux/swap_cgroup.h

··· 6 6 7 7 #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) 8 8 9 - extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent); 9 + extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent); 10 10 extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents); 11 11 extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); 12 12 extern int swap_cgroup_swapon(int type, unsigned long max_pages); ··· 15 15 #else 16 16 17 17 static inline 18 - void swap_cgroup_record(struct folio *folio, swp_entry_t ent) 18 + void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent) 19 19 { 20 20 } 21 21

+6 -2

lib/iov_iter.c

··· 1190 1190 if (!n) 1191 1191 return -ENOMEM; 1192 1192 p = *pages; 1193 - for (int k = 0; k < n; k++) 1194 - get_page(p[k] = page + k); 1193 + for (int k = 0; k < n; k++) { 1194 + struct folio *folio = page_folio(page); 1195 + p[k] = page + k; 1196 + if (!folio_test_slab(folio)) 1197 + folio_get(folio); 1198 + } 1195 1199 maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); 1196 1200 i->count -= maxsize; 1197 1201 i->iov_offset += maxsize;

+6 -1

mm/damon/core.c

··· 373 373 * or damon_attrs are updated. 374 374 */ 375 375 scheme->next_apply_sis = 0; 376 + scheme->walk_completed = false; 376 377 INIT_LIST_HEAD(&scheme->filters); 377 378 scheme->stat = (struct damos_stat){}; 378 379 INIT_LIST_HEAD(&scheme->list); ··· 1430 1429 { 1431 1430 struct damos_filter *filter; 1432 1431 1432 + s->core_filters_allowed = false; 1433 1433 damos_for_each_filter(filter, s) { 1434 - if (damos_filter_match(ctx, t, r, filter)) 1434 + if (damos_filter_match(ctx, t, r, filter)) { 1435 + if (filter->allow) 1436 + s->core_filters_allowed = true; 1435 1437 return !filter->allow; 1438 + } 1436 1439 } 1437 1440 return false; 1438 1441 }

+3

mm/damon/paddr.c

··· 236 236 { 237 237 struct damos_filter *filter; 238 238 239 + if (scheme->core_filters_allowed) 240 + return false; 241 + 239 242 damos_for_each_filter(filter, scheme) { 240 243 if (damos_pa_filter_match(filter, folio)) 241 244 return !filter->allow;

+28 -12

mm/filemap.c

··· 1985 1985 1986 1986 if (err == -EEXIST) 1987 1987 goto repeat; 1988 - if (err) 1988 + if (err) { 1989 + /* 1990 + * When NOWAIT I/O fails to allocate folios this could 1991 + * be due to a nonblocking memory allocation and not 1992 + * because the system actually is out of memory. 1993 + * Return -EAGAIN so that the caller retries in a 1994 + * blocking fashion instead of propagating -ENOMEM 1995 + * to the application. 1996 + */ 1997 + if ((fgp_flags & FGP_NOWAIT) && err == -ENOMEM) 1998 + err = -EAGAIN; 1989 1999 return ERR_PTR(err); 2000 + } 1990 2001 /* 1991 2002 * filemap_add_folio locks the page, and for mmap 1992 2003 * we expect an unlocked page. ··· 4094 4083 bytes = min(chunk - offset, bytes); 4095 4084 balance_dirty_pages_ratelimited(mapping); 4096 4085 4097 - /* 4098 - * Bring in the user page that we will copy from _first_. 4099 - * Otherwise there's a nasty deadlock on copying from the 4100 - * same page as we're writing to, without it being marked 4101 - * up-to-date. 4102 - */ 4103 - if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) { 4104 - status = -EFAULT; 4105 - break; 4106 - } 4107 - 4108 4086 if (fatal_signal_pending(current)) { 4109 4087 status = -EINTR; 4110 4088 break; ··· 4111 4111 if (mapping_writably_mapped(mapping)) 4112 4112 flush_dcache_folio(folio); 4113 4113 4114 + /* 4115 + * Faults here on mmap()s can recurse into arbitrary 4116 + * filesystem code. Lots of locks are held that can 4117 + * deadlock. Use an atomic copy to avoid deadlocking 4118 + * in page fault handling. 4119 + */ 4114 4120 copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); 4115 4121 flush_dcache_folio(folio); 4116 4122 ··· 4141 4135 if (copied) { 4142 4136 bytes = copied; 4143 4137 goto retry; 4138 + } 4139 + 4140 + /* 4141 + * 'folio' is now unlocked and faults on it can be 4142 + * handled. Ensure forward progress by trying to 4143 + * fault it in now. 
4144 + */ 4145 + if (fault_in_iov_iter_readable(i, bytes) == bytes) { 4146 + status = -EFAULT; 4147 + break; 4144 4148 } 4145 4149 } else { 4146 4150 pos += status;

+1 -1

mm/huge_memory.c

··· 3304 3304 folio_account_cleaned(tail, 3305 3305 inode_to_wb(folio->mapping->host)); 3306 3306 __filemap_remove_folio(tail, NULL); 3307 - folio_put(tail); 3307 + folio_put_refs(tail, folio_nr_pages(tail)); 3308 3308 } else if (!folio_test_anon(folio)) { 3309 3309 __xa_store(&folio->mapping->i_pages, tail->index, 3310 3310 tail, 0);

+6 -2

mm/hugetlb.c

··· 2135 2135 2136 2136 if (!folio_ref_count(folio)) { 2137 2137 struct hstate *h = folio_hstate(folio); 2138 + bool adjust_surplus = false; 2139 + 2138 2140 if (!available_huge_pages(h)) 2139 2141 goto out; 2140 2142 ··· 2159 2157 goto retry; 2160 2158 } 2161 2159 2162 - remove_hugetlb_folio(h, folio, false); 2160 + if (h->surplus_huge_pages_node[folio_nid(folio)]) 2161 + adjust_surplus = true; 2162 + remove_hugetlb_folio(h, folio, adjust_surplus); 2163 2163 h->max_huge_pages--; 2164 2164 spin_unlock_irq(&hugetlb_lock); 2165 2165 ··· 2181 2177 rc = hugetlb_vmemmap_restore_folio(h, folio); 2182 2178 if (rc) { 2183 2179 spin_lock_irq(&hugetlb_lock); 2184 - add_hugetlb_folio(h, folio, false); 2180 + add_hugetlb_folio(h, folio, adjust_surplus); 2185 2181 h->max_huge_pages++; 2186 2182 goto out; 2187 2183 }

+11 -2

mm/memcontrol.c

··· 1921 1921 static int memcg_hotplug_cpu_dead(unsigned int cpu) 1922 1922 { 1923 1923 struct memcg_stock_pcp *stock; 1924 + struct obj_cgroup *old; 1925 + unsigned long flags; 1924 1926 1925 1927 stock = &per_cpu(memcg_stock, cpu); 1928 + 1929 + /* drain_obj_stock requires stock_lock */ 1930 + local_lock_irqsave(&memcg_stock.stock_lock, flags); 1931 + old = drain_obj_stock(stock); 1932 + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1933 + 1926 1934 drain_stock(stock); 1935 + obj_cgroup_put(old); 1927 1936 1928 1937 return 0; 1929 1938 } ··· 5002 4993 mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); 5003 4994 mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); 5004 4995 5005 - swap_cgroup_record(folio, entry); 4996 + swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry); 5006 4997 5007 4998 folio_unqueue_deferred_split(folio); 5008 4999 folio->memcg_data = 0; ··· 5064 5055 mem_cgroup_id_get_many(memcg, nr_pages - 1); 5065 5056 mod_memcg_state(memcg, MEMCG_SWAP, nr_pages); 5066 5057 5067 - swap_cgroup_record(folio, entry); 5058 + swap_cgroup_record(folio, mem_cgroup_id(memcg), entry); 5068 5059 5069 5060 return 0; 5070 5061 }

+4 -6

mm/migrate.c

··· 518 518 if (folio_test_anon(folio) && folio_test_large(folio)) 519 519 mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1); 520 520 folio_ref_add(newfolio, nr); /* add cache reference */ 521 - if (folio_test_swapbacked(folio)) { 521 + if (folio_test_swapbacked(folio)) 522 522 __folio_set_swapbacked(newfolio); 523 - if (folio_test_swapcache(folio)) { 524 - folio_set_swapcache(newfolio); 525 - newfolio->private = folio_get_private(folio); 526 - } 523 + if (folio_test_swapcache(folio)) { 524 + folio_set_swapcache(newfolio); 525 + newfolio->private = folio_get_private(folio); 527 526 entries = nr; 528 527 } else { 529 - VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); 530 528 entries = 1; 531 529 } 532 530

+12 -2

mm/page_alloc.c

··· 7004 7004 7005 7005 static bool cond_accept_memory(struct zone *zone, unsigned int order) 7006 7006 { 7007 - long to_accept; 7007 + long to_accept, wmark; 7008 7008 bool ret = false; 7009 7009 7010 7010 if (!has_unaccepted_memory()) ··· 7013 7013 if (list_empty(&zone->unaccepted_pages)) 7014 7014 return false; 7015 7015 7016 + wmark = promo_wmark_pages(zone); 7017 + 7018 + /* 7019 + * Watermarks have not been initialized yet. 7020 + * 7021 + * Accepting one MAX_ORDER page to ensure progress. 7022 + */ 7023 + if (!wmark) 7024 + return try_to_accept_memory_one(zone); 7025 + 7016 7026 /* How much to accept to get to promo watermark? */ 7017 - to_accept = promo_wmark_pages(zone) - 7027 + to_accept = wmark - 7018 7028 (zone_page_state(zone, NR_FREE_PAGES) - 7019 7029 __zone_watermark_unusable_free(zone, order, 0) - 7020 7030 zone_page_state(zone, NR_UNACCEPTED));

+4 -3

mm/swap_cgroup.c

··· 58 58 * entries must not have been charged 59 59 * 60 60 * @folio: the folio that the swap entry belongs to 61 + * @id: mem_cgroup ID to be recorded 61 62 * @ent: the first swap entry to be recorded 62 63 */ 63 - void swap_cgroup_record(struct folio *folio, swp_entry_t ent) 64 + void swap_cgroup_record(struct folio *folio, unsigned short id, 65 + swp_entry_t ent) 64 66 { 65 67 unsigned int nr_ents = folio_nr_pages(folio); 66 68 struct swap_cgroup *map; ··· 74 72 map = swap_cgroup_ctrl[swp_type(ent)].map; 75 73 76 74 do { 77 - old = __swap_cgroup_id_xchg(map, offset, 78 - mem_cgroup_id(folio_memcg(folio))); 75 + old = __swap_cgroup_id_xchg(map, offset, id); 79 76 VM_BUG_ON(old); 80 77 } while (++offset != end); 81 78 }

+2 -1

mm/vma.c

··· 2381 2381 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below 2382 2382 * call covers the non-merge case. 2383 2383 */ 2384 - khugepaged_enter_vma(vma, map->flags); 2384 + if (!vma_is_anonymous(vma)) 2385 + khugepaged_enter_vma(vma, map->flags); 2385 2386 ksm_add_vma(vma); 2386 2387 *vmap = vma; 2387 2388 return 0;

+3 -1

tools/testing/selftests/mm/run_vmtests.sh

··· 304 304 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 305 305 # Hugetlb tests require source and destination huge pages. Pass in half 306 306 # the size of the free pages we have, which is used for *each*. 307 - half_ufd_size_MB=$((freepgs / 2)) 307 + # uffd-stress expects a region expressed in MiB, so we adjust 308 + # half_ufd_size_MB accordingly. 309 + half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2)) 308 310 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 309 311 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 310 312 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16

Configure Feed

Configure Feed