Merge tag 'mm-hotfixes-stable-2025-06-06-16-02' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

+21

MAINTAINERS

··· 15818 15818 R: Byungchul Park <byungchul@sk.com> 15819 15819 R: Gregory Price <gourry@gourry.net> 15820 15820 R: Ying Huang <ying.huang@linux.alibaba.com> 15821 + R: Alistair Popple <apopple@nvidia.com> 15821 15822 L: linux-mm@kvack.org 15822 15823 S: Maintained 15823 15824 W: http://www.linux-mm.org ··· 15889 15888 S: Maintained 15890 15889 F: include/linux/secretmem.h 15891 15890 F: mm/secretmem.c 15891 + 15892 + MEMORY MANAGEMENT - SWAP 15893 + M: Andrew Morton <akpm@linux-foundation.org> 15894 + R: Kemeng Shi <shikemeng@huaweicloud.com> 15895 + R: Kairui Song <kasong@tencent.com> 15896 + R: Nhat Pham <nphamcs@gmail.com> 15897 + R: Baoquan He <bhe@redhat.com> 15898 + R: Barry Song <baohua@kernel.org> 15899 + R: Chris Li <chrisl@kernel.org> 15900 + L: linux-mm@kvack.org 15901 + S: Maintained 15902 + F: include/linux/swap.h 15903 + F: include/linux/swapfile.h 15904 + F: include/linux/swapops.h 15905 + F: mm/page_io.c 15906 + F: mm/swap.c 15907 + F: mm/swap.h 15908 + F: mm/swap_state.c 15909 + F: mm/swapfile.c 15892 15910 15893 15911 MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) 15894 15912 M: Andrew Morton <akpm@linux-foundation.org> ··· 16747 16727 S: Maintained 16748 16728 F: arch/*/include/asm/tlb.h 16749 16729 F: include/asm-generic/tlb.h 16730 + F: include/trace/events/tlb.h 16750 16731 F: mm/mmu_gather.c 16751 16732 16752 16733 MN88472 MEDIA DRIVER

+4 -4

arch/s390/kvm/gaccess.c

··· 319 319 PROT_TYPE_DAT = 3, 320 320 PROT_TYPE_IEP = 4, 321 321 /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ 322 - PROT_NONE, 322 + PROT_TYPE_DUMMY, 323 323 }; 324 324 325 325 static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, ··· 335 335 switch (code) { 336 336 case PGM_PROTECTION: 337 337 switch (prot) { 338 - case PROT_NONE: 338 + case PROT_TYPE_DUMMY: 339 339 /* We should never get here, acts like termination */ 340 340 WARN_ON_ONCE(1); 341 341 break; ··· 805 805 gpa = kvm_s390_real_to_abs(vcpu, ga); 806 806 if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) { 807 807 rc = PGM_ADDRESSING; 808 - prot = PROT_NONE; 808 + prot = PROT_TYPE_DUMMY; 809 809 } 810 810 } 811 811 if (rc) ··· 963 963 if (rc == PGM_PROTECTION) 964 964 prot = PROT_TYPE_KEYC; 965 965 else 966 - prot = PROT_NONE; 966 + prot = PROT_TYPE_DUMMY; 967 967 rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); 968 968 } 969 969 out_unlock:

+4 -4

include/linux/codetag.h

··· 36 36 struct codetag_type_desc { 37 37 const char *section; 38 38 size_t tag_size; 39 - void (*module_load)(struct module *mod, 40 - struct codetag *start, struct codetag *end); 39 + int (*module_load)(struct module *mod, 40 + struct codetag *start, struct codetag *end); 41 41 void (*module_unload)(struct module *mod, 42 42 struct codetag *start, struct codetag *end); 43 43 #ifdef CONFIG_MODULES ··· 89 89 unsigned long align); 90 90 void codetag_free_module_sections(struct module *mod); 91 91 void codetag_module_replaced(struct module *mod, struct module *new_mod); 92 - void codetag_load_module(struct module *mod); 92 + int codetag_load_module(struct module *mod); 93 93 void codetag_unload_module(struct module *mod); 94 94 95 95 #else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ ··· 103 103 unsigned long align) { return NULL; } 104 104 static inline void codetag_free_module_sections(struct module *mod) {} 105 105 static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} 106 - static inline void codetag_load_module(struct module *mod) {} 106 + static inline int codetag_load_module(struct module *mod) { return 0; } 107 107 static inline void codetag_unload_module(struct module *mod) {} 108 108 109 109 #endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */

+3

include/linux/hugetlb.h

··· 279 279 bool is_hugetlb_entry_hwpoisoned(pte_t pte); 280 280 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); 281 281 void fixup_hugetlb_reservations(struct vm_area_struct *vma); 282 + void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); 282 283 283 284 #else /* !CONFIG_HUGETLB_PAGE */ 284 285 ··· 476 475 static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) 477 476 { 478 477 } 478 + 479 + static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} 479 480 480 481 #endif /* !CONFIG_HUGETLB_PAGE */ 481 482

+3 -2

kernel/module/main.c

··· 3386 3386 goto sysfs_cleanup; 3387 3387 } 3388 3388 3389 + if (codetag_load_module(mod)) 3390 + goto sysfs_cleanup; 3391 + 3389 3392 /* Get rid of temporary copy. */ 3390 3393 free_copy(info, flags); 3391 - 3392 - codetag_load_module(mod); 3393 3394 3394 3395 /* Done! */ 3395 3396 trace_module_load(mod);

+26

kernel/rcu/tree_stall.h

··· 20 20 int sysctl_panic_on_rcu_stall __read_mostly; 21 21 int sysctl_max_rcu_stall_to_panic __read_mostly; 22 22 23 + #ifdef CONFIG_SYSFS 24 + 25 + static unsigned int rcu_stall_count; 26 + 27 + static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr, 28 + char *page) 29 + { 30 + return sysfs_emit(page, "%u\n", rcu_stall_count); 31 + } 32 + 33 + static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count); 34 + 35 + static __init int kernel_rcu_stall_sysfs_init(void) 36 + { 37 + sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL); 38 + return 0; 39 + } 40 + 41 + late_initcall(kernel_rcu_stall_sysfs_init); 42 + 43 + #endif // CONFIG_SYSFS 44 + 23 45 #ifdef CONFIG_PROVE_RCU 24 46 #define RCU_STALL_DELAY_DELTA (5 * HZ) 25 47 #else ··· 805 783 */ 806 784 if (kvm_check_and_clear_guest_paused()) 807 785 return; 786 + 787 + #ifdef CONFIG_SYSFS 788 + ++rcu_stall_count; 789 + #endif 808 790 809 791 rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps); 810 792 if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {

+7 -5

lib/alloc_tag.c

··· 607 607 mas_unlock(&mas); 608 608 } 609 609 610 - static void load_module(struct module *mod, struct codetag *start, struct codetag *stop) 610 + static int load_module(struct module *mod, struct codetag *start, struct codetag *stop) 611 611 { 612 612 /* Allocate module alloc_tag percpu counters */ 613 613 struct alloc_tag *start_tag; 614 614 struct alloc_tag *stop_tag; 615 615 struct alloc_tag *tag; 616 616 617 + /* percpu counters for core allocations are already statically allocated */ 617 618 if (!mod) 618 - return; 619 + return 0; 619 620 620 621 start_tag = ct_to_alloc_tag(start); 621 622 stop_tag = ct_to_alloc_tag(stop); ··· 628 627 free_percpu(tag->counters); 629 628 tag->counters = NULL; 630 629 } 631 - shutdown_mem_profiling(true); 632 - pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. Memory allocation profiling is disabled!\n", 630 + pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n", 633 631 mod->name); 634 - break; 632 + return -ENOMEM; 635 633 } 636 634 } 635 + 636 + return 0; 637 637 } 638 638 639 639 static void replace_module(struct module *mod, struct module *new_mod)

+25 -9

lib/codetag.c

··· 167 167 { 168 168 struct codetag_range range; 169 169 struct codetag_module *cmod; 170 + int mod_id; 170 171 int err; 171 172 172 173 range = get_section_range(mod, cttype->desc.section); ··· 191 190 cmod->range = range; 192 191 193 192 down_write(&cttype->mod_lock); 194 - err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); 195 - if (err >= 0) { 196 - cttype->count += range_size(cttype, &range); 197 - if (cttype->desc.module_load) 198 - cttype->desc.module_load(mod, range.start, range.stop); 193 + mod_id = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); 194 + if (mod_id >= 0) { 195 + if (cttype->desc.module_load) { 196 + err = cttype->desc.module_load(mod, range.start, range.stop); 197 + if (!err) 198 + cttype->count += range_size(cttype, &range); 199 + else 200 + idr_remove(&cttype->mod_idr, mod_id); 201 + } else { 202 + cttype->count += range_size(cttype, &range); 203 + err = 0; 204 + } 205 + } else { 206 + err = mod_id; 199 207 } 200 208 up_write(&cttype->mod_lock); 201 209 ··· 305 295 mutex_unlock(&codetag_lock); 306 296 } 307 297 308 - void codetag_load_module(struct module *mod) 298 + int codetag_load_module(struct module *mod) 309 299 { 310 300 struct codetag_type *cttype; 301 + int ret = 0; 311 302 312 303 if (!mod) 313 - return; 304 + return 0; 314 305 315 306 mutex_lock(&codetag_lock); 316 - list_for_each_entry(cttype, &codetag_types, link) 317 - codetag_module_init(cttype, mod); 307 + list_for_each_entry(cttype, &codetag_types, link) { 308 + ret = codetag_module_init(cttype, mod); 309 + if (ret) 310 + break; 311 + } 318 312 mutex_unlock(&codetag_lock); 313 + 314 + return ret; 319 315 } 320 316 321 317 void codetag_unload_module(struct module *mod)

+1 -1

lib/iov_iter.c

··· 817 817 size_t size = i->count; 818 818 819 819 do { 820 - size_t len = bvec->bv_len; 820 + size_t len = bvec->bv_len - skip; 821 821 822 822 if (len > size) 823 823 len = size;

+51 -16

mm/hugetlb.c

··· 121 121 static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); 122 122 static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); 123 123 static void hugetlb_unshare_pmds(struct vm_area_struct *vma, 124 - unsigned long start, unsigned long end); 124 + unsigned long start, unsigned long end, bool take_locks); 125 125 static struct resv_map *vma_resv_map(struct vm_area_struct *vma); 126 126 127 127 static void hugetlb_free_folio(struct folio *folio) ··· 5426 5426 { 5427 5427 if (addr & ~(huge_page_mask(hstate_vma(vma)))) 5428 5428 return -EINVAL; 5429 + return 0; 5430 + } 5429 5431 5432 + void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) 5433 + { 5430 5434 /* 5431 5435 * PMD sharing is only possible for PUD_SIZE-aligned address ranges 5432 5436 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this 5433 5437 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now. 5438 + * This function is called in the middle of a VMA split operation, with 5439 + * MM, VMA and rmap all write-locked to prevent concurrent page table 5440 + * walks (except hardware and gup_fast()). 5434 5441 */ 5442 + vma_assert_write_locked(vma); 5443 + i_mmap_assert_write_locked(vma->vm_file->f_mapping); 5444 + 5435 5445 if (addr & ~PUD_MASK) { 5436 - /* 5437 - * hugetlb_vm_op_split is called right before we attempt to 5438 - * split the VMA. We will need to unshare PMDs in the old and 5439 - * new VMAs, so let's unshare before we split. 5440 - */ 5441 5446 unsigned long floor = addr & PUD_MASK; 5442 5447 unsigned long ceil = floor + PUD_SIZE; 5443 5448 5444 - if (floor >= vma->vm_start && ceil <= vma->vm_end) 5445 - hugetlb_unshare_pmds(vma, floor, ceil); 5449 + if (floor >= vma->vm_start && ceil <= vma->vm_end) { 5450 + /* 5451 + * Locking: 5452 + * Use take_locks=false here. 5453 + * The file rmap lock is already held. 5454 + * The hugetlb VMA lock can't be taken when we already 5455 + * hold the file rmap lock, and we don't need it because 5456 + * its purpose is to synchronize against concurrent page 5457 + * table walks, which are not possible thanks to the 5458 + * locks held by our caller. 5459 + */ 5460 + hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false); 5461 + } 5446 5462 } 5447 - 5448 - return 0; 5449 5463 } 5450 5464 5451 5465 static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) ··· 7629 7615 return 0; 7630 7616 7631 7617 pud_clear(pud); 7618 + /* 7619 + * Once our caller drops the rmap lock, some other process might be 7620 + * using this page table as a normal, non-hugetlb page table. 7621 + * Wait for pending gup_fast() in other threads to finish before letting 7622 + * that happen. 7623 + */ 7624 + tlb_remove_table_sync_one(); 7632 7625 ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep)); 7633 7626 mm_dec_nr_pmds(mm); 7634 7627 return 1; ··· 7906 7885 spin_unlock_irq(&hugetlb_lock); 7907 7886 } 7908 7887 7888 + /* 7889 + * If @take_locks is false, the caller must ensure that no concurrent page table 7890 + * access can happen (except for gup_fast() and hardware page walks). 7891 + * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like 7892 + * concurrent page fault handling) and the file rmap lock. 7893 + */ 7909 7894 static void hugetlb_unshare_pmds(struct vm_area_struct *vma, 7910 7895 unsigned long start, 7911 - unsigned long end) 7896 + unsigned long end, 7897 + bool take_locks) 7912 7898 { 7913 7899 struct hstate *h = hstate_vma(vma); 7914 7900 unsigned long sz = huge_page_size(h); ··· 7939 7911 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, 7940 7912 start, end); 7941 7913 mmu_notifier_invalidate_range_start(&range); 7942 - hugetlb_vma_lock_write(vma); 7943 - i_mmap_lock_write(vma->vm_file->f_mapping); 7914 + if (take_locks) { 7915 + hugetlb_vma_lock_write(vma); 7916 + i_mmap_lock_write(vma->vm_file->f_mapping); 7917 + } else { 7918 + i_mmap_assert_write_locked(vma->vm_file->f_mapping); 7919 + } 7944 7920 for (address = start; address < end; address += PUD_SIZE) { 7945 7921 ptep = hugetlb_walk(vma, address, sz); 7946 7922 if (!ptep) ··· 7954 7922 spin_unlock(ptl); 7955 7923 } 7956 7924 flush_hugetlb_tlb_range(vma, start, end); 7957 - i_mmap_unlock_write(vma->vm_file->f_mapping); 7958 - hugetlb_vma_unlock_write(vma); 7925 + if (take_locks) { 7926 + i_mmap_unlock_write(vma->vm_file->f_mapping); 7927 + hugetlb_vma_unlock_write(vma); 7928 + } 7959 7929 /* 7960 7930 * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see 7961 7931 * Documentation/mm/mmu_notifier.rst. ··· 7972 7938 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) 7973 7939 { 7974 7940 hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), 7975 - ALIGN_DOWN(vma->vm_end, PUD_SIZE)); 7941 + ALIGN_DOWN(vma->vm_end, PUD_SIZE), 7942 + /* take_locks = */ true); 7976 7943 } 7977 7944 7978 7945 /*

+1

mm/kmsan/kmsan_test.c

··· 732 732 733 733 MODULE_LICENSE("GPL"); 734 734 MODULE_AUTHOR("Alexander Potapenko <glider@google.com>"); 735 + MODULE_DESCRIPTION("Test cases for KMSAN");

+4 -1

mm/madvise.c

··· 1881 1881 /* Drop and reacquire lock to unwind race. */ 1882 1882 madvise_finish_tlb(&madv_behavior); 1883 1883 madvise_unlock(mm, behavior); 1884 - madvise_lock(mm, behavior); 1884 + ret = madvise_lock(mm, behavior); 1885 + if (ret) 1886 + goto out; 1885 1887 madvise_init_tlb(&madv_behavior, mm); 1886 1888 continue; 1887 1889 } ··· 1894 1892 madvise_finish_tlb(&madv_behavior); 1895 1893 madvise_unlock(mm, behavior); 1896 1894 1895 + out: 1897 1896 ret = (total_len - iov_iter_count(iter)) ? : ret; 1898 1897 1899 1898 return ret;

+1 -3

mm/mempolicy.c

··· 3708 3708 lockdep_is_held(&wi_state_lock)); 3709 3709 if (!old_wi_state) { 3710 3710 mutex_unlock(&wi_state_lock); 3711 - goto out; 3711 + return; 3712 3712 } 3713 3713 3714 3714 rcu_assign_pointer(wi_state, NULL); 3715 3715 mutex_unlock(&wi_state_lock); 3716 3716 synchronize_rcu(); 3717 3717 kfree(old_wi_state); 3718 - out: 3719 - kfree(&wi_group->wi_kobj); 3720 3718 } 3721 3719 3722 3720 static struct kobj_attribute wi_auto_attr =

+7

mm/vma.c

··· 539 539 init_vma_prep(&vp, vma); 540 540 vp.insert = new; 541 541 vma_prepare(&vp); 542 + 543 + /* 544 + * Get rid of huge pages and shared page tables straddling the split 545 + * boundary. 546 + */ 542 547 vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL); 548 + if (is_vm_hugetlb_page(vma)) 549 + hugetlb_split(vma, addr); 543 550 544 551 if (new_below) { 545 552 vma->vm_start = addr;

-1

mm/vmstat.c

··· 1201 1201 "nr_zone_unevictable", 1202 1202 "nr_zone_write_pending", 1203 1203 "nr_mlock", 1204 - "nr_bounce", 1205 1204 #if IS_ENABLED(CONFIG_ZSMALLOC) 1206 1205 "nr_zspages", 1207 1206 #endif

+2

tools/testing/vma/vma_internal.h

··· 932 932 (void)next; 933 933 } 934 934 935 + static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} 936 + 935 937 static inline void vma_iter_free(struct vma_iterator *vmi) 936 938 { 937 939 mas_destroy(&vmi->mas);

Configure Feed

Configure Feed