Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'mm-hotfixes-stable-2025-11-10-19-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
"26 hotfixes. 22(!) are cc:stable, 22 are MM.

- address some Kexec Handover issues (Pasha Tatashin)

- fix handling of large folios which are mapped outside i_size (Kiryl
Shutsemau)

- fix some DAMON time issues on 32-bit machines (Quanmin Yan)

Plus the usual shower of singletons"

* tag 'mm-hotfixes-stable-2025-11-10-19-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (26 commits)
kho: warn and exit when unpreserved page wasn't preserved
kho: fix unpreservation of higher-order vmalloc preservations
kho: fix out-of-bounds access of vmalloc chunk
MAINTAINERS: add Chris and Kairui as the swap maintainer
mm/secretmem: fix use-after-free race in fault handler
mm/huge_memory: initialise the tags of the huge zero folio
nilfs2: avoid having an active sc_timer before freeing sci
scripts/decode_stacktrace.sh: fix build ID and PC source parsing
mm/damon/sysfs: change next_update_jiffies to a global variable
mm/damon/stat: change last_refresh_jiffies to a global variable
maple_tree: fix tracepoint string pointers
codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for slabobj_ext
mm/mremap: honour writable bit in mremap pte batching
gcov: add support for GCC 15
mm/mm_init: fix hash table order logging in alloc_large_system_hash()
mm/truncate: unmap large folio on split failure
mm/memory: do not populate page table entries beyond i_size
fs/proc: fix uaf in proc_readdir_de()
mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 order
ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
...

+424 -152
+2 -2
MAINTAINERS
···
16498 16498
16499 16499 MEMORY MANAGEMENT - SWAP
16500 16500 M: Andrew Morton <akpm@linux-foundation.org>
16501 + M: Chris Li <chrisl@kernel.org>
16502 + M: Kairui Song <kasong@tencent.com>
16501 16503 R: Kemeng Shi <shikemeng@huaweicloud.com>
16502 - R: Kairui Song <kasong@tencent.com>
16503 16504 R: Nhat Pham <nphamcs@gmail.com>
16504 16505 R: Baoquan He <bhe@redhat.com>
16505 16506 R: Barry Song <baohua@kernel.org>
16506 - R: Chris Li <chrisl@kernel.org>
16507 16507 L: linux-mm@kvack.org
16508 16508 S: Maintained
16509 16509 F: Documentation/mm/swap-table.rst
+2 -1
arch/arm64/kernel/mte.c
···
476 476
477 477 folio = page_folio(page);
478 478 if (folio_test_hugetlb(folio))
479 - WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
479 + WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) &&
480 + !is_huge_zero_folio(folio));
480 481 else
481 482 WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));
482 483
+10
arch/arm64/mm/fault.c
···
969 969
970 970 void tag_clear_highpage(struct page *page)
971 971 {
972 + /*
973 + * Check if MTE is supported and fall back to clear_highpage().
974 + * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and
975 + * post_alloc_hook() will invoke tag_clear_highpage().
976 + */
977 + if (!system_supports_mte()) {
978 + clear_highpage(page);
979 + return;
980 + }
981 +
972 982 /* Newly allocated page, shouldn't have been tagged yet */
973 983 WARN_ON_ONCE(!try_page_mte_tagging(page));
974 984 mte_zero_clear_page_tags(page_address(page));
+6 -1
fs/nilfs2/segment.c
···
2768 2768
2769 2769 if (sci->sc_task) {
2770 2770 wake_up(&sci->sc_wait_daemon);
2771 - kthread_stop(sci->sc_task);
2771 + if (kthread_stop(sci->sc_task)) {
2772 + spin_lock(&sci->sc_state_lock);
2773 + sci->sc_task = NULL;
2774 + timer_shutdown_sync(&sci->sc_timer);
2775 + spin_unlock(&sci->sc_state_lock);
2776 + }
2772 2777 }
2773 2778
2774 2779 spin_lock(&sci->sc_state_lock);
+9 -3
fs/proc/generic.c
···
698 698 }
699 699 }
700 700
701 + static void pde_erase(struct proc_dir_entry *pde, struct proc_dir_entry *parent)
702 + {
703 + rb_erase(&pde->subdir_node, &parent->subdir);
704 + RB_CLEAR_NODE(&pde->subdir_node);
705 + }
706 +
701 707 /*
702 708 * Remove a /proc entry and free it if it's not currently in use.
703 709 */
···
726 720 WARN(1, "removing permanent /proc entry '%s'", de->name);
727 721 de = NULL;
728 722 } else {
729 - rb_erase(&de->subdir_node, &parent->subdir);
723 + pde_erase(de, parent);
730 724 if (S_ISDIR(de->mode))
731 725 parent->nlink--;
732 726 }
···
770 764 root->parent->name, root->name);
771 765 return -EINVAL;
772 766 }
773 - rb_erase(&root->subdir_node, &parent->subdir);
767 + pde_erase(root, parent);
774 768
775 769 de = root;
776 770 while (1) {
···
782 776 next->parent->name, next->name);
783 777 return -EINVAL;
784 778 }
785 - rb_erase(&next->subdir_node, &de->subdir);
779 + pde_erase(next, de);
786 780 de = next;
787 781 continue;
788 782 }
+3
include/linux/gfp.h
···
7 7 #include <linux/mmzone.h>
8 8 #include <linux/topology.h>
9 9 #include <linux/alloc_tag.h>
10 + #include <linux/cleanup.h>
10 11 #include <linux/sched.h>
11 12
12 13 struct vm_area_struct;
···
463 462 #endif
464 463 /* This should be paired with folio_put() rather than free_contig_range(). */
465 464 #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
465 +
466 + DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
466 467
467 468 #endif /* __LINUX_GFP_H */
+23 -32
include/linux/huge_mm.h
··· 376 376 int folio_split(struct folio *folio, unsigned int new_order, struct page *page, 377 377 struct list_head *list); 378 378 /* 379 - * try_folio_split - try to split a @folio at @page using non uniform split. 379 + * try_folio_split_to_order - try to split a @folio at @page to @new_order using 380 + * non uniform split. 380 381 * @folio: folio to be split 381 - * @page: split to order-0 at the given page 382 - * @list: store the after-split folios 382 + * @page: split to @new_order at the given page 383 + * @new_order: the target split order 383 384 * 384 - * Try to split a @folio at @page using non uniform split to order-0, if 385 - * non uniform split is not supported, fall back to uniform split. 385 + * Try to split a @folio at @page using non uniform split to @new_order, if 386 + * non uniform split is not supported, fall back to uniform split. After-split 387 + * folios are put back to LRU list. Use min_order_for_split() to get the lower 388 + * bound of @new_order. 386 389 * 387 390 * Return: 0: split is successful, otherwise split failed. 
388 391 */ 389 - static inline int try_folio_split(struct folio *folio, struct page *page, 390 - struct list_head *list) 392 + static inline int try_folio_split_to_order(struct folio *folio, 393 + struct page *page, unsigned int new_order) 391 394 { 392 - int ret = min_order_for_split(folio); 393 - 394 - if (ret < 0) 395 - return ret; 396 - 397 - if (!non_uniform_split_supported(folio, 0, false)) 398 - return split_huge_page_to_list_to_order(&folio->page, list, 399 - ret); 400 - return folio_split(folio, ret, page, list); 395 + if (!non_uniform_split_supported(folio, new_order, /* warns= */ false)) 396 + return split_huge_page_to_list_to_order(&folio->page, NULL, 397 + new_order); 398 + return folio_split(folio, new_order, page, NULL); 401 399 } 402 400 static inline int split_huge_page(struct page *page) 403 401 { 404 - struct folio *folio = page_folio(page); 405 - int ret = min_order_for_split(folio); 406 - 407 - if (ret < 0) 408 - return ret; 409 - 410 - /* 411 - * split_huge_page() locks the page before splitting and 412 - * expects the same page that has been split to be locked when 413 - * returned. split_folio(page_folio(page)) cannot be used here 414 - * because it converts the page to folio and passes the head 415 - * page to be split. 
416 - */ 417 - return split_huge_page_to_list_to_order(page, NULL, ret); 402 + return split_huge_page_to_list_to_order(page, NULL, 0); 418 403 } 419 404 void deferred_split_folio(struct folio *folio, bool partially_mapped); 420 405 ··· 582 597 return -EINVAL; 583 598 } 584 599 600 + static inline int min_order_for_split(struct folio *folio) 601 + { 602 + VM_WARN_ON_ONCE_FOLIO(1, folio); 603 + return -EINVAL; 604 + } 605 + 585 606 static inline int split_folio_to_list(struct folio *folio, struct list_head *list) 586 607 { 587 608 VM_WARN_ON_ONCE_FOLIO(1, folio); 588 609 return -EINVAL; 589 610 } 590 611 591 - static inline int try_folio_split(struct folio *folio, struct page *page, 592 - struct list_head *list) 612 + static inline int try_folio_split_to_order(struct folio *folio, 613 + struct page *page, unsigned int new_order) 593 614 { 594 615 VM_WARN_ON_ONCE_FOLIO(1, folio); 595 616 return -EINVAL;
+9
kernel/Kconfig.kexec
···
109 109 to keep data or state alive across the kexec. For this to work,
110 110 both source and target kernels need to have this option enabled.
111 111
112 + config KEXEC_HANDOVER_DEBUG
113 + bool "Enable Kexec Handover debug checks"
114 + depends on KEXEC_HANDOVER
115 + help
116 + This option enables extra sanity checks for the Kexec Handover
117 + subsystem. Since, KHO performance is crucial in live update
118 + scenarios and the extra code might be adding overhead it is
119 + only optionally enabled.
120 +
112 121 config CRASH_DUMP
113 122 bool "kernel crash dumps"
114 123 default ARCH_DEFAULT_CRASH_DUMP
+1
kernel/Makefile
···
83 83 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
84 84 obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
85 85 obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
86 + obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
86 87 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
87 88 obj-$(CONFIG_COMPAT) += compat.o
88 89 obj-$(CONFIG_CGROUPS) += cgroup/
+3 -1
kernel/gcov/gcc_4_7.c
···
18 18 #include <linux/mm.h>
19 19 #include "gcov.h"
20 20
21 - #if (__GNUC__ >= 14)
21 + #if (__GNUC__ >= 15)
22 + #define GCOV_COUNTERS 10
23 + #elif (__GNUC__ >= 14)
22 24 #define GCOV_COUNTERS 9
23 25 #elif (__GNUC__ >= 10)
24 26 #define GCOV_COUNTERS 8
+58 -37
kernel/kexec_handover.c
··· 8 8 9 9 #define pr_fmt(fmt) "KHO: " fmt 10 10 11 + #include <linux/cleanup.h> 11 12 #include <linux/cma.h> 12 13 #include <linux/count_zeros.h> 13 14 #include <linux/debugfs.h> ··· 23 22 24 23 #include <asm/early_ioremap.h> 25 24 25 + #include "kexec_handover_internal.h" 26 26 /* 27 27 * KHO is tightly coupled with mm init and needs access to some of mm 28 28 * internal APIs. ··· 69 67 * Keep track of memory that is to be preserved across KHO. 70 68 * 71 69 * The serializing side uses two levels of xarrays to manage chunks of per-order 72 - * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a 73 - * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations 74 - * each bitmap will cover 16M of address space. Thus, for 16G of memory at most 75 - * 512K of bitmap memory will be needed for order 0. 70 + * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order 71 + * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0 72 + * allocations each bitmap will cover 128M of address space. Thus, for 16G of 73 + * memory at most 512K of bitmap memory will be needed for order 0. 76 74 * 77 75 * This approach is fully incremental, as the serialization progresses folios 78 76 * can continue be aggregated to the tracker. The final step, immediately prior ··· 80 78 * successor kernel to parse. 
81 79 */ 82 80 83 - #define PRESERVE_BITS (512 * 8) 81 + #define PRESERVE_BITS (PAGE_SIZE * 8) 84 82 85 83 struct kho_mem_phys_bits { 86 84 DECLARE_BITMAP(preserve, PRESERVE_BITS); 87 85 }; 86 + 87 + static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE); 88 88 89 89 struct kho_mem_phys { 90 90 /* ··· 135 131 .finalized = false, 136 132 }; 137 133 138 - static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz) 134 + static void *xa_load_or_alloc(struct xarray *xa, unsigned long index) 139 135 { 140 - void *elm, *res; 136 + void *res = xa_load(xa, index); 141 137 142 - elm = xa_load(xa, index); 143 - if (elm) 144 - return elm; 138 + if (res) 139 + return res; 145 140 146 - elm = kzalloc(sz, GFP_KERNEL); 141 + void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL); 142 + 147 143 if (!elm) 148 144 return ERR_PTR(-ENOMEM); 149 145 146 + if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE))) 147 + return ERR_PTR(-EINVAL); 148 + 150 149 res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL); 151 150 if (xa_is_err(res)) 152 - res = ERR_PTR(xa_err(res)); 153 - 154 - if (res) { 155 - kfree(elm); 151 + return ERR_PTR(xa_err(res)); 152 + else if (res) 156 153 return res; 157 - } 158 154 159 - return elm; 155 + return no_free_ptr(elm); 160 156 } 161 157 162 158 static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn, ··· 171 167 const unsigned long pfn_high = pfn >> order; 172 168 173 169 physxa = xa_load(&track->orders, order); 174 - if (!physxa) 175 - continue; 170 + if (WARN_ON_ONCE(!physxa)) 171 + return; 176 172 177 173 bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS); 178 - if (!bits) 179 - continue; 174 + if (WARN_ON_ONCE(!bits)) 175 + return; 180 176 181 177 clear_bit(pfn_high % PRESERVE_BITS, bits->preserve); 182 178 ··· 220 216 } 221 217 } 222 218 223 - bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS, 224 - sizeof(*bits)); 219 + bits = 
xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS); 225 220 if (IS_ERR(bits)) 226 221 return PTR_ERR(bits); 227 222 ··· 348 345 static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk, 349 346 unsigned long order) 350 347 { 351 - struct khoser_mem_chunk *chunk; 348 + struct khoser_mem_chunk *chunk __free(free_page) = NULL; 352 349 353 - chunk = kzalloc(PAGE_SIZE, GFP_KERNEL); 350 + chunk = (void *)get_zeroed_page(GFP_KERNEL); 354 351 if (!chunk) 355 - return NULL; 352 + return ERR_PTR(-ENOMEM); 353 + 354 + if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE))) 355 + return ERR_PTR(-EINVAL); 356 + 356 357 chunk->hdr.order = order; 357 358 if (cur_chunk) 358 359 KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk); 359 - return chunk; 360 + return no_free_ptr(chunk); 360 361 } 361 362 362 363 static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk) ··· 381 374 struct khoser_mem_chunk *chunk = NULL; 382 375 struct kho_mem_phys *physxa; 383 376 unsigned long order; 377 + int err = -ENOMEM; 384 378 385 379 xa_for_each(&ser->track.orders, order, physxa) { 386 380 struct kho_mem_phys_bits *bits; 387 381 unsigned long phys; 388 382 389 383 chunk = new_chunk(chunk, order); 390 - if (!chunk) 384 + if (IS_ERR(chunk)) { 385 + err = PTR_ERR(chunk); 391 386 goto err_free; 387 + } 392 388 393 389 if (!first_chunk) 394 390 first_chunk = chunk; ··· 401 391 402 392 if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) { 403 393 chunk = new_chunk(chunk, order); 404 - if (!chunk) 394 + if (IS_ERR(chunk)) { 395 + err = PTR_ERR(chunk); 405 396 goto err_free; 397 + } 406 398 } 407 399 408 400 elm = &chunk->bitmaps[chunk->hdr.num_elms]; ··· 421 409 422 410 err_free: 423 411 kho_mem_ser_free(first_chunk); 424 - return -ENOMEM; 412 + return err; 425 413 } 426 414 427 415 static void __init deserialize_bitmap(unsigned int order, ··· 477 465 * area for early allocations that happen before page allocator is 478 466 * initialized. 
479 467 */ 480 - static struct kho_scratch *kho_scratch; 481 - static unsigned int kho_scratch_cnt; 468 + struct kho_scratch *kho_scratch; 469 + unsigned int kho_scratch_cnt; 482 470 483 471 /* 484 472 * The scratch areas are scaled by default as percent of memory allocated from ··· 764 752 const unsigned int order = folio_order(folio); 765 753 struct kho_mem_track *track = &kho_out.ser.track; 766 754 755 + if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order))) 756 + return -EINVAL; 757 + 767 758 return __kho_preserve_order(track, pfn, order); 768 759 } 769 760 EXPORT_SYMBOL_GPL(kho_preserve_folio); ··· 789 774 unsigned long pfn = start_pfn; 790 775 unsigned long failed_pfn = 0; 791 776 int err = 0; 777 + 778 + if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT, 779 + nr_pages << PAGE_SHIFT))) { 780 + return -EINVAL; 781 + } 792 782 793 783 while (pfn < end_pfn) { 794 784 const unsigned int order = ··· 882 862 return NULL; 883 863 } 884 864 885 - static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) 865 + static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk, 866 + unsigned short order) 886 867 { 887 868 struct kho_mem_track *track = &kho_out.ser.track; 888 869 unsigned long pfn = PHYS_PFN(virt_to_phys(chunk)); 889 870 890 871 __kho_unpreserve(track, pfn, pfn + 1); 891 872 892 - for (int i = 0; chunk->phys[i]; i++) { 873 + for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { 893 874 pfn = PHYS_PFN(chunk->phys[i]); 894 - __kho_unpreserve(track, pfn, pfn + 1); 875 + __kho_unpreserve(track, pfn, pfn + (1 << order)); 895 876 } 896 877 } 897 878 ··· 903 882 while (chunk) { 904 883 struct kho_vmalloc_chunk *tmp = chunk; 905 884 906 - kho_vmalloc_unpreserve_chunk(chunk); 885 + kho_vmalloc_unpreserve_chunk(chunk, kho_vmalloc->order); 907 886 908 887 chunk = KHOSER_LOAD_PTR(chunk->hdr.next); 909 888 free_page((unsigned long)tmp); ··· 1013 992 while (chunk) { 1014 993 struct page *page; 1015 994 
1016 - for (int i = 0; chunk->phys[i]; i++) { 995 + for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { 1017 996 phys_addr_t phys = chunk->phys[i]; 1018 997 1019 998 if (idx + contig_pages > total_pages)
+25
kernel/kexec_handover_debug.c
···
1 + // SPDX-License-Identifier: GPL-2.0-only
2 + /*
3 + * kexec_handover_debug.c - kexec handover optional debug functionality
4 + * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com>
5 + */
6 +
7 + #define pr_fmt(fmt) "KHO: " fmt
8 +
9 + #include "kexec_handover_internal.h"
10 +
11 + bool kho_scratch_overlap(phys_addr_t phys, size_t size)
12 + {
13 + phys_addr_t scratch_start, scratch_end;
14 + unsigned int i;
15 +
16 + for (i = 0; i < kho_scratch_cnt; i++) {
17 + scratch_start = kho_scratch[i].addr;
18 + scratch_end = kho_scratch[i].addr + kho_scratch[i].size;
19 +
20 + if (phys < scratch_end && (phys + size) > scratch_start)
21 + return true;
22 + }
23 +
24 + return false;
25 + }
+20
kernel/kexec_handover_internal.h
···
1 + /* SPDX-License-Identifier: GPL-2.0 */
2 + #ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
3 + #define LINUX_KEXEC_HANDOVER_INTERNAL_H
4 +
5 + #include <linux/kexec_handover.h>
6 + #include <linux/types.h>
7 +
8 + extern struct kho_scratch *kho_scratch;
9 + extern unsigned int kho_scratch_cnt;
10 +
11 + #ifdef CONFIG_KEXEC_HANDOVER_DEBUG
12 + bool kho_scratch_overlap(phys_addr_t phys, size_t size);
13 + #else
14 + static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
15 + {
16 + return false;
17 + }
18 + #endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
19 +
20 + #endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
+16 -14
lib/maple_tree.c
··· 64 64 #define CREATE_TRACE_POINTS 65 65 #include <trace/events/maple_tree.h> 66 66 67 + #define TP_FCT tracepoint_string(__func__) 68 + 67 69 /* 68 70 * Kernel pointer hashing renders much of the maple tree dump useless as tagged 69 71 * pointers get hashed to arbitrary values. ··· 2758 2756 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2759 2757 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2760 2758 2761 - trace_ma_op(__func__, mas); 2759 + trace_ma_op(TP_FCT, mas); 2762 2760 2763 2761 /* 2764 2762 * Rebalancing occurs if a node is insufficient. Data is rebalanced ··· 2999 2997 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3000 2998 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3001 2999 3002 - trace_ma_op(__func__, mas); 3000 + trace_ma_op(TP_FCT, mas); 3003 3001 3004 3002 mast.l = &l_mas; 3005 3003 mast.r = &r_mas; ··· 3174 3172 return false; 3175 3173 } 3176 3174 3177 - trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3175 + trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry); 3178 3176 return true; 3179 3177 } 3180 3178 ··· 3418 3416 * of data may happen. 3419 3417 */ 3420 3418 mas = wr_mas->mas; 3421 - trace_ma_op(__func__, mas); 3419 + trace_ma_op(TP_FCT, mas); 3422 3420 3423 3421 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3424 3422 return mas_new_root(mas, wr_mas->entry); ··· 3554 3552 } else { 3555 3553 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3556 3554 } 3557 - trace_ma_write(__func__, mas, 0, wr_mas->entry); 3555 + trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); 3558 3556 mas_update_gap(mas); 3559 3557 mas->end = new_end; 3560 3558 return; ··· 3598 3596 mas->offset++; /* Keep mas accurate. */ 3599 3597 } 3600 3598 3601 - trace_ma_write(__func__, mas, 0, wr_mas->entry); 3599 + trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); 3602 3600 /* 3603 3601 * Only update gap when the new entry is empty or there is an empty 3604 3602 * entry in the original two ranges. 
··· 3719 3717 mas_update_gap(mas); 3720 3718 3721 3719 mas->end = new_end; 3722 - trace_ma_write(__func__, mas, new_end, wr_mas->entry); 3720 + trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry); 3723 3721 return; 3724 3722 } 3725 3723 ··· 3733 3731 { 3734 3732 struct maple_big_node b_node; 3735 3733 3736 - trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 3734 + trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry); 3737 3735 memset(&b_node, 0, sizeof(struct maple_big_node)); 3738 3736 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 3739 3737 mas_commit_b_node(wr_mas, &b_node); ··· 5064 5062 { 5065 5063 MA_WR_STATE(wr_mas, mas, entry); 5066 5064 5067 - trace_ma_write(__func__, mas, 0, entry); 5065 + trace_ma_write(TP_FCT, mas, 0, entry); 5068 5066 #ifdef CONFIG_DEBUG_MAPLE_TREE 5069 5067 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5070 5068 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, ··· 5165 5163 } 5166 5164 5167 5165 store: 5168 - trace_ma_write(__func__, mas, 0, entry); 5166 + trace_ma_write(TP_FCT, mas, 0, entry); 5169 5167 mas_wr_store_entry(&wr_mas); 5170 5168 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5171 5169 mas_destroy(mas); ··· 5884 5882 MA_STATE(mas, mt, index, index); 5885 5883 void *entry; 5886 5884 5887 - trace_ma_read(__func__, &mas); 5885 + trace_ma_read(TP_FCT, &mas); 5888 5886 rcu_read_lock(); 5889 5887 retry: 5890 5888 entry = mas_start(&mas); ··· 5927 5925 MA_STATE(mas, mt, index, last); 5928 5926 int ret = 0; 5929 5927 5930 - trace_ma_write(__func__, &mas, 0, entry); 5928 + trace_ma_write(TP_FCT, &mas, 0, entry); 5931 5929 if (WARN_ON_ONCE(xa_is_advanced(entry))) 5932 5930 return -EINVAL; 5933 5931 ··· 6150 6148 void *entry = NULL; 6151 6149 6152 6150 MA_STATE(mas, mt, index, index); 6153 - trace_ma_op(__func__, &mas); 6151 + trace_ma_op(TP_FCT, &mas); 6154 6152 6155 6153 mtree_lock(mt); 6156 6154 entry = mas_erase(&mas); ··· 6487 6485 unsigned long copy = *index; 6488 6486 #endif 6489 6487 6490 - 
trace_ma_read(__func__, &mas); 6488 + trace_ma_read(TP_FCT, &mas); 6491 6489 6492 6490 if ((*index) > max) 6493 6491 return NULL;
+6 -3
mm/damon/stat.c
···
46 46
47 47 static struct damon_ctx *damon_stat_context;
48 48
49 + static unsigned long damon_stat_last_refresh_jiffies;
50 +
49 51 static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
50 52 {
51 53 struct damon_target *t;
···
132 130 static int damon_stat_damon_call_fn(void *data)
133 131 {
134 132 struct damon_ctx *c = data;
135 - static unsigned long last_refresh_jiffies;
136 133
137 134 /* avoid unnecessarily frequent stat update */
138 - if (time_before_eq(jiffies, last_refresh_jiffies +
135 + if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
139 136 msecs_to_jiffies(5 * MSEC_PER_SEC)))
140 137 return 0;
141 - last_refresh_jiffies = jiffies;
138 + damon_stat_last_refresh_jiffies = jiffies;
142 139
143 140 aggr_interval_us = c->attrs.aggr_interval;
144 141 damon_stat_set_estimated_memory_bandwidth(c);
···
211 210 err = damon_start(&damon_stat_context, 1, true);
212 211 if (err)
213 212 return err;
213 +
214 + damon_stat_last_refresh_jiffies = jiffies;
214 215 call_control.data = damon_stat_context;
215 216 return damon_call(damon_stat_context, &call_control);
216 217 }
+7 -3
mm/damon/sysfs.c
···
1552 1552 return ctx;
1553 1553 }
1554 1554
1555 + static unsigned long damon_sysfs_next_update_jiffies;
1556 +
1555 1557 static int damon_sysfs_repeat_call_fn(void *data)
1556 1558 {
1557 1559 struct damon_sysfs_kdamond *sysfs_kdamond = data;
1558 - static unsigned long next_update_jiffies;
1559 1560
1560 1561 if (!sysfs_kdamond->refresh_ms)
1561 1562 return 0;
1562 - if (time_before(jiffies, next_update_jiffies))
1563 + if (time_before(jiffies, damon_sysfs_next_update_jiffies))
1563 1564 return 0;
1564 - next_update_jiffies = jiffies +
1565 + damon_sysfs_next_update_jiffies = jiffies +
1565 1566 msecs_to_jiffies(sysfs_kdamond->refresh_ms);
1566 1567
1567 1568 if (!mutex_trylock(&damon_sysfs_lock))
···
1607 1606 return err;
1608 1607 }
1609 1608 kdamond->damon_ctx = ctx;
1609 +
1610 + damon_sysfs_next_update_jiffies =
1611 + jiffies + msecs_to_jiffies(kdamond->refresh_ms);
1610 1612
1611 1613 repeat_call_control->fn = damon_sysfs_repeat_call_fn;
1612 1614 repeat_call_control->data = kdamond;
+20 -8
mm/filemap.c
··· 3681 3681 static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, 3682 3682 struct folio *folio, unsigned long start, 3683 3683 unsigned long addr, unsigned int nr_pages, 3684 - unsigned long *rss, unsigned short *mmap_miss) 3684 + unsigned long *rss, unsigned short *mmap_miss, 3685 + bool can_map_large) 3685 3686 { 3686 3687 unsigned int ref_from_caller = 1; 3687 3688 vm_fault_t ret = 0; ··· 3697 3696 * The folio must not cross VMA or page table boundary. 3698 3697 */ 3699 3698 addr0 = addr - start * PAGE_SIZE; 3700 - if (folio_within_vma(folio, vmf->vma) && 3699 + if (can_map_large && folio_within_vma(folio, vmf->vma) && 3701 3700 (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) { 3702 3701 vmf->pte -= start; 3703 3702 page -= start; ··· 3812 3811 unsigned long rss = 0; 3813 3812 unsigned int nr_pages = 0, folio_type; 3814 3813 unsigned short mmap_miss = 0, mmap_miss_saved; 3814 + bool can_map_large; 3815 3815 3816 3816 rcu_read_lock(); 3817 3817 folio = next_uptodate_folio(&xas, mapping, end_pgoff); 3818 3818 if (!folio) 3819 3819 goto out; 3820 3820 3821 - if (filemap_map_pmd(vmf, folio, start_pgoff)) { 3821 + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; 3822 + end_pgoff = min(end_pgoff, file_end); 3823 + 3824 + /* 3825 + * Do not allow to map with PTEs beyond i_size and with PMD 3826 + * across i_size to preserve SIGBUS semantics. 3827 + * 3828 + * Make an exception for shmem/tmpfs that for long time 3829 + * intentionally mapped with PMDs across i_size. 
3830 + */ 3831 + can_map_large = shmem_mapping(mapping) || 3832 + file_end >= folio_next_index(folio); 3833 + 3834 + if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) { 3822 3835 ret = VM_FAULT_NOPAGE; 3823 3836 goto out; 3824 3837 } ··· 3844 3829 folio_put(folio); 3845 3830 goto out; 3846 3831 } 3847 - 3848 - file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; 3849 - if (end_pgoff > file_end) 3850 - end_pgoff = file_end; 3851 3832 3852 3833 folio_type = mm_counter_file(folio); 3853 3834 do { ··· 3861 3850 else 3862 3851 ret |= filemap_map_folio_range(vmf, folio, 3863 3852 xas.xa_index - folio->index, addr, 3864 - nr_pages, &rss, &mmap_miss); 3853 + nr_pages, &rss, &mmap_miss, 3854 + can_map_large); 3865 3855 3866 3856 folio_unlock(folio); 3867 3857 } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
+23 -12
mm/huge_memory.c
··· 214 214 if (likely(atomic_inc_not_zero(&huge_zero_refcount))) 215 215 return true; 216 216 217 - zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE, 217 + zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO | __GFP_ZEROTAGS) & 218 + ~__GFP_MOVABLE, 218 219 HPAGE_PMD_ORDER); 219 220 if (!zero_folio) { 220 221 count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED); ··· 3264 3263 caller_pins; 3265 3264 } 3266 3265 3266 + static bool page_range_has_hwpoisoned(struct page *page, long nr_pages) 3267 + { 3268 + for (; nr_pages; page++, nr_pages--) 3269 + if (PageHWPoison(page)) 3270 + return true; 3271 + return false; 3272 + } 3273 + 3267 3274 /* 3268 3275 * It splits @folio into @new_order folios and copies the @folio metadata to 3269 3276 * all the resulting folios. ··· 3279 3270 static void __split_folio_to_order(struct folio *folio, int old_order, 3280 3271 int new_order) 3281 3272 { 3273 + /* Scan poisoned pages when split a poisoned folio to large folios */ 3274 + const bool handle_hwpoison = folio_test_has_hwpoisoned(folio) && new_order; 3282 3275 long new_nr_pages = 1 << new_order; 3283 3276 long nr_pages = 1 << old_order; 3284 3277 long i; 3285 3278 3279 + folio_clear_has_hwpoisoned(folio); 3280 + 3281 + /* Check first new_nr_pages since the loop below skips them */ 3282 + if (handle_hwpoison && 3283 + page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages)) 3284 + folio_set_has_hwpoisoned(folio); 3286 3285 /* 3287 3286 * Skip the first new_nr_pages, since the new folio from them have all 3288 3287 * the flags from the original folio. 3289 3288 */ 3290 3289 for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) { 3291 3290 struct page *new_head = &folio->page + i; 3292 - 3293 3291 /* 3294 3292 * Careful: new_folio is not a "real" folio before we cleared PageTail. 3295 3293 * Don't pass it around before clear_compound_head(). 
··· 3337 3321 #endif 3338 3322 (1L << PG_dirty) | 3339 3323 LRU_GEN_MASK | LRU_REFS_MASK)); 3324 + 3325 + if (handle_hwpoison && 3326 + page_range_has_hwpoisoned(new_head, new_nr_pages)) 3327 + folio_set_has_hwpoisoned(new_folio); 3340 3328 3341 3329 new_folio->mapping = folio->mapping; 3342 3330 new_folio->index = folio->index + i; ··· 3441 3421 3442 3422 if (folio_test_anon(folio)) 3443 3423 mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); 3444 - 3445 - folio_clear_has_hwpoisoned(folio); 3446 3424 3447 3425 /* 3448 3426 * split to new_order one order at a time. For uniform split, ··· 3671 3653 3672 3654 min_order = mapping_min_folio_order(folio->mapping); 3673 3655 if (new_order < min_order) { 3674 - VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u", 3675 - min_order); 3676 3656 ret = -EINVAL; 3677 3657 goto out; 3678 3658 } ··· 4002 3986 4003 3987 int split_folio_to_list(struct folio *folio, struct list_head *list) 4004 3988 { 4005 - int ret = min_order_for_split(folio); 4006 - 4007 - if (ret < 0) 4008 - return ret; 4009 - 4010 - return split_huge_page_to_list_to_order(&folio->page, list, ret); 3989 + return split_huge_page_to_list_to_order(&folio->page, list, 0); 4011 3990 } 4012 3991 4013 3992 /*
-3
mm/kmsan/core.c
··· 72 72 73 73 nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0); 74 74 75 - /* Don't sleep. */ 76 - flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM); 77 - 78 75 handle = stack_depot_save(entries, nr_entries, flags); 79 76 return stack_depot_set_extra_bits(handle, extra); 80 77 }
+4 -2
mm/kmsan/hooks.c
··· 84 84 if (s->ctor) 85 85 return; 86 86 kmsan_enter_runtime(); 87 - kmsan_internal_poison_memory(object, s->object_size, GFP_KERNEL, 87 + kmsan_internal_poison_memory(object, s->object_size, 88 + GFP_KERNEL & ~(__GFP_RECLAIM), 88 89 KMSAN_POISON_CHECK | KMSAN_POISON_FREE); 89 90 kmsan_leave_runtime(); 90 91 } ··· 115 114 kmsan_enter_runtime(); 116 115 page = virt_to_head_page((void *)ptr); 117 116 KMSAN_WARN_ON(ptr != page_address(page)); 118 - kmsan_internal_poison_memory((void *)ptr, page_size(page), GFP_KERNEL, 117 + kmsan_internal_poison_memory((void *)ptr, page_size(page), 118 + GFP_KERNEL & ~(__GFP_RECLAIM), 119 119 KMSAN_POISON_CHECK | KMSAN_POISON_FREE); 120 120 kmsan_leave_runtime(); 121 121 }
+1 -1
mm/kmsan/shadow.c
··· 208 208 return; 209 209 kmsan_enter_runtime(); 210 210 kmsan_internal_poison_memory(page_address(page), page_size(page), 211 - GFP_KERNEL, 211 + GFP_KERNEL & ~(__GFP_RECLAIM), 212 212 KMSAN_POISON_CHECK | KMSAN_POISON_FREE); 213 213 kmsan_leave_runtime(); 214 214 }
+104 -9
mm/ksm.c
··· 2455 2455 return true; 2456 2456 } 2457 2457 2458 + struct ksm_next_page_arg { 2459 + struct folio *folio; 2460 + struct page *page; 2461 + unsigned long addr; 2462 + }; 2463 + 2464 + static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end, 2465 + struct mm_walk *walk) 2466 + { 2467 + struct ksm_next_page_arg *private = walk->private; 2468 + struct vm_area_struct *vma = walk->vma; 2469 + pte_t *start_ptep = NULL, *ptep, pte; 2470 + struct mm_struct *mm = walk->mm; 2471 + struct folio *folio; 2472 + struct page *page; 2473 + spinlock_t *ptl; 2474 + pmd_t pmd; 2475 + 2476 + if (ksm_test_exit(mm)) 2477 + return 0; 2478 + 2479 + cond_resched(); 2480 + 2481 + pmd = pmdp_get_lockless(pmdp); 2482 + if (!pmd_present(pmd)) 2483 + return 0; 2484 + 2485 + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) { 2486 + ptl = pmd_lock(mm, pmdp); 2487 + pmd = pmdp_get(pmdp); 2488 + 2489 + if (!pmd_present(pmd)) { 2490 + goto not_found_unlock; 2491 + } else if (pmd_leaf(pmd)) { 2492 + page = vm_normal_page_pmd(vma, addr, pmd); 2493 + if (!page) 2494 + goto not_found_unlock; 2495 + folio = page_folio(page); 2496 + 2497 + if (folio_is_zone_device(folio) || !folio_test_anon(folio)) 2498 + goto not_found_unlock; 2499 + 2500 + page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT); 2501 + goto found_unlock; 2502 + } 2503 + spin_unlock(ptl); 2504 + } 2505 + 2506 + start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); 2507 + if (!start_ptep) 2508 + return 0; 2509 + 2510 + for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) { 2511 + pte = ptep_get(ptep); 2512 + 2513 + if (!pte_present(pte)) 2514 + continue; 2515 + 2516 + page = vm_normal_page(vma, addr, pte); 2517 + if (!page) 2518 + continue; 2519 + folio = page_folio(page); 2520 + 2521 + if (folio_is_zone_device(folio) || !folio_test_anon(folio)) 2522 + continue; 2523 + goto found_unlock; 2524 + } 2525 + 2526 + not_found_unlock: 2527 + spin_unlock(ptl); 2528 + if (start_ptep) 2529 + 
pte_unmap(start_ptep); 2530 + return 0; 2531 + found_unlock: 2532 + folio_get(folio); 2533 + spin_unlock(ptl); 2534 + if (start_ptep) 2535 + pte_unmap(start_ptep); 2536 + private->page = page; 2537 + private->folio = folio; 2538 + private->addr = addr; 2539 + return 1; 2540 + } 2541 + 2542 + static struct mm_walk_ops ksm_next_page_ops = { 2543 + .pmd_entry = ksm_next_page_pmd_entry, 2544 + .walk_lock = PGWALK_RDLOCK, 2545 + }; 2546 + 2458 2547 static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) 2459 2548 { 2460 2549 struct mm_struct *mm; ··· 2631 2542 ksm_scan.address = vma->vm_end; 2632 2543 2633 2544 while (ksm_scan.address < vma->vm_end) { 2545 + struct ksm_next_page_arg ksm_next_page_arg; 2634 2546 struct page *tmp_page = NULL; 2635 - struct folio_walk fw; 2636 2547 struct folio *folio; 2637 2548 2638 2549 if (ksm_test_exit(mm)) 2639 2550 break; 2640 2551 2641 - folio = folio_walk_start(&fw, vma, ksm_scan.address, 0); 2642 - if (folio) { 2643 - if (!folio_is_zone_device(folio) && 2644 - folio_test_anon(folio)) { 2645 - folio_get(folio); 2646 - tmp_page = fw.page; 2647 - } 2648 - folio_walk_end(&fw, vma); 2552 + int found; 2553 + 2554 + found = walk_page_range_vma(vma, ksm_scan.address, 2555 + vma->vm_end, 2556 + &ksm_next_page_ops, 2557 + &ksm_next_page_arg); 2558 + 2559 + if (found > 0) { 2560 + folio = ksm_next_page_arg.folio; 2561 + tmp_page = ksm_next_page_arg.page; 2562 + ksm_scan.address = ksm_next_page_arg.addr; 2563 + } else { 2564 + VM_WARN_ON_ONCE(found < 0); 2565 + ksm_scan.address = vma->vm_end - PAGE_SIZE; 2649 2566 } 2650 2567 2651 2568 if (tmp_page) {
+19 -1
mm/memory.c
··· 65 65 #include <linux/gfp.h> 66 66 #include <linux/migrate.h> 67 67 #include <linux/string.h> 68 + #include <linux/shmem_fs.h> 68 69 #include <linux/memory-tiers.h> 69 70 #include <linux/debugfs.h> 70 71 #include <linux/userfaultfd_k.h> ··· 5502 5501 return ret; 5503 5502 } 5504 5503 5504 + if (!needs_fallback && vma->vm_file) { 5505 + struct address_space *mapping = vma->vm_file->f_mapping; 5506 + pgoff_t file_end; 5507 + 5508 + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); 5509 + 5510 + /* 5511 + * Do not allow to map with PTEs beyond i_size and with PMD 5512 + * across i_size to preserve SIGBUS semantics. 5513 + * 5514 + * Make an exception for shmem/tmpfs that for long time 5515 + * intentionally mapped with PMDs across i_size. 5516 + */ 5517 + needs_fallback = !shmem_mapping(mapping) && 5518 + file_end < folio_next_index(folio); 5519 + } 5520 + 5505 5521 if (pmd_none(*vmf->pmd)) { 5506 - if (folio_test_pmd_mappable(folio)) { 5522 + if (!needs_fallback && folio_test_pmd_mappable(folio)) { 5507 5523 ret = do_set_pmd(vmf, folio, page); 5508 5524 if (ret != VM_FAULT_FALLBACK) 5509 5525 return ret;
+1 -1
mm/mm_init.c
··· 2469 2469 panic("Failed to allocate %s hash table\n", tablename); 2470 2470 2471 2471 pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", 2472 - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, 2472 + tablename, 1UL << log2qty, get_order(size), size, 2473 2473 virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear"); 2474 2474 2475 2475 if (_hash_shift)
+1 -1
mm/mremap.c
··· 187 187 if (!folio || !folio_test_large(folio)) 188 188 return 1; 189 189 190 - return folio_pte_batch(folio, ptep, pte, max_nr); 190 + return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE); 191 191 } 192 192 193 193 static int move_ptes(struct pagetable_move_control *pmc,
+1 -1
mm/secretmem.c
··· 82 82 __folio_mark_uptodate(folio); 83 83 err = filemap_add_folio(mapping, folio, offset, gfp); 84 84 if (unlikely(err)) { 85 - folio_put(folio); 86 85 /* 87 86 * If a split of large page was required, it 88 87 * already happened when we marked the page invalid 89 88 * which guarantees that this call won't fail 90 89 */ 91 90 set_direct_map_default_noflush(folio_page(folio, 0)); 91 + folio_put(folio); 92 92 if (err == -EEXIST) 93 93 goto retry; 94 94
+6 -3
mm/shmem.c
··· 1882 1882 struct shmem_inode_info *info = SHMEM_I(inode); 1883 1883 unsigned long suitable_orders = 0; 1884 1884 struct folio *folio = NULL; 1885 + pgoff_t aligned_index; 1885 1886 long pages; 1886 1887 int error, order; 1887 1888 ··· 1896 1895 order = highest_order(suitable_orders); 1897 1896 while (suitable_orders) { 1898 1897 pages = 1UL << order; 1899 - index = round_down(index, pages); 1900 - folio = shmem_alloc_folio(gfp, order, info, index); 1901 - if (folio) 1898 + aligned_index = round_down(index, pages); 1899 + folio = shmem_alloc_folio(gfp, order, info, aligned_index); 1900 + if (folio) { 1901 + index = aligned_index; 1902 1902 goto allocated; 1903 + } 1903 1904 1904 1905 if (pages == HPAGE_PMD_NR) 1905 1906 count_vm_event(THP_FILE_FALLBACK);
+5 -1
mm/slub.c
··· 2046 2046 if (slab_exts) { 2047 2047 unsigned int offs = obj_to_index(obj_exts_slab->slab_cache, 2048 2048 obj_exts_slab, obj_exts); 2049 - /* codetag should be NULL */ 2049 + 2050 + if (unlikely(is_codetag_empty(&slab_exts[offs].ref))) 2051 + return; 2052 + 2053 + /* codetag should be NULL here */ 2050 2054 WARN_ON(slab_exts[offs].ref.ct); 2051 2055 set_codetag_empty(&slab_exts[offs].ref); 2052 2056 }
+31 -6
mm/truncate.c
··· 177 177 return 0; 178 178 } 179 179 180 + static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, 181 + unsigned long min_order) 182 + { 183 + enum ttu_flags ttu_flags = 184 + TTU_SYNC | 185 + TTU_SPLIT_HUGE_PMD | 186 + TTU_IGNORE_MLOCK; 187 + int ret; 188 + 189 + ret = try_folio_split_to_order(folio, split_at, min_order); 190 + 191 + /* 192 + * If the split fails, unmap the folio, so it will be refaulted 193 + * with PTEs to respect SIGBUS semantics. 194 + * 195 + * Make an exception for shmem/tmpfs that for long time 196 + * intentionally mapped with PMDs across i_size. 197 + */ 198 + if (ret && !shmem_mapping(folio->mapping)) { 199 + try_to_unmap(folio, ttu_flags); 200 + WARN_ON(folio_mapped(folio)); 201 + } 202 + 203 + return ret; 204 + } 205 + 180 206 /* 181 207 * Handle partial folios. The folio may be entirely within the 182 208 * range if a split has raced with us. If not, we zero the part of the ··· 220 194 size_t size = folio_size(folio); 221 195 unsigned int offset, length; 222 196 struct page *split_at, *split_at2; 197 + unsigned int min_order; 223 198 224 199 if (pos < start) 225 200 offset = start - pos; ··· 250 223 if (!folio_test_large(folio)) 251 224 return true; 252 225 226 + min_order = mapping_min_folio_order(folio->mapping); 253 227 split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); 254 - if (!try_folio_split(folio, split_at, NULL)) { 228 + if (!try_folio_split_or_unmap(folio, split_at, min_order)) { 255 229 /* 256 230 * try to split at offset + length to make sure folios within 257 231 * the range can be dropped, especially to avoid memory waste ··· 276 248 if (!folio_trylock(folio2)) 277 249 goto out; 278 250 279 - /* 280 - * make sure folio2 is large and does not change its mapping. 281 - * Its split result does not matter here. 
282 - */ 251 + /* make sure folio2 is large and does not change its mapping */ 283 252 if (folio_test_large(folio2) && 284 253 folio2->mapping == folio->mapping) 285 - try_folio_split(folio2, split_at2, NULL); 254 + try_folio_split_or_unmap(folio2, split_at2, min_order); 286 255 287 256 folio_unlock(folio2); 288 257 out:
+8 -6
scripts/decode_stacktrace.sh
··· 277 277 fi 278 278 done 279 279 280 - if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then 281 - words[$last-1]="${words[$last-1]} ${words[$last]}" 282 - unset words[$last] spaces[$last] 283 - last=$(( $last - 1 )) 284 - fi 285 - 286 280 # Extract info after the symbol if present. E.g.: 287 281 # func_name+0x54/0x80 (P) 288 282 # ^^^ ··· 285 291 local info_str="" 286 292 if [[ ${words[$last]} =~ \([A-Z]*\) ]]; then 287 293 info_str=${words[$last]} 294 + unset words[$last] spaces[$last] 295 + last=$(( $last - 1 )) 296 + fi 297 + 298 + # Join module name with its build id if present, as these were 299 + # split during tokenization (e.g. "[module" and "modbuildid]"). 300 + if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then 301 + words[$last-1]="${words[$last-1]} ${words[$last]}" 288 302 unset words[$last] spaces[$last] 289 303 last=$(( $last - 1 )) 290 304 fi