Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'mm-hotfixes-stable-2024-11-09-22-40' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
"20 hotfixes, 14 of which are cc:stable.

Three affect DAMON. Lorenzo's five-patch series to address the
mmap_region error handling is here also.

Apart from that, various singletons"

* tag 'mm-hotfixes-stable-2024-11-09-22-40' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
mailmap: add entry for Thorsten Blum
ocfs2: remove entry once instead of null-ptr-dereference in ocfs2_xa_remove()
signal: restore the override_rlimit logic
fs/proc: fix compile warning about variable 'vmcore_mmap_ops'
ucounts: fix counter leak in inc_rlimit_get_ucounts()
selftests: hugetlb_dio: check for initial conditions to skip in the start
mm: fix docs for the kernel parameter ``thp_anon=``
mm/damon/core: avoid overflow in damon_feed_loop_next_input()
mm/damon/core: handle zero schemes apply interval
mm/damon/core: handle zero {aggregation,ops_update} intervals
mm/mlock: set the correct prev on failure
objpool: fix to make percpu slot allocation more robust
mm/page_alloc: keep track of free highatomic
mm: resolve faulty mmap_region() error path behaviour
mm: refactor arch_calc_vm_flag_bits() and arm64 MTE handling
mm: refactor map_deny_write_exec()
mm: unconditionally close VMAs on error
mm: avoid unsafe VMA hook invocation when error arises on mmap hook
mm/thp: fix deferred split unqueue naming and locking
mm/thp: fix deferred split queue not partially_mapped

+329 -172
+1
.mailmap
···
 Thomas Graf <tgraf@suug.ch>
 Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
 Thomas Pedersen <twp@codeaurora.org>
+Thorsten Blum <thorsten.blum@linux.dev> <thorsten.blum@toblux.com>
 Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
 Tingwei Zhang <quic_tingwei@quicinc.com> <tingwei@codeaurora.org>
 Tirupathi Reddy <quic_tirupath@quicinc.com> <tirupath@codeaurora.org>
+1 -1
Documentation/admin-guide/kernel-parameters.txt
···
 			0: no polling (default)
 
 	thp_anon=	[KNL]
-			Format: <size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>
+			Format: <size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>
 			state is one of "always", "madvise", "never" or "inherit".
 			Control the default behavior of the system with respect
 			to anonymous transparent hugepages.
+1 -1
Documentation/admin-guide/mm/transhuge.rst
···
 kernel command line.
 
 Alternatively, each supported anonymous THP size can be controlled by
-passing ``thp_anon=<size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>``,
+passing ``thp_anon=<size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>``,
 where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and
 supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``,
 ``never`` or ``inherit``.
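For illustration only (sizes and states chosen arbitrarily, not taken from the patch), a command line matching the corrected format could look like:

    thp_anon=16K-64K:always;128K,512K:inherit;1M-2M:never

i.e. comma-separated sizes and dash-separated size ranges, each size carrying its own K/M/G suffix and each group mapped to one of the four states.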
+7 -3
arch/arm64/include/asm/mman.h
···
 
 #ifndef BUILD_VDSO
 #include <linux/compiler.h>
+#include <linux/fs.h>
+#include <linux/shmem_fs.h>
 #include <linux/types.h>
 
 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
···
 }
 #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
 
-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
+						   unsigned long flags)
 {
 	/*
 	 * Only allow MTE on anonymous mappings as these are guaranteed to be
 	 * backed by tags-capable memory. The vm_flags may be overridden by a
 	 * filesystem supporting MTE (RAM-based).
 	 */
-	if (system_supports_mte() && (flags & MAP_ANONYMOUS))
+	if (system_supports_mte() &&
+	    ((flags & MAP_ANONYMOUS) || shmem_file(file)))
 		return VM_MTE_ALLOWED;
 
 	return 0;
 }
-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
 
 static inline bool arch_validate_prot(unsigned long prot,
 				      unsigned long addr __always_unused)
+3 -2
arch/parisc/include/asm/mman.h
···
 #ifndef __ASM_MMAN_H__
 #define __ASM_MMAN_H__
 
+#include <linux/fs.h>
 #include <uapi/asm/mman.h>
 
 /* PARISC cannot allow mdwe as it needs writable stacks */
···
 }
 #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
 
-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags)
 {
 	/*
 	 * The stack on parisc grows upwards, so if userspace requests memory
···
 
 	return 0;
 }
-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
 
 #endif /* __ASM_MMAN_H__ */
+1 -2
fs/ocfs2/xattr.c
···
 			rc = 0;
 			ocfs2_xa_cleanup_value_truncate(loc, "removing",
 							orig_clusters);
-			if (rc)
-				goto out;
+			goto out;
 		}
 	}
 
+5 -4
fs/proc/vmcore.c
···
 #endif
 }
 
-static const struct vm_operations_struct vmcore_mmap_ops = {
-	.fault = mmap_vmcore_fault,
-};
-
 /**
  * vmcore_alloc_buf - allocate buffer in vmalloc memory
  * @size: size of buffer
···
  * virtually contiguous user-space in ELF layout.
  */
 #ifdef CONFIG_MMU
+
+static const struct vm_operations_struct vmcore_mmap_ops = {
+	.fault = mmap_vmcore_fault,
+};
+
 /*
  * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
  * reported as not being ram with the zero page.
+22 -6
include/linux/mman.h
···
 #ifndef _LINUX_MMAN_H
 #define _LINUX_MMAN_H
 
+#include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/percpu_counter.h>
 
···
 #endif
 
 #ifndef arch_calc_vm_flag_bits
-#define arch_calc_vm_flag_bits(flags) 0
+#define arch_calc_vm_flag_bits(file, flags) 0
 #endif
 
 #ifndef arch_validate_prot
···
  * Combine the mmap "flags" argument into "vm_flags" used internally.
  */
 static inline unsigned long
-calc_vm_flag_bits(unsigned long flags)
+calc_vm_flag_bits(struct file *file, unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
 	       _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED) |
 	       _calc_vm_trans(flags, MAP_SYNC, VM_SYNC) |
 	       _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
-	       arch_calc_vm_flag_bits(flags);
+	       arch_calc_vm_flag_bits(file, flags);
 }
 
 unsigned long vm_commit_limit(void);
···
  *
  * d) mmap(PROT_READ | PROT_EXEC)
  *    mmap(PROT_READ | PROT_EXEC | PROT_BTI)
+ *
+ * This is only applicable if the user has set the Memory-Deny-Write-Execute
+ * (MDWE) protection mask for the current process.
+ *
+ * @old specifies the VMA flags the VMA originally possessed, and @new the ones
+ * we propose to set.
+ *
+ * Return: false if proposed change is OK, true if not ok and should be denied.
  */
-static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags)
+static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
 {
+	/* If MDWE is disabled, we have nothing to deny. */
 	if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
 		return false;
 
-	if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE))
+	/* If the new VMA is not executable, we have nothing to deny. */
+	if (!(new & VM_EXEC))
+		return false;
+
+	/* Under MDWE we do not accept newly writably executable VMAs... */
+	if (new & VM_WRITE)
 		return true;
 
-	if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC))
+	/* ...nor previously non-executable VMAs becoming executable. */
+	if (!(old & VM_EXEC))
 		return true;
 
 	return false;
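To make the new old/new-flags contract concrete, here is a small user-space sketch of the rule that map_deny_write_exec() now encodes. This is not kernel code: the VM_WRITE/VM_EXEC macros are redefined locally purely for illustration, and the MDWE "is it enabled" check is omitted.

/* Hypothetical user-space sketch of the MDWE decision, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define VM_WRITE 0x00000002UL	/* illustrative local definitions */
#define VM_EXEC  0x00000004UL

static bool deny_write_exec(unsigned long old, unsigned long new)
{
	if (!(new & VM_EXEC))
		return false;	/* result is not executable: allowed */
	if (new & VM_WRITE)
		return true;	/* writable and executable: denied */
	if (!(old & VM_EXEC))
		return true;	/* gaining exec it never had: denied */
	return false;		/* exec-only stays exec-only: allowed */
}

int main(void)
{
	printf("%d\n", deny_write_exec(VM_EXEC, VM_EXEC));		/* 0 */
	printf("%d\n", deny_write_exec(0, VM_EXEC));			/* 1 */
	printf("%d\n", deny_write_exec(VM_EXEC, VM_EXEC | VM_WRITE));	/* 1 */
	return 0;
}

Passing the same mask for both arguments, as the new mmap_region() wrapper does, reduces to "deny any mapping created writable and executable", while mprotect() passes the VMA's current flags as @old.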
+1
include/linux/mmzone.h
···
 	unsigned long watermark_boost;
 
 	unsigned long nr_reserved_highatomic;
+	unsigned long nr_free_highatomic;
 
 	/*
 	 * We don't know if the memory that we're going to allocate will be
+2 -1
include/linux/user_namespace.h
···
 
 long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
-long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type,
+			    bool override_rlimit);
 void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type);
 bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max);
 
+2 -1
kernel/signal.c
···
 	 */
 	rcu_read_lock();
 	ucounts = task_ucounts(t);
-	sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
+	sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING,
+					    override_rlimit);
 	rcu_read_unlock();
 	if (!sigpending)
 		return NULL;
+5 -4
kernel/ucount.c
···
 	do_dec_rlimit_put_ucounts(ucounts, NULL, type);
 }
 
-long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type)
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type,
+			    bool override_rlimit)
 {
 	/* Caller must hold a reference to ucounts */
 	struct ucounts *iter;
···
 	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 		long new = atomic_long_add_return(1, &iter->rlimit[type]);
 		if (new < 0 || new > max)
-			goto unwind;
+			goto dec_unwind;
 		if (iter == ucounts)
 			ret = new;
-		max = get_userns_rlimit_max(iter->ns, type);
+		if (!override_rlimit)
+			max = get_userns_rlimit_max(iter->ns, type);
 		/*
 		 * Grab an extra ucount reference for the caller when
 		 * the rlimit count was previously 0.
···
 dec_unwind:
 	dec = atomic_long_sub_return(1, &iter->rlimit[type]);
 	WARN_ON_ONCE(dec < 0);
-unwind:
 	do_dec_rlimit_put_ucounts(ucounts, iter, type);
 	return 0;
 }
+12 -6
lib/objpool.c
···
 		 * warm caches and TLB hits. in default vmalloc is used to
 		 * reduce the pressure of kernel slab system. as we know,
 		 * mimimal size of vmalloc is one page since vmalloc would
-		 * always align the requested size to page size
+		 * always align the requested size to page size.
+		 * but if vmalloc fails or it is not available (e.g. GFP_ATOMIC)
+		 * allocate percpu slot with kmalloc.
 		 */
-		if ((pool->gfp & GFP_ATOMIC) == GFP_ATOMIC)
-			slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
-		else
+		slot = NULL;
+
+		if ((pool->gfp & (GFP_ATOMIC | GFP_KERNEL)) != GFP_ATOMIC)
 			slot = __vmalloc_node(size, sizeof(void *), pool->gfp,
 				cpu_to_node(i), __builtin_return_address(0));
-		if (!slot)
-			return -ENOMEM;
+
+		if (!slot) {
+			slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
+			if (!slot)
+				return -ENOMEM;
+		}
 		memset(slot, 0, size);
 		pool->cpu_slots[i] = slot;
 
+28 -14
mm/damon/core.c
···
 	damon_for_each_scheme(s, c) {
 		struct damos_quota *quota = &s->quota;
 
-		if (c->passed_sample_intervals != s->next_apply_sis)
+		if (c->passed_sample_intervals < s->next_apply_sis)
 			continue;
 
 		if (!s->wmarks.activated)
···
 		unsigned long score)
 {
 	const unsigned long goal = 10000;
-	unsigned long score_goal_diff = max(goal, score) - min(goal, score);
-	unsigned long score_goal_diff_bp = score_goal_diff * 10000 / goal;
-	unsigned long compensation = last_input * score_goal_diff_bp / 10000;
 	/* Set minimum input as 10000 to avoid compensation be zero */
 	const unsigned long min_input = 10000;
+	unsigned long score_goal_diff, compensation;
+	bool over_achieving = score > goal;
 
-	if (goal > score)
+	if (score == goal)
+		return last_input;
+	if (score >= goal * 2)
+		return min_input;
+
+	if (over_achieving)
+		score_goal_diff = score - goal;
+	else
+		score_goal_diff = goal - score;
+
+	if (last_input < ULONG_MAX / score_goal_diff)
+		compensation = last_input * score_goal_diff / goal;
+	else
+		compensation = last_input / goal * score_goal_diff;
+
+	if (over_achieving)
+		return max(last_input - compensation, min_input);
+	if (last_input < ULONG_MAX - compensation)
 		return last_input + compensation;
-	if (last_input > compensation + min_input)
-		return last_input - compensation;
-	return min_input;
+	return ULONG_MAX;
 }
 
 #ifdef CONFIG_PSI
···
 	bool has_schemes_to_apply = false;
 
 	damon_for_each_scheme(s, c) {
-		if (c->passed_sample_intervals != s->next_apply_sis)
+		if (c->passed_sample_intervals < s->next_apply_sis)
 			continue;
 
 		if (!s->wmarks.activated)
···
 	}
 
 	damon_for_each_scheme(s, c) {
-		if (c->passed_sample_intervals != s->next_apply_sis)
+		if (c->passed_sample_intervals < s->next_apply_sis)
 			continue;
-		s->next_apply_sis +=
+		s->next_apply_sis = c->passed_sample_intervals +
 			(s->apply_interval_us ? s->apply_interval_us :
 			 c->attrs.aggr_interval) / sample_interval;
 	}
···
 		if (ctx->ops.check_accesses)
 			max_nr_accesses = ctx->ops.check_accesses(ctx);
 
-		if (ctx->passed_sample_intervals == next_aggregation_sis) {
+		if (ctx->passed_sample_intervals >= next_aggregation_sis) {
 			kdamond_merge_regions(ctx,
 					max_nr_accesses / 10,
 					sz_limit);
···
 
 		sample_interval = ctx->attrs.sample_interval ?
 			ctx->attrs.sample_interval : 1;
-		if (ctx->passed_sample_intervals == next_aggregation_sis) {
+		if (ctx->passed_sample_intervals >= next_aggregation_sis) {
 			ctx->next_aggregation_sis = next_aggregation_sis +
 				ctx->attrs.aggr_interval / sample_interval;
 
···
 			ctx->ops.reset_aggregated(ctx);
 		}
 
-		if (ctx->passed_sample_intervals == next_ops_update_sis) {
+		if (ctx->passed_sample_intervals >= next_ops_update_sis) {
 			ctx->next_ops_update_sis = next_ops_update_sis +
 				ctx->attrs.ops_update_interval /
 				sample_interval;
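As a rough worked example of the corrected feedback step (a user-space sketch with made-up numbers, not the kernel function itself): with the target score fixed at 10000, a measured score of 15000 against a last input of 1000000 gives a compensation of 1000000 * 5000 / 10000 = 500000, so the next input shrinks to 500000; a score of 5000 grows it to 1500000 instead.

/* User-space sketch of the feedback arithmetic above; numbers are illustrative. */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

static unsigned long next_input(unsigned long last_input, unsigned long score)
{
	const unsigned long goal = 10000, min_input = 10000;
	unsigned long diff, compensation;
	bool over_achieving = score > goal;

	if (score == goal)
		return last_input;
	if (score >= goal * 2)
		return min_input;

	diff = over_achieving ? score - goal : goal - score;
	if (last_input < ULONG_MAX / diff)
		compensation = last_input * diff / goal;	/* no overflow */
	else
		compensation = last_input / goal * diff;	/* divide first */

	if (over_achieving)
		return last_input - compensation > min_input ?
		       last_input - compensation : min_input;
	return last_input < ULONG_MAX - compensation ?
	       last_input + compensation : ULONG_MAX;
}

int main(void)
{
	printf("%lu\n", next_input(1000000, 15000));	/* 500000: over target, shrink */
	printf("%lu\n", next_input(1000000, 5000));	/* 1500000: under target, grow */
	return 0;
}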
+43 -13
mm/huge_memory.c
···
 	return split_huge_page_to_list_to_order(&folio->page, list, ret);
 }
 
-void __folio_undo_large_rmappable(struct folio *folio)
+/*
+ * __folio_unqueue_deferred_split() is not to be called directly:
+ * the folio_unqueue_deferred_split() inline wrapper in mm/internal.h
+ * limits its calls to those folios which may have a _deferred_list for
+ * queueing THP splits, and that list is (racily observed to be) non-empty.
+ *
+ * It is unsafe to call folio_unqueue_deferred_split() until folio refcount is
+ * zero: because even when split_queue_lock is held, a non-empty _deferred_list
+ * might be in use on deferred_split_scan()'s unlocked on-stack list.
+ *
+ * If memory cgroups are enabled, split_queue_lock is in the mem_cgroup: it is
+ * therefore important to unqueue deferred split before changing folio memcg.
+ */
+bool __folio_unqueue_deferred_split(struct folio *folio)
 {
 	struct deferred_split *ds_queue;
 	unsigned long flags;
+	bool unqueued = false;
+
+	WARN_ON_ONCE(folio_ref_count(folio));
+	WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg(folio));
 
 	ds_queue = get_deferred_split_queue(folio);
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
···
 					MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
 		}
 		list_del_init(&folio->_deferred_list);
+		unqueued = true;
 	}
 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+
+	return unqueued;	/* useful for debug warnings */
 }
 
 /* partially_mapped=false won't clear PG_partially_mapped folio flag */
···
 		return;
 
 	/*
-	 * The try_to_unmap() in page reclaim path might reach here too,
-	 * this may cause a race condition to corrupt deferred split queue.
-	 * And, if page reclaim is already handling the same folio, it is
-	 * unnecessary to handle it again in shrinker.
-	 *
-	 * Check the swapcache flag to determine if the folio is being
-	 * handled by page reclaim since THP swap would add the folio into
-	 * swap cache before calling try_to_unmap().
+	 * Exclude swapcache: originally to avoid a corrupt deferred split
+	 * queue. Nowadays that is fully prevented by mem_cgroup_swapout();
+	 * but if page reclaim is already handling the same folio, it is
+	 * unnecessary to handle it again in the shrinker, so excluding
+	 * swapcache here may still be a useful optimization.
 	 */
 	if (folio_test_swapcache(folio))
 		return;
···
 	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
 	unsigned long flags;
 	LIST_HEAD(list);
-	struct folio *folio, *next;
-	int split = 0;
+	struct folio *folio, *next, *prev = NULL;
+	int split = 0, removed = 0;
 
 #ifdef CONFIG_MEMCG
 	if (sc->memcg)
···
 		 */
 		if (!did_split && !folio_test_partially_mapped(folio)) {
 			list_del_init(&folio->_deferred_list);
-			ds_queue->split_queue_len--;
+			removed++;
+		} else {
+			/*
+			 * That unlocked list_del_init() above would be unsafe,
+			 * unless its folio is separated from any earlier folios
+			 * left on the list (which may be concurrently unqueued)
+			 * by one safe folio with refcount still raised.
+			 */
+			swap(folio, prev);
 		}
-		folio_put(folio);
+		if (folio)
+			folio_put(folio);
 	}
 
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	list_splice_tail(&list, &ds_queue->split_queue);
+	ds_queue->split_queue_len -= removed;
 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+
+	if (prev)
+		folio_put(prev);
 
 	/*
 	 * Stop shrinker if we didn't split any page, but the queue is empty.
+50 -5
mm/internal.h
···
 	return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
 }
 
+/*
+ * This is a file-backed mapping, and is about to be memory mapped - invoke its
+ * mmap hook and safely handle error conditions. On error, VMA hooks will be
+ * mutated.
+ *
+ * @file: File which backs the mapping.
+ * @vma:  VMA which we are mapping.
+ *
+ * Returns: 0 if success, error otherwise.
+ */
+static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
+{
+	int err = call_mmap(file, vma);
+
+	if (likely(!err))
+		return 0;
+
+	/*
+	 * OK, we tried to call the file hook for mmap(), but an error
+	 * arose. The mapping is in an inconsistent state and we must not invoke
+	 * any further hooks on it.
+	 */
+	vma->vm_ops = &vma_dummy_vm_ops;
+
+	return err;
+}
+
+/*
+ * If the VMA has a close hook then close it, and since closing it might leave
+ * it in an inconsistent state which makes the use of any hooks suspect, clear
+ * them down by installing dummy empty hooks.
+ */
+static inline void vma_close(struct vm_area_struct *vma)
+{
+	if (vma->vm_ops && vma->vm_ops->close) {
+		vma->vm_ops->close(vma);
+
+		/*
+		 * The mapping is in an inconsistent state, and no further hooks
+		 * may be invoked upon it.
+		 */
+		vma->vm_ops = &vma_dummy_vm_ops;
+	}
+}
+
 #ifdef CONFIG_MMU
 
 /* Flags for folio_pte_batch(). */
···
 #endif
 }
 
-void __folio_undo_large_rmappable(struct folio *folio);
-static inline void folio_undo_large_rmappable(struct folio *folio)
+bool __folio_unqueue_deferred_split(struct folio *folio);
+static inline bool folio_unqueue_deferred_split(struct folio *folio)
 {
 	if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio))
-		return;
+		return false;
 
 	/*
 	 * At this point, there is no one trying to add the folio to
···
 	 * to check without acquiring the split_queue_lock.
 	 */
 	if (data_race(list_empty(&folio->_deferred_list)))
-		return;
+		return false;
 
-	__folio_undo_large_rmappable(folio);
+	return __folio_unqueue_deferred_split(folio);
 }
 
 static inline struct folio *page_rmappable_folio(struct page *page)
+25
mm/memcontrol-v1.c
···
 	css_get(&to->css);
 	css_put(&from->css);
 
+	/* Warning should never happen, so don't worry about refcount non-0 */
+	WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
 	folio->memcg_data = (unsigned long)to;
 
 	__folio_memcg_unlock(from);
···
 	enum mc_target_type target_type;
 	union mc_target target;
 	struct folio *folio;
+	bool tried_split_before = false;
 
+retry_pmd:
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
 		if (mc.precharge < HPAGE_PMD_NR) {
···
 		target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
 		if (target_type == MC_TARGET_PAGE) {
 			folio = target.folio;
+			/*
+			 * Deferred split queue locking depends on memcg,
+			 * and unqueue is unsafe unless folio refcount is 0:
+			 * split or skip if on the queue? first try to split.
+			 */
+			if (!list_empty(&folio->_deferred_list)) {
+				spin_unlock(ptl);
+				if (!tried_split_before)
+					split_folio(folio);
+				folio_unlock(folio);
+				folio_put(folio);
+				if (tried_split_before)
+					return 0;
+				tried_split_before = true;
+				goto retry_pmd;
+			}
+			/*
+			 * So long as that pmd lock is held, the folio cannot
+			 * be racily added to the _deferred_list, because
+			 * __folio_remove_rmap() will find !partially_mapped.
+			 */
 			if (folio_isolate_lru(folio)) {
 				if (!mem_cgroup_move_account(folio, true,
 							     mc.from, mc.to)) {
+5 -4
mm/memcontrol.c
···
 	struct obj_cgroup *objcg;
 
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
-	VM_BUG_ON_FOLIO(folio_order(folio) > 1 &&
-			!folio_test_hugetlb(folio) &&
-			!list_empty(&folio->_deferred_list) &&
-			folio_test_partially_mapped(folio), folio);
 
 	/*
 	 * Nobody should be changing or seriously looking at
···
 	ug->nr_memory += nr_pages;
 	ug->pgpgout++;
 
+	WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
 	folio->memcg_data = 0;
 
···
 
 	/* Transfer the charge and the css ref */
 	commit_charge(new, memcg);
+
+	/* Warning should never happen, so don't worry about refcount non-0 */
+	WARN_ON_ONCE(folio_unqueue_deferred_split(old));
 	old->memcg_data = 0;
 
···
 	VM_BUG_ON_FOLIO(oldid, folio);
 	mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
 
+	folio_unqueue_deferred_split(folio);
 	folio->memcg_data = 0;
 
 	if (!mem_cgroup_is_root(memcg))
+2 -2
mm/migrate.c
···
 	    folio_test_large_rmappable(folio)) {
 		if (!folio_ref_freeze(folio, expected_count))
 			return -EAGAIN;
-		folio_undo_large_rmappable(folio);
+		folio_unqueue_deferred_split(folio);
 		folio_ref_unfreeze(folio, expected_count);
 	}
 
···
 	}
 
 	/* Take off deferred split queue while frozen and memcg set */
-	folio_undo_large_rmappable(folio);
+	folio_unqueue_deferred_split(folio);
 
 	/*
 	 * Now we know that no one else is looking at the folio:
+6 -3
mm/mlock.c
···
 	}
 
 	for_each_vma(vmi, vma) {
+		int error;
 		vm_flags_t newflags;
 
 		newflags = vma->vm_flags & ~VM_LOCKED_MASK;
 		newflags |= to_add;
 
-		/* Ignore errors */
-		mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end,
-			    newflags);
+		error = mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end,
+				    newflags);
+		/* Ignore errors, but prev needs fixing up. */
+		if (error)
+			prev = vma;
 		cond_resched();
 	}
 out:
+70 -60
mm/mmap.c
···
 	 * to. we assume access permissions have been handled by the open
 	 * of the memory object, so we don't do any here.
 	 */
-	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(file, flags) |
 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
 	/* Obtain the address to map to. we verify (or select) it and ensure
···
 	return do_vmi_munmap(&vmi, mm, start, len, uf, false);
 }
 
-unsigned long mmap_region(struct file *file, unsigned long addr,
+static unsigned long __mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
 		struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = NULL;
 	pgoff_t pglen = PHYS_PFN(len);
-	struct vm_area_struct *merge;
 	unsigned long charged = 0;
 	struct vma_munmap_struct vms;
 	struct ma_state mas_detach;
 	struct maple_tree mt_detach;
 	unsigned long end = addr + len;
-	bool writable_file_mapping = false;
 	int error;
 	VMA_ITERATOR(vmi, mm, addr);
 	VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);
···
 		/*
 		 * clear PTEs while the vma is still in the tree so that rmap
 		 * cannot race with the freeing later in the truncate scenario.
-		 * This is also needed for call_mmap(), which is why vm_ops
+		 * This is also needed for mmap_file(), which is why vm_ops
 		 * close function is called.
 		 */
 		vms_clean_up_area(&vms, &mas_detach);
···
 	vm_flags_init(vma, vm_flags);
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 
+	if (vma_iter_prealloc(&vmi, vma)) {
+		error = -ENOMEM;
+		goto free_vma;
+	}
+
 	if (file) {
 		vma->vm_file = get_file(file);
-		error = call_mmap(file, vma);
+		error = mmap_file(file, vma);
 		if (error)
-			goto unmap_and_free_vma;
+			goto unmap_and_free_file_vma;
 
-		if (vma_is_shared_maywrite(vma)) {
-			error = mapping_map_writable(file->f_mapping);
-			if (error)
-				goto close_and_free_vma;
-
-			writable_file_mapping = true;
-		}
-
+		/* Drivers cannot alter the address of the VMA. */
+		WARN_ON_ONCE(addr != vma->vm_start);
 		/*
-		 * Expansion is handled above, merging is handled below.
-		 * Drivers should not alter the address of the VMA.
+		 * Drivers should not permit writability when previously it was
+		 * disallowed.
 		 */
-		if (WARN_ON((addr != vma->vm_start))) {
-			error = -EINVAL;
-			goto close_and_free_vma;
-		}
+		VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
+				!(vm_flags & VM_MAYWRITE) &&
+				(vma->vm_flags & VM_MAYWRITE));
 
 		vma_iter_config(&vmi, addr, end);
 		/*
-		 * If vm_flags changed after call_mmap(), we should try merge
+		 * If vm_flags changed after mmap_file(), we should try merge
 		 * vma again as we may succeed this time.
 		 */
 		if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
+			struct vm_area_struct *merge;
+
 			vmg.flags = vma->vm_flags;
 			/* If this fails, state is reset ready for a reattempt. */
 			merge = vma_merge_new_range(&vmg);
···
 				vma = merge;
 				/* Update vm_flags to pick up the change. */
 				vm_flags = vma->vm_flags;
-				goto unmap_writable;
+				goto file_expanded;
 			}
 			vma_iter_config(&vmi, addr, end);
 		}
···
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
 		if (error)
-			goto free_vma;
+			goto free_iter_vma;
 	} else {
 		vma_set_anonymous(vma);
 	}
 
-	if (map_deny_write_exec(vma, vma->vm_flags)) {
-		error = -EACCES;
-		goto close_and_free_vma;
-	}
-
-	/* Allow architectures to sanity-check the vm_flags */
-	if (!arch_validate_flags(vma->vm_flags)) {
-		error = -EINVAL;
-		goto close_and_free_vma;
-	}
-
-	if (vma_iter_prealloc(&vmi, vma)) {
-		error = -ENOMEM;
-		goto close_and_free_vma;
-	}
+#ifdef CONFIG_SPARC64
+	/* TODO: Fix SPARC ADI! */
+	WARN_ON_ONCE(!arch_validate_flags(vm_flags));
+#endif
 
 	/* Lock the VMA since it is modified after insertion into VMA tree */
 	vma_start_write(vma);
···
 	 */
 	khugepaged_enter_vma(vma, vma->vm_flags);
 
-	/* Once vma denies write, undo our temporary denial count */
-unmap_writable:
-	if (writable_file_mapping)
-		mapping_unmap_writable(file->f_mapping);
+file_expanded:
 	file = vma->vm_file;
 	ksm_add_vma(vma);
 expanded:
···
 
 	vma_set_page_prot(vma);
 
-	validate_mm(mm);
 	return addr;
 
-close_and_free_vma:
-	if (file && !vms.closed_vm_ops && vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+unmap_and_free_file_vma:
+	fput(vma->vm_file);
+	vma->vm_file = NULL;
 
-	if (file || vma->vm_file) {
-unmap_and_free_vma:
-		fput(vma->vm_file);
-		vma->vm_file = NULL;
-
-		vma_iter_set(&vmi, vma->vm_end);
-		/* Undo any partial mapping done by a device driver. */
-		unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
-	}
-	if (writable_file_mapping)
-		mapping_unmap_writable(file->f_mapping);
+	vma_iter_set(&vmi, vma->vm_end);
+	/* Undo any partial mapping done by a device driver. */
+	unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
+free_iter_vma:
+	vma_iter_free(&vmi);
 free_vma:
 	vm_area_free(vma);
 unacct_error:
···
 abort_munmap:
 	vms_abort_munmap_vmas(&vms, &mas_detach);
 gather_failed:
-	validate_mm(mm);
 	return error;
+}
+
+unsigned long mmap_region(struct file *file, unsigned long addr,
+			  unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+			  struct list_head *uf)
+{
+	unsigned long ret;
+	bool writable_file_mapping = false;
+
+	/* Check to see if MDWE is applicable. */
+	if (map_deny_write_exec(vm_flags, vm_flags))
+		return -EACCES;
+
+	/* Allow architectures to sanity-check the vm_flags. */
+	if (!arch_validate_flags(vm_flags))
+		return -EINVAL;
+
+	/* Map writable and ensure this isn't a sealed memfd. */
+	if (file && is_shared_maywrite(vm_flags)) {
+		int error = mapping_map_writable(file->f_mapping);
+
+		if (error)
+			return error;
+		writable_file_mapping = true;
+	}
+
+	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
+
+	/* Clear our write mapping regardless of error. */
+	if (writable_file_mapping)
+		mapping_unmap_writable(file->f_mapping);
+
+	validate_mm(current->mm);
+	return ret;
 }
 
 static int __vm_munmap(unsigned long start, size_t len, bool unlock)
···
 	do {
 		if (vma->vm_flags & VM_ACCOUNT)
 			nr_accounted += vma_pages(vma);
-		remove_vma(vma, /* unreachable = */ true, /* closed = */ false);
+		remove_vma(vma, /* unreachable = */ true);
 		count++;
 		cond_resched();
 		vma = vma_next(&vmi);
+1 -1
mm/mprotect.c
···
 			break;
 		}
 
-		if (map_deny_write_exec(vma, newflags)) {
+		if (map_deny_write_exec(vma->vm_flags, newflags)) {
 			error = -EACCES;
 			break;
 		}
+4 -5
mm/nommu.c
···
  */
 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	if (vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+	vma_close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	put_nommu_region(vma->vm_region);
···
 {
 	unsigned long vm_flags;
 
-	vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags);
+	vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags);
 
 	if (!file) {
 		/*
···
 {
 	int ret;
 
-	ret = call_mmap(vma->vm_file, vma);
+	ret = mmap_file(vma->vm_file, vma);
 	if (ret == 0) {
 		vma->vm_region->vm_top = vma->vm_region->vm_end;
 		return 0;
···
 	 * happy.
 	 */
 	if (capabilities & NOMMU_MAP_DIRECT) {
-		ret = call_mmap(vma->vm_file, vma);
+		ret = mmap_file(vma->vm_file, vma);
 		/* shouldn't return success if we're not sharing */
 		if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
 			ret = -ENOSYS;
+9 -7
mm/page_alloc.c
···
 static inline void account_freepages(struct zone *zone, int nr_pages,
 				     int migratetype)
 {
+	lockdep_assert_held(&zone->lock);
+
 	if (is_migrate_isolate(migratetype))
 		return;
 
···
 
 	if (is_migrate_cma(migratetype))
 		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
+	else if (is_migrate_highatomic(migratetype))
+		WRITE_ONCE(zone->nr_free_highatomic,
+			   zone->nr_free_highatomic + nr_pages);
 }
 
 /* Used for pages not on another list */
···
 		break;
 	case 2:
 		/* the second tail page: deferred_list overlaps ->mapping */
-		if (unlikely(!list_empty(&folio->_deferred_list) &&
-			     folio_test_partially_mapped(folio))) {
-			bad_page(page, "partially mapped folio on deferred list");
+		if (unlikely(!list_empty(&folio->_deferred_list))) {
+			bad_page(page, "on deferred list");
 			goto out;
 		}
 		break;
···
 		unsigned long pfn = folio_pfn(folio);
 		unsigned int order = folio_order(folio);
 
-		folio_undo_large_rmappable(folio);
 		if (!free_pages_prepare(&folio->page, order))
 			continue;
 		/*
···
 
 	/*
 	 * If the caller does not have rights to reserves below the min
-	 * watermark then subtract the high-atomic reserves. This will
-	 * over-estimate the size of the atomic reserve but it avoids a search.
+	 * watermark then subtract the free pages reserved for highatomic.
 	 */
 	if (likely(!(alloc_flags & ALLOC_RESERVES)))
-		unusable_free += z->nr_reserved_highatomic;
+		unusable_free += READ_ONCE(z->nr_free_highatomic);
 
 #ifdef CONFIG_CMA
 	/* If allocation can't use CMA areas don't use free CMA pages */
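A quick worked example of why the watermark change matters (numbers invented for illustration): if a zone has reserved 4096 highatomic pages but only 1024 of them are actually still free, the old code subtracted the full 4096 from the usable free count and could fail allocations prematurely; with nr_free_highatomic tracked explicitly, only the 1024 genuinely unavailable pages are subtracted.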
-3
mm/shmem.c
···
 	if (ret)
 		return ret;
 
-	/* arm64 - allow memory tagging on RAM-based files */
-	vm_flags_set(vma, VM_MTE_ALLOWED);
-
 	file_accessed(file);
 	/* This is anonymous shared memory if it is unlinked at the time of mmap */
 	if (inode->i_nlink)
+2 -2
mm/swap.c
···
 	}
 
 	page_cache_release(folio);
-	folio_undo_large_rmappable(folio);
+	folio_unqueue_deferred_split(folio);
 	mem_cgroup_uncharge(folio);
 	free_unref_page(&folio->page, folio_order(folio));
 }
···
 			free_huge_folio(folio);
 			continue;
 		}
-		folio_undo_large_rmappable(folio);
+		folio_unqueue_deferred_split(folio);
 		__page_cache_release(folio, &lruvec, &flags);
 
 		if (j != i)
+5 -9
mm/vma.c
···
 /*
  * Close a vm structure and free it.
  */
-void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed)
+void remove_vma(struct vm_area_struct *vma, bool unreachable)
 {
 	might_sleep();
-	if (!closed && vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+	vma_close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
···
 	vms_clear_ptes(vms, mas_detach, true);
 	mas_set(mas_detach, 0);
 	mas_for_each(mas_detach, vma, ULONG_MAX)
-		if (vma->vm_ops && vma->vm_ops->close)
-			vma->vm_ops->close(vma);
-	vms->closed_vm_ops = true;
+		vma_close(vma);
 }
 
 /*
···
 	/* Remove and clean up vmas */
 	mas_set(mas_detach, 0);
 	mas_for_each(mas_detach, vma, ULONG_MAX)
-		remove_vma(vma, /* unreachable = */ false, vms->closed_vm_ops);
+		remove_vma(vma, /* unreachable = */ false);
 
 	vm_unacct_memory(vms->nr_accounted);
 	validate_mm(mm);
···
 	return new_vma;
 
 out_vma_link:
-	if (new_vma->vm_ops && new_vma->vm_ops->close)
-		new_vma->vm_ops->close(new_vma);
+	vma_close(new_vma);
 
 	if (new_vma->vm_file)
 		fput(new_vma->vm_file);
+2 -4
mm/vma.h
···
 	int vma_count;			/* Number of vmas that will be removed */
 	bool unlock;			/* Unlock after the munmap */
 	bool clear_ptes;		/* If there are outstanding PTE to be cleared */
-	bool closed_vm_ops;		/* call_mmap() was encountered, so vmas may be closed */
-	/* 1 byte hole */
+	/* 2 byte hole */
 	unsigned long nr_pages;		/* Number of pages being removed */
 	unsigned long locked_vm;	/* Number of locked pages */
 	unsigned long nr_accounted;	/* Number of VM_ACCOUNT pages */
···
 	vms->unmap_start = FIRST_USER_ADDRESS;
 	vms->unmap_end = USER_PGTABLES_CEILING;
 	vms->clear_ptes = false;
-	vms->closed_vm_ops = false;
 }
 #endif
 
···
 		  unsigned long start, size_t len, struct list_head *uf,
 		  bool unlock);
 
-void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed);
+void remove_vma(struct vm_area_struct *vma, bool unreachable);
 
 void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct vm_area_struct *next);
+2 -2
mm/vmscan.c
···
 		 */
 		nr_reclaimed += nr_pages;
 
-		folio_undo_large_rmappable(folio);
+		folio_unqueue_deferred_split(folio);
 		if (folio_batch_add(&free_folios, folio) == 0) {
 			mem_cgroup_uncharge_folios(&free_folios);
 			try_to_unmap_flush();
···
 		if (unlikely(folio_put_testzero(folio))) {
 			__folio_clear_lru_flags(folio);
 
-			folio_undo_large_rmappable(folio);
+			folio_unqueue_deferred_split(folio);
 			if (folio_batch_add(&free_folios, folio) == 0) {
 				spin_unlock_irq(&lruvec->lru_lock);
 				mem_cgroup_uncharge_folios(&free_folios);
+12 -7
tools/testing/selftests/mm/hugetlb_dio.c
···
 	if (fd < 0)
 		ksft_exit_fail_perror("Error opening file\n");
 
-	/* Get the free huge pages before allocation */
-	free_hpage_b = get_free_hugepages();
-	if (free_hpage_b == 0) {
-		close(fd);
-		ksft_exit_skip("No free hugepage, exiting!\n");
-	}
-
 	/* Allocate a hugetlb page */
 	orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0);
 	if (orig_buffer == MAP_FAILED) {
···
 int main(void)
 {
 	size_t pagesize = 0;
+	int fd;
 
 	ksft_print_header();
+
+	/* Open the file to DIO */
+	fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664);
+	if (fd < 0)
+		ksft_exit_skip("Unable to allocate file: %s\n", strerror(errno));
+	close(fd);
+
+	/* Check if huge pages are free */
+	if (!get_free_hugepages())
+		ksft_exit_skip("No free hugepage, exiting\n");
+
 	ksft_set_plan(4);
 
 	/* Get base page size */