Merge tag 'mm-hotfixes-stable-2025-04-16-19-59' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

+3

.mailmap

··· 322 322 Jayachandran C <c.jayachandran@gmail.com> <jchandra@digeo.com> 323 323 Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com> 324 324 <jean-philippe@linaro.org> <jean-philippe.brucker@arm.com> 325 + Jean-Michel Hautbois <jeanmichel.hautbois@yoseli.org> <jeanmichel.hautbois@ideasonboard.com> 325 326 Jean Tourrilhes <jt@hpl.hp.com> 326 327 Jeevan Shriram <quic_jshriram@quicinc.com> <jshriram@codeaurora.org> 327 328 Jeff Garzik <jgarzik@pretzel.yyz.us> ··· 439 438 Li Yang <leoyang.li@nxp.com> <leoli@freescale.com> 440 439 Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org> 441 440 Lior David <quic_liord@quicinc.com> <liord@codeaurora.org> 441 + Loic Poulain <loic.poulain@oss.qualcomm.com> <loic.poulain@linaro.org> 442 + Loic Poulain <loic.poulain@oss.qualcomm.com> <loic.poulain@intel.com> 442 443 Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com> 443 444 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com> 444 445 Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>

+1 -1

Documentation/userspace-api/mseal.rst

··· 27 27 ======= 28 28 mseal syscall signature 29 29 ----------------------- 30 - ``int mseal(void \* addr, size_t len, unsigned long flags)`` 30 + ``int mseal(void *addr, size_t len, unsigned long flags)`` 31 31 32 32 **addr**/**len**: virtual memory address range. 33 33 The address range set by **addr**/**len** must meet:

+4

MAINTAINERS

··· 10956 10956 10957 10957 HUGETLB SUBSYSTEM 10958 10958 M: Muchun Song <muchun.song@linux.dev> 10959 + R: Oscar Salvador <osalvador@suse.de> 10959 10960 L: linux-mm@kvack.org 10960 10961 S: Maintained 10961 10962 F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages ··· 12813 12812 F: scripts/Makefile.kcsan 12814 12813 12815 12814 KDUMP 12815 + M: Andrew Morton <akpm@linux-foundation.org> 12816 12816 M: Baoquan He <bhe@redhat.com> 12817 12817 R: Vivek Goyal <vgoyal@redhat.com> 12818 12818 R: Dave Young <dyoung@redhat.com> ··· 13115 13113 F: include/linux/kernfs.h 13116 13114 13117 13115 KEXEC 13116 + M: Andrew Morton <akpm@linux-foundation.org> 13117 + M: Baoquan He <bhe@redhat.com> 13118 13118 L: kexec@lists.infradead.org 13119 13119 W: http://kernel.org/pub/linux/utils/kernel/kexec/ 13120 13120 F: include/linux/kexec.h

+1

drivers/fpga/tests/fpga-bridge-test.c

··· 170 170 171 171 kunit_test_suite(fpga_bridge_suite); 172 172 173 + MODULE_DESCRIPTION("KUnit test for the FPGA Bridge"); 173 174 MODULE_LICENSE("GPL");

+1

drivers/fpga/tests/fpga-mgr-test.c

··· 330 330 331 331 kunit_test_suite(fpga_mgr_suite); 332 332 333 + MODULE_DESCRIPTION("KUnit test for the FPGA Manager"); 333 334 MODULE_LICENSE("GPL");

+1

drivers/fpga/tests/fpga-region-test.c

··· 214 214 215 215 kunit_test_suite(fpga_region_suite); 216 216 217 + MODULE_DESCRIPTION("KUnit test for the FPGA Region"); 217 218 MODULE_LICENSE("GPL");

+8 -50

include/linux/local_lock.h

··· 52 52 __local_unlock_irqrestore(lock, flags) 53 53 54 54 /** 55 - * localtry_lock_init - Runtime initialize a lock instance 55 + * local_lock_init - Runtime initialize a lock instance 56 56 */ 57 - #define localtry_lock_init(lock) __localtry_lock_init(lock) 57 + #define local_trylock_init(lock) __local_trylock_init(lock) 58 58 59 59 /** 60 - * localtry_lock - Acquire a per CPU local lock 61 - * @lock: The lock variable 62 - */ 63 - #define localtry_lock(lock) __localtry_lock(lock) 64 - 65 - /** 66 - * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts 67 - * @lock: The lock variable 68 - */ 69 - #define localtry_lock_irq(lock) __localtry_lock_irq(lock) 70 - 71 - /** 72 - * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable 73 - * interrupts 74 - * @lock: The lock variable 75 - * @flags: Storage for interrupt flags 76 - */ 77 - #define localtry_lock_irqsave(lock, flags) \ 78 - __localtry_lock_irqsave(lock, flags) 79 - 80 - /** 81 - * localtry_trylock - Try to acquire a per CPU local lock. 60 + * local_trylock - Try to acquire a per CPU local lock 82 61 * @lock: The lock variable 83 62 * 84 63 * The function can be used in any context such as NMI or HARDIRQ. Due to 85 64 * locking constrains it will _always_ fail to acquire the lock in NMI or 86 65 * HARDIRQ context on PREEMPT_RT. 87 66 */ 88 - #define localtry_trylock(lock) __localtry_trylock(lock) 67 + #define local_trylock(lock) __local_trylock(lock) 89 68 90 69 /** 91 - * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable 92 - * interrupts if acquired 70 + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable 71 + * interrupts if acquired 93 72 * @lock: The lock variable 94 73 * @flags: Storage for interrupt flags 95 74 * ··· 76 97 * locking constrains it will _always_ fail to acquire the lock in NMI or 77 98 * HARDIRQ context on PREEMPT_RT. 
78 99 */ 79 - #define localtry_trylock_irqsave(lock, flags) \ 80 - __localtry_trylock_irqsave(lock, flags) 81 - 82 - /** 83 - * local_unlock - Release a per CPU local lock 84 - * @lock: The lock variable 85 - */ 86 - #define localtry_unlock(lock) __localtry_unlock(lock) 87 - 88 - /** 89 - * local_unlock_irq - Release a per CPU local lock and enable interrupts 90 - * @lock: The lock variable 91 - */ 92 - #define localtry_unlock_irq(lock) __localtry_unlock_irq(lock) 93 - 94 - /** 95 - * localtry_unlock_irqrestore - Release a per CPU local lock and restore 96 - * interrupt flags 97 - * @lock: The lock variable 98 - * @flags: Interrupt flags to restore 99 - */ 100 - #define localtry_unlock_irqrestore(lock, flags) \ 101 - __localtry_unlock_irqrestore(lock, flags) 100 + #define local_trylock_irqsave(lock, flags) \ 101 + __local_trylock_irqsave(lock, flags) 102 102 103 103 DEFINE_GUARD(local_lock, local_lock_t __percpu*, 104 104 local_lock(_T),

+89 -122

include/linux/local_lock_internal.h

··· 15 15 #endif 16 16 } local_lock_t; 17 17 18 + /* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ 18 19 typedef struct { 19 20 local_lock_t llock; 20 - unsigned int acquired; 21 - } localtry_lock_t; 21 + u8 acquired; 22 + } local_trylock_t; 22 23 23 24 #ifdef CONFIG_DEBUG_LOCK_ALLOC 24 25 # define LOCAL_LOCK_DEBUG_INIT(lockname) \ ··· 29 28 .lock_type = LD_LOCK_PERCPU, \ 30 29 }, \ 31 30 .owner = NULL, 31 + 32 + # define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ 33 + .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, 32 34 33 35 static inline void local_lock_acquire(local_lock_t *l) 34 36 { ··· 60 56 } 61 57 #else /* CONFIG_DEBUG_LOCK_ALLOC */ 62 58 # define LOCAL_LOCK_DEBUG_INIT(lockname) 59 + # define LOCAL_TRYLOCK_DEBUG_INIT(lockname) 63 60 static inline void local_lock_acquire(local_lock_t *l) { } 64 61 static inline void local_trylock_acquire(local_lock_t *l) { } 65 62 static inline void local_lock_release(local_lock_t *l) { } ··· 68 63 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ 69 64 70 65 #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } 71 - #define INIT_LOCALTRY_LOCK(lockname) { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }} 66 + #define INIT_LOCAL_TRYLOCK(lockname) { LOCAL_TRYLOCK_DEBUG_INIT(lockname) } 72 67 73 68 #define __local_lock_init(lock) \ 74 69 do { \ ··· 81 76 local_lock_debug_init(lock); \ 82 77 } while (0) 83 78 79 + #define __local_trylock_init(lock) __local_lock_init(lock.llock) 80 + 84 81 #define __spinlock_nested_bh_init(lock) \ 85 82 do { \ 86 83 static struct lock_class_key __key; \ ··· 94 87 local_lock_debug_init(lock); \ 95 88 } while (0) 96 89 90 + #define __local_lock_acquire(lock) \ 91 + do { \ 92 + local_trylock_t *tl; \ 93 + local_lock_t *l; \ 94 + \ 95 + l = (local_lock_t *)this_cpu_ptr(lock); \ 96 + tl = (local_trylock_t *)l; \ 97 + _Generic((lock), \ 98 + local_trylock_t *: ({ \ 99 + lockdep_assert(tl->acquired == 0); \ 100 + WRITE_ONCE(tl->acquired, 1); \ 101 + }), \ 102 + 
default:(void)0); \ 103 + local_lock_acquire(l); \ 104 + } while (0) 105 + 97 106 #define __local_lock(lock) \ 98 107 do { \ 99 108 preempt_disable(); \ 100 - local_lock_acquire(this_cpu_ptr(lock)); \ 109 + __local_lock_acquire(lock); \ 101 110 } while (0) 102 111 103 112 #define __local_lock_irq(lock) \ 104 113 do { \ 105 114 local_irq_disable(); \ 106 - local_lock_acquire(this_cpu_ptr(lock)); \ 115 + __local_lock_acquire(lock); \ 107 116 } while (0) 108 117 109 118 #define __local_lock_irqsave(lock, flags) \ 110 119 do { \ 111 120 local_irq_save(flags); \ 112 - local_lock_acquire(this_cpu_ptr(lock)); \ 121 + __local_lock_acquire(lock); \ 122 + } while (0) 123 + 124 + #define __local_trylock(lock) \ 125 + ({ \ 126 + local_trylock_t *tl; \ 127 + \ 128 + preempt_disable(); \ 129 + tl = this_cpu_ptr(lock); \ 130 + if (READ_ONCE(tl->acquired)) { \ 131 + preempt_enable(); \ 132 + tl = NULL; \ 133 + } else { \ 134 + WRITE_ONCE(tl->acquired, 1); \ 135 + local_trylock_acquire( \ 136 + (local_lock_t *)tl); \ 137 + } \ 138 + !!tl; \ 139 + }) 140 + 141 + #define __local_trylock_irqsave(lock, flags) \ 142 + ({ \ 143 + local_trylock_t *tl; \ 144 + \ 145 + local_irq_save(flags); \ 146 + tl = this_cpu_ptr(lock); \ 147 + if (READ_ONCE(tl->acquired)) { \ 148 + local_irq_restore(flags); \ 149 + tl = NULL; \ 150 + } else { \ 151 + WRITE_ONCE(tl->acquired, 1); \ 152 + local_trylock_acquire( \ 153 + (local_lock_t *)tl); \ 154 + } \ 155 + !!tl; \ 156 + }) 157 + 158 + #define __local_lock_release(lock) \ 159 + do { \ 160 + local_trylock_t *tl; \ 161 + local_lock_t *l; \ 162 + \ 163 + l = (local_lock_t *)this_cpu_ptr(lock); \ 164 + tl = (local_trylock_t *)l; \ 165 + local_lock_release(l); \ 166 + _Generic((lock), \ 167 + local_trylock_t *: ({ \ 168 + lockdep_assert(tl->acquired == 1); \ 169 + WRITE_ONCE(tl->acquired, 0); \ 170 + }), \ 171 + default:(void)0); \ 113 172 } while (0) 114 173 115 174 #define __local_unlock(lock) \ 116 175 do { \ 117 - local_lock_release(this_cpu_ptr(lock)); \ 
176 + __local_lock_release(lock); \ 118 177 preempt_enable(); \ 119 178 } while (0) 120 179 121 180 #define __local_unlock_irq(lock) \ 122 181 do { \ 123 - local_lock_release(this_cpu_ptr(lock)); \ 182 + __local_lock_release(lock); \ 124 183 local_irq_enable(); \ 125 184 } while (0) 126 185 127 186 #define __local_unlock_irqrestore(lock, flags) \ 128 187 do { \ 129 - local_lock_release(this_cpu_ptr(lock)); \ 188 + __local_lock_release(lock); \ 130 189 local_irq_restore(flags); \ 131 190 } while (0) 132 191 ··· 205 132 #define __local_unlock_nested_bh(lock) \ 206 133 local_lock_release(this_cpu_ptr(lock)) 207 134 208 - /* localtry_lock_t variants */ 209 - 210 - #define __localtry_lock_init(lock) \ 211 - do { \ 212 - __local_lock_init(&(lock)->llock); \ 213 - WRITE_ONCE((lock)->acquired, 0); \ 214 - } while (0) 215 - 216 - #define __localtry_lock(lock) \ 217 - do { \ 218 - localtry_lock_t *lt; \ 219 - preempt_disable(); \ 220 - lt = this_cpu_ptr(lock); \ 221 - local_lock_acquire(&lt->llock); \ 222 - WRITE_ONCE(lt->acquired, 1); \ 223 - } while (0) 224 - 225 - #define __localtry_lock_irq(lock) \ 226 - do { \ 227 - localtry_lock_t *lt; \ 228 - local_irq_disable(); \ 229 - lt = this_cpu_ptr(lock); \ 230 - local_lock_acquire(&lt->llock); \ 231 - WRITE_ONCE(lt->acquired, 1); \ 232 - } while (0) 233 - 234 - #define __localtry_lock_irqsave(lock, flags) \ 235 - do { \ 236 - localtry_lock_t *lt; \ 237 - local_irq_save(flags); \ 238 - lt = this_cpu_ptr(lock); \ 239 - local_lock_acquire(&lt->llock); \ 240 - WRITE_ONCE(lt->acquired, 1); \ 241 - } while (0) 242 - 243 - #define __localtry_trylock(lock) \ 244 - ({ \ 245 - localtry_lock_t *lt; \ 246 - bool _ret; \ 247 - \ 248 - preempt_disable(); \ 249 - lt = this_cpu_ptr(lock); \ 250 - if (!READ_ONCE(lt->acquired)) { \ 251 - WRITE_ONCE(lt->acquired, 1); \ 252 - local_trylock_acquire(&lt->llock); \ 253 - _ret = true; \ 254 - } else { \ 255 - _ret = false; \ 256 - preempt_enable(); \ 257 - } \ 258 - _ret; \ 259 - }) 260 - 261 - 
#define __localtry_trylock_irqsave(lock, flags) \ 262 - ({ \ 263 - localtry_lock_t *lt; \ 264 - bool _ret; \ 265 - \ 266 - local_irq_save(flags); \ 267 - lt = this_cpu_ptr(lock); \ 268 - if (!READ_ONCE(lt->acquired)) { \ 269 - WRITE_ONCE(lt->acquired, 1); \ 270 - local_trylock_acquire(&lt->llock); \ 271 - _ret = true; \ 272 - } else { \ 273 - _ret = false; \ 274 - local_irq_restore(flags); \ 275 - } \ 276 - _ret; \ 277 - }) 278 - 279 - #define __localtry_unlock(lock) \ 280 - do { \ 281 - localtry_lock_t *lt; \ 282 - lt = this_cpu_ptr(lock); \ 283 - WRITE_ONCE(lt->acquired, 0); \ 284 - local_lock_release(&lt->llock); \ 285 - preempt_enable(); \ 286 - } while (0) 287 - 288 - #define __localtry_unlock_irq(lock) \ 289 - do { \ 290 - localtry_lock_t *lt; \ 291 - lt = this_cpu_ptr(lock); \ 292 - WRITE_ONCE(lt->acquired, 0); \ 293 - local_lock_release(&lt->llock); \ 294 - local_irq_enable(); \ 295 - } while (0) 296 - 297 - #define __localtry_unlock_irqrestore(lock, flags) \ 298 - do { \ 299 - localtry_lock_t *lt; \ 300 - lt = this_cpu_ptr(lock); \ 301 - WRITE_ONCE(lt->acquired, 0); \ 302 - local_lock_release(&lt->llock); \ 303 - local_irq_restore(flags); \ 304 - } while (0) 305 - 306 135 #else /* !CONFIG_PREEMPT_RT */ 307 136 308 137 /* ··· 212 237 * critical section while staying preemptible. 
213 238 */ 214 239 typedef spinlock_t local_lock_t; 215 - typedef spinlock_t localtry_lock_t; 240 + typedef spinlock_t local_trylock_t; 216 241 217 242 #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) 218 - #define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname) 243 + #define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) 219 244 220 245 #define __local_lock_init(l) \ 221 246 do { \ 222 247 local_spin_lock_init((l)); \ 223 248 } while (0) 249 + 250 + #define __local_trylock_init(l) __local_lock_init(l) 224 251 225 252 #define __local_lock(__lock) \ 226 253 do { \ ··· 260 283 spin_unlock(this_cpu_ptr((lock))); \ 261 284 } while (0) 262 285 263 - /* localtry_lock_t variants */ 264 - 265 - #define __localtry_lock_init(lock) __local_lock_init(lock) 266 - #define __localtry_lock(lock) __local_lock(lock) 267 - #define __localtry_lock_irq(lock) __local_lock(lock) 268 - #define __localtry_lock_irqsave(lock, flags) __local_lock_irqsave(lock, flags) 269 - #define __localtry_unlock(lock) __local_unlock(lock) 270 - #define __localtry_unlock_irq(lock) __local_unlock(lock) 271 - #define __localtry_unlock_irqrestore(lock, flags) __local_unlock_irqrestore(lock, flags) 272 - 273 - #define __localtry_trylock(lock) \ 286 + #define __local_trylock(lock) \ 274 287 ({ \ 275 288 int __locked; \ 276 289 \ ··· 275 308 __locked; \ 276 309 }) 277 310 278 - #define __localtry_trylock_irqsave(lock, flags) \ 311 + #define __local_trylock_irqsave(lock, flags) \ 279 312 ({ \ 280 313 typecheck(unsigned long, flags); \ 281 314 flags = 0; \ 282 - __localtry_trylock(lock); \ 315 + __local_trylock(lock); \ 283 316 }) 284 317 285 318 #endif /* CONFIG_PREEMPT_RT */

+6 -3

include/linux/pgtable.h

··· 1511 1511 1512 1512 /* 1513 1513 * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page 1514 - * tables copied during copy_page_range(). On success, stores the pfn to be 1515 - * passed to untrack_pfn_copy(). 1514 + * tables copied during copy_page_range(). Will store the pfn to be 1515 + * passed to untrack_pfn_copy() only if there is something to be untracked. 1516 + * Callers should initialize the pfn to 0. 1516 1517 */ 1517 1518 static inline int track_pfn_copy(struct vm_area_struct *dst_vma, 1518 1519 struct vm_area_struct *src_vma, unsigned long *pfn) ··· 1523 1522 1524 1523 /* 1525 1524 * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during 1526 - * copy_page_range(), but after track_pfn_copy() was already called. 1525 + * copy_page_range(), but after track_pfn_copy() was already called. Can 1526 + * be called even if track_pfn_copy() did not actually track anything: 1527 + * handled internally. 1527 1528 */ 1528 1529 static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, 1529 1530 unsigned long pfn)

+12 -3

lib/alloc_tag.c

··· 422 422 unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN); 423 423 unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN); 424 424 unsigned long more_pages; 425 - unsigned long nr; 425 + unsigned long nr = 0; 426 426 427 427 more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; 428 - nr = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, 429 - NUMA_NO_NODE, more_pages, next_page); 428 + while (nr < more_pages) { 429 + unsigned long allocated; 430 + 431 + allocated = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, 432 + NUMA_NO_NODE, more_pages - nr, next_page + nr); 433 + 434 + if (!allocated) 435 + break; 436 + nr += allocated; 437 + } 438 + 430 439 if (nr < more_pages || 431 440 vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, 432 441 next_page, PAGE_SHIFT) < 0) {

+1

lib/asn1_decoder.c

··· 518 518 } 519 519 EXPORT_SYMBOL_GPL(asn1_ber_decoder); 520 520 521 + MODULE_DESCRIPTION("Decoder for ASN.1 BER/DER/CER encoded bytestream"); 521 522 MODULE_LICENSE("GPL");

+1 -1

lib/iov_iter.c

··· 1191 1191 return -ENOMEM; 1192 1192 p = *pages; 1193 1193 for (int k = 0; k < n; k++) { 1194 - struct folio *folio = page_folio(page); 1194 + struct folio *folio = page_folio(page + k); 1195 1195 p[k] = page + k; 1196 1196 if (!folio_test_slab(folio)) 1197 1197 folio_get(folio);

+1

lib/tests/slub_kunit.c

··· 325 325 }; 326 326 kunit_test_suite(test_suite); 327 327 328 + MODULE_DESCRIPTION("Kunit tests for slub allocator"); 328 329 MODULE_LICENSE("GPL");

+1

lib/ucs2_string.c

··· 165 165 } 166 166 EXPORT_SYMBOL(ucs2_as_utf8); 167 167 168 + MODULE_DESCRIPTION("UCS2 string handling"); 168 169 MODULE_LICENSE("GPL v2");

+1

lib/zlib_inflate/inflate_syms.c

··· 18 18 EXPORT_SYMBOL(zlib_inflateReset); 19 19 EXPORT_SYMBOL(zlib_inflateIncomp); 20 20 EXPORT_SYMBOL(zlib_inflate_blob); 21 + MODULE_DESCRIPTION("Data decompression using the deflation algorithm"); 21 22 MODULE_LICENSE("GPL");

+11 -8

mm/cma.c

··· 35 35 struct cma cma_areas[MAX_CMA_AREAS]; 36 36 unsigned int cma_area_count; 37 37 38 - static int __init __cma_declare_contiguous_nid(phys_addr_t base, 38 + static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, 39 39 phys_addr_t size, phys_addr_t limit, 40 40 phys_addr_t alignment, unsigned int order_per_bit, 41 41 bool fixed, const char *name, struct cma **res_cma, ··· 370 370 phys_addr_t align, unsigned int order_per_bit, 371 371 const char *name, struct cma **res_cma, int nid) 372 372 { 373 - phys_addr_t start, end; 373 + phys_addr_t start = 0, end; 374 374 phys_addr_t size, sizesum, sizeleft; 375 375 struct cma_init_memrange *mrp, *mlp, *failed; 376 376 struct cma_memrange *cmrp; ··· 384 384 /* 385 385 * First, try it the normal way, producing just one range. 386 386 */ 387 - ret = __cma_declare_contiguous_nid(0, total_size, 0, align, 387 + ret = __cma_declare_contiguous_nid(&start, total_size, 0, align, 388 388 order_per_bit, false, name, res_cma, nid); 389 389 if (ret != -ENOMEM) 390 390 goto out; ··· 580 580 { 581 581 int ret; 582 582 583 - ret = __cma_declare_contiguous_nid(base, size, limit, alignment, 583 + ret = __cma_declare_contiguous_nid(&base, size, limit, alignment, 584 584 order_per_bit, fixed, name, res_cma, nid); 585 585 if (ret != 0) 586 586 pr_err("Failed to reserve %ld MiB\n", ··· 592 592 return ret; 593 593 } 594 594 595 - static int __init __cma_declare_contiguous_nid(phys_addr_t base, 595 + static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, 596 596 phys_addr_t size, phys_addr_t limit, 597 597 phys_addr_t alignment, unsigned int order_per_bit, 598 598 bool fixed, const char *name, struct cma **res_cma, 599 599 int nid) 600 600 { 601 601 phys_addr_t memblock_end = memblock_end_of_DRAM(); 602 - phys_addr_t highmem_start; 602 + phys_addr_t highmem_start, base = *basep; 603 603 int ret; 604 604 605 605 /* ··· 722 722 } 723 723 724 724 ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); 725 - 
if (ret) 725 + if (ret) { 726 726 memblock_phys_free(base, size); 727 + return ret; 728 + } 727 729 728 730 (*res_cma)->nid = nid; 731 + *basep = base; 729 732 730 - return ret; 733 + return 0; 731 734 } 732 735 733 736 static void cma_debug_show_areas(struct cma *cma)

+3 -3

mm/compaction.c

··· 981 981 } 982 982 983 983 if (PageHuge(page)) { 984 + const unsigned int order = compound_order(page); 984 985 /* 985 986 * skip hugetlbfs if we are not compacting for pages 986 987 * bigger than its order. THPs and other compound pages 987 988 * are handled below. 988 989 */ 989 990 if (!cc->alloc_contig) { 990 - const unsigned int order = compound_order(page); 991 991 992 992 if (order <= MAX_PAGE_ORDER) { 993 993 low_pfn += (1UL << order) - 1; ··· 1011 1011 /* Do not report -EBUSY down the chain */ 1012 1012 if (ret == -EBUSY) 1013 1013 ret = 0; 1014 - low_pfn += compound_nr(page) - 1; 1015 - nr_scanned += compound_nr(page) - 1; 1014 + low_pfn += (1UL << order) - 1; 1015 + nr_scanned += (1UL << order) - 1; 1016 1016 goto isolate_fail; 1017 1017 } 1018 1018

+1

mm/filemap.c

··· 2244 2244 *start = folio->index + nr; 2245 2245 goto out; 2246 2246 } 2247 + xas_advance(&xas, folio_next_index(folio) - 1); 2247 2248 continue; 2248 2249 put_folio: 2249 2250 folio_put(folio);

+20 -3

mm/hugetlb.c

··· 2271 2271 * as surplus_pages, otherwise it might confuse 2272 2272 * persistent_huge_pages() momentarily. 2273 2273 */ 2274 - __prep_account_new_huge_page(h, nid); 2274 + __prep_account_new_huge_page(h, folio_nid(folio)); 2275 2275 2276 2276 /* 2277 2277 * We could have raced with the pool size change. ··· 3825 3825 static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, 3826 3826 nodemask_t *nodes_allowed) 3827 3827 { 3828 + unsigned long persistent_free_count; 3828 3829 unsigned long min_count; 3829 3830 unsigned long allocated; 3830 3831 struct folio *folio; ··· 3960 3959 * though, we'll note that we're not allowed to exceed surplus 3961 3960 * and won't grow the pool anywhere else. Not until one of the 3962 3961 * sysctls are changed, or the surplus pages go out of use. 3962 + * 3963 + * min_count is the expected number of persistent pages, we 3964 + * shouldn't calculate min_count by using 3965 + * resv_huge_pages + persistent_huge_pages() - free_huge_pages, 3966 + * because there may exist free surplus huge pages, and this will 3967 + * lead to subtracting twice. Free surplus huge pages come from HVO 3968 + * failing to restore vmemmap, see comments in the callers of 3969 + * hugetlb_vmemmap_restore_folio(). Thus, we should calculate 3970 + * persistent free count first. 
3963 3971 */ 3964 - min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages; 3972 + persistent_free_count = h->free_huge_pages; 3973 + if (h->free_huge_pages > persistent_huge_pages(h)) { 3974 + if (h->free_huge_pages > h->surplus_huge_pages) 3975 + persistent_free_count -= h->surplus_huge_pages; 3976 + else 3977 + persistent_free_count = 0; 3978 + } 3979 + min_count = h->resv_huge_pages + persistent_huge_pages(h) - persistent_free_count; 3965 3980 min_count = max(count, min_count); 3966 3981 try_to_free_low(h, min_count, nodes_allowed); 3967 3982 ··· 4647 4630 err = hugetlb_sysfs_add_hstate(h, hugepages_kobj, 4648 4631 hstate_kobjs, &hstate_attr_group); 4649 4632 if (err) 4650 - pr_err("HugeTLB: Unable to add hstate %s", h->name); 4633 + pr_err("HugeTLB: Unable to add hstate %s\n", h->name); 4651 4634 } 4652 4635 4653 4636 #ifdef CONFIG_NUMA

+1

mm/kasan/kasan_test_c.c

··· 2127 2127 2128 2128 kunit_test_suite(kasan_kunit_test_suite); 2129 2129 2130 + MODULE_DESCRIPTION("KUnit tests for checking KASAN bug-detection capabilities"); 2130 2131 MODULE_LICENSE("GPL");

+19 -20

mm/memcontrol.c

··· 1759 1759 } 1760 1760 1761 1761 struct memcg_stock_pcp { 1762 - localtry_lock_t stock_lock; 1762 + local_trylock_t stock_lock; 1763 1763 struct mem_cgroup *cached; /* this never be root cgroup */ 1764 1764 unsigned int nr_pages; 1765 1765 ··· 1774 1774 #define FLUSHING_CACHED_CHARGE 0 1775 1775 }; 1776 1776 static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = { 1777 - .stock_lock = INIT_LOCALTRY_LOCK(stock_lock), 1777 + .stock_lock = INIT_LOCAL_TRYLOCK(stock_lock), 1778 1778 }; 1779 1779 static DEFINE_MUTEX(percpu_charge_mutex); 1780 1780 ··· 1805 1805 if (nr_pages > MEMCG_CHARGE_BATCH) 1806 1806 return ret; 1807 1807 1808 - if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { 1809 - if (!gfpflags_allow_spinning(gfp_mask)) 1810 - return ret; 1811 - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); 1812 - } 1808 + if (gfpflags_allow_spinning(gfp_mask)) 1809 + local_lock_irqsave(&memcg_stock.stock_lock, flags); 1810 + else if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) 1811 + return ret; 1813 1812 1814 1813 stock = this_cpu_ptr(&memcg_stock); 1815 1814 stock_pages = READ_ONCE(stock->nr_pages); ··· 1817 1818 ret = true; 1818 1819 } 1819 1820 1820 - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1821 + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1821 1822 1822 1823 return ret; 1823 1824 } ··· 1856 1857 * drain_stock races is that we always operate on local CPU stock 1857 1858 * here with IRQ disabled 1858 1859 */ 1859 - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); 1860 + local_lock_irqsave(&memcg_stock.stock_lock, flags); 1860 1861 1861 1862 stock = this_cpu_ptr(&memcg_stock); 1862 1863 old = drain_obj_stock(stock); 1863 1864 drain_stock(stock); 1864 1865 clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); 1865 1866 1866 - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1867 + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1867 1868 obj_cgroup_put(old); 1868 1869 } 1869 
1870 ··· 1893 1894 { 1894 1895 unsigned long flags; 1895 1896 1896 - if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { 1897 + if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) { 1897 1898 /* 1898 1899 * In case of unlikely failure to lock percpu stock_lock 1899 1900 * uncharge memcg directly. ··· 1906 1907 return; 1907 1908 } 1908 1909 __refill_stock(memcg, nr_pages); 1909 - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1910 + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1910 1911 } 1911 1912 1912 1913 /* ··· 1963 1964 stock = &per_cpu(memcg_stock, cpu); 1964 1965 1965 1966 /* drain_obj_stock requires stock_lock */ 1966 - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); 1967 + local_lock_irqsave(&memcg_stock.stock_lock, flags); 1967 1968 old = drain_obj_stock(stock); 1968 - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1969 + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); 1969 1970 1970 1971 drain_stock(stock); 1971 1972 obj_cgroup_put(old); ··· 2786 2787 unsigned long flags; 2787 2788 int *bytes; 2788 2789 2789 - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); 2790 + local_lock_irqsave(&memcg_stock.stock_lock, flags); 2790 2791 stock = this_cpu_ptr(&memcg_stock); 2791 2792 2792 2793 /* ··· 2835 2836 if (nr) 2836 2837 __mod_objcg_mlstate(objcg, pgdat, idx, nr); 2837 2838 2838 - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); 2839 + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); 2839 2840 obj_cgroup_put(old); 2840 2841 } 2841 2842 ··· 2845 2846 unsigned long flags; 2846 2847 bool ret = false; 2847 2848 2848 - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); 2849 + local_lock_irqsave(&memcg_stock.stock_lock, flags); 2849 2850 2850 2851 stock = this_cpu_ptr(&memcg_stock); 2851 2852 if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { ··· 2853 2854 ret = true; 2854 2855 } 2855 2856 2856 - 
localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); 2857 + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); 2857 2858 2858 2859 return ret; 2859 2860 } ··· 2945 2946 unsigned long flags; 2946 2947 unsigned int nr_pages = 0; 2947 2948 2948 - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); 2949 + local_lock_irqsave(&memcg_stock.stock_lock, flags); 2949 2950 2950 2951 stock = this_cpu_ptr(&memcg_stock); 2951 2952 if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ ··· 2959 2960 stock->nr_bytes &= (PAGE_SIZE - 1); 2960 2961 } 2961 2962 2962 - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); 2963 + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); 2963 2964 obj_cgroup_put(old); 2964 2965 2965 2966 if (nr_pages)

+3 -3

mm/memory.c

··· 1361 1361 struct mm_struct *dst_mm = dst_vma->vm_mm; 1362 1362 struct mm_struct *src_mm = src_vma->vm_mm; 1363 1363 struct mmu_notifier_range range; 1364 - unsigned long next, pfn; 1364 + unsigned long next, pfn = 0; 1365 1365 bool is_cow; 1366 1366 int ret; 1367 1367 ··· 2938 2938 if (fn) { 2939 2939 do { 2940 2940 if (create || !pte_none(ptep_get(pte))) { 2941 - err = fn(pte++, addr, data); 2941 + err = fn(pte, addr, data); 2942 2942 if (err) 2943 2943 break; 2944 2944 } 2945 - } while (addr += PAGE_SIZE, addr != end); 2945 + } while (pte++, addr += PAGE_SIZE, addr != end); 2946 2946 } 2947 2947 *mask |= PGTBL_PTE_MODIFIED; 2948 2948

+88 -40

mm/page_alloc.c

··· 1400 1400 struct llist_head *llhead; 1401 1401 unsigned long flags; 1402 1402 1403 - if (!spin_trylock_irqsave(&zone->lock, flags)) { 1404 - if (unlikely(fpi_flags & FPI_TRYLOCK)) { 1403 + if (unlikely(fpi_flags & FPI_TRYLOCK)) { 1404 + if (!spin_trylock_irqsave(&zone->lock, flags)) { 1405 1405 add_page_to_zone_llist(zone, page, order); 1406 1406 return; 1407 1407 } 1408 + } else { 1408 1409 spin_lock_irqsave(&zone->lock, flags); 1409 1410 } 1410 1411 ··· 2183 2182 } 2184 2183 2185 2184 /* 2186 - * Try finding a free buddy page on the fallback list. 2187 - * 2188 - * This will attempt to claim a whole pageblock for the requested type 2189 - * to ensure grouping of such requests in the future. 2190 - * 2191 - * If a whole block cannot be claimed, steal an individual page, regressing to 2192 - * __rmqueue_smallest() logic to at least break up as little contiguity as 2193 - * possible. 2185 + * Try to allocate from some fallback migratetype by claiming the entire block, 2186 + * i.e. converting it to the allocation's start migratetype. 2194 2187 * 2195 2188 * The use of signed ints for order and current_order is a deliberate 2196 2189 * deviation from the rest of this file, to make the for loop 2197 2190 * condition simpler. 2198 - * 2199 - * Return the stolen page, or NULL if none can be found. 
2200 2191 */ 2201 2192 static __always_inline struct page * 2202 - __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, 2193 + __rmqueue_claim(struct zone *zone, int order, int start_migratetype, 2203 2194 unsigned int alloc_flags) 2204 2195 { 2205 2196 struct free_area *area; ··· 2229 2236 page = try_to_claim_block(zone, page, current_order, order, 2230 2237 start_migratetype, fallback_mt, 2231 2238 alloc_flags); 2232 - if (page) 2233 - goto got_one; 2239 + if (page) { 2240 + trace_mm_page_alloc_extfrag(page, order, current_order, 2241 + start_migratetype, fallback_mt); 2242 + return page; 2243 + } 2234 2244 } 2235 2245 2236 - if (alloc_flags & ALLOC_NOFRAGMENT) 2237 - return NULL; 2246 + return NULL; 2247 + } 2238 2248 2239 - /* No luck claiming pageblock. Find the smallest fallback page */ 2249 + /* 2250 + * Try to steal a single page from some fallback migratetype. Leave the rest of 2251 + * the block as its current migratetype, potentially causing fragmentation. 2252 + */ 2253 + static __always_inline struct page * 2254 + __rmqueue_steal(struct zone *zone, int order, int start_migratetype) 2255 + { 2256 + struct free_area *area; 2257 + int current_order; 2258 + struct page *page; 2259 + int fallback_mt; 2260 + bool claim_block; 2261 + 2240 2262 for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { 2241 2263 area = &(zone->free_area[current_order]); 2242 2264 fallback_mt = find_suitable_fallback(area, current_order, ··· 2261 2253 2262 2254 page = get_page_from_free_area(area, fallback_mt); 2263 2255 page_del_and_expand(zone, page, order, current_order, fallback_mt); 2264 - goto got_one; 2256 + trace_mm_page_alloc_extfrag(page, order, current_order, 2257 + start_migratetype, fallback_mt); 2258 + return page; 2265 2259 } 2266 2260 2267 2261 return NULL; 2268 - 2269 - got_one: 2270 - trace_mm_page_alloc_extfrag(page, order, current_order, 2271 - start_migratetype, fallback_mt); 2272 - 2273 - return page; 2274 2262 } 2263 
+ 2264 enum rmqueue_mode { 2265 + RMQUEUE_NORMAL, 2266 + RMQUEUE_CMA, 2267 + RMQUEUE_CLAIM, 2268 + RMQUEUE_STEAL, 2269 + }; 2275 2270 2276 2271 /* 2277 2272 * Do the hard work of removing an element from the buddy allocator. ··· 2282 2271 */ 2283 2272 static __always_inline struct page * 2284 2273 __rmqueue(struct zone *zone, unsigned int order, int migratetype, 2285 - unsigned int alloc_flags) 2274 + unsigned int alloc_flags, enum rmqueue_mode *mode) 2286 2275 { 2287 2276 struct page *page; 2288 2277 ··· 2301 2290 } 2302 2291 } 2303 2292 2304 - page = __rmqueue_smallest(zone, order, migratetype); 2305 - if (unlikely(!page)) { 2306 - if (alloc_flags & ALLOC_CMA) 2293 + /* 2294 + * First try the freelists of the requested migratetype, then try 2295 + * fallback modes with increasing levels of fragmentation risk. 2296 + * 2297 + * The fallback logic is expensive and rmqueue_bulk() calls in 2298 + * a loop with the zone->lock held, meaning the freelists are 2299 + * not subject to any outside changes. Remember in *mode where 2300 + * we found pay dirt, to save us the search on the next call. 2301 + */ 2302 + switch (*mode) { 2303 + case RMQUEUE_NORMAL: 2304 + page = __rmqueue_smallest(zone, order, migratetype); 2305 + if (page) 2306 + return page; 2307 + fallthrough; 2308 + case RMQUEUE_CMA: 2309 + if (alloc_flags & ALLOC_CMA) { 2307 2310 page = __rmqueue_cma_fallback(zone, order); 2308 - 2309 - if (!page) 2310 - page = __rmqueue_fallback(zone, order, migratetype, 2311 - alloc_flags); 2311 + if (page) { 2312 + *mode = RMQUEUE_CMA; 2313 + return page; 2314 + } 2315 + } 2316 + fallthrough; 2317 + case RMQUEUE_CLAIM: 2318 + page = __rmqueue_claim(zone, order, migratetype, alloc_flags); 2319 + if (page) { 2320 + /* Replenished preferred freelist, back to normal mode. 
*/ 2321 + *mode = RMQUEUE_NORMAL; 2322 + return page; 2323 + } 2324 + fallthrough; 2325 + case RMQUEUE_STEAL: 2326 + if (!(alloc_flags & ALLOC_NOFRAGMENT)) { 2327 + page = __rmqueue_steal(zone, order, migratetype); 2328 + if (page) { 2329 + *mode = RMQUEUE_STEAL; 2330 + return page; 2331 + } 2332 + } 2312 2333 } 2313 - return page; 2334 + return NULL; 2314 2335 } 2315 2336 2316 2337 /* ··· 2354 2311 unsigned long count, struct list_head *list, 2355 2312 int migratetype, unsigned int alloc_flags) 2356 2313 { 2314 + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; 2357 2315 unsigned long flags; 2358 2316 int i; 2359 2317 2360 - if (!spin_trylock_irqsave(&zone->lock, flags)) { 2361 - if (unlikely(alloc_flags & ALLOC_TRYLOCK)) 2318 + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) { 2319 + if (!spin_trylock_irqsave(&zone->lock, flags)) 2362 2320 return 0; 2321 + } else { 2363 2322 spin_lock_irqsave(&zone->lock, flags); 2364 2323 } 2365 2324 for (i = 0; i < count; ++i) { 2366 2325 struct page *page = __rmqueue(zone, order, migratetype, 2367 - alloc_flags); 2326 + alloc_flags, &rmqm); 2368 2327 if (unlikely(page == NULL)) 2369 2328 break; 2370 2329 ··· 2982 2937 2983 2938 do { 2984 2939 page = NULL; 2985 - if (!spin_trylock_irqsave(&zone->lock, flags)) { 2986 - if (unlikely(alloc_flags & ALLOC_TRYLOCK)) 2940 + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) { 2941 + if (!spin_trylock_irqsave(&zone->lock, flags)) 2987 2942 return NULL; 2943 + } else { 2988 2944 spin_lock_irqsave(&zone->lock, flags); 2989 2945 } 2990 2946 if (alloc_flags & ALLOC_HIGHATOMIC) 2991 2947 page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); 2992 2948 if (!page) { 2993 - page = __rmqueue(zone, order, migratetype, alloc_flags); 2949 + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; 2950 + 2951 + page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm); 2994 2952 2995 2953 /* 2996 2954 * If the allocation fails, allow OOM handling and

+11 -2

mm/userfaultfd.c

··· 1902 1902 unsigned long end) 1903 1903 { 1904 1904 struct vm_area_struct *ret; 1905 + bool give_up_on_oom = false; 1906 + 1907 + /* 1908 + * If we are modifying only and not splitting, just give up on the merge 1909 + * if OOM prevents us from merging successfully. 1910 + */ 1911 + if (start == vma->vm_start && end == vma->vm_end) 1912 + give_up_on_oom = true; 1905 1913 1906 1914 /* Reset ptes for the whole vma range if wr-protected */ 1907 1915 if (userfaultfd_wp(vma)) ··· 1917 1909 1918 1910 ret = vma_modify_flags_uffd(vmi, prev, vma, start, end, 1919 1911 vma->vm_flags & ~__VM_UFFD_FLAGS, 1920 - NULL_VM_UFFD_CTX); 1912 + NULL_VM_UFFD_CTX, give_up_on_oom); 1921 1913 1922 1914 /* 1923 1915 * In the vma_merge() successful mprotect-like case 8: ··· 1968 1960 new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags; 1969 1961 vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end, 1970 1962 new_flags, 1971 - (struct vm_userfaultfd_ctx){ctx}); 1963 + (struct vm_userfaultfd_ctx){ctx}, 1964 + /* give_up_on_oom = */false); 1972 1965 if (IS_ERR(vma)) 1973 1966 return PTR_ERR(vma); 1974 1967

+47 -4

mm/vma.c

··· 666 666 /* 667 667 * Actually perform the VMA merge operation. 668 668 * 669 + * IMPORTANT: We guarantee that, should vmg->give_up_on_oom be set, to not 670 + * modify any VMAs or cause inconsistent state should an OOM condition arise. 671 + * 669 672 * Returns 0 on success, or an error value on failure. 670 673 */ 671 674 static int commit_merge(struct vma_merge_struct *vmg) ··· 688 685 689 686 init_multi_vma_prep(&vp, vma, vmg); 690 687 688 + /* 689 + * If vmg->give_up_on_oom is set, we're safe, because we don't actually 690 + * manipulate any VMAs until we succeed at preallocation. 691 + * 692 + * Past this point, we will not return an error. 693 + */ 691 694 if (vma_iter_prealloc(vmg->vmi, vma)) 692 695 return -ENOMEM; 693 696 ··· 924 915 if (anon_dup) 925 916 unlink_anon_vmas(anon_dup); 926 917 927 - vmg->state = VMA_MERGE_ERROR_NOMEM; 918 + /* 919 + * We've cleaned up any cloned anon_vma's, no VMAs have been 920 + * modified, no harm no foul if the user requests that we not 921 + * report this and just give up, leaving the VMAs unmerged. 922 + */ 923 + if (!vmg->give_up_on_oom) 924 + vmg->state = VMA_MERGE_ERROR_NOMEM; 928 925 return NULL; 929 926 } 930 927 ··· 941 926 abort: 942 927 vma_iter_set(vmg->vmi, start); 943 928 vma_iter_load(vmg->vmi); 944 - vmg->state = VMA_MERGE_ERROR_NOMEM; 929 + 930 + /* 931 + * This means we have failed to clone anon_vma's correctly, but no 932 + * actual changes to VMAs have occurred, so no harm no foul - if the 933 + * user doesn't want this reported and instead just wants to give up on 934 + * the merge, allow it. 935 + */ 936 + if (!vmg->give_up_on_oom) 937 + vmg->state = VMA_MERGE_ERROR_NOMEM; 945 938 return NULL; 946 939 } 947 940 ··· 1091 1068 /* This should already have been checked by this point. */ 1092 1069 VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg); 1093 1070 vma_start_write(next); 1071 + /* 1072 + * In this case we don't report OOM, so vmg->give_up_on_oom is 1073 + * safe. 
1074 + */ 1094 1075 ret = dup_anon_vma(middle, next, &anon_dup); 1095 1076 if (ret) 1096 1077 return ret; ··· 1117 1090 return 0; 1118 1091 1119 1092 nomem: 1120 - vmg->state = VMA_MERGE_ERROR_NOMEM; 1121 1093 if (anon_dup) 1122 1094 unlink_anon_vmas(anon_dup); 1095 + /* 1096 + * If the user requests that we just give up on OOM, we are safe to do so 1097 + * here, as commit merge provides this contract to us. Nothing has been 1098 + * changed - no harm no foul, just don't report it. 1099 + */ 1100 + if (!vmg->give_up_on_oom) 1101 + vmg->state = VMA_MERGE_ERROR_NOMEM; 1123 1102 return -ENOMEM; 1124 1103 } 1125 1104 ··· 1567 1534 if (vmg_nomem(vmg)) 1568 1535 return ERR_PTR(-ENOMEM); 1569 1536 1537 + /* 1538 + * Split can fail for reasons other than OOM, so if the user requests 1539 + * this it's probably a mistake. 1540 + */ 1541 + VM_WARN_ON(vmg->give_up_on_oom && 1542 + (vma->vm_start != start || vma->vm_end != end)); 1543 + 1570 1544 /* Split any preceding portion of the VMA. */ 1571 1545 if (vma->vm_start < start) { 1572 1546 int err = split_vma(vmg->vmi, vma, start, 1); ··· 1642 1602 struct vm_area_struct *vma, 1643 1603 unsigned long start, unsigned long end, 1644 1604 unsigned long new_flags, 1645 - struct vm_userfaultfd_ctx new_ctx) 1605 + struct vm_userfaultfd_ctx new_ctx, 1606 + bool give_up_on_oom) 1646 1607 { 1647 1608 VMG_VMA_STATE(vmg, vmi, prev, vma, start, end); 1648 1609 1649 1610 vmg.flags = new_flags; 1650 1611 vmg.uffd_ctx = new_ctx; 1612 + if (give_up_on_oom) 1613 + vmg.give_up_on_oom = true; 1651 1614 1652 1615 return vma_modify(&vmg); 1653 1616 }

+8 -1

mm/vma.h

··· 114 114 */ 115 115 bool just_expand :1; 116 116 117 + /* 118 + * If a merge is possible, but an OOM error occurs, give up and don't 119 + * execute the merge, returning NULL. 120 + */ 121 + bool give_up_on_oom :1; 122 + 117 123 /* Internal flags set during merge process: */ 118 124 119 125 /* ··· 261 255 struct vm_area_struct *vma, 262 256 unsigned long start, unsigned long end, 263 257 unsigned long new_flags, 264 - struct vm_userfaultfd_ctx new_ctx); 258 + struct vm_userfaultfd_ctx new_ctx, 259 + bool give_up_on_oom); 265 260 266 261 __must_check struct vm_area_struct 267 262 *vma_merge_new_range(struct vma_merge_struct *vmg);

+1

samples/livepatch/livepatch-callbacks-busymod.c

··· 56 56 57 57 module_init(livepatch_callbacks_mod_init); 58 58 module_exit(livepatch_callbacks_mod_exit); 59 + MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); 59 60 MODULE_LICENSE("GPL");

+1

samples/livepatch/livepatch-callbacks-demo.c

··· 192 192 193 193 module_init(livepatch_callbacks_demo_init); 194 194 module_exit(livepatch_callbacks_demo_exit); 195 + MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks"); 195 196 MODULE_LICENSE("GPL"); 196 197 MODULE_INFO(livepatch, "Y");

+1

samples/livepatch/livepatch-callbacks-mod.c

··· 38 38 39 39 module_init(livepatch_callbacks_mod_init); 40 40 module_exit(livepatch_callbacks_mod_exit); 41 + MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); 41 42 MODULE_LICENSE("GPL");

+1

samples/livepatch/livepatch-sample.c

··· 66 66 67 67 module_init(livepatch_init); 68 68 module_exit(livepatch_exit); 69 + MODULE_DESCRIPTION("Kernel Live Patching Sample Module"); 69 70 MODULE_LICENSE("GPL"); 70 71 MODULE_INFO(livepatch, "Y");

+1

samples/livepatch/livepatch-shadow-fix1.c

··· 168 168 169 169 module_init(livepatch_shadow_fix1_init); 170 170 module_exit(livepatch_shadow_fix1_exit); 171 + MODULE_DESCRIPTION("Live patching demo for shadow variables"); 171 172 MODULE_LICENSE("GPL"); 172 173 MODULE_INFO(livepatch, "Y");

+1

samples/livepatch/livepatch-shadow-fix2.c

··· 128 128 129 129 module_init(livepatch_shadow_fix2_init); 130 130 module_exit(livepatch_shadow_fix2_exit); 131 + MODULE_DESCRIPTION("Live patching demo for shadow variables"); 131 132 MODULE_LICENSE("GPL"); 132 133 MODULE_INFO(livepatch, "Y");

+2 -14

tools/testing/selftests/mincore/mincore_selftest.c

··· 283 283 284 284 /* 285 285 * Test mincore() behavior on a page backed by a tmpfs file. This test 286 - * performs the same steps as the previous one. However, we don't expect 287 - * any readahead in this case. 286 + * performs the same steps as the previous one. 288 287 */ 289 288 TEST(check_tmpfs_mmap) 290 289 { ··· 294 295 int page_size; 295 296 int fd; 296 297 int i; 297 - int ra_pages = 0; 298 298 299 299 page_size = sysconf(_SC_PAGESIZE); 300 300 vec_size = FILE_SIZE / page_size; ··· 336 338 } 337 339 338 340 /* 339 - * Touch a page in the middle of the mapping. We expect only 340 - * that page to be fetched into memory. 341 + * Touch a page in the middle of the mapping. 341 342 */ 342 343 addr[FILE_SIZE / 2] = 1; 343 344 retval = mincore(addr, FILE_SIZE, vec); 344 345 ASSERT_EQ(0, retval); 345 346 ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) { 346 347 TH_LOG("Page not found in memory after use"); 347 - } 348 - 349 - i = FILE_SIZE / 2 / page_size + 1; 350 - while (i < vec_size && vec[i]) { 351 - ra_pages++; 352 - i++; 353 - } 354 - ASSERT_EQ(ra_pages, 0) { 355 - TH_LOG("Read-ahead pages found in memory"); 356 348 } 357 349 358 350 munmap(addr, FILE_SIZE);

+2 -2

tools/testing/selftests/mm/charge_reserved_hugetlb.sh

··· 29 29 if [[ $cgroup2 ]]; then 30 30 cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') 31 31 if [[ -z "$cgroup_path" ]]; then 32 - cgroup_path=/dev/cgroup/memory 32 + cgroup_path=$(mktemp -d) 33 33 mount -t cgroup2 none $cgroup_path 34 34 do_umount=1 35 35 fi ··· 37 37 else 38 38 cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') 39 39 if [[ -z "$cgroup_path" ]]; then 40 - cgroup_path=/dev/cgroup/memory 40 + cgroup_path=$(mktemp -d) 41 41 mount -t cgroup memory,hugetlb $cgroup_path 42 42 do_umount=1 43 43 fi

+1 -1

tools/testing/selftests/mm/cow.c

··· 293 293 .iov_base = mem, 294 294 .iov_len = size, 295 295 }; 296 - ssize_t cur, total, transferred; 296 + ssize_t cur, total, transferred = 0; 297 297 struct comm_pipes comm_pipes; 298 298 char *old, *new; 299 299 int ret, fds[2];

+1 -1

tools/testing/selftests/mm/hugetlb_reparenting_test.sh

··· 23 23 if [[ $cgroup2 ]]; then 24 24 CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') 25 25 if [[ -z "$CGROUP_ROOT" ]]; then 26 - CGROUP_ROOT=/dev/cgroup/memory 26 + CGROUP_ROOT=$(mktemp -d) 27 27 mount -t cgroup2 none $CGROUP_ROOT 28 28 do_umount=1 29 29 fi

+2 -2

tools/testing/shared/linux.c

··· 150 150 void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) 151 151 { 152 152 if (kmalloc_verbose) 153 - pr_debug("Bulk free %p[0-%lu]\n", list, size - 1); 153 + pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); 154 154 155 155 pthread_mutex_lock(&cachep->lock); 156 156 for (int i = 0; i < size; i++) ··· 168 168 size_t i; 169 169 170 170 if (kmalloc_verbose) 171 - pr_debug("Bulk alloc %lu\n", size); 171 + pr_debug("Bulk alloc %zu\n", size); 172 172 173 173 pthread_mutex_lock(&cachep->lock); 174 174 if (cachep->nr_objs >= size) {

+2

tools/testing/shared/linux/cleanup.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #include "../../../../include/linux/cleanup.h"

Configure Feed

Configure Feed