Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'mm-hotfixes-stable-2025-04-02-21-57' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull MM hotfixes from Andrew Morton:
"Five hotfixes. Three are cc:stable and the remainder address post-6.14
issues or aren't considered necessary for -stable kernels.

All patches are for MM"

* tag 'mm-hotfixes-stable-2025-04-02-21-57' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
mm: zswap: fix crypto_free_acomp() deadlock in zswap_cpu_comp_dead()
mm/hugetlb: move hugetlb_sysctl_init() to the __init section
mm: page_isolation: avoid calling folio_hstate() without hugetlb_lock
mm/hugetlb_vmemmap: fix memory loads ordering
mm/userfaultfd: fix release hang over concurrent GUP

6 files changed: +94 -37
fs/userfaultfd.c (+25 -26)

···
 		goto out;
 
 	/*
-	 * If it's already released don't get it. This avoids to loop
-	 * in __get_user_pages if userfaultfd_release waits on the
-	 * caller of handle_userfault to release the mmap_lock.
-	 */
-	if (unlikely(READ_ONCE(ctx->released))) {
-		/*
-		 * Don't return VM_FAULT_SIGBUS in this case, so a non
-		 * cooperative manager can close the uffd after the
-		 * last UFFDIO_COPY, without risking to trigger an
-		 * involuntary SIGBUS if the process was starting the
-		 * userfaultfd while the userfaultfd was still armed
-		 * (but after the last UFFDIO_COPY). If the uffd
-		 * wasn't already closed when the userfault reached
-		 * this point, that would normally be solved by
-		 * userfaultfd_must_wait returning 'false'.
-		 *
-		 * If we were to return VM_FAULT_SIGBUS here, the non
-		 * cooperative manager would be instead forced to
-		 * always call UFFDIO_UNREGISTER before it can safely
-		 * close the uffd.
-		 */
-		ret = VM_FAULT_NOPAGE;
-		goto out;
-	}
-
-	/*
 	 * Check that we can return VM_FAULT_RETRY.
 	 *
 	 * NOTE: it should become possible to return VM_FAULT_RETRY
···
 	ret = VM_FAULT_RETRY;
 	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
 		goto out;
+
+	if (unlikely(READ_ONCE(ctx->released))) {
+		/*
+		 * If a concurrent release is detected, do not return
+		 * VM_FAULT_SIGBUS or VM_FAULT_NOPAGE, but instead always
+		 * return VM_FAULT_RETRY with lock released proactively.
+		 *
+		 * If we were to return VM_FAULT_SIGBUS here, the non
+		 * cooperative manager would be instead forced to
+		 * always call UFFDIO_UNREGISTER before it can safely
+		 * close the uffd, to avoid involuntary SIGBUS triggered.
+		 *
+		 * If we were to return VM_FAULT_NOPAGE, it would work for
+		 * the fault path, in which the lock will be released
+		 * later. However for GUP, faultin_page() does nothing
+		 * special on NOPAGE, so GUP would spin retrying without
+		 * releasing the mmap read lock, causing possible livelock.
+		 *
+		 * Here only VM_FAULT_RETRY would make sure the mmap lock
+		 * be released immediately, so that the thread concurrently
+		 * releasing the userfault would always make progress.
+		 */
+		release_fault_lock(vmf);
+		goto out;
+	}
 
 	/* take the reference before dropping the mmap_lock */
 	userfaultfd_ctx_get(ctx);
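The new comment's livelock argument can be modeled outside the kernel. Below is a minimal, hypothetical userspace sketch (plain C with pthreads; none of these names come from the kernel): a faulting thread that drops its read lock before each retry, the analogue of returning VM_FAULT_RETRY, lets a releasing thread that needs the write lock make progress, whereas retrying with the lock still held would spin forever.

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t mmap_lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_bool ctx_released;

/* Analogue of userfaultfd_release(): needs the lock exclusively. */
static void *releaser(void *arg)
{
	pthread_rwlock_wrlock(&mmap_lock);
	atomic_store(&ctx_released, true);
	pthread_rwlock_unlock(&mmap_lock);
	return NULL;
}

/* Analogue of the GUP retry loop: VM_FAULT_RETRY semantics mean the
 * lock is dropped before looping, so the releaser can get in. */
static void *faulter(void *arg)
{
	for (;;) {
		pthread_rwlock_rdlock(&mmap_lock);
		bool done = atomic_load(&ctx_released);
		pthread_rwlock_unlock(&mmap_lock);	/* the crucial drop */
		if (done)
			return NULL;
		sched_yield();	/* NOPAGE-style spinning with the lock held would livelock */
	}
}

int main(void)
{
	pthread_t f, r;
	pthread_create(&f, NULL, faulter, NULL);
	pthread_create(&r, NULL, releaser, NULL);
	pthread_join(f, NULL);
	pthread_join(r, NULL);
	puts("releaser made progress; no livelock");
	return 0;
}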
include/linux/page-flags.h (+37)

···
 	}
 	return page;
 }
+
+static __always_inline bool page_count_writable(const struct page *page, int u)
+{
+	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+		return true;
+
+	/*
+	 * The refcount check is ordered before the fake-head check to prevent
+	 * the following race:
+	 *   CPU 1 (HVO)                     CPU 2 (speculative PFN walker)
+	 *
+	 *   page_ref_freeze()
+	 *   synchronize_rcu()
+	 *                                   rcu_read_lock()
+	 *                                   page_is_fake_head() is false
+	 *   vmemmap_remap_pte()
+	 *   XXX: struct page[] becomes r/o
+	 *
+	 *   page_ref_unfreeze()
+	 *                                   page_ref_count() is not zero
+	 *
+	 *                                   atomic_add_unless(&page->_refcount)
+	 *                                   XXX: try to modify r/o struct page[]
+	 *
+	 * The refcount check also prevents modification attempts to other (r/o)
+	 * tail pages that are not fake heads.
+	 */
+	if (atomic_read_acquire(&page->_refcount) == u)
+		return false;
+
+	return page_fixed_fake_head(page) == page;
+}
 #else
 static inline const struct page *page_fixed_fake_head(const struct page *page)
 {
 	return page;
+}
+
+static inline bool page_count_writable(const struct page *page, int u)
+{
+	return true;
 }
 #endif
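The acquire in atomic_read_acquire() is what forbids the interleaving drawn in the comment above: if the walker observes the unfrozen refcount, it must also observe the remapping. Here is a compilable C11 model of that pairing, with invented names (refcount, remapped_ro, count_writable) standing in for the kernel objects:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int refcount = 1;
static atomic_bool remapped_ro;		/* stands in for the vmemmap remap */

/* HVO side: freeze, remap, then unfreeze; the release store pairs with
 * the acquire load in count_writable() below. */
static void freeze_remap_unfreeze(void)
{
	atomic_store_explicit(&refcount, 0, memory_order_relaxed);	/* freeze */
	atomic_store_explicit(&remapped_ro, true, memory_order_relaxed);
	atomic_store_explicit(&refcount, 1, memory_order_release);	/* unfreeze */
}

/* Walker side: the acquire load keeps the later "remapped" read from
 * being reordered ahead of the refcount check, mirroring the patch. */
static bool count_writable(int u)
{
	if (atomic_load_explicit(&refcount, memory_order_acquire) == u)
		return false;	/* frozen: the backing memory may already be r/o */
	return !atomic_load_explicit(&remapped_ro, memory_order_relaxed);
}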
include/linux/page_ref.h (+1 -1)

···
 
 	rcu_read_lock();
 	/* avoid writing to the vmemmap area being remapped */
-	if (!page_is_fake_head(page) && page_ref_count(page) != u)
+	if (page_count_writable(page, u))
 		ret = atomic_add_unless(&page->_refcount, nr, u);
 	rcu_read_unlock();
 
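The call site stays on atomic_add_unless(), whose semantics are "add nr unless the counter equals u". For readers unfamiliar with it, a C11 equivalent of that pattern (illustrative only, not the kernel's implementation):

#include <stdatomic.h>
#include <stdbool.h>

/* Add nr to *v unless *v == u; returns true iff the addition happened.
 * The CAS loop re-checks u on every retry, so a concurrently frozen
 * counter (value u) is never modified. */
static bool add_unless(atomic_int *v, int nr, int u)
{
	int c = atomic_load_explicit(v, memory_order_relaxed);
	do {
		if (c == u)
			return false;
	} while (!atomic_compare_exchange_weak(v, &c, c + nr));
	return true;
}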
mm/hugetlb.c (+1 -1)

···
 	},
 };
 
-static void hugetlb_sysctl_init(void)
+static void __init hugetlb_sysctl_init(void)
 {
 	register_sysctl_init("vm", hugetlb_table);
 }
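Background on the one-word fix: hugetlb_sysctl_init() is only called from boot-time initialization code, so tagging it __init places its code in the kernel's .init.text section, which is freed once boot completes. A generic, hypothetical example of the annotation (example_setup is an invented name):

#include <linux/init.h>

/* Functions marked __init live in .init.text and are discarded after
 * boot, so they must never be called once initialization is over. */
static int __init example_setup(void)
{
	return 0;
}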
mm/page_isolation.c (+8 -1)

···
 	unsigned int skip_pages;
 
 	if (PageHuge(page)) {
-		if (!hugepage_migration_supported(folio_hstate(folio)))
+		struct hstate *h;
+
+		/*
+		 * The huge page may be freed so can not
+		 * use folio_hstate() directly.
+		 */
+		h = size_to_hstate(folio_size(folio));
+		if (h && !hugepage_migration_supported(h))
 			return page;
 	} else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) {
 		return page;
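For context, size_to_hstate() is a pure lookup keyed on the page size and returns NULL when nothing matches, which is why the new code must check h before use. Paraphrasing the kernel source (details may vary across versions):

/* Walk the registered hstates and match on huge page size; unlike
 * folio_hstate(), this makes no assumption that the folio is still a
 * live hugetlb page, and it can return NULL. */
struct hstate *size_to_hstate(unsigned long size)
{
	struct hstate *h;

	for_each_hstate(h) {
		if (huge_page_size(h) == size)
			return h;
	}
	return NULL;
}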
mm/zswap.c (+22 -8)

···
 {
 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+	struct acomp_req *req;
+	struct crypto_acomp *acomp;
+	u8 *buffer;
+
+	if (IS_ERR_OR_NULL(acomp_ctx))
+		return 0;
 
 	mutex_lock(&acomp_ctx->mutex);
-	if (!IS_ERR_OR_NULL(acomp_ctx)) {
-		if (!IS_ERR_OR_NULL(acomp_ctx->req))
-			acomp_request_free(acomp_ctx->req);
-		acomp_ctx->req = NULL;
-		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
-			crypto_free_acomp(acomp_ctx->acomp);
-		kfree(acomp_ctx->buffer);
-	}
+	req = acomp_ctx->req;
+	acomp = acomp_ctx->acomp;
+	buffer = acomp_ctx->buffer;
+	acomp_ctx->req = NULL;
+	acomp_ctx->acomp = NULL;
+	acomp_ctx->buffer = NULL;
 	mutex_unlock(&acomp_ctx->mutex);
+
+	/*
+	 * Do the actual freeing after releasing the mutex to avoid subtle
+	 * locking dependencies causing deadlocks.
+	 */
+	if (!IS_ERR_OR_NULL(req))
+		acomp_request_free(req);
+	if (!IS_ERR_OR_NULL(acomp))
+		crypto_free_acomp(acomp);
+	kfree(buffer);
 
 	return 0;
 }
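The fix is an instance of a common pattern: snapshot and clear the pointers under the lock, then do the actual freeing after unlocking, since the free routines (here crypto_free_acomp()) may take other locks and create a lock-order cycle. A minimal userspace sketch of the pattern, with hypothetical names (struct ctx, ctx_teardown):

#include <pthread.h>
#include <stdlib.h>

struct ctx {
	pthread_mutex_t lock;
	void *buffer;
};

static void ctx_teardown(struct ctx *c)
{
	pthread_mutex_lock(&c->lock);
	void *buffer = c->buffer;	/* take ownership under the lock */
	c->buffer = NULL;		/* concurrent users now see "torn down" */
	pthread_mutex_unlock(&c->lock);

	free(buffer);			/* potentially-blocking work, lock dropped */
}

int main(void)
{
	struct ctx c = { PTHREAD_MUTEX_INITIALIZER, malloc(16) };
	ctx_teardown(&c);
	return 0;
}

Freeing while still holding c->lock would work here, but as soon as free() is replaced by a callback that can itself sleep or take locks (the situation the zswap patch addresses), dropping the mutex first is the safe ordering.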