Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Revert "mm: init_mlocked_on_free_v3"

There was insufficient review and no agreement that this is the right
approach.

There are serious flaws with the implementation that make processes using
mlock() not even work with simple fork() [1] and we get reliable crashes
when rebooting.

Further, simply because we might be unmapping a single PTE of a large
mlocked folio, we shouldn't zero out the whole folio.

... especially because the code can also *corrupt* unrelated memory, since
kernel_init_pages(page, folio_nr_pages(folio));

could end up writing outside of the actual folio if we work with a tail
page.

Let's revert it. Once there is agreement that this is the right approach,
the issues have been fixed, and there has been reasonable review and proper
testing, we can consider it again.

[1] https://lkml.kernel.org/r/4da9da2f-73e4-45fd-b62f-a8a513314057@redhat.com

Link: https://lkml.kernel.org/r/20240605091710.38961-1-david@redhat.com
Fixes: ba42b524a040 ("mm: init_mlocked_on_free_v3")
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: David Wang <00107082@163.com>
Closes: https://lore.kernel.org/lkml/20240528151340.4282-1-00107082@163.com/
Reported-by: Lance Yang <ioworker0@gmail.com>
Closes: https://lkml.kernel.org/r/20240601140917.43562-1-ioworker0@gmail.com
Acked-by: Lance Yang <ioworker0@gmail.com>
Cc: York Jasper Niebuhr <yjnworkstation@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

David Hildenbrand and committed by
Andrew Morton
384a746b 8bb592c2

+9 -73
-6
Documentation/admin-guide/kernel-parameters.txt
··· 2192 2192 Format: 0 | 1 2193 2193 Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON. 2194 2194 2195 - init_mlocked_on_free= [MM] Fill freed userspace memory with zeroes if 2196 - it was mlock'ed and not explicitly munlock'ed 2197 - afterwards. 2198 - Format: 0 | 1 2199 - Default set by CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON 2200 - 2201 2195 init_pkru= [X86] Specify the default memory protection keys rights 2202 2196 register contents for all processes. 0x55555554 by 2203 2197 default (disallow access to all but pkey 0). Can
+1 -8
include/linux/mm.h
··· 3776 3776 static inline bool want_init_on_free(void) 3777 3777 { 3778 3778 return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, 3779 - &init_on_free); 3780 - } 3781 - 3782 - DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); 3783 - static inline bool want_init_mlocked_on_free(void) 3784 - { 3785 - return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, 3786 - &init_mlocked_on_free); 3779 + &init_on_free); 3787 3780 } 3788 3781 3789 3782 extern bool _debug_pagealloc_enabled_early;
-1
mm/internal.h
··· 588 588 extern void memblock_free_pages(struct page *page, unsigned long pfn, 589 589 unsigned int order); 590 590 extern void __free_pages_core(struct page *page, unsigned int order); 591 - extern void kernel_init_pages(struct page *page, int numpages); 592 591 593 592 /* 594 593 * This will have no effect, other than possibly generating a warning, if the
-6
mm/memory.c
··· 1507 1507 if (unlikely(folio_mapcount(folio) < 0)) 1508 1508 print_bad_pte(vma, addr, ptent, page); 1509 1509 } 1510 - 1511 - if (want_init_mlocked_on_free() && folio_test_mlocked(folio) && 1512 - !delay_rmap && folio_test_anon(folio)) { 1513 - kernel_init_pages(page, folio_nr_pages(folio)); 1514 - } 1515 - 1516 1510 if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) { 1517 1511 *force_flush = true; 1518 1512 *force_break = true;
+7 -36
mm/mm_init.c
··· 2523 2523 DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); 2524 2524 EXPORT_SYMBOL(init_on_free); 2525 2525 2526 - DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); 2527 - EXPORT_SYMBOL(init_mlocked_on_free); 2528 - 2529 2526 static bool _init_on_alloc_enabled_early __read_mostly 2530 2527 = IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON); 2531 2528 static int __init early_init_on_alloc(char *buf) ··· 2539 2542 return kstrtobool(buf, &_init_on_free_enabled_early); 2540 2543 } 2541 2544 early_param("init_on_free", early_init_on_free); 2542 - 2543 - static bool _init_mlocked_on_free_enabled_early __read_mostly 2544 - = IS_ENABLED(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON); 2545 - static int __init early_init_mlocked_on_free(char *buf) 2546 - { 2547 - return kstrtobool(buf, &_init_mlocked_on_free_enabled_early); 2548 - } 2549 - early_param("init_mlocked_on_free", early_init_mlocked_on_free); 2550 2545 2551 2546 DEFINE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); 2552 2547 ··· 2567 2578 } 2568 2579 #endif 2569 2580 2570 - if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early || 2571 - _init_mlocked_on_free_enabled_early) && 2581 + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) && 2572 2582 page_poisoning_requested) { 2573 2583 pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " 2574 - "will take precedence over init_on_alloc, init_on_free " 2575 - "and init_mlocked_on_free\n"); 2584 + "will take precedence over init_on_alloc and init_on_free\n"); 2576 2585 _init_on_alloc_enabled_early = false; 2577 2586 _init_on_free_enabled_early = false; 2578 - _init_mlocked_on_free_enabled_early = false; 2579 - } 2580 - 2581 - if (_init_mlocked_on_free_enabled_early && _init_on_free_enabled_early) { 2582 - pr_info("mem auto-init: init_on_free is on, " 2583 - "will take precedence over init_mlocked_on_free\n"); 2584 - _init_mlocked_on_free_enabled_early = false; 2585 2587 } 2586 2588 
2587 2589 if (_init_on_alloc_enabled_early) { ··· 2589 2609 static_branch_disable(&init_on_free); 2590 2610 } 2591 2611 2592 - if (_init_mlocked_on_free_enabled_early) { 2593 - want_check_pages = true; 2594 - static_branch_enable(&init_mlocked_on_free); 2595 - } else { 2596 - static_branch_disable(&init_mlocked_on_free); 2597 - } 2598 - 2599 - if (IS_ENABLED(CONFIG_KMSAN) && (_init_on_alloc_enabled_early || 2600 - _init_on_free_enabled_early || _init_mlocked_on_free_enabled_early)) 2601 - pr_info("mem auto-init: please make sure init_on_alloc, init_on_free and " 2602 - "init_mlocked_on_free are disabled when running KMSAN\n"); 2612 + if (IS_ENABLED(CONFIG_KMSAN) && 2613 + (_init_on_alloc_enabled_early || _init_on_free_enabled_early)) 2614 + pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n"); 2603 2615 2604 2616 #ifdef CONFIG_DEBUG_PAGEALLOC 2605 2617 if (debug_pagealloc_enabled()) { ··· 2630 2658 else 2631 2659 stack = "off"; 2632 2660 2633 - pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s, mlocked free:%s\n", 2661 + pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n", 2634 2662 stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off", 2635 - want_init_on_free() ? "on" : "off", 2636 - want_init_mlocked_on_free() ? "on" : "off"); 2663 + want_init_on_free() ? "on" : "off"); 2637 2664 if (want_init_on_free()) 2638 2665 pr_info("mem auto-init: clearing system memory may take some time...\n"); 2639 2666 }
+1 -1
mm/page_alloc.c
··· 1016 1016 return page_kasan_tag(page) == KASAN_TAG_KERNEL; 1017 1017 } 1018 1018 1019 - void kernel_init_pages(struct page *page, int numpages) 1019 + static void kernel_init_pages(struct page *page, int numpages) 1020 1020 { 1021 1021 int i; 1022 1022
-15
security/Kconfig.hardening
··· 255 255 touching "cold" memory areas. Most cases see 3-5% impact. Some 256 256 synthetic workloads have measured as high as 8%. 257 257 258 - config INIT_MLOCKED_ON_FREE_DEFAULT_ON 259 - bool "Enable mlocked memory zeroing on free" 260 - depends on !KMSAN 261 - help 262 - This config has the effect of setting "init_mlocked_on_free=1" 263 - on the kernel command line. If it is enabled, all mlocked process 264 - memory is zeroed when freed. This restriction to mlocked memory 265 - improves performance over "init_on_free" but can still be used to 266 - protect confidential data like key material from content exposures 267 - to other processes, as well as live forensics and cold boot attacks. 268 - Any non-mlocked memory is not cleared before it is reassigned. This 269 - configuration can be overwritten by setting "init_mlocked_on_free=0" 270 - on the command line. The "init_on_free" boot option takes 271 - precedence over "init_mlocked_on_free". 272 - 273 258 config CC_HAS_ZERO_CALL_USED_REGS 274 259 def_bool $(cc-option,-fzero-call-used-regs=used-gpr) 275 260 # https://github.com/ClangBuiltLinux/linux/issues/1766