Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/slab: use prandom if !allow_spin

When CONFIG_SLAB_FREELIST_RANDOM is enabled and get_random_u32()
is called in an NMI context, lockdep complains because it acquires
a local_lock:

================================
WARNING: inconsistent lock state
6.19.0-rc5-slab-for-next+ #325 Tainted: G N
--------------------------------
inconsistent {INITIAL USE} -> {IN-NMI} usage.
kunit_try_catch/8312 [HC2[2]:SC0[0]:HE0:SE1] takes:
ffff88a02ec49cc0 (batched_entropy_u32.lock){-.-.}-{3:3}, at: get_random_u32+0x7f/0x2e0
{INITIAL USE} state was registered at:
lock_acquire+0xd9/0x2f0
get_random_u32+0x93/0x2e0
__get_random_u32_below+0x17/0x70
cache_random_seq_create+0x121/0x1c0
init_cache_random_seq+0x5d/0x110
do_kmem_cache_create+0x1e0/0xa30
__kmem_cache_create_args+0x4ec/0x830
create_kmalloc_caches+0xe6/0x130
kmem_cache_init+0x1b1/0x660
mm_core_init+0x1d8/0x4b0
start_kernel+0x620/0xcd0
x86_64_start_reservations+0x18/0x30
x86_64_start_kernel+0xf3/0x140
common_startup_64+0x13e/0x148
irq event stamp: 76
hardirqs last enabled at (75): [<ffffffff8298b77a>] exc_nmi+0x11a/0x240
hardirqs last disabled at (76): [<ffffffff8298b991>] sysvec_irq_work+0x11/0x110
softirqs last enabled at (0): [<ffffffff813b2dda>] copy_process+0xc7a/0x2350
softirqs last disabled at (0): [<0000000000000000>] 0x0

other info that might help us debug this:
Possible unsafe locking scenario:

CPU0
----
lock(batched_entropy_u32.lock);
<Interrupt>
lock(batched_entropy_u32.lock);

*** DEADLOCK ***

Fix this by using a pseudo-random number generator if !allow_spin.
This means kmalloc_nolock() users won't get truly random numbers,
but there is not much we can do about it.

Note that an NMI handler might interrupt prandom_u32_state() and
change the random state, but that's safe.

Link: https://lore.kernel.org/all/0c33bdee-6de8-4d9f-92ca-4f72c1b6fb9f@suse.cz
Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().")
Cc: stable@vger.kernel.org
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Link: https://patch.msgid.link/20260210081900.329447-3-harry.yoo@oracle.com
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

authored by

Harry Yoo and committed by
Vlastimil Babka
a1e244a9 144080a5

+24 -4
+24 -4
mm/slub.c
@@ mm/slub.c (includes)
 #include <linux/prefetch.h>
 #include <linux/memcontrol.h>
 #include <linux/random.h>
+#include <linux/prandom.h>
 #include <kunit/test.h>
 #include <kunit/test-bug.h>
 #include <linux/sort.h>
@@ mm/slub.c (CONFIG_SLAB_FREELIST_RANDOM: shuffle_freelist)
 	return (char *)start + idx;
 }
 
+static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);
+
 /* Shuffle the single linked freelist based on a random pre-computed sequence */
-static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
+static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
+			     bool allow_spin)
 {
 	void *start;
 	void *cur;
@@
 		return false;
 
 	freelist_count = oo_objects(s->oo);
-	pos = get_random_u32_below(freelist_count);
+	if (allow_spin) {
+		pos = get_random_u32_below(freelist_count);
+	} else {
+		struct rnd_state *state;
+
+		/*
+		 * An interrupt or NMI handler might interrupt and change
+		 * the state in the middle, but that's safe.
+		 */
+		state = &get_cpu_var(slab_rnd_state);
+		pos = prandom_u32_state(state) % freelist_count;
+		put_cpu_var(slab_rnd_state);
+	}
 
 	page_limit = slab->objects * s->size;
 	start = fixup_red_left(s, slab_address(slab));
@@ mm/slub.c (!CONFIG_SLAB_FREELIST_RANDOM stub)
 	return 0;
 }
 static inline void init_freelist_randomization(void) { }
-static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
+static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
+				    bool allow_spin)
 {
 	return false;
 }
@@ mm/slub.c (caller)
 	alloc_slab_obj_exts_early(s, slab);
 	account_slab(slab, oo_order(oo), s, flags);
 
-	shuffle = shuffle_freelist(s, slab);
+	shuffle = shuffle_freelist(s, slab, allow_spin);
 
 	if (!shuffle) {
 		start = fixup_red_left(s, start);
@@ mm/slub.c (late init: seed the per-CPU PRNG state)
 {
 	flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
 	WARN_ON(!flushwq);
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+	prandom_init_once(&slab_rnd_state);
+#endif
 }
 
 int do_kmem_cache_create(struct kmem_cache *s, const char *name,