Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull more slab updates from Vlastimil Babka:

 - Two stable fixes for kmalloc_nolock() usage from NMI context (Harry
   Yoo)

 - Allow kmalloc_nolock() allocations to be freed with kfree() and thus
   also kfree_rcu() and simplify slabobj_ext handling - we no longer
   need to track how it was allocated to use the matching freeing
   function (Harry Yoo)

* tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slab: drop the OBJEXTS_NOSPIN_ALLOC flag from enum objext_flags
  mm/slab: allow freeing kmalloc_nolock()'d objects using kfree[_rcu]()
  mm/slab: use prandom if !allow_spin
  mm/slab: do not access current->mems_allowed_seq if !allow_spin

+76 -33
+1 -2
include/linux/memcontrol.h
··· 359 359 * MEMCG_DATA_OBJEXTS. 360 360 */ 361 361 OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL, 362 - /* slabobj_ext vector allocated with kmalloc_nolock() */ 363 - OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG, 362 + __OBJEXTS_FLAG_UNUSED = __FIRST_OBJEXT_FLAG, 364 363 /* the next bit after the last actual flag */ 365 364 __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1), 366 365 };
+2 -2
include/linux/rcupdate.h
··· 1074 1074 * either fall back to use of call_rcu() or rearrange the structure to 1075 1075 * position the rcu_head structure into the first 4096 bytes. 1076 1076 * 1077 - * The object to be freed can be allocated either by kmalloc() or 1078 - * kmem_cache_alloc(). 1077 + * The object to be freed can be allocated either by kmalloc(), 1078 + * kmalloc_nolock(), or kmem_cache_alloc(). 1079 1079 * 1080 1080 * Note that the allowable offset might decrease in the future. 1081 1081 *
+10 -12
mm/kmemleak.c
··· 837 837 struct kmemleak_object *object; 838 838 839 839 object = find_and_remove_object(ptr, 0, objflags); 840 - if (!object) { 841 - #ifdef DEBUG 842 - kmemleak_warn("Freeing unknown object at 0x%08lx\n", 843 - ptr); 844 - #endif 840 + if (!object) 841 + /* 842 + * kmalloc_nolock() -> kfree() calls kmemleak_free() 843 + * without kmemleak_alloc(). 844 + */ 845 845 return; 846 - } 847 846 __delete_object(object); 848 847 } 849 848 ··· 925 926 struct kmemleak_object *object; 926 927 927 928 object = __find_and_get_object(ptr, 0, objflags); 928 - if (!object) { 929 - kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n", 930 - ptr, 931 - (color == KMEMLEAK_GREY) ? "Grey" : 932 - (color == KMEMLEAK_BLACK) ? "Black" : "Unknown"); 929 + if (!object) 930 + /* 931 + * kmalloc_nolock() -> kfree_rcu() calls kmemleak_ignore() 932 + * without kmemleak_alloc(). 933 + */ 933 934 return; 934 - } 935 935 paint_it(object, color); 936 936 put_object(object); 937 937 }
+63 -17
mm/slub.c
··· 43 43 #include <linux/prefetch.h> 44 44 #include <linux/memcontrol.h> 45 45 #include <linux/random.h> 46 + #include <linux/prandom.h> 46 47 #include <kunit/test.h> 47 48 #include <kunit/test-bug.h> 48 49 #include <linux/sort.h> ··· 2190 2189 virt_to_slab(vec)->slab_cache == s); 2191 2190 2192 2191 new_exts = (unsigned long)vec; 2193 - if (unlikely(!allow_spin)) 2194 - new_exts |= OBJEXTS_NOSPIN_ALLOC; 2195 2192 #ifdef CONFIG_MEMCG 2196 2193 new_exts |= MEMCG_DATA_OBJEXTS; 2197 2194 #endif ··· 2227 2228 return 0; 2228 2229 } 2229 2230 2230 - static inline void free_slab_obj_exts(struct slab *slab) 2231 + static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin) 2231 2232 { 2232 2233 struct slabobj_ext *obj_exts; 2233 2234 ··· 2255 2256 * the extension for obj_exts is expected to be NULL. 2256 2257 */ 2257 2258 mark_objexts_empty(obj_exts); 2258 - if (unlikely(READ_ONCE(slab->obj_exts) & OBJEXTS_NOSPIN_ALLOC)) 2259 - kfree_nolock(obj_exts); 2260 - else 2259 + if (allow_spin) 2261 2260 kfree(obj_exts); 2261 + else 2262 + kfree_nolock(obj_exts); 2262 2263 slab->obj_exts = 0; 2263 2264 } 2264 2265 ··· 2322 2323 return 0; 2323 2324 } 2324 2325 2325 - static inline void free_slab_obj_exts(struct slab *slab) 2326 + static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin) 2326 2327 { 2327 2328 } 2328 2329 ··· 2583 2584 * Returns true if freeing of the object can proceed, false if its reuse 2584 2585 * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned 2585 2586 * to KFENCE. 2587 + * 2588 + * For objects allocated via kmalloc_nolock(), only a subset of alloc hooks 2589 + * are invoked, so some free hooks must handle asymmetric hook calls. 
2590 + * 2591 + * Alloc hooks called for kmalloc_nolock(): 2592 + * - kmsan_slab_alloc() 2593 + * - kasan_slab_alloc() 2594 + * - memcg_slab_post_alloc_hook() 2595 + * - alloc_tagging_slab_alloc_hook() 2596 + * 2597 + * Free hooks that must handle missing corresponding alloc hooks: 2598 + * - kmemleak_free_recursive() 2599 + * - kfence_free() 2600 + * 2601 + * Free hooks that have no alloc hook counterpart, and thus safe to call: 2602 + * - debug_check_no_locks_freed() 2603 + * - debug_check_no_obj_freed() 2604 + * - __kcsan_check_access() 2586 2605 */ 2587 2606 static __always_inline 2588 2607 bool slab_free_hook(struct kmem_cache *s, void *x, bool init, ··· 3328 3311 return (char *)start + idx; 3329 3312 } 3330 3313 3314 + static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state); 3315 + 3331 3316 /* Shuffle the single linked freelist based on a random pre-computed sequence */ 3332 - static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) 3317 + static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab, 3318 + bool allow_spin) 3333 3319 { 3334 3320 void *start; 3335 3321 void *cur; ··· 3343 3323 return false; 3344 3324 3345 3325 freelist_count = oo_objects(s->oo); 3346 - pos = get_random_u32_below(freelist_count); 3326 + if (allow_spin) { 3327 + pos = get_random_u32_below(freelist_count); 3328 + } else { 3329 + struct rnd_state *state; 3330 + 3331 + /* 3332 + * An interrupt or NMI handler might interrupt and change 3333 + * the state in the middle, but that's safe. 
3334 + */ 3335 + state = &get_cpu_var(slab_rnd_state); 3336 + pos = prandom_u32_state(state) % freelist_count; 3337 + put_cpu_var(slab_rnd_state); 3338 + } 3347 3339 3348 3340 page_limit = slab->objects * s->size; 3349 3341 start = fixup_red_left(s, slab_address(slab)); ··· 3382 3350 return 0; 3383 3351 } 3384 3352 static inline void init_freelist_randomization(void) { } 3385 - static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) 3353 + static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab, 3354 + bool allow_spin) 3386 3355 { 3387 3356 return false; 3388 3357 } ··· 3402 3369 } 3403 3370 3404 3371 static __always_inline void unaccount_slab(struct slab *slab, int order, 3405 - struct kmem_cache *s) 3372 + struct kmem_cache *s, bool allow_spin) 3406 3373 { 3407 3374 /* 3408 3375 * The slab object extensions should now be freed regardless of 3409 3376 * whether mem_alloc_profiling_enabled() or not because profiling 3410 3377 * might have been disabled after slab->obj_exts got allocated. 
3411 3378 */ 3412 - free_slab_obj_exts(slab); 3379 + free_slab_obj_exts(slab, allow_spin); 3413 3380 3414 3381 mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), 3415 3382 -(PAGE_SIZE << order)); ··· 3474 3441 alloc_slab_obj_exts_early(s, slab); 3475 3442 account_slab(slab, oo_order(oo), s, flags); 3476 3443 3477 - shuffle = shuffle_freelist(s, slab); 3444 + shuffle = shuffle_freelist(s, slab, allow_spin); 3478 3445 3479 3446 if (!shuffle) { 3480 3447 start = fixup_red_left(s, start); ··· 3513 3480 page->mapping = NULL; 3514 3481 __ClearPageSlab(page); 3515 3482 mm_account_reclaimed_pages(pages); 3516 - unaccount_slab(slab, order, s); 3483 + unaccount_slab(slab, order, s, allow_spin); 3517 3484 if (allow_spin) 3518 3485 free_frozen_pages(page, order); 3519 3486 else ··· 3824 3791 struct zone *zone; 3825 3792 enum zone_type highest_zoneidx = gfp_zone(pc->flags); 3826 3793 unsigned int cpuset_mems_cookie; 3794 + bool allow_spin = gfpflags_allow_spinning(pc->flags); 3827 3795 3828 3796 /* 3829 3797 * The defrag ratio allows a configuration of the tradeoffs between ··· 3849 3815 return NULL; 3850 3816 3851 3817 do { 3852 - cpuset_mems_cookie = read_mems_allowed_begin(); 3818 + /* 3819 + * read_mems_allowed_begin() accesses current->mems_allowed_seq, 3820 + * a seqcount_spinlock_t that is not NMI-safe. Do not access 3821 + * current->mems_allowed_seq and avoid retry when GFP flags 3822 + * indicate spinning is not allowed. 
3823 + */ 3824 + if (allow_spin) 3825 + cpuset_mems_cookie = read_mems_allowed_begin(); 3826 + 3853 3827 zonelist = node_zonelist(mempolicy_slab_node(), pc->flags); 3854 3828 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) { 3855 3829 struct kmem_cache_node *n; ··· 3881 3839 } 3882 3840 } 3883 3841 } 3884 - } while (read_mems_allowed_retry(cpuset_mems_cookie)); 3842 + } while (allow_spin && read_mems_allowed_retry(cpuset_mems_cookie)); 3885 3843 #endif /* CONFIG_NUMA */ 3886 3844 return NULL; 3887 3845 } ··· 6414 6372 6415 6373 /** 6416 6374 * kfree - free previously allocated memory 6417 - * @object: pointer returned by kmalloc() or kmem_cache_alloc() 6375 + * @object: pointer returned by kmalloc(), kmalloc_nolock(), or kmem_cache_alloc() 6418 6376 * 6419 6377 * If @object is NULL, no operation is performed. 6420 6378 */ ··· 6433 6391 page = virt_to_page(object); 6434 6392 slab = page_slab(page); 6435 6393 if (!slab) { 6394 + /* kmalloc_nolock() doesn't support large kmalloc */ 6436 6395 free_large_kmalloc(page, (void *)object); 6437 6396 return; 6438 6397 } ··· 8380 8337 flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM | WQ_PERCPU, 8381 8338 0); 8382 8339 WARN_ON(!flushwq); 8340 + #ifdef CONFIG_SLAB_FREELIST_RANDOM 8341 + prandom_init_once(&slab_rnd_state); 8342 + #endif 8383 8343 } 8384 8344 8385 8345 int do_kmem_cache_create(struct kmem_cache *s, const char *name,