Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

slab: introduce percpu sheaves bootstrap

Until now, kmem_cache->cpu_sheaves was !NULL only for caches with
sheaves enabled. Since we want to enable them for almost all caches,
it's suboptimal to test the pointer in the fast paths, so instead
allocate it for all caches in do_kmem_cache_create(). To recognize
caches that do not (yet) have sheaves, test whether
kmem_cache->sheaf_capacity is 0 where needed, using a new
cache_has_sheaves() helper.
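
For illustration, a slow-path guard that previously tested the pointer
now tests the capacity (a sketch distilled from the pcs_flush_all()
hunk in the mm/slub.c diff below):

	/* Before: caches without sheaves had a NULL per-cpu pointer. */
	if (!s->cpu_sheaves)
		return false;

	/* After: the pointer always exists; capacity 0 means no real sheaves. */
	if (!cache_has_sheaves(s))
		return false;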

However, for the fast paths' sake we also assume that the main sheaf
always exists (pcs->main is !NULL), and during bootstrap we cannot
allocate sheaves yet.

Solve this by introducing a single static bootstrap_sheaf that's
assigned as pcs->main during bootstrap. It has a size of 0, so during
allocations the fast path will find it empty. Since the size of 0
matches a sheaf_capacity of 0, the freeing fast paths will find it
"full". In the slow path handlers, we use cache_has_sheaves() to
recognize that the cache doesn't (yet) have real sheaves, and fall
back. Sharing the single bootstrap sheaf like this between multiple
caches and cpus is therefore safe.
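
To illustrate why the shared zero-size sheaf satisfies both fast paths,
here is a minimal userspace mock (the struct fields mirror the patch,
but the helper functions are hypothetical simplifications, not the
kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	struct slab_sheaf { unsigned int size; };
	struct kmem_cache { unsigned int sheaf_capacity; };

	/* Shared and never modified: size 0, no objects. */
	static struct slab_sheaf bootstrap_sheaf = { .size = 0 };

	/* Alloc fast path succeeds only if the main sheaf has objects. */
	static bool alloc_fast_path(struct slab_sheaf *main)
	{
		return main->size > 0;	/* bootstrap sheaf: always false */
	}

	/* Free fast path succeeds only if the main sheaf has room. */
	static bool free_fast_path(struct kmem_cache *s, struct slab_sheaf *main)
	{
		return main->size < s->sheaf_capacity;	/* 0 < 0: always false */
	}

	int main(void)
	{
		struct kmem_cache boot_cache = { .sheaf_capacity = 0 };

		/* Both print 0: the fast paths back off, and the slow paths
		 * (which check cache_has_sheaves()) handle bootstrap. */
		printf("alloc: %d, free: %d\n",
		       alloc_fast_path(&bootstrap_sheaf),
		       free_fast_path(&boot_cache, &bootstrap_sheaf));
		return 0;
	}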

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

+97 -47
+12
mm/slab.h
···
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
+/*
+ * Every cache has !NULL s->cpu_sheaves but it may point to the
+ * bootstrap_sheaf temporarily during init, or permanently for the boot caches
+ * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY. This
+ * helper distinguishes whether the cache has real non-bootstrap sheaves.
+ */
+static inline bool cache_has_sheaves(struct kmem_cache *s)
+{
+	/* Test CONFIG_SLUB_TINY for code elimination purposes */
+	return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity;
+}
+
 #if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
 #define SLAB_SUPPORTS_SYSFS 1
 void sysfs_slab_unlink(struct kmem_cache *s);
+3 -6
mm/slab_common.c
···
 		return false;
 
 	s = slab->slab_cache;
-	if (s->cpu_sheaves) {
-		if (likely(!IS_ENABLED(CONFIG_NUMA) ||
-			   slab_nid(slab) == numa_mem_id()))
-			return __kfree_rcu_sheaf(s, obj);
-	}
+	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
+		return __kfree_rcu_sheaf(s, obj);
 
 	return false;
 }
···
  */
 void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
 {
-	if (s->cpu_sheaves) {
+	if (cache_has_sheaves(s)) {
 		flush_rcu_sheaves_on_cache(s);
 		rcu_barrier();
 	}
+82 -41
mm/slub.c
···
 {
 	int cpu;
 
+	/*
+	 * We may be unwinding cache creation that failed before or during the
+	 * allocation of this.
+	 */
+	if (!s->cpu_sheaves)
+		return;
+
+	/* pcs->main can only point to the bootstrap sheaf, nothing to free */
+	if (!cache_has_sheaves(s))
+		goto free_pcs;
+
 	for_each_possible_cpu(cpu) {
 		struct slub_percpu_sheaves *pcs;
 
 		pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
 
-		/* can happen when unwinding failed create */
+		/* This can happen when unwinding failed cache creation. */
 		if (!pcs->main)
 			continue;
 
···
 		}
 	}
 
+free_pcs:
 	free_percpu(s->cpu_sheaves);
 	s->cpu_sheaves = NULL;
 }
···
 {
 	struct slub_percpu_sheaves *pcs;
 
-	if (!s->cpu_sheaves)
+	if (!cache_has_sheaves(s))
 		return false;
 
 	pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
···
 
 	s = sfw->s;
 
-	if (s->cpu_sheaves)
+	if (cache_has_sheaves(s))
 		pcs_flush_all(s);
 
 	flush_this_cpu_slab(s);
···
 	mutex_lock(&slab_mutex);
 
 	list_for_each_entry(s, &slab_caches, list) {
-		if (!s->cpu_sheaves)
+		if (!cache_has_sheaves(s))
 			continue;
 		flush_rcu_sheaves_on_cache(s);
 	}
···
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		__flush_cpu_slab(s, cpu);
-		if (s->cpu_sheaves)
+		if (cache_has_sheaves(s))
 			__pcs_flush_all_cpu(s, cpu);
 	}
 	mutex_unlock(&slab_mutex);
···
 
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
+	/* Bootstrap or debug cache, back off */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	if (pcs->spare && pcs->spare->size > 0) {
 		swap(pcs->main, pcs->spare);
 		return pcs;
···
 	struct slab_sheaf *full;
 	struct node_barn *barn;
 
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return allocated;
+	}
+
 	if (pcs->spare && pcs->spare->size > 0) {
 		swap(pcs->main, pcs->spare);
 		goto do_alloc;
···
 	if (unlikely(object))
 		goto out;
 
-	if (s->cpu_sheaves)
-		object = alloc_from_pcs(s, gfpflags, node);
+	object = alloc_from_pcs(s, gfpflags, node);
 
 	if (!object)
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
···
 	struct slab_sheaf *sheaf = NULL;
 	struct node_barn *barn;
 
-	if (unlikely(size > s->sheaf_capacity)) {
+	if (unlikely(!size))
+		return NULL;
 
-		/*
-		 * slab_debug disables cpu sheaves intentionally so all
-		 * prefilled sheaves become "oversize" and we give up on
-		 * performance for the debugging. Same with SLUB_TINY.
-		 * Creating a cache without sheaves and then requesting a
-		 * prefilled sheaf is however not expected, so warn.
-		 */
-		WARN_ON_ONCE(s->sheaf_capacity == 0 &&
-			     !IS_ENABLED(CONFIG_SLUB_TINY) &&
-			     !(s->flags & SLAB_DEBUG_FLAGS));
+	if (unlikely(size > s->sheaf_capacity)) {
 
 		sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
 		if (!sheaf)
···
 restart:
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
+	/* Bootstrap or debug cache, back off */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	barn = get_barn(s);
 	if (!barn) {
 		local_unlock(&s->cpu_sheaves->lock);
···
 
 	struct slab_sheaf *empty;
 	struct node_barn *barn;
+
+	/* Bootstrap or debug cache, fall back */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		goto fail;
+	}
 
 	if (pcs->spare && pcs->spare->size == 0) {
 		pcs->rcu_free = pcs->spare;
···
 	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
 		return;
 
-	if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
-				     slab_nid(slab) == numa_mem_id())
-	    && likely(!slab_test_pfmemalloc(slab))) {
+	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
+	    && likely(!slab_test_pfmemalloc(slab))) {
 		if (likely(free_to_pcs(s, object)))
 			return;
 	}
···
 	 * freeing to sheaves is so incompatible with the detached freelist so
 	 * once we go that way, we have to do everything differently
 	 */
-	if (s && s->cpu_sheaves) {
+	if (s && cache_has_sheaves(s)) {
 		free_to_pcs_bulk(s, size, p);
 		return;
 	}
···
 		size--;
 	}
 
-	if (s->cpu_sheaves)
-		i = alloc_from_pcs_bulk(s, size, p);
+	i = alloc_from_pcs_bulk(s, size, p);
 
 	if (i < size) {
 		/*
···
 
 static int init_percpu_sheaves(struct kmem_cache *s)
 {
+	static struct slab_sheaf bootstrap_sheaf = {};
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
···
 
 		local_trylock_init(&pcs->lock);
 
-		pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
+		/*
+		 * The bootstrap sheaf has zero size so fast-path allocation
+		 * fails. It also has size == s->sheaf_capacity, so fast-path
+		 * free fails. In the slow paths we recognize the situation by
+		 * checking s->sheaf_capacity. This allows fast paths to assume
+		 * s->cpu_sheaves and pcs->main always exist and are valid.
+		 * It's also safe to share the single static bootstrap_sheaf
+		 * with zero-sized objects array as it's never modified.
+		 *
+		 * bootstrap_sheaf also has a NULL kmem_cache pointer so we
+		 * recognize it and do not attempt to free it when destroying
+		 * the cache.
+		 *
+		 * We keep bootstrap_sheaf for kmem_cache and kmem_cache_node,
+		 * caches with debug enabled, and all caches with SLUB_TINY.
+		 * For kmalloc caches it's used temporarily during the initial
+		 * bootstrap.
+		 */
+		if (!s->sheaf_capacity)
+			pcs->main = &bootstrap_sheaf;
+		else
+			pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
 
 		if (!pcs->main)
 			return -ENOMEM;
···
 void __kmem_cache_release(struct kmem_cache *s)
 {
 	cache_random_seq_destroy(s);
-	if (s->cpu_sheaves)
-		pcs_destroy(s);
+	pcs_destroy(s);
 #ifdef CONFIG_PREEMPT_RT
 	if (s->cpu_slab)
 		lockdep_unregister_key(&s->lock_key);
···
 			continue;
 		}
 
-		if (s->cpu_sheaves) {
+		if (cache_has_sheaves(s)) {
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
 
 			if (!barn)
···
 	flush_all_cpus_locked(s);
 
 	/* we might have rcu sheaves in flight */
-	if (s->cpu_sheaves)
+	if (cache_has_sheaves(s))
 		rcu_barrier();
 
 	/* Attempt to free all objects */
···
 		if (get_node(s, nid))
 			continue;
 
-		if (s->cpu_sheaves) {
+		if (cache_has_sheaves(s)) {
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
 
 			if (!barn) {
···
 
 	set_cpu_partial(s);
 
-	if (s->sheaf_capacity) {
-		s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
-		if (!s->cpu_sheaves) {
-			err = -ENOMEM;
-			goto out;
-		}
+	s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
+	if (!s->cpu_sheaves) {
+		err = -ENOMEM;
+		goto out;
 	}
 
 #ifdef CONFIG_NUMA
···
 	if (!alloc_kmem_cache_cpus(s))
 		goto out;
 
-	if (s->cpu_sheaves) {
-		err = init_percpu_sheaves(s);
-		if (err)
-			goto out;
-	}
+	err = init_percpu_sheaves(s);
+	if (err)
+		goto out;
 
 	err = 0;