Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

slab: remove SLUB_CPU_PARTIAL

We have removed the partial slab usage from allocation paths. Now remove
the whole config option and associated code.

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
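
Background for readers unfamiliar with the removed machinery: CONFIG_SLUB_CPU_PARTIAL kept a short per-CPU list of partially used slabs so that frees could stay off the per-node list_lock, draining the whole list under that lock only on overflow (see the put_cpu_partial()/__put_partials() removals in mm/slub.c below). The following userspace sketch models only that push/overflow-drain shape; pcpu_partial, drain_to_node() and the cap value are hypothetical stand-ins, not kernel API.

#include <stdio.h>
#include <stdlib.h>

#define CPU_PARTIAL_SLABS 4     /* cap, cf. the removed s->cpu_partial_slabs */

struct slab {                   /* stand-in for the kernel's struct slab */
        struct slab *next;
        int slabs;              /* nr of slabs on the list, kept in the head */
};

static struct slab *pcpu_partial;   /* models this_cpu(s->cpu_slab->partial) */

/* Models __put_partials(): walk a detached list under the node lock. */
static void drain_to_node(struct slab *list)
{
        /* lock(node->list_lock) would go here; this drain is the latency
         * spike the removed Kconfig help text warned realtime users about */
        while (list) {
                struct slab *slab = list;

                list = slab->next;
                printf("  moved slab %p to node partial list\n", (void *)slab);
                free(slab);
        }
}

/* Models put_cpu_partial(): cheap push until the cap is hit. */
static void put_cpu_partial(struct slab *slab)
{
        int slabs = 0;

        if (pcpu_partial && pcpu_partial->slabs >= CPU_PARTIAL_SLABS) {
                drain_to_node(pcpu_partial);    /* overflow: drain everything */
                pcpu_partial = NULL;
        }
        if (pcpu_partial)
                slabs = pcpu_partial->slabs;
        slab->slabs = slabs + 1;
        slab->next = pcpu_partial;
        pcpu_partial = slab;
}

int main(void)
{
        for (int i = 0; i < 10; i++)
                put_cpu_partial(calloc(1, sizeof(struct slab)));
        drain_to_node(pcpu_partial);    /* final flush, cf. put_partials_cpu() */
        return 0;
}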

+18 -341
-11
mm/Kconfig
···
           out which slabs are relevant to a particular load.
           Try running: slabinfo -DA
 
-config SLUB_CPU_PARTIAL
-        default y
-        depends on SMP && !SLUB_TINY
-        bool "Enable per cpu partial caches"
-        help
-          Per cpu partial caches accelerate objects allocation and freeing
-          that is local to a processor at the price of more indeterminism
-          in the latency of the free. On overflow these caches will be cleared
-          which requires the taking of locks that may cause latency spikes.
-          Typically one would choose no for a realtime system.
-
 config RANDOM_KMALLOC_CACHES
         default n
         depends on !SLUB_TINY
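
A mechanical aside: a set bool symbol like the one deleted above produces a CONFIG_SLUB_CPU_PARTIAL=1 preprocessor macro, which the C code tested via #ifdef and IS_ENABLED() in the hunks below. The sketch that follows is a simplified userspace re-creation of the __is_defined() trick from the kernel's include/linux/kconfig.h (IS_ENABLED_SKETCH is a made-up name, and the real IS_ENABLED() also considers =m), shown only to illustrate why those tests fold to a compile-time 0 or 1.

#include <stdio.h>

/* Simplified re-creation of the kernel's kconfig.h machinery. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define __is_defined(x) ___is_defined(x)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define IS_ENABLED_SKETCH(option) __is_defined(option)

#define CONFIG_SLUB_CPU_PARTIAL 1      /* what "default y" used to produce */

int main(void)
{
        /* Each use expands to the constant 1 or 0, so the compiler can
         * discard the dead branch entirely. */
        printf("enabled: %d\n", IS_ENABLED_SKETCH(CONFIG_SLUB_CPU_PARTIAL));
        printf("enabled: %d\n", IS_ENABLED_SKETCH(CONFIG_DOES_NOT_EXIST));
        return 0;
}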
-29
mm/slab.h
···
                         struct llist_node llnode;
                         void *flush_freelist;
                 };
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-                struct {
-                        struct slab *next;
-                        int slabs;      /* Nr of slabs left */
-                };
-#endif
         };
         /* Double-word boundary */
         struct freelist_counters;
···
         return PAGE_SIZE << slab_order(slab);
 }
 
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-#define slub_percpu_partial(c)          ((c)->partial)
-
-#define slub_set_percpu_partial(c, p)           \
-({                                              \
-        slub_percpu_partial(c) = (p)->next;     \
-})
-
-#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
-#else
-#define slub_percpu_partial(c)                  NULL
-
-#define slub_set_percpu_partial(c, p)
-
-#define slub_percpu_partial_read_once(c)        NULL
-#endif // CONFIG_SLUB_CPU_PARTIAL
-
 /*
  * Word size structure that can be atomically updated or read and that
  * contains both the order and the number of objects that a slab of the
···
         unsigned int object_size;       /* Object size without metadata */
         struct reciprocal_value reciprocal_size;
         unsigned int offset;            /* Free pointer offset */
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-        /* Number of per cpu partial objects to keep around */
-        unsigned int cpu_partial;
-        /* Number of per cpu partial slabs to keep around */
-        unsigned int cpu_partial_slabs;
-#endif
         unsigned int sheaf_capacity;
         struct kmem_cache_order_objects oo;
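
Note that the removed next/slabs pair lived in an anonymous struct which, judging from the surrounding context lines, overlays other freeing-path state inside a union, so deleting it simplifies struct slab without changing its size. A minimal runnable sketch of that overlay, where slab_sketch and the stand-in llist_node are illustrative types rather than the kernel's:

#include <stdio.h>

struct llist_node { struct llist_node *next; };  /* stand-in */

struct slab_sketch {
        union {
                struct {                        /* deferred-flush state */
                        struct llist_node llnode;
                        void *flush_freelist;
                };
                struct {                        /* removed by this patch */
                        struct slab_sketch *next;
                        int slabs;              /* nr of slabs left */
                };
        };
};

int main(void)
{
        /* Both anonymous structs share the same storage, so dropping the
         * second one does not shrink the struct. */
        printf("sizeof(struct slab_sketch) = %zu\n", sizeof(struct slab_sketch));
        return 0;
}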
+18 -301
mm/slub.c
···
         return p;
 }
 
-static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
-{
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-        return !kmem_cache_debug(s);
-#else
-        return false;
-#endif
-}
-
 /*
  * Issues still to be resolved:
  *
···
 struct kmem_cache_cpu {
         struct freelist_tid;
         struct slab *slab;      /* The slab from which we are allocating */
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-        struct slab *partial;   /* Partially allocated slabs */
-#endif
         local_trylock_t lock;   /* Protects the fields above */
 #ifdef CONFIG_SLUB_STATS
         unsigned int stat[NR_SLUB_STAT_ITEMS];
···
 {
         return x.x & OO_MASK;
 }
-
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
-{
-        unsigned int nr_slabs;
-
-        s->cpu_partial = nr_objects;
-
-        /*
-         * We take the number of objects but actually limit the number of
-         * slabs on the per cpu partial list, in order to limit excessive
-         * growth of the list. For simplicity we assume that the slabs will
-         * be half-full.
-         */
-        nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
-        s->cpu_partial_slabs = nr_slabs;
-}
-#elif defined(SLAB_SUPPORTS_SYSFS)
-static inline void
-slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
-{
-}
-#endif /* CONFIG_SLUB_CPU_PARTIAL */
 
 /*
  * If network-based swap is enabled, slub must keep track of whether memory
···
         return object;
 }
 
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
-#else
-static inline void put_cpu_partial(struct kmem_cache *s, struct slab *slab,
-                                   int drain) { }
-#endif
 static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
 
 static bool get_partial_node_bulk(struct kmem_cache *s,
···
 #define local_unlock_cpu_slab(s, flags) \
         local_unlock_irqrestore(&(s)->cpu_slab->lock, flags)
 
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-static void __put_partials(struct kmem_cache *s, struct slab *partial_slab)
-{
-        struct kmem_cache_node *n = NULL, *n2 = NULL;
-        struct slab *slab, *slab_to_discard = NULL;
-        unsigned long flags = 0;
-
-        while (partial_slab) {
-                slab = partial_slab;
-                partial_slab = slab->next;
-
-                n2 = get_node(s, slab_nid(slab));
-                if (n != n2) {
-                        if (n)
-                                spin_unlock_irqrestore(&n->list_lock, flags);
-
-                        n = n2;
-                        spin_lock_irqsave(&n->list_lock, flags);
-                }
-
-                if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial)) {
-                        slab->next = slab_to_discard;
-                        slab_to_discard = slab;
-                } else {
-                        add_partial(n, slab, DEACTIVATE_TO_TAIL);
-                        stat(s, FREE_ADD_PARTIAL);
-                }
-        }
-
-        if (n)
-                spin_unlock_irqrestore(&n->list_lock, flags);
-
-        while (slab_to_discard) {
-                slab = slab_to_discard;
-                slab_to_discard = slab_to_discard->next;
-
-                stat(s, DEACTIVATE_EMPTY);
-                discard_slab(s, slab);
-                stat(s, FREE_SLAB);
-        }
-}
-
-/*
- * Put all the cpu partial slabs to the node partial list.
- */
-static void put_partials(struct kmem_cache *s)
-{
-        struct slab *partial_slab;
-        unsigned long flags;
-
-        local_lock_irqsave(&s->cpu_slab->lock, flags);
-        partial_slab = this_cpu_read(s->cpu_slab->partial);
-        this_cpu_write(s->cpu_slab->partial, NULL);
-        local_unlock_irqrestore(&s->cpu_slab->lock, flags);
-
-        if (partial_slab)
-                __put_partials(s, partial_slab);
-}
-
-static void put_partials_cpu(struct kmem_cache *s,
-                             struct kmem_cache_cpu *c)
-{
-        struct slab *partial_slab;
-
-        partial_slab = slub_percpu_partial(c);
-        c->partial = NULL;
-
-        if (partial_slab)
-                __put_partials(s, partial_slab);
-}
-
-/*
- * Put a slab into a partial slab slot if available.
- *
- * If we did not find a slot then simply move all the partials to the
- * per node partial list.
- */
-static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
-{
-        struct slab *oldslab;
-        struct slab *slab_to_put = NULL;
-        unsigned long flags;
-        int slabs = 0;
-
-        local_lock_cpu_slab(s, flags);
-
-        oldslab = this_cpu_read(s->cpu_slab->partial);
-
-        if (oldslab) {
-                if (drain && oldslab->slabs >= s->cpu_partial_slabs) {
-                        /*
-                         * Partial array is full. Move the existing set to the
-                         * per node partial list. Postpone the actual unfreezing
-                         * outside of the critical section.
-                         */
-                        slab_to_put = oldslab;
-                        oldslab = NULL;
-                } else {
-                        slabs = oldslab->slabs;
-                }
-        }
-
-        slabs++;
-
-        slab->slabs = slabs;
-        slab->next = oldslab;
-
-        this_cpu_write(s->cpu_slab->partial, slab);
-
-        local_unlock_cpu_slab(s, flags);
-
-        if (slab_to_put) {
-                __put_partials(s, slab_to_put);
-                stat(s, CPU_PARTIAL_DRAIN);
-        }
-}
-
-#else   /* CONFIG_SLUB_CPU_PARTIAL */
-
-static inline void put_partials(struct kmem_cache *s) { }
-static inline void put_partials_cpu(struct kmem_cache *s,
-                                    struct kmem_cache_cpu *c) { }
-
-#endif  /* CONFIG_SLUB_CPU_PARTIAL */
-
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
         unsigned long flags;
···
                 deactivate_slab(s, slab, freelist);
                 stat(s, CPUSLAB_FLUSH);
         }
-
-        put_partials_cpu(s, c);
 }
 
 static inline void flush_this_cpu_slab(struct kmem_cache *s)
···
 
         if (c->slab)
                 flush_slab(s, c);
-
-        put_partials(s);
 }
 
 static bool has_cpu_slab(int cpu, struct kmem_cache *s)
 {
         struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
-        return c->slab || slub_percpu_partial(c);
+        return c->slab;
 }
 
 static bool has_pcs_used(int cpu, struct kmem_cache *s)
···
                 return;
         }
 
-        /*
-         * It is enough to test IS_ENABLED(CONFIG_SLUB_CPU_PARTIAL) below
-         * instead of kmem_cache_has_cpu_partial(s), because kmem_cache_debug(s)
-         * is the only other reason it can be false, and it is already handled
-         * above.
-         */
-
         do {
                 if (unlikely(n)) {
                         spin_unlock_irqrestore(&n->list_lock, flags);
···
                  * Unless it's frozen.
                  */
                 if ((!new.inuse || was_full) && !was_frozen) {
+
+                        n = get_node(s, slab_nid(slab));
                         /*
-                         * If slab becomes non-full and we have cpu partial
-                         * lists, we put it there unconditionally to avoid
-                         * taking the list_lock. Otherwise we need it.
+                         * Speculatively acquire the list_lock.
+                         * If the cmpxchg does not succeed then we may
+                         * drop the list_lock without any processing.
+                         *
+                         * Otherwise the list_lock will synchronize with
+                         * other processors updating the list of slabs.
                          */
-                        if (!(IS_ENABLED(CONFIG_SLUB_CPU_PARTIAL) && was_full)) {
+                        spin_lock_irqsave(&n->list_lock, flags);
 
-                                n = get_node(s, slab_nid(slab));
-                                /*
-                                 * Speculatively acquire the list_lock.
-                                 * If the cmpxchg does not succeed then we may
-                                 * drop the list_lock without any processing.
-                                 *
-                                 * Otherwise the list_lock will synchronize with
-                                 * other processors updating the list of slabs.
-                                 */
-                                spin_lock_irqsave(&n->list_lock, flags);
-
-                                on_node_partial = slab_test_node_partial(slab);
-                        }
+                        on_node_partial = slab_test_node_partial(slab);
                 }
 
         } while (!slab_update_freelist(s, slab, &old, &new, "__slab_free"));
···
                  * activity can be necessary.
                  */
                 stat(s, FREE_FROZEN);
-        } else if (IS_ENABLED(CONFIG_SLUB_CPU_PARTIAL) && was_full) {
-                /*
-                 * If we started with a full slab then put it onto the
-                 * per cpu partial list.
-                 */
-                put_cpu_partial(s, slab, 1);
-                stat(s, CPU_PARTIAL_FREE);
         }
 
         /*
···
 
         /*
          * Objects left in the slab. If it was not on the partial list before
-         * then add it. This can only happen when cache has no per cpu partial
-         * list otherwise we would have put it there.
+         * then add it.
          */
-        if (!IS_ENABLED(CONFIG_SLUB_CPU_PARTIAL) && unlikely(was_full)) {
+        if (unlikely(was_full)) {
                 add_partial(n, slab, DEACTIVATE_TO_TAIL);
                 stat(s, FREE_ADD_PARTIAL);
         }
···
         if (unlikely(!allow_spin)) {
                 /*
                  * __slab_free() can locklessly cmpxchg16 into a slab,
-                 * but then it might need to take spin_lock or local_lock
-                 * in put_cpu_partial() for further processing.
+                 * but then it might need to take spin_lock
+                 * for further processing.
                  * Avoid the complexity and simply add to a deferred list.
                  */
                 defer_free(s, head);
···
         return 1;
 }
 
-static void set_cpu_partial(struct kmem_cache *s)
-{
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-        unsigned int nr_objects;
-
-        /*
-         * cpu_partial determined the maximum number of objects kept in the
-         * per cpu partial lists of a processor.
-         *
-         * Per cpu partial lists mainly contain slabs that just have one
-         * object freed. If they are used for allocation then they can be
-         * filled up again with minimal effort. The slab will never hit the
-         * per node partial lists and therefore no locking will be required.
-         *
-         * For backwards compatibility reasons, this is determined as number
-         * of objects, even though we now limit maximum number of pages, see
-         * slub_set_cpu_partial()
-         */
-        if (!kmem_cache_has_cpu_partial(s))
-                nr_objects = 0;
-        else if (s->size >= PAGE_SIZE)
-                nr_objects = 6;
-        else if (s->size >= 1024)
-                nr_objects = 24;
-        else if (s->size >= 256)
-                nr_objects = 52;
-        else
-                nr_objects = 120;
-
-        slub_set_cpu_partial(s, nr_objects);
-#endif
-}
-
 static unsigned int calculate_sheaf_capacity(struct kmem_cache *s,
                                              struct kmem_cache_args *args)
···
         s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2);
         s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
 
-        set_cpu_partial(s);
-
         s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
         if (!s->cpu_sheaves) {
                 err = -ENOMEM;
···
                         total += x;
                         nodes[node] += x;
 
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-                        slab = slub_percpu_partial_read_once(c);
-                        if (slab) {
-                                node = slab_nid(slab);
-                                if (flags & SO_TOTAL)
-                                        WARN_ON_ONCE(1);
-                                else if (flags & SO_OBJECTS)
-                                        WARN_ON_ONCE(1);
-                                else
-                                        x = data_race(slab->slabs);
-                                total += x;
-                                nodes[node] += x;
-                        }
-#endif
                 }
         }
···
 
 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
 {
-        unsigned int nr_partial = 0;
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-        nr_partial = s->cpu_partial;
-#endif
-
-        return sysfs_emit(buf, "%u\n", nr_partial);
+        return sysfs_emit(buf, "0\n");
 }
 
 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
···
         err = kstrtouint(buf, 10, &objects);
         if (err)
                 return err;
-        if (objects && !kmem_cache_has_cpu_partial(s))
+        if (objects)
                 return -EINVAL;
 
-        slub_set_cpu_partial(s, objects);
-        flush_all(s);
         return length;
 }
 SLAB_ATTR(cpu_partial);
···
 
 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
 {
-        int objects = 0;
-        int slabs = 0;
-        int cpu __maybe_unused;
-        int len = 0;
-
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-        for_each_online_cpu(cpu) {
-                struct slab *slab;
-
-                slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
-
-                if (slab)
-                        slabs += data_race(slab->slabs);
-        }
-#endif
-
-        /* Approximate half-full slabs, see slub_set_cpu_partial() */
-        objects = (slabs * oo_objects(s->oo)) / 2;
-        len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
-
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-        for_each_online_cpu(cpu) {
-                struct slab *slab;
-
-                slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
-                if (slab) {
-                        slabs = data_race(slab->slabs);
-                        objects = (slabs * oo_objects(s->oo)) / 2;
-                        len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
-                                             cpu, objects, slabs);
-                }
-        }
-#endif
-        len += sysfs_emit_at(buf, len, "\n");
-
-        return len;
+        return sysfs_emit(buf, "0(0)\n");
 }
 SLAB_ATTR_RO(slabs_cpu_partial);
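
For reference, the deleted set_cpu_partial()/slub_set_cpu_partial() pair sized the list as an object count chosen by object size (120/52/24/6), then converted that to a slab cap on the assumption that cached slabs are half full. A runnable sketch of that heuristic follows; objs_per_slab stands in for oo_objects(s->oo), and a one-page slab with a PAGE_SIZE of 4096 is assumed for the example.

#include <stdio.h>

#define PAGE_SIZE 4096          /* assumed; arch-dependent in the kernel */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Mirrors the removed set_cpu_partial() thresholds. */
static unsigned int cpu_partial_objects(unsigned int size)
{
        if (size >= PAGE_SIZE)
                return 6;
        if (size >= 1024)
                return 24;
        if (size >= 256)
                return 52;
        return 120;
}

int main(void)
{
        unsigned int sizes[] = { 64, 256, 1024, 4096 };

        for (unsigned int i = 0; i < 4; i++) {
                unsigned int size = sizes[i];
                unsigned int nr_objects = cpu_partial_objects(size);
                /* objs_per_slab stands in for oo_objects(s->oo); one page
                 * per slab is assumed for the sake of the example */
                unsigned int objs_per_slab = PAGE_SIZE / size;
                /* cap the list in slabs, assuming slabs are half full,
                 * as the removed slub_set_cpu_partial() did */
                unsigned int nr_slabs = DIV_ROUND_UP(nr_objects * 2, objs_per_slab);

                printf("size %4u: cpu_partial=%3u -> cpu_partial_slabs=%u\n",
                       size, nr_objects, nr_slabs);
        }
        return 0;
}

With the option gone, the cpu_partial and slabs_cpu_partial sysfs attributes above are kept only for compatibility and report constant zeros.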