Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

slab: remove struct kmem_cache_cpu

The cpu slab is no longer used for allocation or freeing; the remaining
code only handles flushing and is effectively dead. Remove the whole
struct kmem_cache_cpu, the flushing code and other orphaned functions.

The only remaining used field of kmem_cache_cpu is the stat array under
CONFIG_SLUB_STATS. Put it instead in a new struct kmem_cache_stats. In
struct kmem_cache, the field is called cpu_stats and is placed near the
end of the struct.

Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
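
For readers unfamiliar with the percpu statistics idiom the commit keeps: below is a minimal sketch of the pattern, assuming only the standard kernel percpu API (alloc_percpu(), raw_cpu_inc(), per_cpu_ptr()). The names my_stats, my_stats_init, my_stats_inc and my_stats_sum are illustrative, not part of the patch.

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>

/* One counter array per CPU, like the new kmem_cache_stats. Plain
 * alloc_percpu() suffices: with the cmpxchg-based cpu slab gone there
 * is no double-word alignment requirement on the percpu data. */
struct my_stats {
        unsigned int stat[4];
};

static struct my_stats __percpu *my_cpu_stats;

static int my_stats_init(void)
{
        my_cpu_stats = alloc_percpu(struct my_stats);
        return my_cpu_stats ? 0 : -ENOMEM;
}

static void my_stats_inc(unsigned int si)
{
        /* Racy under preemption, which is acceptable for statistics;
         * mirrors stat() in mm/slub.c, which avoids this_cpu_add()'s
         * irq-disable overhead for the same reason. */
        raw_cpu_inc(my_cpu_stats->stat[si]);
}

static unsigned int my_stats_sum(unsigned int si)
{
        unsigned int sum = 0;
        int cpu;

        /* Aggregation walks each cpu's copy, like show_stat() below. */
        for_each_online_cpu(cpu)
                sum += per_cpu_ptr(my_cpu_stats, cpu)->stat[si];

        return sum;
}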

+27 -284
mm/slab.h  +4 -3
···
 # define system_has_freelist_aba()     system_has_cmpxchg128()
 # define try_cmpxchg_freelist          try_cmpxchg128
 # endif
-#define this_cpu_try_cmpxchg_freelist  this_cpu_try_cmpxchg128
 typedef u128 freelist_full_t;
 #else /* CONFIG_64BIT */
 # ifdef system_has_cmpxchg64
 # define system_has_freelist_aba()     system_has_cmpxchg64()
 # define try_cmpxchg_freelist          try_cmpxchg64
 # endif
-#define this_cpu_try_cmpxchg_freelist  this_cpu_try_cmpxchg64
 typedef u64 freelist_full_t;
 #endif /* CONFIG_64BIT */
···
  * Slab cache management.
  */
 struct kmem_cache {
-       struct kmem_cache_cpu __percpu *cpu_slab;
        struct slub_percpu_sheaves __percpu *cpu_sheaves;
        /* Used for retrieving partial slabs, etc. */
        slab_flags_t flags;
···
 #ifdef CONFIG_HARDENED_USERCOPY
        unsigned int useroffset;        /* Usercopy region offset */
        unsigned int usersize;          /* Usercopy region size */
+#endif
+
+#ifdef CONFIG_SLUB_STATS
+       struct kmem_cache_stats __percpu *cpu_stats;
 #endif
 
        struct kmem_cache_node *node[MAX_NUMNODES];
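
Note the preprocessor splice in the final mm/slab.h hunk: the first added #endif closes the CONFIG_HARDENED_USERCOPY block, and the pre-existing #endif is reused to close the new CONFIG_SLUB_STATS block. The tail of struct kmem_cache therefore ends up as follows (a sketch, earlier fields elided):

struct kmem_cache {
        /* ... */
#ifdef CONFIG_HARDENED_USERCOPY
        unsigned int useroffset;        /* Usercopy region offset */
        unsigned int usersize;          /* Usercopy region size */
#endif

#ifdef CONFIG_SLUB_STATS
        struct kmem_cache_stats __percpu *cpu_stats;
#endif

        struct kmem_cache_node *node[MAX_NUMNODES];
};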
mm/slub.c  +23 -281
···
        NR_SLUB_STAT_ITEMS
 };
 
-struct freelist_tid {
-       union {
-               struct {
-                       void *freelist;         /* Pointer to next available object */
-                       unsigned long tid;      /* Globally unique transaction id */
-               };
-               freelist_full_t freelist_tid;
-       };
-};
-
-/*
- * When changing the layout, make sure freelist and tid are still compatible
- * with this_cpu_cmpxchg_double() alignment requirements.
- */
-struct kmem_cache_cpu {
-       struct freelist_tid;
-       struct slab *slab;      /* The slab from which we are allocating */
-       local_trylock_t lock;   /* Protects the fields above */
 #ifdef CONFIG_SLUB_STATS
+struct kmem_cache_stats {
        unsigned int stat[NR_SLUB_STAT_ITEMS];
-#endif
 };
+#endif
 
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
 {
···
         * The rmw is racy on a preemptible kernel but this is acceptable, so
         * avoid this_cpu_add()'s irq-disable overhead.
         */
-       raw_cpu_inc(s->cpu_slab->stat[si]);
+       raw_cpu_inc(s->cpu_stats->stat[si]);
 #endif
 }
···
 void stat_add(const struct kmem_cache *s, enum stat_item si, int v)
 {
 #ifdef CONFIG_SLUB_STATS
-       raw_cpu_add(s->cpu_slab->stat[si], v);
+       raw_cpu_add(s->cpu_stats->stat[si], v);
 #endif
 }
···
 static nodemask_t slab_nodes;
 
 /*
- * Workqueue used for flush_cpu_slab().
+ * Workqueue used for flushing cpu and kfree_rcu sheaves.
  */
 static struct workqueue_struct *flushwq;
···
        add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 
        WARN_ON(1);
-}
-
-static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
-                              void **freelist, void *nextfree)
-{
-       if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
-           !check_valid_pointer(s, slab, nextfree) && freelist) {
-               object_err(s, slab, *freelist, "Freechain corrupt");
-               *freelist = NULL;
-               slab_fix(s, "Isolate corrupted freechain");
-               return true;
-       }
-
-       return false;
 }
 
 static void __slab_err(struct slab *slab)
···
                                        int objects) {}
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                   int objects) {}
-static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
-                              void **freelist, void *nextfree)
-{
-       return false;
-}
 #endif /* CONFIG_SLUB_DEBUG */
 
 /*
···
        return get_from_any_partial(s, pc);
 }
 
-#ifdef CONFIG_PREEMPTION
-/*
- * Calculate the next globally unique transaction for disambiguation
- * during cmpxchg. The transactions start with the cpu number and are then
- * incremented by CONFIG_NR_CPUS.
- */
-#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
-#else
-/*
- * No preemption supported therefore also no need to check for
- * different cpus.
- */
-#define TID_STEP 1
-#endif /* CONFIG_PREEMPTION */
-
-static inline unsigned long next_tid(unsigned long tid)
-{
-       return tid + TID_STEP;
-}
-
-#ifdef SLUB_DEBUG_CMPXCHG
-static inline unsigned int tid_to_cpu(unsigned long tid)
-{
-       return tid % TID_STEP;
-}
-
-static inline unsigned long tid_to_event(unsigned long tid)
-{
-       return tid / TID_STEP;
-}
-#endif
-
-static inline unsigned int init_tid(int cpu)
-{
-       return cpu;
-}
-
-static void init_kmem_cache_cpus(struct kmem_cache *s)
-{
-       int cpu;
-       struct kmem_cache_cpu *c;
-
-       for_each_possible_cpu(cpu) {
-               c = per_cpu_ptr(s->cpu_slab, cpu);
-               local_trylock_init(&c->lock);
-               c->tid = init_tid(cpu);
-       }
-}
-
-/*
- * Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist,
- * unfreezes the slabs and puts it on the proper list.
- * Assumes the slab has been already safely taken away from kmem_cache_cpu
- * by the caller.
- */
-static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
-                           void *freelist)
-{
-       struct kmem_cache_node *n = get_node(s, slab_nid(slab));
-       int free_delta = 0;
-       void *nextfree, *freelist_iter, *freelist_tail;
-       int tail = DEACTIVATE_TO_HEAD;
-       unsigned long flags = 0;
-       struct freelist_counters old, new;
-
-       if (READ_ONCE(slab->freelist)) {
-               stat(s, DEACTIVATE_REMOTE_FREES);
-               tail = DEACTIVATE_TO_TAIL;
-       }
-
-       /*
-        * Stage one: Count the objects on cpu's freelist as free_delta and
-        * remember the last object in freelist_tail for later splicing.
-        */
-       freelist_tail = NULL;
-       freelist_iter = freelist;
-       while (freelist_iter) {
-               nextfree = get_freepointer(s, freelist_iter);
-
-               /*
-                * If 'nextfree' is invalid, it is possible that the object at
-                * 'freelist_iter' is already corrupted. So isolate all objects
-                * starting at 'freelist_iter' by skipping them.
-                */
-               if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
-                       break;
-
-               freelist_tail = freelist_iter;
-               free_delta++;
-
-               freelist_iter = nextfree;
-       }
-
-       /*
-        * Stage two: Unfreeze the slab while splicing the per-cpu
-        * freelist to the head of slab's freelist.
-        */
-       do {
-               old.freelist = READ_ONCE(slab->freelist);
-               old.counters = READ_ONCE(slab->counters);
-               VM_BUG_ON(!old.frozen);
-
-               /* Determine target state of the slab */
-               new.counters = old.counters;
-               new.frozen = 0;
-               if (freelist_tail) {
-                       new.inuse -= free_delta;
-                       set_freepointer(s, freelist_tail, old.freelist);
-                       new.freelist = freelist;
-               } else {
-                       new.freelist = old.freelist;
-               }
-       } while (!slab_update_freelist(s, slab, &old, &new, "unfreezing slab"));
-
-       /*
-        * Stage three: Manipulate the slab list based on the updated state.
-        */
-       if (!new.inuse && n->nr_partial >= s->min_partial) {
-               stat(s, DEACTIVATE_EMPTY);
-               discard_slab(s, slab);
-               stat(s, FREE_SLAB);
-       } else if (new.freelist) {
-               spin_lock_irqsave(&n->list_lock, flags);
-               add_partial(n, slab, tail);
-               spin_unlock_irqrestore(&n->list_lock, flags);
-               stat(s, tail);
-       } else {
-               stat(s, DEACTIVATE_FULL);
-       }
-}
-
-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
-{
-       unsigned long flags;
-       struct slab *slab;
-       void *freelist;
-
-       local_lock_irqsave(&s->cpu_slab->lock, flags);
-
-       slab = c->slab;
-       freelist = c->freelist;
-
-       c->slab = NULL;
-       c->freelist = NULL;
-       c->tid = next_tid(c->tid);
-
-       local_unlock_irqrestore(&s->cpu_slab->lock, flags);
-
-       if (slab) {
-               deactivate_slab(s, slab, freelist);
-               stat(s, CPUSLAB_FLUSH);
-       }
-}
-
-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
-{
-       struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
-       void *freelist = c->freelist;
-       struct slab *slab = c->slab;
-
-       c->slab = NULL;
-       c->freelist = NULL;
-       c->tid = next_tid(c->tid);
-
-       if (slab) {
-               deactivate_slab(s, slab, freelist);
-               stat(s, CPUSLAB_FLUSH);
-       }
-}
-
-static inline void flush_this_cpu_slab(struct kmem_cache *s)
-{
-       struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
-
-       if (c->slab)
-               flush_slab(s, c);
-}
-
-static bool has_cpu_slab(int cpu, struct kmem_cache *s)
-{
-       struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
-
-       return c->slab;
-}
-
 static bool has_pcs_used(int cpu, struct kmem_cache *s)
 {
        struct slub_percpu_sheaves *pcs;
···
 }
 
 /*
- * Flush cpu slab.
+ * Flush percpu sheaves
  *
  * Called from CPU work handler with migration disabled.
  */
-static void flush_cpu_slab(struct work_struct *w)
+static void flush_cpu_sheaves(struct work_struct *w)
 {
        struct kmem_cache *s;
        struct slub_flush_work *sfw;
···
 
        if (cache_has_sheaves(s))
                pcs_flush_all(s);
-
-       flush_this_cpu_slab(s);
 }
 
 static void flush_all_cpus_locked(struct kmem_cache *s)
···
 
        for_each_online_cpu(cpu) {
                sfw = &per_cpu(slub_flush, cpu);
-               if (!has_cpu_slab(cpu, s) && !has_pcs_used(cpu, s)) {
+               if (!has_pcs_used(cpu, s)) {
                        sfw->skip = true;
                        continue;
                }
-               INIT_WORK(&sfw->work, flush_cpu_slab);
+               INIT_WORK(&sfw->work, flush_cpu_sheaves);
                sfw->skip = false;
                sfw->s = s;
                queue_work_on(cpu, flushwq, &sfw->work);
···
 
        mutex_lock(&slab_mutex);
        list_for_each_entry(s, &slab_caches, list) {
-               __flush_cpu_slab(s, cpu);
                if (cache_has_sheaves(s))
                        __pcs_flush_all_cpu(s, cpu);
        }
···
        barn_init(barn);
 }
 
-static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
+#ifdef CONFIG_SLUB_STATS
+static inline int alloc_kmem_cache_stats(struct kmem_cache *s)
 {
        BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
                        NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
-                       sizeof(struct kmem_cache_cpu));
+                       sizeof(struct kmem_cache_stats));
 
-       /*
-        * Must align to double word boundary for the double cmpxchg
-        * instructions to work; see __pcpu_double_call_return_bool().
-        */
-       s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
-                                    2 * sizeof(void *));
+       s->cpu_stats = alloc_percpu(struct kmem_cache_stats);
 
-       if (!s->cpu_slab)
+       if (!s->cpu_stats)
                return 0;
-
-       init_kmem_cache_cpus(s);
 
        return 1;
 }
+#endif
 
 static int init_percpu_sheaves(struct kmem_cache *s)
 {
···
 {
        cache_random_seq_destroy(s);
        pcs_destroy(s);
-       free_percpu(s->cpu_slab);
+#ifdef CONFIG_SLUB_STATS
+       free_percpu(s->cpu_stats);
+#endif
        free_kmem_cache_nodes(s);
 }
···
 
        memcpy(s, static_cache, kmem_cache->object_size);
 
-       /*
-        * This runs very early, and only the boot processor is supposed to be
-        * up. Even if it weren't true, IRQs are not up so we couldn't fire
-        * IPIs around.
-        */
-       __flush_cpu_slab(s, smp_processor_id());
        for_each_kmem_cache_node(s, node, n) {
                struct slab *p;
···
        if (!init_kmem_cache_nodes(s))
                goto out;
 
-       if (!alloc_kmem_cache_cpus(s))
+#ifdef CONFIG_SLUB_STATS
+       if (!alloc_kmem_cache_stats(s))
                goto out;
+#endif
 
        err = init_percpu_sheaves(s);
        if (err)
···
        nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
        if (!nodes)
                return -ENOMEM;
-
-       if (flags & SO_CPU) {
-               int cpu;
-
-               for_each_possible_cpu(cpu) {
-                       struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
-                                                              cpu);
-                       int node;
-                       struct slab *slab;
-
-                       slab = READ_ONCE(c->slab);
-                       if (!slab)
-                               continue;
-
-                       node = slab_nid(slab);
-                       if (flags & SO_TOTAL)
-                               x = slab->objects;
-                       else if (flags & SO_OBJECTS)
-                               x = slab->inuse;
-                       else
-                               x = 1;
-
-                       total += x;
-                       nodes[node] += x;
-
-               }
-       }
 
        /*
         * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
···
                return -ENOMEM;
 
        for_each_online_cpu(cpu) {
-               unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
+               unsigned int x = per_cpu_ptr(s->cpu_stats, cpu)->stat[si];
 
                data[cpu] = x;
                sum += x;
···
        int cpu;
 
        for_each_online_cpu(cpu)
-               per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
+               per_cpu_ptr(s->cpu_stats, cpu)->stat[si] = 0;
 }
 
 #define STAT_ATTR(si, text)                                    \