Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'slab-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

- Sheaves performance improvements for systems with memoryless NUMA
  nodes, developed in response to regression reports.

  These mainly ensure that percpu sheaves exist and are used on cpus
  that belong to these memoryless nodes (Vlastimil Babka, Hao Li).

- Cleanup API usage and constify sysfs attributes (Thomas Weißschuh)

- Disable kfree_rcu() batching on builds intended for fuzzing/debugging
  that enable CONFIG_RCU_STRICT_GRACE_PERIOD (Jann Horn)

- Add a kunit test for kmalloc_nolock()/kfree_nolock() (Harry Yoo);
  a usage sketch follows this list

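For context on the last item: a minimal sketch of the API shape the new
kunit test exercises. Only the call signatures and the fact that
kfree_nolock() tolerates NULL are taken from the test diff further below;
the function name and framing here are illustrative, not from this pull.

/*
 * Illustrative only. kmalloc_nolock() is usable where regular kmalloc()
 * is not (the new test drives it from a perf event overflow handler,
 * which can run in NMI context), but it may return NULL where plain
 * kmalloc() would have succeeded.
 */
static void example_nolock_roundtrip(void)
{
	void *obj = kmalloc_nolock(64, 0, NUMA_NO_NODE);

	if (obj) {
		/* ... use obj ... */
	}

	/* like kfree(), NULL is tolerated; the test frees unconditionally */
	kfree_nolock(obj);
}
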
* tag 'slab-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
slub: clarify kmem_cache_refill_sheaf() comments
lib/tests/slub_kunit: add a test case for {kmalloc,kfree}_nolock
MAINTAINERS: add lib/tests/slub_kunit.c to SLAB ALLOCATOR section
slub: use N_NORMAL_MEMORY in can_free_to_pcs to handle remote frees
slab,rcu: disable KVFREE_RCU_BATCHED for strict grace period
slab: free remote objects to sheaves on memoryless nodes
slab: create barns for online memoryless nodes
slab: decouple pointer to barn from kmem_cache_node
slab: remove alloc_full_sheaf()
mm/slab: constify sysfs attributes
mm/slab: create sysfs attribute through default_groups

5 files changed: +323 -131
MAINTAINERS (+1)
···
 F: Documentation/mm/slab.rst
 F: include/linux/mempool.h
 F: include/linux/slab.h
+F: lib/tests/slub_kunit.c
 F: mm/failslab.c
 F: mm/mempool.c
 F: mm/slab.h
lib/tests/slub_kunit.c (+92)
···
 #include <linux/kernel.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
+#include <linux/perf_event.h>
 #include "../mm/slab.h"
 
 static struct kunit_resource resource;
···
 	kmem_cache_destroy(s);
 }
 
+#ifdef CONFIG_PERF_EVENTS
+#define NR_ITERATIONS 1000
+#define NR_OBJECTS 1000
+static void *objects[NR_OBJECTS];
+
+struct test_nolock_context {
+	struct kunit *test;
+	int callback_count;
+	int alloc_ok;
+	int alloc_fail;
+	struct perf_event *event;
+};
+
+static struct perf_event_attr hw_attr = {
+	.type = PERF_TYPE_HARDWARE,
+	.config = PERF_COUNT_HW_CPU_CYCLES,
+	.size = sizeof(struct perf_event_attr),
+	.pinned = 1,
+	.disabled = 1,
+	.freq = 1,
+	.sample_freq = 100000,
+};
+
+static void overflow_handler_test_kmalloc_kfree_nolock(struct perf_event *event,
+						       struct perf_sample_data *data,
+						       struct pt_regs *regs)
+{
+	void *objp;
+	gfp_t gfp;
+	struct test_nolock_context *ctx = event->overflow_handler_context;
+
+	/* __GFP_ACCOUNT to test kmalloc_nolock() in alloc_slab_obj_exts() */
+	gfp = (ctx->callback_count % 2) ? 0 : __GFP_ACCOUNT;
+	objp = kmalloc_nolock(64, gfp, NUMA_NO_NODE);
+
+	if (objp)
+		ctx->alloc_ok++;
+	else
+		ctx->alloc_fail++;
+
+	kfree_nolock(objp);
+	ctx->callback_count++;
+}
+
+static void test_kmalloc_kfree_nolock(struct kunit *test)
+{
+	int i, j;
+	struct test_nolock_context ctx = { .test = test };
+	struct perf_event *event;
+	bool alloc_fail = false;
+
+	event = perf_event_create_kernel_counter(&hw_attr, -1, current,
+				overflow_handler_test_kmalloc_kfree_nolock,
+				&ctx);
+	if (IS_ERR(event))
+		kunit_skip(test, "Failed to create perf event");
+	ctx.event = event;
+	perf_event_enable(ctx.event);
+	for (i = 0; i < NR_ITERATIONS; i++) {
+		for (j = 0; j < NR_OBJECTS; j++) {
+			gfp_t gfp = (i % 2) ? GFP_KERNEL : GFP_KERNEL_ACCOUNT;
+
+			objects[j] = kmalloc(64, gfp);
+			if (!objects[j]) {
+				j--;
+				while (j >= 0)
+					kfree(objects[j--]);
+				alloc_fail = true;
+				goto cleanup;
+			}
+		}
+		for (j = 0; j < NR_OBJECTS; j++)
+			kfree(objects[j]);
+	}
+
+cleanup:
+	perf_event_disable(ctx.event);
+	perf_event_release_kernel(ctx.event);
+
+	kunit_info(test, "callback_count: %d, alloc_ok: %d, alloc_fail: %d\n",
+		   ctx.callback_count, ctx.alloc_ok, ctx.alloc_fail);
+
+	if (alloc_fail)
+		kunit_skip(test, "Allocation failed");
+	KUNIT_EXPECT_EQ(test, 0, slab_errors);
+}
+#endif
+
 static int test_init(struct kunit *test)
 {
 	slab_errors = 0;
···
 	KUNIT_CASE(test_kfree_rcu_wq_destroy),
 	KUNIT_CASE(test_leak_destroy),
 	KUNIT_CASE(test_krealloc_redzone_zeroing),
+#ifdef CONFIG_PERF_EVENTS
+	KUNIT_CASE_SLOW(test_kmalloc_kfree_nolock),
+#endif
 	{}
 };
mm/Kconfig (+1)
···
 config KVFREE_RCU_BATCHED
 	def_bool y
 	depends on !SLUB_TINY && !TINY_RCU
+	depends on !RCU_STRICT_GRACE_PERIOD
 
 config SLUB_TINY
 	bool "Configure for minimal memory footprint"
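With this extra dependency, CONFIG_KVFREE_RCU_BATCHED is now enabled only
when !SLUB_TINY && !TINY_RCU && !RCU_STRICT_GRACE_PERIOD. The likely intent
(a hedged reading of the shortlog entry): batching holds kfree_rcu()'d
objects across many grace periods, which blunts the bug detection that
strict grace periods are meant to sharpen on fuzzing/debug builds.
Conceptually, the unbatched fallback pays one grace period per call, along
these lines (an illustrative sketch, not code from this diff; the function
and callback names are hypothetical):

/* Sketch of the unbatched behavior this Kconfig change selects. */
void kvfree_call_rcu_unbatched(struct rcu_head *head, void *ptr)
{
	if (head) {
		/* two-argument kvfree_rcu(): free from an RCU callback */
		call_rcu(head, kvfree_rcu_cb);	/* hypothetical callback name */
		return;
	}

	/* one-argument kvfree_rcu(ptr): wait out a grace period, then free */
	synchronize_rcu();
	kvfree(ptr);
}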
mm/slab.h (+6 -1)
···
 	unsigned int x;
 };
 
+struct kmem_cache_per_node_ptrs {
+	struct node_barn *barn;
+	struct kmem_cache_node *node;
+};
+
 /*
  * Slab cache management.
  */
···
 	struct kmem_cache_stats __percpu *cpu_stats;
 #endif
 
-	struct kmem_cache_node *node[MAX_NUMNODES];
+	struct kmem_cache_per_node_ptrs per_node[MAX_NUMNODES];
 };
 
 /*
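Two things the new struct buys, as this series uses it: the barn pointer no
longer lives inside struct kmem_cache_node, so a memoryless node can own a
barn even while its kmem_cache_node pointer stays NULL; and since per_node[]
remains the trailing member of struct kmem_cache, boot-time caches keep
being allocated trimmed to the actual node count. A hedged restatement of
the sizing used by the mm/slub.c diff below (the helper name is
illustrative, not from the patch):

/* How much of struct kmem_cache actually needs to be allocated. */
static inline size_t kmem_cache_struct_size(void)
{
	return offsetof(struct kmem_cache, per_node) +
	       nr_node_ids * sizeof(struct kmem_cache_per_node_ptrs);
}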
mm/slub.c (+223 -130)
···
  * 0. cpu_hotplug_lock
  * 1. slab_mutex (Global Mutex)
  * 2a. kmem_cache->cpu_sheaves->lock (Local trylock)
- * 2b. node->barn->lock (Spinlock)
+ * 2b. barn->lock (Spinlock)
  * 2c. node->list_lock (Spinlock)
  * 3. slab_lock(slab) (Only on some arches)
  * 4. object_map_lock (Only for debugging)
···
  * or spare sheaf can handle the allocation or free, there is no other
  * overhead.
  *
- * node->barn->lock (spinlock)
+ * barn->lock (spinlock)
  *
  * This lock protects the operations on per-NUMA-node barn. It can quickly
  * serve an empty or full sheaf if available, and avoid more expensive refill
···
 	atomic_long_t total_objects;
 	struct list_head full;
 #endif
-	struct node_barn *barn;
 };
 
 static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 {
-	return s->node[node];
+	return s->per_node[node].node;
+}
+
+static inline struct node_barn *get_barn_node(struct kmem_cache *s, int node)
+{
+	return s->per_node[node].barn;
 }
 
 /*
- * Get the barn of the current cpu's closest memory node. It may not exist on
- * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES
+ * Get the barn of the current cpu's NUMA node. It may be a memoryless node.
  */
 static inline struct node_barn *get_barn(struct kmem_cache *s)
 {
-	struct kmem_cache_node *n = get_node(s, numa_mem_id());
-
-	if (!n)
-		return NULL;
-
-	return n->barn;
+	return get_barn_node(s, numa_node_id());
 }
 
 /*
···
  * Protected by slab_mutex.
  */
 static nodemask_t slab_nodes;
+
+/*
+ * Similar to slab_nodes but for where we have node_barn allocated.
+ * Corresponds to N_ONLINE nodes.
+ */
+static nodemask_t slab_barn_nodes;
 
 /*
  * Workqueue used for flushing cpu and kfree_rcu sheaves.
···
 	return 0;
 }
 
-static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf);
-
-static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp)
-{
-	struct slab_sheaf *sheaf = alloc_empty_sheaf(s, gfp);
-
-	if (!sheaf)
-		return NULL;
-
-	if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) {
-		sheaf_flush_unused(s, sheaf);
-		free_empty_sheaf(s, sheaf);
-		return NULL;
-	}
-
-	return sheaf;
-}
-
 /*
  * Maximum number of objects freed during a single flush of main pcs sheaf.
  * Translates directly to an on-stack array size.
···
 	rcu_barrier();
 }
 
+static int slub_cpu_setup(unsigned int cpu)
+{
+	int nid = cpu_to_node(cpu);
+	struct kmem_cache *s;
+	int ret = 0;
+
+	/*
+	 * we never clear a nid so it's safe to do a quick check before taking
+	 * the mutex, and then recheck to handle parallel cpu hotplug safely
+	 */
+	if (node_isset(nid, slab_barn_nodes))
+		return 0;
+
+	mutex_lock(&slab_mutex);
+
+	if (node_isset(nid, slab_barn_nodes))
+		goto out;
+
+	list_for_each_entry(s, &slab_caches, list) {
+		struct node_barn *barn;
+
+		/*
+		 * barn might already exist if a previous callback failed midway
+		 */
+		if (!cache_has_sheaves(s) || get_barn_node(s, nid))
+			continue;
+
+		barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
+
+		if (!barn) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		barn_init(barn);
+		s->per_node[nid].barn = barn;
+	}
+	node_set(nid, slab_barn_nodes);
+
+out:
+	mutex_unlock(&slab_mutex);
+
+	return ret;
+}
+
 /*
  * Use the cpu notifier to insure that the cpu slabs are flushed when
  * necessary.
···
 	if (!allow_spin)
 		return NULL;
 
-	if (empty) {
-		if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) {
-			full = empty;
-		} else {
-			/*
-			 * we must be very low on memory so don't bother
-			 * with the barn
-			 */
-			sheaf_flush_unused(s, empty);
-			free_empty_sheaf(s, empty);
-		}
-	} else {
-		full = alloc_full_sheaf(s, gfp);
+	if (!empty) {
+		empty = alloc_empty_sheaf(s, gfp);
+		if (!empty)
+			return NULL;
 	}
 
-	if (!full)
+	if (refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) {
+		/*
+		 * we must be very low on memory so don't bother
+		 * with the barn
+		 */
+		sheaf_flush_unused(s, empty);
+		free_empty_sheaf(s, empty);
+
 		return NULL;
+	}
+
+	full = empty;
+	empty = NULL;
 
 	if (!local_trylock(&s->cpu_sheaves->lock))
 		goto barn_put;
 	pcs = this_cpu_ptr(s->cpu_sheaves);
 
 	/*
-	 * If we are returning empty sheaf, we either got it from the
-	 * barn or had to allocate one. If we are returning a full
-	 * sheaf, it's due to racing or being migrated to a different
-	 * cpu. Breaching the barn's sheaf limits should be thus rare
-	 * enough so just ignore them to simplify the recovery.
+	 * If we put any empty or full sheaf to the barn below, it's due to
+	 * racing or being migrated to a different cpu. Breaching the barn's
+	 * sheaf limits should be thus rare enough so just ignore them to
+	 * simplify the recovery.
 	 */
 
 	if (pcs->main->size == 0) {
···
 }
 
 /*
- * refill a sheaf previously returned by kmem_cache_prefill_sheaf to at least
- * the given size
+ * Refill a sheaf previously returned by kmem_cache_prefill_sheaf to at least
+ * the given size.
  *
- * the sheaf might be replaced by a new one when requesting more than
- * s->sheaf_capacity objects if such replacement is necessary, but the refill
- * fails (returning -ENOMEM), the existing sheaf is left intact
+ * Return: 0 on success. The sheaf will contain at least @size objects.
+ * The sheaf might have been replaced with a new one if more than
+ * sheaf->capacity objects are requested.
+ *
+ * Return: -ENOMEM on failure. Some objects might have been added to the sheaf
+ * but the sheaf will not be replaced.
  *
  * In practice we always refill to full sheaf's capacity.
  */
···
 
 static void rcu_free_sheaf(struct rcu_head *head)
 {
-	struct kmem_cache_node *n;
 	struct slab_sheaf *sheaf;
 	struct node_barn *barn = NULL;
 	struct kmem_cache *s;
···
 	if (__rcu_free_sheaf_prepare(s, sheaf))
 		goto flush;
 
-	n = get_node(s, sheaf->node);
-	if (!n)
+	barn = get_barn_node(s, sheaf->node);
+	if (!barn)
 		goto flush;
-
-	barn = n->barn;
 
 	/* due to slab_free_hook() */
 	if (unlikely(sheaf->size == 0))
···
 		rcu_sheaf = NULL;
 	} else {
 		pcs->rcu_free = NULL;
-		rcu_sheaf->node = numa_mem_id();
+		rcu_sheaf->node = numa_node_id();
 	}
 
 	/*
···
 	return false;
 }
 
+static __always_inline bool can_free_to_pcs(struct slab *slab)
+{
+	int slab_node;
+	int numa_node;
+
+	if (!IS_ENABLED(CONFIG_NUMA))
+		goto check_pfmemalloc;
+
+	slab_node = slab_nid(slab);
+
+#ifdef CONFIG_HAVE_MEMORYLESS_NODES
+	/*
+	 * numa_mem_id() points to the closest node with memory so only allow
+	 * objects from that node to the percpu sheaves
+	 */
+	numa_node = numa_mem_id();
+
+	if (likely(slab_node == numa_node))
+		goto check_pfmemalloc;
+#else
+
+	/*
+	 * numa_mem_id() is only a wrapper to numa_node_id() which is where this
+	 * cpu belongs to, but it might be a memoryless node anyway. We don't
+	 * know what the closest node is.
+	 */
+	numa_node = numa_node_id();
+
+	/* freed object is from this cpu's node, proceed */
+	if (likely(slab_node == numa_node))
+		goto check_pfmemalloc;
+
+	/*
+	 * Freed object isn't from this cpu's node, but that node is memoryless
+	 * or only has ZONE_MOVABLE memory, which slab cannot allocate from.
+	 * Proceed as it's better to cache remote objects than falling back to
+	 * the slowpath for everything. The allocation side can never obtain
+	 * a local object anyway, if none exist. We don't have numa_mem_id() to
+	 * point to the closest node as we would on a proper memoryless node
+	 * setup.
+	 */
+	if (unlikely(!node_state(numa_node, N_NORMAL_MEMORY)))
+		goto check_pfmemalloc;
+#endif
+
+	return false;
+
+check_pfmemalloc:
+	return likely(!slab_test_pfmemalloc(slab));
+}
+
 /*
  * Bulk free objects to the percpu sheaves.
  * Unlike free_to_pcs() this includes the calls to all necessary hooks
···
 	struct node_barn *barn;
 	void *remote_objects[PCS_BATCH_MAX];
 	unsigned int remote_nr = 0;
-	int node = numa_mem_id();
 
 next_remote_batch:
 	while (i < size) {
···
 			continue;
 		}
 
-		if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)
-				|| slab_test_pfmemalloc(slab))) {
+		if (unlikely(!can_free_to_pcs(slab))) {
 			remote_objects[remote_nr] = p[i];
 			p[i] = p[--size];
 			if (++remote_nr >= PCS_BATCH_MAX)
···
 	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
 		return;
 
-	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
-	    && likely(!slab_test_pfmemalloc(slab))) {
-		if (likely(free_to_pcs(s, object, true)))
-			return;
-	}
+	if (likely(can_free_to_pcs(slab)) && likely(free_to_pcs(s, object, true)))
+		return;
 
 	__slab_free(s, slab, object, object, 1, addr);
 	stat(s, FREE_SLOWPATH);
···
 	 */
 	kasan_slab_free(s, x, false, false, /* skip quarantine */true);
 
-	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())) {
-		if (likely(free_to_pcs(s, x, false)))
-			return;
-	}
+	if (likely(can_free_to_pcs(slab)) && likely(free_to_pcs(s, x, false)))
+		return;
 
 	/*
 	 * __slab_free() can locklessly cmpxchg16 into a slab, but then it might
···
 }
 
 static void
-init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
+init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	n->nr_partial = 0;
 	spin_lock_init(&n->list_lock);
···
 	atomic_long_set(&n->total_objects, 0);
 	INIT_LIST_HEAD(&n->full);
 #endif
-	n->barn = barn;
-	if (barn)
-		barn_init(barn);
 }
 
 #ifdef CONFIG_SLUB_STATS
···
 	n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
 	slab->freelist = get_freepointer(kmem_cache_node, n);
 	slab->inuse = 1;
-	kmem_cache_node->node[node] = n;
-	init_kmem_cache_node(n, NULL);
+	kmem_cache_node->per_node[node].node = n;
+	init_kmem_cache_node(n);
 	inc_slabs_node(kmem_cache_node, node, slab->objects);
 
 	/*
···
 	int node;
 	struct kmem_cache_node *n;
 
-	for_each_kmem_cache_node(s, node, n) {
-		if (n->barn) {
-			WARN_ON(n->barn->nr_full);
-			WARN_ON(n->barn->nr_empty);
-			kfree(n->barn);
-			n->barn = NULL;
-		}
+	for_each_node(node) {
+		struct node_barn *barn = get_barn_node(s, node);
 
-		s->node[node] = NULL;
+		if (!barn)
+			continue;
+
+		WARN_ON(barn->nr_full);
+		WARN_ON(barn->nr_empty);
+		kfree(barn);
+		s->per_node[node].barn = NULL;
+	}
+
+	for_each_kmem_cache_node(s, node, n) {
+		s->per_node[node].node = NULL;
 		kmem_cache_free(kmem_cache_node, n);
 	}
 }
···
 
 	for_each_node_mask(node, slab_nodes) {
 		struct kmem_cache_node *n;
-		struct node_barn *barn = NULL;
 
 		if (slab_state == DOWN) {
 			early_kmem_cache_node_alloc(node);
 			continue;
 		}
 
-		if (cache_has_sheaves(s)) {
-			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
-
-			if (!barn)
-				return 0;
-		}
-
 		n = kmem_cache_alloc_node(kmem_cache_node,
 						GFP_KERNEL, node);
-		if (!n) {
-			kfree(barn);
+		if (!n)
 			return 0;
-		}
 
-		init_kmem_cache_node(n, barn);
-
-		s->node[node] = n;
+		init_kmem_cache_node(n);
+		s->per_node[node].node = n;
 	}
+
+	if (slab_state == DOWN || !cache_has_sheaves(s))
+		return 1;
+
+	for_each_node_mask(node, slab_barn_nodes) {
+		struct node_barn *barn;
+
+		barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
+
+		if (!barn)
+			return 0;
+
+		barn_init(barn);
+		s->per_node[node].barn = barn;
+	}
+
 	return 1;
 }
···
 	if (cache_has_sheaves(s))
 		rcu_barrier();
 
+	for_each_node(node) {
+		struct node_barn *barn = get_barn_node(s, node);
+
+		if (barn)
+			barn_shrink(s, barn);
+	}
+
 	/* Attempt to free all objects */
 	for_each_kmem_cache_node(s, node, n) {
-		if (n->barn)
-			barn_shrink(s, n->barn);
 		free_partial(s, n);
 		if (n->nr_partial || node_nr_slabs(n))
 			return 1;
···
 	unsigned long flags;
 	int ret = 0;
 
+	for_each_node(node) {
+		struct node_barn *barn = get_barn_node(s, node);
+
+		if (barn)
+			barn_shrink(s, barn);
+	}
+
 	for_each_kmem_cache_node(s, node, n) {
 		INIT_LIST_HEAD(&discard);
 		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
 			INIT_LIST_HEAD(promote + i);
-
-		if (n->barn)
-			barn_shrink(s, n->barn);
 
 		spin_lock_irqsave(&n->list_lock, flags);
···
 		if (get_node(s, nid))
 			continue;
 
-		if (cache_has_sheaves(s)) {
+		if (cache_has_sheaves(s) && !get_barn_node(s, nid)) {
+
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
 
 			if (!barn) {
···
 			goto out;
 		}
 
-		init_kmem_cache_node(n, barn);
+		init_kmem_cache_node(n);
+		s->per_node[nid].node = n;
 
-		s->node[nid] = n;
+		if (barn) {
+			barn_init(barn);
+			s->per_node[nid].barn = barn;
+		}
 	}
 	/*
 	 * Any cache created after this point will also have kmem_cache_node
-	 * initialized for the new node.
+	 * and barn initialized for the new node.
 	 */
 	node_set(nid, slab_nodes);
+	node_set(nid, slab_barn_nodes);
out:
 	mutex_unlock(&slab_mutex);
 	return ret;
···
 	if (!capacity)
 		return;
 
-	for_each_node_mask(node, slab_nodes) {
+	for_each_node_mask(node, slab_barn_nodes) {
 		struct node_barn *barn;
 
 		barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
···
 		}
 
 		barn_init(barn);
-		get_node(s, node)->barn = barn;
+		s->per_node[node].barn = barn;
 	}
 
 	for_each_possible_cpu(cpu) {
···
 	for_each_node_state(node, N_MEMORY)
 		node_set(node, slab_nodes);
 
+	for_each_online_node(node)
+		node_set(node, slab_barn_nodes);
+
 	create_boot_cache(kmem_cache_node, "kmem_cache_node",
 			sizeof(struct kmem_cache_node),
 			SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
···
 	slab_state = PARTIAL;
 
 	create_boot_cache(kmem_cache, "kmem_cache",
-			offsetof(struct kmem_cache, node) +
-				nr_node_ids * sizeof(struct kmem_cache_node *),
+			offsetof(struct kmem_cache, per_node) +
+				nr_node_ids * sizeof(struct kmem_cache_per_node_ptrs),
 			SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
 
 	kmem_cache = bootstrap(&boot_kmem_cache);
···
 	/* Setup random freelists for each cache */
 	init_freelist_randomization();
 
-	cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
+	cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", slub_cpu_setup,
 				  slub_cpu_dead);
 
 	pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
···
 	return len;
 }
 
-#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
+#define to_slab_attr(n) container_of_const(n, struct slab_attribute, attr)
#define to_slab(n) container_of(n, struct kmem_cache, kobj)
 
 struct slab_attribute {
···
 };
 
 #define SLAB_ATTR_RO(_name) \
-	static struct slab_attribute _name##_attr = __ATTR_RO_MODE(_name, 0400)
+	static const struct slab_attribute _name##_attr = __ATTR_RO_MODE(_name, 0400)
 
 #define SLAB_ATTR(_name) \
-	static struct slab_attribute _name##_attr = __ATTR_RW_MODE(_name, 0600)
+	static const struct slab_attribute _name##_attr = __ATTR_RW_MODE(_name, 0600)
 
 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
 {
···
 SLAB_ATTR(skip_kfence);
 #endif
 
-static struct attribute *slab_attrs[] = {
+static const struct attribute *const slab_attrs[] = {
 	&slab_size_attr.attr,
 	&object_size_attr.attr,
 	&objs_per_slab_attr.attr,
···
 	NULL
 };
 
-static const struct attribute_group slab_attr_group = {
-	.attrs = slab_attrs,
-};
+ATTRIBUTE_GROUPS(slab);
 
 static ssize_t slab_attr_show(struct kobject *kobj,
 				struct attribute *attr,
 				char *buf)
 {
-	struct slab_attribute *attribute;
+	const struct slab_attribute *attribute;
 	struct kmem_cache *s;
 
 	attribute = to_slab_attr(attr);
···
 				struct attribute *attr,
 				const char *buf, size_t len)
 {
-	struct slab_attribute *attribute;
+	const struct slab_attribute *attribute;
 	struct kmem_cache *s;
 
 	attribute = to_slab_attr(attr);
···
 static const struct kobj_type slab_ktype = {
 	.sysfs_ops = &slab_sysfs_ops,
 	.release = kmem_cache_release,
+	.default_groups = slab_groups,
 };
 
 static struct kset *slab_kset;
···
 	if (err)
 		goto out;
 
-	err = sysfs_create_group(&s->kobj, &slab_attr_group);
-	if (err)
-		goto out_del_kobj;
-
 	if (!unmergeable) {
 		/* Setup first alias */
 		sysfs_slab_alias(s, s->name);
···
 	if (!unmergeable)
 		kfree(name);
 	return err;
-out_del_kobj:
-	kobject_del(&s->kobj);
-	goto out;
 }
 
 void sysfs_slab_unlink(struct kmem_cache *s)