slab: add opt-in caching layer of percpu sheaves

+31

include/linux/slab.h

··· 335 335 * %NULL means no constructor. 336 336 */ 337 337 void (*ctor)(void *); 338 + /** 339 + * @sheaf_capacity: Enable sheaves of given capacity for the cache. 340 + * 341 + * With a non-zero value, allocations from the cache go through caching 342 + * arrays called sheaves. Each cpu has a main sheaf that's always 343 + * present, and a spare sheaf that may not be present. When both become 344 + * empty, there's an attempt to replace an empty sheaf with a full sheaf 345 + * from the per-node barn. 346 + * 347 + * When no full sheaf is available, and gfp flags allow blocking, a 348 + * sheaf is allocated and filled from slab(s) using bulk allocation. 349 + * Otherwise the allocation falls back to the normal operation 350 + * allocating a single object from a slab. 351 + * 352 + * Analogously, when freeing and both percpu sheaves are full, the barn 353 + * may replace a full sheaf with an empty one, unless the barn is over capacity. In 354 + * that case a sheaf is bulk freed to slab pages. 355 + * 356 + * The sheaves do not enforce NUMA placement of objects, so allocations 357 + * via kmem_cache_alloc_node() with a node specified other than 358 + * NUMA_NO_NODE will bypass them. 359 + * 360 + * Bulk allocation and free operations also try to use the cpu sheaves 361 + * and barn, but fall back to using slab pages directly. 362 + * 363 + * When slub_debug is enabled for the cache, the sheaf_capacity argument 364 + * is ignored. 365 + * 366 + * %0 means no sheaves will be created. 367 + */ 368 + unsigned int sheaf_capacity; 338 369 }; 339 370 340 371 struct kmem_cache *__kmem_cache_create_args(const char *name,

+2

mm/slab.h

··· 235 235 #ifndef CONFIG_SLUB_TINY 236 236 struct kmem_cache_cpu __percpu *cpu_slab; 237 237 #endif 238 + struct slub_percpu_sheaves __percpu *cpu_sheaves; 238 239 /* Used for retrieving partial slabs, etc. */ 239 240 slab_flags_t flags; 240 241 unsigned long min_partial; ··· 249 248 /* Number of per cpu partial slabs to keep around */ 250 249 unsigned int cpu_partial_slabs; 251 250 #endif 251 + unsigned int sheaf_capacity; 252 252 struct kmem_cache_order_objects oo; 253 253 254 254 /* Allocation and freeing of slabs */

+4 -1

mm/slab_common.c

··· 163 163 return 1; 164 164 #endif 165 165 166 + if (s->cpu_sheaves) 167 + return 1; 168 + 166 169 /* 167 170 * We may have set a slab to be unmergeable during bootstrap. 168 171 */ ··· 324 321 object_size - args->usersize < args->useroffset)) 325 322 args->usersize = args->useroffset = 0; 326 323 327 - if (!args->usersize) 324 + if (!args->usersize && !args->sheaf_capacity) 328 325 s = __kmem_cache_alias(name, object_size, args->align, flags, 329 326 args->ctor); 330 327 if (s)

+1093 -47

mm/slub.c

··· 363 363 #endif 364 364 365 365 enum stat_item { 366 + ALLOC_PCS, /* Allocation from percpu sheaf */ 366 367 ALLOC_FASTPATH, /* Allocation from cpu slab */ 367 368 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ 369 + FREE_PCS, /* Free to percpu sheaf */ 368 370 FREE_FASTPATH, /* Free to cpu slab */ 369 371 FREE_SLOWPATH, /* Freeing not to cpu slab */ 370 372 FREE_FROZEN, /* Freeing to frozen slab */ ··· 391 389 CPU_PARTIAL_FREE, /* Refill cpu partial on free */ 392 390 CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */ 393 391 CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ 392 + SHEAF_FLUSH, /* Objects flushed from a sheaf */ 393 + SHEAF_REFILL, /* Objects refilled to a sheaf */ 394 + SHEAF_ALLOC, /* Allocation of an empty sheaf */ 395 + SHEAF_FREE, /* Freeing of an empty sheaf */ 396 + BARN_GET, /* Got full sheaf from barn */ 397 + BARN_GET_FAIL, /* Failed to get full sheaf from barn */ 398 + BARN_PUT, /* Put full sheaf to barn */ 399 + BARN_PUT_FAIL, /* Failed to put full sheaf to barn */ 394 400 NR_SLUB_STAT_ITEMS 395 401 }; 396 402 ··· 445 435 #endif 446 436 } 447 437 438 + #define MAX_FULL_SHEAVES 10 439 + #define MAX_EMPTY_SHEAVES 10 440 + 441 + struct node_barn { 442 + spinlock_t lock; 443 + struct list_head sheaves_full; 444 + struct list_head sheaves_empty; 445 + unsigned int nr_full; 446 + unsigned int nr_empty; 447 + }; 448 + 449 + struct slab_sheaf { 450 + union { 451 + struct rcu_head rcu_head; 452 + struct list_head barn_list; 453 + }; 454 + unsigned int size; 455 + void *objects[]; 456 + }; 457 + 458 + struct slub_percpu_sheaves { 459 + local_trylock_t lock; 460 + struct slab_sheaf *main; /* never NULL when unlocked */ 461 + struct slab_sheaf *spare; /* empty or full, may be NULL */ 462 + }; 463 + 448 464 /* 449 465 * The slab lists for all objects. 
450 466 */ ··· 483 447 atomic_long_t total_objects; 484 448 struct list_head full; 485 449 #endif 450 + struct node_barn *barn; 486 451 }; 487 452 488 453 static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 489 454 { 490 455 return s->node[node]; 456 + } 457 + 458 + /* Get the barn of the current cpu's memory node */ 459 + static inline struct node_barn *get_barn(struct kmem_cache *s) 460 + { 461 + return get_node(s, numa_mem_id())->barn; 491 462 } 492 463 493 464 /* ··· 513 470 */ 514 471 static nodemask_t slab_nodes; 515 472 516 - #ifndef CONFIG_SLUB_TINY 517 473 /* 518 474 * Workqueue used for flush_cpu_slab(). 519 475 */ 520 476 static struct workqueue_struct *flushwq; 521 - #endif 477 + 478 + struct slub_flush_work { 479 + struct work_struct work; 480 + struct kmem_cache *s; 481 + bool skip; 482 + }; 483 + 484 + static DEFINE_MUTEX(flush_lock); 485 + static DEFINE_PER_CPU(struct slub_flush_work, slub_flush); 522 486 523 487 /******************************************************************** 524 488 * Core slab cache functions ··· 2523 2473 return object; 2524 2474 } 2525 2475 2476 + static struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp) 2477 + { 2478 + struct slab_sheaf *sheaf = kzalloc(struct_size(sheaf, objects, 2479 + s->sheaf_capacity), gfp); 2480 + 2481 + if (unlikely(!sheaf)) 2482 + return NULL; 2483 + 2484 + stat(s, SHEAF_ALLOC); 2485 + 2486 + return sheaf; 2487 + } 2488 + 2489 + static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) 2490 + { 2491 + kfree(sheaf); 2492 + 2493 + stat(s, SHEAF_FREE); 2494 + } 2495 + 2496 + static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, 2497 + size_t size, void **p); 2498 + 2499 + 2500 + static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf, 2501 + gfp_t gfp) 2502 + { 2503 + int to_fill = s->sheaf_capacity - sheaf->size; 2504 + int filled; 2505 + 2506 + if (!to_fill) 2507 + return 0; 2508 + 2509 + filled = 
__kmem_cache_alloc_bulk(s, gfp, to_fill, 2510 + &sheaf->objects[sheaf->size]); 2511 + 2512 + sheaf->size += filled; 2513 + 2514 + stat_add(s, SHEAF_REFILL, filled); 2515 + 2516 + if (filled < to_fill) 2517 + return -ENOMEM; 2518 + 2519 + return 0; 2520 + } 2521 + 2522 + 2523 + static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) 2524 + { 2525 + struct slab_sheaf *sheaf = alloc_empty_sheaf(s, gfp); 2526 + 2527 + if (!sheaf) 2528 + return NULL; 2529 + 2530 + if (refill_sheaf(s, sheaf, gfp)) { 2531 + free_empty_sheaf(s, sheaf); 2532 + return NULL; 2533 + } 2534 + 2535 + return sheaf; 2536 + } 2537 + 2538 + /* 2539 + * Maximum number of objects freed during a single flush of main pcs sheaf. 2540 + * Translates directly to an on-stack array size. 2541 + */ 2542 + #define PCS_BATCH_MAX 32U 2543 + 2544 + static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); 2545 + 2546 + /* 2547 + * Free all objects from the main sheaf. In order to perform 2548 + * __kmem_cache_free_bulk() outside of cpu_sheaves->lock, work in batches where 2549 + * object pointers are moved to an on-stack array under the lock. To bound the 2550 + * stack usage, limit each batch to PCS_BATCH_MAX. 
2551 + * 2552 + * returns true if at least partially flushed 2553 + */ 2554 + static bool sheaf_flush_main(struct kmem_cache *s) 2555 + { 2556 + struct slub_percpu_sheaves *pcs; 2557 + unsigned int batch, remaining; 2558 + void *objects[PCS_BATCH_MAX]; 2559 + struct slab_sheaf *sheaf; 2560 + bool ret = false; 2561 + 2562 + next_batch: 2563 + if (!local_trylock(&s->cpu_sheaves->lock)) 2564 + return ret; 2565 + 2566 + pcs = this_cpu_ptr(s->cpu_sheaves); 2567 + sheaf = pcs->main; 2568 + 2569 + batch = min(PCS_BATCH_MAX, sheaf->size); 2570 + 2571 + sheaf->size -= batch; 2572 + memcpy(objects, sheaf->objects + sheaf->size, batch * sizeof(void *)); 2573 + 2574 + remaining = sheaf->size; 2575 + 2576 + local_unlock(&s->cpu_sheaves->lock); 2577 + 2578 + __kmem_cache_free_bulk(s, batch, &objects[0]); 2579 + 2580 + stat_add(s, SHEAF_FLUSH, batch); 2581 + 2582 + ret = true; 2583 + 2584 + if (remaining) 2585 + goto next_batch; 2586 + 2587 + return ret; 2588 + } 2589 + 2590 + /* 2591 + * Free all objects from a sheaf that's unused, i.e. not linked to any 2592 + * cpu_sheaves, so we need no locking and batching. The locking is also not 2593 + * necessary when flushing cpu's sheaves (both spare and main) during cpu 2594 + * hotremove as the cpu is not executing anymore. 
2595 + */ 2596 + static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf) 2597 + { 2598 + if (!sheaf->size) 2599 + return; 2600 + 2601 + stat_add(s, SHEAF_FLUSH, sheaf->size); 2602 + 2603 + __kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]); 2604 + 2605 + sheaf->size = 0; 2606 + } 2607 + 2608 + /* 2609 + * Caller needs to make sure migration is disabled in order to fully flush 2610 + * single cpu's sheaves 2611 + * 2612 + * must not be called from an irq 2613 + * 2614 + * flushing operations are rare so let's keep it simple and flush to slabs 2615 + * directly, skipping the barn 2616 + */ 2617 + static void pcs_flush_all(struct kmem_cache *s) 2618 + { 2619 + struct slub_percpu_sheaves *pcs; 2620 + struct slab_sheaf *spare; 2621 + 2622 + local_lock(&s->cpu_sheaves->lock); 2623 + pcs = this_cpu_ptr(s->cpu_sheaves); 2624 + 2625 + spare = pcs->spare; 2626 + pcs->spare = NULL; 2627 + 2628 + local_unlock(&s->cpu_sheaves->lock); 2629 + 2630 + if (spare) { 2631 + sheaf_flush_unused(s, spare); 2632 + free_empty_sheaf(s, spare); 2633 + } 2634 + 2635 + sheaf_flush_main(s); 2636 + } 2637 + 2638 + static void __pcs_flush_all_cpu(struct kmem_cache *s, unsigned int cpu) 2639 + { 2640 + struct slub_percpu_sheaves *pcs; 2641 + 2642 + pcs = per_cpu_ptr(s->cpu_sheaves, cpu); 2643 + 2644 + /* The cpu is not executing anymore so we don't need pcs->lock */ 2645 + sheaf_flush_unused(s, pcs->main); 2646 + if (pcs->spare) { 2647 + sheaf_flush_unused(s, pcs->spare); 2648 + free_empty_sheaf(s, pcs->spare); 2649 + pcs->spare = NULL; 2650 + } 2651 + } 2652 + 2653 + static void pcs_destroy(struct kmem_cache *s) 2654 + { 2655 + int cpu; 2656 + 2657 + for_each_possible_cpu(cpu) { 2658 + struct slub_percpu_sheaves *pcs; 2659 + 2660 + pcs = per_cpu_ptr(s->cpu_sheaves, cpu); 2661 + 2662 + /* can happen when unwinding failed create */ 2663 + if (!pcs->main) 2664 + continue; 2665 + 2666 + /* 2667 + * We have already passed __kmem_cache_shutdown() so everything 2668 + * 
was flushed and there should be no objects allocated from 2669 + * slabs, otherwise kmem_cache_destroy() would have aborted. 2670 + * Therefore something would have to be really wrong if the 2671 + * warnings here trigger, and we should rather leave objects and 2672 + * sheaves to leak in that case. 2673 + */ 2674 + 2675 + WARN_ON(pcs->spare); 2676 + 2677 + if (!WARN_ON(pcs->main->size)) { 2678 + free_empty_sheaf(s, pcs->main); 2679 + pcs->main = NULL; 2680 + } 2681 + } 2682 + 2683 + free_percpu(s->cpu_sheaves); 2684 + s->cpu_sheaves = NULL; 2685 + } 2686 + 2687 + static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn) 2688 + { 2689 + struct slab_sheaf *empty = NULL; 2690 + unsigned long flags; 2691 + 2692 + spin_lock_irqsave(&barn->lock, flags); 2693 + 2694 + if (barn->nr_empty) { 2695 + empty = list_first_entry(&barn->sheaves_empty, 2696 + struct slab_sheaf, barn_list); 2697 + list_del(&empty->barn_list); 2698 + barn->nr_empty--; 2699 + } 2700 + 2701 + spin_unlock_irqrestore(&barn->lock, flags); 2702 + 2703 + return empty; 2704 + } 2705 + 2706 + /* 2707 + * The following two functions are used mainly in cases where we have to undo an 2708 + * intended action due to a race or cpu migration. Thus they do not check the 2709 + * empty or full sheaf limits for simplicity. 
2710 + */ 2711 + 2712 + static void barn_put_empty_sheaf(struct node_barn *barn, struct slab_sheaf *sheaf) 2713 + { 2714 + unsigned long flags; 2715 + 2716 + spin_lock_irqsave(&barn->lock, flags); 2717 + 2718 + list_add(&sheaf->barn_list, &barn->sheaves_empty); 2719 + barn->nr_empty++; 2720 + 2721 + spin_unlock_irqrestore(&barn->lock, flags); 2722 + } 2723 + 2724 + static void barn_put_full_sheaf(struct node_barn *barn, struct slab_sheaf *sheaf) 2725 + { 2726 + unsigned long flags; 2727 + 2728 + spin_lock_irqsave(&barn->lock, flags); 2729 + 2730 + list_add(&sheaf->barn_list, &barn->sheaves_full); 2731 + barn->nr_full++; 2732 + 2733 + spin_unlock_irqrestore(&barn->lock, flags); 2734 + } 2735 + 2736 + /* 2737 + * If a full sheaf is available, return it and put the supplied empty one to 2738 + * barn. We ignore the limit on empty sheaves as the number of sheaves doesn't 2739 + * change. 2740 + */ 2741 + static struct slab_sheaf * 2742 + barn_replace_empty_sheaf(struct node_barn *barn, struct slab_sheaf *empty) 2743 + { 2744 + struct slab_sheaf *full = NULL; 2745 + unsigned long flags; 2746 + 2747 + spin_lock_irqsave(&barn->lock, flags); 2748 + 2749 + if (barn->nr_full) { 2750 + full = list_first_entry(&barn->sheaves_full, struct slab_sheaf, 2751 + barn_list); 2752 + list_del(&full->barn_list); 2753 + list_add(&empty->barn_list, &barn->sheaves_empty); 2754 + barn->nr_full--; 2755 + barn->nr_empty++; 2756 + } 2757 + 2758 + spin_unlock_irqrestore(&barn->lock, flags); 2759 + 2760 + return full; 2761 + } 2762 + 2763 + /* 2764 + * If an empty sheaf is available, return it and put the supplied full one to 2765 + * barn. But if there are too many full sheaves, reject this with -E2BIG. 
2766 + */ 2767 + static struct slab_sheaf * 2768 + barn_replace_full_sheaf(struct node_barn *barn, struct slab_sheaf *full) 2769 + { 2770 + struct slab_sheaf *empty; 2771 + unsigned long flags; 2772 + 2773 + spin_lock_irqsave(&barn->lock, flags); 2774 + 2775 + if (barn->nr_full >= MAX_FULL_SHEAVES) { 2776 + empty = ERR_PTR(-E2BIG); 2777 + } else if (!barn->nr_empty) { 2778 + empty = ERR_PTR(-ENOMEM); 2779 + } else { 2780 + empty = list_first_entry(&barn->sheaves_empty, struct slab_sheaf, 2781 + barn_list); 2782 + list_del(&empty->barn_list); 2783 + list_add(&full->barn_list, &barn->sheaves_full); 2784 + barn->nr_empty--; 2785 + barn->nr_full++; 2786 + } 2787 + 2788 + spin_unlock_irqrestore(&barn->lock, flags); 2789 + 2790 + return empty; 2791 + } 2792 + 2793 + static void barn_init(struct node_barn *barn) 2794 + { 2795 + spin_lock_init(&barn->lock); 2796 + INIT_LIST_HEAD(&barn->sheaves_full); 2797 + INIT_LIST_HEAD(&barn->sheaves_empty); 2798 + barn->nr_full = 0; 2799 + barn->nr_empty = 0; 2800 + } 2801 + 2802 + static void barn_shrink(struct kmem_cache *s, struct node_barn *barn) 2803 + { 2804 + struct list_head empty_list; 2805 + struct list_head full_list; 2806 + struct slab_sheaf *sheaf, *sheaf2; 2807 + unsigned long flags; 2808 + 2809 + INIT_LIST_HEAD(&empty_list); 2810 + INIT_LIST_HEAD(&full_list); 2811 + 2812 + spin_lock_irqsave(&barn->lock, flags); 2813 + 2814 + list_splice_init(&barn->sheaves_full, &full_list); 2815 + barn->nr_full = 0; 2816 + list_splice_init(&barn->sheaves_empty, &empty_list); 2817 + barn->nr_empty = 0; 2818 + 2819 + spin_unlock_irqrestore(&barn->lock, flags); 2820 + 2821 + list_for_each_entry_safe(sheaf, sheaf2, &full_list, barn_list) { 2822 + sheaf_flush_unused(s, sheaf); 2823 + free_empty_sheaf(s, sheaf); 2824 + } 2825 + 2826 + list_for_each_entry_safe(sheaf, sheaf2, &empty_list, barn_list) 2827 + free_empty_sheaf(s, sheaf); 2828 + } 2829 + 2526 2830 /* 2527 2831 * Slab allocation and freeing 2528 2832 */ ··· 3748 3344 
put_partials_cpu(s, c); 3749 3345 } 3750 3346 3751 - struct slub_flush_work { 3752 - struct work_struct work; 3753 - struct kmem_cache *s; 3754 - bool skip; 3755 - }; 3756 - 3757 - /* 3758 - * Flush cpu slab. 3759 - * 3760 - * Called from CPU work handler with migration disabled. 3761 - */ 3762 - static void flush_cpu_slab(struct work_struct *w) 3347 + static inline void flush_this_cpu_slab(struct kmem_cache *s) 3763 3348 { 3764 - struct kmem_cache *s; 3765 - struct kmem_cache_cpu *c; 3766 - struct slub_flush_work *sfw; 3767 - 3768 - sfw = container_of(w, struct slub_flush_work, work); 3769 - 3770 - s = sfw->s; 3771 - c = this_cpu_ptr(s->cpu_slab); 3349 + struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); 3772 3350 3773 3351 if (c->slab) 3774 3352 flush_slab(s, c); ··· 3765 3379 return c->slab || slub_percpu_partial(c); 3766 3380 } 3767 3381 3768 - static DEFINE_MUTEX(flush_lock); 3769 - static DEFINE_PER_CPU(struct slub_flush_work, slub_flush); 3382 + #else /* CONFIG_SLUB_TINY */ 3383 + static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { } 3384 + static inline bool has_cpu_slab(int cpu, struct kmem_cache *s) { return false; } 3385 + static inline void flush_this_cpu_slab(struct kmem_cache *s) { } 3386 + #endif /* CONFIG_SLUB_TINY */ 3387 + 3388 + static bool has_pcs_used(int cpu, struct kmem_cache *s) 3389 + { 3390 + struct slub_percpu_sheaves *pcs; 3391 + 3392 + if (!s->cpu_sheaves) 3393 + return false; 3394 + 3395 + pcs = per_cpu_ptr(s->cpu_sheaves, cpu); 3396 + 3397 + return (pcs->spare || pcs->main->size); 3398 + } 3399 + 3400 + /* 3401 + * Flush cpu slab. 3402 + * 3403 + * Called from CPU work handler with migration disabled. 
3404 + */ 3405 + static void flush_cpu_slab(struct work_struct *w) 3406 + { 3407 + struct kmem_cache *s; 3408 + struct slub_flush_work *sfw; 3409 + 3410 + sfw = container_of(w, struct slub_flush_work, work); 3411 + 3412 + s = sfw->s; 3413 + 3414 + if (s->cpu_sheaves) 3415 + pcs_flush_all(s); 3416 + 3417 + flush_this_cpu_slab(s); 3418 + } 3770 3419 3771 3420 static void flush_all_cpus_locked(struct kmem_cache *s) 3772 3421 { ··· 3813 3392 3814 3393 for_each_online_cpu(cpu) { 3815 3394 sfw = &per_cpu(slub_flush, cpu); 3816 - if (!has_cpu_slab(cpu, s)) { 3395 + if (!has_cpu_slab(cpu, s) && !has_pcs_used(cpu, s)) { 3817 3396 sfw->skip = true; 3818 3397 continue; 3819 3398 } ··· 3849 3428 struct kmem_cache *s; 3850 3429 3851 3430 mutex_lock(&slab_mutex); 3852 - list_for_each_entry(s, &slab_caches, list) 3431 + list_for_each_entry(s, &slab_caches, list) { 3853 3432 __flush_cpu_slab(s, cpu); 3433 + if (s->cpu_sheaves) 3434 + __pcs_flush_all_cpu(s, cpu); 3435 + } 3854 3436 mutex_unlock(&slab_mutex); 3855 3437 return 0; 3856 3438 } 3857 - 3858 - #else /* CONFIG_SLUB_TINY */ 3859 - static inline void flush_all_cpus_locked(struct kmem_cache *s) { } 3860 - static inline void flush_all(struct kmem_cache *s) { } 3861 - static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { } 3862 - static inline int slub_cpu_dead(unsigned int cpu) { return 0; } 3863 - #endif /* CONFIG_SLUB_TINY */ 3864 3439 3865 3440 /* 3866 3441 * Check if the objects in a per cpu structure fit numa ··· 4608 4191 } 4609 4192 4610 4193 /* 4194 + * Replace the empty main sheaf with a (at least partially) full sheaf. 4195 + * 4196 + * Must be called with the cpu_sheaves local lock locked. If successful, returns 4197 + * the pcs pointer and the local lock locked (possibly on a different cpu than 4198 + * initially called). If not successful, returns NULL and the local lock 4199 + * unlocked. 
4200 + */ 4201 + static struct slub_percpu_sheaves * 4202 + __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, gfp_t gfp) 4203 + { 4204 + struct slab_sheaf *empty = NULL; 4205 + struct slab_sheaf *full; 4206 + struct node_barn *barn; 4207 + bool can_alloc; 4208 + 4209 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); 4210 + 4211 + if (pcs->spare && pcs->spare->size > 0) { 4212 + swap(pcs->main, pcs->spare); 4213 + return pcs; 4214 + } 4215 + 4216 + barn = get_barn(s); 4217 + 4218 + full = barn_replace_empty_sheaf(barn, pcs->main); 4219 + 4220 + if (full) { 4221 + stat(s, BARN_GET); 4222 + pcs->main = full; 4223 + return pcs; 4224 + } 4225 + 4226 + stat(s, BARN_GET_FAIL); 4227 + 4228 + can_alloc = gfpflags_allow_blocking(gfp); 4229 + 4230 + if (can_alloc) { 4231 + if (pcs->spare) { 4232 + empty = pcs->spare; 4233 + pcs->spare = NULL; 4234 + } else { 4235 + empty = barn_get_empty_sheaf(barn); 4236 + } 4237 + } 4238 + 4239 + local_unlock(&s->cpu_sheaves->lock); 4240 + 4241 + if (!can_alloc) 4242 + return NULL; 4243 + 4244 + if (empty) { 4245 + if (!refill_sheaf(s, empty, gfp)) { 4246 + full = empty; 4247 + } else { 4248 + /* 4249 + * we must be very low on memory so don't bother 4250 + * with the barn 4251 + */ 4252 + free_empty_sheaf(s, empty); 4253 + } 4254 + } else { 4255 + full = alloc_full_sheaf(s, gfp); 4256 + } 4257 + 4258 + if (!full) 4259 + return NULL; 4260 + 4261 + /* 4262 + * we can reach here only when gfpflags_allow_blocking 4263 + * so this must not be an irq 4264 + */ 4265 + local_lock(&s->cpu_sheaves->lock); 4266 + pcs = this_cpu_ptr(s->cpu_sheaves); 4267 + 4268 + /* 4269 + * If we are returning empty sheaf, we either got it from the 4270 + * barn or had to allocate one. If we are returning a full 4271 + * sheaf, it's due to racing or being migrated to a different 4272 + * cpu. Breaching the barn's sheaf limits should be thus rare 4273 + * enough so just ignore them to simplify the recovery. 
4274 + */ 4275 + 4276 + if (pcs->main->size == 0) { 4277 + barn_put_empty_sheaf(barn, pcs->main); 4278 + pcs->main = full; 4279 + return pcs; 4280 + } 4281 + 4282 + if (!pcs->spare) { 4283 + pcs->spare = full; 4284 + return pcs; 4285 + } 4286 + 4287 + if (pcs->spare->size == 0) { 4288 + barn_put_empty_sheaf(barn, pcs->spare); 4289 + pcs->spare = full; 4290 + return pcs; 4291 + } 4292 + 4293 + barn_put_full_sheaf(barn, full); 4294 + stat(s, BARN_PUT); 4295 + 4296 + return pcs; 4297 + } 4298 + 4299 + static __fastpath_inline 4300 + void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp) 4301 + { 4302 + struct slub_percpu_sheaves *pcs; 4303 + void *object; 4304 + 4305 + #ifdef CONFIG_NUMA 4306 + if (static_branch_unlikely(&strict_numa)) { 4307 + if (current->mempolicy) 4308 + return NULL; 4309 + } 4310 + #endif 4311 + 4312 + if (!local_trylock(&s->cpu_sheaves->lock)) 4313 + return NULL; 4314 + 4315 + pcs = this_cpu_ptr(s->cpu_sheaves); 4316 + 4317 + if (unlikely(pcs->main->size == 0)) { 4318 + pcs = __pcs_replace_empty_main(s, pcs, gfp); 4319 + if (unlikely(!pcs)) 4320 + return NULL; 4321 + } 4322 + 4323 + object = pcs->main->objects[--pcs->main->size]; 4324 + 4325 + local_unlock(&s->cpu_sheaves->lock); 4326 + 4327 + stat(s, ALLOC_PCS); 4328 + 4329 + return object; 4330 + } 4331 + 4332 + static __fastpath_inline 4333 + unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, size_t size, void **p) 4334 + { 4335 + struct slub_percpu_sheaves *pcs; 4336 + struct slab_sheaf *main; 4337 + unsigned int allocated = 0; 4338 + unsigned int batch; 4339 + 4340 + next_batch: 4341 + if (!local_trylock(&s->cpu_sheaves->lock)) 4342 + return allocated; 4343 + 4344 + pcs = this_cpu_ptr(s->cpu_sheaves); 4345 + 4346 + if (unlikely(pcs->main->size == 0)) { 4347 + 4348 + struct slab_sheaf *full; 4349 + 4350 + if (pcs->spare && pcs->spare->size > 0) { 4351 + swap(pcs->main, pcs->spare); 4352 + goto do_alloc; 4353 + } 4354 + 4355 + full = barn_replace_empty_sheaf(get_barn(s), pcs->main); 4356 + 
4357 + if (full) { 4358 + stat(s, BARN_GET); 4359 + pcs->main = full; 4360 + goto do_alloc; 4361 + } 4362 + 4363 + stat(s, BARN_GET_FAIL); 4364 + 4365 + local_unlock(&s->cpu_sheaves->lock); 4366 + 4367 + /* 4368 + * Once full sheaves in barn are depleted, let the bulk 4369 + * allocation continue from slab pages, otherwise we would just 4370 + * be copying arrays of pointers twice. 4371 + */ 4372 + return allocated; 4373 + } 4374 + 4375 + do_alloc: 4376 + 4377 + main = pcs->main; 4378 + batch = min(size, main->size); 4379 + 4380 + main->size -= batch; 4381 + memcpy(p, main->objects + main->size, batch * sizeof(void *)); 4382 + 4383 + local_unlock(&s->cpu_sheaves->lock); 4384 + 4385 + stat_add(s, ALLOC_PCS, batch); 4386 + 4387 + allocated += batch; 4388 + 4389 + if (batch < size) { 4390 + p += batch; 4391 + size -= batch; 4392 + goto next_batch; 4393 + } 4394 + 4395 + return allocated; 4396 + } 4397 + 4398 + 4399 + /* 4611 4400 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) 4612 4401 * have the fastpath folded into their functions. So no function call 4613 4402 * overhead for requests that can be satisfied on the fastpath. ··· 4837 4214 if (unlikely(object)) 4838 4215 goto out; 4839 4216 4840 - object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); 4217 + if (s->cpu_sheaves && node == NUMA_NO_NODE) 4218 + object = alloc_from_pcs(s, gfpflags); 4219 + 4220 + if (!object) 4221 + object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); 4841 4222 4842 4223 maybe_wipe_obj_freeptr(s, object); 4843 4224 init = slab_want_init_on_alloc(gfpflags, s); ··· 5218 4591 discard_slab(s, slab); 5219 4592 } 5220 4593 4594 + /* 4595 + * pcs is locked. We should have gotten rid of the spare sheaf and obtained an 4596 + * empty sheaf, while the main sheaf is full. We want to install the empty sheaf 4597 + * as a main sheaf, and make the current main sheaf a spare sheaf. 
4598 + * 4599 + * However due to having relinquished the cpu_sheaves lock when obtaining 4600 + * the empty sheaf, we need to handle some unlikely but possible cases. 4601 + * 4602 + * If we put any sheaf to barn here, it's because we were interrupted or have 4603 + * been migrated to a different cpu, which should be rare enough so just ignore 4604 + * the barn's limits to simplify the handling. 4605 + * 4606 + * An alternative scenario that gets us here is when we fail 4607 + * barn_replace_full_sheaf(), because there's no empty sheaf available in the 4608 + * barn, so we had to allocate it by alloc_empty_sheaf(). But because we saw the 4609 + * limit on full sheaves was not exceeded, we assume it didn't change and just 4610 + * put the full sheaf there. 4611 + */ 4612 + static void __pcs_install_empty_sheaf(struct kmem_cache *s, 4613 + struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty) 4614 + { 4615 + struct node_barn *barn; 4616 + 4617 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); 4618 + 4619 + /* This is what we expect to find if nobody interrupted us. */ 4620 + if (likely(!pcs->spare)) { 4621 + pcs->spare = pcs->main; 4622 + pcs->main = empty; 4623 + return; 4624 + } 4625 + 4626 + barn = get_barn(s); 4627 + 4628 + /* 4629 + * Unlikely because if the main sheaf had space, we would have just 4630 + * freed to it. Get rid of our empty sheaf. 4631 + */ 4632 + if (pcs->main->size < s->sheaf_capacity) { 4633 + barn_put_empty_sheaf(barn, empty); 4634 + return; 4635 + } 4636 + 4637 + /* Also unlikely for the same reason */ 4638 + if (pcs->spare->size < s->sheaf_capacity) { 4639 + swap(pcs->main, pcs->spare); 4640 + barn_put_empty_sheaf(barn, empty); 4641 + return; 4642 + } 4643 + 4644 + /* 4645 + * We probably failed barn_replace_full_sheaf() due to no empty sheaf 4646 + * available there, but we allocated one, so finish the job. 
4647 + */ 4648 + barn_put_full_sheaf(barn, pcs->main); 4649 + stat(s, BARN_PUT); 4650 + pcs->main = empty; 4651 + } 4652 + 4653 + /* 4654 + * Replace the full main sheaf with a (at least partially) empty sheaf. 4655 + * 4656 + * Must be called with the cpu_sheaves local lock locked. If successful, returns 4657 + * the pcs pointer and the local lock locked (possibly on a different cpu than 4658 + * initially called). If not successful, returns NULL and the local lock 4659 + * unlocked. 4660 + */ 4661 + static struct slub_percpu_sheaves * 4662 + __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs) 4663 + { 4664 + struct slab_sheaf *empty; 4665 + struct node_barn *barn; 4666 + bool put_fail; 4667 + 4668 + restart: 4669 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); 4670 + 4671 + barn = get_barn(s); 4672 + put_fail = false; 4673 + 4674 + if (!pcs->spare) { 4675 + empty = barn_get_empty_sheaf(barn); 4676 + if (empty) { 4677 + pcs->spare = pcs->main; 4678 + pcs->main = empty; 4679 + return pcs; 4680 + } 4681 + goto alloc_empty; 4682 + } 4683 + 4684 + if (pcs->spare->size < s->sheaf_capacity) { 4685 + swap(pcs->main, pcs->spare); 4686 + return pcs; 4687 + } 4688 + 4689 + empty = barn_replace_full_sheaf(barn, pcs->main); 4690 + 4691 + if (!IS_ERR(empty)) { 4692 + stat(s, BARN_PUT); 4693 + pcs->main = empty; 4694 + return pcs; 4695 + } 4696 + 4697 + if (PTR_ERR(empty) == -E2BIG) { 4698 + /* Since we got here, spare exists and is full */ 4699 + struct slab_sheaf *to_flush = pcs->spare; 4700 + 4701 + stat(s, BARN_PUT_FAIL); 4702 + 4703 + pcs->spare = NULL; 4704 + local_unlock(&s->cpu_sheaves->lock); 4705 + 4706 + sheaf_flush_unused(s, to_flush); 4707 + empty = to_flush; 4708 + goto got_empty; 4709 + } 4710 + 4711 + /* 4712 + * We could not replace full sheaf because barn had no empty 4713 + * sheaves. 
We can still allocate it and put the full sheaf in 4714 + * __pcs_install_empty_sheaf(), but if we fail to allocate it, 4715 + * make sure to count the fail. 4716 + */ 4717 + put_fail = true; 4718 + 4719 + alloc_empty: 4720 + local_unlock(&s->cpu_sheaves->lock); 4721 + 4722 + empty = alloc_empty_sheaf(s, GFP_NOWAIT); 4723 + if (empty) 4724 + goto got_empty; 4725 + 4726 + if (put_fail) 4727 + stat(s, BARN_PUT_FAIL); 4728 + 4729 + if (!sheaf_flush_main(s)) 4730 + return NULL; 4731 + 4732 + if (!local_trylock(&s->cpu_sheaves->lock)) 4733 + return NULL; 4734 + 4735 + pcs = this_cpu_ptr(s->cpu_sheaves); 4736 + 4737 + /* 4738 + * we flushed the main sheaf so it should be empty now, 4739 + * but in case we got preempted or migrated, we need to 4740 + * check again 4741 + */ 4742 + if (pcs->main->size == s->sheaf_capacity) 4743 + goto restart; 4744 + 4745 + return pcs; 4746 + 4747 + got_empty: 4748 + if (!local_trylock(&s->cpu_sheaves->lock)) { 4749 + barn_put_empty_sheaf(barn, empty); 4750 + return NULL; 4751 + } 4752 + 4753 + pcs = this_cpu_ptr(s->cpu_sheaves); 4754 + __pcs_install_empty_sheaf(s, pcs, empty); 4755 + 4756 + return pcs; 4757 + } 4758 + 4759 + /* 4760 + * Free an object to the percpu sheaves. 4761 + * The object is expected to have passed slab_free_hook() already. 4762 + */ 4763 + static __fastpath_inline 4764 + bool free_to_pcs(struct kmem_cache *s, void *object) 4765 + { 4766 + struct slub_percpu_sheaves *pcs; 4767 + 4768 + if (!local_trylock(&s->cpu_sheaves->lock)) 4769 + return false; 4770 + 4771 + pcs = this_cpu_ptr(s->cpu_sheaves); 4772 + 4773 + if (unlikely(pcs->main->size == s->sheaf_capacity)) { 4774 + 4775 + pcs = __pcs_replace_full_main(s, pcs); 4776 + if (unlikely(!pcs)) 4777 + return false; 4778 + } 4779 + 4780 + pcs->main->objects[pcs->main->size++] = object; 4781 + 4782 + local_unlock(&s->cpu_sheaves->lock); 4783 + 4784 + stat(s, FREE_PCS); 4785 + 4786 + return true; 4787 + } 4788 + 4789 + /* 4790 + * Bulk free objects to the percpu sheaves. 
4791 + * Unlike free_to_pcs() this includes the calls to all necessary hooks 4792 + * and the fallback to freeing to slab pages. 4793 + */ 4794 + static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) 4795 + { 4796 + struct slub_percpu_sheaves *pcs; 4797 + struct slab_sheaf *main, *empty; 4798 + bool init = slab_want_init_on_free(s); 4799 + unsigned int batch, i = 0; 4800 + struct node_barn *barn; 4801 + 4802 + while (i < size) { 4803 + struct slab *slab = virt_to_slab(p[i]); 4804 + 4805 + memcg_slab_free_hook(s, slab, p + i, 1); 4806 + alloc_tagging_slab_free_hook(s, slab, p + i, 1); 4807 + 4808 + if (unlikely(!slab_free_hook(s, p[i], init, false))) { 4809 + p[i] = p[--size]; 4810 + if (!size) 4811 + return; 4812 + continue; 4813 + } 4814 + 4815 + i++; 4816 + } 4817 + 4818 + next_batch: 4819 + if (!local_trylock(&s->cpu_sheaves->lock)) 4820 + goto fallback; 4821 + 4822 + pcs = this_cpu_ptr(s->cpu_sheaves); 4823 + 4824 + if (likely(pcs->main->size < s->sheaf_capacity)) 4825 + goto do_free; 4826 + 4827 + barn = get_barn(s); 4828 + 4829 + if (!pcs->spare) { 4830 + empty = barn_get_empty_sheaf(barn); 4831 + if (!empty) 4832 + goto no_empty; 4833 + 4834 + pcs->spare = pcs->main; 4835 + pcs->main = empty; 4836 + goto do_free; 4837 + } 4838 + 4839 + if (pcs->spare->size < s->sheaf_capacity) { 4840 + swap(pcs->main, pcs->spare); 4841 + goto do_free; 4842 + } 4843 + 4844 + empty = barn_replace_full_sheaf(barn, pcs->main); 4845 + if (IS_ERR(empty)) { 4846 + stat(s, BARN_PUT_FAIL); 4847 + goto no_empty; 4848 + } 4849 + 4850 + stat(s, BARN_PUT); 4851 + pcs->main = empty; 4852 + 4853 + do_free: 4854 + main = pcs->main; 4855 + batch = min(size, s->sheaf_capacity - main->size); 4856 + 4857 + memcpy(main->objects + main->size, p, batch * sizeof(void *)); 4858 + main->size += batch; 4859 + 4860 + local_unlock(&s->cpu_sheaves->lock); 4861 + 4862 + stat_add(s, FREE_PCS, batch); 4863 + 4864 + if (batch < size) { 4865 + p += batch; 4866 + size -= batch; 4867 + goto 
next_batch; 4868 + } 4869 + 4870 + return; 4871 + 4872 + no_empty: 4873 + local_unlock(&s->cpu_sheaves->lock); 4874 + 4875 + /* 4876 + * if we depleted all empty sheaves in the barn or there are too 4877 + * many full sheaves, free the rest to slab pages 4878 + */ 4879 + fallback: 4880 + __kmem_cache_free_bulk(s, size, p); 4881 + } 4882 + 5221 4883 #ifndef CONFIG_SLUB_TINY 5222 4884 /* 5223 4885 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that ··· 5593 4677 memcg_slab_free_hook(s, slab, &object, 1); 5594 4678 alloc_tagging_slab_free_hook(s, slab, &object, 1); 5595 4679 5596 - if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false))) 4680 + if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false))) 4681 + return; 4682 + 4683 + if (!s->cpu_sheaves || !free_to_pcs(s, object)) 5597 4684 do_slab_free(s, slab, object, object, 1, addr); 5598 4685 } 5599 4686 ··· 6192 5273 if (!size) 6193 5274 return; 6194 5275 5276 + /* 5277 + * freeing to sheaves is so incompatible with the detached freelist so 5278 + * once we go that way, we have to do everything differently 5279 + */ 5280 + if (s && s->cpu_sheaves) { 5281 + free_to_pcs_bulk(s, size, p); 5282 + return; 5283 + } 5284 + 6195 5285 do { 6196 5286 struct detached_freelist df; 6197 5287 ··· 6319 5391 int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, 6320 5392 void **p) 6321 5393 { 6322 - int i; 5394 + unsigned int i = 0; 6323 5395 6324 5396 if (!size) 6325 5397 return 0; ··· 6328 5400 if (unlikely(!s)) 6329 5401 return 0; 6330 5402 6331 - i = __kmem_cache_alloc_bulk(s, flags, size, p); 6332 - if (unlikely(i == 0)) 6333 - return 0; 5403 + if (s->cpu_sheaves) 5404 + i = alloc_from_pcs_bulk(s, size, p); 5405 + 5406 + if (i < size) { 5407 + /* 5408 + * If we ran out of memory, don't bother with freeing back to 5409 + * the percpu sheaves, we have bigger problems. 
5410 + */ 5411 + if (unlikely(__kmem_cache_alloc_bulk(s, flags, size - i, p + i) == 0)) { 5412 + if (i > 0) 5413 + __kmem_cache_free_bulk(s, i, p); 5414 + return 0; 5415 + } 5416 + } 6334 5417 6335 5418 /* 6336 5419 * memcg and kmem_cache debug support and memory initialization. ··· 6351 5412 slab_want_init_on_alloc(flags, s), s->object_size))) { 6352 5413 return 0; 6353 5414 } 6354 - return i; 5415 + 5416 + return size; 6355 5417 } 6356 5418 EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof); 6357 - 6358 5419 6359 5420 /* 6360 5421 * Object placement in a slab is made very easy because we always start at ··· 6489 5550 } 6490 5551 6491 5552 static void 6492 - init_kmem_cache_node(struct kmem_cache_node *n) 5553 + init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn) 6493 5554 { 6494 5555 n->nr_partial = 0; 6495 5556 spin_lock_init(&n->list_lock); ··· 6499 5560 atomic_long_set(&n->total_objects, 0); 6500 5561 INIT_LIST_HEAD(&n->full); 6501 5562 #endif 5563 + n->barn = barn; 5564 + if (barn) 5565 + barn_init(barn); 6502 5566 } 6503 5567 6504 5568 #ifndef CONFIG_SLUB_TINY ··· 6531 5589 return 1; 6532 5590 } 6533 5591 #endif /* CONFIG_SLUB_TINY */ 5592 + 5593 + static int init_percpu_sheaves(struct kmem_cache *s) 5594 + { 5595 + int cpu; 5596 + 5597 + for_each_possible_cpu(cpu) { 5598 + struct slub_percpu_sheaves *pcs; 5599 + 5600 + pcs = per_cpu_ptr(s->cpu_sheaves, cpu); 5601 + 5602 + local_trylock_init(&pcs->lock); 5603 + 5604 + pcs->main = alloc_empty_sheaf(s, GFP_KERNEL); 5605 + 5606 + if (!pcs->main) 5607 + return -ENOMEM; 5608 + } 5609 + 5610 + return 0; 5611 + } 6534 5612 6535 5613 static struct kmem_cache *kmem_cache_node; 6536 5614 ··· 6587 5625 slab->freelist = get_freepointer(kmem_cache_node, n); 6588 5626 slab->inuse = 1; 6589 5627 kmem_cache_node->node[node] = n; 6590 - init_kmem_cache_node(n); 5628 + init_kmem_cache_node(n, NULL); 6591 5629 inc_slabs_node(kmem_cache_node, node, slab->objects); 6592 5630 6593 5631 /* ··· 6603 5641 struct 
kmem_cache_node *n; 6604 5642 6605 5643 for_each_kmem_cache_node(s, node, n) { 5644 + if (n->barn) { 5645 + WARN_ON(n->barn->nr_full); 5646 + WARN_ON(n->barn->nr_empty); 5647 + kfree(n->barn); 5648 + n->barn = NULL; 5649 + } 5650 + 6606 5651 s->node[node] = NULL; 6607 5652 kmem_cache_free(kmem_cache_node, n); 6608 5653 } ··· 6618 5649 void __kmem_cache_release(struct kmem_cache *s) 6619 5650 { 6620 5651 cache_random_seq_destroy(s); 5652 + if (s->cpu_sheaves) 5653 + pcs_destroy(s); 6621 5654 #ifndef CONFIG_SLUB_TINY 6622 5655 free_percpu(s->cpu_slab); 6623 5656 #endif ··· 6632 5661 6633 5662 for_each_node_mask(node, slab_nodes) { 6634 5663 struct kmem_cache_node *n; 5664 + struct node_barn *barn = NULL; 6635 5665 6636 5666 if (slab_state == DOWN) { 6637 5667 early_kmem_cache_node_alloc(node); 6638 5668 continue; 6639 5669 } 5670 + 5671 + if (s->cpu_sheaves) { 5672 + barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node); 5673 + 5674 + if (!barn) 5675 + return 0; 5676 + } 5677 + 6640 5678 n = kmem_cache_alloc_node(kmem_cache_node, 6641 5679 GFP_KERNEL, node); 6642 - 6643 - if (!n) 5680 + if (!n) { 5681 + kfree(barn); 6644 5682 return 0; 5683 + } 6645 5684 6646 - init_kmem_cache_node(n); 5685 + init_kmem_cache_node(n, barn); 5686 + 6647 5687 s->node[node] = n; 6648 5688 } 6649 5689 return 1; ··· 6911 5929 flush_all_cpus_locked(s); 6912 5930 /* Attempt to free all objects */ 6913 5931 for_each_kmem_cache_node(s, node, n) { 5932 + if (n->barn) 5933 + barn_shrink(s, n->barn); 6914 5934 free_partial(s, n); 6915 5935 if (n->nr_partial || node_nr_slabs(n)) 6916 5936 return 1; ··· 7116 6132 for (i = 0; i < SHRINK_PROMOTE_MAX; i++) 7117 6133 INIT_LIST_HEAD(promote + i); 7118 6134 6135 + if (n->barn) 6136 + barn_shrink(s, n->barn); 6137 + 7119 6138 spin_lock_irqsave(&n->list_lock, flags); 7120 6139 7121 6140 /* ··· 7198 6211 */ 7199 6212 mutex_lock(&slab_mutex); 7200 6213 list_for_each_entry(s, &slab_caches, list) { 6214 + struct node_barn *barn = NULL; 6215 + 7201 6216 /* 7202 
6217 * The structure may already exist if the node was previously 7203 6218 * onlined and offlined. 7204 6219 */ 7205 6220 if (get_node(s, nid)) 7206 6221 continue; 6222 + 6223 + if (s->cpu_sheaves) { 6224 + barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid); 6225 + 6226 + if (!barn) { 6227 + ret = -ENOMEM; 6228 + goto out; 6229 + } 6230 + } 6231 + 7207 6232 /* 7208 6233 * XXX: kmem_cache_alloc_node will fallback to other nodes 7209 6234 * since memory is not yet available from the node that ··· 7223 6224 */ 7224 6225 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL); 7225 6226 if (!n) { 6227 + kfree(barn); 7226 6228 ret = -ENOMEM; 7227 6229 goto out; 7228 6230 } 7229 - init_kmem_cache_node(n); 6231 + 6232 + init_kmem_cache_node(n, barn); 6233 + 7230 6234 s->node[nid] = n; 7231 6235 } 7232 6236 /* ··· 7442 6440 7443 6441 set_cpu_partial(s); 7444 6442 6443 + if (args->sheaf_capacity && !IS_ENABLED(CONFIG_SLUB_TINY) 6444 + && !(s->flags & SLAB_DEBUG_FLAGS)) { 6445 + s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves); 6446 + if (!s->cpu_sheaves) { 6447 + err = -ENOMEM; 6448 + goto out; 6449 + } 6450 + // TODO: increase capacity to grow slab_sheaf up to next kmalloc size? 
6451 + s->sheaf_capacity = args->sheaf_capacity; 6452 + } 6453 + 7445 6454 #ifdef CONFIG_NUMA 7446 6455 s->remote_node_defrag_ratio = 1000; 7447 6456 #endif ··· 7468 6455 7469 6456 if (!alloc_kmem_cache_cpus(s)) 7470 6457 goto out; 6458 + 6459 + if (s->cpu_sheaves) { 6460 + err = init_percpu_sheaves(s); 6461 + if (err) 6462 + goto out; 6463 + } 7471 6464 7472 6465 err = 0; 7473 6466 ··· 7927 6908 } 7928 6909 SLAB_ATTR_RO(order); 7929 6910 6911 + static ssize_t sheaf_capacity_show(struct kmem_cache *s, char *buf) 6912 + { 6913 + return sysfs_emit(buf, "%u\n", s->sheaf_capacity); 6914 + } 6915 + SLAB_ATTR_RO(sheaf_capacity); 6916 + 7930 6917 static ssize_t min_partial_show(struct kmem_cache *s, char *buf) 7931 6918 { 7932 6919 return sysfs_emit(buf, "%lu\n", s->min_partial); ··· 8280 7255 } \ 8281 7256 SLAB_ATTR(text); \ 8282 7257 7258 + STAT_ATTR(ALLOC_PCS, alloc_cpu_sheaf); 8283 7259 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); 8284 7260 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); 7261 + STAT_ATTR(FREE_PCS, free_cpu_sheaf); 8285 7262 STAT_ATTR(FREE_FASTPATH, free_fastpath); 8286 7263 STAT_ATTR(FREE_SLOWPATH, free_slowpath); 8287 7264 STAT_ATTR(FREE_FROZEN, free_frozen); ··· 8308 7281 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); 8309 7282 STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node); 8310 7283 STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain); 7284 + STAT_ATTR(SHEAF_FLUSH, sheaf_flush); 7285 + STAT_ATTR(SHEAF_REFILL, sheaf_refill); 7286 + STAT_ATTR(SHEAF_ALLOC, sheaf_alloc); 7287 + STAT_ATTR(SHEAF_FREE, sheaf_free); 7288 + STAT_ATTR(BARN_GET, barn_get); 7289 + STAT_ATTR(BARN_GET_FAIL, barn_get_fail); 7290 + STAT_ATTR(BARN_PUT, barn_put); 7291 + STAT_ATTR(BARN_PUT_FAIL, barn_put_fail); 8311 7292 #endif /* CONFIG_SLUB_STATS */ 8312 7293 8313 7294 #ifdef CONFIG_KFENCE ··· 8346 7311 &object_size_attr.attr, 8347 7312 &objs_per_slab_attr.attr, 8348 7313 &order_attr.attr, 7314 + &sheaf_capacity_attr.attr, 8349 7315 &min_partial_attr.attr, 8350 7316 &cpu_partial_attr.attr, 
8351 7317 &objects_partial_attr.attr, ··· 8378 7342 &remote_node_defrag_ratio_attr.attr, 8379 7343 #endif 8380 7344 #ifdef CONFIG_SLUB_STATS 7345 + &alloc_cpu_sheaf_attr.attr, 8381 7346 &alloc_fastpath_attr.attr, 8382 7347 &alloc_slowpath_attr.attr, 7348 + &free_cpu_sheaf_attr.attr, 8383 7349 &free_fastpath_attr.attr, 8384 7350 &free_slowpath_attr.attr, 8385 7351 &free_frozen_attr.attr, ··· 8406 7368 &cpu_partial_free_attr.attr, 8407 7369 &cpu_partial_node_attr.attr, 8408 7370 &cpu_partial_drain_attr.attr, 7371 + &sheaf_flush_attr.attr, 7372 + &sheaf_refill_attr.attr, 7373 + &sheaf_alloc_attr.attr, 7374 + &sheaf_free_attr.attr, 7375 + &barn_get_attr.attr, 7376 + &barn_get_fail_attr.attr, 7377 + &barn_put_attr.attr, 7378 + &barn_put_fail_attr.attr, 8409 7379 #endif 8410 7380 #ifdef CONFIG_FAILSLAB 8411 7381 &failslab_attr.attr,

Configure Feed

Configure Feed