Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

slab: remove cpu (partial) slabs usage from allocation paths

We now rely on sheaves as the percpu caching layer and can refill them
directly from partial or newly allocated slabs. Start removing the cpu
(partial) slabs code, first from allocation paths.
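Roughly, the allocation path layering changes like this (a conceptual summary of this patch, not exact function names):

  before: percpu sheaves -> cpu slab -> cpu partial slabs -> node partial slabs -> new slab
  after:  percpu sheaves -> node partial slabs -> new slab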

This means that any allocation not satisfied from percpu sheaves will
end up in ___slab_alloc(), where we remove the usage of cpu (partial)
slabs, so it will only perform get_partial() or new_slab(). In the
latter case we reuse alloc_from_new_slab() (when we don't use
the debug/tiny alloc_single_from_new_slab() variant).
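For orientation, a condensed, illustrative sketch of the resulting slow path follows (the __GFP_THISNODE retry, the !allow_spin constraints and stat accounting of the real patch are omitted):

  static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
                             unsigned long addr, unsigned int orig_size)
  {
          struct partial_context pc = { .flags = gfpflags, .orig_size = orig_size };
          struct slab *slab;
          void *object;

          /* 1) try to take a single object from a node partial slab */
          object = get_from_partial(s, node, &pc);
          if (object)
                  goto success;

          /* 2) otherwise allocate a new slab and take an object from it */
          slab = new_slab(s, pc.flags, node);
          if (!slab)
                  return NULL;

          if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
                  object = alloc_single_from_new_slab(s, slab, orig_size, gfpflags);
                  if (object)
                          goto success;
                  return NULL;
          }

          alloc_from_new_slab(s, slab, &object, 1, true);
          return object;

  success:
          if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
                  set_track(s, object, TRACK_ALLOC, addr, gfpflags);
          return object;
  }

get_from_partial() itself still falls back to get_from_any_partial() for other nodes before we resort to new_slab().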

In get_partial_node() we used to return a slab to be frozen as the cpu
slab, and further slabs to refill the percpu partial list. Now we only
want to return a single object and leave the slab on the list (unless it
became full). We can't simply reuse alloc_single_from_partial() as that
assumes freeing uses free_to_partial_list(). Instead we need to use
__slab_update_freelist() so the allocation works correctly against a
racing __slab_free().
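The core of the new get_from_partial_node() is therefore a freelist/counters
cmpxchg loop, slightly abridged here from the patch below:

          struct freelist_counters old, new;

          /*
           * Get a single object from the slab. This might race against
           * __slab_free(), which however has to take the list_lock if
           * it's about to make the slab fully free.
           */
          do {
                  old.freelist = slab->freelist;
                  old.counters = slab->counters;

                  new.freelist = get_freepointer(s, old.freelist);
                  new.counters = old.counters;
                  new.inuse++;
          } while (!__slab_update_freelist(s, slab, &old, &new,
                                           "get_from_partial_node"));

          object = old.freelist;
          /* the slab has no free objects left - take it off the partial list */
          if (!new.freelist)
                  remove_partial(n, slab);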

To reflect the new purpose of get_partial() functions, rename them to
get_from_partial(), get_from_partial_node(), and get_from_any_partial().

The rest of the changes removes functions that no longer have any
callers.

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

+87 -541
mm/slub.c
··· 241 241 static DEFINE_STATIC_KEY_FALSE(strict_numa); 242 242 #endif 243 243 244 - /* Structure holding parameters for get_partial() call chain */ 244 + /* Structure holding parameters for get_from_partial() call chain */ 245 245 struct partial_context { 246 246 gfp_t flags; 247 247 unsigned int orig_size; 248 - void *object; 249 248 }; 250 249 251 250 /* Structure holding parameters for get_partial_node_bulk() */ ··· 603 604 return freelist_ptr_decode(s, p, ptr_addr); 604 605 } 605 606 606 - static void prefetch_freepointer(const struct kmem_cache *s, void *object) 607 - { 608 - prefetchw(object + s->offset); 609 - } 610 - 611 - /* 612 - * When running under KMSAN, get_freepointer_safe() may return an uninitialized 613 - * pointer value in the case the current thread loses the race for the next 614 - * memory chunk in the freelist. In that case this_cpu_cmpxchg_double() in 615 - * slab_alloc_node() will fail, so the uninitialized value won't be used, but 616 - * KMSAN will still check all arguments of cmpxchg because of imperfect 617 - * handling of inline assembly. 618 - * To work around this problem, we apply __no_kmsan_checks to ensure that 619 - * get_freepointer_safe() returns initialized memory. 620 - */ 621 - __no_kmsan_checks 622 - static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) 623 - { 624 - unsigned long freepointer_addr; 625 - freeptr_t p; 626 - 627 - if (!debug_pagealloc_enabled_static()) 628 - return get_freepointer(s, object); 629 - 630 - object = kasan_reset_tag(object); 631 - freepointer_addr = (unsigned long)object + s->offset; 632 - copy_from_kernel_nofault(&p, (freeptr_t *)freepointer_addr, sizeof(p)); 633 - return freelist_ptr_decode(s, p, freepointer_addr); 634 - } 635 - 636 607 static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) 637 608 { 638 609 unsigned long freeptr_addr = (unsigned long)object + s->offset; ··· 682 713 nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo)); 683 714 s->cpu_partial_slabs = nr_slabs; 684 715 } 685 - 686 - static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s) 687 - { 688 - return s->cpu_partial_slabs; 689 - } 690 - #else 691 - #ifdef SLAB_SUPPORTS_SYSFS 716 + #elif defined(SLAB_SUPPORTS_SYSFS) 692 717 static inline void 693 718 slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) 694 719 { 695 - } 696 - #endif 697 - 698 - static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s) 699 - { 700 - return 0; 701 720 } 702 721 #endif /* CONFIG_SLUB_CPU_PARTIAL */ 703 722 ··· 1028 1071 p->handle = handle; 1029 1072 #endif 1030 1073 p->addr = addr; 1031 - p->cpu = smp_processor_id(); 1074 + p->cpu = raw_smp_processor_id(); 1032 1075 p->pid = current->pid; 1033 1076 p->when = jiffies; 1034 1077 } ··· 3536 3579 } 3537 3580 3538 3581 /* 3539 - * Try to allocate a partial slab from a specific node. 3582 + * Try to allocate object from a partial slab on a specific node. 3540 3583 */ 3541 - static struct slab *get_partial_node(struct kmem_cache *s, 3542 - struct kmem_cache_node *n, 3543 - struct partial_context *pc) 3584 + static void *get_from_partial_node(struct kmem_cache *s, 3585 + struct kmem_cache_node *n, 3586 + struct partial_context *pc) 3544 3587 { 3545 - struct slab *slab, *slab2, *partial = NULL; 3588 + struct slab *slab, *slab2; 3546 3589 unsigned long flags; 3547 - unsigned int partial_slabs = 0; 3590 + void *object = NULL; 3548 3591 3549 3592 /* 3550 3593 * Racy check. 
If we mistakenly see no partial slabs then we 3551 3594 * just allocate an empty slab. If we mistakenly try to get a 3552 - * partial slab and there is none available then get_partial() 3595 + * partial slab and there is none available then get_from_partial() 3553 3596 * will return NULL. 3554 3597 */ 3555 3598 if (!n || !n->nr_partial) ··· 3560 3603 else if (!spin_trylock_irqsave(&n->list_lock, flags)) 3561 3604 return NULL; 3562 3605 list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) { 3606 + 3607 + struct freelist_counters old, new; 3608 + 3563 3609 if (!pfmemalloc_match(slab, pc->flags)) 3564 3610 continue; 3565 3611 3566 3612 if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { 3567 - void *object = alloc_single_from_partial(s, n, slab, 3613 + object = alloc_single_from_partial(s, n, slab, 3568 3614 pc->orig_size); 3569 - if (object) { 3570 - partial = slab; 3571 - pc->object = object; 3615 + if (object) 3572 3616 break; 3573 - } 3574 3617 continue; 3575 3618 } 3576 3619 3577 - remove_partial(n, slab); 3620 + /* 3621 + * get a single object from the slab. This might race against 3622 + * __slab_free(), which however has to take the list_lock if 3623 + * it's about to make the slab fully free. 3624 + */ 3625 + do { 3626 + old.freelist = slab->freelist; 3627 + old.counters = slab->counters; 3578 3628 3579 - if (!partial) { 3580 - partial = slab; 3581 - stat(s, ALLOC_FROM_PARTIAL); 3629 + new.freelist = get_freepointer(s, old.freelist); 3630 + new.counters = old.counters; 3631 + new.inuse++; 3582 3632 3583 - if ((slub_get_cpu_partial(s) == 0)) { 3584 - break; 3585 - } 3586 - } else { 3587 - put_cpu_partial(s, slab, 0); 3588 - stat(s, CPU_PARTIAL_NODE); 3633 + } while (!__slab_update_freelist(s, slab, &old, &new, "get_from_partial_node")); 3589 3634 3590 - if (++partial_slabs > slub_get_cpu_partial(s) / 2) { 3591 - break; 3592 - } 3593 - } 3635 + object = old.freelist; 3636 + if (!new.freelist) 3637 + remove_partial(n, slab); 3638 + 3639 + break; 3594 3640 } 3595 3641 spin_unlock_irqrestore(&n->list_lock, flags); 3596 - return partial; 3642 + return object; 3597 3643 } 3598 3644 3599 3645 /* 3600 - * Get a slab from somewhere. Search in increasing NUMA distances. 3646 + * Get an object from somewhere. Search in increasing NUMA distances. 3601 3647 */ 3602 - static struct slab *get_any_partial(struct kmem_cache *s, 3603 - struct partial_context *pc) 3648 + static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *pc) 3604 3649 { 3605 3650 #ifdef CONFIG_NUMA 3606 3651 struct zonelist *zonelist; 3607 3652 struct zoneref *z; 3608 3653 struct zone *zone; 3609 3654 enum zone_type highest_zoneidx = gfp_zone(pc->flags); 3610 - struct slab *slab; 3611 3655 unsigned int cpuset_mems_cookie; 3612 3656 3613 3657 /* ··· 3643 3685 3644 3686 if (n && cpuset_zone_allowed(zone, pc->flags) && 3645 3687 n->nr_partial > s->min_partial) { 3646 - slab = get_partial_node(s, n, pc); 3647 - if (slab) { 3688 + 3689 + void *object = get_from_partial_node(s, n, pc); 3690 + 3691 + if (object) { 3648 3692 /* 3649 3693 * Don't check read_mems_allowed_retry() 3650 3694 * here - if mems_allowed was updated in ··· 3654 3694 * between allocation and the cpuset 3655 3695 * update 3656 3696 */ 3657 - return slab; 3697 + return object; 3658 3698 } 3659 3699 } 3660 3700 } ··· 3664 3704 } 3665 3705 3666 3706 /* 3667 - * Get a partial slab, lock it and return it. 
3707 + * Get an object from a partial slab 3668 3708 */ 3669 - static struct slab *get_partial(struct kmem_cache *s, int node, 3670 - struct partial_context *pc) 3709 + static void *get_from_partial(struct kmem_cache *s, int node, 3710 + struct partial_context *pc) 3671 3711 { 3672 - struct slab *slab; 3673 3712 int searchnode = node; 3713 + void *object; 3674 3714 3675 3715 if (node == NUMA_NO_NODE) 3676 3716 searchnode = numa_mem_id(); 3677 3717 3678 - slab = get_partial_node(s, get_node(s, searchnode), pc); 3679 - if (slab || (node != NUMA_NO_NODE && (pc->flags & __GFP_THISNODE))) 3680 - return slab; 3718 + object = get_from_partial_node(s, get_node(s, searchnode), pc); 3719 + if (object || (node != NUMA_NO_NODE && (pc->flags & __GFP_THISNODE))) 3720 + return object; 3681 3721 3682 - return get_any_partial(s, pc); 3722 + return get_from_any_partial(s, pc); 3683 3723 } 3684 3724 3685 3725 #ifdef CONFIG_PREEMPTION ··· 4237 4277 return 0; 4238 4278 } 4239 4279 4240 - /* 4241 - * Check if the objects in a per cpu structure fit numa 4242 - * locality expectations. 4243 - */ 4244 - static inline int node_match(struct slab *slab, int node) 4245 - { 4246 - #ifdef CONFIG_NUMA 4247 - if (node != NUMA_NO_NODE && slab_nid(slab) != node) 4248 - return 0; 4249 - #endif 4250 - return 1; 4251 - } 4252 - 4253 4280 #ifdef CONFIG_SLUB_DEBUG 4254 4281 static int count_free(struct slab *slab) 4255 4282 { ··· 4422 4475 } 4423 4476 4424 4477 /* 4425 - * Check the slab->freelist and either transfer the freelist to the 4426 - * per cpu freelist or deactivate the slab. 4427 - * 4428 - * The slab is still frozen if the return value is not NULL. 4429 - * 4430 - * If this function returns NULL then the slab has been unfrozen. 4431 - */ 4432 - static inline void *get_freelist(struct kmem_cache *s, struct slab *slab) 4433 - { 4434 - struct freelist_counters old, new; 4435 - 4436 - lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock)); 4437 - 4438 - do { 4439 - old.freelist = slab->freelist; 4440 - old.counters = slab->counters; 4441 - 4442 - new.freelist = NULL; 4443 - new.counters = old.counters; 4444 - 4445 - new.inuse = old.objects; 4446 - new.frozen = old.freelist != NULL; 4447 - 4448 - 4449 - } while (!__slab_update_freelist(s, slab, &old, &new, "get_freelist")); 4450 - 4451 - return old.freelist; 4452 - } 4453 - 4454 - /* 4455 4478 * Get the slab's freelist and do not freeze it. 4456 4479 * 4457 4480 * Assumes the slab is isolated from node partial list and not frozen. ··· 4444 4527 new.inuse = old.objects; 4445 4528 4446 4529 } while (!slab_update_freelist(s, slab, &old, &new, "get_freelist_nofreeze")); 4447 - 4448 - return old.freelist; 4449 - } 4450 - 4451 - /* 4452 - * Freeze the partial slab and return the pointer to the freelist. 4453 - */ 4454 - static inline void *freeze_slab(struct kmem_cache *s, struct slab *slab) 4455 - { 4456 - struct freelist_counters old, new; 4457 - 4458 - do { 4459 - old.freelist = slab->freelist; 4460 - old.counters = slab->counters; 4461 - 4462 - new.freelist = NULL; 4463 - new.counters = old.counters; 4464 - VM_BUG_ON(new.frozen); 4465 - 4466 - new.inuse = old.objects; 4467 - new.frozen = 1; 4468 - 4469 - } while (!slab_update_freelist(s, slab, &old, &new, "freeze_slab")); 4470 4530 4471 4531 return old.freelist; 4472 4532 } ··· 4515 4621 } 4516 4622 4517 4623 /* 4518 - * Slow path. The lockless freelist is empty or we need to perform 4519 - * debugging duties. 4624 + * Slow path. 
We failed to allocate via percpu sheaves or they are not available 4625 + * due to bootstrap or debugging enabled or SLUB_TINY. 4520 4626 * 4521 - * Processing is still very fast if new objects have been freed to the 4522 - * regular freelist. In that case we simply take over the regular freelist 4523 - * as the lockless freelist and zap the regular freelist. 4524 - * 4525 - * If that is not working then we fall back to the partial lists. We take the 4526 - * first element of the freelist as the object to allocate now and move the 4527 - * rest of the freelist to the lockless freelist. 4528 - * 4529 - * And if we were unable to get a new slab from the partial slab lists then 4530 - * we need to allocate a new slab. This is the slowest path since it involves 4531 - * a call to the page allocator and the setup of a new slab. 4532 - * 4533 - * Version of __slab_alloc to use when we know that preemption is 4534 - * already disabled (which is the case for bulk allocation). 4627 + * We try to allocate from partial slab lists and fall back to allocating a new 4628 + * slab. 4535 4629 */ 4536 4630 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 4537 - unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size) 4631 + unsigned long addr, unsigned int orig_size) 4538 4632 { 4539 4633 bool allow_spin = gfpflags_allow_spinning(gfpflags); 4540 - void *freelist; 4634 + void *object; 4541 4635 struct slab *slab; 4542 - unsigned long flags; 4543 4636 struct partial_context pc; 4544 4637 bool try_thisnode = true; 4545 4638 4546 4639 stat(s, ALLOC_SLOWPATH); 4547 - 4548 - reread_slab: 4549 - 4550 - slab = READ_ONCE(c->slab); 4551 - if (!slab) { 4552 - /* 4553 - * if the node is not online or has no normal memory, just 4554 - * ignore the node constraint 4555 - */ 4556 - if (unlikely(node != NUMA_NO_NODE && 4557 - !node_isset(node, slab_nodes))) 4558 - node = NUMA_NO_NODE; 4559 - goto new_slab; 4560 - } 4561 - 4562 - if (unlikely(!node_match(slab, node))) { 4563 - /* 4564 - * same as above but node_match() being false already 4565 - * implies node != NUMA_NO_NODE. 4566 - * 4567 - * We don't strictly honor pfmemalloc and NUMA preferences 4568 - * when !allow_spin because: 4569 - * 4570 - * 1. Most kmalloc() users allocate objects on the local node, 4571 - * so kmalloc_nolock() tries not to interfere with them by 4572 - * deactivating the cpu slab. 4573 - * 4574 - * 2. Deactivating due to NUMA or pfmemalloc mismatch may cause 4575 - * unnecessary slab allocations even when n->partial list 4576 - * is not empty. 
4577 - */ 4578 - if (!node_isset(node, slab_nodes) || 4579 - !allow_spin) { 4580 - node = NUMA_NO_NODE; 4581 - } else { 4582 - stat(s, ALLOC_NODE_MISMATCH); 4583 - goto deactivate_slab; 4584 - } 4585 - } 4586 - 4587 - /* 4588 - * By rights, we should be searching for a slab page that was 4589 - * PFMEMALLOC but right now, we are losing the pfmemalloc 4590 - * information when the page leaves the per-cpu allocator 4591 - */ 4592 - if (unlikely(!pfmemalloc_match(slab, gfpflags) && allow_spin)) 4593 - goto deactivate_slab; 4594 - 4595 - /* must check again c->slab in case we got preempted and it changed */ 4596 - local_lock_cpu_slab(s, flags); 4597 - 4598 - if (unlikely(slab != c->slab)) { 4599 - local_unlock_cpu_slab(s, flags); 4600 - goto reread_slab; 4601 - } 4602 - freelist = c->freelist; 4603 - if (freelist) 4604 - goto load_freelist; 4605 - 4606 - freelist = get_freelist(s, slab); 4607 - 4608 - if (!freelist) { 4609 - c->slab = NULL; 4610 - c->tid = next_tid(c->tid); 4611 - local_unlock_cpu_slab(s, flags); 4612 - stat(s, DEACTIVATE_BYPASS); 4613 - goto new_slab; 4614 - } 4615 - 4616 - stat(s, ALLOC_REFILL); 4617 - 4618 - load_freelist: 4619 - 4620 - lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock)); 4621 - 4622 - /* 4623 - * freelist is pointing to the list of objects to be used. 4624 - * slab is pointing to the slab from which the objects are obtained. 4625 - * That slab must be frozen for per cpu allocations to work. 4626 - */ 4627 - VM_BUG_ON(!c->slab->frozen); 4628 - c->freelist = get_freepointer(s, freelist); 4629 - c->tid = next_tid(c->tid); 4630 - local_unlock_cpu_slab(s, flags); 4631 - return freelist; 4632 - 4633 - deactivate_slab: 4634 - 4635 - local_lock_cpu_slab(s, flags); 4636 - if (slab != c->slab) { 4637 - local_unlock_cpu_slab(s, flags); 4638 - goto reread_slab; 4639 - } 4640 - freelist = c->freelist; 4641 - c->slab = NULL; 4642 - c->freelist = NULL; 4643 - c->tid = next_tid(c->tid); 4644 - local_unlock_cpu_slab(s, flags); 4645 - deactivate_slab(s, slab, freelist); 4646 - 4647 - new_slab: 4648 - 4649 - #ifdef CONFIG_SLUB_CPU_PARTIAL 4650 - while (slub_percpu_partial(c)) { 4651 - local_lock_cpu_slab(s, flags); 4652 - if (unlikely(c->slab)) { 4653 - local_unlock_cpu_slab(s, flags); 4654 - goto reread_slab; 4655 - } 4656 - if (unlikely(!slub_percpu_partial(c))) { 4657 - local_unlock_cpu_slab(s, flags); 4658 - /* we were preempted and partial list got empty */ 4659 - goto new_objects; 4660 - } 4661 - 4662 - slab = slub_percpu_partial(c); 4663 - slub_set_percpu_partial(c, slab); 4664 - 4665 - if (likely(node_match(slab, node) && 4666 - pfmemalloc_match(slab, gfpflags)) || 4667 - !allow_spin) { 4668 - c->slab = slab; 4669 - freelist = get_freelist(s, slab); 4670 - VM_BUG_ON(!freelist); 4671 - stat(s, CPU_PARTIAL_ALLOC); 4672 - goto load_freelist; 4673 - } 4674 - 4675 - local_unlock_cpu_slab(s, flags); 4676 - 4677 - slab->next = NULL; 4678 - __put_partials(s, slab); 4679 - } 4680 - #endif 4681 4640 4682 4641 new_objects: 4683 4642 ··· 4539 4792 * When a preferred node is indicated but no __GFP_THISNODE 4540 4793 * 4541 4794 * 1) try to get a partial slab from target node only by having 4542 - * __GFP_THISNODE in pc.flags for get_partial() 4795 + * __GFP_THISNODE in pc.flags for get_from_partial() 4543 4796 * 2) if 1) failed, try to allocate a new slab from target node with 4544 4797 * GPF_NOWAIT | __GFP_THISNODE opportunistically 4545 4798 * 3) if 2) failed, retry with original gfpflags which will allow 4546 - * get_partial() try partial lists of other nodes before 
potentially 4547 - * allocating new page from other nodes 4799 + * get_from_partial() try partial lists of other nodes before 4800 + * potentially allocating new page from other nodes 4548 4801 */ 4549 4802 if (unlikely(node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE) 4550 4803 && try_thisnode)) { ··· 4556 4809 } 4557 4810 4558 4811 pc.orig_size = orig_size; 4559 - slab = get_partial(s, node, &pc); 4560 - if (slab) { 4561 - if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { 4562 - freelist = pc.object; 4563 - /* 4564 - * For debug caches here we had to go through 4565 - * alloc_single_from_partial() so just store the 4566 - * tracking info and return the object. 4567 - * 4568 - * Due to disabled preemption we need to disallow 4569 - * blocking. The flags are further adjusted by 4570 - * gfp_nested_mask() in stack_depot itself. 4571 - */ 4572 - if (s->flags & SLAB_STORE_USER) 4573 - set_track(s, freelist, TRACK_ALLOC, addr, 4574 - gfpflags & ~(__GFP_DIRECT_RECLAIM)); 4812 + object = get_from_partial(s, node, &pc); 4813 + if (object) 4814 + goto success; 4575 4815 4576 - return freelist; 4577 - } 4578 - 4579 - freelist = freeze_slab(s, slab); 4580 - goto retry_load_slab; 4581 - } 4582 - 4583 - slub_put_cpu_ptr(s->cpu_slab); 4584 4816 slab = new_slab(s, pc.flags, node); 4585 - c = slub_get_cpu_ptr(s->cpu_slab); 4586 4817 4587 4818 if (unlikely(!slab)) { 4588 4819 if (node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE) ··· 4575 4850 stat(s, ALLOC_SLAB); 4576 4851 4577 4852 if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { 4578 - freelist = alloc_single_from_new_slab(s, slab, orig_size, gfpflags); 4853 + object = alloc_single_from_new_slab(s, slab, orig_size, gfpflags); 4579 4854 4580 - if (unlikely(!freelist)) { 4581 - /* This could cause an endless loop. Fail instead. */ 4582 - if (!allow_spin) 4583 - return NULL; 4584 - goto new_objects; 4585 - } 4855 + if (likely(object)) 4856 + goto success; 4857 + } else { 4858 + alloc_from_new_slab(s, slab, &object, 1, allow_spin); 4586 4859 4587 - if (s->flags & SLAB_STORE_USER) 4588 - set_track(s, freelist, TRACK_ALLOC, addr, 4589 - gfpflags & ~(__GFP_DIRECT_RECLAIM)); 4590 - 4591 - return freelist; 4860 + /* we don't need to check SLAB_STORE_USER here */ 4861 + if (likely(object)) 4862 + return object; 4592 4863 } 4593 4864 4594 - /* 4595 - * No other reference to the slab yet so we can 4596 - * muck around with it freely without cmpxchg 4597 - */ 4598 - freelist = slab->freelist; 4599 - slab->freelist = NULL; 4600 - slab->inuse = slab->objects; 4601 - slab->frozen = 1; 4865 + if (allow_spin) 4866 + goto new_objects; 4602 4867 4603 - inc_slabs_node(s, slab_nid(slab), slab->objects); 4868 + /* This could cause an endless loop. Fail instead. */ 4869 + return NULL; 4604 4870 4605 - if (unlikely(!pfmemalloc_match(slab, gfpflags) && allow_spin)) { 4606 - /* 4607 - * For !pfmemalloc_match() case we don't load freelist so that 4608 - * we don't make further mismatched allocations easier. 
4609 - */ 4610 - deactivate_slab(s, slab, get_freepointer(s, freelist)); 4611 - return freelist; 4612 - } 4871 + success: 4872 + if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) 4873 + set_track(s, object, TRACK_ALLOC, addr, gfpflags); 4613 4874 4614 - retry_load_slab: 4615 - 4616 - local_lock_cpu_slab(s, flags); 4617 - if (unlikely(c->slab)) { 4618 - void *flush_freelist = c->freelist; 4619 - struct slab *flush_slab = c->slab; 4620 - 4621 - c->slab = NULL; 4622 - c->freelist = NULL; 4623 - c->tid = next_tid(c->tid); 4624 - 4625 - local_unlock_cpu_slab(s, flags); 4626 - 4627 - if (unlikely(!allow_spin)) { 4628 - /* Reentrant slub cannot take locks, defer */ 4629 - defer_deactivate_slab(flush_slab, flush_freelist); 4630 - } else { 4631 - deactivate_slab(s, flush_slab, flush_freelist); 4632 - } 4633 - 4634 - stat(s, CPUSLAB_FLUSH); 4635 - 4636 - goto retry_load_slab; 4637 - } 4638 - c->slab = slab; 4639 - 4640 - goto load_freelist; 4875 + return object; 4641 4876 } 4877 + 4642 4878 /* 4643 4879 * We disallow kprobes in ___slab_alloc() to prevent reentrance 4644 4880 * ··· 4614 4928 */ 4615 4929 NOKPROBE_SYMBOL(___slab_alloc); 4616 4930 4617 - /* 4618 - * A wrapper for ___slab_alloc() for contexts where preemption is not yet 4619 - * disabled. Compensates for possible cpu changes by refetching the per cpu area 4620 - * pointer. 4621 - */ 4622 - static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 4623 - unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size) 4624 - { 4625 - void *p; 4626 - 4627 - #ifdef CONFIG_PREEMPT_COUNT 4628 - /* 4629 - * We may have been preempted and rescheduled on a different 4630 - * cpu before disabling preemption. Need to reload cpu area 4631 - * pointer. 4632 - */ 4633 - c = slub_get_cpu_ptr(s->cpu_slab); 4634 - #endif 4635 - if (unlikely(!gfpflags_allow_spinning(gfpflags))) { 4636 - if (local_lock_is_locked(&s->cpu_slab->lock)) { 4637 - /* 4638 - * EBUSY is an internal signal to kmalloc_nolock() to 4639 - * retry a different bucket. It's not propagated 4640 - * to the caller. 4641 - */ 4642 - p = ERR_PTR(-EBUSY); 4643 - goto out; 4644 - } 4645 - } 4646 - p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size); 4647 - out: 4648 - #ifdef CONFIG_PREEMPT_COUNT 4649 - slub_put_cpu_ptr(s->cpu_slab); 4650 - #endif 4651 - return p; 4652 - } 4653 - 4654 4931 static __always_inline void *__slab_alloc_node(struct kmem_cache *s, 4655 4932 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) 4656 4933 { 4657 - struct kmem_cache_cpu *c; 4658 - struct slab *slab; 4659 - unsigned long tid; 4660 4934 void *object; 4661 - 4662 - redo: 4663 - /* 4664 - * Must read kmem_cache cpu data via this cpu ptr. Preemption is 4665 - * enabled. We may switch back and forth between cpus while 4666 - * reading from one cpu area. That does not matter as long 4667 - * as we end up on the original cpu again when doing the cmpxchg. 4668 - * 4669 - * We must guarantee that tid and kmem_cache_cpu are retrieved on the 4670 - * same cpu. We read first the kmem_cache_cpu pointer and use it to read 4671 - * the tid. If we are preempted and switched to another cpu between the 4672 - * two reads, it's OK as the two are still associated with the same cpu 4673 - * and cmpxchg later will validate the cpu. 4674 - */ 4675 - c = raw_cpu_ptr(s->cpu_slab); 4676 - tid = READ_ONCE(c->tid); 4677 - 4678 - /* 4679 - * Irqless object alloc/free algorithm used here depends on sequence 4680 - * of fetching cpu_slab's data. 
tid should be fetched before anything 4681 - * on c to guarantee that object and slab associated with previous tid 4682 - * won't be used with current tid. If we fetch tid first, object and 4683 - * slab could be one associated with next tid and our alloc/free 4684 - * request will be failed. In this case, we will retry. So, no problem. 4685 - */ 4686 - barrier(); 4687 - 4688 - /* 4689 - * The transaction ids are globally unique per cpu and per operation on 4690 - * a per cpu queue. Thus they can be guarantee that the cmpxchg_double 4691 - * occurs on the right processor and that there was no operation on the 4692 - * linked list in between. 4693 - */ 4694 - 4695 - object = c->freelist; 4696 - slab = c->slab; 4697 4935 4698 4936 #ifdef CONFIG_NUMA 4699 4937 if (static_branch_unlikely(&strict_numa) && ··· 4627 5017 4628 5018 if (mpol) { 4629 5019 /* 4630 - * Special BIND rule support. If existing slab 5020 + * Special BIND rule support. If the local node 4631 5021 * is in permitted set then do not redirect 4632 5022 * to a particular node. 4633 5023 * Otherwise we apply the memory policy to get 4634 5024 * the node we need to allocate on. 4635 5025 */ 4636 - if (mpol->mode != MPOL_BIND || !slab || 4637 - !node_isset(slab_nid(slab), mpol->nodes)) 4638 - 5026 + if (mpol->mode != MPOL_BIND || 5027 + !node_isset(numa_mem_id(), mpol->nodes)) 4639 5028 node = mempolicy_slab_node(); 4640 5029 } 4641 5030 } 4642 5031 #endif 4643 5032 4644 - if (!USE_LOCKLESS_FAST_PATH() || 4645 - unlikely(!object || !slab || !node_match(slab, node))) { 4646 - object = __slab_alloc(s, gfpflags, node, addr, c, orig_size); 4647 - } else { 4648 - void *next_object = get_freepointer_safe(s, object); 4649 - 4650 - /* 4651 - * The cmpxchg will only match if there was no additional 4652 - * operation and if we are on the right processor. 4653 - * 4654 - * The cmpxchg does the following atomically (without lock 4655 - * semantics!) 4656 - * 1. Relocate first pointer to the current per cpu area. 4657 - * 2. Verify that tid and freelist have not been changed 4658 - * 3. If they were not changed replace tid and freelist 4659 - * 4660 - * Since this is without lock semantics the protection is only 4661 - * against code executing on this cpu *not* from access by 4662 - * other cpus. 4663 - */ 4664 - if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) { 4665 - note_cmpxchg_failure("slab_alloc", s, tid); 4666 - goto redo; 4667 - } 4668 - prefetch_freepointer(s, next_object); 4669 - stat(s, ALLOC_FASTPATH); 4670 - } 5033 + object = ___slab_alloc(s, gfpflags, node, addr, orig_size); 4671 5034 4672 5035 return object; 4673 5036 } ··· 7335 7752 int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, 7336 7753 void **p) 7337 7754 { 7338 - struct kmem_cache_cpu *c; 7339 - unsigned long irqflags; 7340 7755 int i; 7341 7756 7342 7757 /* 7343 - * Drain objects in the per cpu slab, while disabling local 7344 - * IRQs, which protects against PREEMPT and interrupts 7345 - * handlers invoking normal fastpath. 
7758 + * TODO: this might be more efficient (if necessary) by reusing 7759 + * __refill_objects() 7346 7760 */ 7347 - c = slub_get_cpu_ptr(s->cpu_slab); 7348 - local_lock_irqsave(&s->cpu_slab->lock, irqflags); 7349 - 7350 7761 for (i = 0; i < size; i++) { 7351 - void *object = c->freelist; 7352 7762 7353 - if (unlikely(!object)) { 7354 - /* 7355 - * We may have removed an object from c->freelist using 7356 - * the fastpath in the previous iteration; in that case, 7357 - * c->tid has not been bumped yet. 7358 - * Since ___slab_alloc() may reenable interrupts while 7359 - * allocating memory, we should bump c->tid now. 7360 - */ 7361 - c->tid = next_tid(c->tid); 7763 + p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_, 7764 + s->object_size); 7765 + if (unlikely(!p[i])) 7766 + goto error; 7362 7767 7363 - local_unlock_irqrestore(&s->cpu_slab->lock, irqflags); 7364 - 7365 - /* 7366 - * Invoking slow path likely have side-effect 7367 - * of re-populating per CPU c->freelist 7368 - */ 7369 - p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, 7370 - _RET_IP_, c, s->object_size); 7371 - if (unlikely(!p[i])) 7372 - goto error; 7373 - 7374 - c = this_cpu_ptr(s->cpu_slab); 7375 - maybe_wipe_obj_freeptr(s, p[i]); 7376 - 7377 - local_lock_irqsave(&s->cpu_slab->lock, irqflags); 7378 - 7379 - continue; /* goto for-loop */ 7380 - } 7381 - c->freelist = get_freepointer(s, object); 7382 - p[i] = object; 7383 7768 maybe_wipe_obj_freeptr(s, p[i]); 7384 - stat(s, ALLOC_FASTPATH); 7385 7769 } 7386 - c->tid = next_tid(c->tid); 7387 - local_unlock_irqrestore(&s->cpu_slab->lock, irqflags); 7388 - slub_put_cpu_ptr(s->cpu_slab); 7389 7770 7390 7771 return i; 7391 7772 7392 7773 error: 7393 - slub_put_cpu_ptr(s->cpu_slab); 7394 7774 __kmem_cache_free_bulk(s, i, p); 7395 7775 return 0; 7396 7776