Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'akpm' (patches from Andrew)

Merge slub bulk allocator updates from Andrew Morton:
"This missed the merge window because I was waiting for some repairs to
come in. Nothing actually uses the bulk allocator yet and the changes
to other code paths are pretty small. And the net guys are waiting
for this so they can start merging the client code"

More comments from Jesper Dangaard Brouer:
"The kmem_cache_alloc_bulk() call, in mm/slub.c, were included in
previous kernel. The present version contains a bug. Vladimir
Davydov noticed it contained a bug, when kernel is compiled with
CONFIG_MEMCG_KMEM (see commit 03ec0ed57ffc: "slub: fix kmem cgroup
bug in kmem_cache_alloc_bulk"). Plus the mem cgroup counterpart in
kmem_cache_free_bulk() were missing (see commit 033745189b1b "slub:
add missing kmem cgroup support to kmem_cache_free_bulk").

I don't consider the fix stable-material because there are no in-tree
users of the API.

But with known bugs (for memcg) I cannot start using the API in the
net-tree"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
slab/slub: adjust kmem_cache_alloc_bulk API
slub: add missing kmem cgroup support to kmem_cache_free_bulk
slub: fix kmem cgroup bug in kmem_cache_alloc_bulk
slub: optimize bulk slowpath free by detached freelist
slub: support for bulk free with SLUB freelists
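
Nothing in-tree uses the bulk API yet (as the merge message above notes), so
the following is only a sketch of what a client might look like; the cache
pointer, NR_OBJS and example_bulk_cycle() are hypothetical. It illustrates the
two calls and the changed return type of kmem_cache_alloc_bulk(), which now
returns the number of objects allocated (0 on failure) instead of a bool.

/*
 * Hypothetical caller -- no such user exists in-tree yet.  The cache
 * pointer and NR_OBJS are made up for illustration only.
 */
#include <linux/slab.h>

#define NR_OBJS 16

static int example_bulk_cycle(struct kmem_cache *cache)
{
	void *objs[NR_OBJS];

	/* Current implementation is all-or-nothing: returns 0 on failure */
	if (!kmem_cache_alloc_bulk(cache, GFP_KERNEL, NR_OBJS, objs))
		return -ENOMEM;

	/* ... use objs[0..NR_OBJS-1] ... */

	/* Interrupts must be enabled for both bulk calls */
	kmem_cache_free_bulk(cache, NR_OBJS, objs);
	return 0;
}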

+182 -76
+1 -1
include/linux/slab.h
···
  * Note that interrupts must be enabled when calling these functions.
  */
 void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
 #ifdef CONFIG_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment;
+1 -1
mm/slab.c
···
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 								void **p)
 {
 	return __kmem_cache_alloc_bulk(s, flags, size, p);
+1 -1
mm/slab.h
···
  * may be allocated or freed using these operations.
  */
 void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-bool __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
 #ifdef CONFIG_MEMCG_KMEM
 /*
+3 -3
mm/slab_common.c
···
 		kmem_cache_free(s, p[i]);
 }
 
-bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
+int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 								void **p)
 {
 	size_t i;
···
 		void *x = p[i] = kmem_cache_alloc(s, flags);
 		if (!x) {
 			__kmem_cache_free_bulk(s, i, p);
-			return false;
+			return 0;
 		}
 	}
-	return true;
+	return i;
 }
 
 #ifdef CONFIG_MEMCG_KMEM
+1 -1
mm/slob.c
···
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 								void **p)
 {
 	return __kmem_cache_alloc_bulk(s, flags, size, p);
+175 -69
mm/slub.c
···
 	return 0;
 }
 
+/* Supports checking bulk free of a constructed freelist */
 static noinline struct kmem_cache_node *free_debug_processing(
-	struct kmem_cache *s, struct page *page, void *object,
+	struct kmem_cache *s, struct page *page,
+	void *head, void *tail, int bulk_cnt,
 	unsigned long addr, unsigned long *flags)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	void *object = head;
+	int cnt = 0;
 
 	spin_lock_irqsave(&n->list_lock, *flags);
 	slab_lock(page);
 
 	if (!check_slab(s, page))
 		goto fail;
+
+next_object:
+	cnt++;
 
 	if (!check_valid_pointer(s, page, object)) {
 		slab_err(s, page, "Invalid object pointer 0x%p", object);
···
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
 	trace(s, page, object, 0);
+	/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
 	init_object(s, object, SLUB_RED_INACTIVE);
+
+	/* Reached end of constructed freelist yet? */
+	if (object != tail) {
+		object = get_freepointer(s, object);
+		goto next_object;
+	}
 out:
+	if (cnt != bulk_cnt)
+		slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
+			 bulk_cnt, cnt);
+
 	slab_unlock(page);
 	/*
 	 * Keep node_lock to preserve integrity
···
 	struct page *page, void *object, unsigned long addr) { return 0; }
 
 static inline struct kmem_cache_node *free_debug_processing(
-	struct kmem_cache *s, struct page *page, void *object,
+	struct kmem_cache *s, struct page *page,
+	void *head, void *tail, int bulk_cnt,
 	unsigned long addr, unsigned long *flags) { return NULL; }
 
 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
···
 	return memcg_kmem_get_cache(s, flags);
 }
 
-static inline void slab_post_alloc_hook(struct kmem_cache *s,
-					gfp_t flags, void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
+					size_t size, void **p)
 {
+	size_t i;
+
 	flags &= gfp_allowed_mask;
-	kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
-	kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
+	for (i = 0; i < size; i++) {
+		void *object = p[i];
+
+		kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+		kmemleak_alloc_recursive(object, s->object_size, 1,
+					 s->flags, flags);
+		kasan_slab_alloc(s, object);
+	}
 	memcg_kmem_put_cache(s);
-	kasan_slab_alloc(s, object);
 }
···
 	debug_check_no_obj_freed(x, s->object_size);
 
 	kasan_slab_free(s, x);
+}
+
+static inline void slab_free_freelist_hook(struct kmem_cache *s,
+					   void *head, void *tail)
+{
+/*
+ * Compiler cannot detect this function can be removed if slab_free_hook()
+ * evaluates to nothing. Thus, catch all relevant config debug options here.
+ */
+#if defined(CONFIG_KMEMCHECK) ||		\
+	defined(CONFIG_LOCKDEP) ||		\
+	defined(CONFIG_DEBUG_KMEMLEAK) ||	\
+	defined(CONFIG_DEBUG_OBJECTS_FREE) ||	\
+	defined(CONFIG_KASAN)
+
+	void *object = head;
+	void *tail_obj = tail ? : head;
+
+	do {
+		slab_free_hook(s, object);
+	} while ((object != tail_obj) &&
+		 (object = get_freepointer(s, object)));
+#endif
 }
 
 static void setup_object(struct kmem_cache *s, struct page *page,
···
 static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 		gfp_t gfpflags, int node, unsigned long addr)
 {
-	void **object;
+	void *object;
 	struct kmem_cache_cpu *c;
 	struct page *page;
 	unsigned long tid;
···
 	if (unlikely(gfpflags & __GFP_ZERO) && object)
 		memset(object, 0, s->object_size);
 
-	slab_post_alloc_hook(s, gfpflags, object);
+	slab_post_alloc_hook(s, gfpflags, 1, &object);
 
 	return object;
 }
···
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-			void *x, unsigned long addr)
+			void *head, void *tail, int cnt,
+			unsigned long addr)
+
 {
 	void *prior;
-	void **object = (void *)x;
 	int was_frozen;
 	struct page new;
 	unsigned long counters;
···
 	stat(s, FREE_SLOWPATH);
 
 	if (kmem_cache_debug(s) &&
-	    !(n = free_debug_processing(s, page, x, addr, &flags)))
+	    !(n = free_debug_processing(s, page, head, tail, cnt,
+					addr, &flags)))
 		return;
 
 	do {
···
 		}
 		prior = page->freelist;
 		counters = page->counters;
-		set_freepointer(s, object, prior);
+		set_freepointer(s, tail, prior);
 		new.counters = counters;
 		was_frozen = new.frozen;
-		new.inuse--;
+		new.inuse -= cnt;
 		if ((!new.inuse || !prior) && !was_frozen) {
 
 			if (kmem_cache_has_cpu_partial(s) && !prior) {
···
 
 	} while (!cmpxchg_double_slab(s, page,
 		prior, counters,
-		object, new.counters,
+		head, new.counters,
 		"__slab_free"));
 
 	if (likely(!n)) {
···
  *
  * If fastpath is not possible then fall back to __slab_free where we deal
  * with all sorts of special processing.
+ *
+ * Bulk free of a freelist with several objects (all pointing to the
+ * same page) possible by specifying head and tail ptr, plus objects
+ * count (cnt). Bulk free indicated by tail pointer being set.
  */
-static __always_inline void slab_free(struct kmem_cache *s,
-					struct page *page, void *x, unsigned long addr)
+static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
+				      void *head, void *tail, int cnt,
+				      unsigned long addr)
 {
-	void **object = (void *)x;
+	void *tail_obj = tail ? : head;
 	struct kmem_cache_cpu *c;
 	unsigned long tid;
 
-	slab_free_hook(s, x);
+	slab_free_freelist_hook(s, head, tail);
 
 redo:
 	/*
···
 	barrier();
 
 	if (likely(page == c->page)) {
-		set_freepointer(s, object, c->freelist);
+		set_freepointer(s, tail_obj, c->freelist);
 
 		if (unlikely(!this_cpu_cmpxchg_double(
 				s->cpu_slab->freelist, s->cpu_slab->tid,
 				c->freelist, tid,
-				object, next_tid(tid)))) {
+				head, next_tid(tid)))) {
 
 			note_cmpxchg_failure("slab_free", s, tid);
 			goto redo;
 		}
 		stat(s, FREE_FASTPATH);
 	} else
-		__slab_free(s, page, x, addr);
+		__slab_free(s, page, head, tail_obj, cnt, addr);
 
 }
···
 	s = cache_from_obj(s, x);
 	if (!s)
 		return;
-	slab_free(s, virt_to_head_page(x), x, _RET_IP_);
+	slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
 	trace_kmem_cache_free(_RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-/* Note that interrupts must be enabled when calling this function. */
-void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
-{
-	struct kmem_cache_cpu *c;
+struct detached_freelist {
 	struct page *page;
-	int i;
+	void *tail;
+	void *freelist;
+	int cnt;
+};
 
-	local_irq_disable();
-	c = this_cpu_ptr(s->cpu_slab);
+/*
+ * This function progressively scans the array with free objects (with
+ * a limited look ahead) and extract objects belonging to the same
+ * page. It builds a detached freelist directly within the given
+ * page/objects. This can happen without any need for
+ * synchronization, because the objects are owned by running process.
+ * The freelist is build up as a single linked list in the objects.
+ * The idea is, that this detached freelist can then be bulk
+ * transferred to the real freelist(s), but only requiring a single
+ * synchronization primitive. Look ahead in the array is limited due
+ * to performance reasons.
+ */
+static int build_detached_freelist(struct kmem_cache *s, size_t size,
+				   void **p, struct detached_freelist *df)
+{
+	size_t first_skipped_index = 0;
+	int lookahead = 3;
+	void *object;
 
-	for (i = 0; i < size; i++) {
-		void *object = p[i];
+	/* Always re-init detached_freelist */
+	df->page = NULL;
 
-		BUG_ON(!object);
-		/* kmem cache debug support */
-		s = cache_from_obj(s, object);
-		if (unlikely(!s))
-			goto exit;
-		slab_free_hook(s, object);
+	do {
+		object = p[--size];
+	} while (!object && size);
 
-		page = virt_to_head_page(object);
+	if (!object)
+		return 0;
 
-		if (c->page == page) {
-			/* Fastpath: local CPU free */
-			set_freepointer(s, object, c->freelist);
-			c->freelist = object;
-		} else {
-			c->tid = next_tid(c->tid);
-			local_irq_enable();
-			/* Slowpath: overhead locked cmpxchg_double_slab */
-			__slab_free(s, page, object, _RET_IP_);
-			local_irq_disable();
-			c = this_cpu_ptr(s->cpu_slab);
+	/* Start new detached freelist */
+	set_freepointer(s, object, NULL);
+	df->page = virt_to_head_page(object);
+	df->tail = object;
+	df->freelist = object;
+	p[size] = NULL; /* mark object processed */
+	df->cnt = 1;
+
+	while (size) {
+		object = p[--size];
+		if (!object)
+			continue; /* Skip processed objects */
+
+		/* df->page is always set at this point */
+		if (df->page == virt_to_head_page(object)) {
+			/* Opportunity build freelist */
+			set_freepointer(s, object, df->freelist);
+			df->freelist = object;
+			df->cnt++;
+			p[size] = NULL; /* mark object processed */
+
+			continue;
 		}
+
+		/* Limit look ahead search */
+		if (!--lookahead)
+			break;
+
+		if (!first_skipped_index)
+			first_skipped_index = size + 1;
 	}
-exit:
-	c->tid = next_tid(c->tid);
-	local_irq_enable();
+
+	return first_skipped_index;
+}
+
+
+/* Note that interrupts must be enabled when calling this function. */
+void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
+{
+	if (WARN_ON(!size))
+		return;
+
+	do {
+		struct detached_freelist df;
+		struct kmem_cache *s;
+
+		/* Support for memcg */
+		s = cache_from_obj(orig_s, p[size - 1]);
+
+		size = build_detached_freelist(s, size, p, &df);
+		if (unlikely(!df.page))
+			continue;
+
+		slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+	} while (likely(size));
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
 /* Note that interrupts must be enabled when calling this function. */
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
-			   void **p)
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+			  void **p)
 {
 	struct kmem_cache_cpu *c;
 	int i;
 
+	/* memcg and kmem_cache debug support */
+	s = slab_pre_alloc_hook(s, flags);
+	if (unlikely(!s))
+		return false;
 	/*
 	 * Drain objects in the per cpu slab, while disabling local
 	 * IRQs, which protects against PREEMPT and interrupts
···
 			c = this_cpu_ptr(s->cpu_slab);
 			continue; /* goto for-loop */
 		}
-
-		/* kmem_cache debug support */
-		s = slab_pre_alloc_hook(s, flags);
-		if (unlikely(!s))
-			goto error;
-
 		c->freelist = get_freepointer(s, object);
 		p[i] = object;
-
-		/* kmem_cache debug support */
-		slab_post_alloc_hook(s, flags, object);
 	}
 	c->tid = next_tid(c->tid);
 	local_irq_enable();
···
 			memset(p[j], 0, s->object_size);
 	}
 
-	return true;
-
+	/* memcg and kmem_cache debug support */
+	slab_post_alloc_hook(s, flags, size, p);
+	return i;
 error:
-	__kmem_cache_free_bulk(s, i, p);
 	local_irq_enable();
-	return false;
+	slab_post_alloc_hook(s, flags, i, p);
+	__kmem_cache_free_bulk(s, i, p);
+	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_bulk);
 
···
 		__free_kmem_pages(page, compound_order(page));
 		return;
 	}
-	slab_free(page->slab_cache, page, object, _RET_IP_);
+	slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);
 
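
The heart of the mm/slub.c change above is build_detached_freelist(): scan the
array of objects from the end, thread the ones that live on the same page into
a private "detached" freelist stored in the objects' own freepointers, blank
out the processed slots, and stop searching after a small lookahead so the
scan stays cheap; kmem_cache_free_bulk() then flushes each detached list with
a single slab_free() call. Below is a standalone user-space analogue of that
grouping loop, not kernel code: the integer page tag and the group_one_page()
helper are invented stand-ins for virt_to_head_page() and
build_detached_freelist().

#include <stdio.h>
#include <stddef.h>

struct obj {
	struct obj *next;	/* plays the role of the freepointer */
	int page;		/* stand-in for virt_to_head_page() */
};

struct detached {
	int page;
	struct obj *head;	/* detached freelist head */
	struct obj *tail;	/* kept so a real splice could link it in */
	int cnt;
};

/* Returns the new effective array size (first skipped index), 0 when done */
static size_t group_one_page(struct obj **p, size_t size, struct detached *df)
{
	size_t first_skipped = 0;
	int lookahead = 3;
	struct obj *o;

	df->cnt = 0;

	/* Find the last unprocessed object; it starts a new group */
	do {
		o = p[--size];
	} while (!o && size);
	if (!o)
		return 0;

	df->page = o->page;
	df->head = df->tail = o;
	o->next = NULL;
	p[size] = NULL;			/* mark object processed */
	df->cnt = 1;

	while (size) {
		o = p[--size];
		if (!o)
			continue;	/* already grouped earlier */
		if (o->page == df->page) {
			o->next = df->head;	/* push onto detached list */
			df->head = o;
			df->cnt++;
			p[size] = NULL;
			continue;
		}
		if (!--lookahead)
			break;		/* bounded search, as in the kernel */
		if (!first_skipped)
			first_skipped = size + 1;
	}
	return first_skipped;
}

int main(void)
{
	struct obj o[6] = {
		{ .page = 1 }, { .page = 2 }, { .page = 1 },
		{ .page = 2 }, { .page = 1 }, { .page = 2 },
	};
	struct obj *p[6] = { &o[0], &o[1], &o[2], &o[3], &o[4], &o[5] };
	size_t size = 6;
	struct detached df;

	do {
		size = group_one_page(p, size, &df);
		if (df.cnt)
			printf("page %d: %d objects in one flush\n",
			       df.page, df.cnt);
	} while (size);
	return 0;
}

On this six-object input the loop produces two groups of three objects, which
mirrors how the kernel version turns six individual frees into two
synchronized freelist splices.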