Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: memcg: separate slab stat accounting from objcg charge cache

Cgroup slab metrics are cached per-cpu the same way as the sub-page charge
cache. However, the intertwined code to manage those dependent caches
right now is quite difficult to follow.

Specifically, cached slab stat updates occur in consume() if there was
enough charge cache to satisfy the new object. If that fails, whole pages
are reserved, and slab stats are updated when the remainder of those
pages, after subtracting the size of the new slab object, are put into the
charge cache. This already juggles a delicate mix of the object size, the
page charge size, and the remainder to put into the byte cache. Doing
slab accounting in this path as well is fragile, and has recently caused a
bug where the input parameters between the two caches were mixed up.

Refactor the consume() and refill() paths into unlocked and locked
variants that only do charge caching. Then let the slab path manage its
own lock section and open-code charging and accounting.

This makes the slab stat cache subordinate to the charge cache:
__refill_obj_stock() is called first to prepare it; __account_obj_stock()
follows to hitch a ride.

This results in a minor behavioral change: previously, a mismatching
percpu stock would always be drained for the purpose of setting up slab
account caching, even if there was no byte remainder to put into the
charge cache. Now, the stock is left alone, and slab accounting takes the
uncached path if there is a mismatch. This is exceedingly rare, and it
was probably never worth draining the whole stock just to cache the slab
stat update.

Link: https://lkml.kernel.org/r/20260302195305.620713-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Hao Li <hao.li@linux.dev>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Johannes Weiner <jweiner@meta.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Johannes Weiner and committed by
Andrew Morton
52af721b 4665aa7e

+61 -39
+61 -39
mm/memcontrol.c
··· 2960 2960 2961 2961 static void unlock_stock(struct obj_stock_pcp *stock) 2962 2962 { 2963 - local_unlock(&obj_stock.lock); 2963 + if (stock) 2964 + local_unlock(&obj_stock.lock); 2964 2965 } 2965 2966 2967 + /* Call after __refill_obj_stock() to ensure stock->cached_objg == objcg */ 2966 2968 static void __account_obj_stock(struct obj_cgroup *objcg, 2967 2969 struct obj_stock_pcp *stock, int nr, 2968 2970 struct pglist_data *pgdat, enum node_stat_item idx) 2969 2971 { 2970 2972 int *bytes; 2971 2973 2972 - if (!stock) 2974 + if (!stock || READ_ONCE(stock->cached_objcg) != objcg) 2973 2975 goto direct; 2974 2976 2975 2977 /* ··· 3018 3016 mod_objcg_mlstate(objcg, pgdat, idx, nr); 3019 3017 } 3020 3018 3021 - static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, 3022 - struct pglist_data *pgdat, enum node_stat_item idx) 3019 + static bool __consume_obj_stock(struct obj_cgroup *objcg, 3020 + struct obj_stock_pcp *stock, 3021 + unsigned int nr_bytes) 3022 + { 3023 + if (objcg == READ_ONCE(stock->cached_objcg) && 3024 + stock->nr_bytes >= nr_bytes) { 3025 + stock->nr_bytes -= nr_bytes; 3026 + return true; 3027 + } 3028 + 3029 + return false; 3030 + } 3031 + 3032 + static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) 3023 3033 { 3024 3034 struct obj_stock_pcp *stock; 3025 3035 bool ret = false; ··· 3040 3026 if (!stock) 3041 3027 return ret; 3042 3028 3043 - if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { 3044 - stock->nr_bytes -= nr_bytes; 3045 - ret = true; 3046 - 3047 - if (pgdat) 3048 - __account_obj_stock(objcg, stock, nr_bytes, pgdat, idx); 3049 - } 3050 - 3029 + ret = __consume_obj_stock(objcg, stock, nr_bytes); 3051 3030 unlock_stock(stock); 3052 3031 3053 3032 return ret; ··· 3125 3118 return flush; 3126 3119 } 3127 3120 3128 - static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, 3129 - bool allow_uncharge, int nr_acct, struct pglist_data *pgdat, 3130 - enum node_stat_item idx) 3121 + static void __refill_obj_stock(struct obj_cgroup *objcg, 3122 + struct obj_stock_pcp *stock, 3123 + unsigned int nr_bytes, 3124 + bool allow_uncharge) 3131 3125 { 3132 - struct obj_stock_pcp *stock; 3133 3126 unsigned int nr_pages = 0; 3134 3127 3135 - stock = trylock_stock(); 3136 3128 if (!stock) { 3137 - if (pgdat) 3138 - __account_obj_stock(objcg, NULL, nr_acct, pgdat, idx); 3139 3129 nr_pages = nr_bytes >> PAGE_SHIFT; 3140 3130 nr_bytes = nr_bytes & (PAGE_SIZE - 1); 3141 3131 atomic_add(nr_bytes, &objcg->nr_charged_bytes); ··· 3150 3146 } 3151 3147 stock->nr_bytes += nr_bytes; 3152 3148 3153 - if (pgdat) 3154 - __account_obj_stock(objcg, stock, nr_acct, pgdat, idx); 3155 - 3156 3149 if (allow_uncharge && (stock->nr_bytes > PAGE_SIZE)) { 3157 3150 nr_pages = stock->nr_bytes >> PAGE_SHIFT; 3158 3151 stock->nr_bytes &= (PAGE_SIZE - 1); 3159 3152 } 3160 3153 3161 - unlock_stock(stock); 3162 3154 out: 3163 3155 if (nr_pages) 3164 3156 obj_cgroup_uncharge_pages(objcg, nr_pages); 3157 + } 3158 + 3159 + static void refill_obj_stock(struct obj_cgroup *objcg, 3160 + unsigned int nr_bytes, 3161 + bool allow_uncharge) 3162 + { 3163 + struct obj_stock_pcp *stock = trylock_stock(); 3164 + __refill_obj_stock(objcg, stock, nr_bytes, allow_uncharge); 3165 + unlock_stock(stock); 3165 3166 } 3166 3167 3167 3168 static int __obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, ··· 3183 3174 return ret; 3184 3175 } 3185 3176 3186 - static int obj_cgroup_charge_account(struct obj_cgroup *objcg, gfp_t gfp, size_t size, 3187 - struct pglist_data *pgdat, enum node_stat_item idx) 3177 + int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) 3188 3178 { 3189 3179 size_t remainder; 3190 3180 int ret; 3191 3181 3192 - if (likely(consume_obj_stock(objcg, size, pgdat, idx))) 3182 + if (likely(consume_obj_stock(objcg, size))) 3193 3183 return 0; 3194 3184 3195 3185 /* ··· 3215 3207 * race. 3216 3208 */ 3217 3209 ret = __obj_cgroup_charge(objcg, gfp, size, &remainder); 3218 - if (!ret && (remainder || pgdat)) 3219 - refill_obj_stock(objcg, remainder, false, size, pgdat, idx); 3210 + if (!ret && remainder) 3211 + refill_obj_stock(objcg, remainder, false); 3220 3212 3221 3213 return ret; 3222 3214 } 3223 3215 3224 - int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) 3225 - { 3226 - return obj_cgroup_charge_account(objcg, gfp, size, NULL, 0); 3227 - } 3228 - 3229 3216 void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) 3230 3217 { 3231 - refill_obj_stock(objcg, size, true, 0, NULL, 0); 3218 + refill_obj_stock(objcg, size, true); 3232 3219 } 3233 3220 3234 3221 static inline size_t obj_full_size(struct kmem_cache *s) ··· 3238 3235 bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, 3239 3236 gfp_t flags, size_t size, void **p) 3240 3237 { 3238 + size_t obj_size = obj_full_size(s); 3241 3239 struct obj_cgroup *objcg; 3242 3240 struct slab *slab; 3243 3241 unsigned long off; ··· 3279 3275 for (i = 0; i < size; i++) { 3280 3276 unsigned long obj_exts; 3281 3277 struct slabobj_ext *obj_ext; 3278 + struct obj_stock_pcp *stock; 3282 3279 3283 3280 slab = virt_to_slab(p[i]); 3284 3281 ··· 3299 3294 * TODO: we could batch this until slab_pgdat(slab) changes 3300 3295 * between iterations, with a more complicated undo 3301 3296 */ 3302 - if (obj_cgroup_charge_account(objcg, flags, obj_full_size(s), 3303 - slab_pgdat(slab), cache_vmstat_idx(s))) 3304 - return false; 3297 + stock = trylock_stock(); 3298 + if (!stock || !__consume_obj_stock(objcg, stock, obj_size)) { 3299 + size_t remainder; 3300 + 3301 + unlock_stock(stock); 3302 + if (__obj_cgroup_charge(objcg, flags, obj_size, &remainder)) 3303 + return false; 3304 + stock = trylock_stock(); 3305 + if (remainder) 3306 + __refill_obj_stock(objcg, stock, remainder, false); 3307 + } 3308 + __account_obj_stock(objcg, stock, obj_size, 3309 + slab_pgdat(slab), cache_vmstat_idx(s)); 3310 + unlock_stock(stock); 3305 3311 3306 3312 obj_exts = slab_obj_exts(slab); 3307 3313 get_slab_obj_exts(obj_exts); ··· 3334 3318 for (int i = 0; i < objects; i++) { 3335 3319 struct obj_cgroup *objcg; 3336 3320 struct slabobj_ext *obj_ext; 3321 + struct obj_stock_pcp *stock; 3337 3322 unsigned int off; 3338 3323 3339 3324 off = obj_to_index(s, slab, p[i]); ··· 3344 3327 continue; 3345 3328 3346 3329 obj_ext->objcg = NULL; 3347 - refill_obj_stock(objcg, obj_size, true, -obj_size, 3348 - slab_pgdat(slab), cache_vmstat_idx(s)); 3330 + 3331 + stock = trylock_stock(); 3332 + __refill_obj_stock(objcg, stock, obj_size, true); 3333 + __account_obj_stock(objcg, stock, -obj_size, 3334 + slab_pgdat(slab), cache_vmstat_idx(s)); 3335 + unlock_stock(stock); 3336 + 3349 3337 obj_cgroup_put(objcg); 3350 3338 } 3351 3339 }