Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

mm/slab: save memory by allocating slabobj_ext array from leftover

The leftover space in a slab is always smaller than s->size, and
kmem caches for large objects whose sizes are not powers of two tend
to have a greater amount of leftover space per slab. In some cases,
the leftover space is larger than the slabobj_ext array for the slab.

An excellent example of such a cache is ext4_inode_cache. On my system,
the object size is 1136, with a preferred order of 3, 28 objects per slab,
and 960 bytes of leftover space per slab.

Since the size of the slabobj_ext array is only 224 bytes (w/o mem
profiling) or 448 bytes (w/ mem profiling) per slab, the entire array
fits within the leftover space.
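
For reference, the arithmetic can be checked with a quick user-space
sketch (hypothetical, not part of the patch; the 8- and 16-byte
per-object sizes of struct slabobj_ext are inferred from 224/28 and
448/28):

    #include <stdio.h>

    int main(void)
    {
            /* numbers from the commit message: ext4_inode_cache on an
             * order-3 (8-page) slab */
            unsigned long slab_bytes = 8UL * 4096;
            unsigned long obj_size = 1136;  /* s->size */
            unsigned long objects = 28;

            /* assumed: 8 B/object w/o mem profiling, 16 B/object with */
            printf("leftover:      %lu\n", slab_bytes - objects * obj_size);
            printf("w/o profiling: %lu\n", objects * 8);
            printf("w/  profiling: %lu\n", objects * 16);
            return 0;
    }

This prints 960, 224, and 448, matching the figures above.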

Allocate the slabobj_ext array from this unused space instead of using
kcalloc() when it is large enough. The array is allocated from unused
space only when a new slab is created; alloc_slab_obj_exts() calls that
happen after slab creation do not try to use the unused space, because
lazy allocation there would require more expensive synchronization.
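
Conceptually, the fit check mirrors obj_exts_fit_within_slab_leftover()
in the diff below; here is a simplified stand-alone model (ALIGN_UP and
the function name are illustrative stand-ins, not kernel API):

    #include <stdbool.h>
    #include <stddef.h>

    /* round x up to the power-of-two boundary a */
    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

    /*
     * The array starts right after the last object, aligned to the
     * slabobj_ext size, and fits if it ends within the slab.
     */
    static bool obj_exts_fit(size_t obj_size, size_t objects,
                             size_t ext_size, size_t slab_bytes)
    {
            size_t offset = ALIGN_UP(obj_size * objects, ext_size);

            return offset + ext_size * objects <= slab_bytes;
    }

With the ext4_inode_cache numbers, obj_exts_fit(1136, 28, 16, 32768)
returns true: 1136 * 28 = 31808 is already 16-byte aligned, and
31808 + 448 = 32256 <= 32768, so the array lands in the leftover space.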

The implementation and evaluation of lazy allocation from unused space
are left as future work. As pointed out by Vlastimil Babka [1], it
could be beneficial when a slab cache is created without SLAB_ACCOUNT
but some of the allocations from the cache use __GFP_ACCOUNT; xarray,
for example, does that.

To avoid unnecessary overhead when neither MEMCG (with SLAB_ACCOUNT)
nor MEM_ALLOC_PROFILING is used for the cache, allocate the slabobj_ext
array only when either of them is enabled at slab allocation time.

[ MEMCG=y, MEM_ALLOC_PROFILING=n ]

Before patch (creating ~2.64M directories on ext4):
Slab: 4747880 kB
SReclaimable: 4169652 kB
SUnreclaim: 578228 kB

After patch (creating ~2.64M directories on ext4):
Slab: 4724020 kB
SReclaimable: 4169188 kB
SUnreclaim: 554832 kB (-22.84 MiB)

Enjoy the memory savings!

Link: https://lore.kernel.org/linux-mm/48029aab-20ea-4d90-bfd1-255592b2018e@suse.cz [1]
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Link: https://patch.msgid.link/20260113061845.159790-8-harry.yoo@oracle.com
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Authored by Harry Yoo, committed by Vlastimil Babka (70089d01 4b1530f8)

mm/slub.c (+150 -5)
···
 	return *(unsigned long *)p;
 }
 
+#ifdef CONFIG_SLAB_OBJ_EXT
+
+/*
+ * Check if memory cgroup or memory allocation profiling is enabled.
+ * If enabled, SLUB tries to reduce memory overhead of accounting
+ * slab objects. If neither is enabled when this function is called,
+ * the optimization is simply skipped to avoid affecting caches that do not
+ * need slabobj_ext metadata.
+ *
+ * However, this may disable optimization when memory cgroup or memory
+ * allocation profiling is used, but slabs are created too early
+ * even before those subsystems are initialized.
+ */
+static inline bool need_slab_obj_exts(struct kmem_cache *s)
+{
+	if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
+		return true;
+
+	if (mem_alloc_profiling_enabled())
+		return true;
+
+	return false;
+}
+
+static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
+{
+	return sizeof(struct slabobj_ext) * slab->objects;
+}
+
+static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
+						    struct slab *slab)
+{
+	unsigned long objext_offset;
+
+	objext_offset = s->size * slab->objects;
+	objext_offset = ALIGN(objext_offset, sizeof(struct slabobj_ext));
+	return objext_offset;
+}
+
+static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
+						     struct slab *slab)
+{
+	unsigned long objext_offset = obj_exts_offset_in_slab(s, slab);
+	unsigned long objext_size = obj_exts_size_in_slab(slab);
+
+	return objext_offset + objext_size <= slab_size(slab);
+}
+
+static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
+{
+	unsigned long obj_exts;
+	unsigned long start;
+	unsigned long end;
+
+	obj_exts = slab_obj_exts(slab);
+	if (!obj_exts)
+		return false;
+
+	start = (unsigned long)slab_address(slab);
+	end = start + slab_size(slab);
+	return (obj_exts >= start) && (obj_exts < end);
+}
+#else
+static inline bool need_slab_obj_exts(struct kmem_cache *s)
+{
+	return false;
+}
+
+static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
+{
+	return 0;
+}
+
+static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
+						    struct slab *slab)
+{
+	return 0;
+}
+
+static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
+						     struct slab *slab)
+{
+	return false;
+}
+
+static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_SLUB_DEBUG
 
 /*
···
 	start = slab_address(slab);
 	length = slab_size(slab);
 	end = start + length;
-	remainder = length % s->size;
+
+	if (obj_exts_in_slab(s, slab)) {
+		remainder = length;
+		remainder -= obj_exts_offset_in_slab(s, slab);
+		remainder -= obj_exts_size_in_slab(slab);
+	} else {
+		remainder = length % s->size;
+	}
+
 	if (!remainder)
 		return;
···
 		return;
 	}
 
+	if (obj_exts_in_slab(slab->slab_cache, slab)) {
+		slab->obj_exts = 0;
+		return;
+	}
+
 	/*
 	 * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
 	 * corresponding extension will be NULL. alloc_tag_sub() will throw a
···
 	slab->obj_exts = 0;
 }
 
+/*
+ * Try to allocate slabobj_ext array from unused space.
+ * This function must be called on a freshly allocated slab to prevent
+ * concurrency problems.
+ */
+static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
+{
+	void *addr;
+	unsigned long obj_exts;
+
+	if (!need_slab_obj_exts(s))
+		return;
+
+	if (obj_exts_fit_within_slab_leftover(s, slab)) {
+		addr = slab_address(slab) + obj_exts_offset_in_slab(s, slab);
+		addr = kasan_reset_tag(addr);
+		obj_exts = (unsigned long)addr;
+
+		get_slab_obj_exts(obj_exts);
+		memset(addr, 0, obj_exts_size_in_slab(slab));
+		put_slab_obj_exts(obj_exts);
+
+#ifdef CONFIG_MEMCG
+		obj_exts |= MEMCG_DATA_OBJEXTS;
+#endif
+		slab->obj_exts = obj_exts;
+		slab_set_stride(slab, sizeof(struct slabobj_ext));
+	}
+}
+
 #else /* CONFIG_SLAB_OBJ_EXT */
 
 static inline void init_slab_obj_exts(struct slab *slab)
···
 }
 
 static inline void free_slab_obj_exts(struct slab *slab)
+{
+}
+
+static inline void alloc_slab_obj_exts_early(struct kmem_cache *s,
+					     struct slab *slab)
 {
 }
 
···
 static __always_inline void account_slab(struct slab *slab, int order,
 					 struct kmem_cache *s, gfp_t gfp)
 {
-	if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
+	if (memcg_kmem_online() &&
+	    (s->flags & SLAB_ACCOUNT) &&
+	    !slab_obj_exts(slab))
 		alloc_slab_obj_exts(slab, s, gfp, true);
 
 	mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
···
 	slab->objects = oo_objects(oo);
 	slab->inuse = 0;
 	slab->frozen = 0;
-	init_slab_obj_exts(slab);
-
-	account_slab(slab, oo_order(oo), s, flags);
 
 	slab->slab_cache = s;
···
 	start = slab_address(slab);
 
 	setup_slab_debug(s, slab, start);
+	init_slab_obj_exts(slab);
+	/*
+	 * Poison the slab before initializing the slabobj_ext array
+	 * to prevent the array from being overwritten.
+	 */
+	alloc_slab_obj_exts_early(s, slab);
+	account_slab(slab, oo_order(oo), s, flags);
 
 	shuffle = shuffle_freelist(s, slab);