mm/slab: place slabobj_ext metadata in unused space within s->size

When a cache has a high s->align value and s->object_size is not
aligned to it, each object ends up with some unused space due to
alignment padding. If this padding is large enough, it can be used to
store the slabobj_ext metadata instead of being wasted.

On my system, this happens with caches like kmem_cache, mm_struct, pid,
task_struct, sighand_cache, xfs_inode, and others.
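As a back-of-the-envelope illustration (not part of the patch), here
is a minimal userspace sketch of the size check that calculate_sizes()
gains in the diff below; the 8-byte sizeof(struct slabobj_ext) is an
assumed MEMCG=y, MEM_ALLOC_PROFILING=n layout:

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned int object_size = 992; /* e.g. xfs_inode */
        unsigned int align = 64;
        unsigned int ext_size = 8;      /* assumed sizeof(struct slabobj_ext) */
        unsigned int aligned_size = ALIGN(object_size, align);

        /* leftover here: 1024 - 992 = 32 bytes per object */
        printf("aligned: %u, leftover: %u\n",
               aligned_size, aligned_size - object_size);
        if (aligned_size - object_size >= ext_size)
                printf("leftover fits slabobj_ext -> SLAB_OBJ_EXT_IN_OBJ\n");
        return 0;
}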

To place the slabobj_ext metadata within each object, the existing
slab_obj_ext() logic can still be used by setting:

- slab->obj_exts = slab_address(slab) + (slabobj_ext offset)
- stride = s->size

slab_obj_ext() doesn't need to know where the metadata is stored, so
this method works without adding any extra overhead to slab_obj_ext().
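To make the base + stride scheme concrete, here is a hedged userspace
sketch of the lookup that slab_obj_ext() effectively performs; the
struct is reduced to an assumed MEMCG-only dummy and obj_ext_at() is
an illustrative name, not a kernel function:

#include <stdint.h>

struct slabobj_ext { void *objcg; }; /* assumed MEMCG-only layout */

/*
 * Vector placement:    base = obj_exts array,
 *                      stride = sizeof(struct slabobj_ext)
 * In-object placement: base = slab_address(slab) + (slabobj_ext offset),
 *                      stride = s->size
 * The same arithmetic resolves both, which is why slab_obj_ext()
 * needs no changes.
 */
static inline struct slabobj_ext *obj_ext_at(uintptr_t base,
                                             unsigned int stride,
                                             unsigned int index)
{
        return (struct slabobj_ext *)(base + (uintptr_t)index * stride);
}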

A good example benefiting from this optimization is xfs_inode
(object_size: 992, align: 64). To measure the memory savings, roughly
2.64 million directories were created on XFS.

[ MEMCG=y, MEM_ALLOC_PROFILING=n ]

Before patch (creating ~2.64M directories on xfs):
Slab: 5175976 kB
SReclaimable: 3837524 kB
SUnreclaim: 1338452 kB

After patch (creating ~2.64M directories on xfs):
Slab: 5152912 kB
SReclaimable: 3838568 kB
SUnreclaim: 1314344 kB (-23.54 MiB)
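
(The SUnreclaim delta checks out: 1338452 kB - 1314344 kB = 24108 kB,
and 24108 / 1024 ≈ 23.54 MiB.)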

Enjoy the memory savings!

Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Link: https://patch.msgid.link/20260113061845.159790-10-harry.yoo@oracle.com
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Diffstat: +101 -11
include/linux/slab.h (+9):

···
 #ifdef CONFIG_SLAB_OBJ_EXT
        _SLAB_NO_OBJ_EXT,
 #endif
+#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
+       _SLAB_OBJ_EXT_IN_OBJ,
+#endif
        _SLAB_FLAGS_LAST_BIT
 };
···
 #define SLAB_NO_OBJ_EXT __SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT)
 #else
 #define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED
+#endif
+
+#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
+#define SLAB_OBJ_EXT_IN_OBJ __SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)
+#else
+#define SLAB_OBJ_EXT_IN_OBJ __SLAB_FLAG_UNUSED
 #endif
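For context, the conditional definition above follows the existing
flag pattern in slab.h, which is roughly:

#define __SLAB_FLAG_BIT(nr)     ((slab_flags_t __force)(1U << (nr)))
#define __SLAB_FLAG_UNUSED      ((slab_flags_t __force)0U)

so s->flags & SLAB_OBJ_EXT_IN_OBJ can be tested unconditionally and
folds to zero when CONFIG_SLAB_OBJ_EXT or CONFIG_64BIT is not set.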
mm/slab_common.c (+5 -2):

···
 struct kmem_cache *kmem_cache;
 
 /*
- * Set of flags that will prevent slab merging
+ * Set of flags that will prevent slab merging.
+ * Any flag that adds per-object metadata should be included,
+ * since slab merging can update s->inuse that affects the metadata layout.
  */
 #define SLAB_NEVER_MERGE (SLAB_DEBUG_FLAGS | SLAB_TYPESAFE_BY_RCU | \
-               SLAB_NOLEAKTRACE | SLAB_FAILSLAB | SLAB_NO_MERGE)
+               SLAB_NOLEAKTRACE | SLAB_FAILSLAB | SLAB_NO_MERGE | \
+               SLAB_OBJ_EXT_IN_OBJ)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
                SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
mm/slub.c (+87 -9):

···
 {
        return false;
 }
+
+#endif
+
+#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
+static bool obj_exts_in_object(struct kmem_cache *s, struct slab *slab)
+{
+       /*
+        * Note we cannot rely on the SLAB_OBJ_EXT_IN_OBJ flag here and need to
+        * check the stride. A cache can have SLAB_OBJ_EXT_IN_OBJ set, but
+        * allocations within slab leftover are preferred. And those may be
+        * possible or not depending on the particular slab's size.
+        */
+       return obj_exts_in_slab(s, slab) &&
+              (slab_get_stride(slab) == s->size);
+}
+
+static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
+{
+       unsigned int offset = get_info_end(s);
+
+       if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
+               offset += sizeof(struct track) * 2;
+
+       if (slub_debug_orig_size(s))
+               offset += sizeof(unsigned long);
+
+       offset += kasan_metadata_size(s, false);
+
+       return offset;
+}
+#else
+static inline bool obj_exts_in_object(struct kmem_cache *s, struct slab *slab)
+{
+       return false;
+}
+
+static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
+{
+       return 0;
+}
 #endif
 
 #ifdef CONFIG_SLUB_DEBUG
···
 
        off += kasan_metadata_size(s, false);
 
+       if (obj_exts_in_object(s, slab))
+               off += sizeof(struct slabobj_ext);
+
        if (off != size_from_object(s))
                /* Beginning of the filler is the free pointer */
                print_section(KERN_ERR, "Padding  ", p + off,
···
  * between metadata and the next object, independent of alignment.
  * - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
  * [Final alignment padding]
- * - Any bytes added by ALIGN(size, s->align) to reach s->size.
- * - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
+ * - Bytes added by ALIGN(size, s->align) to reach s->size.
+ * - When the padding is large enough, it can be used to store
+ *   struct slabobj_ext for accounting metadata (obj_exts_in_object()).
+ * - The remaining bytes (if any) are filled with 0x5a (POISON_INUSE)
+ *   when SLAB_POISON is set.
  *
  * Notes:
  * - Redzones are filled by init_object() with SLUB_RED_ACTIVE/INACTIVE.
···
 
        off += kasan_metadata_size(s, false);
 
+       if (obj_exts_in_object(s, slab))
+               off += sizeof(struct slabobj_ext);
+
        if (size_from_object(s) == off)
                return 1;
···
        length = slab_size(slab);
        end = start + length;
 
-       if (obj_exts_in_slab(s, slab)) {
+       if (obj_exts_in_slab(s, slab) && !obj_exts_in_object(s, slab)) {
                remainder = length;
                remainder -= obj_exts_offset_in_slab(s, slab);
                remainder -= obj_exts_size_in_slab(slab);
···
 #endif
                slab->obj_exts = obj_exts;
                slab_set_stride(slab, sizeof(struct slabobj_ext));
+       } else if (s->flags & SLAB_OBJ_EXT_IN_OBJ) {
+               unsigned int offset = obj_exts_offset_in_object(s);
+
+               obj_exts = (unsigned long)slab_address(slab);
+               obj_exts += s->red_left_pad;
+               obj_exts += offset;
+
+               get_slab_obj_exts(obj_exts);
+               for_each_object(addr, s, slab_address(slab), slab->objects)
+                       memset(kasan_reset_tag(addr) + offset, 0,
+                              sizeof(struct slabobj_ext));
+               put_slab_obj_exts(obj_exts);
+
+#ifdef CONFIG_MEMCG
+               obj_exts |= MEMCG_DATA_OBJEXTS;
+#endif
+               slab->obj_exts = obj_exts;
+               slab_set_stride(slab, s->size);
        }
 }
···
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-static inline size_t slab_ksize(const struct kmem_cache *s)
+static inline size_t slab_ksize(struct slab *slab)
 {
+       struct kmem_cache *s = slab->slab_cache;
+
 #ifdef CONFIG_SLUB_DEBUG
        /*
         * Debugging requires use of the padding between object
···
                return s->object_size;
        /*
         * If we have the need to store the freelist pointer
-        * back there or track user information then we can
+        * or any other metadata back there then we can
         * only use the space before that information.
         */
        if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
+               return s->inuse;
+       else if (obj_exts_in_object(s, slab))
                return s->inuse;
        /*
         * Else we can use all the padding etc for the allocation
···
 
 static size_t __ksize(const void *object)
 {
-       const struct page *page;
-       const struct slab *slab;
+       struct page *page;
+       struct slab *slab;
 
        if (unlikely(object == ZERO_SIZE_PTR))
                return 0;
···
                skip_orig_size_check(slab->slab_cache, object);
 #endif
 
-       return slab_ksize(slab->slab_cache);
+       return slab_ksize(slab);
 }
 
 /**
···
 {
        slab_flags_t flags = s->flags;
        unsigned int size = s->object_size;
+       unsigned int aligned_size;
        unsigned int order;
 
        /*
···
         * offset 0. In order to align the objects we have to simply size
         * each object to conform to the alignment.
         */
-       size = ALIGN(size, s->align);
+       aligned_size = ALIGN(size, s->align);
+#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
+       if (aligned_size - size >= sizeof(struct slabobj_ext))
+               s->flags |= SLAB_OBJ_EXT_IN_OBJ;
+#endif
+       size = aligned_size;
+
        s->size = size;
        s->reciprocal_size = reciprocal_value(size);
        order = calculate_order(size);
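Putting the pieces together for the xfs_inode example (object_size
992, align 64, no debug metadata), the per-object layout would look
roughly like this; the 8-byte metadata size is again an assumed
MEMCG-only layout:

offset    0 ..  991   object data
offset  992 ..  999   struct slabobj_ext (at obj_exts_offset_in_object())
offset 1000 .. 1023   remaining alignment padding
                      next object begins at stride s->size = 1024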