Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tcp: reduce tcp sockets size by one cache line

By default, when a kmem_cache is created with SLAB_TYPESAFE_BY_RCU,
slub has to use extra storage for the freelist pointer after each
object, because slub assumes that any bit in the object
can be used by RCU readers.

Because proto_register() is also using SLAB_HWCACHE_ALIGN,
this forces slub to use one extra cache line per object.

We can instead put the slub freelist pointer anywhere in the object,
provided that concurrent RCU readers are not supposed to
use the value stored at that location.

Add a new (struct sock)sk_freeptr field, in a union
with sk_rcu: no RCU reader needs to look at sk_rcu,
which is only used during the free phase.

Tested:

grep . /sys/kernel/slab/TCP/{object_size,slab_size,objs_per_slab}
grep . /sys/kernel/slab/TCPv6/{object_size,slab_size,objs_per_slab}

Before:

/sys/kernel/slab/TCP/object_size:2368
/sys/kernel/slab/TCP/slab_size:2432
/sys/kernel/slab/TCP/objs_per_slab:13

/sys/kernel/slab/TCPv6/object_size:2496
/sys/kernel/slab/TCPv6/slab_size:2560
/sys/kernel/slab/TCPv6/objs_per_slab:12

After this patch, we can pack one more TCPv6 object per slab,
and object_size == slab_size.

/sys/kernel/slab/TCP/object_size:2368
/sys/kernel/slab/TCP/slab_size:2368
/sys/kernel/slab/TCP/objs_per_slab:13

/sys/kernel/slab/TCPv6/object_size:2496
/sys/kernel/slab/TCPv6/slab_size:2496
/sys/kernel/slab/TCPv6/objs_per_slab:13

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260129153458.4163797-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
ed9b7004 6ad42b24

+24 -7
+10 -1
include/net/sock.h
··· 341 341 * @sk_reuseport_cb: reuseport group container 342 342 * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage 343 343 * @sk_rcu: used during RCU grace period 344 + * @sk_freeptr: used for SLAB_TYPESAFE_BY_RCU managed sockets 344 345 * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) 345 346 * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME 346 347 * @sk_txtime_report_errors: set report errors mode for SO_TXTIME ··· 583 582 struct bpf_local_storage __rcu *sk_bpf_storage; 584 583 #endif 585 584 struct numa_drop_counters *sk_drop_counters; 586 - struct rcu_head sk_rcu; 585 + /* sockets using SLAB_TYPESAFE_BY_RCU can use sk_freeptr. 586 + * By the time kfree() is called, sk_rcu can not be in 587 + * use and can be mangled. 588 + */ 589 + union { 590 + struct rcu_head sk_rcu; 591 + freeptr_t sk_freeptr; 592 + }; 587 593 netns_tracker ns_tracker; 588 594 struct xarray sk_user_frags; 589 595 ··· 1376 1368 1377 1369 struct kmem_cache *slab; 1378 1370 unsigned int obj_size; 1371 + unsigned int freeptr_offset; 1379 1372 unsigned int ipv6_pinfo_offset; 1380 1373 slab_flags_t slab_flags; 1381 1374 unsigned int useroffset; /* Usercopy region offset */
+10 -6
net/core/sock.c
··· 4193 4193 return -EINVAL; 4194 4194 } 4195 4195 if (alloc_slab) { 4196 - prot->slab = kmem_cache_create_usercopy(prot->name, 4197 - prot->obj_size, 0, 4198 - SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | 4199 - prot->slab_flags, 4200 - prot->useroffset, prot->usersize, 4201 - NULL); 4196 + struct kmem_cache_args args = { 4197 + .useroffset = prot->useroffset, 4198 + .usersize = prot->usersize, 4199 + .freeptr_offset = prot->freeptr_offset, 4200 + .use_freeptr_offset = !!prot->freeptr_offset, 4201 + }; 4202 4202 4203 + prot->slab = kmem_cache_create(prot->name, prot->obj_size, 4204 + &args, 4205 + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | 4206 + prot->slab_flags); 4203 4207 if (prot->slab == NULL) { 4204 4208 pr_crit("%s: Can't create sock SLAB cache!\n", 4205 4209 prot->name);
+2
net/ipv4/tcp_ipv4.c
··· 3460 3460 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 3461 3461 .max_header = MAX_TCP_HEADER, 3462 3462 .obj_size = sizeof(struct tcp_sock), 3463 + .freeptr_offset = offsetof(struct tcp_sock, 3464 + inet_conn.icsk_inet.sk.sk_freeptr), 3463 3465 .slab_flags = SLAB_TYPESAFE_BY_RCU, 3464 3466 .twsk_prot = &tcp_timewait_sock_ops, 3465 3467 .rsk_prot = &tcp_request_sock_ops,
+2
net/ipv6/tcp_ipv6.c
··· 2332 2332 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2333 2333 .max_header = MAX_TCP_HEADER, 2334 2334 .obj_size = sizeof(struct tcp6_sock), 2335 + .freeptr_offset = offsetof(struct tcp6_sock, 2336 + tcp.inet_conn.icsk_inet.sk.sk_freeptr), 2335 2337 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), 2336 2338 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2337 2339 .twsk_prot = &tcp6_timewait_sock_ops,