Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'gro-inline-tcp6_gro_-receive-complete'

Eric Dumazet says:

====================
gro: inline tcp6_gro_{receive,complete}

On some platforms, the GRO call stack is too deep and causes CPU stalls.

Decreasing the call depth by one shows a 1.5% gain on Zen2 CPUs.
(32 RX queues, 100Gbit NIC, RFS enabled, tcp_rr with 128 threads and 10,000 flows)

We can go further by inlining ipv6_gro_{receive,complete}
and take care of IPv4 if there is interest.

Note: the two temporary __always_inline annotations will be replaced with
inline_for_performance when/if it becomes available.

Cumulative size increase for this series (of 3):

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux.3
add/remove: 2/2 grow/shrink: 5/1 up/down: 1572/-471 (1101)
Function                                     old     new   delta
ipv6_gro_receive                            1069    1846    +777
ipv6_gro_complete                            433     733    +300
tcp6_check_fraglist_gro                        -     272    +272
tcp6_gro_complete                            227     306     +79
tcp4_gro_complete                            325     397     +72
ipv6_offload_init                            218     274     +56
__pfx_tcp6_check_fraglist_gro                  -      16     +16
__pfx___skb_incr_checksum_unnecessary         32       -     -32
__skb_incr_checksum_unnecessary              186       -    -186
tcp6_gro_receive                             959     706    -253
Total: Before=22592724, After=22593825, chg +0.00%
====================

Link: https://patch.msgid.link/20260120164903.1912995-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+32 -37
+1 -1
include/linux/skbuff.h
··· 4763 4763 } 4764 4764 } 4765 4765 4766 - static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) 4766 + static __always_inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) 4767 4767 { 4768 4768 if (skb->ip_summed == CHECKSUM_UNNECESSARY) { 4769 4769 if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+2 -3
include/net/gro.h
··· 405 405 struct sk_buff *)); 406 406 INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); 407 407 408 - INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, 409 - struct sk_buff *)); 410 - INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); 408 + struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *); 409 + int udp6_gro_complete(struct sk_buff *, int); 411 410 412 411 #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ 413 412 ({ \
-2
include/net/tcp.h
··· 2324 2324 struct tcphdr *th); 2325 2325 INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); 2326 2326 INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); 2327 - INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); 2328 - INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); 2329 2327 #ifdef CONFIG_INET 2330 2328 void tcp_gro_complete(struct sk_buff *skb); 2331 2329 #else
+1 -1
net/ipv6/Makefile
··· 45 45 46 46 obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o 47 47 obj-$(CONFIG_INET) += output_core.o protocol.o \ 48 - ip6_offload.o tcpv6_offload.o exthdrs_offload.o 48 + ip6_offload.o exthdrs_offload.o 49 49 50 50 obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o 51 51
+22 -21
net/ipv6/ip6_offload.c
··· 19 19 #include <net/gso.h> 20 20 21 21 #include "ip6_offload.h" 22 - 23 - /* All GRO functions are always builtin, except UDP over ipv6, which lays in 24 - * ipv6 module, as it depends on UDPv6 lookup function, so we need special care 25 - * when ipv6 is built as a module 26 - */ 27 - #if IS_BUILTIN(CONFIG_IPV6) 28 - #define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__) 29 - #else 30 - #define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) 31 - #endif 32 - 33 - #define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \ 34 - ({ \ 35 - unlikely(gro_recursion_inc_test(skb)) ? \ 36 - NAPI_GRO_CB(skb)->flush |= 1, NULL : \ 37 - INDIRECT_CALL_L4(cb, f2, f1, head, skb); \ 38 - }) 22 + #include "tcpv6_offload.c" 39 23 40 24 static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto) 41 25 { ··· 282 298 283 299 skb_gro_postpull_rcsum(skb, iph, nlen); 284 300 285 - pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive, 286 - ops->callbacks.gro_receive, head, skb); 301 + if (unlikely(gro_recursion_inc_test(skb))) { 302 + flush = 1; 303 + goto out; 304 + } 287 305 306 + if (likely(proto == IPPROTO_TCP)) 307 + pp = tcp6_gro_receive(head, skb); 308 + #if IS_BUILTIN(CONFIG_IPV6) 309 + else if (likely(proto == IPPROTO_UDP)) 310 + pp = udp6_gro_receive(head, skb); 311 + #endif 312 + else 313 + pp = ops->callbacks.gro_receive(head, skb); 288 314 out: 289 315 skb_gro_flush_final(skb, pp, flush); 290 316 ··· 373 379 } 374 380 375 381 nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops); 382 + 383 + if (likely(ops == &net_hotdata.tcpv6_offload)) 384 + return tcp6_gro_complete(skb, nhoff); 385 + #if IS_BUILTIN(CONFIG_IPV6) 386 + if (ops == &net_hotdata.udpv6_offload) 387 + return udp6_gro_complete(skb, nhoff); 388 + #endif 389 + 376 390 if (WARN_ON(!ops || !ops->callbacks.gro_complete)) 377 391 goto out; 378 392 379 - err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete, 380 - 
udp6_gro_complete, skb, nhoff); 393 + err = ops->callbacks.gro_complete(skb, nhoff); 381 394 382 395 out: 383 396 return err;
+5 -7
net/ipv6/tcpv6_offload.c
··· 24 24 struct net *net; 25 25 int iif, sdif; 26 26 27 - if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST))) 28 - return; 29 - 30 27 p = tcp_gro_lookup(head, th); 31 28 if (p) { 32 29 NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist; ··· 42 45 #endif /* IS_ENABLED(CONFIG_IPV6) */ 43 46 } 44 47 45 - INDIRECT_CALLABLE_SCOPE 46 - struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) 48 + static __always_inline struct sk_buff *tcp6_gro_receive(struct list_head *head, 49 + struct sk_buff *skb) 47 50 { 48 51 struct tcphdr *th; 49 52 ··· 57 60 if (!th) 58 61 goto flush; 59 62 60 - tcp6_check_fraglist_gro(head, skb, th); 63 + if (unlikely(skb->dev->features & NETIF_F_GRO_FRAGLIST)) 64 + tcp6_check_fraglist_gro(head, skb, th); 61 65 62 66 return tcp_gro_receive(head, skb, th); 63 67 ··· 67 69 return NULL; 68 70 } 69 71 70 - INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) 72 + static __always_inline int tcp6_gro_complete(struct sk_buff *skb, int thoff) 71 73 { 72 74 const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; 73 75 const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
+1 -2
net/ipv6/udp_offload.c
··· 132 132 sdif, net->ipv4.udp_table, NULL); 133 133 } 134 134 135 - INDIRECT_CALLABLE_SCOPE 136 135 struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) 137 136 { 138 137 struct udphdr *uh = udp_gro_udphdr(skb); ··· 164 165 return NULL; 165 166 } 166 167 167 - INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) 168 + int udp6_gro_complete(struct sk_buff *skb, int nhoff) 168 169 { 169 170 const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; 170 171 const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset);