Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

net: add NUMA awareness to skb_attempt_defer_free()

Instead of sharing sd->defer_list and sd->defer_count among many CPUs,
add one pair for each NUMA node.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250928084934.3266948-4-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
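
To make the data-structure change easier to picture, here is a small standalone userspace sketch (not kernel code; nr_cpus, nr_nodes and the struct name are made up for illustration). It mimics the per-CPU array of nr_node_ids skb_defer_node slots that the patch adds to net_hotdata: the producer picks a slot from the skb's allocating CPU plus the freeing CPU's NUMA node, and the flush on the allocating CPU walks all of that CPU's per-node slots.

/*
 * Userspace sketch of the layout introduced by this patch: one
 * (defer_list, defer_count) pair per (CPU, NUMA node), stored per CPU as
 * nr_node_ids consecutive entries.  The kernel indexes a slot as
 * per_cpu_ptr(net_hotdata.skb_defer_nodes, skb->alloc_cpu) + numa_node_id();
 * here the same idea uses a flat calloc'ed array.
 */
#include <stdio.h>
#include <stdlib.h>

struct defer_slot {
        long count;             /* stands in for atomic_long_t defer_count */
        void *list_head;        /* stands in for struct llist_head defer_list */
};

int main(void)
{
        int nr_cpus = 8, nr_nodes = 2;          /* example topology */
        struct defer_slot *slots;

        slots = calloc((size_t)nr_cpus * nr_nodes, sizeof(*slots));
        if (!slots)
                return 1;

        /*
         * Producer side (skb_attempt_defer_free): the slot is picked by the
         * skb's allocating CPU and the NUMA node of the CPU doing the free,
         * so CPUs on different nodes no longer contend on one list.
         */
        int alloc_cpu = 5, freeing_node = 1;
        struct defer_slot *slot = &slots[alloc_cpu * nr_nodes + freeing_node];
        slot->count++;

        /*
         * Consumer side (skb_defer_free_flush, running on alloc_cpu):
         * walk every per-node slot belonging to that CPU.
         */
        for (int node = 0; node < nr_nodes; node++)
                printf("cpu %d node %d count %ld\n", alloc_cpu, node,
                       slots[alloc_cpu * nr_nodes + node].count);

        free(slots);
        return 0;
}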

Authored by Eric Dumazet, committed by Paolo Abeni
5628f3fe 844c9db7

+37 -22

include/linux/netdevice.h  -4
···
 
         struct numa_drop_counters drop_counters;
 
-        /* Another possibly contended cache line */
-        struct llist_head defer_list ____cacheline_aligned_in_smp;
-        atomic_long_t defer_count;
-
         int defer_ipi_scheduled ____cacheline_aligned_in_smp;
         call_single_data_t defer_csd;
 };

include/net/hotdata.h  +7
···
 #ifndef _NET_HOTDATA_H
 #define _NET_HOTDATA_H
 
+#include <linux/llist.h>
 #include <linux/types.h>
 #include <linux/netdevice.h>
 #include <net/protocol.h>
+
+struct skb_defer_node {
+        struct llist_head defer_list;
+        atomic_long_t defer_count;
+} ____cacheline_aligned_in_smp;
 
 /* Read mostly data used in network fast paths. */
 struct net_hotdata {
···
         struct rps_sock_flow_table __rcu *rps_sock_flow_table;
         u32 rps_cpu_mask;
 #endif
+        struct skb_defer_node __percpu *skb_defer_nodes;
         int gro_normal_batch;
         int netdev_budget;
         int netdev_budget_usecs;

net/core/dev.c  +23 -12
···
                 __napi_schedule_irqoff(&mysd->backlog);
 }
 
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu)
+void kick_defer_list_purge(unsigned int cpu)
 {
+        struct softnet_data *sd = &per_cpu(softnet_data, cpu);
         unsigned long flags;
 
         if (use_backlog_threads()) {
···
 }
 EXPORT_SYMBOL(napi_complete_done);
 
-static void skb_defer_free_flush(struct softnet_data *sd)
+static void skb_defer_free_flush(void)
 {
         struct llist_node *free_list;
         struct sk_buff *skb, *next;
+        struct skb_defer_node *sdn;
+        int node;
 
-        if (llist_empty(&sd->defer_list))
-                return;
-        atomic_long_set(&sd->defer_count, 0);
-        free_list = llist_del_all(&sd->defer_list);
+        for_each_node(node) {
+                sdn = this_cpu_ptr(net_hotdata.skb_defer_nodes) + node;
 
-        llist_for_each_entry_safe(skb, next, free_list, ll_node) {
-                napi_consume_skb(skb, 1);
+                if (llist_empty(&sdn->defer_list))
+                        continue;
+                atomic_long_set(&sdn->defer_count, 0);
+                free_list = llist_del_all(&sdn->defer_list);
+
+                llist_for_each_entry_safe(skb, next, free_list, ll_node) {
+                        napi_consume_skb(skb, 1);
+                }
         }
 }
···
         if (work > 0)
                 __NET_ADD_STATS(dev_net(napi->dev),
                                 LINUX_MIB_BUSYPOLLRXPACKETS, work);
-        skb_defer_free_flush(this_cpu_ptr(&softnet_data));
+        skb_defer_free_flush();
         bpf_net_ctx_clear(bpf_net_ctx);
         local_bh_enable();
···
                 local_irq_disable();
                 net_rps_action_and_irq_enable(sd);
         }
-        skb_defer_free_flush(sd);
+        skb_defer_free_flush();
         bpf_net_ctx_clear(bpf_net_ctx);
         local_bh_enable();
···
         for (;;) {
                 struct napi_struct *n;
 
-                skb_defer_free_flush(sd);
+                skb_defer_free_flush();
 
                 if (list_empty(&list)) {
                         if (list_empty(&repoll)) {
···
                 sd->cpu = i;
 #endif
                 INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
-                init_llist_head(&sd->defer_list);
 
                 gro_init(&sd->backlog.gro);
                 sd->backlog.poll = process_backlog;
···
                 if (net_page_pool_create(i))
                         goto out;
         }
+        net_hotdata.skb_defer_nodes =
+                __alloc_percpu(sizeof(struct skb_defer_node) * nr_node_ids,
+                               __alignof__(struct skb_defer_node));
+        if (!net_hotdata.skb_defer_nodes)
+                goto out;
         if (use_backlog_threads())
                 smpboot_register_percpu_thread(&backlog_threads);

net/core/dev.h  +1 -1
···
         WARN_ON(READ_ONCE(napi->list_owner) != -1);
 }
 
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
+void kick_defer_list_purge(unsigned int cpu);
 
 #define XMIT_RECURSION_LIMIT 8

net/core/skbuff.c  +6 -5
···
  */
 void skb_attempt_defer_free(struct sk_buff *skb)
 {
+        struct skb_defer_node *sdn;
         unsigned long defer_count;
         int cpu = skb->alloc_cpu;
-        struct softnet_data *sd;
         unsigned int defer_max;
         bool kick;
 
···
         DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
         DEBUG_NET_WARN_ON_ONCE(skb->destructor);
 
-        sd = &per_cpu(softnet_data, cpu);
+        sdn = per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id();
+
         defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
-        defer_count = atomic_long_inc_return(&sd->defer_count);
+        defer_count = atomic_long_inc_return(&sdn->defer_count);
 
         if (defer_count >= defer_max)
                 goto nodefer;
 
-        llist_add(&skb->ll_node, &sd->defer_list);
+        llist_add(&skb->ll_node, &sdn->defer_list);
 
         /* Send an IPI every time queue reaches half capacity. */
         kick = (defer_count - 1) == (defer_max >> 1);
···
          * if we are unlucky enough (this seems very unlikely).
          */
         if (unlikely(kick))
-                kick_defer_list_purge(sd, cpu);
+                kick_defer_list_purge(cpu);
 }
 
 static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,