Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add support to set NAPI threaded for individual NAPI

A net device has a `threaded` sysfs attribute that can be used to enable
threaded NAPI polling on all of the NAPI contexts under that device. Allow
enabling threaded NAPI polling at the individual NAPI level using netlink.

Extend the netlink operation `napi-set` to allow setting the threaded
attribute of a NAPI. Setting it to 1 enables threaded polling on that NAPI
context; setting it to 0 disables it.

Add a test in `nl_netdev.py` that verifies various cases of threaded
NAPI being set at the NAPI level and at the device level.

Tested with:
./tools/testing/selftests/net/nl_netdev.py
TAP version 13
1..7
ok 1 nl_netdev.empty_check
ok 2 nl_netdev.lo_check
ok 3 nl_netdev.page_pool_check
ok 4 nl_netdev.napi_list_check
ok 5 nl_netdev.dev_set_threaded
ok 6 nl_netdev.napi_set_threaded
ok 7 nl_netdev.nsim_rxq_reset_down
# Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0

Signed-off-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250710211203.3979655-1-skhawaja@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Samiullah Khawaja and committed by
Jakub Kicinski
2677010e a44312d5

+162 -7
+10
Documentation/netlink/specs/netdev.yaml
··· 283 283 doc: The timeout, in nanoseconds, of how long to suspend irq 284 284 processing, if event polling finds events 285 285 type: uint 286 + - 287 + name: threaded 288 + doc: Whether the NAPI is configured to operate in threaded polling 289 + mode. If this is set to 1 then the NAPI context operates in 290 + threaded polling mode. 291 + type: uint 292 + checks: 293 + max: 1 286 294 - 287 295 name: xsk-info 288 296 attributes: [] ··· 702 694 - defer-hard-irqs 703 695 - gro-flush-timeout 704 696 - irq-suspend-timeout 697 + - threaded 705 698 dump: 706 699 request: 707 700 attributes: ··· 755 746 - defer-hard-irqs 756 747 - gro-flush-timeout 757 748 - irq-suspend-timeout 749 + - threaded 758 750 - 759 751 name: bind-tx 760 752 doc: Bind dmabuf to netdev for TX
+8 -1
Documentation/networking/napi.rst
··· 444 444 order than the process IDs of the kernel threads. 445 445 446 446 Threaded NAPI is controlled by writing 0/1 to the ``threaded`` file in 447 - netdev's sysfs directory. 447 + netdev's sysfs directory. It can also be enabled for a specific NAPI using 448 + netlink interface. 449 + 450 + For example, using the script: 451 + 452 + .. code-block:: bash 453 + 454 + $ ynl --family netdev --do napi-set --json='{"id": 66, "threaded": 1}' 448 455 449 456 .. rubric:: Footnotes 450 457
+1
include/linux/netdevice.h
··· 369 369 u64 irq_suspend_timeout; 370 370 u32 defer_hard_irqs; 371 371 cpumask_t affinity_mask; 372 + bool threaded; 372 373 unsigned int napi_id; 373 374 }; 374 375
+1
include/uapi/linux/netdev.h
··· 134 134 NETDEV_A_NAPI_DEFER_HARD_IRQS, 135 135 NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, 136 136 NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, 137 + NETDEV_A_NAPI_THREADED, 137 138 138 139 __NETDEV_A_NAPI_MAX, 139 140 NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
+27 -3
net/core/dev.c
··· 6961 6961 napi->thread = NULL; 6962 6962 } 6963 6963 6964 + int napi_set_threaded(struct napi_struct *napi, bool threaded) 6965 + { 6966 + if (threaded) { 6967 + if (!napi->thread) { 6968 + int err = napi_kthread_create(napi); 6969 + 6970 + if (err) 6971 + return err; 6972 + } 6973 + } 6974 + 6975 + if (napi->config) 6976 + napi->config->threaded = threaded; 6977 + 6978 + if (!threaded && napi->thread) { 6979 + napi_stop_kthread(napi); 6980 + } else { 6981 + /* Make sure kthread is created before THREADED bit is set. */ 6982 + smp_mb__before_atomic(); 6983 + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); 6984 + } 6985 + 6986 + return 0; 6987 + } 6988 + 6964 6989 int dev_set_threaded(struct net_device *dev, bool threaded) 6965 6990 { 6966 6991 struct napi_struct *napi; 6967 6992 int err = 0; 6968 6993 6969 6994 netdev_assert_locked_or_invisible(dev); 6970 - 6971 - if (dev->threaded == threaded) 6972 - return 0; 6973 6995 6974 6996 if (threaded) { 6975 6997 list_for_each_entry(napi, &dev->napi_list, dev_list) { ··· 7243 7221 napi_hash_add(n); 7244 7222 n->config->napi_id = n->napi_id; 7245 7223 } 7224 + 7225 + WARN_ON_ONCE(napi_set_threaded(n, n->config->threaded)); 7246 7226 } 7247 7227 7248 7228 static void napi_save_config(struct napi_struct *n)
+7
net/core/dev.h
··· 315 315 WRITE_ONCE(n->irq_suspend_timeout, timeout); 316 316 } 317 317 318 + static inline bool napi_get_threaded(struct napi_struct *n) 319 + { 320 + return test_bit(NAPI_STATE_THREADED, &n->state); 321 + } 322 + 323 + int napi_set_threaded(struct napi_struct *n, bool threaded); 324 + 318 325 int rps_cpumask_housekeeping(struct cpumask *mask); 319 326 320 327 #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
+3 -2
net/core/netdev-genl-gen.c
··· 92 92 }; 93 93 94 94 /* NETDEV_CMD_NAPI_SET - do */ 95 - static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = { 95 + static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED + 1] = { 96 96 [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, 97 97 [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), 98 98 [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, 99 99 [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, 100 + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_UINT, 1), 100 101 }; 101 102 102 103 /* NETDEV_CMD_BIND_TX - do */ ··· 194 193 .cmd = NETDEV_CMD_NAPI_SET, 195 194 .doit = netdev_nl_napi_set_doit, 196 195 .policy = netdev_napi_set_nl_policy, 197 - .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, 196 + .maxattr = NETDEV_A_NAPI_THREADED, 198 197 .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, 199 198 }, 200 199 {
+14
net/core/netdev-genl.c
··· 184 184 if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) 185 185 goto nla_put_failure; 186 186 187 + if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED, 188 + napi_get_threaded(napi))) 189 + goto nla_put_failure; 190 + 187 191 if (napi->thread) { 188 192 pid = task_pid_nr(napi->thread); 189 193 if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) ··· 326 322 { 327 323 u64 irq_suspend_timeout = 0; 328 324 u64 gro_flush_timeout = 0; 325 + u8 threaded = 0; 329 326 u32 defer = 0; 327 + 328 + if (info->attrs[NETDEV_A_NAPI_THREADED]) { 329 + int ret; 330 + 331 + threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); 332 + ret = napi_set_threaded(napi, !!threaded); 333 + if (ret) 334 + return ret; 335 + } 330 336 331 337 if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { 332 338 defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
+1
tools/include/uapi/linux/netdev.h
··· 134 134 NETDEV_A_NAPI_DEFER_HARD_IRQS, 135 135 NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, 136 136 NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, 137 + NETDEV_A_NAPI_THREADED, 137 138 138 139 __NETDEV_A_NAPI_MAX, 139 140 NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
+90 -1
tools/testing/selftests/net/nl_netdev.py
··· 35 35 ksft_eq(len(napis), 100, 36 36 comment=f"queue count after reset queue {q} mode {i}") 37 37 38 + def napi_set_threaded(nf) -> None: 39 + """ 40 + Test that verifies various cases of napi threaded 41 + set and unset at napi and device level. 42 + """ 43 + with NetdevSimDev(queue_count=2) as nsimdev: 44 + nsim = nsimdev.nsims[0] 45 + 46 + ip(f"link set dev {nsim.ifname} up") 47 + 48 + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) 49 + ksft_eq(len(napis), 2) 50 + 51 + napi0_id = napis[0]['id'] 52 + napi1_id = napis[1]['id'] 53 + 54 + # set napi threaded and verify 55 + nf.napi_set({'id': napi0_id, 'threaded': 1}) 56 + napi0 = nf.napi_get({'id': napi0_id}) 57 + ksft_eq(napi0['threaded'], 1) 58 + ksft_ne(napi0.get('pid'), None) 59 + 60 + # check it is not set for napi1 61 + napi1 = nf.napi_get({'id': napi1_id}) 62 + ksft_eq(napi1['threaded'], 0) 63 + ksft_eq(napi1.get('pid'), None) 64 + 65 + ip(f"link set dev {nsim.ifname} down") 66 + ip(f"link set dev {nsim.ifname} up") 67 + 68 + # verify if napi threaded is still set 69 + napi0 = nf.napi_get({'id': napi0_id}) 70 + ksft_eq(napi0['threaded'], 1) 71 + ksft_ne(napi0.get('pid'), None) 72 + 73 + # check it is still not set for napi1 74 + napi1 = nf.napi_get({'id': napi1_id}) 75 + ksft_eq(napi1['threaded'], 0) 76 + ksft_eq(napi1.get('pid'), None) 77 + 78 + # unset napi threaded and verify 79 + nf.napi_set({'id': napi0_id, 'threaded': 0}) 80 + napi0 = nf.napi_get({'id': napi0_id}) 81 + ksft_eq(napi0['threaded'], 0) 82 + ksft_eq(napi0.get('pid'), None) 83 + 84 + # set threaded at device level 85 + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") 86 + 87 + # check napi threaded is set for both napis 88 + napi0 = nf.napi_get({'id': napi0_id}) 89 + ksft_eq(napi0['threaded'], 1) 90 + ksft_ne(napi0.get('pid'), None) 91 + napi1 = nf.napi_get({'id': napi1_id}) 92 + ksft_eq(napi1['threaded'], 1) 93 + ksft_ne(napi1.get('pid'), None) 94 + 95 + # unset threaded at device level 96 + system(f"echo 0 > 
/sys/class/net/{nsim.ifname}/threaded") 97 + 98 + # check napi threaded is unset for both napis 99 + napi0 = nf.napi_get({'id': napi0_id}) 100 + ksft_eq(napi0['threaded'], 0) 101 + ksft_eq(napi0.get('pid'), None) 102 + napi1 = nf.napi_get({'id': napi1_id}) 103 + ksft_eq(napi1['threaded'], 0) 104 + ksft_eq(napi1.get('pid'), None) 105 + 106 + # set napi threaded for napi0 107 + nf.napi_set({'id': napi0_id, 'threaded': 1}) 108 + napi0 = nf.napi_get({'id': napi0_id}) 109 + ksft_eq(napi0['threaded'], 1) 110 + ksft_ne(napi0.get('pid'), None) 111 + 112 + # unset threaded at device level 113 + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") 114 + 115 + # check napi threaded is unset for both napis 116 + napi0 = nf.napi_get({'id': napi0_id}) 117 + ksft_eq(napi0['threaded'], 0) 118 + ksft_eq(napi0.get('pid'), None) 119 + napi1 = nf.napi_get({'id': napi1_id}) 120 + ksft_eq(napi1['threaded'], 0) 121 + ksft_eq(napi1.get('pid'), None) 122 + 38 123 def dev_set_threaded(nf) -> None: 39 124 """ 40 125 Test that verifies various cases of napi threaded ··· 141 56 142 57 # check napi threaded is set for both napis 143 58 napi0 = nf.napi_get({'id': napi0_id}) 59 + ksft_eq(napi0['threaded'], 1) 144 60 ksft_ne(napi0.get('pid'), None) 145 61 napi1 = nf.napi_get({'id': napi1_id}) 62 + ksft_eq(napi1['threaded'], 1) 146 63 ksft_ne(napi1.get('pid'), None) 147 64 148 65 # unset threaded ··· 152 65 153 66 # check napi threaded is unset for both napis 154 67 napi0 = nf.napi_get({'id': napi0_id}) 68 + ksft_eq(napi0['threaded'], 0) 155 69 ksft_eq(napi0.get('pid'), None) 156 70 napi1 = nf.napi_get({'id': napi1_id}) 71 + ksft_eq(napi1['threaded'], 0) 157 72 ksft_eq(napi1.get('pid'), None) 158 73 159 74 def nsim_rxq_reset_down(nf) -> None: ··· 245 156 def main() -> None: 246 157 nf = NetdevFamily() 247 158 ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, 248 - dev_set_threaded, nsim_rxq_reset_down], 159 + dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down], 249 160 
args=(nf, )) 250 161 ksft_exit() 251 162