Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'add-support-to-do-threaded-napi-busy-poll'

Samiullah Khawaja says:

====================
Add support to do threaded napi busy poll

Extend the already existing support of threaded napi poll to do continuous
busy polling.

This is used for doing continuous polling of napi to fetch descriptors
from backing RX/TX queues for low latency applications. Allow enabling
of threaded busypoll using netlink so this can be enabled on a set of
dedicated napis for low latency applications.

Once enabled user can fetch the PID of the kthread doing NAPI polling
and set affinity, priority and scheduler for it depending on the
low-latency requirements.

Extend the netlink interface to allow enabling/disabling threaded
busypolling at individual napi level.

We use this for our AF_XDP-based hard low-latency use case, which has a
usec-level latency requirement. For our use case we want low jitter and
stable latency at P99.

Following is an analysis and comparison of available (and compatible)
busy poll interfaces for a low latency usecase with stable P99. This can
be suitable for applications that want very low latency at the expense
of cpu usage and efficiency.

Already existing APIs (SO_BUSYPOLL and epoll) allow busy polling a NAPI
backing a socket, but the missing piece is a mechanism to busy poll a
NAPI instance in a dedicated thread while ignoring available events or
packets, regardless of the userspace API. Most existing mechanisms are
designed to work in a pattern where you poll until new packets or events
are received, after which userspace is expected to handle them.

As a result, one has to hack together a solution using a mechanism
intended to receive packets or events, not to simply NAPI poll. NAPI
threaded busy polling, on the other hand, provides this capability
natively, independent of any userspace API. This makes it really easy to
setup and manage.

For analysis we use an AF_XDP-based benchmarking tool, `xsk_rr`. The
tool, and how it tries to simulate the real workload, is described
below:

- It sends UDP packets between 2 machines.
- The client machine sends packets at a fixed frequency. To maintain the
frequency of the packets being sent, we use open-loop sampling; that is,
the packets are sent in a separate thread.
- The server replies to each packet inline: it reads the packet from the
recv ring and sends the reply using the tx ring.
- To simulate the application processing time, we use a configurable
delay in usecs on the client side after a reply is received from the
server.

The xsk_rr tool is posted separately as an RFC for tools/testing/selftests.

We use this tool with the following NAPI polling configurations:

- Interrupts only
- SO_BUSYPOLL (inline in the same thread where the client receives the
packet).
- SO_BUSYPOLL (separate thread and separate core)
- Threaded NAPI busypoll

The system is configured using the following script in all 4 cases:

```
echo 0 | sudo tee /sys/class/net/eth0/threaded
echo 0 | sudo tee /proc/sys/kernel/timer_migration
echo off | sudo tee /sys/devices/system/cpu/smt/control

sudo ethtool -L eth0 rx 1 tx 1
sudo ethtool -G eth0 rx 1024

echo 0 | sudo tee /proc/sys/net/core/rps_sock_flow_entries
echo 0 | sudo tee /sys/class/net/eth0/queues/rx-0/rps_cpus

# pin IRQs on CPU 2
IRQS="$(gawk '/eth0-(TxRx-)?1/ {match($1, /([0-9]+)/, arr); \
print arr[0]}' < /proc/interrupts)"
for irq in "${IRQS}"; \
do echo 2 | sudo tee /proc/irq/$irq/smp_affinity_list; done

echo -1 | sudo tee /proc/sys/kernel/sched_rt_runtime_us

for i in /sys/devices/virtual/workqueue/*/cpumask; \
do echo $i; echo 1,2,3,4,5,6 > $i; done

if [[ -z "$1" ]]; then
echo 400 | sudo tee /proc/sys/net/core/busy_read
echo 100 | sudo tee /sys/class/net/eth0/napi_defer_hard_irqs
echo 15000 | sudo tee /sys/class/net/eth0/gro_flush_timeout
fi

sudo ethtool -C eth0 adaptive-rx off adaptive-tx off rx-usecs 0 tx-usecs 0

if [[ "$1" == "enable_threaded" ]]; then
echo 0 | sudo tee /proc/sys/net/core/busy_poll
echo 0 | sudo tee /proc/sys/net/core/busy_read
echo 100 | sudo tee /sys/class/net/eth0/napi_defer_hard_irqs
echo 15000 | sudo tee /sys/class/net/eth0/gro_flush_timeout
NAPI_ID=$(ynl --family netdev --output-json --do queue-get \
--json '{"ifindex": '${IFINDEX}', "id": '0', "type": "rx"}' | jq '."napi-id"')

ynl --family netdev --json '{"id": "'${NAPI_ID}'", "threaded": "busy-poll"}'

NAPI_T=$(ynl --family netdev --output-json --do napi-get \
--json '{"id": "'$NAPI_ID'"}' | jq '."pid"')

sudo chrt -f -p 50 $NAPI_T

# pin threaded poll thread to CPU 2
sudo taskset -pc 2 $NAPI_T
fi

if [[ "$1" == "enable_interrupt" ]]; then
echo 0 | sudo tee /proc/sys/net/core/busy_read
echo 0 | sudo tee /sys/class/net/eth0/napi_defer_hard_irqs
echo 15000 | sudo tee /sys/class/net/eth0/gro_flush_timeout
fi
```

To enable the various configurations, the script can be run as follows:

- Interrupt Only
```
<script> enable_interrupt
```

- SO_BUSYPOLL (no arguments to script)
```
<script>
```

- NAPI threaded busypoll
```
<script> enable_threaded
```

Once configured, the workload is run with various configurations using
the following commands. Set the period (1/frequency) and the delay, both
in usecs, to produce results for a given packet frequency and
application processing delay.

## Interrupt Only and SO_BUSYPOLL (inline)

- Server
```
sudo chrt -f 50 taskset -c 3-5 ./xsk_rr -o 0 -B 400 -i eth0 -4 \
-D <IP-dest> -S <IP-src> -M <MAC-dst> -m <MAC-src> -p 54321 -h -v
```

- Client
```
sudo chrt -f 50 taskset -c 3-5 ./xsk_rr -o 0 -B 400 -i eth0 -4 \
-S <IP-src> -D <IP-dest> -m <MAC-src> -M <MAC-dst> -p 54321 \
-P <Period-usecs> -d <Delay-usecs> -T -l 1 -v
```

## SO_BUSYPOLL (done on a separate core using recvfrom)

Argument -t spawns a separate thread and continuously calls recvfrom.

- Server
```
sudo chrt -f 50 taskset -c 3-5 ./xsk_rr -o 0 -B 400 -i eth0 -4 \
-D <IP-dest> -S <IP-src> -M <MAC-dst> -m <MAC-src> -p 54321 \
-h -v -t
```

- Client
```
sudo chrt -f 50 taskset -c 3-5 ./xsk_rr -o 0 -B 400 -i eth0 -4 \
-S <IP-src> -D <IP-dest> -m <MAC-src> -M <MAC-dst> -p 54321 \
-P <Period-usecs> -d <Delay-usecs> -T -l 1 -v -t
```

## NAPI Threaded Busy Poll

Argument -n skips the recvfrom call as there is no recv kick needed.

- Server
```
sudo chrt -f 50 taskset -c 3-5 ./xsk_rr -o 0 -B 400 -i eth0 -4 \
-D <IP-dest> -S <IP-src> -M <MAC-dst> -m <MAC-src> -p 54321 \
-h -v -n
```

- Client
```
sudo chrt -f 50 taskset -c 3-5 ./xsk_rr -o 0 -B 400 -i eth0 -4 \
-S <IP-src> -D <IP-dest> -m <MAC-src> -M <MAC-dst> -p 54321 \
-P <Period-usecs> -d <Delay-usecs> -T -l 1 -v -n
```

| Experiment | interrupts | SO_BUSYPOLL | SO_BUSYPOLL(separate) | NAPI threaded |
|---|---|---|---|---|
| 12 Kpkt/s + 0us delay | | | | |
| | p5: 12700 | p5: 12900 | p5: 13300 | p5: 12800 |
| | p50: 13100 | p50: 13600 | p50: 14100 | p50: 13000 |
| | p95: 13200 | p95: 13800 | p95: 14400 | p95: 13000 |
| | p99: 13200 | p99: 13800 | p99: 14400 | p99: 13000 |
| 32 Kpkt/s + 30us delay | | | | |
| | p5: 19900 | p5: 16600 | p5: 13100 | p5: 12800 |
| | p50: 21100 | p50: 17000 | p50: 13700 | p50: 13000 |
| | p95: 21200 | p95: 17100 | p95: 14000 | p95: 13000 |
| | p99: 21200 | p99: 17100 | p99: 14000 | p99: 13000 |
| 125 Kpkt/s + 6us delay | | | | |
| | p5: 14600 | p5: 17100 | p5: 13300 | p5: 12900 |
| | p50: 15400 | p50: 17400 | p50: 13800 | p50: 13100 |
| | p95: 15600 | p95: 17600 | p95: 14000 | p95: 13100 |
| | p99: 15600 | p99: 17600 | p99: 14000 | p99: 13100 |
| 12 Kpkt/s + 78us delay | | | | |
| | p5: 14100 | p5: 16700 | p5: 13200 | p5: 12600 |
| | p50: 14300 | p50: 17100 | p50: 13900 | p50: 12800 |
| | p95: 14300 | p95: 17200 | p95: 14200 | p95: 12800 |
| | p99: 14300 | p99: 17200 | p99: 14200 | p99: 12800 |
| 25 Kpkt/s + 38us delay | | | | |
| | p5: 19900 | p5: 16600 | p5: 13000 | p5: 12700 |
| | p50: 21000 | p50: 17100 | p50: 13800 | p50: 12900 |
| | p95: 21100 | p95: 17100 | p95: 14100 | p95: 12900 |
| | p99: 21100 | p99: 17100 | p99: 14100 | p99: 12900 |

## Observations

- Without application processing, all the approaches give the same
latency to within a 1 usec range, and NAPI threaded gives the minimum
latency.
- With application processing the latency increases by 3-4usecs when
doing inline polling.
- Using a dedicated core to drive napi polling keeps the latency the
same even with application processing. This is observed both in
userspace and in threaded napi (in kernel).
- Using napi threaded polling in kernel gives lower latency by
1-2usecs as compared to userspace driven polling in separate core.
- Even on a dedicated core, SO_BUSYPOLL adds around 1-2usecs of latency.
This is because it doesn't continuously busy poll until events are
ready. Instead, it returns after polling only once, requiring the
process to re-invoke the syscall for each poll, which requires a new
enter/leave kernel cycle and the setup/teardown of the busy poll for
every single poll attempt.
- With application processing, userspace gets the packet from the recv
ring, spends some time doing application processing, and then does napi
polling. While application processing is happening, a dedicated core
doing napi polling can pull packets off the NAPI RX queue and
populate the AF_XDP recv ring. This means that when the application
thread is done with application processing, it has new packets ready to
receive and process in the recv ring.
- Napi threaded busy polling in the kernel with a dedicated core gives
consistent P5-P99 latency.

Note well that threaded napi busy-polling has not been shown to yield
efficiency or throughput benefits. In contrast, dedicating an entire
core to busy-polling one NAPI (NIC queue) is rather inefficient.
However, in certain specific use cases, this mechanism results in lower
packet processing latency. The experimental testing reported here only
covers those use cases and does not present a comprehensive evaluation
of threaded napi busy-polling.

The following histogram measures the time spent in recvfrom while using
an inline thread with SO_BUSYPOLL. The histogram is generated using the
bpftrace command below. In this experiment there are 32K packets per
second and the application processing delay is 30usecs. The goal is to
measure whether the time spent pulling packets from the descriptor queue
is significant enough to affect the overall latency when done inline.

```
bpftrace -e '
kprobe:xsk_recvmsg {
@start[tid] = nsecs;
}
kretprobe:xsk_recvmsg {
if (@start[tid]) {
$sample = (nsecs - @start[tid]);
@xsk_recvfrom_hist = hist($sample);
delete(@start[tid]);
}
}
END { clear(@start);}'
```

Here in case of inline busypolling around 35 percent of calls are taking
1-2usecs and around 50 percent are taking 0.5-2usecs.

@xsk_recvfrom_hist:
[128, 256) 24073 |@@@@@@@@@@@@@@@@@@@@@@ |
[256, 512) 55633 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[512, 1K) 20974 |@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 34234 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[2K, 4K) 3266 |@@@ |
[4K, 8K) 19 | |
====================

Link: https://patch.msgid.link/20251028203007.575686-1-skhawaja@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+145 -19
+3 -2
Documentation/netlink/specs/netdev.yaml
··· 88 88 - 89 89 name: napi-threaded 90 90 type: enum 91 - entries: [disabled, enabled] 91 + entries: [disabled, enabled, busy-poll] 92 92 93 93 attribute-sets: 94 94 - ··· 291 291 name: threaded 292 292 doc: Whether the NAPI is configured to operate in threaded polling 293 293 mode. If this is set to enabled then the NAPI context operates 294 - in threaded polling mode. 294 + in threaded polling mode. If this is set to busy-poll, then the 295 + threaded polling mode also busy polls. 295 296 type: u32 296 297 enum: napi-threaded 297 298 -
+49 -1
Documentation/networking/napi.rst
··· 263 263 Busy polling is enabled by either setting ``SO_BUSY_POLL`` on 264 264 selected sockets or using the global ``net.core.busy_poll`` and 265 265 ``net.core.busy_read`` sysctls. An io_uring API for NAPI busy polling 266 - also exists. 266 + also exists. Threaded polling of NAPI also has a mode to busy poll for 267 + packets (:ref:`threaded busy polling<threaded_busy_poll>`) using the NAPI 268 + processing kthread. 267 269 268 270 epoll-based busy polling 269 271 ------------------------ ··· 427 425 Therefore, setting ``gro_flush_timeout`` and ``napi_defer_hard_irqs`` is 428 426 the recommended usage, because otherwise setting ``irq-suspend-timeout`` 429 427 might not have any discernible effect. 428 + 429 + .. _threaded_busy_poll: 430 + 431 + Threaded NAPI busy polling 432 + -------------------------- 433 + 434 + Threaded NAPI busy polling extends threaded NAPI and adds support to do 435 + continuous busy polling of the NAPI. This can be useful for forwarding or 436 + AF_XDP applications. 437 + 438 + Threaded NAPI busy polling can be enabled on per NIC queue basis using Netlink. 439 + 440 + For example, using the following script: 441 + 442 + .. code-block:: bash 443 + 444 + $ ynl --family netdev --do napi-set \ 445 + --json='{"id": 66, "threaded": "busy-poll"}' 446 + 447 + The kernel will create a kthread that busy polls on this NAPI. 448 + 449 + The user may elect to set the CPU affinity of this kthread to an unused CPU 450 + core to improve how often the NAPI is polled at the expense of wasted CPU 451 + cycles. Note that this will keep the CPU core busy with 100% usage. 452 + 453 + Once threaded busy polling is enabled for a NAPI, PID of the kthread can be 454 + retrieved using Netlink so the affinity of the kthread can be set up. 455 + 456 + For example, the following script can be used to fetch the PID: 457 + 458 + .. 
code-block:: bash 459 + 460 + $ ynl --family netdev --do napi-get --json='{"id": 66}' 461 + 462 + This will output something like following, the pid `258` is the PID of the 463 + kthread that is polling this NAPI. 464 + 465 + .. code-block:: bash 466 + 467 + $ {'defer-hard-irqs': 0, 468 + 'gro-flush-timeout': 0, 469 + 'id': 66, 470 + 'ifindex': 2, 471 + 'irq-suspend-timeout': 0, 472 + 'pid': 258, 473 + 'threaded': 'busy-poll'} 430 474 431 475 .. _threaded: 432 476
+3 -1
include/linux/netdevice.h
··· 423 423 NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ 424 424 NAPI_STATE_LISTED, /* NAPI added to system lists */ 425 425 NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ 426 - NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ 426 + NAPI_STATE_IN_BUSY_POLL, /* Do not rearm NAPI interrupt */ 427 427 NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ 428 428 NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ 429 429 NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ 430 430 NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */ 431 + NAPI_STATE_THREADED_BUSY_POLL, /* The threaded NAPI poller will busy poll */ 431 432 }; 432 433 433 434 enum { ··· 443 442 NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), 444 443 NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), 445 444 NAPIF_STATE_HAS_NOTIFIER = BIT(NAPI_STATE_HAS_NOTIFIER), 445 + NAPIF_STATE_THREADED_BUSY_POLL = BIT(NAPI_STATE_THREADED_BUSY_POLL), 446 446 }; 447 447 448 448 enum gro_result {
+1
include/uapi/linux/netdev.h
··· 80 80 enum netdev_napi_threaded { 81 81 NETDEV_NAPI_THREADED_DISABLED, 82 82 NETDEV_NAPI_THREADED_ENABLED, 83 + NETDEV_NAPI_THREADED_BUSY_POLL, 83 84 }; 84 85 85 86 enum {
+48 -10
net/core/dev.c
··· 7089 7089 */ 7090 7090 if ((val & NAPIF_STATE_SCHED_THREADED) || 7091 7091 !(val & NAPIF_STATE_SCHED)) { 7092 - new = val & (~NAPIF_STATE_THREADED); 7092 + new = val & (~(NAPIF_STATE_THREADED | 7093 + NAPIF_STATE_THREADED_BUSY_POLL)); 7093 7094 } else { 7094 7095 msleep(20); 7095 7096 continue; ··· 7112 7111 7113 7112 kthread_stop(napi->thread); 7114 7113 napi->thread = NULL; 7114 + } 7115 + 7116 + static void napi_set_threaded_state(struct napi_struct *napi, 7117 + enum netdev_napi_threaded threaded_mode) 7118 + { 7119 + bool threaded = threaded_mode != NETDEV_NAPI_THREADED_DISABLED; 7120 + bool busy_poll = threaded_mode == NETDEV_NAPI_THREADED_BUSY_POLL; 7121 + 7122 + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); 7123 + assign_bit(NAPI_STATE_THREADED_BUSY_POLL, &napi->state, busy_poll); 7115 7124 } 7116 7125 7117 7126 int napi_set_threaded(struct napi_struct *napi, ··· 7150 7139 } else { 7151 7140 /* Make sure kthread is created before THREADED bit is set. */ 7152 7141 smp_mb__before_atomic(); 7153 - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); 7142 + napi_set_threaded_state(napi, threaded); 7154 7143 } 7155 7144 7156 7145 return 0; ··· 7542 7531 } 7543 7532 7544 7533 new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC; 7545 - new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); 7534 + new &= ~(NAPIF_STATE_THREADED | 7535 + NAPIF_STATE_THREADED_BUSY_POLL | 7536 + NAPIF_STATE_PREFER_BUSY_POLL); 7546 7537 } while (!try_cmpxchg(&n->state, &val, new)); 7547 7538 7548 7539 hrtimer_cancel(&n->timer); ··· 7756 7743 return -1; 7757 7744 } 7758 7745 7759 - static void napi_threaded_poll_loop(struct napi_struct *napi) 7746 + static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll) 7760 7747 { 7761 7748 struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; 7762 7749 struct softnet_data *sd; ··· 7785 7772 } 7786 7773 skb_defer_free_flush(); 7787 7774 bpf_net_ctx_clear(bpf_net_ctx); 7775 + 7776 + /* When busy poll is 
enabled, the old packets are not flushed in 7777 + * napi_complete_done. So flush them here. 7778 + */ 7779 + if (busy_poll) 7780 + gro_flush_normal(&napi->gro, HZ >= 1000); 7788 7781 local_bh_enable(); 7782 + 7783 + /* Call cond_resched here to avoid watchdog warnings. */ 7784 + if (repoll || busy_poll) { 7785 + rcu_softirq_qs_periodic(last_qs); 7786 + cond_resched(); 7787 + } 7789 7788 7790 7789 if (!repoll) 7791 7790 break; 7792 - 7793 - rcu_softirq_qs_periodic(last_qs); 7794 - cond_resched(); 7795 7791 } 7796 7792 } 7797 7793 7798 7794 static int napi_threaded_poll(void *data) 7799 7795 { 7800 7796 struct napi_struct *napi = data; 7797 + bool want_busy_poll; 7798 + bool in_busy_poll; 7799 + unsigned long val; 7801 7800 7802 - while (!napi_thread_wait(napi)) 7803 - napi_threaded_poll_loop(napi); 7801 + while (!napi_thread_wait(napi)) { 7802 + val = READ_ONCE(napi->state); 7803 + 7804 + want_busy_poll = val & NAPIF_STATE_THREADED_BUSY_POLL; 7805 + in_busy_poll = val & NAPIF_STATE_IN_BUSY_POLL; 7806 + 7807 + if (unlikely(val & NAPIF_STATE_DISABLE)) 7808 + want_busy_poll = false; 7809 + 7810 + if (want_busy_poll != in_busy_poll) 7811 + assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state, 7812 + want_busy_poll); 7813 + 7814 + napi_threaded_poll_loop(napi, want_busy_poll); 7815 + } 7804 7816 7805 7817 return 0; 7806 7818 } ··· 13135 13097 { 13136 13098 struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu); 13137 13099 13138 - napi_threaded_poll_loop(&sd->backlog); 13100 + napi_threaded_poll_loop(&sd->backlog, false); 13139 13101 } 13140 13102 13141 13103 static void backlog_napi_setup(unsigned int cpu)
+3
net/core/dev.h
··· 317 317 318 318 static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n) 319 319 { 320 + if (test_bit(NAPI_STATE_THREADED_BUSY_POLL, &n->state)) 321 + return NETDEV_NAPI_THREADED_BUSY_POLL; 322 + 320 323 if (test_bit(NAPI_STATE_THREADED, &n->state)) 321 324 return NETDEV_NAPI_THREADED_ENABLED; 322 325
+1 -1
net/core/netdev-genl-gen.c
··· 97 97 [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), 98 98 [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, 99 99 [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, 100 - [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 1), 100 + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 2), 101 101 }; 102 102 103 103 /* NETDEV_CMD_BIND_TX - do */
+1
tools/include/uapi/linux/netdev.h
··· 80 80 enum netdev_napi_threaded { 81 81 NETDEV_NAPI_THREADED_DISABLED, 82 82 NETDEV_NAPI_THREADED_ENABLED, 83 + NETDEV_NAPI_THREADED_BUSY_POLL, 83 84 }; 84 85 85 86 enum {
+23 -1
tools/testing/selftests/net/busy_poll_test.sh
··· 27 27 GRO_FLUSH_TIMEOUT=50000 28 28 SUSPEND_TIMEOUT=20000000 29 29 30 + NAPI_THREADED_MODE_BUSY_POLL=2 31 + 30 32 setup_ns() 31 33 { 32 34 set -e ··· 64 62 test_busypoll() 65 63 { 66 64 suspend_value=${1:-0} 65 + napi_threaded_value=${2:-0} 66 + prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL} 67 + 67 68 tmp_file=$(mktemp) 68 69 out_file=$(mktemp) 69 70 ··· 78 73 -b${SERVER_IP} \ 79 74 -m${MAX_EVENTS} \ 80 75 -u${BUSY_POLL_USECS} \ 81 - -P${PREFER_BUSY_POLL} \ 76 + -P${prefer_busy_poll_value} \ 82 77 -g${BUSY_POLL_BUDGET} \ 83 78 -i${NSIM_SV_IFIDX} \ 84 79 -s${suspend_value} \ 80 + -t${napi_threaded_value} \ 85 81 -o${out_file}& 86 82 87 83 wait_local_port_listen nssv ${SERVER_PORT} tcp ··· 111 105 test_busypoll_with_suspend() 112 106 { 113 107 test_busypoll ${SUSPEND_TIMEOUT} 108 + 109 + return $? 110 + } 111 + 112 + test_busypoll_with_napi_threaded() 113 + { 114 + # Only enable napi threaded poll. Set suspend timeout and prefer busy 115 + # poll to 0. 116 + test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0 114 117 115 118 return $? 116 119 } ··· 165 150 test_busypoll_with_suspend 166 151 if [ $? -ne 0 ]; then 167 152 echo "test_busypoll_with_suspend failed" 153 + cleanup_ns 154 + exit 1 155 + fi 156 + 157 + test_busypoll_with_napi_threaded 158 + if [ $? -ne 0 ]; then 159 + echo "test_busypoll_with_napi_threaded failed" 168 160 cleanup_ns 169 161 exit 1 170 162 fi
+13 -3
tools/testing/selftests/net/busy_poller.c
··· 65 65 static uint16_t cfg_busy_poll_budget; 66 66 static uint8_t cfg_prefer_busy_poll; 67 67 68 - /* IRQ params */ 68 + /* NAPI params */ 69 69 static uint32_t cfg_defer_hard_irqs; 70 70 static uint64_t cfg_gro_flush_timeout; 71 71 static uint64_t cfg_irq_suspend_timeout; 72 + static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED; 72 73 73 74 static void usage(const char *filepath) 74 75 { 75 76 error(1, 0, 76 - "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", 77 + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>", 77 78 filepath); 78 79 } 79 80 ··· 87 86 if (argc <= 1) 88 87 usage(argv[0]); 89 88 90 - while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { 89 + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) { 91 90 /* most options take integer values, except o and b, so reduce 92 91 * code duplication a bit for the common case by calling 93 92 * strtoull here and leave bounds checking and casting per ··· 169 168 170 169 cfg_ifindex = (int)tmp; 171 170 break; 171 + case 't': 172 + if (tmp > 2) 173 + error(1, ERANGE, "napi threaded poll value must be 0-2"); 174 + 175 + cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp; 176 + break; 172 177 } 173 178 } 174 179 ··· 253 246 cfg_gro_flush_timeout); 254 247 netdev_napi_set_req_set_irq_suspend_timeout(set_req, 255 248 cfg_irq_suspend_timeout); 249 + 250 + if (cfg_napi_threaded_poll) 251 + netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll); 256 252 257 253 if (netdev_napi_set(ys, set_req)) 258 254 error(1, 0, "can't set NAPI params: %s\n", yerr.msg);