Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/net: improve recv bundles

Current recv bundles are only supported for multishot receives, and
they also always post at least 2 CQEs if more data is available than
what a single buffer will hold. This happens because the initial
bundle recv will do a single buffer, and then do the rest of what is in
the socket as a followup receive. As shown in a test program, if 1k-sized
buffers are provided and 32k of data is available to receive in the
socket, you'd get the following completions:

bundle=1, mshot=0
cqe res 1024
cqe res 1024
[...]
cqe res 1024

bundle=1, mshot=1
cqe res 1024
cqe res 31744

where bundle=1 && mshot=0 will post 32 1k completions, and bundle=1 &&
mshot=1 will post a 1k completion and then a 31k completion.

To support bundle recv without multishot, it's possible to simply retry
the recv immediately and post a single completion, rather than splitting
it into two completions. With the patch below, the same test looks as
follows:

bundle=1, mshot=0
cqe res 32768

bundle=1, mshot=1
cqe res 32768

where mshot=0 works fine for bundles, and both of them post just a
single 32k completion rather than splitting it into separate completions.
Posting fewer completions is always a nice win, and not needing
multishot for proper bundle efficiency is nice for cases that can't
necessarily use multishot.

Reported-by: Norman Maurer <norman_maurer@apple.com>
Link: https://lore.kernel.org/r/184f9f92-a682-4205-a15d-89e18f664502@kernel.dk
Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv")
Signed-off-by: Jens Axboe <axboe@kernel.dk>

+18
+18
io_uring/net.c
··· 76 76 /* initialised and used only by !msg send variants */ 77 77 u16 buf_group; 78 78 u16 buf_index; 79 + bool retry; 79 80 void __user *msg_control; 80 81 /* used only for send zerocopy */ 81 82 struct io_kiocb *notif; ··· 188 187 189 188 req->flags &= ~REQ_F_BL_EMPTY; 190 189 sr->done_io = 0; 190 + sr->retry = false; 191 191 sr->len = 0; /* get from the provided buffer */ 192 192 req->buf_index = sr->buf_group; 193 193 } ··· 404 402 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 405 403 406 404 sr->done_io = 0; 405 + sr->retry = false; 407 406 408 407 if (req->opcode != IORING_OP_SEND) { 409 408 if (sqe->addr2 || sqe->file_index) ··· 788 785 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 789 786 790 787 sr->done_io = 0; 788 + sr->retry = false; 791 789 792 790 if (unlikely(sqe->file_index || sqe->addr2)) 793 791 return -EINVAL; ··· 837 833 return io_recvmsg_prep_setup(req); 838 834 } 839 835 836 + /* bits to clear in old and inherit in new cflags on bundle retry */ 837 + #define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE) 838 + 840 839 /* 841 840 * Finishes io_recv and io_recvmsg. 
842 841 * ··· 859 852 if (sr->flags & IORING_RECVSEND_BUNDLE) { 860 853 cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), 861 854 issue_flags); 855 + if (sr->retry) 856 + cflags = req->cqe.flags | (cflags & CQE_F_MASK); 862 857 /* bundle with no more immediate buffers, we're done */ 863 858 if (req->flags & REQ_F_BL_EMPTY) 864 859 goto finish; 860 + /* if more is available, retry and append to this one */ 861 + if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) { 862 + req->cqe.flags = cflags & ~CQE_F_MASK; 863 + sr->len = kmsg->msg.msg_inq; 864 + sr->done_io += *ret; 865 + sr->retry = true; 866 + return false; 867 + } 865 868 } else { 866 869 cflags |= io_put_kbuf(req, *ret, issue_flags); 867 870 } ··· 1250 1233 struct io_kiocb *notif; 1251 1234 1252 1235 zc->done_io = 0; 1236 + zc->retry = false; 1253 1237 req->flags |= REQ_F_POLL_NO_LAZY; 1254 1238 1255 1239 if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))