Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/net: Support multishot receive len cap

At the moment it's very hard to do fine-grained backpressure when using
multishot, as the kernel might produce a lot of completions before the
user has a chance to cancel a previously submitted multishot recv.

This change adds support for issuing a multishot recv that is capped by a
length, which means the kernel will only re-arm until the given amount of
data has been received. When the limit is reached, the completion signals
to the user that a manual re-arm is needed by not setting the
IORING_CQE_F_MORE flag.

Signed-off-by: Norman Maurer <norman_maurer@apple.com>
Link: https://lore.kernel.org/r/20250715140249.31186-1-norman_maurer@apple.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Norman Maurer and committed by
Jens Axboe
0ebc9a7e 8723c146

+34 -4
+34 -4
io_uring/net.c
··· 75 75 u16 flags; 76 76 /* initialised and used only by !msg send variants */ 77 77 u16 buf_group; 78 + /* per-invocation mshot limit */ 78 79 unsigned mshot_len; 80 + /* overall mshot byte limit */ 81 + unsigned mshot_total_len; 79 82 void __user *msg_control; 80 83 /* used only for send zerocopy */ 81 84 struct io_kiocb *notif; ··· 92 89 IORING_RECV_RETRY = (1U << 15), 93 90 IORING_RECV_PARTIAL_MAP = (1U << 14), 94 91 IORING_RECV_MSHOT_CAP = (1U << 13), 92 + IORING_RECV_MSHOT_LIM = (1U << 12), 93 + IORING_RECV_MSHOT_DONE = (1U << 11), 95 94 96 95 IORING_RECV_RETRY_CLEAR = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP, 97 96 IORING_RECV_NO_RETRY = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP | 98 - IORING_RECV_MSHOT_CAP, 97 + IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE, 99 98 }; 100 99 101 100 /* ··· 770 765 771 766 sr->done_io = 0; 772 767 773 - if (unlikely(sqe->file_index || sqe->addr2)) 768 + if (unlikely(sqe->addr2)) 774 769 return -EINVAL; 775 770 776 771 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); ··· 795 790 sr->buf_group = req->buf_index; 796 791 req->buf_list = NULL; 797 792 } 798 - sr->mshot_len = 0; 793 + sr->mshot_total_len = sr->mshot_len = 0; 799 794 if (sr->flags & IORING_RECV_MULTISHOT) { 800 795 if (!(req->flags & REQ_F_BUFFER_SELECT)) 801 796 return -EINVAL; 802 797 if (sr->msg_flags & MSG_WAITALL) 803 798 return -EINVAL; 804 - if (req->opcode == IORING_OP_RECV) 799 + if (req->opcode == IORING_OP_RECV) { 805 800 sr->mshot_len = sr->len; 801 + sr->mshot_total_len = READ_ONCE(sqe->optlen); 802 + if (sr->mshot_total_len) 803 + sr->flags |= IORING_RECV_MSHOT_LIM; 804 + } else if (sqe->optlen) { 805 + return -EINVAL; 806 + } 806 807 req->flags |= REQ_F_APOLL_MULTISHOT; 808 + } else if (sqe->optlen) { 809 + return -EINVAL; 807 810 } 811 + 808 812 if (sr->flags & IORING_RECVSEND_BUNDLE) { 809 813 if (req->opcode == IORING_OP_RECVMSG) 810 814 return -EINVAL; ··· 844 830 845 831 if (kmsg->msg.msg_inq > 0) 846 832 cflags |= 
IORING_CQE_F_SOCK_NONEMPTY; 833 + 834 + if (*ret > 0 && sr->flags & IORING_RECV_MSHOT_LIM) { 835 + /* 836 + * If sr->len hits zero, the limit has been reached. Mark 837 + * mshot as finished, and flag MSHOT_DONE as well to prevent 838 + * a potential bundle from being retried. 839 + */ 840 + sr->mshot_total_len -= min_t(int, *ret, sr->mshot_total_len); 841 + if (!sr->mshot_total_len) { 842 + sr->flags |= IORING_RECV_MSHOT_DONE; 843 + mshot_finished = true; 844 + } 845 + } 847 846 848 847 if (sr->flags & IORING_RECVSEND_BUNDLE) { 849 848 size_t this_ret = *ret - sr->done_io; ··· 1121 1094 else if (kmsg->msg.msg_inq > 1) 1122 1095 arg.max_len = min_not_zero(*len, (size_t) kmsg->msg.msg_inq); 1123 1096 1097 + /* if mshot limited, ensure we don't go over */ 1098 + if (sr->flags & IORING_RECV_MSHOT_LIM) 1099 + arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len); 1124 1100 ret = io_buffers_peek(req, &arg); 1125 1101 if (unlikely(ret < 0)) 1126 1102 return ret;