Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/kbuf: pass bgid to io_buffer_select()

The current situation with buffer group id juggling is not ideal.
req->buf_index first stores the bgid, then it's overwritten by a buffer
id, and then it can get restored back on recycling / etc. It's not so
easy to control, and it's not handled consistently across request types
with receive requests saving and restoring the bgid by hand.

It's a prep patch that adds a buffer group id argument to
io_buffer_select(). The caller will be responsible for stashing a copy
somewhere and passing it into the function.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a210d6427cc3f4f42271a6853274cd5a50e56820.1743437358.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Pavel Begunkov and committed by
Jens Axboe
c0e96505 e6f74fd6

+13 -9
+2 -2
io_uring/kbuf.c
··· 193 193 } 194 194 195 195 void __user *io_buffer_select(struct io_kiocb *req, size_t *len, 196 - unsigned int issue_flags) 196 + unsigned buf_group, unsigned int issue_flags) 197 197 { 198 198 struct io_ring_ctx *ctx = req->ctx; 199 199 struct io_buffer_list *bl; ··· 201 201 202 202 io_ring_submit_lock(req->ctx, issue_flags); 203 203 204 - bl = io_buffer_get_list(ctx, req->buf_index); 204 + bl = io_buffer_get_list(ctx, buf_group); 205 205 if (likely(bl)) { 206 206 if (bl->flags & IOBL_BUF_RING) 207 207 ret = io_ring_buffer_select(req, len, bl, issue_flags);
+1 -1
io_uring/kbuf.h
··· 58 58 }; 59 59 60 60 void __user *io_buffer_select(struct io_kiocb *req, size_t *len, 61 - unsigned int issue_flags); 61 + unsigned buf_group, unsigned int issue_flags); 62 62 int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, 63 63 unsigned int issue_flags); 64 64 int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg);
+4 -5
io_uring/net.c
··· 407 407 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 408 408 if (sr->msg_flags & MSG_DONTWAIT) 409 409 req->flags |= REQ_F_NOWAIT; 410 + if (req->flags & REQ_F_BUFFER_SELECT) 411 + sr->buf_group = req->buf_index; 410 412 if (sr->flags & IORING_RECVSEND_BUNDLE) { 411 413 if (req->opcode == IORING_OP_SENDMSG) 412 414 return -EINVAL; 413 - if (!(req->flags & REQ_F_BUFFER_SELECT)) 414 - return -EINVAL; 415 415 sr->msg_flags |= MSG_WAITALL; 416 - sr->buf_group = req->buf_index; 417 416 req->buf_list = NULL; 418 417 req->flags |= REQ_F_MULTISHOT; 419 418 } ··· 978 979 void __user *buf; 979 980 size_t len = sr->len; 980 981 981 - buf = io_buffer_select(req, &len, issue_flags); 982 + buf = io_buffer_select(req, &len, sr->buf_group, issue_flags); 982 983 if (!buf) 983 984 return -ENOBUFS; 984 985 ··· 1088 1089 void __user *buf; 1089 1090 1090 1091 *len = sr->len; 1091 - buf = io_buffer_select(req, len, issue_flags); 1092 + buf = io_buffer_select(req, len, sr->buf_group, issue_flags); 1092 1093 if (!buf) 1093 1094 return -ENOBUFS; 1094 1095 sr->buf = buf;
+4 -1
io_uring/rw.c
··· 119 119 return io_import_vec(ddir, req, io, buf, sqe_len); 120 120 121 121 if (io_do_buffer_select(req)) { 122 - buf = io_buffer_select(req, &sqe_len, issue_flags); 122 + buf = io_buffer_select(req, &sqe_len, io->buf_group, issue_flags); 123 123 if (!buf) 124 124 return -ENOBUFS; 125 125 rw->addr = (unsigned long) buf; ··· 253 253 int ddir) 254 254 { 255 255 struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); 256 + struct io_async_rw *io; 256 257 unsigned ioprio; 257 258 u64 attr_type_mask; 258 259 int ret; 259 260 260 261 if (io_rw_alloc_async(req)) 261 262 return -ENOMEM; 263 + io = req->async_data; 262 264 263 265 rw->kiocb.ki_pos = READ_ONCE(sqe->off); 264 266 /* used for fixed read/write too - just read unconditionally */ 265 267 req->buf_index = READ_ONCE(sqe->buf_index); 268 + io->buf_group = req->buf_index; 266 269 267 270 ioprio = READ_ONCE(sqe->ioprio); 268 271 if (ioprio) {
+2
io_uring/rw.h
··· 16 16 struct iov_iter iter; 17 17 struct iov_iter_state iter_state; 18 18 struct iovec fast_iov; 19 + unsigned buf_group; 20 + 19 21 /* 20 22 * wpq is for buffered io, while meta fields are used with 21 23 * direct io