Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/kbuf: add helpers for getting/peeking multiple buffers

Our provided buffer interface only allows selection of a single buffer.
Add an API that allows getting/peeking multiple buffers at the same time.

This is only implemented for the ring provided buffers. It could be added
for the legacy provided buffers as well, but since it's strongly
encouraged to use the new interface, let's keep it simpler and just
provide it for the new API. The legacy interface will always just select
a single buffer.

There are two new main functions:

io_buffers_select(), which selects as many buffers as it can. The
caller supplies the iovec array, and io_buffers_select() may allocate a
bigger array if the 'max_len' being passed in is non-zero and bigger
than what fits in the provided iovec. Buffers grabbed with this helper
are permanently assigned.

io_buffers_peek(), which works like io_buffers_select(), except that the
buffers can be recycled, if needed. Callers using either of these functions
should call io_put_kbufs() rather than io_put_kbuf() at completion time. The
peek interface must be called with the ctx locked from peek to
completion.

This adds a bit of state for the request:

- REQ_F_BUFFERS_COMMIT, which means that the buffers have been
peeked and should be committed to the buffer ring head when they are
put as part of completion. Prior to this, req->buf_list was cleared to
NULL when committed.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

+201 -12
+3
include/linux/io_uring_types.h
··· 472 472 REQ_F_CAN_POLL_BIT, 473 473 REQ_F_BL_EMPTY_BIT, 474 474 REQ_F_BL_NO_RECYCLE_BIT, 475 + REQ_F_BUFFERS_COMMIT_BIT, 475 476 476 477 /* not a real bit, just to check we're not overflowing the space */ 477 478 __REQ_F_LAST_BIT, ··· 551 550 REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT), 552 551 /* don't recycle provided buffers for this request */ 553 552 REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), 553 + /* buffer ring head needs incrementing on put */ 554 + REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), 554 555 }; 555 556 556 557 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
+154 -3
io_uring/kbuf.c
··· 117 117 return NULL; 118 118 } 119 119 120 + static int io_provided_buffers_select(struct io_kiocb *req, size_t *len, 121 + struct io_buffer_list *bl, 122 + struct iovec *iov) 123 + { 124 + void __user *buf; 125 + 126 + buf = io_provided_buffer_select(req, len, bl); 127 + if (unlikely(!buf)) 128 + return -ENOBUFS; 129 + 130 + iov[0].iov_base = buf; 131 + iov[0].iov_len = *len; 132 + return 0; 133 + } 134 + 135 + static struct io_uring_buf *io_ring_head_to_buf(struct io_uring_buf_ring *br, 136 + __u16 head, __u16 mask) 137 + { 138 + return &br->bufs[head & mask]; 139 + } 140 + 120 141 static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, 121 142 struct io_buffer_list *bl, 122 143 unsigned int issue_flags) ··· 153 132 if (head + 1 == tail) 154 133 req->flags |= REQ_F_BL_EMPTY; 155 134 156 - head &= bl->mask; 157 - buf = &br->bufs[head]; 135 + buf = io_ring_head_to_buf(br, head, bl->mask); 158 136 if (*len == 0 || *len > buf->len) 159 137 *len = buf->len; 160 - req->flags |= REQ_F_BUFFER_RING; 138 + req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; 161 139 req->buf_list = bl; 162 140 req->buf_index = buf->bid; 163 141 ··· 171 151 * the transfer completes (or if we get -EAGAIN and must poll of 172 152 * retry). 
173 153 */ 154 + req->flags &= ~REQ_F_BUFFERS_COMMIT; 174 155 req->buf_list = NULL; 175 156 bl->head++; 176 157 } ··· 196 175 } 197 176 io_ring_submit_unlock(req->ctx, issue_flags); 198 177 return ret; 178 + } 179 + 180 + /* cap it at a reasonable 256, will be one page even for 4K */ 181 + #define PEEK_MAX_IMPORT 256 182 + 183 + static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, 184 + struct io_buffer_list *bl) 185 + { 186 + struct io_uring_buf_ring *br = bl->buf_ring; 187 + struct iovec *iov = arg->iovs; 188 + int nr_iovs = arg->nr_iovs; 189 + __u16 nr_avail, tail, head; 190 + struct io_uring_buf *buf; 191 + 192 + tail = smp_load_acquire(&br->tail); 193 + head = bl->head; 194 + nr_avail = min_t(__u16, tail - head, UIO_MAXIOV); 195 + if (unlikely(!nr_avail)) 196 + return -ENOBUFS; 197 + 198 + buf = io_ring_head_to_buf(br, head, bl->mask); 199 + if (arg->max_len) { 200 + int needed; 201 + 202 + needed = (arg->max_len + buf->len - 1) / buf->len; 203 + needed = min(needed, PEEK_MAX_IMPORT); 204 + if (nr_avail > needed) 205 + nr_avail = needed; 206 + } 207 + 208 + /* 209 + * only alloc a bigger array if we know we have data to map, eg not 210 + * a speculative peek operation. 
211 + */ 212 + if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) { 213 + iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL); 214 + if (unlikely(!iov)) 215 + return -ENOMEM; 216 + if (arg->mode & KBUF_MODE_FREE) 217 + kfree(arg->iovs); 218 + arg->iovs = iov; 219 + nr_iovs = nr_avail; 220 + } else if (nr_avail < nr_iovs) { 221 + nr_iovs = nr_avail; 222 + } 223 + 224 + /* set it to max, if not set, so we can use it unconditionally */ 225 + if (!arg->max_len) 226 + arg->max_len = INT_MAX; 227 + 228 + req->buf_index = buf->bid; 229 + do { 230 + /* truncate end piece, if needed */ 231 + if (buf->len > arg->max_len) 232 + buf->len = arg->max_len; 233 + 234 + iov->iov_base = u64_to_user_ptr(buf->addr); 235 + iov->iov_len = buf->len; 236 + iov++; 237 + 238 + arg->out_len += buf->len; 239 + arg->max_len -= buf->len; 240 + if (!arg->max_len) 241 + break; 242 + 243 + buf = io_ring_head_to_buf(br, ++head, bl->mask); 244 + } while (--nr_iovs); 245 + 246 + if (head == tail) 247 + req->flags |= REQ_F_BL_EMPTY; 248 + 249 + req->flags |= REQ_F_BUFFER_RING; 250 + req->buf_list = bl; 251 + return iov - arg->iovs; 252 + } 253 + 254 + int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, 255 + unsigned int issue_flags) 256 + { 257 + struct io_ring_ctx *ctx = req->ctx; 258 + struct io_buffer_list *bl; 259 + int ret = -ENOENT; 260 + 261 + io_ring_submit_lock(ctx, issue_flags); 262 + bl = io_buffer_get_list(ctx, req->buf_index); 263 + if (unlikely(!bl)) 264 + goto out_unlock; 265 + 266 + if (bl->is_buf_ring) { 267 + ret = io_ring_buffers_peek(req, arg, bl); 268 + /* 269 + * Don't recycle these buffers if we need to go through poll. 270 + * Nobody else can use them anyway, and holding on to provided 271 + * buffers for a send/write operation would happen on the app 272 + * side anyway with normal buffers. Besides, we already 273 + * committed them, they cannot be put back in the queue. 
274 + */ 275 + if (ret > 0) { 276 + req->flags |= REQ_F_BL_NO_RECYCLE; 277 + req->buf_list->head += ret; 278 + } 279 + } else { 280 + ret = io_provided_buffers_select(req, &arg->out_len, bl, arg->iovs); 281 + } 282 + out_unlock: 283 + io_ring_submit_unlock(ctx, issue_flags); 284 + return ret; 285 + } 286 + 287 + int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg) 288 + { 289 + struct io_ring_ctx *ctx = req->ctx; 290 + struct io_buffer_list *bl; 291 + int ret; 292 + 293 + lockdep_assert_held(&ctx->uring_lock); 294 + 295 + bl = io_buffer_get_list(ctx, req->buf_index); 296 + if (unlikely(!bl)) 297 + return -ENOENT; 298 + 299 + if (bl->is_buf_ring) { 300 + ret = io_ring_buffers_peek(req, arg, bl); 301 + if (ret > 0) 302 + req->flags |= REQ_F_BUFFERS_COMMIT; 303 + return ret; 304 + } 305 + 306 + /* don't support multiple buffer selections for legacy */ 307 + return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs); 199 308 } 200 309 201 310 static int __io_remove_buffers(struct io_ring_ctx *ctx,
+44 -9
io_uring/kbuf.h
··· 41 41 __u16 bgid; 42 42 }; 43 43 44 + enum { 45 + /* can alloc a bigger vec */ 46 + KBUF_MODE_EXPAND = 1, 47 + /* if bigger vec allocated, free old one */ 48 + KBUF_MODE_FREE = 2, 49 + }; 50 + 51 + struct buf_sel_arg { 52 + struct iovec *iovs; 53 + size_t out_len; 54 + size_t max_len; 55 + int nr_iovs; 56 + int mode; 57 + }; 58 + 44 59 void __user *io_buffer_select(struct io_kiocb *req, size_t *len, 45 60 unsigned int issue_flags); 61 + int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, 62 + unsigned int issue_flags); 63 + int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg); 46 64 void io_destroy_buffers(struct io_ring_ctx *ctx); 47 65 48 66 int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); ··· 93 75 */ 94 76 if (req->buf_list) { 95 77 req->buf_index = req->buf_list->bgid; 96 - req->flags &= ~REQ_F_BUFFER_RING; 78 + req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT); 97 79 return true; 98 80 } 99 81 return false; ··· 117 99 return false; 118 100 } 119 101 120 - static inline void __io_put_kbuf_ring(struct io_kiocb *req) 102 + static inline void __io_put_kbuf_ring(struct io_kiocb *req, int nr) 121 103 { 122 - if (req->buf_list) { 123 - req->buf_index = req->buf_list->bgid; 124 - req->buf_list->head++; 104 + struct io_buffer_list *bl = req->buf_list; 105 + 106 + if (bl) { 107 + if (req->flags & REQ_F_BUFFERS_COMMIT) { 108 + bl->head += nr; 109 + req->flags &= ~REQ_F_BUFFERS_COMMIT; 110 + } 111 + req->buf_index = bl->bgid; 125 112 } 126 113 req->flags &= ~REQ_F_BUFFER_RING; 127 114 } ··· 135 112 struct list_head *list) 136 113 { 137 114 if (req->flags & REQ_F_BUFFER_RING) { 138 - __io_put_kbuf_ring(req); 115 + __io_put_kbuf_ring(req, 1); 139 116 } else { 140 117 req->buf_index = req->kbuf->bgid; 141 118 list_add(&req->kbuf->list, list); ··· 153 130 __io_put_kbuf_list(req, &req->ctx->io_buffers_comp); 154 131 } 155 132 156 - static inline unsigned int io_put_kbuf(struct io_kiocb *req, 157 - 
unsigned issue_flags) 133 + static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int nbufs, 134 + unsigned issue_flags) 158 135 { 159 136 unsigned int ret; 160 137 ··· 163 140 164 141 ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); 165 142 if (req->flags & REQ_F_BUFFER_RING) 166 - __io_put_kbuf_ring(req); 143 + __io_put_kbuf_ring(req, nbufs); 167 144 else 168 145 __io_put_kbuf(req, issue_flags); 169 146 return ret; 147 + } 148 + 149 + static inline unsigned int io_put_kbuf(struct io_kiocb *req, 150 + unsigned issue_flags) 151 + { 152 + return __io_put_kbufs(req, 1, issue_flags); 153 + } 154 + 155 + static inline unsigned int io_put_kbufs(struct io_kiocb *req, int nbufs, 156 + unsigned issue_flags) 157 + { 158 + return __io_put_kbufs(req, nbufs, issue_flags); 170 159 } 171 160 #endif