Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-6.14-20250131' of git://git.kernel.dk/linux

Pull more io_uring updates from Jens Axboe:

- Series cleaning up the alloc cache changes from this merge window,
and then another series on top making it better yet.

This also solves an issue with KASAN_EXTRA_INFO, by making io_uring
resilient to KASAN using parts of the freed struct for storage

- Cleanups and simplifications to buffer cloning and io resource node
management

- Fix an issue introduced in this merge window where READ/WRITE_ONCE
was used on an atomic_t, which made some archs complain

- Fix for an errant connect retry when the socket has been shut down

- Fix for multishot and provided buffers

* tag 'io_uring-6.14-20250131' of git://git.kernel.dk/linux:
io_uring/net: don't retry connect operation on EPOLLERR
io_uring/rw: simplify io_rw_recycle()
io_uring: remove !KASAN guards from cache free
io_uring/net: extract io_send_select_buffer()
io_uring/net: clean io_msg_copy_hdr()
io_uring/net: make io_net_vec_assign() return void
io_uring: add alloc_cache.c
io_uring: dont ifdef io_alloc_cache_kasan()
io_uring: include all deps for alloc_cache.h
io_uring: fix multishots with selected buffers
io_uring/register: use atomic_read/write for sq_flags migration
io_uring/alloc_cache: get rid of _nocache() helper
io_uring: get rid of alloc cache init_once handling
io_uring/uring_cmd: cleanup struct io_uring_cmd_data layout
io_uring/uring_cmd: use cached cmd_op in io_uring_cmd_sock()
io_uring/msg_ring: don't leave potentially dangling ->tctx pointer
io_uring/rsrc: Move lockdep assert from io_free_rsrc_node() to caller
io_uring/rsrc: remove unused parameter ctx for io_rsrc_node_alloc()
io_uring: clean up io_uring_register_get_file()
io_uring/rsrc: Simplify buffer cloning by locking both rings

+272 -243
+1 -1
include/linux/io_uring/cmd.h
··· 19 19 }; 20 20 21 21 struct io_uring_cmd_data { 22 - struct io_uring_sqe sqes[2]; 23 22 void *op_data; 23 + struct io_uring_sqe sqes[2]; 24 24 }; 25 25 26 26 static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
+2 -1
include/linux/io_uring_types.h
··· 222 222 void **entries; 223 223 unsigned int nr_cached; 224 224 unsigned int max_cached; 225 - size_t elem_size; 225 + unsigned int elem_size; 226 + unsigned int init_clear; 226 227 }; 227 228 228 229 struct io_ring_ctx {
+1 -1
io_uring/Makefile
··· 13 13 sync.o msg_ring.o advise.o openclose.o \ 14 14 epoll.o statx.o timeout.o fdinfo.o \ 15 15 cancel.o waitid.o register.o \ 16 - truncate.o memmap.o 16 + truncate.o memmap.o alloc_cache.o 17 17 obj-$(CONFIG_IO_WQ) += io-wq.o 18 18 obj-$(CONFIG_FUTEX) += futex.o 19 19 obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
+44
io_uring/alloc_cache.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "alloc_cache.h" 4 + 5 + void io_alloc_cache_free(struct io_alloc_cache *cache, 6 + void (*free)(const void *)) 7 + { 8 + void *entry; 9 + 10 + if (!cache->entries) 11 + return; 12 + 13 + while ((entry = io_alloc_cache_get(cache)) != NULL) 14 + free(entry); 15 + 16 + kvfree(cache->entries); 17 + cache->entries = NULL; 18 + } 19 + 20 + /* returns false if the cache was initialized properly */ 21 + bool io_alloc_cache_init(struct io_alloc_cache *cache, 22 + unsigned max_nr, unsigned int size, 23 + unsigned int init_bytes) 24 + { 25 + cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); 26 + if (!cache->entries) 27 + return true; 28 + 29 + cache->nr_cached = 0; 30 + cache->max_cached = max_nr; 31 + cache->elem_size = size; 32 + cache->init_clear = init_bytes; 33 + return false; 34 + } 35 + 36 + void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp) 37 + { 38 + void *obj; 39 + 40 + obj = kmalloc(cache->elem_size, gfp); 41 + if (obj && cache->init_clear) 42 + memset(obj, 0, cache->init_clear); 43 + return obj; 44 + }
+33 -36
io_uring/alloc_cache.h
··· 1 1 #ifndef IOU_ALLOC_CACHE_H 2 2 #define IOU_ALLOC_CACHE_H 3 3 4 + #include <linux/io_uring_types.h> 5 + 4 6 /* 5 7 * Don't allow the cache to grow beyond this size. 6 8 */ 7 9 #define IO_ALLOC_CACHE_MAX 128 10 + 11 + void io_alloc_cache_free(struct io_alloc_cache *cache, 12 + void (*free)(const void *)); 13 + bool io_alloc_cache_init(struct io_alloc_cache *cache, 14 + unsigned max_nr, unsigned int size, 15 + unsigned int init_bytes); 16 + 17 + void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp); 18 + 19 + static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr) 20 + { 21 + if (IS_ENABLED(CONFIG_KASAN)) { 22 + kfree(*iov); 23 + *iov = NULL; 24 + *nr = 0; 25 + } 26 + } 8 27 9 28 static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, 10 29 void *entry) ··· 42 23 if (cache->nr_cached) { 43 24 void *entry = cache->entries[--cache->nr_cached]; 44 25 26 + /* 27 + * If KASAN is enabled, always clear the initial bytes that 28 + * must be zeroed post alloc, in case any of them overlap 29 + * with KASAN storage. 
30 + */ 31 + #if defined(CONFIG_KASAN) 45 32 kasan_mempool_unpoison_object(entry, cache->elem_size); 33 + if (cache->init_clear) 34 + memset(entry, 0, cache->init_clear); 35 + #endif 46 36 return entry; 47 37 } 48 38 49 39 return NULL; 50 40 } 51 41 52 - static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp, 53 - void (*init_once)(void *obj)) 42 + static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp) 54 43 { 55 - if (unlikely(!cache->nr_cached)) { 56 - void *obj = kmalloc(cache->elem_size, gfp); 44 + void *obj; 57 45 58 - if (obj && init_once) 59 - init_once(obj); 46 + obj = io_alloc_cache_get(cache); 47 + if (obj) 60 48 return obj; 61 - } 62 - return io_alloc_cache_get(cache); 49 + return io_cache_alloc_new(cache, gfp); 63 50 } 64 51 65 - /* returns false if the cache was initialized properly */ 66 - static inline bool io_alloc_cache_init(struct io_alloc_cache *cache, 67 - unsigned max_nr, size_t size) 68 - { 69 - cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); 70 - if (cache->entries) { 71 - cache->nr_cached = 0; 72 - cache->max_cached = max_nr; 73 - cache->elem_size = size; 74 - return false; 75 - } 76 - return true; 77 - } 78 - 79 - static inline void io_alloc_cache_free(struct io_alloc_cache *cache, 80 - void (*free)(const void *)) 81 - { 82 - void *entry; 83 - 84 - if (!cache->entries) 85 - return; 86 - 87 - while ((entry = io_alloc_cache_get(cache)) != NULL) 88 - free(entry); 89 - 90 - kvfree(cache->entries); 91 - cache->entries = NULL; 92 - } 93 52 #endif
+1 -1
io_uring/filetable.c
··· 68 68 if (slot_index >= ctx->file_table.data.nr) 69 69 return -EINVAL; 70 70 71 - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); 71 + node = io_rsrc_node_alloc(IORING_RSRC_FILE); 72 72 if (!node) 73 73 return -ENOMEM; 74 74
+2 -2
io_uring/futex.c
··· 36 36 bool io_futex_cache_init(struct io_ring_ctx *ctx) 37 37 { 38 38 return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX, 39 - sizeof(struct io_futex_data)); 39 + sizeof(struct io_futex_data), 0); 40 40 } 41 41 42 42 void io_futex_cache_free(struct io_ring_ctx *ctx) ··· 320 320 } 321 321 322 322 io_ring_submit_lock(ctx, issue_flags); 323 - ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT, NULL); 323 + ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT); 324 324 if (!ifd) { 325 325 ret = -ENOMEM; 326 326 goto done_unlock;
+7 -5
io_uring/io_uring.c
··· 315 315 INIT_LIST_HEAD(&ctx->cq_overflow_list); 316 316 INIT_LIST_HEAD(&ctx->io_buffers_cache); 317 317 ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX, 318 - sizeof(struct async_poll)); 318 + sizeof(struct async_poll), 0); 319 319 ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, 320 - sizeof(struct io_async_msghdr)); 320 + sizeof(struct io_async_msghdr), 321 + offsetof(struct io_async_msghdr, clear)); 321 322 ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, 322 - sizeof(struct io_async_rw)); 323 + sizeof(struct io_async_rw), 324 + offsetof(struct io_async_rw, clear)); 323 325 ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, 324 - sizeof(struct io_uring_cmd_data)); 326 + sizeof(struct io_uring_cmd_data), 0); 325 327 spin_lock_init(&ctx->msg_lock); 326 328 ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, 327 - sizeof(struct io_kiocb)); 329 + sizeof(struct io_kiocb), 0); 328 330 ret |= io_futex_cache_init(ctx); 329 331 if (ret) 330 332 goto free_ref;
+8 -13
io_uring/io_uring.h
··· 226 226 } 227 227 228 228 static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache, 229 - struct io_kiocb *req, 230 - void (*init_once)(void *obj)) 229 + struct io_kiocb *req) 231 230 { 232 - req->async_data = io_cache_alloc(cache, GFP_KERNEL, init_once); 233 - if (req->async_data) 234 - req->flags |= REQ_F_ASYNC_DATA; 235 - return req->async_data; 236 - } 231 + if (cache) { 232 + req->async_data = io_cache_alloc(cache, GFP_KERNEL); 233 + } else { 234 + const struct io_issue_def *def = &io_issue_defs[req->opcode]; 237 235 238 - static inline void *io_uring_alloc_async_data_nocache(struct io_kiocb *req) 239 - { 240 - const struct io_issue_def *def = &io_issue_defs[req->opcode]; 241 - 242 - WARN_ON_ONCE(!def->async_size); 243 - req->async_data = kmalloc(def->async_size, GFP_KERNEL); 236 + WARN_ON_ONCE(!def->async_size); 237 + req->async_data = kmalloc(def->async_size, GFP_KERNEL); 238 + } 244 239 if (req->async_data) 245 240 req->flags |= REQ_F_ASYNC_DATA; 246 241 return req->async_data;
+2 -2
io_uring/msg_ring.c
··· 89 89 static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, 90 90 int res, u32 cflags, u64 user_data) 91 91 { 92 - req->tctx = READ_ONCE(ctx->submitter_task->io_uring); 93 - if (!req->tctx) { 92 + if (!READ_ONCE(ctx->submitter_task)) { 94 93 kmem_cache_free(req_cachep, req); 95 94 return -EOWNERDEAD; 96 95 } ··· 97 98 io_req_set_res(req, res, cflags); 98 99 percpu_ref_get(&ctx->refs); 99 100 req->ctx = ctx; 101 + req->tctx = NULL; 100 102 req->io_task_work.func = io_msg_tw_complete; 101 103 io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE); 102 104 return 0;
+68 -66
io_uring/net.c
··· 137 137 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) 138 138 { 139 139 struct io_async_msghdr *hdr = req->async_data; 140 - struct iovec *iov; 141 140 142 141 /* can't recycle, ensure we free the iovec if we have one */ 143 142 if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { ··· 145 146 } 146 147 147 148 /* Let normal cleanup path reap it if we fail adding to the cache */ 148 - iov = hdr->free_iov; 149 + io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr); 149 150 if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { 150 - if (iov) 151 - kasan_mempool_poison_object(iov); 152 151 req->async_data = NULL; 153 152 req->flags &= ~REQ_F_ASYNC_DATA; 154 153 } 155 - } 156 - 157 - static void io_msg_async_data_init(void *obj) 158 - { 159 - struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj; 160 - 161 - hdr->free_iov = NULL; 162 - hdr->free_iov_nr = 0; 163 154 } 164 155 165 156 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) ··· 157 168 struct io_ring_ctx *ctx = req->ctx; 158 169 struct io_async_msghdr *hdr; 159 170 160 - hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req, 161 - io_msg_async_data_init); 171 + hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req); 162 172 if (!hdr) 163 173 return NULL; 164 174 165 175 /* If the async data was cached, we might have an iov cached inside. 
*/ 166 - if (hdr->free_iov) { 167 - kasan_mempool_unpoison_object(hdr->free_iov, 168 - hdr->free_iov_nr * sizeof(struct iovec)); 176 + if (hdr->free_iov) 169 177 req->flags |= REQ_F_NEED_CLEANUP; 170 - } 171 178 return hdr; 172 179 } 173 180 174 181 /* assign new iovec to kmsg, if we need to */ 175 - static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, 182 + static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, 176 183 struct iovec *iov) 177 184 { 178 185 if (iov) { ··· 178 193 kfree(kmsg->free_iov); 179 194 kmsg->free_iov = iov; 180 195 } 181 - return 0; 182 196 } 183 197 184 198 static inline void io_mshot_prep_retry(struct io_kiocb *req, ··· 239 255 if (unlikely(ret < 0)) 240 256 return ret; 241 257 242 - return io_net_vec_assign(req, iomsg, iov); 258 + io_net_vec_assign(req, iomsg, iov); 259 + return 0; 243 260 } 244 261 #endif 245 262 ··· 280 295 ret = -EINVAL; 281 296 goto ua_end; 282 297 } else { 298 + struct iovec __user *uiov = msg->msg_iov; 299 + 283 300 /* we only need the length for provided buffers */ 284 - if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t))) 301 + if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len))) 285 302 goto ua_end; 286 - unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len, 287 - ua_end); 303 + unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end); 288 304 sr->len = iov->iov_len; 289 305 } 290 306 ret = 0; ··· 300 314 if (unlikely(ret < 0)) 301 315 return ret; 302 316 303 - return io_net_vec_assign(req, iomsg, iov); 317 + io_net_vec_assign(req, iomsg, iov); 318 + return 0; 304 319 } 305 320 306 321 static int io_sendmsg_copy_hdr(struct io_kiocb *req, ··· 566 579 return IOU_OK; 567 580 } 568 581 582 + static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags, 583 + struct io_async_msghdr *kmsg) 584 + { 585 + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 586 + 587 + int ret; 588 + struct buf_sel_arg arg = { 589 + .iovs = 
&kmsg->fast_iov, 590 + .max_len = min_not_zero(sr->len, INT_MAX), 591 + .nr_iovs = 1, 592 + }; 593 + 594 + if (kmsg->free_iov) { 595 + arg.nr_iovs = kmsg->free_iov_nr; 596 + arg.iovs = kmsg->free_iov; 597 + arg.mode = KBUF_MODE_FREE; 598 + } 599 + 600 + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) 601 + arg.nr_iovs = 1; 602 + else 603 + arg.mode |= KBUF_MODE_EXPAND; 604 + 605 + ret = io_buffers_select(req, &arg, issue_flags); 606 + if (unlikely(ret < 0)) 607 + return ret; 608 + 609 + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { 610 + kmsg->free_iov_nr = ret; 611 + kmsg->free_iov = arg.iovs; 612 + req->flags |= REQ_F_NEED_CLEANUP; 613 + } 614 + sr->len = arg.out_len; 615 + 616 + if (ret == 1) { 617 + sr->buf = arg.iovs[0].iov_base; 618 + ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, 619 + &kmsg->msg.msg_iter); 620 + if (unlikely(ret)) 621 + return ret; 622 + } else { 623 + iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, 624 + arg.iovs, ret, arg.out_len); 625 + } 626 + 627 + return 0; 628 + } 629 + 569 630 int io_send(struct io_kiocb *req, unsigned int issue_flags) 570 631 { 571 632 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); ··· 637 602 638 603 retry_bundle: 639 604 if (io_do_buffer_select(req)) { 640 - struct buf_sel_arg arg = { 641 - .iovs = &kmsg->fast_iov, 642 - .max_len = min_not_zero(sr->len, INT_MAX), 643 - .nr_iovs = 1, 644 - }; 645 - 646 - if (kmsg->free_iov) { 647 - arg.nr_iovs = kmsg->free_iov_nr; 648 - arg.iovs = kmsg->free_iov; 649 - arg.mode = KBUF_MODE_FREE; 650 - } 651 - 652 - if (!(sr->flags & IORING_RECVSEND_BUNDLE)) 653 - arg.nr_iovs = 1; 654 - else 655 - arg.mode |= KBUF_MODE_EXPAND; 656 - 657 - ret = io_buffers_select(req, &arg, issue_flags); 658 - if (unlikely(ret < 0)) 605 + ret = io_send_select_buffer(req, issue_flags, kmsg); 606 + if (ret) 659 607 return ret; 660 - 661 - if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { 662 - kmsg->free_iov_nr = ret; 663 - kmsg->free_iov = arg.iovs; 664 
- req->flags |= REQ_F_NEED_CLEANUP; 665 - } 666 - sr->len = arg.out_len; 667 - 668 - if (ret == 1) { 669 - sr->buf = arg.iovs[0].iov_base; 670 - ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, 671 - &kmsg->msg.msg_iter); 672 - if (unlikely(ret)) 673 - return ret; 674 - } else { 675 - iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, 676 - arg.iovs, ret, arg.out_len); 677 - } 678 608 } 679 609 680 610 /* ··· 1710 1710 int ret; 1711 1711 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1712 1712 1713 + if (unlikely(req->flags & REQ_F_FAIL)) { 1714 + ret = -ECONNRESET; 1715 + goto out; 1716 + } 1717 + 1713 1718 file_flags = force_nonblock ? O_NONBLOCK : 0; 1714 1719 1715 1720 ret = __sys_connect_file(req->file, &io->addr, connect->addr_len, ··· 1818 1813 { 1819 1814 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; 1820 1815 1821 - if (kmsg->free_iov) { 1822 - kasan_mempool_unpoison_object(kmsg->free_iov, 1823 - kmsg->free_iov_nr * sizeof(struct iovec)); 1816 + if (kmsg->free_iov) 1824 1817 io_netmsg_iovec_free(kmsg); 1825 - } 1826 1818 kfree(kmsg); 1827 1819 } 1828 1820 #endif
+12 -8
io_uring/net.h
··· 5 5 6 6 struct io_async_msghdr { 7 7 #if defined(CONFIG_NET) 8 - struct iovec fast_iov; 9 - /* points to an allocated iov, if NULL we use fast_iov instead */ 10 8 struct iovec *free_iov; 9 + /* points to an allocated iov, if NULL we use fast_iov instead */ 11 10 int free_iov_nr; 12 - int namelen; 13 - __kernel_size_t controllen; 14 - __kernel_size_t payloadlen; 15 - struct sockaddr __user *uaddr; 16 - struct msghdr msg; 17 - struct sockaddr_storage addr; 11 + struct_group(clear, 12 + int namelen; 13 + struct iovec fast_iov; 14 + __kernel_size_t controllen; 15 + __kernel_size_t payloadlen; 16 + struct sockaddr __user *uaddr; 17 + struct msghdr msg; 18 + struct sockaddr_storage addr; 19 + ); 20 + #else 21 + struct_group(clear); 18 22 #endif 19 23 }; 20 24
+5 -1
io_uring/poll.c
··· 273 273 return IOU_POLL_REISSUE; 274 274 } 275 275 } 276 + if (unlikely(req->cqe.res & EPOLLERR)) 277 + req_set_fail(req); 276 278 if (req->apoll_events & EPOLLONESHOT) 277 279 return IOU_POLL_DONE; 278 280 ··· 317 315 318 316 ret = io_poll_check_events(req, ts); 319 317 if (ret == IOU_POLL_NO_ACTION) { 318 + io_kbuf_recycle(req, 0); 320 319 return; 321 320 } else if (ret == IOU_POLL_REQUEUE) { 321 + io_kbuf_recycle(req, 0); 322 322 __io_poll_execute(req, 0); 323 323 return; 324 324 } ··· 654 650 kfree(apoll->double_poll); 655 651 } else { 656 652 if (!(issue_flags & IO_URING_F_UNLOCKED)) 657 - apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC, NULL); 653 + apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC); 658 654 else 659 655 apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); 660 656 if (!apoll)
+5 -3
io_uring/register.c
··· 552 552 ctx->cqe_cached = ctx->cqe_sentinel = NULL; 553 553 554 554 WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped)); 555 - WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags)); 555 + atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags)); 556 556 WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags)); 557 557 WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow)); 558 558 ··· 853 853 return ERR_PTR(-EINVAL); 854 854 fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); 855 855 file = tctx->registered_rings[fd]; 856 + if (file) 857 + get_file(file); 856 858 } else { 857 859 file = fget(fd); 858 860 } ··· 921 919 trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr, 922 920 ctx->buf_table.nr, ret); 923 921 mutex_unlock(&ctx->uring_lock); 924 - if (!use_registered_ring) 925 - fput(file); 922 + 923 + fput(file); 926 924 return ret; 927 925 }
+46 -42
io_uring/rsrc.c
··· 118 118 } 119 119 } 120 120 121 - struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type) 121 + struct io_rsrc_node *io_rsrc_node_alloc(int type) 122 122 { 123 123 struct io_rsrc_node *node; 124 124 ··· 203 203 err = -EBADF; 204 204 break; 205 205 } 206 - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); 206 + node = io_rsrc_node_alloc(IORING_RSRC_FILE); 207 207 if (!node) { 208 208 err = -ENOMEM; 209 209 fput(file); ··· 444 444 445 445 void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) 446 446 { 447 - lockdep_assert_held(&ctx->uring_lock); 448 - 449 447 if (node->tag) 450 448 io_post_aux_cqe(ctx, node->tag, 0, 0); 451 449 ··· 523 525 goto fail; 524 526 } 525 527 ret = -ENOMEM; 526 - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); 528 + node = io_rsrc_node_alloc(IORING_RSRC_FILE); 527 529 if (!node) { 528 530 fput(file); 529 531 goto fail; ··· 728 730 if (!iov->iov_base) 729 731 return NULL; 730 732 731 - node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); 733 + node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); 732 734 if (!node) 733 735 return ERR_PTR(-ENOMEM); 734 736 node->buf = NULL; ··· 919 921 return 0; 920 922 } 921 923 924 + /* Lock two rings at once. The rings must be different! */ 925 + static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2) 926 + { 927 + if (ctx1 > ctx2) 928 + swap(ctx1, ctx2); 929 + mutex_lock(&ctx1->uring_lock); 930 + mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING); 931 + } 932 + 933 + /* Both rings are locked by the caller. 
*/ 922 934 static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx, 923 935 struct io_uring_clone_buffers *arg) 924 936 { 925 937 struct io_rsrc_data data; 926 938 int i, ret, off, nr; 927 939 unsigned int nbufs; 940 + 941 + lockdep_assert_held(&ctx->uring_lock); 942 + lockdep_assert_held(&src_ctx->uring_lock); 928 943 929 944 /* 930 945 * Accounting state is shared between the two rings; that only works if ··· 953 942 if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE)) 954 943 return -EBUSY; 955 944 956 - nbufs = READ_ONCE(src_ctx->buf_table.nr); 945 + nbufs = src_ctx->buf_table.nr; 957 946 if (!arg->nr) 958 947 arg->nr = nbufs; 959 948 else if (arg->nr > nbufs) ··· 977 966 } 978 967 } 979 968 980 - /* 981 - * Drop our own lock here. We'll setup the data we need and reference 982 - * the source buffers, then re-grab, check, and assign at the end. 983 - */ 984 - mutex_unlock(&ctx->uring_lock); 985 - 986 - mutex_lock(&src_ctx->uring_lock); 987 969 ret = -ENXIO; 988 970 nbufs = src_ctx->buf_table.nr; 989 971 if (!nbufs) 990 - goto out_unlock; 972 + goto out_free; 991 973 ret = -EINVAL; 992 974 if (!arg->nr) 993 975 arg->nr = nbufs; 994 976 else if (arg->nr > nbufs) 995 - goto out_unlock; 977 + goto out_free; 996 978 ret = -EOVERFLOW; 997 979 if (check_add_overflow(arg->nr, arg->src_off, &off)) 998 - goto out_unlock; 980 + goto out_free; 999 981 if (off > nbufs) 1000 - goto out_unlock; 982 + goto out_free; 1001 983 1002 984 off = arg->dst_off; 1003 985 i = arg->src_off; ··· 1002 998 if (!src_node) { 1003 999 dst_node = NULL; 1004 1000 } else { 1005 - dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); 1001 + dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); 1006 1002 if (!dst_node) { 1007 1003 ret = -ENOMEM; 1008 - goto out_unlock; 1004 + goto out_free; 1009 1005 } 1010 1006 1011 1007 refcount_inc(&src_node->buf->refs); ··· 1015 1011 i++; 1016 1012 } 1017 1013 1018 - /* Have a ref on the bufs now, drop src lock and 
re-grab our own lock */ 1019 - mutex_unlock(&src_ctx->uring_lock); 1020 - mutex_lock(&ctx->uring_lock); 1021 - 1022 1014 /* 1023 1015 * If asked for replace, put the old table. data->nodes[] holds both 1024 1016 * old and new nodes at this point. ··· 1023 1023 io_rsrc_data_free(ctx, &ctx->buf_table); 1024 1024 1025 1025 /* 1026 - * ctx->buf_table should be empty now - either the contents are being 1027 - * replaced and we just freed the table, or someone raced setting up 1028 - * a buffer table while the clone was happening. If not empty, fall 1029 - * through to failure handling. 1026 + * ctx->buf_table must be empty now - either the contents are being 1027 + * replaced and we just freed the table, or the contents are being 1028 + * copied to a ring that does not have buffers yet (checked at function 1029 + * entry). 1030 1030 */ 1031 - if (!ctx->buf_table.nr) { 1032 - ctx->buf_table = data; 1033 - return 0; 1034 - } 1031 + WARN_ON_ONCE(ctx->buf_table.nr); 1032 + ctx->buf_table = data; 1033 + return 0; 1035 1034 1036 - mutex_unlock(&ctx->uring_lock); 1037 - mutex_lock(&src_ctx->uring_lock); 1038 - /* someone raced setting up buffers, dump ours */ 1039 - ret = -EBUSY; 1040 - out_unlock: 1035 + out_free: 1041 1036 io_rsrc_data_free(ctx, &data); 1042 - mutex_unlock(&src_ctx->uring_lock); 1043 - mutex_lock(&ctx->uring_lock); 1044 1037 return ret; 1045 1038 } 1046 1039 ··· 1047 1054 int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) 1048 1055 { 1049 1056 struct io_uring_clone_buffers buf; 1057 + struct io_ring_ctx *src_ctx; 1050 1058 bool registered_src; 1051 1059 struct file *file; 1052 1060 int ret; ··· 1065 1071 file = io_uring_register_get_file(buf.src_fd, registered_src); 1066 1072 if (IS_ERR(file)) 1067 1073 return PTR_ERR(file); 1068 - ret = io_clone_buffers(ctx, file->private_data, &buf); 1069 - if (!registered_src) 1070 - fput(file); 1074 + 1075 + src_ctx = file->private_data; 1076 + if (src_ctx != ctx) { 1077 + 
mutex_unlock(&ctx->uring_lock); 1078 + lock_two_rings(ctx, src_ctx); 1079 + } 1080 + 1081 + ret = io_clone_buffers(ctx, src_ctx, &buf); 1082 + 1083 + if (src_ctx != ctx) 1084 + mutex_unlock(&src_ctx->uring_lock); 1085 + 1086 + fput(file); 1071 1087 return ret; 1072 1088 }
+4 -1
io_uring/rsrc.h
··· 2 2 #ifndef IOU_RSRC_H 3 3 #define IOU_RSRC_H 4 4 5 + #include <linux/lockdep.h> 6 + 5 7 #define IO_NODE_ALLOC_CACHE_MAX 32 6 8 7 9 #define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) ··· 45 43 unsigned int nr_folios; 46 44 }; 47 45 48 - struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type); 46 + struct io_rsrc_node *io_rsrc_node_alloc(int type); 49 47 void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node); 50 48 void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data); 51 49 int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr); ··· 82 80 83 81 static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) 84 82 { 83 + lockdep_assert_held(&ctx->uring_lock); 85 84 if (node && !--node->refs) 86 85 io_free_rsrc_node(ctx, node); 87 86 }
+7 -34
io_uring/rw.c
··· 146 146 return 0; 147 147 } 148 148 149 - static void io_rw_iovec_free(struct io_async_rw *rw) 150 - { 151 - if (rw->free_iovec) { 152 - kfree(rw->free_iovec); 153 - rw->free_iov_nr = 0; 154 - rw->free_iovec = NULL; 155 - } 156 - } 157 - 158 149 static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags) 159 150 { 160 151 struct io_async_rw *rw = req->async_data; 161 - struct iovec *iov; 162 152 163 - if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { 164 - io_rw_iovec_free(rw); 153 + if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) 165 154 return; 166 - } 167 - iov = rw->free_iovec; 155 + 156 + io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr); 168 157 if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) { 169 - if (iov) 170 - kasan_mempool_poison_object(iov); 171 158 req->async_data = NULL; 172 159 req->flags &= ~REQ_F_ASYNC_DATA; 173 160 } ··· 195 208 } 196 209 } 197 210 198 - static void io_rw_async_data_init(void *obj) 199 - { 200 - struct io_async_rw *rw = (struct io_async_rw *)obj; 201 - 202 - rw->free_iovec = NULL; 203 - rw->bytes_done = 0; 204 - } 205 - 206 211 static int io_rw_alloc_async(struct io_kiocb *req) 207 212 { 208 213 struct io_ring_ctx *ctx = req->ctx; 209 214 struct io_async_rw *rw; 210 215 211 - rw = io_uring_alloc_async_data(&ctx->rw_cache, req, io_rw_async_data_init); 216 + rw = io_uring_alloc_async_data(&ctx->rw_cache, req); 212 217 if (!rw) 213 218 return -ENOMEM; 214 - if (rw->free_iovec) { 215 - kasan_mempool_unpoison_object(rw->free_iovec, 216 - rw->free_iov_nr * sizeof(struct iovec)); 219 + if (rw->free_iovec) 217 220 req->flags |= REQ_F_NEED_CLEANUP; 218 - } 219 221 rw->bytes_done = 0; 220 222 return 0; 221 223 } ··· 1299 1323 { 1300 1324 struct io_async_rw *rw = (struct io_async_rw *) entry; 1301 1325 1302 - if (rw->free_iovec) { 1303 - kasan_mempool_unpoison_object(rw->free_iovec, 1304 - rw->free_iov_nr * sizeof(struct iovec)); 1305 - io_rw_iovec_free(rw); 1306 - } 1326 + if (rw->free_iovec) 1327 + 
kfree(rw->free_iovec); 1307 1328 kfree(rw); 1308 1329 }
+16 -11
io_uring/rw.h
··· 9 9 10 10 struct io_async_rw { 11 11 size_t bytes_done; 12 - struct iov_iter iter; 13 - struct iov_iter_state iter_state; 14 - struct iovec fast_iov; 15 12 struct iovec *free_iovec; 16 - int free_iov_nr; 17 - /* wpq is for buffered io, while meta fields are used with direct io */ 18 - union { 19 - struct wait_page_queue wpq; 20 - struct { 21 - struct uio_meta meta; 22 - struct io_meta_state meta_state; 13 + struct_group(clear, 14 + struct iov_iter iter; 15 + struct iov_iter_state iter_state; 16 + struct iovec fast_iov; 17 + int free_iov_nr; 18 + /* 19 + * wpq is for buffered io, while meta fields are used with 20 + * direct io 21 + */ 22 + union { 23 + struct wait_page_queue wpq; 24 + struct { 25 + struct uio_meta meta; 26 + struct io_meta_state meta_state; 27 + }; 23 28 }; 24 - }; 29 + ); 25 30 }; 26 31 27 32 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+1 -1
io_uring/timeout.c
··· 544 544 545 545 if (WARN_ON_ONCE(req_has_async_data(req))) 546 546 return -EFAULT; 547 - data = io_uring_alloc_async_data_nocache(req); 547 + data = io_uring_alloc_async_data(NULL, req); 548 548 if (!data) 549 549 return -ENOMEM; 550 550 data->req = req;
+6 -13
io_uring/uring_cmd.c
··· 168 168 } 169 169 EXPORT_SYMBOL_GPL(io_uring_cmd_done); 170 170 171 - static void io_uring_cmd_init_once(void *obj) 172 - { 173 - struct io_uring_cmd_data *data = obj; 174 - 175 - data->op_data = NULL; 176 - } 177 - 178 171 static int io_uring_cmd_prep_setup(struct io_kiocb *req, 179 172 const struct io_uring_sqe *sqe) 180 173 { 181 174 struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); 182 175 struct io_uring_cmd_data *cache; 183 176 184 - cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req, 185 - io_uring_cmd_init_once); 177 + cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req); 186 178 if (!cache) 187 179 return -ENOMEM; 180 + cache->op_data = NULL; 188 181 189 182 if (!(req->flags & REQ_F_FORCE_ASYNC)) { 190 183 /* defer memcpy until we need it */ ··· 185 192 return 0; 186 193 } 187 194 188 - memcpy(req->async_data, sqe, uring_sqe_size(req->ctx)); 189 - ioucmd->sqe = req->async_data; 195 + memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); 196 + ioucmd->sqe = cache->sqes; 190 197 return 0; 191 198 } 192 199 ··· 253 260 struct io_uring_cmd_data *cache = req->async_data; 254 261 255 262 if (ioucmd->sqe != (void *) cache) 256 - memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx)); 263 + memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); 257 264 return -EAGAIN; 258 265 } else if (ret == -EIOCBQUEUED) { 259 266 return -EIOCBQUEUED; ··· 343 350 if (!prot || !prot->ioctl) 344 351 return -EOPNOTSUPP; 345 352 346 - switch (cmd->sqe->cmd_op) { 353 + switch (cmd->cmd_op) { 347 354 case SOCKET_URING_OP_SIOCINQ: 348 355 ret = prot->ioctl(sk, SIOCINQ, &arg); 349 356 if (ret)
+1 -1
io_uring/waitid.c
··· 303 303 struct io_waitid_async *iwa; 304 304 int ret; 305 305 306 - iwa = io_uring_alloc_async_data_nocache(req); 306 + iwa = io_uring_alloc_async_data(NULL, req); 307 307 if (!iwa) 308 308 return -ENOMEM; 309 309