Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-5.9-2020-08-21' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

- Make sure the head link cancelation includes async work

- Get rid of kiocb_wait_page_queue_init(); it makes no sense to have it as
a separate function since you moved it into io_uring itself

- io_import_iovec cleanups (Pavel, me)

- Use system_unbound_wq for ring exit work, to avoid spawning tons of
event kworkers if we have tons of rings exiting at the same time

- Fix req->flags overflow flag manipulation (Pavel)

* tag 'io_uring-5.9-2020-08-21' of git://git.kernel.dk/linux-block:
io_uring: kill extra iovec=NULL in import_iovec()
io_uring: comment on kfree(iovec) checks
io_uring: fix racy req->flags modification
io_uring: use system_unbound_wq for ring exit work
io_uring: cleanup io_import_iovec() of pre-mapped request
io_uring: get rid of kiocb_wait_page_queue_init()
io_uring: find and cancel head link async work on files exit

+79 -94
+79 -94
fs/io_uring.c
··· 540 540 REQ_F_ISREG_BIT, 541 541 REQ_F_COMP_LOCKED_BIT, 542 542 REQ_F_NEED_CLEANUP_BIT, 543 - REQ_F_OVERFLOW_BIT, 544 543 REQ_F_POLLED_BIT, 545 544 REQ_F_BUFFER_SELECTED_BIT, 546 545 REQ_F_NO_FILE_TABLE_BIT, ··· 582 583 REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT), 583 584 /* needs cleanup */ 584 585 REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), 585 - /* in overflow list */ 586 - REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT), 587 586 /* already went through poll handler */ 588 587 REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), 589 588 /* buffer already selected */ ··· 943 946 944 947 static inline void io_clean_op(struct io_kiocb *req) 945 948 { 946 - if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED)) 949 + if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED | 950 + REQ_F_INFLIGHT)) 947 951 __io_clean_op(req); 948 952 } 949 953 ··· 1364 1366 req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb, 1365 1367 compl.list); 1366 1368 list_move(&req->compl.list, &list); 1367 - req->flags &= ~REQ_F_OVERFLOW; 1368 1369 if (cqe) { 1369 1370 WRITE_ONCE(cqe->user_data, req->user_data); 1370 1371 WRITE_ONCE(cqe->res, req->result); ··· 1416 1419 ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; 1417 1420 } 1418 1421 io_clean_op(req); 1419 - req->flags |= REQ_F_OVERFLOW; 1420 1422 req->result = res; 1421 1423 req->compl.cflags = cflags; 1422 1424 refcount_inc(&req->refs); ··· 1558 1562 kfree(req->io); 1559 1563 if (req->file) 1560 1564 io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); 1561 - 1562 - if (req->flags & REQ_F_INFLIGHT) { 1563 - struct io_ring_ctx *ctx = req->ctx; 1564 - unsigned long flags; 1565 - 1566 - spin_lock_irqsave(&ctx->inflight_lock, flags); 1567 - list_del(&req->inflight_entry); 1568 - if (waitqueue_active(&ctx->inflight_wait)) 1569 - wake_up(&ctx->inflight_wait); 1570 - spin_unlock_irqrestore(&ctx->inflight_lock, flags); 1571 - } 1572 1565 1573 1566 return io_req_clean_work(req); 1574 1567 } ··· 2804 2819 return 
__io_iov_buffer_select(req, iov, needs_lock); 2805 2820 } 2806 2821 2807 - static ssize_t io_import_iovec(int rw, struct io_kiocb *req, 2808 - struct iovec **iovec, struct iov_iter *iter, 2809 - bool needs_lock) 2822 + static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, 2823 + struct iovec **iovec, struct iov_iter *iter, 2824 + bool needs_lock) 2810 2825 { 2811 2826 void __user *buf = u64_to_user_ptr(req->rw.addr); 2812 2827 size_t sqe_len = req->rw.len; 2813 2828 ssize_t ret; 2814 2829 u8 opcode; 2815 - 2816 - if (req->io) { 2817 - struct io_async_rw *iorw = &req->io->rw; 2818 - 2819 - *iovec = NULL; 2820 - return iov_iter_count(&iorw->iter); 2821 - } 2822 2830 2823 2831 opcode = req->opcode; 2824 2832 if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { ··· 2826 2848 if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { 2827 2849 if (req->flags & REQ_F_BUFFER_SELECT) { 2828 2850 buf = io_rw_buffer_select(req, &sqe_len, needs_lock); 2829 - if (IS_ERR(buf)) { 2830 - *iovec = NULL; 2851 + if (IS_ERR(buf)) 2831 2852 return PTR_ERR(buf); 2832 - } 2833 2853 req->rw.len = sqe_len; 2834 2854 } 2835 2855 ··· 2853 2877 #endif 2854 2878 2855 2879 return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter); 2880 + } 2881 + 2882 + static ssize_t io_import_iovec(int rw, struct io_kiocb *req, 2883 + struct iovec **iovec, struct iov_iter *iter, 2884 + bool needs_lock) 2885 + { 2886 + if (!req->io) 2887 + return __io_import_iovec(rw, req, iovec, iter, needs_lock); 2888 + *iovec = NULL; 2889 + return iov_iter_count(&req->io->rw.iter); 2856 2890 } 2857 2891 2858 2892 /* ··· 2987 3001 ssize_t ret; 2988 3002 2989 3003 iorw->iter.iov = iorw->fast_iov; 2990 - /* reset ->io around the iovec import, we don't want to use it */ 2991 - req->io = NULL; 2992 - ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov, 3004 + ret = __io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov, 2993 3005 &iorw->iter, !force_nonblock); 2994 - 
req->io = container_of(iorw, struct io_async_ctx, rw); 2995 3006 if (unlikely(ret < 0)) 2996 3007 return ret; 2997 3008 ··· 3057 3074 return 1; 3058 3075 } 3059 3076 3060 - static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb, 3061 - struct wait_page_queue *wait, 3062 - wait_queue_func_t func, 3063 - void *data) 3064 - { 3065 - /* Can't support async wakeup with polled IO */ 3066 - if (kiocb->ki_flags & IOCB_HIPRI) 3067 - return -EINVAL; 3068 - if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) { 3069 - wait->wait.func = func; 3070 - wait->wait.private = data; 3071 - wait->wait.flags = 0; 3072 - INIT_LIST_HEAD(&wait->wait.entry); 3073 - kiocb->ki_flags |= IOCB_WAITQ; 3074 - kiocb->ki_waitq = wait; 3075 - return 0; 3076 - } 3077 - 3078 - return -EOPNOTSUPP; 3079 - } 3080 - 3081 3077 /* 3082 3078 * This controls whether a given IO request should be armed for async page 3083 3079 * based retry. If we return false here, the request is handed to the async ··· 3071 3109 */ 3072 3110 static bool io_rw_should_retry(struct io_kiocb *req) 3073 3111 { 3112 + struct wait_page_queue *wait = &req->io->rw.wpq; 3074 3113 struct kiocb *kiocb = &req->rw.kiocb; 3075 - int ret; 3076 3114 3077 3115 /* never retry for NOWAIT, we just complete with -EAGAIN */ 3078 3116 if (req->flags & REQ_F_NOWAIT) 3079 3117 return false; 3080 3118 3081 3119 /* Only for buffered IO */ 3082 - if (kiocb->ki_flags & IOCB_DIRECT) 3120 + if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) 3083 3121 return false; 3122 + 3084 3123 /* 3085 3124 * just use poll if we can, and don't attempt if the fs doesn't 3086 3125 * support callback based unlocks ··· 3089 3126 if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) 3090 3127 return false; 3091 3128 3092 - ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq, 3093 - io_async_buf_func, req); 3094 - if (!ret) { 3095 - io_get_req_task(req); 3096 - return true; 3097 - } 3129 + wait->wait.func = io_async_buf_func; 3130 + 
wait->wait.private = req; 3131 + wait->wait.flags = 0; 3132 + INIT_LIST_HEAD(&wait->wait.entry); 3133 + kiocb->ki_flags |= IOCB_WAITQ; 3134 + kiocb->ki_waitq = wait; 3098 3135 3099 - return false; 3136 + io_get_req_task(req); 3137 + return true; 3100 3138 } 3101 3139 3102 3140 static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) ··· 3202 3238 kiocb_done(kiocb, ret, cs); 3203 3239 ret = 0; 3204 3240 out_free: 3241 + /* it's reportedly faster than delegating the null check to kfree() */ 3205 3242 if (iovec) 3206 3243 kfree(iovec); 3207 3244 return ret; ··· 3299 3334 return -EAGAIN; 3300 3335 } 3301 3336 out_free: 3337 + /* it's reportedly faster than delegating the null check to kfree() */ 3302 3338 if (iovec) 3303 3339 kfree(iovec); 3304 3340 return ret; ··· 5618 5652 break; 5619 5653 } 5620 5654 req->flags &= ~REQ_F_NEED_CLEANUP; 5655 + } 5656 + 5657 + if (req->flags & REQ_F_INFLIGHT) { 5658 + struct io_ring_ctx *ctx = req->ctx; 5659 + unsigned long flags; 5660 + 5661 + spin_lock_irqsave(&ctx->inflight_lock, flags); 5662 + list_del(&req->inflight_entry); 5663 + if (waitqueue_active(&ctx->inflight_wait)) 5664 + wake_up(&ctx->inflight_wait); 5665 + spin_unlock_irqrestore(&ctx->inflight_lock, flags); 5666 + req->flags &= ~REQ_F_INFLIGHT; 5621 5667 } 5622 5668 } 5623 5669 ··· 7957 7979 ACCT_LOCKED); 7958 7980 7959 7981 INIT_WORK(&ctx->exit_work, io_ring_exit_work); 7960 - queue_work(system_wq, &ctx->exit_work); 7982 + /* 7983 + * Use system_unbound_wq to avoid spawning tons of event kworkers 7984 + * if we're exiting a ton of rings at the same time. It just adds 7985 + * noise and overhead, there's no discernable change in runtime 7986 + * over using system_wq. 
7987 + */ 7988 + queue_work(system_unbound_wq, &ctx->exit_work); 7961 7989 } 7962 7990 7963 7991 static int io_uring_release(struct inode *inode, struct file *file) ··· 8047 8063 return found; 8048 8064 } 8049 8065 8066 + static bool io_cancel_link_cb(struct io_wq_work *work, void *data) 8067 + { 8068 + return io_match_link(container_of(work, struct io_kiocb, work), data); 8069 + } 8070 + 8071 + static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) 8072 + { 8073 + enum io_wq_cancel cret; 8074 + 8075 + /* cancel this particular work, if it's running */ 8076 + cret = io_wq_cancel_work(ctx->io_wq, &req->work); 8077 + if (cret != IO_WQ_CANCEL_NOTFOUND) 8078 + return; 8079 + 8080 + /* find links that hold this pending, cancel those */ 8081 + cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true); 8082 + if (cret != IO_WQ_CANCEL_NOTFOUND) 8083 + return; 8084 + 8085 + /* if we have a poll link holding this pending, cancel that */ 8086 + if (io_poll_remove_link(ctx, req)) 8087 + return; 8088 + 8089 + /* final option, timeout link is holding this req pending */ 8090 + io_timeout_remove_link(ctx, req); 8091 + } 8092 + 8050 8093 static void io_uring_cancel_files(struct io_ring_ctx *ctx, 8051 8094 struct files_struct *files) 8052 8095 { ··· 8105 8094 /* We need to keep going until we don't find a matching req */ 8106 8095 if (!cancel_req) 8107 8096 break; 8108 - 8109 - if (cancel_req->flags & REQ_F_OVERFLOW) { 8110 - spin_lock_irq(&ctx->completion_lock); 8111 - list_del(&cancel_req->compl.list); 8112 - cancel_req->flags &= ~REQ_F_OVERFLOW; 8113 - 8114 - io_cqring_mark_overflow(ctx); 8115 - WRITE_ONCE(ctx->rings->cq_overflow, 8116 - atomic_inc_return(&ctx->cached_cq_overflow)); 8117 - io_commit_cqring(ctx); 8118 - spin_unlock_irq(&ctx->completion_lock); 8119 - 8120 - /* 8121 - * Put inflight ref and overflow ref. If that's 8122 - * all we had, then we're done with this request. 
8123 - */ 8124 - if (refcount_sub_and_test(2, &cancel_req->refs)) { 8125 - io_free_req(cancel_req); 8126 - finish_wait(&ctx->inflight_wait, &wait); 8127 - continue; 8128 - } 8129 - } else { 8130 - io_wq_cancel_work(ctx->io_wq, &cancel_req->work); 8131 - /* could be a link, check and remove if it is */ 8132 - if (!io_poll_remove_link(ctx, cancel_req)) 8133 - io_timeout_remove_link(ctx, cancel_req); 8134 - io_put_req(cancel_req); 8135 - } 8136 - 8097 + /* cancel this request, or head link requests */ 8098 + io_attempt_cancel(ctx, cancel_req); 8099 + io_put_req(cancel_req); 8137 8100 schedule(); 8138 8101 finish_wait(&ctx->inflight_wait, &wait); 8139 8102 }