Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-5.19-2022-06-02' of git://git.kernel.dk/linux-block

Pull more io_uring updates from Jens Axboe:

- A small series with some prep patches for the upcoming 5.20 split of
  the io_uring.c file. No functional changes here, just minor bits that
  are nice to get out of the way now (me)

- Fix for a memory leak in high numbered provided buffer groups,
  introduced in the merge window (me)

- Wire up the new socket opcode for allocated direct descriptors,
  making it consistent with the other opcodes that can instantiate a
  descriptor (me)

- Fix for the inflight tracking, should go into 5.18-stable as well
  (me)

- Fix for a deadlock for io-wq offloaded file slot allocations (Pavel)

- Direct descriptor failure fput leak fix (Xiaoguang)

- Fix for the direct descriptor allocation hinting in case of
  unsuccessful install (Xiaoguang)

* tag 'io_uring-5.19-2022-06-02' of git://git.kernel.dk/linux-block:
  io_uring: reinstate the inflight tracking
  io_uring: fix deadlock on iowq file slot alloc
  io_uring: let IORING_OP_FILES_UPDATE support choosing fixed file slots
  io_uring: defer alloc_hint update to io_file_bitmap_set()
  io_uring: ensure fput() called correspondingly when direct install fails
  io_uring: wire up allocated direct descriptors for socket
  io_uring: fix a memory leak of buffer group list on exit
  io_uring: move shutdown under the general net section
  io_uring: unify calling convention for async prep handling
  io_uring: add io_op_defs 'def' pointer in req init and issue
  io_uring: make prep and issue side of req handlers named consistently
  io_uring: make timeout prep handlers consistent with other prep handlers

+224 -122
+218 -122
fs/io_uring.c
··· 112 112 IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) 113 113 114 114 #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ 115 - REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA) 115 + REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ 116 + REQ_F_ASYNC_DATA) 116 117 117 118 #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\ 118 119 IO_REQ_CLEAN_FLAGS) ··· 541 540 const struct io_ring_ctx *last; 542 541 struct io_wq *io_wq; 543 542 struct percpu_counter inflight; 543 + atomic_t inflight_tracked; 544 544 atomic_t in_idle; 545 545 546 546 spinlock_t task_lock; ··· 576 574 struct file *file; 577 575 int fd; 578 576 u32 file_slot; 577 + u32 flags; 579 578 }; 580 579 581 580 struct io_timeout_data { ··· 1358 1355 static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, 1359 1356 unsigned issue_flags); 1360 1357 static struct file *io_file_get_normal(struct io_kiocb *req, int fd); 1361 - static void io_drop_inflight_file(struct io_kiocb *req); 1362 - static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags); 1363 1358 static void io_queue_sqe(struct io_kiocb *req); 1364 1359 static void io_rsrc_put_work(struct work_struct *work); 1365 1360 ··· 1367 1366 1368 1367 static int io_install_fixed_file(struct io_kiocb *req, struct file *file, 1369 1368 unsigned int issue_flags, u32 slot_index); 1370 - static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); 1369 + static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags, 1370 + unsigned int offset); 1371 + static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); 1371 1372 1372 1373 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); 1373 1374 static void io_eventfd_signal(struct io_ring_ctx *ctx); ··· 1760 1757 bool cancel_all) 1761 1758 __must_hold(&req->ctx->timeout_lock) 1762 1759 { 1760 + struct io_kiocb *req; 1761 + 1763 1762 if (task && head->task != task) 1764 
1763 return false; 1765 - return cancel_all; 1764 + if (cancel_all) 1765 + return true; 1766 + 1767 + io_for_each_link(req, head) { 1768 + if (req->flags & REQ_F_INFLIGHT) 1769 + return true; 1770 + } 1771 + return false; 1772 + } 1773 + 1774 + static bool io_match_linked(struct io_kiocb *head) 1775 + { 1776 + struct io_kiocb *req; 1777 + 1778 + io_for_each_link(req, head) { 1779 + if (req->flags & REQ_F_INFLIGHT) 1780 + return true; 1781 + } 1782 + return false; 1766 1783 } 1767 1784 1768 1785 /* ··· 1792 1769 static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, 1793 1770 bool cancel_all) 1794 1771 { 1772 + bool matched; 1773 + 1795 1774 if (task && head->task != task) 1796 1775 return false; 1797 - return cancel_all; 1776 + if (cancel_all) 1777 + return true; 1778 + 1779 + if (head->flags & REQ_F_LINK_TIMEOUT) { 1780 + struct io_ring_ctx *ctx = head->ctx; 1781 + 1782 + /* protect against races with linked timeouts */ 1783 + spin_lock_irq(&ctx->timeout_lock); 1784 + matched = io_match_linked(head); 1785 + spin_unlock_irq(&ctx->timeout_lock); 1786 + } else { 1787 + matched = io_match_linked(head); 1788 + } 1789 + return matched; 1798 1790 } 1799 1791 1800 1792 static inline bool req_has_async_data(struct io_kiocb *req) ··· 1963 1925 static inline bool io_req_ffs_set(struct io_kiocb *req) 1964 1926 { 1965 1927 return req->flags & REQ_F_FIXED_FILE; 1928 + } 1929 + 1930 + static inline void io_req_track_inflight(struct io_kiocb *req) 1931 + { 1932 + if (!(req->flags & REQ_F_INFLIGHT)) { 1933 + req->flags |= REQ_F_INFLIGHT; 1934 + atomic_inc(&current->io_uring->inflight_tracked); 1935 + } 1966 1936 } 1967 1937 1968 1938 static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) ··· 3033 2987 struct io_wq_work_node *node; 3034 2988 unsigned long flags; 3035 2989 bool running; 3036 - 3037 - io_drop_inflight_file(req); 3038 2990 3039 2991 spin_lock_irqsave(&tctx->task_lock, flags); 3040 2992 wq_list_add_tail(&req->io_task_work.node, 
list); ··· 4220 4176 return 0; 4221 4177 } 4222 4178 4179 + static int io_readv_prep_async(struct io_kiocb *req) 4180 + { 4181 + return io_rw_prep_async(req, READ); 4182 + } 4183 + 4184 + static int io_writev_prep_async(struct io_kiocb *req) 4185 + { 4186 + return io_rw_prep_async(req, WRITE); 4187 + } 4188 + 4223 4189 /* 4224 4190 * This is our waitqueue callback handler, registered through __folio_lock_async() 4225 4191 * when we initially tried to do the IO with the iocb armed our waitqueue. ··· 5157 5103 return 0; 5158 5104 } 5159 5105 5160 - static int io_shutdown_prep(struct io_kiocb *req, 5161 - const struct io_uring_sqe *sqe) 5162 - { 5163 - #if defined(CONFIG_NET) 5164 - if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || 5165 - sqe->buf_index || sqe->splice_fd_in)) 5166 - return -EINVAL; 5167 - 5168 - req->shutdown.how = READ_ONCE(sqe->len); 5169 - return 0; 5170 - #else 5171 - return -EOPNOTSUPP; 5172 - #endif 5173 - } 5174 - 5175 - static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 5176 - { 5177 - #if defined(CONFIG_NET) 5178 - struct socket *sock; 5179 - int ret; 5180 - 5181 - if (issue_flags & IO_URING_F_NONBLOCK) 5182 - return -EAGAIN; 5183 - 5184 - sock = sock_from_file(req->file); 5185 - if (unlikely(!sock)) 5186 - return -ENOTSOCK; 5187 - 5188 - ret = __sys_shutdown_sock(sock, req->shutdown.how); 5189 - io_req_complete(req, ret); 5190 - return 0; 5191 - #else 5192 - return -EOPNOTSUPP; 5193 - #endif 5194 - } 5195 - 5196 5106 static int __io_splice_prep(struct io_kiocb *req, 5197 5107 const struct io_uring_sqe *sqe) 5198 5108 { ··· 5463 5445 unsigned long nr = ctx->nr_user_files; 5464 5446 int ret; 5465 5447 5466 - if (table->alloc_hint >= nr) 5467 - table->alloc_hint = 0; 5468 - 5469 5448 do { 5470 5449 ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint); 5471 - if (ret != nr) { 5472 - table->alloc_hint = ret + 1; 5450 + if (ret != nr) 5473 5451 return ret; 5474 - } 5452 + 5475 5453 if (!table->alloc_hint) 5476 
5454 break; 5477 5455 ··· 5478 5464 return -ENFILE; 5479 5465 } 5480 5466 5467 + /* 5468 + * Note when io_fixed_fd_install() returns error value, it will ensure 5469 + * fput() is called correspondingly. 5470 + */ 5481 5471 static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags, 5482 5472 struct file *file, unsigned int file_slot) 5483 5473 { ··· 5489 5471 struct io_ring_ctx *ctx = req->ctx; 5490 5472 int ret; 5491 5473 5492 - if (alloc_slot) { 5493 - io_ring_submit_lock(ctx, issue_flags); 5494 - ret = io_file_bitmap_get(ctx); 5495 - if (unlikely(ret < 0)) { 5496 - io_ring_submit_unlock(ctx, issue_flags); 5497 - return ret; 5498 - } 5474 + io_ring_submit_lock(ctx, issue_flags); 5499 5475 5476 + if (alloc_slot) { 5477 + ret = io_file_bitmap_get(ctx); 5478 + if (unlikely(ret < 0)) 5479 + goto err; 5500 5480 file_slot = ret; 5501 5481 } else { 5502 5482 file_slot--; 5503 5483 } 5504 5484 5505 5485 ret = io_install_fixed_file(req, file, issue_flags, file_slot); 5506 - if (alloc_slot) { 5507 - io_ring_submit_unlock(ctx, issue_flags); 5508 - if (!ret) 5509 - return file_slot; 5510 - } 5511 - 5486 + if (!ret && alloc_slot) 5487 + ret = file_slot; 5488 + err: 5489 + io_ring_submit_unlock(ctx, issue_flags); 5490 + if (unlikely(ret < 0)) 5491 + fput(file); 5512 5492 return ret; 5513 5493 } 5514 5494 ··· 5988 5972 5989 5973 static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 5990 5974 { 5991 - if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) 5975 + if (sqe->off || sqe->addr || sqe->len || sqe->buf_index) 5992 5976 return -EINVAL; 5993 5977 if (req->flags & REQ_F_FIXED_FILE) 5994 5978 return -EBADF; 5995 5979 5996 5980 req->close.fd = READ_ONCE(sqe->fd); 5997 5981 req->close.file_slot = READ_ONCE(sqe->file_index); 5998 - if (req->close.file_slot && req->close.fd) 5982 + req->close.flags = READ_ONCE(sqe->close_flags); 5983 + if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT) 5984 + return -EINVAL; 
5985 + if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) && 5986 + req->close.file_slot && req->close.fd) 5999 5987 return -EINVAL; 6000 5988 6001 5989 return 0; ··· 6015 5995 6016 5996 if (req->close.file_slot) { 6017 5997 ret = io_close_fixed(req, issue_flags); 6018 - goto err; 5998 + if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT)) 5999 + goto err; 6019 6000 } 6020 6001 6021 6002 spin_lock(&files->file_lock); ··· 6084 6063 } 6085 6064 6086 6065 #if defined(CONFIG_NET) 6066 + static int io_shutdown_prep(struct io_kiocb *req, 6067 + const struct io_uring_sqe *sqe) 6068 + { 6069 + if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || 6070 + sqe->buf_index || sqe->splice_fd_in)) 6071 + return -EINVAL; 6072 + 6073 + req->shutdown.how = READ_ONCE(sqe->len); 6074 + return 0; 6075 + } 6076 + 6077 + static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 6078 + { 6079 + struct socket *sock; 6080 + int ret; 6081 + 6082 + if (issue_flags & IO_URING_F_NONBLOCK) 6083 + return -EAGAIN; 6084 + 6085 + sock = sock_from_file(req->file); 6086 + if (unlikely(!sock)) 6087 + return -ENOTSOCK; 6088 + 6089 + ret = __sys_shutdown_sock(sock, req->shutdown.how); 6090 + io_req_complete(req, ret); 6091 + return 0; 6092 + } 6093 + 6087 6094 static bool io_net_retry(struct socket *sock, int flags) 6088 6095 { 6089 6096 if (!(flags & MSG_WAITALL)) ··· 6723 6674 fd_install(fd, file); 6724 6675 ret = fd; 6725 6676 } else { 6726 - ret = io_install_fixed_file(req, file, issue_flags, 6727 - sock->file_slot - 1); 6677 + ret = io_fixed_fd_install(req, issue_flags, file, 6678 + sock->file_slot); 6728 6679 } 6729 6680 __io_req_complete(req, issue_flags, ret, 0); 6730 6681 return 0; ··· 6816 6767 IO_NETOP_PREP_ASYNC(connect); 6817 6768 IO_NETOP_PREP(accept); 6818 6769 IO_NETOP_PREP(socket); 6770 + IO_NETOP_PREP(shutdown); 6819 6771 IO_NETOP_FN(send); 6820 6772 IO_NETOP_FN(recv); 6821 6773 #endif /* CONFIG_NET */ ··· 6955 6905 6956 6906 if (!req->cqe.res) { 6957 6907 
struct poll_table_struct pt = { ._key = req->apoll_events }; 6958 - unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED; 6959 - 6960 - if (unlikely(!io_assign_file(req, flags))) 6961 - return -EBADF; 6962 6908 req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; 6963 6909 } 6964 6910 ··· 7436 7390 return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); 7437 7391 } 7438 7392 7439 - static int io_poll_update_prep(struct io_kiocb *req, 7393 + static int io_poll_remove_prep(struct io_kiocb *req, 7440 7394 const struct io_uring_sqe *sqe) 7441 7395 { 7442 7396 struct io_poll_update *upd = &req->poll_update; ··· 7500 7454 return 0; 7501 7455 } 7502 7456 7503 - static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags) 7457 + static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) 7504 7458 { 7505 7459 struct io_cancel_data cd = { .data = req->poll_update.old_user_data, }; 7506 7460 struct io_ring_ctx *ctx = req->ctx; ··· 7744 7698 return 0; 7745 7699 } 7746 7700 7747 - static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, 7748 - bool is_timeout_link) 7701 + static int __io_timeout_prep(struct io_kiocb *req, 7702 + const struct io_uring_sqe *sqe, 7703 + bool is_timeout_link) 7749 7704 { 7750 7705 struct io_timeout_data *data; 7751 7706 unsigned flags; ··· 7799 7752 link->last->flags |= REQ_F_ARM_LTIMEOUT; 7800 7753 } 7801 7754 return 0; 7755 + } 7756 + 7757 + static int io_timeout_prep(struct io_kiocb *req, 7758 + const struct io_uring_sqe *sqe) 7759 + { 7760 + return __io_timeout_prep(req, sqe, false); 7761 + } 7762 + 7763 + static int io_link_timeout_prep(struct io_kiocb *req, 7764 + const struct io_uring_sqe *sqe) 7765 + { 7766 + return __io_timeout_prep(req, sqe, true); 7802 7767 } 7803 7768 7804 7769 static int io_timeout(struct io_kiocb *req, unsigned int issue_flags) ··· 8029 7970 return 0; 8030 7971 } 8031 7972 8032 - static int io_rsrc_update_prep(struct io_kiocb *req, 7973 + static 
int io_files_update_prep(struct io_kiocb *req, 8033 7974 const struct io_uring_sqe *sqe) 8034 7975 { 8035 7976 if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) ··· 8045 7986 return 0; 8046 7987 } 8047 7988 7989 + static int io_files_update_with_index_alloc(struct io_kiocb *req, 7990 + unsigned int issue_flags) 7991 + { 7992 + __s32 __user *fds = u64_to_user_ptr(req->rsrc_update.arg); 7993 + unsigned int done; 7994 + struct file *file; 7995 + int ret, fd; 7996 + 7997 + for (done = 0; done < req->rsrc_update.nr_args; done++) { 7998 + if (copy_from_user(&fd, &fds[done], sizeof(fd))) { 7999 + ret = -EFAULT; 8000 + break; 8001 + } 8002 + 8003 + file = fget(fd); 8004 + if (!file) { 8005 + ret = -EBADF; 8006 + break; 8007 + } 8008 + ret = io_fixed_fd_install(req, issue_flags, file, 8009 + IORING_FILE_INDEX_ALLOC); 8010 + if (ret < 0) 8011 + break; 8012 + if (copy_to_user(&fds[done], &ret, sizeof(ret))) { 8013 + ret = -EFAULT; 8014 + __io_close_fixed(req, issue_flags, ret); 8015 + break; 8016 + } 8017 + } 8018 + 8019 + if (done) 8020 + return done; 8021 + return ret; 8022 + } 8023 + 8048 8024 static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) 8049 8025 { 8050 8026 struct io_ring_ctx *ctx = req->ctx; ··· 8093 7999 up.resv = 0; 8094 8000 up.resv2 = 0; 8095 8001 8096 - io_ring_submit_lock(ctx, issue_flags); 8097 - ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, 8098 - &up, req->rsrc_update.nr_args); 8099 - io_ring_submit_unlock(ctx, issue_flags); 8002 + if (req->rsrc_update.offset == IORING_FILE_INDEX_ALLOC) { 8003 + ret = io_files_update_with_index_alloc(req, issue_flags); 8004 + } else { 8005 + io_ring_submit_lock(ctx, issue_flags); 8006 + ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, 8007 + &up, req->rsrc_update.nr_args); 8008 + io_ring_submit_unlock(ctx, issue_flags); 8009 + } 8100 8010 8101 8011 if (ret < 0) 8102 8012 req_set_fail(req); ··· 8123 8025 case IORING_OP_POLL_ADD: 8124 8026 return 
io_poll_add_prep(req, sqe); 8125 8027 case IORING_OP_POLL_REMOVE: 8126 - return io_poll_update_prep(req, sqe); 8028 + return io_poll_remove_prep(req, sqe); 8127 8029 case IORING_OP_FSYNC: 8128 8030 return io_fsync_prep(req, sqe); 8129 8031 case IORING_OP_SYNC_FILE_RANGE: ··· 8137 8039 case IORING_OP_CONNECT: 8138 8040 return io_connect_prep(req, sqe); 8139 8041 case IORING_OP_TIMEOUT: 8140 - return io_timeout_prep(req, sqe, false); 8042 + return io_timeout_prep(req, sqe); 8141 8043 case IORING_OP_TIMEOUT_REMOVE: 8142 8044 return io_timeout_remove_prep(req, sqe); 8143 8045 case IORING_OP_ASYNC_CANCEL: 8144 8046 return io_async_cancel_prep(req, sqe); 8145 8047 case IORING_OP_LINK_TIMEOUT: 8146 - return io_timeout_prep(req, sqe, true); 8048 + return io_link_timeout_prep(req, sqe); 8147 8049 case IORING_OP_ACCEPT: 8148 8050 return io_accept_prep(req, sqe); 8149 8051 case IORING_OP_FALLOCATE: ··· 8153 8055 case IORING_OP_CLOSE: 8154 8056 return io_close_prep(req, sqe); 8155 8057 case IORING_OP_FILES_UPDATE: 8156 - return io_rsrc_update_prep(req, sqe); 8058 + return io_files_update_prep(req, sqe); 8157 8059 case IORING_OP_STATX: 8158 8060 return io_statx_prep(req, sqe); 8159 8061 case IORING_OP_FADVISE: ··· 8221 8123 8222 8124 switch (req->opcode) { 8223 8125 case IORING_OP_READV: 8224 - return io_rw_prep_async(req, READ); 8126 + return io_readv_prep_async(req); 8225 8127 case IORING_OP_WRITEV: 8226 - return io_rw_prep_async(req, WRITE); 8128 + return io_writev_prep_async(req); 8227 8129 case IORING_OP_SENDMSG: 8228 8130 return io_sendmsg_prep_async(req); 8229 8131 case IORING_OP_RECVMSG: ··· 8362 8264 kfree(req->apoll); 8363 8265 req->apoll = NULL; 8364 8266 } 8267 + if (req->flags & REQ_F_INFLIGHT) { 8268 + struct io_uring_task *tctx = req->task->io_uring; 8269 + 8270 + atomic_dec(&tctx->inflight_tracked); 8271 + } 8365 8272 if (req->flags & REQ_F_CREDS) 8366 8273 put_cred(req->creds); 8367 8274 if (req->flags & REQ_F_ASYNC_DATA) { ··· 8391 8288 8392 8289 static int 
io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) 8393 8290 { 8291 + const struct io_op_def *def = &io_op_defs[req->opcode]; 8394 8292 const struct cred *creds = NULL; 8395 8293 int ret; 8396 8294 ··· 8401 8297 if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) 8402 8298 creds = override_creds(req->creds); 8403 8299 8404 - if (!io_op_defs[req->opcode].audit_skip) 8300 + if (!def->audit_skip) 8405 8301 audit_uring_entry(req->opcode); 8406 8302 8407 8303 switch (req->opcode) { ··· 8425 8321 ret = io_poll_add(req, issue_flags); 8426 8322 break; 8427 8323 case IORING_OP_POLL_REMOVE: 8428 - ret = io_poll_update(req, issue_flags); 8324 + ret = io_poll_remove(req, issue_flags); 8429 8325 break; 8430 8326 case IORING_OP_SYNC_FILE_RANGE: 8431 8327 ret = io_sync_file_range(req, issue_flags); ··· 8540 8436 break; 8541 8437 } 8542 8438 8543 - if (!io_op_defs[req->opcode].audit_skip) 8439 + if (!def->audit_skip) 8544 8440 audit_uring_exit(!ret, ret); 8545 8441 8546 8442 if (creds) ··· 8673 8569 return file; 8674 8570 } 8675 8571 8676 - /* 8677 - * Drop the file for requeue operations. Only used of req->file is the 8678 - * io_uring descriptor itself. 
8679 - */ 8680 - static void io_drop_inflight_file(struct io_kiocb *req) 8681 - { 8682 - if (unlikely(req->flags & REQ_F_INFLIGHT)) { 8683 - fput(req->file); 8684 - req->file = NULL; 8685 - req->flags &= ~REQ_F_INFLIGHT; 8686 - } 8687 - } 8688 - 8689 8572 static struct file *io_file_get_normal(struct io_kiocb *req, int fd) 8690 8573 { 8691 8574 struct file *file = fget(fd); ··· 8681 8590 8682 8591 /* we don't allow fixed io_uring files */ 8683 8592 if (file && file->f_op == &io_uring_fops) 8684 - req->flags |= REQ_F_INFLIGHT; 8593 + io_req_track_inflight(req); 8685 8594 return file; 8686 8595 } 8687 8596 ··· 8879 8788 const struct io_uring_sqe *sqe) 8880 8789 __must_hold(&ctx->uring_lock) 8881 8790 { 8791 + const struct io_op_def *def; 8882 8792 unsigned int sqe_flags; 8883 8793 int personality; 8884 8794 u8 opcode; ··· 8897 8805 req->opcode = 0; 8898 8806 return -EINVAL; 8899 8807 } 8808 + def = &io_op_defs[opcode]; 8900 8809 if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { 8901 8810 /* enforce forwards compatibility on users */ 8902 8811 if (sqe_flags & ~SQE_VALID_FLAGS) 8903 8812 return -EINVAL; 8904 8813 if (sqe_flags & IOSQE_BUFFER_SELECT) { 8905 - if (!io_op_defs[opcode].buffer_select) 8814 + if (!def->buffer_select) 8906 8815 return -EOPNOTSUPP; 8907 8816 req->buf_index = READ_ONCE(sqe->buf_group); 8908 8817 } ··· 8929 8836 } 8930 8837 } 8931 8838 8932 - if (!io_op_defs[opcode].ioprio && sqe->ioprio) 8839 + if (!def->ioprio && sqe->ioprio) 8933 8840 return -EINVAL; 8934 - if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) 8841 + if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) 8935 8842 return -EINVAL; 8936 8843 8937 - if (io_op_defs[opcode].needs_file) { 8844 + if (def->needs_file) { 8938 8845 struct io_submit_state *state = &ctx->submit_state; 8939 8846 8940 8847 req->cqe.fd = READ_ONCE(sqe->fd); ··· 8943 8850 * Plug now if we have more than 2 IO left after this, and the 8944 8851 * target is potentially a read/write to block based 
storage. 8945 8852 */ 8946 - if (state->need_plug && io_op_defs[opcode].plug) { 8853 + if (state->need_plug && def->plug) { 8947 8854 state->plug_started = true; 8948 8855 state->need_plug = false; 8949 8856 blk_start_plug_nr_ios(&state->plug, state->submit_nr); ··· 9751 9658 { 9752 9659 WARN_ON_ONCE(test_bit(bit, table->bitmap)); 9753 9660 __set_bit(bit, table->bitmap); 9754 - if (bit == table->alloc_hint) 9755 - table->alloc_hint++; 9661 + table->alloc_hint = bit + 1; 9756 9662 } 9757 9663 9758 9664 static inline void io_file_bitmap_clear(struct io_file_table *table, int bit) ··· 10205 10113 10206 10114 static int io_install_fixed_file(struct io_kiocb *req, struct file *file, 10207 10115 unsigned int issue_flags, u32 slot_index) 10116 + __must_hold(&req->ctx->uring_lock) 10208 10117 { 10209 10118 struct io_ring_ctx *ctx = req->ctx; 10210 10119 bool needs_switch = false; 10211 10120 struct io_fixed_file *file_slot; 10212 - int ret = -EBADF; 10121 + int ret; 10213 10122 10214 - io_ring_submit_lock(ctx, issue_flags); 10215 10123 if (file->f_op == &io_uring_fops) 10216 - goto err; 10217 - ret = -ENXIO; 10124 + return -EBADF; 10218 10125 if (!ctx->file_data) 10219 - goto err; 10220 - ret = -EINVAL; 10126 + return -ENXIO; 10221 10127 if (slot_index >= ctx->nr_user_files) 10222 - goto err; 10128 + return -EINVAL; 10223 10129 10224 10130 slot_index = array_index_nospec(slot_index, ctx->nr_user_files); 10225 10131 file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); ··· 10248 10158 err: 10249 10159 if (needs_switch) 10250 10160 io_rsrc_node_switch(ctx, ctx->file_data); 10251 - io_ring_submit_unlock(ctx, issue_flags); 10252 10161 if (ret) 10253 10162 fput(file); 10254 10163 return ret; 10255 10164 } 10256 10165 10257 - static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) 10166 + static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags, 10167 + unsigned int offset) 10258 10168 { 10259 - unsigned int offset = 
req->close.file_slot - 1; 10260 10169 struct io_ring_ctx *ctx = req->ctx; 10261 10170 struct io_fixed_file *file_slot; 10262 10171 struct file *file; ··· 10290 10201 out: 10291 10202 io_ring_submit_unlock(ctx, issue_flags); 10292 10203 return ret; 10204 + } 10205 + 10206 + static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) 10207 + { 10208 + return __io_close_fixed(req, issue_flags, req->close.file_slot - 1); 10293 10209 } 10294 10210 10295 10211 static int __io_sqe_files_update(struct io_ring_ctx *ctx, ··· 10445 10351 xa_init(&tctx->xa); 10446 10352 init_waitqueue_head(&tctx->wait); 10447 10353 atomic_set(&tctx->in_idle, 0); 10354 + atomic_set(&tctx->inflight_tracked, 0); 10448 10355 task->io_uring = tctx; 10449 10356 spin_lock_init(&tctx->task_lock); 10450 10357 INIT_WQ_LIST(&tctx->task_list); ··· 11141 11046 xa_for_each(&ctx->io_bl_xa, index, bl) { 11142 11047 xa_erase(&ctx->io_bl_xa, bl->bgid); 11143 11048 __io_remove_buffers(ctx, bl, -1U); 11049 + kfree(bl); 11144 11050 } 11145 11051 11146 11052 while (!list_empty(&ctx->io_buffers_pages)) { ··· 11677 11581 static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) 11678 11582 { 11679 11583 if (tracked) 11680 - return 0; 11584 + return atomic_read(&tctx->inflight_tracked); 11681 11585 return percpu_counter_sum(&tctx->inflight); 11682 11586 } 11683 11587
+6
include/uapi/linux/io_uring.h
··· 47 47 __u32 unlink_flags; 48 48 __u32 hardlink_flags; 49 49 __u32 xattr_flags; 50 + __u32 close_flags; 50 51 }; 51 52 __u64 user_data; /* data to be passed back at completion time */ 52 53 /* pack this to avoid bogus arm OABI complaints */ ··· 258 257 * accept flags stored in sqe->ioprio 259 258 */ 260 259 #define IORING_ACCEPT_MULTISHOT (1U << 0) 260 + 261 + /* 262 + * close flags, store in sqe->close_flags 263 + */ 264 + #define IORING_CLOSE_FD_AND_FILE_SLOT (1U << 0) 261 265 262 266 /* 263 267 * IO completion data structure (Completion Queue Entry)