Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-5.18-2022-04-14' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

- Ensure we check for, and return -EINVAL on, any use of reserved fields or struct padding.

Although we generally do that, it was missed in two spots for
resource updates, in one spot for the ring fd registration added in this
merge window, and in one spot for the extended arg. Make sure all of them
are handled. (Dylan)

- A few fixes for the deferred file assignment (me, Pavel)

- Add a feature flag for the deferred file assignment so apps can tell
we handle it correctly (me)

- Fix a small perf regression with the current file position fix in
this merge window (me)

* tag 'io_uring-5.18-2022-04-14' of git://git.kernel.dk/linux-block:
io_uring: abort file assignment prior to assigning creds
io_uring: fix poll error reporting
io_uring: fix poll file assign deadlock
io_uring: use right issue_flags for splice/tee
io_uring: verify pad field is 0 in io_get_ext_arg
io_uring: verify resv is 0 in ringfd register/unregister
io_uring: verify that resv2 is 0 in io_uring_rsrc_update2
io_uring: move io_uring_rsrc_update2 validation
io_uring: fix assign file locking issue
io_uring: stop using io_wq_work as an fd placeholder
io_uring: move apoll->events cache
io_uring: io_kiocb_update_pos() should not touch file for non -1 offset
io_uring: flag the fact that linked file assignment is sane

+59 -41
-1
fs/io-wq.h
··· 155 155 struct io_wq_work { 156 156 struct io_wq_work_node list; 157 157 unsigned flags; 158 - int fd; 159 158 }; 160 159 161 160 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
+58 -40
fs/io_uring.c
··· 907 907 908 908 u64 user_data; 909 909 u32 result; 910 - u32 cflags; 910 + /* fd initially, then cflags for completion */ 911 + union { 912 + u32 cflags; 913 + int fd; 914 + }; 911 915 912 916 struct io_ring_ctx *ctx; 913 917 struct task_struct *task; ··· 920 916 /* store used ubuf, so we can prevent reloading */ 921 917 struct io_mapped_ubuf *imu; 922 918 923 - /* used by request caches, completion batching and iopoll */ 924 - struct io_wq_work_node comp_list; 919 + union { 920 + /* used by request caches, completion batching and iopoll */ 921 + struct io_wq_work_node comp_list; 922 + /* cache ->apoll->events */ 923 + int apoll_events; 924 + }; 925 925 atomic_t refs; 926 926 atomic_t poll_refs; 927 927 struct io_task_work io_task_work; ··· 3191 3183 static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) 3192 3184 { 3193 3185 struct kiocb *kiocb = &req->rw.kiocb; 3194 - bool is_stream = req->file->f_mode & FMODE_STREAM; 3195 3186 3196 - if (kiocb->ki_pos == -1) { 3197 - if (!is_stream) { 3198 - req->flags |= REQ_F_CUR_POS; 3199 - kiocb->ki_pos = req->file->f_pos; 3200 - return &kiocb->ki_pos; 3201 - } else { 3202 - kiocb->ki_pos = 0; 3203 - return NULL; 3204 - } 3187 + if (kiocb->ki_pos != -1) 3188 + return &kiocb->ki_pos; 3189 + 3190 + if (!(req->file->f_mode & FMODE_STREAM)) { 3191 + req->flags |= REQ_F_CUR_POS; 3192 + kiocb->ki_pos = req->file->f_pos; 3193 + return &kiocb->ki_pos; 3205 3194 } 3206 - return is_stream ? 
NULL : &kiocb->ki_pos; 3195 + 3196 + kiocb->ki_pos = 0; 3197 + return NULL; 3207 3198 } 3208 3199 3209 3200 static void kiocb_done(struct io_kiocb *req, ssize_t ret, ··· 4358 4351 return -EAGAIN; 4359 4352 4360 4353 if (sp->flags & SPLICE_F_FD_IN_FIXED) 4361 - in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); 4354 + in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); 4362 4355 else 4363 4356 in = io_file_get_normal(req, sp->splice_fd_in); 4364 4357 if (!in) { ··· 4400 4393 return -EAGAIN; 4401 4394 4402 4395 if (sp->flags & SPLICE_F_FD_IN_FIXED) 4403 - in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); 4396 + in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); 4404 4397 else 4405 4398 in = io_file_get_normal(req, sp->splice_fd_in); 4406 4399 if (!in) { ··· 5841 5834 static int io_poll_check_events(struct io_kiocb *req, bool locked) 5842 5835 { 5843 5836 struct io_ring_ctx *ctx = req->ctx; 5844 - struct io_poll_iocb *poll = io_poll_get_single(req); 5845 5837 int v; 5846 5838 5847 5839 /* req->task == current here, checking PF_EXITING is safe */ ··· 5857 5851 return -ECANCELED; 5858 5852 5859 5853 if (!req->result) { 5860 - struct poll_table_struct pt = { ._key = req->cflags }; 5854 + struct poll_table_struct pt = { ._key = req->apoll_events }; 5855 + unsigned flags = locked ? 
0 : IO_URING_F_UNLOCKED; 5861 5856 5862 - if (unlikely(!io_assign_file(req, IO_URING_F_UNLOCKED))) 5863 - req->result = -EBADF; 5864 - else 5865 - req->result = vfs_poll(req->file, &pt) & req->cflags; 5857 + if (unlikely(!io_assign_file(req, flags))) 5858 + return -EBADF; 5859 + req->result = vfs_poll(req->file, &pt) & req->apoll_events; 5866 5860 } 5867 5861 5868 5862 /* multishot, just fill an CQE and proceed */ 5869 - if (req->result && !(req->cflags & EPOLLONESHOT)) { 5870 - __poll_t mask = mangle_poll(req->result & poll->events); 5863 + if (req->result && !(req->apoll_events & EPOLLONESHOT)) { 5864 + __poll_t mask = mangle_poll(req->result & req->apoll_events); 5871 5865 bool filled; 5872 5866 5873 5867 spin_lock(&ctx->completion_lock); ··· 5945 5939 * CPU. We want to avoid pulling in req->apoll->events for that 5946 5940 * case. 5947 5941 */ 5948 - req->cflags = events; 5942 + req->apoll_events = events; 5949 5943 if (req->opcode == IORING_OP_POLL_ADD) 5950 5944 req->io_task_work.func = io_poll_task_func; 5951 5945 else ··· 6337 6331 return -EINVAL; 6338 6332 6339 6333 io_req_set_refcount(req); 6340 - req->cflags = poll->events = io_poll_parse_events(sqe, flags); 6334 + req->apoll_events = poll->events = io_poll_parse_events(sqe, flags); 6341 6335 return 0; 6342 6336 } 6343 6337 ··· 6839 6833 up.nr = 0; 6840 6834 up.tags = 0; 6841 6835 up.resv = 0; 6836 + up.resv2 = 0; 6842 6837 6843 6838 io_ring_submit_lock(ctx, needs_lock); 6844 6839 ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, ··· 7095 7088 return true; 7096 7089 7097 7090 if (req->flags & REQ_F_FIXED_FILE) 7098 - req->file = io_file_get_fixed(req, req->work.fd, issue_flags); 7091 + req->file = io_file_get_fixed(req, req->fd, issue_flags); 7099 7092 else 7100 - req->file = io_file_get_normal(req, req->work.fd); 7093 + req->file = io_file_get_normal(req, req->fd); 7101 7094 if (req->file) 7102 7095 return true; 7103 7096 ··· 7111 7104 const struct cred *creds = NULL; 7112 7105 int ret; 7113 7106 
7107 + if (unlikely(!io_assign_file(req, issue_flags))) 7108 + return -EBADF; 7109 + 7114 7110 if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) 7115 7111 creds = override_creds(req->creds); 7116 7112 7117 7113 if (!io_op_defs[req->opcode].audit_skip) 7118 7114 audit_uring_entry(req->opcode); 7119 - if (unlikely(!io_assign_file(req, issue_flags))) 7120 - return -EBADF; 7121 7115 7122 7116 switch (req->opcode) { 7123 7117 case IORING_OP_NOP: ··· 7279 7271 if (timeout) 7280 7272 io_queue_linked_timeout(timeout); 7281 7273 7282 - if (!io_assign_file(req, issue_flags)) { 7283 - err = -EBADF; 7284 - work->flags |= IO_WQ_WORK_CANCEL; 7285 - } 7286 7274 7287 7275 /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ 7288 7276 if (work->flags & IO_WQ_WORK_CANCEL) { 7277 + fail: 7289 7278 io_req_task_queue_fail(req, err); 7290 7279 return; 7280 + } 7281 + if (!io_assign_file(req, issue_flags)) { 7282 + err = -EBADF; 7283 + work->flags |= IO_WQ_WORK_CANCEL; 7284 + goto fail; 7291 7285 } 7292 7286 7293 7287 if (req->flags & REQ_F_FORCE_ASYNC) { ··· 7638 7628 if (io_op_defs[opcode].needs_file) { 7639 7629 struct io_submit_state *state = &ctx->submit_state; 7640 7630 7641 - req->work.fd = READ_ONCE(sqe->fd); 7631 + req->fd = READ_ONCE(sqe->fd); 7642 7632 7643 7633 /* 7644 7634 * Plug now if we have more than 2 IO left after this, and the ··· 10534 10524 break; 10535 10525 } 10536 10526 10527 + if (reg.resv) { 10528 + ret = -EINVAL; 10529 + break; 10530 + } 10531 + 10537 10532 if (reg.offset == -1U) { 10538 10533 start = 0; 10539 10534 end = IO_RINGFD_REG_MAX; ··· 10585 10570 ret = -EFAULT; 10586 10571 break; 10587 10572 } 10588 - if (reg.offset >= IO_RINGFD_REG_MAX) { 10573 + if (reg.resv || reg.offset >= IO_RINGFD_REG_MAX) { 10589 10574 ret = -EINVAL; 10590 10575 break; 10591 10576 } ··· 10712 10697 return -EINVAL; 10713 10698 if (copy_from_user(&arg, argp, sizeof(arg))) 10714 10699 return -EFAULT; 10700 + if (arg.pad) 10701 + return 
-EINVAL; 10715 10702 *sig = u64_to_user_ptr(arg.sigmask); 10716 10703 *argsz = arg.sigmask_sz; 10717 10704 *ts = u64_to_user_ptr(arg.ts); ··· 11195 11178 IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | 11196 11179 IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | 11197 11180 IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | 11198 - IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP; 11181 + IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | 11182 + IORING_FEAT_LINKED_FILE; 11199 11183 11200 11184 if (copy_to_user(params, p, sizeof(*p))) { 11201 11185 ret = -EFAULT; ··· 11407 11389 __u32 tmp; 11408 11390 int err; 11409 11391 11410 - if (up->resv) 11411 - return -EINVAL; 11412 11392 if (check_add_overflow(up->offset, nr_args, &tmp)) 11413 11393 return -EOVERFLOW; 11414 11394 err = io_rsrc_node_switch_start(ctx); ··· 11432 11416 memset(&up, 0, sizeof(up)); 11433 11417 if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) 11434 11418 return -EFAULT; 11419 + if (up.resv || up.resv2) 11420 + return -EINVAL; 11435 11421 return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); 11436 11422 } 11437 11423 ··· 11446 11428 return -EINVAL; 11447 11429 if (copy_from_user(&up, arg, sizeof(up))) 11448 11430 return -EFAULT; 11449 - if (!up.nr || up.resv) 11431 + if (!up.nr || up.resv || up.resv2) 11450 11432 return -EINVAL; 11451 11433 return __io_register_rsrc_update(ctx, type, &up, up.nr); 11452 11434 }
+1
include/uapi/linux/io_uring.h
··· 296 296 #define IORING_FEAT_NATIVE_WORKERS (1U << 9) 297 297 #define IORING_FEAT_RSRC_TAGS (1U << 10) 298 298 #define IORING_FEAT_CQE_SKIP (1U << 11) 299 + #define IORING_FEAT_LINKED_FILE (1U << 12) 299 300 300 301 /* 301 302 * io_uring_register(2) opcodes and arguments