Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-5.10-2020-11-07' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
"A set of fixes for io_uring:

- SQPOLL cancelation fixes

- Two fixes for the io_identity COW

- Cancelation overflow fix (Pavel)

- Drain request cancelation fix (Pavel)

- Link timeout race fix (Pavel)"

* tag 'io_uring-5.10-2020-11-07' of git://git.kernel.dk/linux-block:
io_uring: fix link lookup racing with link timeout
io_uring: use correct pointer for io_uring_show_cred()
io_uring: don't forget to task-cancel drained reqs
io_uring: fix overflowed cancel w/ linked ->files
io_uring: drop req/tctx io_identity separately
io_uring: ensure consistent view of original task ->mm from SQPOLL
io_uring: properly handle SQPOLL request cancelations
io-wq: cancel request if it's asking for files and we don't have them

+142 -48
+4
fs/io-wq.c
···
482 482 		current->files = work->identity->files;
483 483 		current->nsproxy = work->identity->nsproxy;
484 484 		task_unlock(current);
    485 +		if (!work->identity->files) {
    486 +			/* failed grabbing files, ensure work gets cancelled */
    487 +			work->flags |= IO_WQ_WORK_CANCEL;
    488 +		}
485 489 	}
486 490 	if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
487 491 		current->fs = work->identity->fs;
+136 -47
fs/io_uring.c
··· 995 995 if (mm) { 996 996 kthread_unuse_mm(mm); 997 997 mmput(mm); 998 + current->mm = NULL; 998 999 } 999 1000 } 1000 1001 1001 1002 static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) 1002 1003 { 1003 - if (!current->mm) { 1004 - if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) || 1005 - !ctx->sqo_task->mm || 1006 - !mmget_not_zero(ctx->sqo_task->mm))) 1007 - return -EFAULT; 1008 - kthread_use_mm(ctx->sqo_task->mm); 1004 + struct mm_struct *mm; 1005 + 1006 + if (current->mm) 1007 + return 0; 1008 + 1009 + /* Should never happen */ 1010 + if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL))) 1011 + return -EFAULT; 1012 + 1013 + task_lock(ctx->sqo_task); 1014 + mm = ctx->sqo_task->mm; 1015 + if (unlikely(!mm || !mmget_not_zero(mm))) 1016 + mm = NULL; 1017 + task_unlock(ctx->sqo_task); 1018 + 1019 + if (mm) { 1020 + kthread_use_mm(mm); 1021 + return 0; 1009 1022 } 1010 1023 1011 - return 0; 1024 + return -EFAULT; 1012 1025 } 1013 1026 1014 1027 static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, ··· 1287 1274 /* add one for this request */ 1288 1275 refcount_inc(&id->count); 1289 1276 1290 - /* drop old identity, assign new one. 
one ref for req, one for tctx */ 1291 - if (req->work.identity != tctx->identity && 1292 - refcount_sub_and_test(2, &req->work.identity->count)) 1277 + /* drop tctx and req identity references, if needed */ 1278 + if (tctx->identity != &tctx->__identity && 1279 + refcount_dec_and_test(&tctx->identity->count)) 1280 + kfree(tctx->identity); 1281 + if (req->work.identity != &tctx->__identity && 1282 + refcount_dec_and_test(&req->work.identity->count)) 1293 1283 kfree(req->work.identity); 1294 1284 1295 1285 req->work.identity = id; ··· 1593 1577 } 1594 1578 } 1595 1579 1596 - static inline bool io_match_files(struct io_kiocb *req, 1597 - struct files_struct *files) 1580 + static inline bool __io_match_files(struct io_kiocb *req, 1581 + struct files_struct *files) 1598 1582 { 1583 + return ((req->flags & REQ_F_WORK_INITIALIZED) && 1584 + (req->work.flags & IO_WQ_WORK_FILES)) && 1585 + req->work.identity->files == files; 1586 + } 1587 + 1588 + static bool io_match_files(struct io_kiocb *req, 1589 + struct files_struct *files) 1590 + { 1591 + struct io_kiocb *link; 1592 + 1599 1593 if (!files) 1600 1594 return true; 1601 - if ((req->flags & REQ_F_WORK_INITIALIZED) && 1602 - (req->work.flags & IO_WQ_WORK_FILES)) 1603 - return req->work.identity->files == files; 1595 + if (__io_match_files(req, files)) 1596 + return true; 1597 + if (req->flags & REQ_F_LINK_HEAD) { 1598 + list_for_each_entry(link, &req->link_list, link_list) { 1599 + if (__io_match_files(link, files)) 1600 + return true; 1601 + } 1602 + } 1604 1603 return false; 1605 1604 } 1606 1605 ··· 1699 1668 WRITE_ONCE(cqe->user_data, req->user_data); 1700 1669 WRITE_ONCE(cqe->res, res); 1701 1670 WRITE_ONCE(cqe->flags, cflags); 1702 - } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) { 1671 + } else if (ctx->cq_overflow_flushed || 1672 + atomic_read(&req->task->io_uring->in_idle)) { 1703 1673 /* 1704 1674 * If we're in ring overflow flush mode, or in task cancel mode, 1705 1675 * then we cannot 
store the request for later flushing, we need ··· 1870 1838 io_dismantle_req(req); 1871 1839 1872 1840 percpu_counter_dec(&tctx->inflight); 1873 - if (tctx->in_idle) 1841 + if (atomic_read(&tctx->in_idle)) 1874 1842 wake_up(&tctx->wait); 1875 1843 put_task_struct(req->task); 1876 1844 ··· 7727 7695 xa_init(&tctx->xa); 7728 7696 init_waitqueue_head(&tctx->wait); 7729 7697 tctx->last = NULL; 7730 - tctx->in_idle = 0; 7698 + atomic_set(&tctx->in_idle, 0); 7699 + tctx->sqpoll = false; 7731 7700 io_init_identity(&tctx->__identity); 7732 7701 tctx->identity = &tctx->__identity; 7733 7702 task->io_uring = tctx; ··· 8421 8388 return false; 8422 8389 } 8423 8390 8424 - static bool io_match_link_files(struct io_kiocb *req, 8425 - struct files_struct *files) 8426 - { 8427 - struct io_kiocb *link; 8428 - 8429 - if (io_match_files(req, files)) 8430 - return true; 8431 - if (req->flags & REQ_F_LINK_HEAD) { 8432 - list_for_each_entry(link, &req->link_list, link_list) { 8433 - if (io_match_files(link, files)) 8434 - return true; 8435 - } 8436 - } 8437 - return false; 8438 - } 8439 - 8440 8391 /* 8441 8392 * We're looking to cancel 'req' because it's holding on to our files, but 8442 8393 * 'req' could be a link to another request. 
See if it is, and cancel that ··· 8470 8453 8471 8454 static bool io_cancel_link_cb(struct io_wq_work *work, void *data) 8472 8455 { 8473 - return io_match_link(container_of(work, struct io_kiocb, work), data); 8456 + struct io_kiocb *req = container_of(work, struct io_kiocb, work); 8457 + bool ret; 8458 + 8459 + if (req->flags & REQ_F_LINK_TIMEOUT) { 8460 + unsigned long flags; 8461 + struct io_ring_ctx *ctx = req->ctx; 8462 + 8463 + /* protect against races with linked timeouts */ 8464 + spin_lock_irqsave(&ctx->completion_lock, flags); 8465 + ret = io_match_link(req, data); 8466 + spin_unlock_irqrestore(&ctx->completion_lock, flags); 8467 + } else { 8468 + ret = io_match_link(req, data); 8469 + } 8470 + return ret; 8474 8471 } 8475 8472 8476 8473 static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) ··· 8510 8479 } 8511 8480 8512 8481 static void io_cancel_defer_files(struct io_ring_ctx *ctx, 8482 + struct task_struct *task, 8513 8483 struct files_struct *files) 8514 8484 { 8515 8485 struct io_defer_entry *de = NULL; ··· 8518 8486 8519 8487 spin_lock_irq(&ctx->completion_lock); 8520 8488 list_for_each_entry_reverse(de, &ctx->defer_list, list) { 8521 - if (io_match_link_files(de->req, files)) { 8489 + if (io_task_match(de->req, task) && 8490 + io_match_files(de->req, files)) { 8522 8491 list_cut_position(&list, &ctx->defer_list, &de->list); 8523 8492 break; 8524 8493 } ··· 8545 8512 if (list_empty_careful(&ctx->inflight_list)) 8546 8513 return false; 8547 8514 8548 - io_cancel_defer_files(ctx, files); 8549 8515 /* cancel all at once, should be faster than doing it one by one*/ 8550 8516 io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true); 8551 8517 ··· 8630 8598 { 8631 8599 struct task_struct *task = current; 8632 8600 8633 - if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) 8601 + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { 8634 8602 task = ctx->sq_data->thread; 8603 + atomic_inc(&task->io_uring->in_idle); 8604 + 
io_sq_thread_park(ctx->sq_data); 8605 + } 8606 + 8607 + if (files) 8608 + io_cancel_defer_files(ctx, NULL, files); 8609 + else 8610 + io_cancel_defer_files(ctx, task, NULL); 8635 8611 8636 8612 io_cqring_overflow_flush(ctx, true, task, files); 8637 8613 ··· 8647 8607 io_run_task_work(); 8648 8608 cond_resched(); 8649 8609 } 8610 + 8611 + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { 8612 + atomic_dec(&task->io_uring->in_idle); 8613 + /* 8614 + * If the files that are going away are the ones in the thread 8615 + * identity, clear them out. 8616 + */ 8617 + if (task->io_uring->identity->files == files) 8618 + task->io_uring->identity->files = NULL; 8619 + io_sq_thread_unpark(ctx->sq_data); 8620 + } 8650 8621 } 8651 8622 8652 8623 /* 8653 8624 * Note that this task has used io_uring. We use it for cancelation purposes. 8654 8625 */ 8655 - static int io_uring_add_task_file(struct file *file) 8626 + static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) 8656 8627 { 8657 8628 struct io_uring_task *tctx = current->io_uring; 8658 8629 ··· 8684 8633 } 8685 8634 tctx->last = file; 8686 8635 } 8636 + 8637 + /* 8638 + * This is race safe in that the task itself is doing this, hence it 8639 + * cannot be going through the exit/cancel paths at the same time. 8640 + * This cannot be modified while exit/cancel is running. 
8641 + */ 8642 + if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL)) 8643 + tctx->sqpoll = true; 8687 8644 8688 8645 return 0; 8689 8646 } ··· 8734 8675 unsigned long index; 8735 8676 8736 8677 /* make sure overflow events are dropped */ 8737 - tctx->in_idle = true; 8678 + atomic_inc(&tctx->in_idle); 8738 8679 8739 8680 xa_for_each(&tctx->xa, index, file) { 8740 8681 struct io_ring_ctx *ctx = file->private_data; ··· 8743 8684 if (files) 8744 8685 io_uring_del_task_file(file); 8745 8686 } 8687 + 8688 + atomic_dec(&tctx->in_idle); 8689 + } 8690 + 8691 + static s64 tctx_inflight(struct io_uring_task *tctx) 8692 + { 8693 + unsigned long index; 8694 + struct file *file; 8695 + s64 inflight; 8696 + 8697 + inflight = percpu_counter_sum(&tctx->inflight); 8698 + if (!tctx->sqpoll) 8699 + return inflight; 8700 + 8701 + /* 8702 + * If we have SQPOLL rings, then we need to iterate and find them, and 8703 + * add the pending count for those. 8704 + */ 8705 + xa_for_each(&tctx->xa, index, file) { 8706 + struct io_ring_ctx *ctx = file->private_data; 8707 + 8708 + if (ctx->flags & IORING_SETUP_SQPOLL) { 8709 + struct io_uring_task *__tctx = ctx->sqo_task->io_uring; 8710 + 8711 + inflight += percpu_counter_sum(&__tctx->inflight); 8712 + } 8713 + } 8714 + 8715 + return inflight; 8746 8716 } 8747 8717 8748 8718 /* ··· 8785 8697 s64 inflight; 8786 8698 8787 8699 /* make sure overflow events are dropped */ 8788 - tctx->in_idle = true; 8700 + atomic_inc(&tctx->in_idle); 8789 8701 8790 8702 do { 8791 8703 /* read completions before cancelations */ 8792 - inflight = percpu_counter_sum(&tctx->inflight); 8704 + inflight = tctx_inflight(tctx); 8793 8705 if (!inflight) 8794 8706 break; 8795 8707 __io_uring_files_cancel(NULL); ··· 8800 8712 * If we've seen completions, retry. This avoids a race where 8801 8713 * a completion comes in before we did prepare_to_wait(). 
8802 8714 */ 8803 - if (inflight != percpu_counter_sum(&tctx->inflight)) 8715 + if (inflight != tctx_inflight(tctx)) 8804 8716 continue; 8805 8717 schedule(); 8806 8718 } while (1); 8807 8719 8808 8720 finish_wait(&tctx->wait, &wait); 8809 - tctx->in_idle = false; 8721 + atomic_dec(&tctx->in_idle); 8810 8722 } 8811 8723 8812 8724 static int io_uring_flush(struct file *file, void *data) ··· 8951 8863 io_sqpoll_wait_sq(ctx); 8952 8864 submitted = to_submit; 8953 8865 } else if (to_submit) { 8954 - ret = io_uring_add_task_file(f.file); 8866 + ret = io_uring_add_task_file(ctx, f.file); 8955 8867 if (unlikely(ret)) 8956 8868 goto out; 8957 8869 mutex_lock(&ctx->uring_lock); ··· 8988 8900 #ifdef CONFIG_PROC_FS 8989 8901 static int io_uring_show_cred(int id, void *p, void *data) 8990 8902 { 8991 - const struct cred *cred = p; 8903 + struct io_identity *iod = p; 8904 + const struct cred *cred = iod->creds; 8992 8905 struct seq_file *m = data; 8993 8906 struct user_namespace *uns = seq_user_ns(m); 8994 8907 struct group_info *gi; ··· 9181 9092 #if defined(CONFIG_UNIX) 9182 9093 ctx->ring_sock->file = file; 9183 9094 #endif 9184 - if (unlikely(io_uring_add_task_file(file))) { 9095 + if (unlikely(io_uring_add_task_file(ctx, file))) { 9185 9096 file = ERR_PTR(-ENOMEM); 9186 9097 goto err_fd; 9187 9098 }
+2 -1
include/linux/io_uring.h
··· 30 30 struct percpu_counter inflight; 31 31 struct io_identity __identity; 32 32 struct io_identity *identity; 33 - bool in_idle; 33 + atomic_t in_idle; 34 + bool sqpoll; 34 35 }; 35 36 36 37 #if defined(CONFIG_IO_URING)