Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-5.12-2021-03-27' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

- Use thread info versions of flag testing, as discussed last week.

- The series enabling PF_IO_WORKER threads to just take signals like a
normal thread, instead of special-casing in a bunch of places that
they do not. This ends up being pretty trivial to do, and it lets us
revert all the special casing we're currently doing.

- Kill dead pointer assignment

- Fix hashed part of async work queue trace

- Fix sign extension issue for IORING_OP_PROVIDE_BUFFERS

- Fix a link completion ordering regression introduced in this merge window

- Cancellation fixes

* tag 'io_uring-5.12-2021-03-27' of git://git.kernel.dk/linux-block:
io_uring: remove unused assignment to pointer io
io_uring: don't cancel extra on files match
io_uring: don't cancel-track common timeouts
io_uring: do post-completion chore on t-out cancel
io_uring: fix timeout cancel return code
Revert "signal: don't allow STOP on PF_IO_WORKER threads"
Revert "kernel: freezer should treat PF_IO_WORKER like PF_KTHREAD for freezing"
Revert "kernel: treat PF_IO_WORKER like PF_KTHREAD for ptrace/signals"
Revert "signal: don't allow sending any signals to PF_IO_WORKER threads"
kernel: stop masking signals in create_io_thread()
io_uring: handle signals for IO threads like a normal thread
kernel: don't call do_exit() for PF_IO_WORKER threads
io_uring: maintain CQE order of a failed link
io-wq: fix race around pending work on teardown
io_uring: do ctx sqd ejection in a clear context
io_uring: fix provide_buffers sign extension
io_uring: don't skip file_end_write() on reissue
io_uring: correct io_queue_async_work() traces
io_uring: don't use {test,clear}_tsk_thread_flag() for current

+94 -76
+21 -11
fs/io-wq.c
··· 16 16 #include <linux/rculist_nulls.h> 17 17 #include <linux/cpu.h> 18 18 #include <linux/tracehook.h> 19 - #include <linux/freezer.h> 20 19 21 20 #include "../kernel/sched/sched.h" 22 21 #include "io-wq.h" ··· 387 388 388 389 static bool io_flush_signals(void) 389 390 { 390 - if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) { 391 + if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) { 391 392 __set_current_state(TASK_RUNNING); 392 - if (current->task_works) 393 - task_work_run(); 394 - clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL); 393 + tracehook_notify_signal(); 395 394 return true; 396 395 } 397 396 return false; ··· 502 505 if (io_flush_signals()) 503 506 continue; 504 507 ret = schedule_timeout(WORKER_IDLE_TIMEOUT); 505 - if (try_to_freeze() || ret) 506 - continue; 507 - if (fatal_signal_pending(current)) 508 + if (signal_pending(current)) { 509 + struct ksignal ksig; 510 + 511 + if (!get_signal(&ksig)) 512 + continue; 508 513 break; 514 + } 515 + if (ret) 516 + continue; 509 517 /* timed out, exit unless we're the fixed worker */ 510 518 if (test_bit(IO_WQ_BIT_EXIT, &wq->state) || 511 519 !(worker->flags & IO_WORKER_F_FIXED)) ··· 718 716 set_current_state(TASK_INTERRUPTIBLE); 719 717 io_wq_check_workers(wq); 720 718 schedule_timeout(HZ); 721 - try_to_freeze(); 722 - if (fatal_signal_pending(current)) 719 + if (signal_pending(current)) { 720 + struct ksignal ksig; 721 + 722 + if (!get_signal(&ksig)) 723 + continue; 723 724 set_bit(IO_WQ_BIT_EXIT, &wq->state); 725 + } 724 726 } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)); 725 727 726 728 io_wq_check_workers(wq); ··· 1071 1065 1072 1066 for_each_node(node) { 1073 1067 struct io_wqe *wqe = wq->wqes[node]; 1074 - WARN_ON_ONCE(!wq_list_empty(&wqe->work_list)); 1068 + struct io_cb_cancel_data match = { 1069 + .fn = io_wq_work_match_all, 1070 + .cancel_all = true, 1071 + }; 1072 + io_wqe_cancel_pending_work(wqe, &match); 1075 1073 kfree(wqe); 1076 1074 } 1077 1075 io_wq_put_hash(wq->hash);
+51 -47
fs/io_uring.c
··· 78 78 #include <linux/task_work.h> 79 79 #include <linux/pagemap.h> 80 80 #include <linux/io_uring.h> 81 - #include <linux/freezer.h> 82 81 83 82 #define CREATE_TRACE_POINTS 84 83 #include <trace/events/io_uring.h> ··· 1094 1095 io_for_each_link(req, head) { 1095 1096 if (req->flags & REQ_F_INFLIGHT) 1096 1097 return true; 1097 - if (req->task->files == files) 1098 - return true; 1099 1098 } 1100 1099 return false; 1101 1100 } ··· 1236 1239 BUG_ON(!tctx); 1237 1240 BUG_ON(!tctx->io_wq); 1238 1241 1239 - trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, 1240 - &req->work, req->flags); 1241 1242 /* init ->work of the whole link before punting */ 1242 1243 io_prep_async_link(req); 1244 + trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, 1245 + &req->work, req->flags); 1243 1246 io_wq_enqueue(tctx->io_wq, &req->work); 1244 1247 if (link) 1245 1248 io_queue_linked_timeout(link); 1246 1249 } 1247 1250 1248 - static void io_kill_timeout(struct io_kiocb *req) 1251 + static void io_kill_timeout(struct io_kiocb *req, int status) 1249 1252 { 1250 1253 struct io_timeout_data *io = req->async_data; 1251 1254 int ret; ··· 1255 1258 atomic_set(&req->ctx->cq_timeouts, 1256 1259 atomic_read(&req->ctx->cq_timeouts) + 1); 1257 1260 list_del_init(&req->timeout.list); 1258 - io_cqring_fill_event(req, 0); 1261 + io_cqring_fill_event(req, status); 1259 1262 io_put_req_deferred(req, 1); 1260 1263 } 1261 - } 1262 - 1263 - /* 1264 - * Returns true if we found and killed one or more timeouts 1265 - */ 1266 - static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, 1267 - struct files_struct *files) 1268 - { 1269 - struct io_kiocb *req, *tmp; 1270 - int canceled = 0; 1271 - 1272 - spin_lock_irq(&ctx->completion_lock); 1273 - list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { 1274 - if (io_match_task(req, tsk, files)) { 1275 - io_kill_timeout(req); 1276 - canceled++; 1277 - } 1278 - } 1279 - 
spin_unlock_irq(&ctx->completion_lock); 1280 - return canceled != 0; 1281 1264 } 1282 1265 1283 1266 static void __io_queue_deferred(struct io_ring_ctx *ctx) ··· 1304 1327 break; 1305 1328 1306 1329 list_del_init(&req->timeout.list); 1307 - io_kill_timeout(req); 1330 + io_kill_timeout(req, 0); 1308 1331 } while (!list_empty(&ctx->timeout_list)); 1309 1332 1310 1333 ctx->cq_last_tm_flush = seq; ··· 2501 2524 { 2502 2525 int cflags = 0; 2503 2526 2527 + if (req->rw.kiocb.ki_flags & IOCB_WRITE) 2528 + kiocb_end_write(req); 2504 2529 if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req)) 2505 2530 return; 2506 2531 if (res != req->result) 2507 2532 req_set_fail_links(req); 2508 - 2509 - if (req->rw.kiocb.ki_flags & IOCB_WRITE) 2510 - kiocb_end_write(req); 2511 2533 if (req->flags & REQ_F_BUFFER_SELECTED) 2512 2534 cflags = io_put_rw_kbuf(req); 2513 2535 __io_req_complete(req, issue_flags, res, cflags); ··· 3954 3978 static int io_provide_buffers_prep(struct io_kiocb *req, 3955 3979 const struct io_uring_sqe *sqe) 3956 3980 { 3981 + unsigned long size; 3957 3982 struct io_provide_buf *p = &req->pbuf; 3958 3983 u64 tmp; 3959 3984 ··· 3968 3991 p->addr = READ_ONCE(sqe->addr); 3969 3992 p->len = READ_ONCE(sqe->len); 3970 3993 3971 - if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs))) 3994 + size = (unsigned long)p->len * p->nbufs; 3995 + if (!access_ok(u64_to_user_ptr(p->addr), size)) 3972 3996 return -EFAULT; 3973 3997 3974 3998 p->bgid = READ_ONCE(sqe->buf_group); ··· 4798 4820 ret = -ENOMEM; 4799 4821 goto out; 4800 4822 } 4801 - io = req->async_data; 4802 4823 memcpy(req->async_data, &__io, sizeof(__io)); 4803 4824 return -EAGAIN; 4804 4825 } ··· 5560 5583 5561 5584 data->mode = io_translate_timeout_mode(flags); 5562 5585 hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode); 5563 - io_req_track_inflight(req); 5586 + if (is_timeout_link) 5587 + io_req_track_inflight(req); 5564 5588 return 0; 5565 5589 } 5566 5590 ··· 6457 6479 ret = 
io_init_req(ctx, req, sqe); 6458 6480 if (unlikely(ret)) { 6459 6481 fail_req: 6460 - io_put_req(req); 6461 - io_req_complete(req, ret); 6462 6482 if (link->head) { 6463 6483 /* fail even hard links since we don't submit */ 6464 6484 link->head->flags |= REQ_F_FAIL_LINK; ··· 6464 6488 io_req_complete(link->head, -ECANCELED); 6465 6489 link->head = NULL; 6466 6490 } 6491 + io_put_req(req); 6492 + io_req_complete(req, ret); 6467 6493 return ret; 6468 6494 } 6469 6495 ret = io_req_prep(req, sqe); ··· 6742 6764 timeout = jiffies + sqd->sq_thread_idle; 6743 6765 continue; 6744 6766 } 6745 - if (fatal_signal_pending(current)) 6767 + if (signal_pending(current)) { 6768 + struct ksignal ksig; 6769 + 6770 + if (!get_signal(&ksig)) 6771 + continue; 6746 6772 break; 6773 + } 6747 6774 sqt_spin = false; 6748 6775 cap_entries = !list_is_singular(&sqd->ctx_list); 6749 6776 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { ··· 6791 6808 6792 6809 mutex_unlock(&sqd->lock); 6793 6810 schedule(); 6794 - try_to_freeze(); 6795 6811 mutex_lock(&sqd->lock); 6796 6812 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) 6797 6813 io_ring_clear_wakeup_flag(ctx); ··· 6855 6873 return 1; 6856 6874 if (!signal_pending(current)) 6857 6875 return 0; 6858 - if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL)) 6876 + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) 6859 6877 return -ERESTARTSYS; 6860 6878 return -EINTR; 6861 6879 } ··· 8545 8563 struct io_tctx_node *node; 8546 8564 int ret; 8547 8565 8566 + /* prevent SQPOLL from submitting new requests */ 8567 + if (ctx->sq_data) { 8568 + io_sq_thread_park(ctx->sq_data); 8569 + list_del_init(&ctx->sqd_list); 8570 + io_sqd_update_thread_idle(ctx->sq_data); 8571 + io_sq_thread_unpark(ctx->sq_data); 8572 + } 8573 + 8548 8574 /* 8549 8575 * If we're doing polled IO and end up having requests being 8550 8576 * submitted async (out-of-line), then completions can come in while ··· 8589 8599 io_ring_ctx_free(ctx); 8590 8600 } 8591 8601 8602 + /* Returns true 
if we found and killed one or more timeouts */ 8603 + static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, 8604 + struct files_struct *files) 8605 + { 8606 + struct io_kiocb *req, *tmp; 8607 + int canceled = 0; 8608 + 8609 + spin_lock_irq(&ctx->completion_lock); 8610 + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { 8611 + if (io_match_task(req, tsk, files)) { 8612 + io_kill_timeout(req, -ECANCELED); 8613 + canceled++; 8614 + } 8615 + } 8616 + io_commit_cqring(ctx); 8617 + spin_unlock_irq(&ctx->completion_lock); 8618 + 8619 + if (canceled != 0) 8620 + io_cqring_ev_posted(ctx); 8621 + return canceled != 0; 8622 + } 8623 + 8592 8624 static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) 8593 8625 { 8594 8626 unsigned long index; ··· 8625 8613 xa_for_each(&ctx->personalities, index, creds) 8626 8614 io_unregister_personality(ctx, index); 8627 8615 mutex_unlock(&ctx->uring_lock); 8628 - 8629 - /* prevent SQPOLL from submitting new requests */ 8630 - if (ctx->sq_data) { 8631 - io_sq_thread_park(ctx->sq_data); 8632 - list_del_init(&ctx->sqd_list); 8633 - io_sqd_update_thread_idle(ctx->sq_data); 8634 - io_sq_thread_unpark(ctx->sq_data); 8635 - } 8636 8616 8637 8617 io_kill_timeouts(ctx, NULL, NULL); 8638 8618 io_poll_remove_all(ctx, NULL, NULL);
+8 -8
kernel/fork.c
··· 1948 1948 p = dup_task_struct(current, node); 1949 1949 if (!p) 1950 1950 goto fork_out; 1951 - if (args->io_thread) 1951 + if (args->io_thread) { 1952 + /* 1953 + * Mark us an IO worker, and block any signal that isn't 1954 + * fatal or STOP 1955 + */ 1952 1956 p->flags |= PF_IO_WORKER; 1957 + siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1958 + } 1953 1959 1954 1960 /* 1955 1961 * This _must_ happen before we call free_task(), i.e. before we jump ··· 2444 2438 .stack_size = (unsigned long)arg, 2445 2439 .io_thread = 1, 2446 2440 }; 2447 - struct task_struct *tsk; 2448 2441 2449 - tsk = copy_process(NULL, 0, node, &args); 2450 - if (!IS_ERR(tsk)) { 2451 - sigfillset(&tsk->blocked); 2452 - sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); 2453 - } 2454 - return tsk; 2442 + return copy_process(NULL, 0, node, &args); 2455 2443 } 2456 2444 2457 2445 /*
+1 -1
kernel/freezer.c
··· 134 134 return false; 135 135 } 136 136 137 - if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) 137 + if (!(p->flags & PF_KTHREAD)) 138 138 fake_signal_wake_up(p); 139 139 else 140 140 wake_up_state(p, TASK_INTERRUPTIBLE);
+1 -1
kernel/ptrace.c
··· 375 375 audit_ptrace(task); 376 376 377 377 retval = -EPERM; 378 - if (unlikely(task->flags & (PF_KTHREAD | PF_IO_WORKER))) 378 + if (unlikely(task->flags & PF_KTHREAD)) 379 379 goto out; 380 380 if (same_thread_group(task, current)) 381 381 goto out;
+12 -8
kernel/signal.c
··· 91 91 return true; 92 92 93 93 /* Only allow kernel generated signals to this kthread */ 94 - if (unlikely((t->flags & (PF_KTHREAD | PF_IO_WORKER)) && 94 + if (unlikely((t->flags & PF_KTHREAD) && 95 95 (handler == SIG_KTHREAD_KERNEL) && !force)) 96 96 return true; 97 97 ··· 288 288 JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); 289 289 BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK)); 290 290 291 - if (unlikely(fatal_signal_pending(task) || 292 - (task->flags & (PF_EXITING | PF_IO_WORKER)))) 291 + if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING))) 293 292 return false; 294 293 295 294 if (mask & JOBCTL_STOP_SIGMASK) ··· 833 834 834 835 if (!valid_signal(sig)) 835 836 return -EINVAL; 836 - /* PF_IO_WORKER threads don't take any signals */ 837 - if (t->flags & PF_IO_WORKER) 838 - return -ESRCH; 839 837 840 838 if (!si_fromuser(info)) 841 839 return 0; ··· 1096 1100 /* 1097 1101 * Skip useless siginfo allocation for SIGKILL and kernel threads. 1098 1102 */ 1099 - if ((sig == SIGKILL) || (t->flags & (PF_KTHREAD | PF_IO_WORKER))) 1103 + if ((sig == SIGKILL) || (t->flags & PF_KTHREAD)) 1100 1104 goto out_set; 1101 1105 1102 1106 /* ··· 2768 2772 } 2769 2773 2770 2774 /* 2775 + * PF_IO_WORKER threads will catch and exit on fatal signals 2776 + * themselves. They have cleanup that must be performed, so 2777 + * we cannot call do_exit() on their behalf. 2778 + */ 2779 + if (current->flags & PF_IO_WORKER) 2780 + goto out; 2781 + 2782 + /* 2771 2783 * Death signals, no core dump. 2772 2784 */ 2773 2785 do_group_exit(ksig->info.si_signo); 2774 2786 /* NOTREACHED */ 2775 2787 } 2776 2788 spin_unlock_irq(&sighand->siglock); 2777 - 2789 + out: 2778 2790 ksig->sig = signr; 2779 2791 2780 2792 if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))