Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-6.16-20250614' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

- Fix for a race between SQPOLL exit and fdinfo reading.

The race window is slim, and I was only able to reproduce it with an
artificial delay in the kernel. A follow-up sparse fix is included as
well, to unify the access to ->thread.

- Fix for multiple buffer peeking, avoiding truncation if possible.

- Run local task_work for IOPOLL reaping when the ring is exiting.

This currently isn't done due to an assumption that polled IO will
never need task_work, but a fix on the block side is going to change
that.

* tag 'io_uring-6.16-20250614' of git://git.kernel.dk/linux:
io_uring: run local task_work from ring exit IOPOLL reaping
io_uring/kbuf: don't truncate end buffer for multiple buffer peeks
io_uring: consistently use rcu semantics with sqpoll thread
io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo()

+59 -23
+10 -2
io_uring/fdinfo.c
··· 141 141 142 142 if (ctx->flags & IORING_SETUP_SQPOLL) { 143 143 struct io_sq_data *sq = ctx->sq_data; 144 + struct task_struct *tsk; 144 145 146 + rcu_read_lock(); 147 + tsk = rcu_dereference(sq->thread); 145 148 /* 146 149 * sq->thread might be NULL if we raced with the sqpoll 147 150 * thread termination. 148 151 */ 149 - if (sq->thread) { 152 + if (tsk) { 153 + get_task_struct(tsk); 154 + rcu_read_unlock(); 155 + getrusage(tsk, RUSAGE_SELF, &sq_usage); 156 + put_task_struct(tsk); 150 157 sq_pid = sq->task_pid; 151 158 sq_cpu = sq->sq_cpu; 152 - getrusage(sq->thread, RUSAGE_SELF, &sq_usage); 153 159 sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 154 160 + sq_usage.ru_stime.tv_usec); 155 161 sq_work_time = sq->work_time; 162 + } else { 163 + rcu_read_unlock(); 156 164 } 157 165 } 158 166
+5 -2
io_uring/io_uring.c
··· 1523 1523 } 1524 1524 } 1525 1525 mutex_unlock(&ctx->uring_lock); 1526 + 1527 + if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) 1528 + io_move_task_work_from_local(ctx); 1526 1529 } 1527 1530 1528 1531 static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) ··· 2909 2906 struct task_struct *tsk; 2910 2907 2911 2908 io_sq_thread_park(sqd); 2912 - tsk = sqd->thread; 2909 + tsk = sqpoll_task_locked(sqd); 2913 2910 if (tsk && tsk->io_uring && tsk->io_uring->io_wq) 2914 2911 io_wq_cancel_cb(tsk->io_uring->io_wq, 2915 2912 io_cancel_ctx_cb, ctx, true); ··· 3145 3142 s64 inflight; 3146 3143 DEFINE_WAIT(wait); 3147 3144 3148 - WARN_ON_ONCE(sqd && sqd->thread != current); 3145 + WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current); 3149 3146 3150 3147 if (!current->io_uring) 3151 3148 return;
+4 -1
io_uring/kbuf.c
··· 270 270 /* truncate end piece, if needed, for non partial buffers */ 271 271 if (len > arg->max_len) { 272 272 len = arg->max_len; 273 - if (!(bl->flags & IOBL_INC)) 273 + if (!(bl->flags & IOBL_INC)) { 274 + if (iov != arg->iovs) 275 + break; 274 276 buf->len = len; 277 + } 275 278 } 276 279 277 280 iov->iov_base = u64_to_user_ptr(buf->addr);
+5 -2
io_uring/register.c
··· 273 273 if (ctx->flags & IORING_SETUP_SQPOLL) { 274 274 sqd = ctx->sq_data; 275 275 if (sqd) { 276 + struct task_struct *tsk; 277 + 276 278 /* 277 279 * Observe the correct sqd->lock -> ctx->uring_lock 278 280 * ordering. Fine to drop uring_lock here, we hold ··· 284 282 mutex_unlock(&ctx->uring_lock); 285 283 mutex_lock(&sqd->lock); 286 284 mutex_lock(&ctx->uring_lock); 287 - if (sqd->thread) 288 - tctx = sqd->thread->io_uring; 285 + tsk = sqpoll_task_locked(sqd); 286 + if (tsk) 287 + tctx = tsk->io_uring; 289 288 } 290 289 } else { 291 290 tctx = current->io_uring;
+28 -15
io_uring/sqpoll.c
··· 30 30 void io_sq_thread_unpark(struct io_sq_data *sqd) 31 31 __releases(&sqd->lock) 32 32 { 33 - WARN_ON_ONCE(sqd->thread == current); 33 + WARN_ON_ONCE(sqpoll_task_locked(sqd) == current); 34 34 35 35 /* 36 36 * Do the dance but not conditional clear_bit() because it'd race with ··· 46 46 void io_sq_thread_park(struct io_sq_data *sqd) 47 47 __acquires(&sqd->lock) 48 48 { 49 - WARN_ON_ONCE(data_race(sqd->thread) == current); 49 + struct task_struct *tsk; 50 50 51 51 atomic_inc(&sqd->park_pending); 52 52 set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); 53 53 mutex_lock(&sqd->lock); 54 - if (sqd->thread) 55 - wake_up_process(sqd->thread); 54 + 55 + tsk = sqpoll_task_locked(sqd); 56 + if (tsk) { 57 + WARN_ON_ONCE(tsk == current); 58 + wake_up_process(tsk); 59 + } 56 60 } 57 61 58 62 void io_sq_thread_stop(struct io_sq_data *sqd) 59 63 { 60 - WARN_ON_ONCE(sqd->thread == current); 64 + struct task_struct *tsk; 65 + 61 66 WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); 62 67 63 68 set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); 64 69 mutex_lock(&sqd->lock); 65 - if (sqd->thread) 66 - wake_up_process(sqd->thread); 70 + tsk = sqpoll_task_locked(sqd); 71 + if (tsk) { 72 + WARN_ON_ONCE(tsk == current); 73 + wake_up_process(tsk); 74 + } 67 75 mutex_unlock(&sqd->lock); 68 76 wait_for_completion(&sqd->exited); 69 77 } ··· 278 270 /* offload context creation failed, just exit */ 279 271 if (!current->io_uring) { 280 272 mutex_lock(&sqd->lock); 281 - sqd->thread = NULL; 273 + rcu_assign_pointer(sqd->thread, NULL); 274 + put_task_struct(current); 282 275 mutex_unlock(&sqd->lock); 283 276 goto err_out; 284 277 } ··· 388 379 io_sq_tw(&retry_list, UINT_MAX); 389 380 390 381 io_uring_cancel_generic(true, sqd); 391 - sqd->thread = NULL; 382 + rcu_assign_pointer(sqd->thread, NULL); 383 + put_task_struct(current); 392 384 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) 393 385 atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); 394 386 io_run_task_work(); ··· 
494 484 goto err_sqpoll; 495 485 } 496 486 497 - sqd->thread = tsk; 487 + mutex_lock(&sqd->lock); 488 + rcu_assign_pointer(sqd->thread, tsk); 489 + mutex_unlock(&sqd->lock); 490 + 498 491 task_to_put = get_task_struct(tsk); 499 492 ret = io_uring_alloc_task_context(tsk, ctx); 500 493 wake_up_new_task(tsk); ··· 508 495 ret = -EINVAL; 509 496 goto err; 510 497 } 511 - 512 - if (task_to_put) 513 - put_task_struct(task_to_put); 514 498 return 0; 515 499 err_sqpoll: 516 500 complete(&ctx->sq_data->exited); ··· 525 515 int ret = -EINVAL; 526 516 527 517 if (sqd) { 518 + struct task_struct *tsk; 519 + 528 520 io_sq_thread_park(sqd); 529 521 /* Don't set affinity for a dying thread */ 530 - if (sqd->thread) 531 - ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask); 522 + tsk = sqpoll_task_locked(sqd); 523 + if (tsk) 524 + ret = io_wq_cpu_affinity(tsk->io_uring, mask); 532 525 io_sq_thread_unpark(sqd); 533 526 } 534 527
+7 -1
io_uring/sqpoll.h
··· 8 8 /* ctx's that are using this sqd */ 9 9 struct list_head ctx_list; 10 10 11 - struct task_struct *thread; 11 + struct task_struct __rcu *thread; 12 12 struct wait_queue_head wait; 13 13 14 14 unsigned sq_thread_idle; ··· 29 29 void io_put_sq_data(struct io_sq_data *sqd); 30 30 void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); 31 31 int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); 32 + 33 + static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) 34 + { 35 + return rcu_dereference_protected(sqd->thread, 36 + lockdep_is_held(&sqd->lock)); 37 + }