Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring: ensure ctx->rings is stable for task work flags manipulation

If DEFER_TASKRUN | SETUP_TASKRUN is used and task work is added while
the ring is being resized, it's possible for the OR'ing of
IORING_SQ_TASKRUN to happen in the small window of swapping into the
new rings and the old rings being freed.

Prevent this by adding a 2nd ->rings pointer, ->rings_rcu, which is
protected by RCU. The task work flags manipulation is inside RCU
already, and if the resize ring freeing is done post an RCU synchronize,
then there's no need to add locking to the fast path of task work
additions.

Note: this is only done for DEFER_TASKRUN, as that's the only setup mode
that supports ring resizing. If this ever changes, then they too need to
use the io_ctx_mark_taskrun() helper.

Link: https://lore.kernel.org/io-uring/20260309062759.482210-1-naup96721@gmail.com/
Cc: stable@vger.kernel.org
Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

+34 -2
+1
include/linux/io_uring_types.h
··· 388 388 * regularly bounce b/w CPUs. 389 389 */ 390 390 struct { 391 + struct io_rings __rcu *rings_rcu; 391 392 struct llist_head work_llist; 392 393 struct llist_head retry_llist; 393 394 unsigned long check_cq;
+2
io_uring/io_uring.c
··· 2066 2066 io_free_region(ctx->user, &ctx->sq_region); 2067 2067 io_free_region(ctx->user, &ctx->ring_region); 2068 2068 ctx->rings = NULL; 2069 + RCU_INIT_POINTER(ctx->rings_rcu, NULL); 2069 2070 ctx->sq_sqes = NULL; 2070 2071 } 2071 2072 ··· 2704 2703 if (ret) 2705 2704 return ret; 2706 2705 ctx->rings = rings = io_region_get_ptr(&ctx->ring_region); 2706 + rcu_assign_pointer(ctx->rings_rcu, rings); 2707 2707 if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) 2708 2708 ctx->sq_array = (u32 *)((char *)rings + rl->sq_array_offset); 2709 2709
+11
io_uring/register.c
··· 633 633 ctx->sq_entries = p->sq_entries; 634 634 ctx->cq_entries = p->cq_entries; 635 635 636 + /* 637 + * Just mark any flag we may have missed and that the application 638 + * should act on unconditionally. Worst case it'll be an extra 639 + * syscall. 640 + */ 641 + atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags); 636 642 ctx->rings = n.rings; 643 + rcu_assign_pointer(ctx->rings_rcu, n.rings); 644 + 637 645 ctx->sq_sqes = n.sq_sqes; 638 646 swap_old(ctx, o, n, ring_region); 639 647 swap_old(ctx, o, n, sq_region); ··· 650 642 out: 651 643 spin_unlock(&ctx->completion_lock); 652 644 mutex_unlock(&ctx->mmap_lock); 645 + /* Wait for concurrent io_ctx_mark_taskrun() */ 646 + if (to_free == &o) 647 + synchronize_rcu_expedited(); 653 648 io_register_free_rings(ctx, to_free); 654 649 655 650 if (ctx->sq_data)
+20 -2
io_uring/tw.c
··· 152 152 WARN_ON_ONCE(ret); 153 153 } 154 154 155 + /* 156 + * Sets IORING_SQ_TASKRUN in the sq_flags shared with userspace, using the 157 + * RCU protected rings pointer to be safe against concurrent ring resizing. 158 + */ 159 + static void io_ctx_mark_taskrun(struct io_ring_ctx *ctx) 160 + { 161 + lockdep_assert_in_rcu_read_lock(); 162 + 163 + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) { 164 + struct io_rings *rings = rcu_dereference(ctx->rings_rcu); 165 + 166 + atomic_or(IORING_SQ_TASKRUN, &rings->sq_flags); 167 + } 168 + } 169 + 155 170 void io_req_local_work_add(struct io_kiocb *req, unsigned flags) 156 171 { 157 172 struct io_ring_ctx *ctx = req->ctx; ··· 221 206 */ 222 207 223 208 if (!head) { 224 - if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) 225 - atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); 209 + io_ctx_mark_taskrun(ctx); 226 210 if (ctx->has_evfd) 227 211 io_eventfd_signal(ctx, false); 228 212 } ··· 245 231 if (!llist_add(&req->io_task_work.node, &tctx->task_list)) 246 232 return; 247 233 234 + /* 235 + * Doesn't need to use ->rings_rcu, as resizing isn't supported for 236 + * !DEFER_TASKRUN. 237 + */ 248 238 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) 249 239 atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); 250 240