Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-6.1-2022-11-11' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:
"Nothing major, just a few minor tweaks:

- Tweak for the TCP zero-copy io_uring self test (Pavel)

- Rather than use our internal cached value of the number of CQ events
available, use what the user can see (Dylan)

- Fix a typo in a comment, added in this release (me)

- Don't allow wrapping while adding provided buffers (me)

- Fix a double poll race, and add a lockdep assertion for it too
(Pavel)"

* tag 'io_uring-6.1-2022-11-11' of git://git.kernel.dk/linux:
io_uring/poll: lockdep annote io_poll_req_insert_locked
io_uring/poll: fix double poll req->flags races
io_uring: check for rollover of buffer ID when providing buffers
io_uring: calculate CQEs from the user visible value
io_uring: fix typo in io_uring.h comment
selftests/net: don't tests batched TCP io_uring zc

+31 -16
+1 -1
include/uapi/linux/io_uring.h
··· 222 222 223 223 /* 224 224 * sqe->uring_cmd_flags 225 - * IORING_URING_CMD_FIXED use registered buffer; pass thig flag 225 + * IORING_URING_CMD_FIXED use registered buffer; pass this flag 226 226 * along with setting sqe->buf_index. 227 227 */ 228 228 #define IORING_URING_CMD_FIXED (1U << 0)
+8 -2
io_uring/io_uring.c
··· 176 176 return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); 177 177 } 178 178 179 + static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx) 180 + { 181 + return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head); 182 + } 183 + 179 184 static bool io_match_linked(struct io_kiocb *head) 180 185 { 181 186 struct io_kiocb *req; ··· 2320 2315 static inline bool io_should_wake(struct io_wait_queue *iowq) 2321 2316 { 2322 2317 struct io_ring_ctx *ctx = iowq->ctx; 2323 - int dist = ctx->cached_cq_tail - (int) iowq->cq_tail; 2318 + int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail; 2324 2319 2325 2320 /* 2326 2321 * Wake up if we have enough events, or if a timeout occurred since we ··· 2404 2399 return ret; 2405 2400 io_cqring_overflow_flush(ctx); 2406 2401 2407 - if (io_cqring_events(ctx) >= min_events) 2402 + /* if user messes with these they will just get an early return */ 2403 + if (__io_cqring_events_user(ctx) >= min_events) 2408 2404 return 0; 2409 2405 } while (ret > 0); 2410 2406
+2
io_uring/kbuf.c
··· 346 346 tmp = READ_ONCE(sqe->off); 347 347 if (tmp > USHRT_MAX) 348 348 return -E2BIG; 349 + if (tmp + p->nbufs >= USHRT_MAX) 350 + return -EINVAL; 349 351 p->bid = tmp; 350 352 return 0; 351 353 }
+19 -12
io_uring/poll.c
··· 116 116 struct io_hash_table *table = &req->ctx->cancel_table_locked; 117 117 u32 index = hash_long(req->cqe.user_data, table->hash_bits); 118 118 119 + lockdep_assert_held(&req->ctx->uring_lock); 120 + 119 121 hlist_add_head(&req->hash_node, &table->hbs[index].list); 120 122 } 121 123 ··· 396 394 return 1; 397 395 } 398 396 399 - static void io_poll_double_prepare(struct io_kiocb *req) 397 + /* fails only when polling is already completing by the first entry */ 398 + static bool io_poll_double_prepare(struct io_kiocb *req) 400 399 { 401 400 struct wait_queue_head *head; 402 401 struct io_poll *poll = io_poll_get_single(req); ··· 406 403 rcu_read_lock(); 407 404 head = smp_load_acquire(&poll->head); 408 405 /* 409 - * poll arm may not hold ownership and so race with 410 - * io_poll_wake() by modifying req->flags. There is only one 411 - * poll entry queued, serialise with it by taking its head lock. 406 + * poll arm might not hold ownership and so race for req->flags with 407 + * io_poll_wake(). There is only one poll entry queued, serialise with 408 + * it by taking its head lock. As we're still arming the tw hanlder 409 + * is not going to be run, so there are no races with it. 
412 410 */ 413 - if (head) 411 + if (head) { 414 412 spin_lock_irq(&head->lock); 415 - 416 - req->flags |= REQ_F_DOUBLE_POLL; 417 - if (req->opcode == IORING_OP_POLL_ADD) 418 - req->flags |= REQ_F_ASYNC_DATA; 419 - 420 - if (head) 413 + req->flags |= REQ_F_DOUBLE_POLL; 414 + if (req->opcode == IORING_OP_POLL_ADD) 415 + req->flags |= REQ_F_ASYNC_DATA; 421 416 spin_unlock_irq(&head->lock); 417 + } 422 418 rcu_read_unlock(); 419 + return !!head; 423 420 } 424 421 425 422 static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt, ··· 457 454 /* mark as double wq entry */ 458 455 wqe_private |= IO_WQE_F_DOUBLE; 459 456 io_init_poll_iocb(poll, first->events, first->wait.func); 460 - io_poll_double_prepare(req); 457 + if (!io_poll_double_prepare(req)) { 458 + /* the request is completing, just back off */ 459 + kfree(poll); 460 + return; 461 + } 461 462 *poll_ptr = poll; 462 463 } else { 463 464 /* fine to modify, there is no poll queued to race with us */
+1 -1
tools/testing/selftests/net/io_uring_zerocopy_tx.sh
··· 29 29 for IP in "${IPs[@]}"; do 30 30 for mode in $(seq 1 3); do 31 31 $0 "$IP" udp -m "$mode" -t 1 -n 32 32 - $0 "$IP" tcp -m "$mode" -t 1 -n 32 32 + $0 "$IP" tcp -m "$mode" -t 1 -n 1 33 33 done 34 34 done 35 35