Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/net: support bundles for send

If IORING_OP_SEND is used with provided buffers, the caller may also
set IORING_RECVSEND_BUNDLE to turn it into a multi-buffer send. The idea
is that an application can fill outgoing buffers in a provided buffer
group, and then arm a single send that will service them all. Once
there are no more buffers to send, or if the requested length has
been sent, the request posts a single completion for all the buffers.

This only enables it for IORING_OP_SEND; IORING_OP_SENDMSG is coming
in a separate patch. However, this patch does do a lot of the prep
work that makes wiring up the sendmsg variant pretty trivial. They
share the prep side.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

+138 -18
+9
include/uapi/linux/io_uring.h
··· 351 351 * 0 is reported if zerocopy was actually possible. 352 352 * IORING_NOTIF_USAGE_ZC_COPIED if data was copied 353 353 * (at least partially). 354 + * 355 + * IORING_RECVSEND_BUNDLE Used with IOSQE_BUFFER_SELECT. If set, send will 356 + * grab as many buffers from the buffer group ID 357 + * given and send them all. The completion result 358 + * will be the number of buffers sent, with the 359 + * starting buffer ID in cqe->flags as per usual 360 + * for provided buffer usage. The buffers will be 361 + * contiguous from the starting buffer ID. 354 362 */ 355 363 #define IORING_RECVSEND_POLL_FIRST (1U << 0) 356 364 #define IORING_RECV_MULTISHOT (1U << 1) 357 365 #define IORING_RECVSEND_FIXED_BUF (1U << 2) 358 366 #define IORING_SEND_ZC_REPORT_USAGE (1U << 3) 367 + #define IORING_RECVSEND_BUNDLE (1U << 4) 359 368 360 369 /* 361 370 * cqe.res for IORING_CQE_F_NOTIF if
+129 -18
io_uring/net.c
··· 57 57 struct user_msghdr __user *umsg; 58 58 void __user *buf; 59 59 }; 60 - unsigned len; 60 + int len; 61 61 unsigned done_io; 62 62 unsigned msg_flags; 63 63 unsigned nr_multishot_loops; ··· 389 389 return ret; 390 390 } 391 391 392 + #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE) 393 + 392 394 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 393 395 { 394 396 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); ··· 409 407 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 410 408 sr->len = READ_ONCE(sqe->len); 411 409 sr->flags = READ_ONCE(sqe->ioprio); 412 - if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) 410 + if (sr->flags & ~SENDMSG_FLAGS) 413 411 return -EINVAL; 414 412 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 415 413 if (sr->msg_flags & MSG_DONTWAIT) 416 414 req->flags |= REQ_F_NOWAIT; 415 + if (sr->flags & IORING_RECVSEND_BUNDLE) { 416 + if (req->opcode == IORING_OP_SENDMSG) 417 + return -EINVAL; 418 + if (!(req->flags & REQ_F_BUFFER_SELECT)) 419 + return -EINVAL; 420 + sr->msg_flags |= MSG_WAITALL; 421 + sr->buf_group = req->buf_index; 422 + req->buf_list = NULL; 423 + } 417 424 418 425 #ifdef CONFIG_COMPAT 419 426 if (req->ctx->compat) ··· 436 425 { 437 426 req->flags &= ~REQ_F_NEED_CLEANUP; 438 427 io_netmsg_recycle(req, issue_flags); 428 + } 429 + 430 + /* 431 + * For bundle completions, we need to figure out how many segments we consumed. 432 + * A bundle could be using a single ITER_UBUF if that's all we mapped, or it 433 + * could be using an ITER_IOVEC. If the latter, then if we consumed all of 434 + * the segments, then it's a trivial question to answer. If we have residual 435 + * data in the iter, then loop the segments to figure out how much we 436 + * transferred. 
437 + */ 438 + static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret) 439 + { 440 + struct iovec *iov; 441 + int nbufs; 442 + 443 + /* no data is always zero segments, and a ubuf is always 1 segment */ 444 + if (ret <= 0) 445 + return 0; 446 + if (iter_is_ubuf(&kmsg->msg.msg_iter)) 447 + return 1; 448 + 449 + iov = kmsg->free_iov; 450 + if (!iov) 451 + iov = &kmsg->fast_iov; 452 + 453 + /* if all data was transferred, it's basic pointer math */ 454 + if (!iov_iter_count(&kmsg->msg.msg_iter)) 455 + return iter_iov(&kmsg->msg.msg_iter) - iov; 456 + 457 + /* short transfer, count segments */ 458 + nbufs = 0; 459 + do { 460 + int this_len = min_t(int, iov[nbufs].iov_len, ret); 461 + 462 + nbufs++; 463 + ret -= this_len; 464 + } while (ret); 465 + 466 + return nbufs; 467 + } 468 + 469 + static inline bool io_send_finish(struct io_kiocb *req, int *ret, 470 + struct io_async_msghdr *kmsg, 471 + unsigned issue_flags) 472 + { 473 + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 474 + bool bundle_finished = *ret <= 0; 475 + unsigned int cflags; 476 + 477 + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { 478 + cflags = io_put_kbuf(req, issue_flags); 479 + goto finish; 480 + } 481 + 482 + cflags = io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret), issue_flags); 483 + 484 + if (bundle_finished || req->flags & REQ_F_BL_EMPTY) 485 + goto finish; 486 + 487 + /* 488 + * Fill CQE for this send and see if we should keep trying to 489 + * send on this socket. 490 + */ 491 + if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) { 492 + io_mshot_prep_retry(req, kmsg); 493 + return false; 494 + } 495 + 496 + /* Otherwise stop bundle and use the current result. 
*/ 497 + finish: 498 + io_req_set_res(req, *ret, cflags); 499 + *ret = IOU_OK; 500 + return true; 439 501 } 440 502 441 503 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) ··· 566 482 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 567 483 struct io_async_msghdr *kmsg = req->async_data; 568 484 struct socket *sock; 569 - unsigned int cflags; 570 485 unsigned flags; 571 486 int min_ret = 0; 572 487 int ret; ··· 578 495 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 579 496 return -EAGAIN; 580 497 581 - if (io_do_buffer_select(req)) { 582 - size_t len = sr->len; 583 - void __user *buf; 584 - 585 - buf = io_buffer_select(req, &len, issue_flags); 586 - if (unlikely(!buf)) 587 - return -ENOBUFS; 588 - sr->buf = buf; 589 - sr->len = len; 590 - } 591 - 592 498 flags = sr->msg_flags; 593 499 if (issue_flags & IO_URING_F_NONBLOCK) 594 500 flags |= MSG_DONTWAIT; 595 - if (flags & MSG_WAITALL) 501 + 502 + retry_bundle: 503 + if (io_do_buffer_select(req)) { 504 + struct buf_sel_arg arg = { 505 + .iovs = &kmsg->fast_iov, 506 + .max_len = min_not_zero(sr->len, INT_MAX), 507 + .nr_iovs = 1, 508 + .mode = KBUF_MODE_EXPAND, 509 + }; 510 + 511 + if (kmsg->free_iov) { 512 + arg.nr_iovs = kmsg->free_iov_nr; 513 + arg.iovs = kmsg->free_iov; 514 + arg.mode |= KBUF_MODE_FREE; 515 + } 516 + 517 + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) 518 + arg.nr_iovs = 1; 519 + 520 + ret = io_buffers_select(req, &arg, issue_flags); 521 + if (unlikely(ret < 0)) 522 + return ret; 523 + 524 + sr->len = arg.out_len; 525 + iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, arg.iovs, ret, 526 + arg.out_len); 527 + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { 528 + kmsg->free_iov_nr = ret; 529 + kmsg->free_iov = arg.iovs; 530 + } 531 + } 532 + 533 + /* 534 + * If MSG_WAITALL is set, or this is a bundle send, then we need 535 + * the full amount. If just bundle is set, if we do a short send 536 + * then we complete the bundle sequence rather than continue on. 
537 + */ 538 + if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE) 596 539 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 597 540 598 541 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; ··· 643 534 ret += sr->done_io; 644 535 else if (sr->done_io) 645 536 ret = sr->done_io; 537 + 538 + if (!io_send_finish(req, &ret, kmsg, issue_flags)) 539 + goto retry_bundle; 540 + 646 541 io_req_msg_cleanup(req, issue_flags); 647 - cflags = io_put_kbuf(req, issue_flags); 648 - io_req_set_res(req, ret, cflags); 649 - return IOU_OK; 542 + return ret; 650 543 } 651 544 652 545 static int io_recvmsg_mshot_prep(struct io_kiocb *req,