Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-6.0-2022-09-02' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

- A single fix for over-eager retries for networking (Pavel)

- Revert the notification slot support for zerocopy sends.

It turns out that even after more than a year of development and
testing, there's not full agreement on whether just using plain
ordered notifications is Good Enough to avoid the complexity of using
the notification slots. Because of that, we decided that it's best
left to a future final decision.

We can always bring back this feature, but we can't really change it
or remove it once we've released 6.0 with it enabled. The reverts
leave the usual CQE notifications as the primary interface for
knowing when data was sent, and when it was acked. (Pavel)

* tag 'io_uring-6.0-2022-09-02' of git://git.kernel.dk/linux-block:
selftests/net: return back io_uring zc send tests
io_uring/net: simplify zerocopy send user API
io_uring/notif: remove notif registration
Revert "io_uring: rename IORING_OP_FILES_UPDATE"
Revert "io_uring: add zc notification flush requests"
selftests/net: temporarily disable io_uring zc test
io_uring/net: fix overexcessive retries

+100 -323
+6 -22
include/uapi/linux/io_uring.h
··· 71 71 __s32 splice_fd_in; 72 72 __u32 file_index; 73 73 struct { 74 - __u16 notification_idx; 75 74 __u16 addr_len; 75 + __u16 __pad3[1]; 76 76 }; 77 77 }; 78 78 union { ··· 178 178 IORING_OP_FALLOCATE, 179 179 IORING_OP_OPENAT, 180 180 IORING_OP_CLOSE, 181 - IORING_OP_RSRC_UPDATE, 182 - IORING_OP_FILES_UPDATE = IORING_OP_RSRC_UPDATE, 181 + IORING_OP_FILES_UPDATE, 183 182 IORING_OP_STATX, 184 183 IORING_OP_READ, 185 184 IORING_OP_WRITE, ··· 205 206 IORING_OP_GETXATTR, 206 207 IORING_OP_SOCKET, 207 208 IORING_OP_URING_CMD, 208 - IORING_OP_SENDZC_NOTIF, 209 + IORING_OP_SEND_ZC, 209 210 210 211 /* this goes last, obviously */ 211 212 IORING_OP_LAST, ··· 227 228 #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) 228 229 #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) 229 230 #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) 230 - 231 231 /* 232 232 * sqe->splice_flags 233 233 * extends splice(2) flags ··· 279 281 * 280 282 * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in 281 283 * the buf_index field. 282 - * 283 - * IORING_RECVSEND_NOTIF_FLUSH Flush a notification after a successful 284 - * successful. Only for zerocopy sends. 
285 284 */ 286 285 #define IORING_RECVSEND_POLL_FIRST (1U << 0) 287 286 #define IORING_RECV_MULTISHOT (1U << 1) 288 287 #define IORING_RECVSEND_FIXED_BUF (1U << 2) 289 - #define IORING_RECVSEND_NOTIF_FLUSH (1U << 3) 290 288 291 289 /* 292 290 * accept flags stored in sqe->ioprio 293 291 */ 294 292 #define IORING_ACCEPT_MULTISHOT (1U << 0) 295 - 296 - 297 - /* 298 - * IORING_OP_RSRC_UPDATE flags 299 - */ 300 - enum { 301 - IORING_RSRC_UPDATE_FILES, 302 - IORING_RSRC_UPDATE_NOTIF, 303 - }; 304 293 305 294 /* 306 295 * IORING_OP_MSG_RING command types, stored in sqe->addr ··· 326 341 * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID 327 342 * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries 328 343 * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv 344 + * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct 345 + * them from sends. 329 346 */ 330 347 #define IORING_CQE_F_BUFFER (1U << 0) 331 348 #define IORING_CQE_F_MORE (1U << 1) 332 349 #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) 350 + #define IORING_CQE_F_NOTIF (1U << 3) 333 351 334 352 enum { 335 353 IORING_CQE_BUFFER_SHIFT = 16, ··· 472 484 473 485 /* register a range of fixed file slots for automatic slot allocation */ 474 486 IORING_REGISTER_FILE_ALLOC_RANGE = 25, 475 - 476 - /* zerocopy notification API */ 477 - IORING_REGISTER_NOTIFIERS = 26, 478 - IORING_UNREGISTER_NOTIFIERS = 27, 479 487 480 488 /* this goes last */ 481 489 IORING_REGISTER_LAST
+2 -12
io_uring/io_uring.c
··· 2640 2640 io_unregister_personality(ctx, index); 2641 2641 if (ctx->rings) 2642 2642 io_poll_remove_all(ctx, NULL, true); 2643 - io_notif_unregister(ctx); 2644 2643 mutex_unlock(&ctx->uring_lock); 2645 2644 2646 2645 /* failed during ring init, it couldn't have issued any requests */ ··· 3838 3839 break; 3839 3840 ret = io_register_file_alloc_range(ctx, arg); 3840 3841 break; 3841 - case IORING_REGISTER_NOTIFIERS: 3842 - ret = io_notif_register(ctx, arg, nr_args); 3843 - break; 3844 - case IORING_UNREGISTER_NOTIFIERS: 3845 - ret = -EINVAL; 3846 - if (arg || nr_args) 3847 - break; 3848 - ret = io_notif_unregister(ctx); 3849 - break; 3850 3842 default: 3851 3843 ret = -EINVAL; 3852 3844 break; ··· 3923 3933 BUILD_BUG_SQE_ELEM(42, __u16, personality); 3924 3934 BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); 3925 3935 BUILD_BUG_SQE_ELEM(44, __u32, file_index); 3926 - BUILD_BUG_SQE_ELEM(44, __u16, notification_idx); 3927 - BUILD_BUG_SQE_ELEM(46, __u16, addr_len); 3936 + BUILD_BUG_SQE_ELEM(44, __u16, addr_len); 3937 + BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); 3928 3938 BUILD_BUG_SQE_ELEM(48, __u64, addr3); 3929 3939 BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); 3930 3940 BUILD_BUG_SQE_ELEM(56, __u64, __pad2);
+35 -24
io_uring/net.c
··· 65 65 struct file *file; 66 66 void __user *buf; 67 67 size_t len; 68 - u16 slot_idx; 69 68 unsigned msg_flags; 70 69 unsigned flags; 71 70 unsigned addr_len; 72 71 void __user *addr; 73 72 size_t done_io; 73 + struct io_kiocb *notif; 74 74 }; 75 75 76 76 #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) ··· 879 879 return ret; 880 880 } 881 881 882 + void io_sendzc_cleanup(struct io_kiocb *req) 883 + { 884 + struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); 885 + 886 + zc->notif->flags |= REQ_F_CQE_SKIP; 887 + io_notif_flush(zc->notif); 888 + zc->notif = NULL; 889 + } 890 + 882 891 int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 883 892 { 884 893 struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); 885 894 struct io_ring_ctx *ctx = req->ctx; 895 + struct io_kiocb *notif; 886 896 887 - if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)) 897 + if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) || 898 + READ_ONCE(sqe->__pad3[0])) 899 + return -EINVAL; 900 + /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ 901 + if (req->flags & REQ_F_CQE_SKIP) 888 902 return -EINVAL; 889 903 890 904 zc->flags = READ_ONCE(sqe->ioprio); 891 905 if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | 892 - IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH)) 906 + IORING_RECVSEND_FIXED_BUF)) 893 907 return -EINVAL; 894 908 if (zc->flags & IORING_RECVSEND_FIXED_BUF) { 895 909 unsigned idx = READ_ONCE(sqe->buf_index); ··· 914 900 req->imu = READ_ONCE(ctx->user_bufs[idx]); 915 901 io_req_set_rsrc_node(req, ctx, 0); 916 902 } 903 + notif = zc->notif = io_alloc_notif(ctx); 904 + if (!notif) 905 + return -ENOMEM; 906 + notif->cqe.user_data = req->cqe.user_data; 907 + notif->cqe.res = 0; 908 + notif->cqe.flags = IORING_CQE_F_NOTIF; 909 + req->flags |= REQ_F_NEED_CLEANUP; 917 910 918 911 zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); 919 912 zc->len = READ_ONCE(sqe->len); 920 913 zc->msg_flags = 
READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 921 - zc->slot_idx = READ_ONCE(sqe->notification_idx); 922 914 if (zc->msg_flags & MSG_DONTWAIT) 923 915 req->flags |= REQ_F_NOWAIT; 924 916 ··· 976 956 shinfo->nr_frags = frag; 977 957 from->bvec += bi.bi_idx; 978 958 from->nr_segs -= bi.bi_idx; 979 - from->count = bi.bi_size; 959 + from->count -= copied; 980 960 from->iov_offset = bi.bi_bvec_done; 981 961 982 962 skb->data_len += copied; ··· 996 976 int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) 997 977 { 998 978 struct sockaddr_storage __address, *addr = NULL; 999 - struct io_ring_ctx *ctx = req->ctx; 1000 979 struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); 1001 - struct io_notif_slot *notif_slot; 1002 - struct io_kiocb *notif; 1003 980 struct msghdr msg; 1004 981 struct iovec iov; 1005 982 struct socket *sock; 1006 - unsigned msg_flags; 983 + unsigned msg_flags, cflags; 1007 984 int ret, min_ret = 0; 1008 985 1009 986 if (!(req->flags & REQ_F_POLLED) && 1010 987 (zc->flags & IORING_RECVSEND_POLL_FIRST)) 1011 988 return -EAGAIN; 1012 - 1013 - if (issue_flags & IO_URING_F_UNLOCKED) 1014 - return -EAGAIN; 1015 989 sock = sock_from_file(req->file); 1016 990 if (unlikely(!sock)) 1017 991 return -ENOTSOCK; 1018 - 1019 - notif_slot = io_get_notif_slot(ctx, zc->slot_idx); 1020 - if (!notif_slot) 1021 - return -EINVAL; 1022 - notif = io_get_notif(ctx, notif_slot); 1023 - if (!notif) 1024 - return -ENOMEM; 1025 992 1026 993 msg.msg_name = NULL; 1027 994 msg.msg_control = NULL; ··· 1040 1033 &msg.msg_iter); 1041 1034 if (unlikely(ret)) 1042 1035 return ret; 1043 - ret = io_notif_account_mem(notif, zc->len); 1036 + ret = io_notif_account_mem(zc->notif, zc->len); 1044 1037 if (unlikely(ret)) 1045 1038 return ret; 1046 1039 } ··· 1052 1045 min_ret = iov_iter_count(&msg.msg_iter); 1053 1046 1054 1047 msg.msg_flags = msg_flags; 1055 - msg.msg_ubuf = &io_notif_to_data(notif)->uarg; 1048 + msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; 1056 1049 
msg.sg_from_iter = io_sg_from_iter; 1057 1050 ret = sock_sendmsg(sock, &msg); 1058 1051 ··· 1067 1060 req->flags |= REQ_F_PARTIAL_IO; 1068 1061 return io_setup_async_addr(req, addr, issue_flags); 1069 1062 } 1063 + if (ret < 0 && !zc->done_io) 1064 + zc->notif->flags |= REQ_F_CQE_SKIP; 1070 1065 if (ret == -ERESTARTSYS) 1071 1066 ret = -EINTR; 1072 1067 req_set_fail(req); 1073 - } else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) { 1074 - io_notif_slot_flush_submit(notif_slot, 0); 1075 1068 } 1076 1069 1077 1070 if (ret >= 0) 1078 1071 ret += zc->done_io; 1079 1072 else if (zc->done_io) 1080 1073 ret = zc->done_io; 1081 - io_req_set_res(req, ret, 0); 1074 + 1075 + io_notif_flush(zc->notif); 1076 + req->flags &= ~REQ_F_NEED_CLEANUP; 1077 + cflags = ret >= 0 ? IORING_CQE_F_MORE : 0; 1078 + io_req_set_res(req, ret, cflags); 1082 1079 return IOU_OK; 1083 1080 } 1084 1081
+1
io_uring/net.h
··· 55 55 56 56 int io_sendzc(struct io_kiocb *req, unsigned int issue_flags); 57 57 int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); 58 + void io_sendzc_cleanup(struct io_kiocb *req); 58 59 59 60 void io_netmsg_cache_free(struct io_cache_entry *entry); 60 61 #else
+2 -81
io_uring/notif.c
··· 42 42 } 43 43 } 44 44 45 - struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, 46 - struct io_notif_slot *slot) 45 + struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx) 47 46 __must_hold(&ctx->uring_lock) 48 47 { 49 48 struct io_kiocb *notif; ··· 58 59 io_get_task_refs(1); 59 60 notif->rsrc_node = NULL; 60 61 io_req_set_rsrc_node(notif, ctx, 0); 61 - notif->cqe.user_data = slot->tag; 62 - notif->cqe.flags = slot->seq++; 63 - notif->cqe.res = 0; 64 62 65 63 nd = io_notif_to_data(notif); 66 64 nd->account_pages = 0; 67 65 nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; 68 66 nd->uarg.callback = io_uring_tx_zerocopy_callback; 69 - /* master ref owned by io_notif_slot, will be dropped on flush */ 70 67 refcount_set(&nd->uarg.refcnt, 1); 71 68 return notif; 72 69 } 73 70 74 - void io_notif_slot_flush(struct io_notif_slot *slot) 71 + void io_notif_flush(struct io_kiocb *notif) 75 72 __must_hold(&slot->notif->ctx->uring_lock) 76 73 { 77 - struct io_kiocb *notif = slot->notif; 78 74 struct io_notif_data *nd = io_notif_to_data(notif); 79 - 80 - slot->notif = NULL; 81 75 82 76 /* drop slot's master ref */ 83 77 if (refcount_dec_and_test(&nd->uarg.refcnt)) { 84 78 notif->io_task_work.func = __io_notif_complete_tw; 85 79 io_req_task_work_add(notif); 86 80 } 87 - } 88 - 89 - __cold int io_notif_unregister(struct io_ring_ctx *ctx) 90 - __must_hold(&ctx->uring_lock) 91 - { 92 - int i; 93 - 94 - if (!ctx->notif_slots) 95 - return -ENXIO; 96 - 97 - for (i = 0; i < ctx->nr_notif_slots; i++) { 98 - struct io_notif_slot *slot = &ctx->notif_slots[i]; 99 - struct io_kiocb *notif = slot->notif; 100 - struct io_notif_data *nd; 101 - 102 - if (!notif) 103 - continue; 104 - nd = io_notif_to_data(notif); 105 - slot->notif = NULL; 106 - if (!refcount_dec_and_test(&nd->uarg.refcnt)) 107 - continue; 108 - notif->io_task_work.func = __io_notif_complete_tw; 109 - io_req_task_work_add(notif); 110 - } 111 - 112 - kvfree(ctx->notif_slots); 113 - ctx->notif_slots = NULL; 114 - 
ctx->nr_notif_slots = 0; 115 - return 0; 116 - } 117 - 118 - __cold int io_notif_register(struct io_ring_ctx *ctx, 119 - void __user *arg, unsigned int size) 120 - __must_hold(&ctx->uring_lock) 121 - { 122 - struct io_uring_notification_slot __user *slots; 123 - struct io_uring_notification_slot slot; 124 - struct io_uring_notification_register reg; 125 - unsigned i; 126 - 127 - if (ctx->nr_notif_slots) 128 - return -EBUSY; 129 - if (size != sizeof(reg)) 130 - return -EINVAL; 131 - if (copy_from_user(&reg, arg, sizeof(reg))) 132 - return -EFAULT; 133 - if (!reg.nr_slots || reg.nr_slots > IORING_MAX_NOTIF_SLOTS) 134 - return -EINVAL; 135 - if (reg.resv || reg.resv2 || reg.resv3) 136 - return -EINVAL; 137 - 138 - slots = u64_to_user_ptr(reg.data); 139 - ctx->notif_slots = kvcalloc(reg.nr_slots, sizeof(ctx->notif_slots[0]), 140 - GFP_KERNEL_ACCOUNT); 141 - if (!ctx->notif_slots) 142 - return -ENOMEM; 143 - 144 - for (i = 0; i < reg.nr_slots; i++, ctx->nr_notif_slots++) { 145 - struct io_notif_slot *notif_slot = &ctx->notif_slots[i]; 146 - 147 - if (copy_from_user(&slot, &slots[i], sizeof(slot))) { 148 - io_notif_unregister(ctx); 149 - return -EFAULT; 150 - } 151 - if (slot.resv[0] | slot.resv[1] | slot.resv[2]) { 152 - io_notif_unregister(ctx); 153 - return -EINVAL; 154 - } 155 - notif_slot->tag = slot.tag; 156 - } 157 - return 0; 158 81 }
+2 -52
io_uring/notif.h
··· 8 8 #include "rsrc.h" 9 9 10 10 #define IO_NOTIF_SPLICE_BATCH 32 11 - #define IORING_MAX_NOTIF_SLOTS (1U << 15) 12 11 13 12 struct io_notif_data { 14 13 struct file *file; ··· 15 16 unsigned long account_pages; 16 17 }; 17 18 18 - struct io_notif_slot { 19 - /* 20 - * Current/active notifier. A slot holds only one active notifier at a 21 - * time and keeps one reference to it. Flush releases the reference and 22 - * lazily replaces it with a new notifier. 23 - */ 24 - struct io_kiocb *notif; 25 - 26 - /* 27 - * Default ->user_data for this slot notifiers CQEs 28 - */ 29 - u64 tag; 30 - /* 31 - * Notifiers of a slot live in generations, we create a new notifier 32 - * only after flushing the previous one. Track the sequential number 33 - * for all notifiers and copy it into notifiers's cqe->cflags 34 - */ 35 - u32 seq; 36 - }; 37 - 38 - int io_notif_register(struct io_ring_ctx *ctx, 39 - void __user *arg, unsigned int size); 40 - int io_notif_unregister(struct io_ring_ctx *ctx); 41 - 42 - void io_notif_slot_flush(struct io_notif_slot *slot); 43 - struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, 44 - struct io_notif_slot *slot); 19 + void io_notif_flush(struct io_kiocb *notif); 20 + struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx); 45 21 46 22 static inline struct io_notif_data *io_notif_to_data(struct io_kiocb *notif) 47 23 { 48 24 return io_kiocb_to_cmd(notif, struct io_notif_data); 49 - } 50 - 51 - static inline struct io_kiocb *io_get_notif(struct io_ring_ctx *ctx, 52 - struct io_notif_slot *slot) 53 - { 54 - if (!slot->notif) 55 - slot->notif = io_alloc_notif(ctx, slot); 56 - return slot->notif; 57 - } 58 - 59 - static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx, 60 - unsigned idx) 61 - __must_hold(&ctx->uring_lock) 62 - { 63 - if (idx >= ctx->nr_notif_slots) 64 - return NULL; 65 - idx = array_index_nospec(idx, ctx->nr_notif_slots); 66 - return &ctx->notif_slots[idx]; 67 - } 68 - 69 - static inline void 
io_notif_slot_flush_submit(struct io_notif_slot *slot, 70 - unsigned int issue_flags) 71 - { 72 - io_notif_slot_flush(slot); 73 25 } 74 26 75 27 static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len)
+6 -6
io_uring/opdef.c
··· 246 246 .prep = io_close_prep, 247 247 .issue = io_close, 248 248 }, 249 - [IORING_OP_RSRC_UPDATE] = { 249 + [IORING_OP_FILES_UPDATE] = { 250 250 .audit_skip = 1, 251 251 .iopoll = 1, 252 - .name = "RSRC_UPDATE", 253 - .prep = io_rsrc_update_prep, 254 - .issue = io_rsrc_update, 255 - .ioprio = 1, 252 + .name = "FILES_UPDATE", 253 + .prep = io_files_update_prep, 254 + .issue = io_files_update, 256 255 }, 257 256 [IORING_OP_STATX] = { 258 257 .audit_skip = 1, ··· 470 471 .issue = io_uring_cmd, 471 472 .prep_async = io_uring_cmd_prep_async, 472 473 }, 473 - [IORING_OP_SENDZC_NOTIF] = { 474 + [IORING_OP_SEND_ZC] = { 474 475 .name = "SENDZC_NOTIF", 475 476 .needs_file = 1, 476 477 .unbound_nonreg_file = 1, ··· 483 484 .prep = io_sendzc_prep, 484 485 .issue = io_sendzc, 485 486 .prep_async = io_sendzc_prep_async, 487 + .cleanup = io_sendzc_cleanup, 486 488 #else 487 489 .prep = io_eopnotsupp_prep, 488 490 #endif
+2 -53
io_uring/rsrc.c
··· 15 15 #include "io_uring.h" 16 16 #include "openclose.h" 17 17 #include "rsrc.h" 18 - #include "notif.h" 19 18 20 19 struct io_rsrc_update { 21 20 struct file *file; 22 21 u64 arg; 23 22 u32 nr_args; 24 23 u32 offset; 25 - int type; 26 24 }; 27 25 28 26 static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, ··· 653 655 return -EINVAL; 654 656 } 655 657 656 - int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 658 + int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 657 659 { 658 660 struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); 659 661 ··· 667 669 if (!up->nr_args) 668 670 return -EINVAL; 669 671 up->arg = READ_ONCE(sqe->addr); 670 - up->type = READ_ONCE(sqe->ioprio); 671 672 return 0; 672 673 } 673 674 ··· 709 712 return ret; 710 713 } 711 714 712 - static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) 715 + int io_files_update(struct io_kiocb *req, unsigned int issue_flags) 713 716 { 714 717 struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); 715 718 struct io_ring_ctx *ctx = req->ctx; ··· 736 739 req_set_fail(req); 737 740 io_req_set_res(req, ret, 0); 738 741 return IOU_OK; 739 - } 740 - 741 - static int io_notif_update(struct io_kiocb *req, unsigned int issue_flags) 742 - { 743 - struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); 744 - struct io_ring_ctx *ctx = req->ctx; 745 - unsigned len = up->nr_args; 746 - unsigned idx_end, idx = up->offset; 747 - int ret = 0; 748 - 749 - io_ring_submit_lock(ctx, issue_flags); 750 - if (unlikely(check_add_overflow(idx, len, &idx_end))) { 751 - ret = -EOVERFLOW; 752 - goto out; 753 - } 754 - if (unlikely(idx_end > ctx->nr_notif_slots)) { 755 - ret = -EINVAL; 756 - goto out; 757 - } 758 - 759 - for (; idx < idx_end; idx++) { 760 - struct io_notif_slot *slot = &ctx->notif_slots[idx]; 761 - 762 - if (!slot->notif) 763 - continue; 764 - if (up->arg) 765 - 
slot->tag = up->arg; 766 - io_notif_slot_flush_submit(slot, issue_flags); 767 - } 768 - out: 769 - io_ring_submit_unlock(ctx, issue_flags); 770 - if (ret < 0) 771 - req_set_fail(req); 772 - io_req_set_res(req, ret, 0); 773 - return IOU_OK; 774 - } 775 - 776 - int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) 777 - { 778 - struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); 779 - 780 - switch (up->type) { 781 - case IORING_RSRC_UPDATE_FILES: 782 - return io_files_update(req, issue_flags); 783 - case IORING_RSRC_UPDATE_NOTIF: 784 - return io_notif_update(req, issue_flags); 785 - } 786 - return -EINVAL; 787 742 } 788 743 789 744 int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
+2 -2
io_uring/rsrc.h
··· 167 167 return &data->tags[table_idx][off]; 168 168 } 169 169 170 - int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags); 171 - int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); 170 + int io_files_update(struct io_kiocb *req, unsigned int issue_flags); 171 + int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); 172 172 173 173 int __io_account_mem(struct user_struct *user, unsigned long nr_pages); 174 174
+39 -64
tools/testing/selftests/net/io_uring_zerocopy_tx.c
··· 47 47 MODE_MIXED = 3, 48 48 }; 49 49 50 - static bool cfg_flush = false; 51 50 static bool cfg_cork = false; 52 51 static int cfg_mode = MODE_ZC_FIXED; 53 52 static int cfg_nr_reqs = 8; ··· 162 163 163 164 ret = syscall(__NR_io_uring_register, ring->ring_fd, 164 165 IORING_REGISTER_BUFFERS, iovecs, nr_iovecs); 165 - return (ret < 0) ? -errno : ret; 166 - } 167 - 168 - static int io_uring_register_notifications(struct io_uring *ring, 169 - unsigned nr, 170 - struct io_uring_notification_slot *slots) 171 - { 172 - int ret; 173 - struct io_uring_notification_register r = { 174 - .nr_slots = nr, 175 - .data = (unsigned long)slots, 176 - }; 177 - 178 - ret = syscall(__NR_io_uring_register, ring->ring_fd, 179 - IORING_REGISTER_NOTIFIERS, &r, sizeof(r)); 180 166 return (ret < 0) ? -errno : ret; 181 167 } 182 168 ··· 281 297 282 298 static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd, 283 299 const void *buf, size_t len, int flags, 284 - unsigned slot_idx, unsigned zc_flags) 300 + unsigned zc_flags) 285 301 { 286 302 io_uring_prep_send(sqe, sockfd, buf, len, flags); 287 - sqe->opcode = (__u8) IORING_OP_SENDZC_NOTIF; 288 - sqe->notification_idx = slot_idx; 303 + sqe->opcode = (__u8) IORING_OP_SEND_ZC; 289 304 sqe->ioprio = zc_flags; 290 305 } 291 306 ··· 357 374 358 375 static void do_tx(int domain, int type, int protocol) 359 376 { 360 - struct io_uring_notification_slot b[1] = {{.tag = NOTIF_TAG}}; 361 377 struct io_uring_sqe *sqe; 362 378 struct io_uring_cqe *cqe; 363 379 unsigned long packets = 0, bytes = 0; ··· 371 389 ret = io_uring_queue_init(512, &ring, 0); 372 390 if (ret) 373 391 error(1, ret, "io_uring: queue init"); 374 - 375 - ret = io_uring_register_notifications(&ring, 1, b); 376 - if (ret) 377 - error(1, ret, "io_uring: tx ctx registration"); 378 392 379 393 iov.iov_base = payload; 380 394 iov.iov_len = cfg_payload_len; ··· 387 409 for (i = 0; i < cfg_nr_reqs; i++) { 388 410 unsigned zc_flags = 0; 389 411 unsigned buf_idx = 0; 
390 - unsigned slot_idx = 0; 391 412 unsigned mode = cfg_mode; 392 - unsigned msg_flags = 0; 413 + unsigned msg_flags = MSG_WAITALL; 393 414 394 415 if (cfg_mode == MODE_MIXED) 395 416 mode = rand() % 3; ··· 400 423 cfg_payload_len, msg_flags); 401 424 sqe->user_data = NONZC_TAG; 402 425 } else { 403 - if (cfg_flush) { 404 - zc_flags |= IORING_RECVSEND_NOTIF_FLUSH; 405 - compl_cqes++; 406 - } 426 + compl_cqes++; 407 427 io_uring_prep_sendzc(sqe, fd, payload, 408 428 cfg_payload_len, 409 - msg_flags, slot_idx, zc_flags); 429 + msg_flags, zc_flags); 410 430 if (mode == MODE_ZC_FIXED) { 411 431 sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; 412 432 sqe->buf_index = buf_idx; ··· 416 442 if (ret != cfg_nr_reqs) 417 443 error(1, ret, "submit"); 418 444 445 + if (cfg_cork) 446 + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); 419 447 for (i = 0; i < cfg_nr_reqs; i++) { 420 448 ret = io_uring_wait_cqe(&ring, &cqe); 421 449 if (ret) 422 450 error(1, ret, "wait cqe"); 423 451 424 - if (cqe->user_data == NOTIF_TAG) { 452 + if (cqe->user_data != NONZC_TAG && 453 + cqe->user_data != ZC_TAG) 454 + error(1, -EINVAL, "invalid cqe->user_data"); 455 + 456 + if (cqe->flags & IORING_CQE_F_NOTIF) { 457 + if (cqe->flags & IORING_CQE_F_MORE) 458 + error(1, -EINVAL, "invalid notif flags"); 425 459 compl_cqes--; 426 460 i--; 427 - } else if (cqe->user_data != NONZC_TAG && 428 - cqe->user_data != ZC_TAG) { 429 - error(1, cqe->res, "invalid user_data"); 430 - } else if (cqe->res <= 0 && cqe->res != -EAGAIN) { 461 + } else if (cqe->res <= 0) { 462 + if (cqe->flags & IORING_CQE_F_MORE) 463 + error(1, cqe->res, "more with a failed send"); 431 464 error(1, cqe->res, "send failed"); 432 465 } else { 433 - if (cqe->res > 0) { 434 - packets++; 435 - bytes += cqe->res; 436 - } 437 - /* failed requests don't flush */ 438 - if (cfg_flush && 439 - cqe->res <= 0 && 440 - cqe->user_data == ZC_TAG) 441 - compl_cqes--; 466 + if (cqe->user_data == ZC_TAG && 467 + !(cqe->flags & IORING_CQE_F_MORE)) 468 + error(1, 
cqe->res, "missing more flag"); 469 + packets++; 470 + bytes += cqe->res; 442 471 } 443 472 io_uring_cqe_seen(&ring); 444 473 } 445 - if (cfg_cork) 446 - do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); 447 474 } while (gettimeofday_ms() < tstop); 448 475 449 - if (close(fd)) 450 - error(1, errno, "close"); 476 + while (compl_cqes) { 477 + ret = io_uring_wait_cqe(&ring, &cqe); 478 + if (ret) 479 + error(1, ret, "wait cqe"); 480 + if (cqe->flags & IORING_CQE_F_MORE) 481 + error(1, -EINVAL, "invalid notif flags"); 482 + if (!(cqe->flags & IORING_CQE_F_NOTIF)) 483 + error(1, -EINVAL, "missing notif flag"); 484 + 485 + io_uring_cqe_seen(&ring); 486 + compl_cqes--; 487 + } 451 488 452 489 fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n", 453 490 packets, bytes >> 20, 454 491 packets / (cfg_runtime_ms / 1000), 455 492 (bytes >> 20) / (cfg_runtime_ms / 1000)); 456 493 457 - while (compl_cqes) { 458 - ret = io_uring_wait_cqe(&ring, &cqe); 459 - if (ret) 460 - error(1, ret, "wait cqe"); 461 - io_uring_cqe_seen(&ring); 462 - compl_cqes--; 463 - } 494 + if (close(fd)) 495 + error(1, errno, "close"); 464 496 } 465 497 466 498 static void do_test(int domain, int type, int protocol) ··· 480 500 481 501 static void usage(const char *filepath) 482 502 { 483 - error(1, 0, "Usage: %s [-f] [-n<N>] [-z0] [-s<payload size>] " 484 - "(-4|-6) [-t<time s>] -D<dst_ip> udp", filepath); 503 + error(1, 0, "Usage: %s (-4|-6) (udp|tcp) -D<dst_ip> [-s<payload size>] " 504 + "[-t<time s>] [-n<batch>] [-p<port>] [-m<mode>]", filepath); 485 505 } 486 506 487 507 static void parse_opts(int argc, char **argv) ··· 499 519 usage(argv[0]); 500 520 cfg_payload_len = max_payload_len; 501 521 502 - while ((c = getopt(argc, argv, "46D:p:s:t:n:fc:m:")) != -1) { 522 + while ((c = getopt(argc, argv, "46D:p:s:t:n:c:m:")) != -1) { 503 523 switch (c) { 504 524 case '4': 505 525 if (cfg_family != PF_UNSPEC) ··· 527 547 break; 528 548 case 'n': 529 549 cfg_nr_reqs = strtoul(optarg, NULL, 0); 530 - break; 531 - 
case 'f': 532 - cfg_flush = 1; 533 550 break; 534 551 case 'c': 535 552 cfg_cork = strtol(optarg, NULL, 0); ··· 560 583 561 584 if (cfg_payload_len > max_payload_len) 562 585 error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); 563 - if (cfg_mode == MODE_NONZC && cfg_flush) 564 - error(1, 0, "-f: only zerocopy modes support notifications"); 565 586 if (optind != argc - 1) 566 587 usage(argv[0]); 567 588 }
+3 -7
tools/testing/selftests/net/io_uring_zerocopy_tx.sh
··· 25 25 # No arguments: automated test 26 26 if [[ "$#" -eq "0" ]]; then 27 27 IPs=( "4" "6" ) 28 - protocols=( "tcp" "udp" ) 29 28 30 29 for IP in "${IPs[@]}"; do 31 - for proto in "${protocols[@]}"; do 32 - for mode in $(seq 1 3); do 33 - $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 34 - $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -f 35 - $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -c -f 36 - done 30 + for mode in $(seq 1 3); do 31 + $0 "$IP" udp -m "$mode" -t 1 -n 32 32 + $0 "$IP" tcp -m "$mode" -t 1 -n 32 37 33 done 38 34 done 39 35