Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/zcrx: allow synchronous buffer return

Returning buffers via a ring is performant and convenient, but it
becomes a problem if the user misconfigures the ring size and the ring
fills up. Add a synchronous way to return buffers to the page pool via
a new register opcode. It is meant to be a reliable slow path for
refilling.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Pavel Begunkov and committed by
Jens Axboe
705d2ac7 8fd08d8d

+90
+12
include/uapi/linux/io_uring.h
··· 689 689 /* query various aspects of io_uring, see linux/io_uring/query.h */ 690 690 IORING_REGISTER_QUERY = 35, 691 691 692 + /* return zcrx buffers back into circulation */ 693 + IORING_REGISTER_ZCRX_REFILL = 36, 694 + 692 695 /* this goes last */ 693 696 IORING_REGISTER_LAST, 694 697 ··· 1071 1068 __u32 zcrx_id; 1072 1069 __u32 __resv2; 1073 1070 __u64 __resv[3]; 1071 + }; 1072 + 1073 + struct io_uring_zcrx_sync_refill { 1074 + __u32 zcrx_id; 1075 + /* the number of entries to return */ 1076 + __u32 nr_entries; 1077 + /* pointer to an array of struct io_uring_zcrx_rqe */ 1078 + __u64 rqes; 1079 + __u64 __resv[2]; 1074 1080 }; 1075 1081 1076 1082 #ifdef __cplusplus
+3
io_uring/register.c
··· 833 833 case IORING_REGISTER_QUERY: 834 834 ret = io_query(ctx, arg, nr_args); 835 835 break; 836 + case IORING_REGISTER_ZCRX_REFILL: 837 + ret = io_zcrx_return_bufs(ctx, arg, nr_args); 838 + break; 836 839 default: 837 840 ret = -EINVAL; 838 841 break;
+68
io_uring/zcrx.c
··· 927 927 .uninstall = io_pp_uninstall, 928 928 }; 929 929 930 + #define IO_ZCRX_MAX_SYS_REFILL_BUFS (1 << 16) 931 + #define IO_ZCRX_SYS_REFILL_BATCH 32 932 + 933 + static void io_return_buffers(struct io_zcrx_ifq *ifq, 934 + struct io_uring_zcrx_rqe *rqes, unsigned nr) 935 + { 936 + int i; 937 + 938 + for (i = 0; i < nr; i++) { 939 + struct net_iov *niov; 940 + netmem_ref netmem; 941 + 942 + if (!io_parse_rqe(&rqes[i], ifq, &niov)) 943 + continue; 944 + 945 + scoped_guard(spinlock_bh, &ifq->rq_lock) { 946 + if (!io_zcrx_put_niov_uref(niov)) 947 + continue; 948 + } 949 + 950 + netmem = net_iov_to_netmem(niov); 951 + if (!page_pool_unref_and_test(netmem)) 952 + continue; 953 + io_zcrx_return_niov(niov); 954 + } 955 + } 956 + 957 + int io_zcrx_return_bufs(struct io_ring_ctx *ctx, 958 + void __user *arg, unsigned nr_arg) 959 + { 960 + struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH]; 961 + struct io_uring_zcrx_rqe __user *user_rqes; 962 + struct io_uring_zcrx_sync_refill zr; 963 + struct io_zcrx_ifq *ifq; 964 + unsigned nr, i; 965 + 966 + if (nr_arg) 967 + return -EINVAL; 968 + if (copy_from_user(&zr, arg, sizeof(zr))) 969 + return -EFAULT; 970 + if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS) 971 + return -EINVAL; 972 + if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv))) 973 + return -EINVAL; 974 + 975 + ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id); 976 + if (!ifq) 977 + return -EINVAL; 978 + nr = zr.nr_entries; 979 + user_rqes = u64_to_user_ptr(zr.rqes); 980 + 981 + for (i = 0; i < nr;) { 982 + unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH); 983 + size_t size = batch * sizeof(rqes[0]); 984 + 985 + if (copy_from_user(rqes, user_rqes + i, size)) 986 + return i ? 
i : -EFAULT; 987 + io_return_buffers(ifq, rqes, batch); 988 + 989 + i += batch; 990 + 991 + if (fatal_signal_pending(current)) 992 + return i; 993 + cond_resched(); 994 + } 995 + return nr; 996 + } 997 + 930 998 static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov, 931 999 struct io_zcrx_ifq *ifq, int off, int len) 932 1000 {
+7
io_uring/zcrx.h
··· 63 63 }; 64 64 65 65 #if defined(CONFIG_IO_URING_ZCRX) 66 + int io_zcrx_return_bufs(struct io_ring_ctx *ctx, 67 + void __user *arg, unsigned nr_arg); 66 68 int io_register_zcrx_ifq(struct io_ring_ctx *ctx, 67 69 struct io_uring_zcrx_ifq_reg __user *arg); 68 70 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); ··· 96 94 unsigned int id) 97 95 { 98 96 return NULL; 97 + } 98 + static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx, 99 + void __user *arg, unsigned nr_arg) 100 + { 101 + return -EOPNOTSUPP; 99 102 } 100 103 #endif 101 104