Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/rsrc: allow cloning with node replacements

Currently cloning a buffer table will fail if the destination already has
a table. But it should be possible to use it to replace existing elements.
Add an IORING_REGISTER_DST_REPLACE cloning flag which, if set, will allow
the destination to already have a buffer table. If that is the case,
then entries designated by offset + nr buffers will be replaced if they
already exist.

Note that it's allowed to use IORING_REGISTER_DST_REPLACE without having
an existing table, in which case it behaves exactly as if the flag were
not set and the table were empty - the newly created table is simply
assigned in that case.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

+54 -15
+2 -1
include/uapi/linux/io_uring.h
··· 713 713 }; 714 714 715 715 enum { 716 - IORING_REGISTER_SRC_REGISTERED = 1, 716 + IORING_REGISTER_SRC_REGISTERED = (1U << 0), 717 + IORING_REGISTER_DST_REPLACE = (1U << 1), 717 718 }; 718 719 719 720 struct io_uring_clone_buffers {
+52 -14
io_uring/rsrc.c
··· 930 930 static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx, 931 931 struct io_uring_clone_buffers *arg) 932 932 { 933 - int i, ret, nbufs, off, nr; 934 933 struct io_rsrc_data data; 934 + int i, ret, off, nr; 935 + unsigned int nbufs; 936 + 937 + /* if offsets are given, must have nr specified too */ 938 + if (!arg->nr && (arg->dst_off || arg->src_off)) 939 + return -EINVAL; 940 + /* not allowed unless REPLACE is set */ 941 + if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE)) 942 + return -EBUSY; 943 + 944 + nbufs = READ_ONCE(src_ctx->buf_table.nr); 945 + if (!arg->nr) 946 + arg->nr = nbufs; 947 + else if (arg->nr > nbufs) 948 + return -EINVAL; 949 + else if (arg->nr > IORING_MAX_REG_BUFFERS) 950 + return -EINVAL; 951 + if (check_add_overflow(arg->nr, arg->dst_off, &nbufs)) 952 + return -EOVERFLOW; 953 + 954 + ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr)); 955 + if (ret) 956 + return ret; 957 + 958 + /* Fill entries in data from dst that won't overlap with src */ 959 + for (i = 0; i < min(arg->dst_off, ctx->buf_table.nr); i++) { 960 + struct io_rsrc_node *src_node = ctx->buf_table.nodes[i]; 961 + 962 + if (src_node) { 963 + data.nodes[i] = src_node; 964 + src_node->refs++; 965 + } 966 + } 935 967 936 968 /* 937 969 * Drop our own lock here. 
We'll setup the data we need and reference ··· 985 953 if (check_add_overflow(arg->nr, arg->src_off, &off)) 986 954 goto out_unlock; 987 955 if (off > nbufs) 988 - goto out_unlock; 989 - if (check_add_overflow(arg->nr, arg->dst_off, &off)) 990 - goto out_unlock; 991 - ret = -EINVAL; 992 - if (off > IORING_MAX_REG_BUFFERS) 993 - goto out_unlock; 994 - ret = io_rsrc_data_alloc(&data, off); 995 - if (ret) 996 956 goto out_unlock; 997 957 998 958 off = arg->dst_off; ··· 1013 989 /* Have a ref on the bufs now, drop src lock and re-grab our own lock */ 1014 990 mutex_unlock(&src_ctx->uring_lock); 1015 991 mutex_lock(&ctx->uring_lock); 992 + 993 + /* 994 + * If asked for replace, put the old table. data->nodes[] holds both 995 + * old and new nodes at this point. 996 + */ 997 + if (arg->flags & IORING_REGISTER_DST_REPLACE) 998 + io_rsrc_data_free(&ctx->buf_table); 999 + 1000 + /* 1001 + * ctx->buf_table should be empty now - either the contents are being 1002 + * replaced and we just freed the table, or someone raced setting up 1003 + * a buffer table while the clone was happening. If not empty, fall 1004 + * through to failure handling. 
1005 + */ 1016 1006 if (!ctx->buf_table.nr) { 1017 1007 ctx->buf_table = data; 1018 1008 return 0; ··· 1036 998 mutex_lock(&src_ctx->uring_lock); 1037 999 /* someone raced setting up buffers, dump ours */ 1038 1000 ret = -EBUSY; 1039 - i = nbufs; 1040 1001 out_put_free: 1002 + i = data.nr; 1041 1003 while (i--) { 1042 1004 io_buffer_unmap(src_ctx, data.nodes[i]); 1043 1005 kfree(data.nodes[i]); 1044 1006 } 1045 - io_rsrc_data_free(&data); 1046 1007 out_unlock: 1008 + io_rsrc_data_free(&data); 1047 1009 mutex_unlock(&src_ctx->uring_lock); 1048 1010 mutex_lock(&ctx->uring_lock); 1049 1011 return ret; ··· 1063 1025 struct file *file; 1064 1026 int ret; 1065 1027 1066 - if (ctx->buf_table.nr) 1067 - return -EBUSY; 1068 1028 if (copy_from_user(&buf, arg, sizeof(buf))) 1069 1029 return -EFAULT; 1070 - if (buf.flags & ~IORING_REGISTER_SRC_REGISTERED) 1030 + if (buf.flags & ~(IORING_REGISTER_SRC_REGISTERED|IORING_REGISTER_DST_REPLACE)) 1071 1031 return -EINVAL; 1032 + if (!(buf.flags & IORING_REGISTER_DST_REPLACE) && ctx->buf_table.nr) 1033 + return -EBUSY; 1072 1034 if (memchr_inv(buf.pad, 0, sizeof(buf.pad))) 1073 1035 return -EINVAL; 1074 1036