Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/zcrx: dmabuf backed zerocopy receive

Add support for dmabuf-backed zcrx areas. To use it, the user should set
IORING_ZCRX_AREA_DMABUF in the flags field of struct io_uring_zcrx_area_reg
and pass a dmabuf fd in the dmabuf_fd field. For such areas, addr is
interpreted as an offset into the dmabuf rather than as a user address.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20bb1890e60a82ec945ab36370d1fd54be414ab6.1746097431.git.asml.silence@gmail.com
Link: https://lore.kernel.org/io-uring/6e37db97303212bbd8955f9501cf99b579f8aece.1746547722.git.asml.silence@gmail.com
[axboe: fold in fixup]
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Pavel Begunkov and committed by
Jens Axboe
a5c98e94 8a628042

+160 -18
+5 -1
include/uapi/linux/io_uring.h
··· 990 990 __u64 __resv[2]; 991 991 }; 992 992 993 + enum io_uring_zcrx_area_flags { 994 + IORING_ZCRX_AREA_DMABUF = 1, 995 + }; 996 + 993 997 struct io_uring_zcrx_area_reg { 994 998 __u64 addr; 995 999 __u64 len; 996 1000 __u64 rq_area_token; 997 1001 __u32 flags; 998 - __u32 __resv1; 1002 + __u32 dmabuf_fd; 999 1003 __u64 __resv2[2]; 1000 1004 }; 1001 1005
+148 -17
io_uring/zcrx.c
··· 47 47 return area->mem.pages[net_iov_idx(niov)]; 48 48 } 49 49 50 + static void io_release_dmabuf(struct io_zcrx_mem *mem) 51 + { 52 + if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) 53 + return; 54 + 55 + if (mem->sgt) 56 + dma_buf_unmap_attachment_unlocked(mem->attach, mem->sgt, 57 + DMA_FROM_DEVICE); 58 + if (mem->attach) 59 + dma_buf_detach(mem->dmabuf, mem->attach); 60 + if (mem->dmabuf) 61 + dma_buf_put(mem->dmabuf); 62 + 63 + mem->sgt = NULL; 64 + mem->attach = NULL; 65 + mem->dmabuf = NULL; 66 + } 67 + 68 + static int io_import_dmabuf(struct io_zcrx_ifq *ifq, 69 + struct io_zcrx_mem *mem, 70 + struct io_uring_zcrx_area_reg *area_reg) 71 + { 72 + unsigned long off = (unsigned long)area_reg->addr; 73 + unsigned long len = (unsigned long)area_reg->len; 74 + unsigned long total_size = 0; 75 + struct scatterlist *sg; 76 + int dmabuf_fd = area_reg->dmabuf_fd; 77 + int i, ret; 78 + 79 + if (WARN_ON_ONCE(!ifq->dev)) 80 + return -EFAULT; 81 + if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) 82 + return -EINVAL; 83 + 84 + mem->is_dmabuf = true; 85 + mem->dmabuf = dma_buf_get(dmabuf_fd); 86 + if (IS_ERR(mem->dmabuf)) { 87 + ret = PTR_ERR(mem->dmabuf); 88 + mem->dmabuf = NULL; 89 + goto err; 90 + } 91 + 92 + mem->attach = dma_buf_attach(mem->dmabuf, ifq->dev); 93 + if (IS_ERR(mem->attach)) { 94 + ret = PTR_ERR(mem->attach); 95 + mem->attach = NULL; 96 + goto err; 97 + } 98 + 99 + mem->sgt = dma_buf_map_attachment_unlocked(mem->attach, DMA_FROM_DEVICE); 100 + if (IS_ERR(mem->sgt)) { 101 + ret = PTR_ERR(mem->sgt); 102 + mem->sgt = NULL; 103 + goto err; 104 + } 105 + 106 + for_each_sgtable_dma_sg(mem->sgt, sg, i) 107 + total_size += sg_dma_len(sg); 108 + 109 + if (total_size < off + len) 110 + return -EINVAL; 111 + 112 + mem->dmabuf_offset = off; 113 + mem->size = len; 114 + return 0; 115 + err: 116 + io_release_dmabuf(mem); 117 + return ret; 118 + } 119 + 120 + static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) 121 + { 122 + unsigned long off 
= area->mem.dmabuf_offset; 123 + struct scatterlist *sg; 124 + unsigned i, niov_idx = 0; 125 + 126 + if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) 127 + return -EINVAL; 128 + 129 + for_each_sgtable_dma_sg(area->mem.sgt, sg, i) { 130 + dma_addr_t dma = sg_dma_address(sg); 131 + unsigned long sg_len = sg_dma_len(sg); 132 + unsigned long sg_off = min(sg_len, off); 133 + 134 + off -= sg_off; 135 + sg_len -= sg_off; 136 + dma += sg_off; 137 + 138 + while (sg_len && niov_idx < area->nia.num_niovs) { 139 + struct net_iov *niov = &area->nia.niovs[niov_idx]; 140 + 141 + if (net_mp_niov_set_dma_addr(niov, dma)) 142 + return 0; 143 + sg_len -= PAGE_SIZE; 144 + dma += PAGE_SIZE; 145 + niov_idx++; 146 + } 147 + } 148 + return niov_idx; 149 + } 150 + 151 + static int io_import_umem(struct io_zcrx_ifq *ifq, 152 + struct io_zcrx_mem *mem, 153 + struct io_uring_zcrx_area_reg *area_reg) 154 + { 155 + struct page **pages; 156 + int nr_pages; 157 + 158 + if (area_reg->dmabuf_fd) 159 + return -EINVAL; 160 + if (!area_reg->addr) 161 + return -EFAULT; 162 + pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len, 163 + &nr_pages); 164 + if (IS_ERR(pages)) 165 + return PTR_ERR(pages); 166 + 167 + mem->pages = pages; 168 + mem->nr_folios = nr_pages; 169 + mem->size = area_reg->len; 170 + return 0; 171 + } 172 + 50 173 static void io_release_area_mem(struct io_zcrx_mem *mem) 51 174 { 175 + if (mem->is_dmabuf) { 176 + io_release_dmabuf(mem); 177 + return; 178 + } 52 179 if (mem->pages) { 53 180 unpin_user_pages(mem->pages, mem->nr_folios); 54 181 kvfree(mem->pages); ··· 186 59 struct io_zcrx_mem *mem, 187 60 struct io_uring_zcrx_area_reg *area_reg) 188 61 { 189 - struct page **pages; 190 - int nr_pages; 191 62 int ret; 192 63 193 64 ret = io_validate_user_buf_range(area_reg->addr, area_reg->len); 194 65 if (ret) 195 66 return ret; 196 - if (!area_reg->addr) 197 - return -EFAULT; 198 67 if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK) 199 68 return -EINVAL; 200 69 201 - 
pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len, 202 - &nr_pages); 203 - if (IS_ERR(pages)) 204 - return PTR_ERR(pages); 205 - 206 - mem->pages = pages; 207 - mem->nr_folios = nr_pages; 208 - mem->size = area_reg->len; 209 - return 0; 70 + if (area_reg->flags & IORING_ZCRX_AREA_DMABUF) 71 + return io_import_dmabuf(ifq, mem, area_reg); 72 + return io_import_umem(ifq, mem, area_reg); 210 73 } 211 74 212 75 static void io_zcrx_unmap_umem(struct io_zcrx_ifq *ifq, ··· 218 101 { 219 102 int i; 220 103 221 - io_zcrx_unmap_umem(ifq, area, nr_mapped); 104 + if (area->mem.is_dmabuf) 105 + io_release_dmabuf(&area->mem); 106 + else 107 + io_zcrx_unmap_umem(ifq, area, nr_mapped); 222 108 223 109 for (i = 0; i < area->nia.num_niovs; i++) 224 110 net_mp_niov_set_dma_addr(&area->nia.niovs[i], 0); ··· 265 145 if (area->is_mapped) 266 146 return 0; 267 147 268 - nr = io_zcrx_map_area_umem(ifq, area); 148 + if (area->mem.is_dmabuf) 149 + nr = io_zcrx_map_area_dmabuf(ifq, area); 150 + else 151 + nr = io_zcrx_map_area_umem(ifq, area); 152 + 269 153 if (nr != area->nia.num_niovs) { 270 154 __io_zcrx_unmap_area(ifq, area, nr); 271 155 return -EINVAL; ··· 375 251 kfree(area); 376 252 } 377 253 254 + #define IO_ZCRX_AREA_SUPPORTED_FLAGS (IORING_ZCRX_AREA_DMABUF) 255 + 378 256 static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, 379 257 struct io_zcrx_area **res, 380 258 struct io_uring_zcrx_area_reg *area_reg) ··· 385 259 unsigned nr_iovs; 386 260 int i, ret; 387 261 388 - if (area_reg->flags || area_reg->rq_area_token) 262 + if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS) 389 263 return -EINVAL; 390 - if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1]) 264 + if (area_reg->rq_area_token) 265 + return -EINVAL; 266 + if (area_reg->__resv2[0] || area_reg->__resv2[1]) 391 267 return -EINVAL; 392 268 393 269 ret = -ENOMEM; ··· 946 818 struct io_zcrx_area *area = ifq->area; 947 819 size_t copied = 0; 948 820 int ret = 0; 821 + 822 + if 
(area->mem.is_dmabuf) 823 + return -EFAULT; 949 824 950 825 while (len) { 951 826 size_t copy_size = min_t(size_t, PAGE_SIZE, len);
+7
io_uring/zcrx.h
··· 3 3 #define IOU_ZC_RX_H 4 4 5 5 #include <linux/io_uring_types.h> 6 + #include <linux/dma-buf.h> 6 7 #include <linux/socket.h> 7 8 #include <net/page_pool/types.h> 8 9 #include <net/net_trackers.h> 9 10 10 11 struct io_zcrx_mem { 11 12 unsigned long size; 13 + bool is_dmabuf; 12 14 13 15 struct page **pages; 14 16 unsigned long nr_folios; 17 + 18 + struct dma_buf_attachment *attach; 19 + struct dma_buf *dmabuf; 20 + struct sg_table *sgt; 21 + unsigned long dmabuf_offset; 15 22 }; 16 23 17 24 struct io_zcrx_area {