Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'io_uring-6.16' into for-6.17/io_uring

Merge in 6.16 io_uring fixes, to avoid clashes with pending net and
settings changes.

* io_uring-6.16:
  io_uring: gate REQ_F_ISREG on !S_ANON_INODE as well
  io_uring/kbuf: flag partial buffer mappings
  io_uring/net: mark iov as dynamically allocated even for single segments
  io_uring: fix resource leak in io_import_dmabuf()
  io_uring: don't assume uaddr alignment in io_vec_fill_bvec
  io_uring/rsrc: don't rely on user vaddr alignment
  io_uring/rsrc: fix folio unpinning
  io_uring: make fallocate be hashed work

Jens Axboe 1bc88902 cf73d997

+54 -25
+2 -1
io_uring/io_uring.c
··· 1706 1706 1707 1707 io_req_flags_t io_file_get_flags(struct file *file) 1708 1708 { 1709 + struct inode *inode = file_inode(file); 1709 1710 io_req_flags_t res = 0; 1710 1711 1711 1712 BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1); 1712 1713 1713 - if (S_ISREG(file_inode(file)->i_mode)) 1714 + if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE)) 1714 1715 res |= REQ_F_ISREG; 1715 1716 if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT)) 1716 1717 res |= REQ_F_SUPPORT_NOWAIT;
+1
io_uring/kbuf.c
··· 271 271 if (len > arg->max_len) { 272 272 len = arg->max_len; 273 273 if (!(bl->flags & IOBL_INC)) { 274 + arg->partial_map = 1; 274 275 if (iov != arg->iovs) 275 276 break; 276 277 buf->len = len;
+2 -1
io_uring/kbuf.h
··· 58 58 size_t max_len; 59 59 unsigned short nr_iovs; 60 60 unsigned short mode; 61 - unsigned buf_group; 61 + unsigned short buf_group; 62 + unsigned short partial_map; 62 63 }; 63 64 64 65 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+21 -13
io_uring/net.c
··· 75 75 u16 flags; 76 76 /* initialised and used only by !msg send variants */ 77 77 u16 buf_group; 78 - bool retry; 78 + unsigned short retry_flags; 79 79 void __user *msg_control; 80 80 /* used only for send zerocopy */ 81 81 struct io_kiocb *notif; 82 + }; 83 + 84 + enum sr_retry_flags { 85 + IO_SR_MSG_RETRY = 1, 86 + IO_SR_MSG_PARTIAL_MAP = 2, 82 87 }; 83 88 84 89 /* ··· 192 187 193 188 req->flags &= ~REQ_F_BL_EMPTY; 194 189 sr->done_io = 0; 195 - sr->retry = false; 190 + sr->retry_flags = 0; 196 191 sr->len = 0; /* get from the provided buffer */ 197 192 } 198 193 ··· 402 397 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 403 398 404 399 sr->done_io = 0; 405 - sr->retry = false; 400 + sr->retry_flags = 0; 406 401 sr->len = READ_ONCE(sqe->len); 407 402 sr->flags = READ_ONCE(sqe->ioprio); 408 403 if (sr->flags & ~SENDMSG_FLAGS) ··· 756 751 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 757 752 758 753 sr->done_io = 0; 759 - sr->retry = false; 754 + sr->retry_flags = 0; 760 755 761 756 if (unlikely(sqe->file_index || sqe->addr2)) 762 757 return -EINVAL; ··· 828 823 829 824 cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret), 830 825 issue_flags); 831 - if (sr->retry) 826 + if (sr->retry_flags & IO_SR_MSG_RETRY) 832 827 cflags = req->cqe.flags | (cflags & CQE_F_MASK); 833 828 /* bundle with no more immediate buffers, we're done */ 834 829 if (req->flags & REQ_F_BL_EMPTY) ··· 837 832 * If more is available AND it was a full transfer, retry and 838 833 * append to this one 839 834 */ 840 - if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 && 835 + if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 && 841 836 !iov_iter_count(&kmsg->msg.msg_iter)) { 842 837 req->cqe.flags = cflags & ~CQE_F_MASK; 843 838 sr->len = kmsg->msg.msg_inq; 844 839 sr->done_io += this_ret; 845 - sr->retry = true; 840 + sr->retry_flags |= IO_SR_MSG_RETRY; 846 841 return false; 847 842 } 848 843 } else { ··· 1082 1077 if 
(unlikely(ret < 0)) 1083 1078 return ret; 1084 1079 1080 + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { 1081 + kmsg->vec.nr = ret; 1082 + kmsg->vec.iovec = arg.iovs; 1083 + req->flags |= REQ_F_NEED_CLEANUP; 1084 + } 1085 + if (arg.partial_map) 1086 + sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP; 1087 + 1085 1088 /* special case 1 vec, can be a fast path */ 1086 1089 if (ret == 1) { 1087 1090 sr->buf = arg.iovs[0].iov_base; ··· 1098 1085 } 1099 1086 iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, 1100 1087 arg.out_len); 1101 - if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { 1102 - kmsg->vec.nr = ret; 1103 - kmsg->vec.iovec = arg.iovs; 1104 - req->flags |= REQ_F_NEED_CLEANUP; 1105 - } 1106 1088 } else { 1107 1089 void __user *buf; 1108 1090 ··· 1283 1275 int ret; 1284 1276 1285 1277 zc->done_io = 0; 1286 - zc->retry = false; 1278 + zc->retry_flags = 0; 1287 1279 1288 1280 if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) 1289 1281 return -EINVAL;
+1
io_uring/opdef.c
··· 216 216 }, 217 217 [IORING_OP_FALLOCATE] = { 218 218 .needs_file = 1, 219 + .hash_reg_file = 1, 219 220 .prep = io_fallocate_prep, 220 221 .issue = io_fallocate, 221 222 },
+22 -8
io_uring/rsrc.c
··· 112 112 struct io_mapped_ubuf *imu = priv; 113 113 unsigned int i; 114 114 115 - for (i = 0; i < imu->nr_bvecs; i++) 116 - unpin_user_page(imu->bvec[i].bv_page); 115 + for (i = 0; i < imu->nr_bvecs; i++) { 116 + struct folio *folio = page_folio(imu->bvec[i].bv_page); 117 + 118 + unpin_user_folio(folio, 1); 119 + } 117 120 } 118 121 119 122 static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx, ··· 736 733 737 734 data->nr_pages_mid = folio_nr_pages(folio); 738 735 data->folio_shift = folio_shift(folio); 736 + data->first_folio_page_idx = folio_page_idx(folio, page_array[0]); 739 737 740 738 /* 741 739 * Check if pages are contiguous inside a folio, and all folios have ··· 830 826 if (coalesced) 831 827 imu->folio_shift = data.folio_shift; 832 828 refcount_set(&imu->refs, 1); 833 - off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1); 829 + 830 + off = (unsigned long)iov->iov_base & ~PAGE_MASK; 831 + if (coalesced) 832 + off += data.first_folio_page_idx << PAGE_SHIFT; 833 + 834 834 node->buf = imu; 835 835 ret = 0; 836 836 ··· 850 842 if (ret) { 851 843 if (imu) 852 844 io_free_imu(ctx, imu); 853 - if (pages) 854 - unpin_user_pages(pages, nr_pages); 845 + if (pages) { 846 + for (i = 0; i < nr_pages; i++) 847 + unpin_user_folio(page_folio(pages[i]), 1); 848 + } 855 849 io_cache_free(&ctx->node_cache, node); 856 850 node = ERR_PTR(ret); 857 851 } ··· 1341 1331 { 1342 1332 unsigned long folio_size = 1 << imu->folio_shift; 1343 1333 unsigned long folio_mask = folio_size - 1; 1344 - u64 folio_addr = imu->ubuf & ~folio_mask; 1345 1334 struct bio_vec *res_bvec = vec->bvec; 1346 1335 size_t total_len = 0; 1347 1336 unsigned bvec_idx = 0; ··· 1362 1353 if (unlikely(check_add_overflow(total_len, iov_len, &total_len))) 1363 1354 return -EOVERFLOW; 1364 1355 1365 - /* by using folio address it also accounts for bvec offset */ 1366 - offset = buf_addr - folio_addr; 1356 + offset = buf_addr - imu->ubuf; 1357 + /* 1358 + * Only the first bvec can 
have non zero bv_offset, account it 1359 + * here and work with full folios below. 1360 + */ 1361 + offset += imu->bvec[0].bv_offset; 1362 + 1367 1363 src_bvec = imu->bvec + (offset >> imu->folio_shift); 1368 1364 offset &= folio_mask; 1369 1365
+1
io_uring/rsrc.h
··· 49 49 unsigned int nr_pages_mid; 50 50 unsigned int folio_shift; 51 51 unsigned int nr_folios; 52 + unsigned long first_folio_page_idx; 52 53 }; 53 54 54 55 bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
+4 -2
io_uring/zcrx.c
··· 106 106 for_each_sgtable_dma_sg(mem->sgt, sg, i) 107 107 total_size += sg_dma_len(sg); 108 108 109 - if (total_size < off + len) 110 - return -EINVAL; 109 + if (total_size < off + len) { 110 + ret = -EINVAL; 111 + goto err; 112 + } 111 113 112 114 mem->dmabuf_offset = off; 113 115 mem->size = len;