Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-6.1/passthrough-2022-10-04' of git://git.kernel.dk/linux

Pull passthrough updates from Jens Axboe:
"With these changes, passthrough NVMe support over io_uring now
performs at the same level as block device O_DIRECT, and in many cases
6-8% better.

This contains:

- Add support for fixed buffers for passthrough (Anuj, Kanchan)

- Enable batched allocations and freeing on passthrough, similarly to
what we support on the normal storage path (me)

- Fix from Geert for an issue with !CONFIG_IO_URING"

* tag 'for-6.1/passthrough-2022-10-04' of git://git.kernel.dk/linux:
io_uring: Add missing inline to io_uring_cmd_import_fixed() dummy
nvme: wire up fixed buffer support for nvme passthrough
nvme: pass ubuffer as an integer
block: extend functionality to map bvec iterator
block: factor out blk_rq_map_bio_alloc helper
block: rename bio_map_put to blk_mq_map_bio_put
nvme: refactor nvme_alloc_request
nvme: refactor nvme_add_user_metadata
nvme: Use blk_rq_map_user_io helper
scsi: Use blk_rq_map_user_io helper
block: add blk_rq_map_user_io
io_uring: introduce fixed buffer support for io_uring_cmd
io_uring: add io_uring_cmd_import_fixed
nvme: enable batched completions of passthrough IO
nvme: split out metadata vs non metadata end_io uring_cmd completions
block: allow end_io based requests in the completion batch handling
block: change request end_io handler to pass back a return value
block: enable batched allocation for blk_mq_alloc_request()
block: kill deprecated BUG_ON() in the flush handling

+478 -183
+7 -4
block/blk-flush.c
··· 205 205 * flush data request completion path. Restore @rq for 206 206 * normal completion and end it. 207 207 */ 208 - BUG_ON(!list_empty(&rq->queuelist)); 209 208 list_del_init(&rq->flush.list); 210 209 blk_flush_restore_request(rq); 211 210 blk_mq_end_request(rq, error); ··· 217 218 blk_kick_flush(q, fq, cmd_flags); 218 219 } 219 220 220 - static void flush_end_io(struct request *flush_rq, blk_status_t error) 221 + static enum rq_end_io_ret flush_end_io(struct request *flush_rq, 222 + blk_status_t error) 221 223 { 222 224 struct request_queue *q = flush_rq->q; 223 225 struct list_head *running; ··· 232 232 if (!req_ref_put_and_test(flush_rq)) { 233 233 fq->rq_status = error; 234 234 spin_unlock_irqrestore(&fq->mq_flush_lock, flags); 235 - return; 235 + return RQ_END_IO_NONE; 236 236 } 237 237 238 238 blk_account_io_flush(flush_rq); ··· 269 269 } 270 270 271 271 spin_unlock_irqrestore(&fq->mq_flush_lock, flags); 272 + return RQ_END_IO_NONE; 272 273 } 273 274 274 275 bool is_flush_rq(struct request *rq) ··· 355 354 blk_flush_queue_rq(flush_rq, false); 356 355 } 357 356 358 - static void mq_flush_data_end_io(struct request *rq, blk_status_t error) 357 + static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq, 358 + blk_status_t error) 359 359 { 360 360 struct request_queue *q = rq->q; 361 361 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; ··· 378 376 spin_unlock_irqrestore(&fq->mq_flush_lock, flags); 379 377 380 378 blk_mq_sched_restart(hctx); 379 + return RQ_END_IO_NONE; 381 380 } 382 381 383 382 /**
+134 -20
block/blk-map.c
··· 231 231 return ret; 232 232 } 233 233 234 - static void bio_map_put(struct bio *bio) 234 + static void blk_mq_map_bio_put(struct bio *bio) 235 235 { 236 236 if (bio->bi_opf & REQ_ALLOC_CACHE) { 237 237 bio_put(bio); ··· 239 239 bio_uninit(bio); 240 240 kfree(bio); 241 241 } 242 + } 243 + 244 + static struct bio *blk_rq_map_bio_alloc(struct request *rq, 245 + unsigned int nr_vecs, gfp_t gfp_mask) 246 + { 247 + struct bio *bio; 248 + 249 + if (rq->cmd_flags & REQ_POLLED) { 250 + blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE; 251 + 252 + bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask, 253 + &fs_bio_set); 254 + if (!bio) 255 + return NULL; 256 + } else { 257 + bio = bio_kmalloc(nr_vecs, gfp_mask); 258 + if (!bio) 259 + return NULL; 260 + bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq)); 261 + } 262 + return bio; 242 263 } 243 264 244 265 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, ··· 274 253 if (!iov_iter_count(iter)) 275 254 return -EINVAL; 276 255 277 - if (rq->cmd_flags & REQ_POLLED) { 278 - blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE; 279 - 280 - bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask, 281 - &fs_bio_set); 282 - if (!bio) 283 - return -ENOMEM; 284 - } else { 285 - bio = bio_kmalloc(nr_vecs, gfp_mask); 286 - if (!bio) 287 - return -ENOMEM; 288 - bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq)); 289 - } 256 + bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask); 257 + if (bio == NULL) 258 + return -ENOMEM; 290 259 291 260 while (iov_iter_count(iter)) { 292 261 struct page **pages, *stack_pages[UIO_FASTIOV]; ··· 342 331 343 332 out_unmap: 344 333 bio_release_pages(bio, false); 345 - bio_map_put(bio); 334 + blk_mq_map_bio_put(bio); 346 335 return ret; 347 336 } 348 337 ··· 548 537 } 549 538 EXPORT_SYMBOL(blk_rq_append_bio); 550 539 540 + /* Prepare bio for passthrough IO given ITER_BVEC iter */ 541 + static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter) 542 + { 543 
+ struct request_queue *q = rq->q; 544 + size_t nr_iter = iov_iter_count(iter); 545 + size_t nr_segs = iter->nr_segs; 546 + struct bio_vec *bvecs, *bvprvp = NULL; 547 + struct queue_limits *lim = &q->limits; 548 + unsigned int nsegs = 0, bytes = 0; 549 + struct bio *bio; 550 + size_t i; 551 + 552 + if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q)) 553 + return -EINVAL; 554 + if (nr_segs > queue_max_segments(q)) 555 + return -EINVAL; 556 + 557 + /* no iovecs to alloc, as we already have a BVEC iterator */ 558 + bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL); 559 + if (bio == NULL) 560 + return -ENOMEM; 561 + 562 + bio_iov_bvec_set(bio, (struct iov_iter *)iter); 563 + blk_rq_bio_prep(rq, bio, nr_segs); 564 + 565 + /* loop to perform a bunch of sanity checks */ 566 + bvecs = (struct bio_vec *)iter->bvec; 567 + for (i = 0; i < nr_segs; i++) { 568 + struct bio_vec *bv = &bvecs[i]; 569 + 570 + /* 571 + * If the queue doesn't support SG gaps and adding this 572 + * offset would create a gap, fallback to copy. 
573 + */ 574 + if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) { 575 + blk_mq_map_bio_put(bio); 576 + return -EREMOTEIO; 577 + } 578 + /* check full condition */ 579 + if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len) 580 + goto put_bio; 581 + if (bytes + bv->bv_len > nr_iter) 582 + goto put_bio; 583 + if (bv->bv_offset + bv->bv_len > PAGE_SIZE) 584 + goto put_bio; 585 + 586 + nsegs++; 587 + bytes += bv->bv_len; 588 + bvprvp = bv; 589 + } 590 + return 0; 591 + put_bio: 592 + blk_mq_map_bio_put(bio); 593 + return -EINVAL; 594 + } 595 + 551 596 /** 552 597 * blk_rq_map_user_iov - map user data to a request, for passthrough requests 553 598 * @q: request queue where request should be inserted ··· 623 556 struct rq_map_data *map_data, 624 557 const struct iov_iter *iter, gfp_t gfp_mask) 625 558 { 626 - bool copy = false; 559 + bool copy = false, map_bvec = false; 627 560 unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); 628 561 struct bio *bio = NULL; 629 562 struct iov_iter i; 630 563 int ret = -EINVAL; 631 - 632 - if (!iter_is_iovec(iter)) 633 - goto fail; 634 564 635 565 if (map_data) 636 566 copy = true; ··· 635 571 copy = true; 636 572 else if (iov_iter_alignment(iter) & align) 637 573 copy = true; 574 + else if (iov_iter_is_bvec(iter)) 575 + map_bvec = true; 576 + else if (!iter_is_iovec(iter)) 577 + copy = true; 638 578 else if (queue_virt_boundary(q)) 639 579 copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter); 580 + 581 + if (map_bvec) { 582 + ret = blk_rq_map_user_bvec(rq, iter); 583 + if (!ret) 584 + return 0; 585 + if (ret != -EREMOTEIO) 586 + goto fail; 587 + /* fall back to copying the data on limits mismatches */ 588 + copy = true; 589 + } 640 590 641 591 i = *iter; 642 592 do { ··· 689 611 } 690 612 EXPORT_SYMBOL(blk_rq_map_user); 691 613 614 + int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data, 615 + void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask, 616 + bool vec, int iov_count, 
bool check_iter_count, int rw) 617 + { 618 + int ret = 0; 619 + 620 + if (vec) { 621 + struct iovec fast_iov[UIO_FASTIOV]; 622 + struct iovec *iov = fast_iov; 623 + struct iov_iter iter; 624 + 625 + ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len, 626 + UIO_FASTIOV, &iov, &iter); 627 + if (ret < 0) 628 + return ret; 629 + 630 + if (iov_count) { 631 + /* SG_IO howto says that the shorter of the two wins */ 632 + iov_iter_truncate(&iter, buf_len); 633 + if (check_iter_count && !iov_iter_count(&iter)) { 634 + kfree(iov); 635 + return -EINVAL; 636 + } 637 + } 638 + 639 + ret = blk_rq_map_user_iov(req->q, req, map_data, &iter, 640 + gfp_mask); 641 + kfree(iov); 642 + } else if (buf_len) { 643 + ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len, 644 + gfp_mask); 645 + } 646 + return ret; 647 + } 648 + EXPORT_SYMBOL(blk_rq_map_user_io); 649 + 692 650 /** 693 651 * blk_rq_unmap_user - unmap a request with user data 694 652 * @bio: start of bio list ··· 750 636 751 637 next_bio = bio; 752 638 bio = bio->bi_next; 753 - bio_map_put(next_bio); 639 + blk_mq_map_bio_put(next_bio); 754 640 } 755 641 756 642 return ret;
+91 -16
block/blk-mq.c
··· 510 510 alloc_time_ns); 511 511 } 512 512 513 - struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, 514 - blk_mq_req_flags_t flags) 513 + static struct request *blk_mq_rq_cache_fill(struct request_queue *q, 514 + struct blk_plug *plug, 515 + blk_opf_t opf, 516 + blk_mq_req_flags_t flags) 515 517 { 516 518 struct blk_mq_alloc_data data = { 517 519 .q = q, 518 520 .flags = flags, 519 521 .cmd_flags = opf, 520 - .nr_tags = 1, 522 + .nr_tags = plug->nr_ios, 523 + .cached_rq = &plug->cached_rq, 521 524 }; 522 525 struct request *rq; 523 - int ret; 524 526 525 - ret = blk_queue_enter(q, flags); 526 - if (ret) 527 - return ERR_PTR(ret); 527 + if (blk_queue_enter(q, flags)) 528 + return NULL; 529 + 530 + plug->nr_ios = 1; 528 531 529 532 rq = __blk_mq_alloc_requests(&data); 530 - if (!rq) 531 - goto out_queue_exit; 533 + if (unlikely(!rq)) 534 + blk_queue_exit(q); 535 + return rq; 536 + } 537 + 538 + static struct request *blk_mq_alloc_cached_request(struct request_queue *q, 539 + blk_opf_t opf, 540 + blk_mq_req_flags_t flags) 541 + { 542 + struct blk_plug *plug = current->plug; 543 + struct request *rq; 544 + 545 + if (!plug) 546 + return NULL; 547 + if (rq_list_empty(plug->cached_rq)) { 548 + if (plug->nr_ios == 1) 549 + return NULL; 550 + rq = blk_mq_rq_cache_fill(q, plug, opf, flags); 551 + if (rq) 552 + goto got_it; 553 + return NULL; 554 + } 555 + rq = rq_list_peek(&plug->cached_rq); 556 + if (!rq || rq->q != q) 557 + return NULL; 558 + 559 + if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) 560 + return NULL; 561 + if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) 562 + return NULL; 563 + 564 + plug->cached_rq = rq_list_next(rq); 565 + got_it: 566 + rq->cmd_flags = opf; 567 + INIT_LIST_HEAD(&rq->queuelist); 568 + return rq; 569 + } 570 + 571 + struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, 572 + blk_mq_req_flags_t flags) 573 + { 574 + struct request *rq; 575 + 576 + rq = blk_mq_alloc_cached_request(q, 
opf, flags); 577 + if (!rq) { 578 + struct blk_mq_alloc_data data = { 579 + .q = q, 580 + .flags = flags, 581 + .cmd_flags = opf, 582 + .nr_tags = 1, 583 + }; 584 + int ret; 585 + 586 + ret = blk_queue_enter(q, flags); 587 + if (ret) 588 + return ERR_PTR(ret); 589 + 590 + rq = __blk_mq_alloc_requests(&data); 591 + if (!rq) 592 + goto out_queue_exit; 593 + } 532 594 rq->__data_len = 0; 533 595 rq->__sector = (sector_t) -1; 534 596 rq->bio = rq->biotail = NULL; ··· 823 761 * can find how many bytes remain in the request 824 762 * later. 825 763 */ 826 - req->bio = NULL; 827 - req->__data_len = 0; 764 + if (!req->end_io) { 765 + req->bio = NULL; 766 + req->__data_len = 0; 767 + } 828 768 } 829 769 830 770 /** ··· 1003 939 1004 940 if (rq->end_io) { 1005 941 rq_qos_done(rq->q, rq); 1006 - rq->end_io(rq, error); 942 + if (rq->end_io(rq, error) == RQ_END_IO_FREE) 943 + blk_mq_free_request(rq); 1007 944 } else { 1008 945 blk_mq_free_request(rq); 1009 946 } ··· 1056 991 __blk_mq_end_request_acct(rq, now); 1057 992 1058 993 rq_qos_done(rq->q, rq); 994 + 995 + /* 996 + * If end_io handler returns NONE, then it still has 997 + * ownership of the request. 
998 + */ 999 + if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) 1000 + continue; 1059 1001 1060 1002 WRITE_ONCE(rq->state, MQ_RQ_IDLE); 1061 1003 if (!req_ref_put_and_test(rq)) ··· 1305 1233 blk_status_t ret; 1306 1234 }; 1307 1235 1308 - static void blk_end_sync_rq(struct request *rq, blk_status_t ret) 1236 + static enum rq_end_io_ret blk_end_sync_rq(struct request *rq, blk_status_t ret) 1309 1237 { 1310 1238 struct blk_rq_wait *wait = rq->end_io_data; 1311 1239 1312 1240 wait->ret = ret; 1313 1241 complete(&wait->done); 1242 + return RQ_END_IO_NONE; 1314 1243 } 1315 1244 1316 1245 bool blk_rq_is_poll(struct request *rq) ··· 1545 1472 1546 1473 void blk_mq_put_rq_ref(struct request *rq) 1547 1474 { 1548 - if (is_flush_rq(rq)) 1549 - rq->end_io(rq, 0); 1550 - else if (req_ref_put_and_test(rq)) 1475 + if (is_flush_rq(rq)) { 1476 + if (rq->end_io(rq, 0) == RQ_END_IO_FREE) 1477 + blk_mq_free_request(rq); 1478 + } else if (req_ref_put_and_test(rq)) { 1551 1479 __blk_mq_free_request(rq); 1480 + } 1552 1481 } 1553 1482 1554 1483 static bool blk_mq_check_expired(struct request *rq, void *priv)
+3 -1
drivers/md/dm-rq.c
··· 292 292 dm_complete_request(rq, error); 293 293 } 294 294 295 - static void end_clone_request(struct request *clone, blk_status_t error) 295 + static enum rq_end_io_ret end_clone_request(struct request *clone, 296 + blk_status_t error) 296 297 { 297 298 struct dm_rq_target_io *tio = clone->end_io_data; 298 299 299 300 dm_complete_request(tio->orig, error); 301 + return RQ_END_IO_NONE; 300 302 } 301 303 302 304 static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
+4 -2
drivers/nvme/host/core.c
··· 1172 1172 queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2); 1173 1173 } 1174 1174 1175 - static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) 1175 + static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, 1176 + blk_status_t status) 1176 1177 { 1177 1178 struct nvme_ctrl *ctrl = rq->end_io_data; 1178 1179 unsigned long flags; ··· 1185 1184 dev_err(ctrl->device, 1186 1185 "failed nvme_keep_alive_end_io error=%d\n", 1187 1186 status); 1188 - return; 1187 + return RQ_END_IO_NONE; 1189 1188 } 1190 1189 1191 1190 ctrl->comp_seen = false; ··· 1196 1195 spin_unlock_irqrestore(&ctrl->lock, flags); 1197 1196 if (startka) 1198 1197 nvme_queue_keep_alive_work(ctrl); 1198 + return RQ_END_IO_NONE; 1199 1199 } 1200 1200 1201 1201 static void nvme_keep_alive_work(struct work_struct *work)
+146 -81
drivers/nvme/host/ioctl.c
··· 20 20 return (void __user *)ptrval; 21 21 } 22 22 23 - static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, 24 - unsigned len, u32 seed, bool write) 23 + static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, 24 + unsigned len, u32 seed) 25 25 { 26 26 struct bio_integrity_payload *bip; 27 27 int ret = -ENOMEM; 28 28 void *buf; 29 + struct bio *bio = req->bio; 29 30 30 31 buf = kmalloc(len, GFP_KERNEL); 31 32 if (!buf) 32 33 goto out; 33 34 34 35 ret = -EFAULT; 35 - if (write && copy_from_user(buf, ubuf, len)) 36 + if ((req_op(req) == REQ_OP_DRV_OUT) && copy_from_user(buf, ubuf, len)) 36 37 goto out_free_meta; 37 38 38 39 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); ··· 46 45 bip->bip_iter.bi_sector = seed; 47 46 ret = bio_integrity_add_page(bio, virt_to_page(buf), len, 48 47 offset_in_page(buf)); 49 - if (ret == len) 50 - return buf; 51 - ret = -ENOMEM; 48 + if (ret != len) { 49 + ret = -ENOMEM; 50 + goto out_free_meta; 51 + } 52 + 53 + req->cmd_flags |= REQ_INTEGRITY; 54 + return buf; 52 55 out_free_meta: 53 56 kfree(buf); 54 57 out: ··· 70 65 } 71 66 72 67 static struct request *nvme_alloc_user_request(struct request_queue *q, 73 - struct nvme_command *cmd, void __user *ubuffer, 74 - unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 75 - u32 meta_seed, void **metap, unsigned timeout, bool vec, 76 - blk_opf_t rq_flags, blk_mq_req_flags_t blk_flags) 68 + struct nvme_command *cmd, blk_opf_t rq_flags, 69 + blk_mq_req_flags_t blk_flags) 77 70 { 78 - bool write = nvme_is_write(cmd); 79 - struct nvme_ns *ns = q->queuedata; 80 - struct block_device *bdev = ns ? 
ns->disk->part0 : NULL; 81 71 struct request *req; 82 - struct bio *bio = NULL; 83 - void *meta = NULL; 84 - int ret; 85 72 86 73 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 87 74 if (IS_ERR(req)) 88 75 return req; 89 76 nvme_init_request(req, cmd); 90 - 91 - if (timeout) 92 - req->timeout = timeout; 93 77 nvme_req(req)->flags |= NVME_REQ_USERCMD; 78 + return req; 79 + } 94 80 95 - if (ubuffer && bufflen) { 96 - if (!vec) 97 - ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, 98 - GFP_KERNEL); 99 - else { 100 - struct iovec fast_iov[UIO_FASTIOV]; 101 - struct iovec *iov = fast_iov; 102 - struct iov_iter iter; 81 + static int nvme_map_user_request(struct request *req, u64 ubuffer, 82 + unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 83 + u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, 84 + bool vec) 85 + { 86 + struct request_queue *q = req->q; 87 + struct nvme_ns *ns = q->queuedata; 88 + struct block_device *bdev = ns ? ns->disk->part0 : NULL; 89 + struct bio *bio = NULL; 90 + void *meta = NULL; 91 + int ret; 103 92 104 - ret = import_iovec(rq_data_dir(req), ubuffer, bufflen, 105 - UIO_FASTIOV, &iov, &iter); 106 - if (ret < 0) 107 - goto out; 108 - ret = blk_rq_map_user_iov(q, req, NULL, &iter, 109 - GFP_KERNEL); 110 - kfree(iov); 111 - } 112 - if (ret) 93 + if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 94 + struct iov_iter iter; 95 + 96 + /* fixedbufs is only for non-vectored io */ 97 + if (WARN_ON_ONCE(vec)) 98 + return -EINVAL; 99 + ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 100 + rq_data_dir(req), &iter, ioucmd); 101 + if (ret < 0) 113 102 goto out; 114 - bio = req->bio; 115 - if (bdev) 116 - bio_set_dev(bio, bdev); 117 - if (bdev && meta_buffer && meta_len) { 118 - meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, 119 - meta_seed, write); 120 - if (IS_ERR(meta)) { 121 - ret = PTR_ERR(meta); 122 - goto out_unmap; 123 - } 124 - req->cmd_flags |= REQ_INTEGRITY; 125 - *metap = meta; 
126 - } 103 + ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 104 + } else { 105 + ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 106 + bufflen, GFP_KERNEL, vec, 0, 0, 107 + rq_data_dir(req)); 127 108 } 128 109 129 - return req; 110 + if (ret) 111 + goto out; 112 + bio = req->bio; 113 + if (bdev) 114 + bio_set_dev(bio, bdev); 115 + 116 + if (bdev && meta_buffer && meta_len) { 117 + meta = nvme_add_user_metadata(req, meta_buffer, meta_len, 118 + meta_seed); 119 + if (IS_ERR(meta)) { 120 + ret = PTR_ERR(meta); 121 + goto out_unmap; 122 + } 123 + *metap = meta; 124 + } 125 + 126 + return ret; 130 127 131 128 out_unmap: 132 129 if (bio) 133 130 blk_rq_unmap_user(bio); 134 131 out: 135 132 blk_mq_free_request(req); 136 - return ERR_PTR(ret); 133 + return ret; 137 134 } 138 135 139 136 static int nvme_submit_user_cmd(struct request_queue *q, 140 - struct nvme_command *cmd, void __user *ubuffer, 137 + struct nvme_command *cmd, u64 ubuffer, 141 138 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 142 139 u32 meta_seed, u64 *result, unsigned timeout, bool vec) 143 140 { ··· 150 143 u32 effects; 151 144 int ret; 152 145 153 - req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer, 154 - meta_len, meta_seed, &meta, timeout, vec, 0, 0); 146 + req = nvme_alloc_user_request(q, cmd, 0, 0); 155 147 if (IS_ERR(req)) 156 148 return PTR_ERR(req); 149 + 150 + req->timeout = timeout; 151 + if (ubuffer && bufflen) { 152 + ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 153 + meta_len, meta_seed, &meta, NULL, vec); 154 + if (ret) 155 + return ret; 156 + } 157 157 158 158 bio = req->bio; 159 159 ctrl = nvme_req(req)->ctrl; ··· 241 227 c.rw.appmask = cpu_to_le16(io.appmask); 242 228 243 229 return nvme_submit_user_cmd(ns->queue, &c, 244 - nvme_to_user_ptr(io.addr), length, 230 + io.addr, length, 245 231 metadata, meta_len, lower_32_bits(io.slba), NULL, 0, 246 232 false); 247 233 } ··· 295 281 timeout = 
msecs_to_jiffies(cmd.timeout_ms); 296 282 297 283 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 298 - nvme_to_user_ptr(cmd.addr), cmd.data_len, 284 + cmd.addr, cmd.data_len, 299 285 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 300 286 0, &result, timeout, false); 301 287 ··· 341 327 timeout = msecs_to_jiffies(cmd.timeout_ms); 342 328 343 329 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 344 - nvme_to_user_ptr(cmd.addr), cmd.data_len, 330 + cmd.addr, cmd.data_len, 345 331 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 346 332 0, &cmd.result, timeout, vec); 347 333 ··· 370 356 struct bio *bio; 371 357 struct request *req; 372 358 }; 373 - void *meta; /* kernel-resident buffer */ 374 - void __user *meta_buffer; 375 359 u32 meta_len; 360 + u32 nvme_status; 361 + union { 362 + struct { 363 + void *meta; /* kernel-resident buffer */ 364 + void __user *meta_buffer; 365 + }; 366 + u64 result; 367 + } u; 376 368 }; 377 369 378 370 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( ··· 387 367 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 388 368 } 389 369 390 - static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) 370 + static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) 391 371 { 392 372 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 393 373 struct request *req = pdu->req; 394 - struct bio *bio = req->bio; 395 374 int status; 396 375 u64 result; 397 376 ··· 401 382 402 383 result = le64_to_cpu(nvme_req(req)->result.u64); 403 384 404 - if (pdu->meta) 405 - status = nvme_finish_user_metadata(req, pdu->meta_buffer, 406 - pdu->meta, pdu->meta_len, status); 407 - if (bio) 408 - blk_rq_unmap_user(bio); 385 + if (pdu->meta_len) 386 + status = nvme_finish_user_metadata(req, pdu->u.meta_buffer, 387 + pdu->u.meta, pdu->meta_len, status); 388 + if (req->bio) 389 + blk_rq_unmap_user(req->bio); 409 390 blk_mq_free_request(req); 410 391 411 392 io_uring_cmd_done(ioucmd, status, result); 412 
393 } 413 394 414 - static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err) 395 + static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) 396 + { 397 + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 398 + 399 + if (pdu->bio) 400 + blk_rq_unmap_user(pdu->bio); 401 + 402 + io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result); 403 + } 404 + 405 + static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 406 + blk_status_t err) 415 407 { 416 408 struct io_uring_cmd *ioucmd = req->end_io_data; 417 409 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 418 - /* extract bio before reusing the same field for request */ 419 - struct bio *bio = pdu->bio; 420 410 void *cookie = READ_ONCE(ioucmd->cookie); 421 411 422 - pdu->req = req; 423 - req->bio = bio; 412 + req->bio = pdu->bio; 413 + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 414 + pdu->nvme_status = -EINTR; 415 + else 416 + pdu->nvme_status = nvme_req(req)->status; 417 + pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); 424 418 425 419 /* 426 420 * For iopoll, complete it directly. ··· 443 411 nvme_uring_task_cb(ioucmd); 444 412 else 445 413 io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); 414 + 415 + return RQ_END_IO_FREE; 416 + } 417 + 418 + static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, 419 + blk_status_t err) 420 + { 421 + struct io_uring_cmd *ioucmd = req->end_io_data; 422 + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 423 + void *cookie = READ_ONCE(ioucmd->cookie); 424 + 425 + req->bio = pdu->bio; 426 + pdu->req = req; 427 + 428 + /* 429 + * For iopoll, complete it directly. 430 + * Otherwise, move the completion to task work. 
431 + */ 432 + if (cookie != NULL && blk_rq_is_poll(req)) 433 + nvme_uring_task_meta_cb(ioucmd); 434 + else 435 + io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); 436 + 437 + return RQ_END_IO_NONE; 446 438 } 447 439 448 440 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ··· 481 425 blk_opf_t rq_flags = 0; 482 426 blk_mq_req_flags_t blk_flags = 0; 483 427 void *meta = NULL; 428 + int ret; 484 429 485 430 if (!capable(CAP_SYS_ADMIN)) 486 431 return -EACCES; ··· 521 464 rq_flags |= REQ_POLLED; 522 465 523 466 retry: 524 - req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr), 525 - d.data_len, nvme_to_user_ptr(d.metadata), 526 - d.metadata_len, 0, &meta, d.timeout_ms ? 527 - msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags, 528 - blk_flags); 467 + req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 529 468 if (IS_ERR(req)) 530 469 return PTR_ERR(req); 531 - req->end_io = nvme_uring_cmd_end_io; 532 - req->end_io_data = ioucmd; 470 + req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 471 + 472 + if (d.addr && d.data_len) { 473 + ret = nvme_map_user_request(req, d.addr, 474 + d.data_len, nvme_to_user_ptr(d.metadata), 475 + d.metadata_len, 0, &meta, ioucmd, vec); 476 + if (ret) 477 + return ret; 478 + } 533 479 534 480 if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) { 535 481 if (unlikely(!req->bio)) { ··· 547 487 } 548 488 /* to free bio on completion, as req->bio will be null at that time */ 549 489 pdu->bio = req->bio; 550 - pdu->meta = meta; 551 - pdu->meta_buffer = nvme_to_user_ptr(d.metadata); 552 490 pdu->meta_len = d.metadata_len; 553 - 491 + req->end_io_data = ioucmd; 492 + if (pdu->meta_len) { 493 + pdu->u.meta = meta; 494 + pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata); 495 + req->end_io = nvme_uring_cmd_end_io_meta; 496 + } else { 497 + req->end_io = nvme_uring_cmd_end_io; 498 + } 554 499 blk_execute_rq_nowait(req, false); 555 500 return -EIOCBQUEUED; 556 501 }
+8 -4
drivers/nvme/host/pci.c
··· 1268 1268 return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); 1269 1269 } 1270 1270 1271 - static void abort_endio(struct request *req, blk_status_t error) 1271 + static enum rq_end_io_ret abort_endio(struct request *req, blk_status_t error) 1272 1272 { 1273 1273 struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 1274 1274 ··· 1276 1276 "Abort status: 0x%x", nvme_req(req)->status); 1277 1277 atomic_inc(&nvmeq->dev->ctrl.abort_limit); 1278 1278 blk_mq_free_request(req); 1279 + return RQ_END_IO_NONE; 1279 1280 } 1280 1281 1281 1282 static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) ··· 2448 2447 return result; 2449 2448 } 2450 2449 2451 - static void nvme_del_queue_end(struct request *req, blk_status_t error) 2450 + static enum rq_end_io_ret nvme_del_queue_end(struct request *req, 2451 + blk_status_t error) 2452 2452 { 2453 2453 struct nvme_queue *nvmeq = req->end_io_data; 2454 2454 2455 2455 blk_mq_free_request(req); 2456 2456 complete(&nvmeq->delete_done); 2457 + return RQ_END_IO_NONE; 2457 2458 } 2458 2459 2459 - static void nvme_del_cq_end(struct request *req, blk_status_t error) 2460 + static enum rq_end_io_ret nvme_del_cq_end(struct request *req, 2461 + blk_status_t error) 2460 2462 { 2461 2463 struct nvme_queue *nvmeq = req->end_io_data; 2462 2464 2463 2465 if (error) 2464 2466 set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); 2465 2467 2466 - nvme_del_queue_end(req, error); 2468 + return nvme_del_queue_end(req, error); 2467 2469 } 2468 2470 2469 2471 static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
+3 -2
drivers/nvme/target/passthru.c
··· 245 245 nvme_passthru_end(ctrl, effects, req->cmd, status); 246 246 } 247 247 248 - static void nvmet_passthru_req_done(struct request *rq, 249 - blk_status_t blk_status) 248 + static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq, 249 + blk_status_t blk_status) 250 250 { 251 251 struct nvmet_req *req = rq->end_io_data; 252 252 253 253 req->cqe->result = nvme_req(rq)->result; 254 254 nvmet_req_complete(req, nvme_req(rq)->status); 255 255 blk_mq_free_request(rq); 256 + return RQ_END_IO_NONE; 256 257 } 257 258 258 259 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
+3 -1
drivers/scsi/scsi_error.c
··· 2004 2004 } 2005 2005 } 2006 2006 2007 - static void eh_lock_door_done(struct request *req, blk_status_t status) 2007 + static enum rq_end_io_ret eh_lock_door_done(struct request *req, 2008 + blk_status_t status) 2008 2009 { 2009 2010 blk_mq_free_request(req); 2011 + return RQ_END_IO_NONE; 2010 2012 } 2011 2013 2012 2014 /**
+3 -19
drivers/scsi/scsi_ioctl.c
··· 449 449 if (ret < 0) 450 450 goto out_put_request; 451 451 452 - ret = 0; 453 - if (hdr->iovec_count && hdr->dxfer_len) { 454 - struct iov_iter i; 455 - struct iovec *iov = NULL; 456 - 457 - ret = import_iovec(rq_data_dir(rq), hdr->dxferp, 458 - hdr->iovec_count, 0, &iov, &i); 459 - if (ret < 0) 460 - goto out_put_request; 461 - 462 - /* SG_IO howto says that the shorter of the two wins */ 463 - iov_iter_truncate(&i, hdr->dxfer_len); 464 - 465 - ret = blk_rq_map_user_iov(rq->q, rq, NULL, &i, GFP_KERNEL); 466 - kfree(iov); 467 - } else if (hdr->dxfer_len) 468 - ret = blk_rq_map_user(rq->q, rq, NULL, hdr->dxferp, 469 - hdr->dxfer_len, GFP_KERNEL); 470 - 452 + ret = blk_rq_map_user_io(rq, NULL, hdr->dxferp, hdr->dxfer_len, 453 + GFP_KERNEL, hdr->iovec_count && hdr->dxfer_len, 454 + hdr->iovec_count, 0, rq_data_dir(rq)); 471 455 if (ret) 472 456 goto out_put_request; 473 457
+7 -24
drivers/scsi/sg.c
··· 177 177 } Sg_device; 178 178 179 179 /* tasklet or soft irq callback */ 180 - static void sg_rq_end_io(struct request *rq, blk_status_t status); 180 + static enum rq_end_io_ret sg_rq_end_io(struct request *rq, blk_status_t status); 181 181 static int sg_start_req(Sg_request *srp, unsigned char *cmd); 182 182 static int sg_finish_rem_req(Sg_request * srp); 183 183 static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); ··· 1311 1311 * This function is a "bottom half" handler that is called by the mid 1312 1312 * level when a command is completed (or has failed). 1313 1313 */ 1314 - static void 1314 + static enum rq_end_io_ret 1315 1315 sg_rq_end_io(struct request *rq, blk_status_t status) 1316 1316 { 1317 1317 struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); ··· 1324 1324 int result, resid, done = 1; 1325 1325 1326 1326 if (WARN_ON(srp->done != 0)) 1327 - return; 1327 + return RQ_END_IO_NONE; 1328 1328 1329 1329 sfp = srp->parentfp; 1330 1330 if (WARN_ON(sfp == NULL)) 1331 - return; 1331 + return RQ_END_IO_NONE; 1332 1332 1333 1333 sdp = sfp->parentdp; 1334 1334 if (unlikely(atomic_read(&sdp->detaching))) ··· 1406 1406 INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext); 1407 1407 schedule_work(&srp->ew.work); 1408 1408 } 1409 + return RQ_END_IO_NONE; 1409 1410 } 1410 1411 1411 1412 static const struct file_operations sg_fops = { ··· 1804 1803 md->from_user = 0; 1805 1804 } 1806 1805 1807 - if (iov_count) { 1808 - struct iovec *iov = NULL; 1809 - struct iov_iter i; 1810 - 1811 - res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i); 1812 - if (res < 0) 1813 - return res; 1814 - 1815 - iov_iter_truncate(&i, hp->dxfer_len); 1816 - if (!iov_iter_count(&i)) { 1817 - kfree(iov); 1818 - return -EINVAL; 1819 - } 1820 - 1821 - res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC); 1822 - kfree(iov); 1823 - } else 1824 - res = blk_rq_map_user(q, rq, md, hp->dxferp, 1825 - hp->dxfer_len, GFP_ATOMIC); 1826 - 1806 + res = blk_rq_map_user_io(rq, 
md, hp->dxferp, hp->dxfer_len, 1807 + GFP_ATOMIC, iov_count, iov_count, 1, rw); 1827 1808 if (!res) { 1828 1809 srp->bio = rq->bio; 1829 1810
+3 -1
drivers/scsi/st.c
··· 512 512 atomic64_dec(&STp->stats->in_flight); 513 513 } 514 514 515 - static void st_scsi_execute_end(struct request *req, blk_status_t status) 515 + static enum rq_end_io_ret st_scsi_execute_end(struct request *req, 516 + blk_status_t status) 516 517 { 517 518 struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); 518 519 struct st_request *SRpnt = req->end_io_data; ··· 533 532 534 533 blk_rq_unmap_user(tmp); 535 534 blk_mq_free_request(req); 535 + return RQ_END_IO_NONE; 536 536 } 537 537 538 538 static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
+4 -2
drivers/target/target_core_pscsi.c
··· 39 39 } 40 40 41 41 static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd); 42 - static void pscsi_req_done(struct request *, blk_status_t); 42 + static enum rq_end_io_ret pscsi_req_done(struct request *, blk_status_t); 43 43 44 44 /* pscsi_attach_hba(): 45 45 * ··· 1002 1002 return 0; 1003 1003 } 1004 1004 1005 - static void pscsi_req_done(struct request *req, blk_status_t status) 1005 + static enum rq_end_io_ret pscsi_req_done(struct request *req, 1006 + blk_status_t status) 1006 1007 { 1007 1008 struct se_cmd *cmd = req->end_io_data; 1008 1009 struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); ··· 1030 1029 } 1031 1030 1032 1031 blk_mq_free_request(req); 1032 + return RQ_END_IO_NONE; 1033 1033 } 1034 1034 1035 1035 static const struct target_backend_ops pscsi_ops = {
+6 -2
drivers/ufs/core/ufshpb.c
··· 613 613 srgn->srgn_state = HPB_SRGN_VALID; 614 614 } 615 615 616 - static void ufshpb_umap_req_compl_fn(struct request *req, blk_status_t error) 616 + static enum rq_end_io_ret ufshpb_umap_req_compl_fn(struct request *req, 617 + blk_status_t error) 617 618 { 618 619 struct ufshpb_req *umap_req = (struct ufshpb_req *)req->end_io_data; 619 620 620 621 ufshpb_put_req(umap_req->hpb, umap_req); 622 + return RQ_END_IO_NONE; 621 623 } 622 624 623 - static void ufshpb_map_req_compl_fn(struct request *req, blk_status_t error) 625 + static enum rq_end_io_ret ufshpb_map_req_compl_fn(struct request *req, 626 + blk_status_t error) 624 627 { 625 628 struct ufshpb_req *map_req = (struct ufshpb_req *) req->end_io_data; 626 629 struct ufshpb_lu *hpb = map_req->hpb; ··· 639 636 spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); 640 637 641 638 ufshpb_put_map_req(map_req->hpb, map_req); 639 + return RQ_END_IO_NONE; 642 640 } 643 641 644 642 static void ufshpb_set_unmap_cmd(unsigned char *cdb, struct ufshpb_region *rgn)
+10 -2
include/linux/blk-mq.h
··· 14 14 #define BLKDEV_MIN_RQ 4 15 15 #define BLKDEV_DEFAULT_RQ 128 16 16 17 - typedef void (rq_end_io_fn)(struct request *, blk_status_t); 17 + enum rq_end_io_ret { 18 + RQ_END_IO_NONE, 19 + RQ_END_IO_FREE, 20 + }; 21 + 22 + typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); 18 23 19 24 /* 20 25 * request flags */ ··· 853 848 struct io_comp_batch *iob, int ioerror, 854 849 void (*complete)(struct io_comp_batch *)) 855 850 { 856 - if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror) 851 + if (!iob || (req->rq_flags & RQF_ELV) || ioerror) 857 852 return false; 853 + 858 854 if (!iob->complete) 859 855 iob->complete = complete; 860 856 else if (iob->complete != complete) ··· 985 979 986 980 int blk_rq_map_user(struct request_queue *, struct request *, 987 981 struct rq_map_data *, void __user *, unsigned long, gfp_t); 982 + int blk_rq_map_user_io(struct request *, struct rq_map_data *, 983 + void __user *, unsigned long, gfp_t, bool, int, bool, int); 988 984 int blk_rq_map_user_iov(struct request_queue *, struct request *, 989 985 struct rq_map_data *, const struct iov_iter *, gfp_t); 990 986 int blk_rq_unmap_user(struct bio *);
+9 -1
include/linux/io_uring.h
··· 4 4 5 5 #include <linux/sched.h> 6 6 #include <linux/xarray.h> 7 + #include <uapi/linux/io_uring.h> 7 8 8 9 enum io_uring_cmd_flags { 9 10 IO_URING_F_COMPLETE_DEFER = 1, ··· 28 27 void *cookie; 29 28 }; 30 29 u32 cmd_op; 31 - u32 pad; 30 + u32 flags; 32 31 u8 pdu[32]; /* available inline for free use */ 33 32 }; 34 33 35 34 #if defined(CONFIG_IO_URING) 35 + int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, 36 + struct iov_iter *iter, void *ioucmd); 36 37 void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); 37 38 void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, 38 39 void (*task_work_cb)(struct io_uring_cmd *)); ··· 62 59 __io_uring_free(tsk); 63 60 } 64 61 #else 62 + static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, 63 + struct iov_iter *iter, void *ioucmd) 64 + { 65 + return -EOPNOTSUPP; 66 + } 65 67 static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, 66 68 ssize_t ret2) 67 69 {
+9
include/uapi/linux/io_uring.h
··· 56 56 __u32 hardlink_flags; 57 57 __u32 xattr_flags; 58 58 __u32 msg_ring_flags; 59 + __u32 uring_cmd_flags; 60 }; 61 __u64 user_data; /* data to be passed back at completion time */ 62 /* pack this to avoid bogus arm OABI complaints */ ··· 219 218 /* this goes last, obviously */ 220 219 IORING_OP_LAST, 221 220 }; 221 + 222 + /* 223 + * sqe->uring_cmd_flags 224 + * IORING_URING_CMD_FIXED use registered buffer; pass this flag 225 + * along with setting sqe->buf_index. 226 + */ 227 + #define IORING_URING_CMD_FIXED (1U << 0) 228 + 222 229 223 230 /* 224 231 * sqe->fsync_flags
+28 -1
io_uring/uring_cmd.c
··· 4 4 #include <linux/file.h> 5 5 #include <linux/io_uring.h> 6 6 #include <linux/security.h> 7 + #include <linux/nospec.h> 7 8 8 9 #include <uapi/linux/io_uring.h> 9 10 10 11 #include "io_uring.h" 12 + #include "rsrc.h" 11 13 #include "uring_cmd.h" 12 14 13 15 static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) ··· 78 76 { 79 77 struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); 80 78 81 - if (sqe->rw_flags || sqe->__pad1) 79 + if (sqe->__pad1) 82 80 return -EINVAL; 81 + 82 + ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags); 83 + if (ioucmd->flags & ~IORING_URING_CMD_FIXED) 84 + return -EINVAL; 85 + 86 + if (ioucmd->flags & IORING_URING_CMD_FIXED) { 87 + struct io_ring_ctx *ctx = req->ctx; 88 + u16 index; 89 + 90 + req->buf_index = READ_ONCE(sqe->buf_index); 91 + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) 92 + return -EFAULT; 93 + index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); 94 + req->imu = ctx->user_bufs[index]; 95 + io_req_set_rsrc_node(req, ctx, 0); 96 + } 83 97 ioucmd->cmd = sqe->cmd; 84 98 ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); 85 99 return 0; ··· 147 129 148 130 return IOU_ISSUE_SKIP_COMPLETE; 149 131 } 132 + 133 + int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, 134 + struct iov_iter *iter, void *ioucmd) 135 + { 136 + struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); 137 + 138 + return io_import_fixed(rw, iter, req->imu, ubuf, len); 139 + } 140 + EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);