Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus-20190412' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"Set of fixes that should go into this round. This pull is larger than
I'd like at this time, but there's really no specific reason for that.
Some are fixes for issues that went into this merge window, others are
not. Anyway, this contains:

- Hardware queue limiting for virtio-blk/scsi (Dongli)

- Multi-page bvec fixes for lightnvm pblk

- Multi-bio dio error fix (Jason)

- Remove the cache hint from the io_uring tool side, since we didn't
move forward with that (me)

- Make io_uring SETUP_SQPOLL root restricted (me)

- Fix leak of page in error handling for pc requests (Jérôme)

- Fix BFQ regression introduced in this merge window (Paolo)

- Fix break logic for bio segment iteration (Ming)

- Fix NVMe cancel request error handling (Ming)

- NVMe pull request with two fixes (Christoph):
- fix the initial CSN for nvme-fc (James)
- handle log page offsets properly in the target (Keith)"

* tag 'for-linus-20190412' of git://git.kernel.dk/linux-block:
block: fix the return errno for direct IO
nvmet: fix discover log page when offsets are used
nvme-fc: correct csn initialization and increments on error
block: do not leak memory in bio_copy_user_iov()
lightnvm: pblk: fix crash in pblk_end_partial_read due to multipage bvecs
nvme: cancel request synchronously
blk-mq: introduce blk_mq_complete_request_sync()
scsi: virtio_scsi: limit number of hw queues by nr_cpu_ids
virtio-blk: limit number of hw queues by nr_cpu_ids
block, bfq: fix use after free in bfq_bfqq_expire
io_uring: restrict IORING_SETUP_SQPOLL to root
tools/io_uring: remove IOCQE_FLAG_CACHEHIT
block: don't use for-inside-for in bio_for_each_segment_all

20 files changed, +173 -108
+7 -8
block/bfq-iosched.c
···
 	bfq_remove_request(q, rq);
 }
 
-static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
 	/*
 	 * If this bfqq is shared between multiple processes, check
···
 	/*
 	 * All in-service entities must have been properly deactivated
 	 * or requeued before executing the next function, which
-	 * resets all in-service entites as no more in service.
+	 * resets all in-service entities as no more in service. This
+	 * may cause bfqq to be freed. If this happens, the next
+	 * function returns true.
 	 */
-	__bfq_bfqd_reset_in_service(bfqd);
+	return __bfq_bfqd_reset_in_service(bfqd);
 }
 
 /**
···
 	bool slow;
 	unsigned long delta = 0;
 	struct bfq_entity *entity = &bfqq->entity;
-	int ref;
 
 	/*
 	 * Check whether the process is slow (see bfq_bfqq_is_slow).
···
 	 * reason.
 	 */
 	__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
-	ref = bfqq->ref;
-	__bfq_bfqq_expire(bfqd, bfqq);
-
-	if (ref == 1) /* bfqq is gone, no more actions on it */
+	if (__bfq_bfqq_expire(bfqd, bfqq))
+		/* bfqq is gone, no more actions on it */
 		return;
 
 	bfqq->injected_service = 0;
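
The fix follows a common pattern: the helper that may drop the last reference reports whether the object got freed, and the caller bails out instead of dereferencing it afterwards. A minimal sketch of that pattern with kref and made-up names (struct foo, put_foo(), foo_release(); BFQ itself uses a plain integer refcount sampled under the scheduler lock, as the hunk above shows):

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct foo {
		struct kref ref;
		int stats;
	};

	static void foo_release(struct kref *ref)
	{
		kfree(container_of(ref, struct foo, ref));
	}

	/* Returns true if dropping our reference freed the object. */
	static bool put_foo(struct foo *f)
	{
		return kref_put(&f->ref, foo_release);
	}

	static void expire_foo(struct foo *f)
	{
		if (put_foo(f))
			return;		/* f is gone, no more actions on it */

		f->stats = 0;		/* still safe: a reference remains */
	}
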
+1 -1
block/bfq-iosched.h
···
 			     bool ins_into_idle_tree);
 bool next_queue_may_preempt(struct bfq_data *bfqd);
 struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
 void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			 bool ins_into_idle_tree, bool expiration);
 void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+15 -2
block/bfq-wf2q.c
···
 	return bfqq;
 }
 
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+/* returns true if the in-service queue gets freed */
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 {
 	struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
 	struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
···
 	 * service tree either, then release the service reference to
 	 * the queue it represents (taken with bfq_get_entity).
 	 */
-	if (!in_serv_entity->on_st)
+	if (!in_serv_entity->on_st) {
+		/*
+		 * If no process is referencing in_serv_bfqq any
+		 * longer, then the service reference may be the only
+		 * reference to the queue. If this is the case, then
+		 * bfqq gets freed here.
+		 */
+		int ref = in_serv_bfqq->ref;
 		bfq_put_queue(in_serv_bfqq);
+		if (ref == 1)
+			return true;
+	}
+
+	return false;
 }
 
 void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+4 -1
block/bio.c
···
 			}
 		}
 
-		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
+		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+			if (!map_data)
+				__free_page(page);
 			break;
+		}
 
 		len -= bytes;
 		offset = 0;
+7
block/blk-mq.c
···
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
+void blk_mq_complete_request_sync(struct request *rq)
+{
+	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
+	rq->q->mq_ops->complete(rq);
+}
+EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync);
+
 int blk_mq_request_started(struct request *rq)
 {
 	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
+2
drivers/block/virtio_blk.c
···
 	if (err)
 		num_vqs = 1;
 
+	num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);
+
 	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
 	if (!vblk->vqs)
 		return -ENOMEM;
+28 -22
drivers/lightnvm/pblk-read.c
···
 	struct pblk_sec_meta *meta;
 	struct bio *new_bio = rqd->bio;
 	struct bio *bio = pr_ctx->orig_bio;
-	struct bio_vec src_bv, dst_bv;
 	void *meta_list = rqd->meta_list;
-	int bio_init_idx = pr_ctx->bio_init_idx;
 	unsigned long *read_bitmap = pr_ctx->bitmap;
+	struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT;
+	struct bvec_iter new_iter = BVEC_ITER_ALL_INIT;
 	int nr_secs = pr_ctx->orig_nr_secs;
 	int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
 	void *src_p, *dst_p;
-	int hole, i;
+	int bit, i;
 
 	if (unlikely(nr_holes == 1)) {
 		struct ppa_addr ppa;
···
 
 	/* Fill the holes in the original bio */
 	i = 0;
-	hole = find_first_zero_bit(read_bitmap, nr_secs);
-	do {
-		struct pblk_line *line;
+	for (bit = 0; bit < nr_secs; bit++) {
+		if (!test_bit(bit, read_bitmap)) {
+			struct bio_vec dst_bv, src_bv;
+			struct pblk_line *line;
 
-		line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
-		kref_put(&line->ref, pblk_line_put);
+			line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
+			kref_put(&line->ref, pblk_line_put);
 
-		meta = pblk_get_meta(pblk, meta_list, hole);
-		meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
+			meta = pblk_get_meta(pblk, meta_list, bit);
+			meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
 
-		src_bv = new_bio->bi_io_vec[i++];
-		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
+			dst_bv = bio_iter_iovec(bio, orig_iter);
+			src_bv = bio_iter_iovec(new_bio, new_iter);
 
-		src_p = kmap_atomic(src_bv.bv_page);
-		dst_p = kmap_atomic(dst_bv.bv_page);
+			src_p = kmap_atomic(src_bv.bv_page);
+			dst_p = kmap_atomic(dst_bv.bv_page);
 
-		memcpy(dst_p + dst_bv.bv_offset,
-			src_p + src_bv.bv_offset,
-			PBLK_EXPOSED_PAGE_SIZE);
+			memcpy(dst_p + dst_bv.bv_offset,
+				src_p + src_bv.bv_offset,
+				PBLK_EXPOSED_PAGE_SIZE);
 
-		kunmap_atomic(src_p);
-		kunmap_atomic(dst_p);
+			kunmap_atomic(src_p);
+			kunmap_atomic(dst_p);
 
-		mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
+			flush_dcache_page(dst_bv.bv_page);
+			mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
 
-		hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
-	} while (hole < nr_secs);
+			bio_advance_iter(new_bio, &new_iter,
+					PBLK_EXPOSED_PAGE_SIZE);
+			i++;
+		}
+		bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE);
+	}
 
 	bio_put(new_bio);
 	kfree(pr_ctx);
+1 -1
drivers/nvme/host/core.c
···
 				"Cancelling I/O %d", req->tag);
 
 	nvme_req(req)->status = NVME_SC_ABORT_REQ;
-	blk_mq_complete_request(req);
+	blk_mq_complete_request_sync(req);
 	return true;
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);
+15 -5
drivers/nvme/host/fc.c
···
 	memset(queue, 0, sizeof(*queue));
 	queue->ctrl = ctrl;
 	queue->qnum = idx;
-	atomic_set(&queue->csn, 1);
+	atomic_set(&queue->csn, 0);
 	queue->dev = ctrl->dev;
 
 	if (idx > 0)
···
 	 */
 
 	queue->connection_id = 0;
-	atomic_set(&queue->csn, 1);
+	atomic_set(&queue->csn, 0);
 }
 
 static void
···
 {
 	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
 	struct nvme_command *sqe = &cmdiu->sqe;
-	u32 csn;
 	int ret, opstate;
 
 	/*
···
 
 	/* format the FC-NVME CMD IU and fcp_req */
 	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
-	csn = atomic_inc_return(&queue->csn);
-	cmdiu->csn = cpu_to_be32(csn);
 	cmdiu->data_len = cpu_to_be32(data_len);
 	switch (io_dir) {
 	case NVMEFC_FCP_WRITE:
···
 	if (!(op->flags & FCOP_FLAGS_AEN))
 		blk_mq_start_request(op->rq);
 
+	cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn));
 	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
 					&ctrl->rport->remoteport,
 					queue->lldd_handle, &op->fcp_req);
 
 	if (ret) {
+		/*
+		 * If the lld fails to send the command is there an issue with
+		 * the csn value? If the command that fails is the Connect,
+		 * no - as the connection won't be live. If it is a command
+		 * post-connect, it's possible a gap in csn may be created.
+		 * Does this matter? As Linux initiators don't send fused
+		 * commands, no. The gap would exist, but as there's nothing
+		 * that depends on csn order to be delivered on the target
+		 * side, it shouldn't hurt. It would be difficult for a
+		 * target to even detect the csn gap as it has no idea when the
+		 * cmd with the csn was supposed to arrive.
+		 */
 		opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
 		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 
+5
drivers/nvme/target/admin-cmd.c
···
 	return len;
 }
 
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
+{
+	return le64_to_cpu(cmd->get_log_page.lpo);
+}
+
 static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
 {
 	nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
+44 -22
drivers/nvme/target/discovery.c
···
 	memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
 }
 
+static size_t discovery_log_entries(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmet_subsys_link *p;
+	struct nvmet_port *r;
+	size_t entries = 0;
+
+	list_for_each_entry(p, &req->port->subsystems, entry) {
+		if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
+			continue;
+		entries++;
+	}
+	list_for_each_entry(r, &req->port->referrals, entry)
+		entries++;
+	return entries;
+}
+
 static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 {
 	const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmf_disc_rsp_page_hdr *hdr;
+	u64 offset = nvmet_get_log_page_offset(req->cmd);
 	size_t data_len = nvmet_get_log_page_len(req->cmd);
-	size_t alloc_len = max(data_len, sizeof(*hdr));
-	int residual_len = data_len - sizeof(*hdr);
+	size_t alloc_len;
 	struct nvmet_subsys_link *p;
 	struct nvmet_port *r;
 	u32 numrec = 0;
 	u16 status = 0;
+	void *buffer;
+
+	/* Spec requires dword aligned offsets */
+	if (offset & 0x3) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		goto out;
+	}
 
 	/*
 	 * Make sure we're passing at least a buffer of response header size.
 	 * If host provided data len is less than the header size, only the
 	 * number of bytes requested by host will be sent to host.
 	 */
-	hdr = kzalloc(alloc_len, GFP_KERNEL);
-	if (!hdr) {
+	down_read(&nvmet_config_sem);
+	alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req);
+	buffer = kzalloc(alloc_len, GFP_KERNEL);
+	if (!buffer) {
+		up_read(&nvmet_config_sem);
 		status = NVME_SC_INTERNAL;
 		goto out;
 	}
 
-	down_read(&nvmet_config_sem);
+	hdr = buffer;
 	list_for_each_entry(p, &req->port->subsystems, entry) {
+		char traddr[NVMF_TRADDR_SIZE];
+
 		if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
 			continue;
-		if (residual_len >= entry_size) {
-			char traddr[NVMF_TRADDR_SIZE];
 
-			nvmet_set_disc_traddr(req, req->port, traddr);
-			nvmet_format_discovery_entry(hdr, req->port,
-					p->subsys->subsysnqn, traddr,
-					NVME_NQN_NVME, numrec);
-			residual_len -= entry_size;
-		}
+		nvmet_set_disc_traddr(req, req->port, traddr);
+		nvmet_format_discovery_entry(hdr, req->port,
+				p->subsys->subsysnqn, traddr,
+				NVME_NQN_NVME, numrec);
 		numrec++;
 	}
 
 	list_for_each_entry(r, &req->port->referrals, entry) {
-		if (residual_len >= entry_size) {
-			nvmet_format_discovery_entry(hdr, r,
-					NVME_DISC_SUBSYS_NAME,
-					r->disc_addr.traddr,
-					NVME_NQN_DISC, numrec);
-			residual_len -= entry_size;
-		}
+		nvmet_format_discovery_entry(hdr, r,
+				NVME_DISC_SUBSYS_NAME,
+				r->disc_addr.traddr,
+				NVME_NQN_DISC, numrec);
 		numrec++;
 	}
 
···
 
 	up_read(&nvmet_config_sem);
 
-	status = nvmet_copy_to_sgl(req, 0, hdr, data_len);
-	kfree(hdr);
+	status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len);
+	kfree(buffer);
 out:
 	nvmet_req_complete(req, status);
 }
+1
drivers/nvme/target/nvmet.h
···
 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
 
 u32 nvmet_get_log_page_len(struct nvme_command *cmd);
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd);
 
 extern struct list_head *nvmet_ports;
 void nvmet_port_disc_changed(struct nvmet_port *port,
+1
drivers/scsi/virtio_scsi.c
···
 
 	/* We need to know how many queues before we allocate. */
 	num_queues = virtscsi_config_get(vdev, num_queues) ? : 1;
+	num_queues = min_t(unsigned int, nr_cpu_ids, num_queues);
 
 	num_targets = virtscsi_config_get(vdev, max_target) + 1;
 
+4 -4
fs/block_dev.c
···
 	struct blkdev_dio *dio = bio->bi_private;
 	bool should_dirty = dio->should_dirty;
 
-	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
-		if (bio->bi_status && !dio->bio.bi_status)
-			dio->bio.bi_status = bio->bi_status;
-	} else {
+	if (bio->bi_status && !dio->bio.bi_status)
+		dio->bio.bi_status = bio->bi_status;
+
+	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
 		if (!dio->is_sync) {
 			struct kiocb *iocb = dio->iocb;
 			ssize_t ret;
+4
fs/io_uring.c
···
 		goto err;
 
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		ret = -EPERM;
+		if (!capable(CAP_SYS_ADMIN))
+			goto err;
+
 		if (p->flags & IORING_SETUP_SQ_AFF) {
 			int cpu;
 
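
From user space the new check shows up as -EPERM from io_uring_setup(2) when IORING_SETUP_SQPOLL is requested without CAP_SYS_ADMIN. A rough sketch of how a caller might probe for this; it assumes the installed headers define __NR_io_uring_setup, and the error handling is illustrative only:

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <linux/io_uring.h>

	int main(void)
	{
		struct io_uring_params p;
		int fd;

		memset(&p, 0, sizeof(p));
		p.flags = IORING_SETUP_SQPOLL;

		/* io_uring_setup(2): create a ring with a kernel SQ poll thread */
		fd = syscall(__NR_io_uring_setup, 8, &p);
		if (fd < 0 && errno == EPERM)
			fprintf(stderr, "SQPOLL needs CAP_SYS_ADMIN on this kernel\n");
		else if (fd >= 0)
			close(fd);
		return 0;
	}
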
+12 -8
include/linux/bio.h
···
 	return bio->bi_vcnt >= bio->bi_max_vecs;
 }
 
-#define mp_bvec_for_each_segment(bv, bvl, i, iter_all)			\
-	for (bv = bvec_init_iter_all(&iter_all);			\
-		(iter_all.done < (bvl)->bv_len) &&			\
-		(mp_bvec_next_segment((bvl), &iter_all), 1);		\
-		iter_all.done += bv->bv_len, i += 1)
+static inline bool bio_next_segment(const struct bio *bio,
+				    struct bvec_iter_all *iter)
+{
+	if (iter->idx >= bio->bi_vcnt)
+		return false;
+
+	bvec_advance(&bio->bi_io_vec[iter->idx], iter);
+	return true;
+}
 
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
  */
-#define bio_for_each_segment_all(bvl, bio, i, iter_all)			\
-	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
-		mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
+#define bio_for_each_segment_all(bvl, bio, i, iter)			\
+	for (i = 0, bvl = bvec_init_iter_all(&iter);			\
+	     bio_next_segment((bio), &iter); i++)
 
 static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 				    unsigned bytes)
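
For callers, the reworked bio_for_each_segment_all() keeps its four-argument form but is now a single loop driven by a struct bvec_iter_all rather than a for-inside-for. A hedged sketch of an in-kernel user; the function and the zeroing are illustrative, not part of this series:

	#include <linux/bio.h>
	#include <linux/highmem.h>
	#include <linux/string.h>

	/* Zero every single-page segment of a bio we own end to end. */
	static void zero_bio_segments(struct bio *bio)
	{
		struct bio_vec *bvec;
		struct bvec_iter_all iter_all;
		int i;

		bio_for_each_segment_all(bvec, bio, i, iter_all) {
			void *p = kmap_atomic(bvec->bv_page);

			memset(p + bvec->bv_offset, 0, bvec->bv_len);
			kunmap_atomic(p);
		}
	}
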
+1
include/linux/blk-mq.h
···
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 bool blk_mq_complete_request(struct request *rq);
+void blk_mq_complete_request_sync(struct request *rq);
 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
 			   struct bio *bio);
 bool blk_mq_queue_stopped(struct request_queue *q);
+10 -4
include/linux/bvec.h
···
 
 static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all)
 {
-	iter_all->bv.bv_page = NULL;
 	iter_all->done = 0;
+	iter_all->idx = 0;
 
 	return &iter_all->bv;
 }
 
-static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
-					struct bvec_iter_all *iter_all)
+static inline void bvec_advance(const struct bio_vec *bvec,
+				struct bvec_iter_all *iter_all)
 {
 	struct bio_vec *bv = &iter_all->bv;
 
-	if (bv->bv_page) {
+	if (iter_all->done) {
 		bv->bv_page = nth_page(bv->bv_page, 1);
 		bv->bv_offset = 0;
 	} else {
···
 	}
 	bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset,
 			   bvec->bv_len - iter_all->done);
+	iter_all->done += bv->bv_len;
+
+	if (iter_all->done == bvec->bv_len) {
+		iter_all->idx++;
+		iter_all->done = 0;
+	}
 }
 
 /*
+7 -2
include/linux/nvme.h
···
 	__le16			numdl;
 	__le16			numdu;
 	__u16			rsvd11;
-	__le32			lpol;
-	__le32			lpou;
+	union {
+		struct {
+			__le32 lpol;
+			__le32 lpou;
+		};
+		__le64 lpo;
+	};
 	__u32			rsvd14[2];
 };
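
The union gives target code a single 64-bit view (lpo) of the same bytes the host already fills in as the two 32-bit Log Page Offset dwords, so nothing changes on the wire; nvmet_get_log_page_offset() just reads it with le64_to_cpu(). A small stand-alone sketch of that equivalence, using plain integer types instead of the kernel's __le32/__le64 annotations and assuming a little-endian host:

	#include <stdint.h>
	#include <stdio.h>

	/* Mirrors the layout added above; illustration only, not kernel code. */
	struct get_log_page_offset {
		union {
			struct {
				uint32_t lpol;	/* Log Page Offset Lower */
				uint32_t lpou;	/* Log Page Offset Upper */
			};
			uint64_t lpo;	/* same bytes, read as one 64-bit offset */
		};
	};

	int main(void)
	{
		struct get_log_page_offset cmd = { .lpol = 0x1000, .lpou = 0x1 };

		/* On a little-endian host this prints 0x100001000, i.e.
		 * lpol | ((uint64_t)lpou << 32), which is the value the
		 * target recovers from the single lpo field. */
		printf("lpo = 0x%llx\n", (unsigned long long)cmd.lpo);
		return 0;
	}
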
+4 -28
tools/io_uring/io_uring-bench.c
···
 #include "liburing.h"
 #include "barrier.h"
 
-#ifndef IOCQE_FLAG_CACHEHIT
-#define IOCQE_FLAG_CACHEHIT	(1U << 0)
-#endif
-
 #define min(a, b)		((a < b) ? (a) : (b))
 
 struct io_sq_ring {
···
 	unsigned long reaps;
 	unsigned long done;
 	unsigned long calls;
-	unsigned long cachehit, cachemiss;
 	volatile int finish;
 
 	__s32 *fds;
···
 				return -1;
 			}
 		}
-		if (cqe->flags & IOCQE_FLAG_CACHEHIT)
-			s->cachehit++;
-		else
-			s->cachemiss++;
 		reaped++;
 		head++;
 	} while (1);
···
 int main(int argc, char *argv[])
 {
 	struct submitter *s = &submitters[0];
-	unsigned long done, calls, reap, cache_hit, cache_miss;
+	unsigned long done, calls, reap;
 	int err, i, flags, fd;
 	char *fdepths;
 	void *ret;
···
 	pthread_create(&s->thread, NULL, submitter_fn, s);
 
 	fdepths = malloc(8 * s->nr_files);
-	cache_hit = cache_miss = reap = calls = done = 0;
+	reap = calls = done = 0;
 	do {
 		unsigned long this_done = 0;
 		unsigned long this_reap = 0;
 		unsigned long this_call = 0;
-		unsigned long this_cache_hit = 0;
-		unsigned long this_cache_miss = 0;
 		unsigned long rpc = 0, ipc = 0;
-		double hit = 0.0;
 
 		sleep(1);
 		this_done += s->done;
 		this_call += s->calls;
 		this_reap += s->reaps;
-		this_cache_hit += s->cachehit;
-		this_cache_miss += s->cachemiss;
-		if (this_cache_hit && this_cache_miss) {
-			unsigned long hits, total;
-
-			hits = this_cache_hit - cache_hit;
-			total = hits + this_cache_miss - cache_miss;
-			hit = (double) hits / (double) total;
-			hit *= 100.0;
-		}
 		if (this_call - calls) {
 			rpc = (this_done - done) / (this_call - calls);
 			ipc = (this_reap - reap) / (this_call - calls);
 		} else
 			rpc = ipc = -1;
 		file_depths(fdepths);
-		printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s), Cachehit=%0.2f%%\n",
+		printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n",
 			this_done - done, rpc, ipc, s->inflight,
-			fdepths, hit);
+			fdepths);
 		done = this_done;
 		calls = this_call;
 		reap = this_reap;
-		cache_hit = s->cachehit;
-		cache_miss = s->cachemiss;
 	} while (!finish);
 
 	pthread_join(s->thread, &ret);