Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Pull block IO fixes from Jens Axboe:
"Second round of updates and fixes for 3.14-rc2. Most of this stuff
has been queued up for a while. The notable exception is the blk-mq
changes, which are naturally a bit more in flux still.

The pull request contains:

- Two fixes for bugs in the new immutable vecs that caused crashes
with raid or swap. From Kent.

- Various blk-mq tweaks and fixes from Christoph. A fix for
integrity bios from Nic.

- A few bcache fixes from Kent and Darrick Wong.

- xen-blk{front,back} fixes from David Vrabel, Matt Rushton, Nicolas
Swenson, and Roger Pau Monne.

- Fix for a vec miscount with integrity vectors from Martin.

- Minor annotations or fixes from Masanari Iida and Rashika Kheria.

- Tweak to null_blk to do more normal FIFO processing of requests
from Shlomo Pongratz.

- Elevator switching bypass fix from Tejun.

- Fix for a softlockup in blkdev_issue_discard() when !CONFIG_PREEMPT,
from me"

* 'for-linus' of git://git.kernel.dk/linux-block: (31 commits)
block: add cond_resched() to potentially long running ioctl discard loop
xen-blkback: init persistent_purge_work work_struct
blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
block: Fix cloning of discard/write same bios
block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show
blk-mq: rework flush sequencing logic
null_blk: use blk_complete_request and blk_mq_complete_request
virtio_blk: use blk_mq_complete_request
blk-mq: rework I/O completions
fs: Add prototype declaration to appropriate header file include/linux/bio.h
fs: Mark function as static in fs/bio-integrity.c
block/null_blk: Fix completion processing from LIFO to FIFO
block: Explicitly handle discard/write same segments
block: Fix nr_vecs for inline integrity vectors
blk-mq: Add bio_integrity setup to blk_mq_make_request
blk-mq: initialize sg_reserved_size
blk-mq: handle dma_drain_size
blk-mq: divert __blk_put_request for MQ ops
blk-mq: support at_head inserations for blk_execute_rq
...

Linus Torvalds 12 years ago 5e57dc81 0d25e369

+398 -305

29 changed files

expand all

block

blk-core.c

blk-exec.c

blk-flush.c

blk-lib.c

blk-merge.c

blk-mq-tag.c

blk-mq.c

blk-mq.h

blk-sysfs.c

blk-timeout.c

blk.h

drivers

block

null_blk.c

virtio_blk.c

xen-blkback

blkback.c

common.h

xenbus.c

xen-blkfront.c

bcache

bcache.h

bset.c

btree.c

request.c

sysfs.c

bio-integrity.c

bio.c

include

linux

bio.h

blk-mq.h

blkdev.h

xen

interface

blkif.h

lib

percpu_ida.c

+17 -3

block/blk-core.c

··· 693 693 if (!uninit_q) 694 694 return NULL; 695 695 696 + uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); 697 + if (!uninit_q->flush_rq) 698 + goto out_cleanup_queue; 699 + 696 700 q = blk_init_allocated_queue(uninit_q, rfn, lock); 697 701 if (!q) 698 - blk_cleanup_queue(uninit_q); 699 - 702 + goto out_free_flush_rq; 700 703 return q; 704 + 705 + out_free_flush_rq: 706 + kfree(uninit_q->flush_rq); 707 + out_cleanup_queue: 708 + blk_cleanup_queue(uninit_q); 709 + return NULL; 701 710 } 702 711 EXPORT_SYMBOL(blk_init_queue_node); 703 712 ··· 1136 1127 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 1137 1128 { 1138 1129 if (q->mq_ops) 1139 - return blk_mq_alloc_request(q, rw, gfp_mask, false); 1130 + return blk_mq_alloc_request(q, rw, gfp_mask); 1140 1131 else 1141 1132 return blk_old_get_request(q, rw, gfp_mask); 1142 1133 } ··· 1286 1277 { 1287 1278 if (unlikely(!q)) 1288 1279 return; 1280 + 1281 + if (q->mq_ops) { 1282 + blk_mq_free_request(req); 1283 + return; 1284 + } 1289 1285 1290 1286 blk_pm_put_request(req); 1291 1287

+1 -1

block/blk-exec.c

··· 65 65 * be resued after dying flag is set 66 66 */ 67 67 if (q->mq_ops) { 68 - blk_mq_insert_request(q, rq, true); 68 + blk_mq_insert_request(q, rq, at_head, true); 69 69 return; 70 70 } 71 71

+37 -64

block/blk-flush.c

··· 130 130 blk_clear_rq_complete(rq); 131 131 } 132 132 133 - static void mq_flush_data_run(struct work_struct *work) 133 + static void mq_flush_run(struct work_struct *work) 134 134 { 135 135 struct request *rq; 136 136 137 - rq = container_of(work, struct request, mq_flush_data); 137 + rq = container_of(work, struct request, mq_flush_work); 138 138 139 139 memset(&rq->csd, 0, sizeof(rq->csd)); 140 140 blk_mq_run_request(rq, true, false); 141 141 } 142 142 143 - static void blk_mq_flush_data_insert(struct request *rq) 143 + static bool blk_flush_queue_rq(struct request *rq) 144 144 { 145 - INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); 146 - kblockd_schedule_work(rq->q, &rq->mq_flush_data); 145 + if (rq->q->mq_ops) { 146 + INIT_WORK(&rq->mq_flush_work, mq_flush_run); 147 + kblockd_schedule_work(rq->q, &rq->mq_flush_work); 148 + return false; 149 + } else { 150 + list_add_tail(&rq->queuelist, &rq->q->queue_head); 151 + return true; 152 + } 147 153 } 148 154 149 155 /** ··· 193 187 194 188 case REQ_FSEQ_DATA: 195 189 list_move_tail(&rq->flush.list, &q->flush_data_in_flight); 196 - if (q->mq_ops) 197 - blk_mq_flush_data_insert(rq); 198 - else { 199 - list_add(&rq->queuelist, &q->queue_head); 200 - queued = true; 201 - } 190 + queued = blk_flush_queue_rq(rq); 202 191 break; 203 192 204 193 case REQ_FSEQ_DONE: ··· 217 216 } 218 217 219 218 kicked = blk_kick_flush(q); 220 - /* blk_mq_run_flush will run queue */ 221 - if (q->mq_ops) 222 - return queued; 223 219 return kicked | queued; 224 220 } 225 221 ··· 228 230 struct request *rq, *n; 229 231 unsigned long flags = 0; 230 232 231 - if (q->mq_ops) { 232 - blk_mq_free_request(flush_rq); 233 + if (q->mq_ops) 233 234 spin_lock_irqsave(&q->mq_flush_lock, flags); 234 - } 235 + 235 236 running = &q->flush_queue[q->flush_running_idx]; 236 237 BUG_ON(q->flush_pending_idx == q->flush_running_idx); 237 238 ··· 260 263 * kblockd. 
261 264 */ 262 265 if (queued || q->flush_queue_delayed) { 263 - if (!q->mq_ops) 264 - blk_run_queue_async(q); 265 - else 266 - /* 267 - * This can be optimized to only run queues with requests 268 - * queued if necessary. 269 - */ 270 - blk_mq_run_queues(q, true); 266 + WARN_ON(q->mq_ops); 267 + blk_run_queue_async(q); 271 268 } 272 269 q->flush_queue_delayed = 0; 273 270 if (q->mq_ops) 274 271 spin_unlock_irqrestore(&q->mq_flush_lock, flags); 275 - } 276 - 277 - static void mq_flush_work(struct work_struct *work) 278 - { 279 - struct request_queue *q; 280 - struct request *rq; 281 - 282 - q = container_of(work, struct request_queue, mq_flush_work); 283 - 284 - /* We don't need set REQ_FLUSH_SEQ, it's for consistency */ 285 - rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ, 286 - __GFP_WAIT|GFP_ATOMIC, true); 287 - rq->cmd_type = REQ_TYPE_FS; 288 - rq->end_io = flush_end_io; 289 - 290 - blk_mq_run_request(rq, true, false); 291 - } 292 - 293 - /* 294 - * We can't directly use q->flush_rq, because it doesn't have tag and is not in 295 - * hctx->rqs[]. so we must allocate a new request, since we can't sleep here, 296 - * so offload the work to workqueue. 297 - * 298 - * Note: we assume a flush request finished in any hardware queue will flush 299 - * the whole disk cache. 300 - */ 301 - static void mq_run_flush(struct request_queue *q) 302 - { 303 - kblockd_schedule_work(q, &q->mq_flush_work); 304 272 } 305 273 306 274 /** ··· 302 340 * different from running_idx, which means flush is in flight. 
303 341 */ 304 342 q->flush_pending_idx ^= 1; 343 + 305 344 if (q->mq_ops) { 306 - mq_run_flush(q); 307 - return true; 345 + struct blk_mq_ctx *ctx = first_rq->mq_ctx; 346 + struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); 347 + 348 + blk_mq_rq_init(hctx, q->flush_rq); 349 + q->flush_rq->mq_ctx = ctx; 350 + 351 + /* 352 + * Reuse the tag value from the fist waiting request, 353 + * with blk-mq the tag is generated during request 354 + * allocation and drivers can rely on it being inside 355 + * the range they asked for. 356 + */ 357 + q->flush_rq->tag = first_rq->tag; 358 + } else { 359 + blk_rq_init(q, q->flush_rq); 308 360 } 309 361 310 - blk_rq_init(q, &q->flush_rq); 311 - q->flush_rq.cmd_type = REQ_TYPE_FS; 312 - q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; 313 - q->flush_rq.rq_disk = first_rq->rq_disk; 314 - q->flush_rq.end_io = flush_end_io; 362 + q->flush_rq->cmd_type = REQ_TYPE_FS; 363 + q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; 364 + q->flush_rq->rq_disk = first_rq->rq_disk; 365 + q->flush_rq->end_io = flush_end_io; 315 366 316 - list_add_tail(&q->flush_rq.queuelist, &q->queue_head); 317 - return true; 367 + return blk_flush_queue_rq(q->flush_rq); 318 368 } 319 369 320 370 static void flush_data_end_io(struct request *rq, int error) ··· 532 558 void blk_mq_init_flush(struct request_queue *q) 533 559 { 534 560 spin_lock_init(&q->mq_flush_lock); 535 - INIT_WORK(&q->mq_flush_work, mq_flush_work); 536 561 }

block/blk-lib.c

··· 119 119 120 120 atomic_inc(&bb.done); 121 121 submit_bio(type, bio); 122 + 123 + /* 124 + * We can loop for a long time in here, if someone does 125 + * full device discards (like mkfs). Be nice and allow 126 + * us to schedule out to avoid softlocking if preempt 127 + * is disabled. 128 + */ 129 + cond_resched(); 122 130 } 123 131 blk_finish_plug(&plug); 124 132

+62 -29

block/blk-merge.c

··· 21 21 if (!bio) 22 22 return 0; 23 23 24 + /* 25 + * This should probably be returning 0, but blk_add_request_payload() 26 + * (Christoph!!!!) 27 + */ 28 + if (bio->bi_rw & REQ_DISCARD) 29 + return 1; 30 + 31 + if (bio->bi_rw & REQ_WRITE_SAME) 32 + return 1; 33 + 24 34 fbio = bio; 25 35 cluster = blk_queue_cluster(q); 26 36 seg_size = 0; ··· 171 161 *bvprv = *bvec; 172 162 } 173 163 164 + static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, 165 + struct scatterlist *sglist, 166 + struct scatterlist **sg) 167 + { 168 + struct bio_vec bvec, bvprv = { NULL }; 169 + struct bvec_iter iter; 170 + int nsegs, cluster; 171 + 172 + nsegs = 0; 173 + cluster = blk_queue_cluster(q); 174 + 175 + if (bio->bi_rw & REQ_DISCARD) { 176 + /* 177 + * This is a hack - drivers should be neither modifying the 178 + * biovec, nor relying on bi_vcnt - but because of 179 + * blk_add_request_payload(), a discard bio may or may not have 180 + * a payload we need to set up here (thank you Christoph) and 181 + * bi_vcnt is really the only way of telling if we need to. 182 + */ 183 + 184 + if (bio->bi_vcnt) 185 + goto single_segment; 186 + 187 + return 0; 188 + } 189 + 190 + if (bio->bi_rw & REQ_WRITE_SAME) { 191 + single_segment: 192 + *sg = sglist; 193 + bvec = bio_iovec(bio); 194 + sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); 195 + return 1; 196 + } 197 + 198 + for_each_bio(bio) 199 + bio_for_each_segment(bvec, bio, iter) 200 + __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, 201 + &nsegs, &cluster); 202 + 203 + return nsegs; 204 + } 205 + 174 206 /* 175 207 * map a request to scatterlist, return number of sg entries setup. 
Caller 176 208 * must make sure sg can hold rq->nr_phys_segments entries ··· 220 168 int blk_rq_map_sg(struct request_queue *q, struct request *rq, 221 169 struct scatterlist *sglist) 222 170 { 223 - struct bio_vec bvec, bvprv = { NULL }; 224 - struct req_iterator iter; 225 - struct scatterlist *sg; 226 - int nsegs, cluster; 171 + struct scatterlist *sg = NULL; 172 + int nsegs = 0; 227 173 228 - nsegs = 0; 229 - cluster = blk_queue_cluster(q); 230 - 231 - /* 232 - * for each bio in rq 233 - */ 234 - sg = NULL; 235 - rq_for_each_segment(bvec, rq, iter) { 236 - __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, 237 - &nsegs, &cluster); 238 - } /* segments in rq */ 239 - 174 + if (rq->bio) 175 + nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); 240 176 241 177 if (unlikely(rq->cmd_flags & REQ_COPY_USER) && 242 178 (blk_rq_bytes(rq) & q->dma_pad_mask)) { ··· 270 230 int blk_bio_map_sg(struct request_queue *q, struct bio *bio, 271 231 struct scatterlist *sglist) 272 232 { 273 - struct bio_vec bvec, bvprv = { NULL }; 274 - struct scatterlist *sg; 275 - int nsegs, cluster; 276 - struct bvec_iter iter; 233 + struct scatterlist *sg = NULL; 234 + int nsegs; 235 + struct bio *next = bio->bi_next; 236 + bio->bi_next = NULL; 277 237 278 - nsegs = 0; 279 - cluster = blk_queue_cluster(q); 280 - 281 - sg = NULL; 282 - bio_for_each_segment(bvec, bio, iter) { 283 - __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, 284 - &nsegs, &cluster); 285 - } /* segments in bio */ 286 - 238 + nsegs = __blk_bios_map_sg(q, bio, sglist, &sg); 239 + bio->bi_next = next; 287 240 if (sg) 288 241 sg_mark_end(sg); 289 242

+1 -1

block/blk-mq-tag.c

··· 184 184 ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) 185 185 { 186 186 char *orig_page = page; 187 - int cpu; 187 + unsigned int cpu; 188 188 189 189 if (!tags) 190 190 return 0;

+87 -56

block/blk-mq.c

··· 226 226 return rq; 227 227 } 228 228 229 - struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 230 - gfp_t gfp, bool reserved) 229 + struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp) 231 230 { 232 231 struct request *rq; 233 232 234 233 if (blk_mq_queue_enter(q)) 235 234 return NULL; 236 235 237 - rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); 236 + rq = blk_mq_alloc_request_pinned(q, rw, gfp, false); 238 237 if (rq) 239 238 blk_mq_put_ctx(rq->mq_ctx); 240 239 return rq; ··· 257 258 /* 258 259 * Re-init and set pdu, if we have it 259 260 */ 260 - static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) 261 + void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) 261 262 { 262 263 blk_rq_init(hctx->queue, rq); 263 264 ··· 304 305 bio_endio(bio, error); 305 306 } 306 307 307 - void blk_mq_complete_request(struct request *rq, int error) 308 + void blk_mq_end_io(struct request *rq, int error) 308 309 { 309 310 struct bio *bio = rq->bio; 310 311 unsigned int bytes = 0; ··· 329 330 else 330 331 blk_mq_free_request(rq); 331 332 } 333 + EXPORT_SYMBOL(blk_mq_end_io); 332 334 333 - void __blk_mq_end_io(struct request *rq, int error) 334 - { 335 - if (!blk_mark_rq_complete(rq)) 336 - blk_mq_complete_request(rq, error); 337 - } 338 - 339 - static void blk_mq_end_io_remote(void *data) 335 + static void __blk_mq_complete_request_remote(void *data) 340 336 { 341 337 struct request *rq = data; 342 338 343 - __blk_mq_end_io(rq, rq->errors); 339 + rq->q->softirq_done_fn(rq); 344 340 } 345 341 346 - /* 347 - * End IO on this request on a multiqueue enabled driver. We'll either do 348 - * it directly inline, or punt to a local IPI handler on the matching 349 - * remote CPU. 
350 - */ 351 - void blk_mq_end_io(struct request *rq, int error) 342 + void __blk_mq_complete_request(struct request *rq) 352 343 { 353 344 struct blk_mq_ctx *ctx = rq->mq_ctx; 354 345 int cpu; 355 346 356 - if (!ctx->ipi_redirect) 357 - return __blk_mq_end_io(rq, error); 347 + if (!ctx->ipi_redirect) { 348 + rq->q->softirq_done_fn(rq); 349 + return; 350 + } 358 351 359 352 cpu = get_cpu(); 360 353 if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { 361 - rq->errors = error; 362 - rq->csd.func = blk_mq_end_io_remote; 354 + rq->csd.func = __blk_mq_complete_request_remote; 363 355 rq->csd.info = rq; 364 356 rq->csd.flags = 0; 365 357 __smp_call_function_single(ctx->cpu, &rq->csd, 0); 366 358 } else { 367 - __blk_mq_end_io(rq, error); 359 + rq->q->softirq_done_fn(rq); 368 360 } 369 361 put_cpu(); 370 362 } 371 - EXPORT_SYMBOL(blk_mq_end_io); 372 363 373 - static void blk_mq_start_request(struct request *rq) 364 + /** 365 + * blk_mq_complete_request - end I/O on a request 366 + * @rq: the request being processed 367 + * 368 + * Description: 369 + * Ends all I/O on a request. It does not handle partial completions. 370 + * The actual completion happens out-of-order, through a IPI handler. 371 + **/ 372 + void blk_mq_complete_request(struct request *rq) 373 + { 374 + if (unlikely(blk_should_fake_timeout(rq->q))) 375 + return; 376 + if (!blk_mark_rq_complete(rq)) 377 + __blk_mq_complete_request(rq); 378 + } 379 + EXPORT_SYMBOL(blk_mq_complete_request); 380 + 381 + static void blk_mq_start_request(struct request *rq, bool last) 374 382 { 375 383 struct request_queue *q = rq->q; 376 384 ··· 390 384 */ 391 385 rq->deadline = jiffies + q->rq_timeout; 392 386 set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 387 + 388 + if (q->dma_drain_size && blk_rq_bytes(rq)) { 389 + /* 390 + * Make sure space for the drain appears. We know we can do 391 + * this because max_hw_segments has been adjusted to be one 392 + * fewer than the device can handle. 
393 + */ 394 + rq->nr_phys_segments++; 395 + } 396 + 397 + /* 398 + * Flag the last request in the series so that drivers know when IO 399 + * should be kicked off, if they don't do it on a per-request basis. 400 + * 401 + * Note: the flag isn't the only condition drivers should do kick off. 402 + * If drive is busy, the last request might not have the bit set. 403 + */ 404 + if (last) 405 + rq->cmd_flags |= REQ_END; 393 406 } 394 407 395 408 static void blk_mq_requeue_request(struct request *rq) ··· 417 392 418 393 trace_block_rq_requeue(q, rq); 419 394 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 395 + 396 + rq->cmd_flags &= ~REQ_END; 397 + 398 + if (q->dma_drain_size && blk_rq_bytes(rq)) 399 + rq->nr_phys_segments--; 420 400 } 421 401 422 402 struct blk_mq_timeout_data { ··· 589 559 590 560 rq = list_first_entry(&rq_list, struct request, queuelist); 591 561 list_del_init(&rq->queuelist); 592 - blk_mq_start_request(rq); 593 562 594 - /* 595 - * Last request in the series. Flag it as such, this 596 - * enables drivers to know when IO should be kicked off, 597 - * if they don't do it on a per-request basis. 598 - * 599 - * Note: the flag isn't the only condition drivers 600 - * should do kick off. If drive is busy, the last 601 - * request might not have the bit set. 
602 - */ 603 - if (list_empty(&rq_list)) 604 - rq->cmd_flags |= REQ_END; 563 + blk_mq_start_request(rq, list_empty(&rq_list)); 605 564 606 565 ret = q->mq_ops->queue_rq(hctx, rq); 607 566 switch (ret) { ··· 608 589 break; 609 590 default: 610 591 pr_err("blk-mq: bad return on queue: %d\n", ret); 611 - rq->errors = -EIO; 612 592 case BLK_MQ_RQ_QUEUE_ERROR: 593 + rq->errors = -EIO; 613 594 blk_mq_end_io(rq, rq->errors); 614 595 break; 615 596 } ··· 712 693 } 713 694 714 695 static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, 715 - struct request *rq) 696 + struct request *rq, bool at_head) 716 697 { 717 698 struct blk_mq_ctx *ctx = rq->mq_ctx; 718 699 719 700 trace_block_rq_insert(hctx->queue, rq); 720 701 721 - list_add_tail(&rq->queuelist, &ctx->rq_list); 702 + if (at_head) 703 + list_add(&rq->queuelist, &ctx->rq_list); 704 + else 705 + list_add_tail(&rq->queuelist, &ctx->rq_list); 722 706 blk_mq_hctx_mark_pending(hctx, ctx); 723 707 724 708 /* ··· 731 709 } 732 710 733 711 void blk_mq_insert_request(struct request_queue *q, struct request *rq, 734 - bool run_queue) 712 + bool at_head, bool run_queue) 735 713 { 736 714 struct blk_mq_hw_ctx *hctx; 737 715 struct blk_mq_ctx *ctx, *current_ctx; ··· 750 728 rq->mq_ctx = ctx; 751 729 } 752 730 spin_lock(&ctx->lock); 753 - __blk_mq_insert_request(hctx, rq); 731 + __blk_mq_insert_request(hctx, rq, at_head); 754 732 spin_unlock(&ctx->lock); 755 733 756 734 blk_mq_put_ctx(current_ctx); ··· 782 760 783 761 /* ctx->cpu might be offline */ 784 762 spin_lock(&ctx->lock); 785 - __blk_mq_insert_request(hctx, rq); 763 + __blk_mq_insert_request(hctx, rq, false); 786 764 spin_unlock(&ctx->lock); 787 765 788 766 blk_mq_put_ctx(current_ctx); ··· 820 798 rq = list_first_entry(list, struct request, queuelist); 821 799 list_del_init(&rq->queuelist); 822 800 rq->mq_ctx = ctx; 823 - __blk_mq_insert_request(hctx, rq); 801 + __blk_mq_insert_request(hctx, rq, false); 824 802 } 825 803 spin_unlock(&ctx->lock); 826 804 ··· 910 888 
911 889 blk_queue_bounce(q, &bio); 912 890 891 + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 892 + bio_endio(bio, -EIO); 893 + return; 894 + } 895 + 913 896 if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) 914 897 return; 915 898 ··· 977 950 __blk_mq_free_request(hctx, ctx, rq); 978 951 else { 979 952 blk_mq_bio_to_request(rq, bio); 980 - __blk_mq_insert_request(hctx, rq); 953 + __blk_mq_insert_request(hctx, rq, false); 981 954 } 982 955 983 956 spin_unlock(&ctx->lock); ··· 1336 1309 reg->queue_depth = BLK_MQ_MAX_DEPTH; 1337 1310 } 1338 1311 1339 - /* 1340 - * Set aside a tag for flush requests. It will only be used while 1341 - * another flush request is in progress but outside the driver. 1342 - * 1343 - * TODO: only allocate if flushes are supported 1344 - */ 1345 - reg->queue_depth++; 1346 - reg->reserved_tags++; 1347 - 1348 1312 if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) 1349 1313 return ERR_PTR(-EINVAL); 1350 1314 ··· 1378 1360 q->mq_ops = reg->ops; 1379 1361 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; 1380 1362 1363 + q->sg_reserved_size = INT_MAX; 1364 + 1381 1365 blk_queue_make_request(q, blk_mq_make_request); 1382 1366 blk_queue_rq_timed_out(q, reg->ops->timeout); 1383 1367 if (reg->timeout) 1384 1368 blk_queue_rq_timeout(q, reg->timeout); 1385 1369 1370 + if (reg->ops->complete) 1371 + blk_queue_softirq_done(q, reg->ops->complete); 1372 + 1386 1373 blk_mq_init_flush(q); 1387 1374 blk_mq_init_cpu_queues(q, reg->nr_hw_queues); 1388 1375 1389 - if (blk_mq_init_hw_queues(q, reg, driver_data)) 1376 + q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size, 1377 + cache_line_size()), GFP_KERNEL); 1378 + if (!q->flush_rq) 1390 1379 goto err_hw; 1380 + 1381 + if (blk_mq_init_hw_queues(q, reg, driver_data)) 1382 + goto err_flush_rq; 1391 1383 1392 1384 blk_mq_map_swqueue(q); 1393 1385 ··· 1406 1378 mutex_unlock(&all_q_mutex); 1407 1379 1408 1380 return q; 1381 + 1382 + err_flush_rq: 1383 + 
kfree(q->flush_rq); 1409 1384 err_hw: 1410 1385 kfree(q->mq_map); 1411 1386 err_map:

+2 -2

block/blk-mq.h

··· 22 22 struct kobject kobj; 23 23 }; 24 24 25 - void __blk_mq_end_io(struct request *rq, int error); 26 - void blk_mq_complete_request(struct request *rq, int error); 25 + void __blk_mq_complete_request(struct request *rq); 27 26 void blk_mq_run_request(struct request *rq, bool run_queue, bool async); 28 27 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 29 28 void blk_mq_init_flush(struct request_queue *q); 30 29 void blk_mq_drain_queue(struct request_queue *q); 31 30 void blk_mq_free_queue(struct request_queue *q); 31 + void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq); 32 32 33 33 /* 34 34 * CPU hotplug helpers

block/blk-sysfs.c

··· 549 549 if (q->mq_ops) 550 550 blk_mq_free_queue(q); 551 551 552 + kfree(q->flush_rq); 553 + 552 554 blk_trace_shutdown(q); 553 555 554 556 bdi_destroy(&q->backing_dev_info);

+1 -1

block/blk-timeout.c

··· 91 91 case BLK_EH_HANDLED: 92 92 /* Can we use req->errors here? */ 93 93 if (q->mq_ops) 94 - blk_mq_complete_request(req, req->errors); 94 + __blk_mq_complete_request(req); 95 95 else 96 96 __blk_complete_request(req); 97 97 break;

+1 -1

block/blk.h

··· 113 113 q->flush_queue_delayed = 1; 114 114 return NULL; 115 115 } 116 - if (unlikely(blk_queue_dying(q)) || 116 + if (unlikely(blk_queue_bypass(q)) || 117 117 !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) 118 118 return NULL; 119 119 }

+33 -64

drivers/block/null_blk.c

··· 60 60 NULL_IRQ_NONE = 0, 61 61 NULL_IRQ_SOFTIRQ = 1, 62 62 NULL_IRQ_TIMER = 2, 63 + }; 63 64 65 + enum { 64 66 NULL_Q_BIO = 0, 65 67 NULL_Q_RQ = 1, 66 68 NULL_Q_MQ = 2, ··· 174 172 175 173 static void end_cmd(struct nullb_cmd *cmd) 176 174 { 177 - if (cmd->rq) { 178 - if (queue_mode == NULL_Q_MQ) 179 - blk_mq_end_io(cmd->rq, 0); 180 - else { 181 - INIT_LIST_HEAD(&cmd->rq->queuelist); 182 - blk_end_request_all(cmd->rq, 0); 183 - } 184 - } else if (cmd->bio) 175 + switch (queue_mode) { 176 + case NULL_Q_MQ: 177 + blk_mq_end_io(cmd->rq, 0); 178 + return; 179 + case NULL_Q_RQ: 180 + INIT_LIST_HEAD(&cmd->rq->queuelist); 181 + blk_end_request_all(cmd->rq, 0); 182 + break; 183 + case NULL_Q_BIO: 185 184 bio_endio(cmd->bio, 0); 185 + break; 186 + } 186 187 187 - if (queue_mode != NULL_Q_MQ) 188 - free_cmd(cmd); 188 + free_cmd(cmd); 189 189 } 190 190 191 191 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) ··· 199 195 cq = &per_cpu(completion_queues, smp_processor_id()); 200 196 201 197 while ((entry = llist_del_all(&cq->list)) != NULL) { 198 + entry = llist_reverse_order(entry); 202 199 do { 203 200 cmd = container_of(entry, struct nullb_cmd, ll_list); 204 201 end_cmd(cmd); ··· 226 221 227 222 static void null_softirq_done_fn(struct request *rq) 228 223 { 229 - blk_end_request_all(rq, 0); 224 + end_cmd(rq->special); 230 225 } 231 - 232 - #ifdef CONFIG_SMP 233 - 234 - static void null_ipi_cmd_end_io(void *data) 235 - { 236 - struct completion_queue *cq; 237 - struct llist_node *entry, *next; 238 - struct nullb_cmd *cmd; 239 - 240 - cq = &per_cpu(completion_queues, smp_processor_id()); 241 - 242 - entry = llist_del_all(&cq->list); 243 - 244 - while (entry) { 245 - next = entry->next; 246 - cmd = llist_entry(entry, struct nullb_cmd, ll_list); 247 - end_cmd(cmd); 248 - entry = next; 249 - } 250 - } 251 - 252 - static void null_cmd_end_ipi(struct nullb_cmd *cmd) 253 - { 254 - struct call_single_data *data = &cmd->csd; 255 - int cpu = get_cpu(); 256 
- struct completion_queue *cq = &per_cpu(completion_queues, cpu); 257 - 258 - cmd->ll_list.next = NULL; 259 - 260 - if (llist_add(&cmd->ll_list, &cq->list)) { 261 - data->func = null_ipi_cmd_end_io; 262 - data->flags = 0; 263 - __smp_call_function_single(cpu, data, 0); 264 - } 265 - 266 - put_cpu(); 267 - } 268 - 269 - #endif /* CONFIG_SMP */ 270 226 271 227 static inline void null_handle_cmd(struct nullb_cmd *cmd) 272 228 { 273 229 /* Complete IO by inline, softirq or timer */ 274 230 switch (irqmode) { 231 + case NULL_IRQ_SOFTIRQ: 232 + switch (queue_mode) { 233 + case NULL_Q_MQ: 234 + blk_mq_complete_request(cmd->rq); 235 + break; 236 + case NULL_Q_RQ: 237 + blk_complete_request(cmd->rq); 238 + break; 239 + case NULL_Q_BIO: 240 + /* 241 + * XXX: no proper submitting cpu information available. 242 + */ 243 + end_cmd(cmd); 244 + break; 245 + } 246 + break; 275 247 case NULL_IRQ_NONE: 276 248 end_cmd(cmd); 277 - break; 278 - case NULL_IRQ_SOFTIRQ: 279 - #ifdef CONFIG_SMP 280 - null_cmd_end_ipi(cmd); 281 - #else 282 - end_cmd(cmd); 283 - #endif 284 249 break; 285 250 case NULL_IRQ_TIMER: 286 251 null_cmd_end_timer(cmd); ··· 386 411 .queue_rq = null_queue_rq, 387 412 .map_queue = blk_mq_map_queue, 388 413 .init_hctx = null_init_hctx, 414 + .complete = null_softirq_done_fn, 389 415 }; 390 416 391 417 static struct blk_mq_reg null_mq_reg = { ··· 585 609 { 586 610 unsigned int i; 587 611 588 - #if !defined(CONFIG_SMP) 589 - if (irqmode == NULL_IRQ_SOFTIRQ) { 590 - pr_warn("null_blk: softirq completions not available.\n"); 591 - pr_warn("null_blk: using direct completions.\n"); 592 - irqmode = NULL_IRQ_NONE; 593 - } 594 - #endif 595 612 if (bs > PAGE_SIZE) { 596 613 pr_warn("null_blk: invalid block size\n"); 597 614 pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);

+4 -3

drivers/block/virtio_blk.c

··· 110 110 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); 111 111 } 112 112 113 - static inline void virtblk_request_done(struct virtblk_req *vbr) 113 + static inline void virtblk_request_done(struct request *req) 114 114 { 115 - struct request *req = vbr->req; 115 + struct virtblk_req *vbr = req->special; 116 116 int error = virtblk_result(vbr); 117 117 118 118 if (req->cmd_type == REQ_TYPE_BLOCK_PC) { ··· 138 138 do { 139 139 virtqueue_disable_cb(vq); 140 140 while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { 141 - virtblk_request_done(vbr); 141 + blk_mq_complete_request(vbr->req); 142 142 req_done = true; 143 143 } 144 144 if (unlikely(virtqueue_is_broken(vq))) ··· 479 479 .map_queue = blk_mq_map_queue, 480 480 .alloc_hctx = blk_mq_alloc_single_hw_queue, 481 481 .free_hctx = blk_mq_free_single_hw_queue, 482 + .complete = virtblk_request_done, 482 483 }; 483 484 484 485 static struct blk_mq_reg virtio_mq_reg = {

+44 -24

drivers/block/xen-blkback/blkback.c

··· 299 299 BUG_ON(num != 0); 300 300 } 301 301 302 - static void unmap_purged_grants(struct work_struct *work) 302 + void xen_blkbk_unmap_purged_grants(struct work_struct *work) 303 303 { 304 304 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 305 305 struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; ··· 375 375 376 376 pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); 377 377 378 - INIT_LIST_HEAD(&blkif->persistent_purge_list); 378 + BUG_ON(!list_empty(&blkif->persistent_purge_list)); 379 379 root = &blkif->persistent_gnts; 380 380 purge_list: 381 381 foreach_grant_safe(persistent_gnt, n, root, node) { ··· 420 420 blkif->vbd.overflow_max_grants = 0; 421 421 422 422 /* We can defer this work */ 423 - INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants); 424 423 schedule_work(&blkif->persistent_purge_work); 425 424 pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total); 426 425 return; ··· 624 625 print_stats(blkif); 625 626 } 626 627 627 - /* Since we are shutting down remove all pages from the buffer */ 628 - shrink_free_pagepool(blkif, 0 /* All */); 629 - 630 - /* Free all persistent grant pages */ 631 - if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) 632 - free_persistent_gnts(blkif, &blkif->persistent_gnts, 633 - blkif->persistent_gnt_c); 634 - 635 - BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); 636 - blkif->persistent_gnt_c = 0; 628 + /* Drain pending purge work */ 629 + flush_work(&blkif->persistent_purge_work); 637 630 638 631 if (log_stats) 639 632 print_stats(blkif); ··· 634 643 xen_blkif_put(blkif); 635 644 636 645 return 0; 646 + } 647 + 648 + /* 649 + * Remove persistent grants and empty the pool of free pages 650 + */ 651 + void xen_blkbk_free_caches(struct xen_blkif *blkif) 652 + { 653 + /* Free all persistent grant pages */ 654 + if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) 655 + free_persistent_gnts(blkif, &blkif->persistent_gnts, 656 + blkif->persistent_gnt_c); 657 + 658 + 
BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); 659 + blkif->persistent_gnt_c = 0; 660 + 661 + /* Since we are shutting down remove all pages from the buffer */ 662 + shrink_free_pagepool(blkif, 0 /* All */); 637 663 } 638 664 639 665 /* ··· 846 838 struct grant_page **pages = pending_req->indirect_pages; 847 839 struct xen_blkif *blkif = pending_req->blkif; 848 840 int indirect_grefs, rc, n, nseg, i; 849 - struct blkif_request_segment_aligned *segments = NULL; 841 + struct blkif_request_segment *segments = NULL; 850 842 851 843 nseg = pending_req->nr_pages; 852 844 indirect_grefs = INDIRECT_PAGES(nseg); ··· 942 934 { 943 935 atomic_set(&blkif->drain, 1); 944 936 do { 945 - /* The initial value is one, and one refcnt taken at the 946 - * start of the xen_blkif_schedule thread. */ 947 - if (atomic_read(&blkif->refcnt) <= 2) 937 + if (atomic_read(&blkif->inflight) == 0) 948 938 break; 949 939 wait_for_completion_interruptible_timeout( 950 940 &blkif->drain_complete, HZ); ··· 982 976 * the proper response on the ring. 983 977 */ 984 978 if (atomic_dec_and_test(&pending_req->pendcnt)) { 985 - xen_blkbk_unmap(pending_req->blkif, 979 + struct xen_blkif *blkif = pending_req->blkif; 980 + 981 + xen_blkbk_unmap(blkif, 986 982 pending_req->segments, 987 983 pending_req->nr_pages); 988 - make_response(pending_req->blkif, pending_req->id, 984 + make_response(blkif, pending_req->id, 989 985 pending_req->operation, pending_req->status); 990 - xen_blkif_put(pending_req->blkif); 991 - if (atomic_read(&pending_req->blkif->refcnt) <= 2) { 992 - if (atomic_read(&pending_req->blkif->drain)) 993 - complete(&pending_req->blkif->drain_complete); 986 + free_req(blkif, pending_req); 987 + /* 988 + * Make sure the request is freed before releasing blkif, 989 + * or there could be a race between free_req and the 990 + * cleanup done in xen_blkif_free during shutdown. 
991 + * 992 + * NB: The fact that we might try to wake up pending_free_wq 993 + * before drain_complete (in case there's a drain going on) 994 + * it's not a problem with our current implementation 995 + * because we can assure there's no thread waiting on 996 + * pending_free_wq if there's a drain going on, but it has 997 + * to be taken into account if the current model is changed. 998 + */ 999 + if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { 1000 + complete(&blkif->drain_complete); 994 1001 } 995 - free_req(pending_req->blkif, pending_req); 1002 + xen_blkif_put(blkif); 996 1003 } 997 1004 } 998 1005 ··· 1259 1240 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. 1260 1241 */ 1261 1242 xen_blkif_get(blkif); 1243 + atomic_inc(&blkif->inflight); 1262 1244 1263 1245 for (i = 0; i < nseg; i++) { 1264 1246 while ((bio == NULL) ||

+4 -1

drivers/block/xen-blkback/common.h

··· 57 57 #define MAX_INDIRECT_SEGMENTS 256 58 58 59 59 #define SEGS_PER_INDIRECT_FRAME \ 60 - (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) 60 + (PAGE_SIZE/sizeof(struct blkif_request_segment)) 61 61 #define MAX_INDIRECT_PAGES \ 62 62 ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) 63 63 #define INDIRECT_PAGES(_segs) \ ··· 278 278 /* for barrier (drain) requests */ 279 279 struct completion drain_complete; 280 280 atomic_t drain; 281 + atomic_t inflight; 281 282 /* One thread per one blkif. */ 282 283 struct task_struct *xenblkd; 283 284 unsigned int waiting_reqs; ··· 377 376 irqreturn_t xen_blkif_be_int(int irq, void *dev_id); 378 377 int xen_blkif_schedule(void *arg); 379 378 int xen_blkif_purge_persistent(void *arg); 379 + void xen_blkbk_free_caches(struct xen_blkif *blkif); 380 380 381 381 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, 382 382 struct backend_info *be, int state); ··· 385 383 int xen_blkbk_barrier(struct xenbus_transaction xbt, 386 384 struct backend_info *be, int state); 387 385 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); 386 + void xen_blkbk_unmap_purged_grants(struct work_struct *work); 388 387 389 388 static inline void blkif_get_x86_32_req(struct blkif_request *dst, 390 389 struct blkif_x86_32_request *src)

+14

drivers/block/xen-blkback/xenbus.c

··· 125 125 blkif->persistent_gnts.rb_node = NULL; 126 126 spin_lock_init(&blkif->free_pages_lock); 127 127 INIT_LIST_HEAD(&blkif->free_pages); 128 + INIT_LIST_HEAD(&blkif->persistent_purge_list); 128 129 blkif->free_pages_num = 0; 129 130 atomic_set(&blkif->persistent_gnt_in_use, 0); 131 + atomic_set(&blkif->inflight, 0); 132 + INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants); 130 133 131 134 INIT_LIST_HEAD(&blkif->pending_free); 132 135 ··· 261 258 262 259 if (!atomic_dec_and_test(&blkif->refcnt)) 263 260 BUG(); 261 + 262 + /* Remove all persistent grants and the cache of ballooned pages. */ 263 + xen_blkbk_free_caches(blkif); 264 + 265 + /* Make sure everything is drained before shutting down */ 266 + BUG_ON(blkif->persistent_gnt_c != 0); 267 + BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0); 268 + BUG_ON(blkif->free_pages_num != 0); 269 + BUG_ON(!list_empty(&blkif->persistent_purge_list)); 270 + BUG_ON(!list_empty(&blkif->free_pages)); 271 + BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); 264 272 265 273 /* Check that there is no request in use */ 266 274 list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {

+7 -4

drivers/block/xen-blkfront.c

··· 162 162 #define DEV_NAME "xvd" /* name in /dev */ 163 163 164 164 #define SEGS_PER_INDIRECT_FRAME \ 165 - (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) 165 + (PAGE_SIZE/sizeof(struct blkif_request_segment)) 166 166 #define INDIRECT_GREFS(_segs) \ 167 167 ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) 168 168 ··· 393 393 unsigned long id; 394 394 unsigned int fsect, lsect; 395 395 int i, ref, n; 396 - struct blkif_request_segment_aligned *segments = NULL; 396 + struct blkif_request_segment *segments = NULL; 397 397 398 398 /* 399 399 * Used to store if we are able to queue the request by just using ··· 550 550 } else { 551 551 n = i % SEGS_PER_INDIRECT_FRAME; 552 552 segments[n] = 553 - (struct blkif_request_segment_aligned) { 553 + (struct blkif_request_segment) { 554 554 .gref = ref, 555 555 .first_sect = fsect, 556 556 .last_sect = lsect }; ··· 1904 1904 case XenbusStateReconfiguring: 1905 1905 case XenbusStateReconfigured: 1906 1906 case XenbusStateUnknown: 1907 - case XenbusStateClosed: 1908 1907 break; 1909 1908 1910 1909 case XenbusStateConnected: 1911 1910 blkfront_connect(info); 1912 1911 break; 1913 1912 1913 + case XenbusStateClosed: 1914 + if (dev->state == XenbusStateClosed) 1915 + break; 1916 + /* Missed the backend's Closing state -- fallthrough */ 1914 1917 case XenbusStateClosing: 1915 1918 blkfront_closing(info); 1916 1919 break;

+3 -1

drivers/md/bcache/bcache.h

··· 210 210 #define GC_MARK_RECLAIMABLE 0 211 211 #define GC_MARK_DIRTY 1 212 212 #define GC_MARK_METADATA 2 213 - BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13); 213 + #define GC_SECTORS_USED_SIZE 13 214 + #define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE)) 215 + BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE); 214 216 BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1); 215 217 216 218 #include "journal.h"

+5 -2

drivers/md/bcache/bset.c

··· 23 23 for (k = i->start; k < bset_bkey_last(i); k = next) { 24 24 next = bkey_next(k); 25 25 26 - printk(KERN_ERR "block %u key %zi/%u: ", set, 26 + printk(KERN_ERR "block %u key %li/%u: ", set, 27 27 (uint64_t *) k - i->d, i->keys); 28 28 29 29 if (b->ops->key_dump) ··· 1185 1185 struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, 1186 1186 order); 1187 1187 if (!out) { 1188 + struct page *outp; 1189 + 1188 1190 BUG_ON(order > state->page_order); 1189 1191 1190 - out = page_address(mempool_alloc(state->pool, GFP_NOIO)); 1192 + outp = mempool_alloc(state->pool, GFP_NOIO); 1193 + out = page_address(outp); 1191 1194 used_mempool = true; 1192 1195 order = state->page_order; 1193 1196 }

+2 -2

drivers/md/bcache/btree.c

··· 1167 1167 /* guard against overflow */ 1168 1168 SET_GC_SECTORS_USED(g, min_t(unsigned, 1169 1169 GC_SECTORS_USED(g) + KEY_SIZE(k), 1170 - (1 << 14) - 1)); 1170 + MAX_GC_SECTORS_USED)); 1171 1171 1172 1172 BUG_ON(!GC_SECTORS_USED(g)); 1173 1173 } ··· 1805 1805 1806 1806 static size_t insert_u64s_remaining(struct btree *b) 1807 1807 { 1808 - ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys); 1808 + long ret = bch_btree_keys_u64s_remaining(&b->keys); 1809 1809 1810 1810 /* 1811 1811 * Might land in the middle of an existing extent and have to split it

+3 -3

drivers/md/bcache/request.c

··· 353 353 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); 354 354 struct bio *bio = op->bio, *n; 355 355 356 - if (op->bypass) 357 - return bch_data_invalidate(cl); 358 - 359 356 if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { 360 357 set_gc_sectors(op->c); 361 358 wake_up_gc(op->c); 362 359 } 360 + 361 + if (op->bypass) 362 + return bch_data_invalidate(cl); 363 363 364 364 /* 365 365 * Journal writes are marked REQ_FLUSH; if the original write was a

+1 -1

drivers/md/bcache/sysfs.c

··· 416 416 return MAP_CONTINUE; 417 417 } 418 418 419 - int bch_bset_print_stats(struct cache_set *c, char *buf) 419 + static int bch_bset_print_stats(struct cache_set *c, char *buf) 420 420 { 421 421 struct bset_stats_op op; 422 422 int ret;

+11 -2

fs/bio-integrity.c

··· 114 114 } 115 115 EXPORT_SYMBOL(bio_integrity_free); 116 116 117 + static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) 118 + { 119 + if (bip->bip_slab == BIO_POOL_NONE) 120 + return BIP_INLINE_VECS; 121 + 122 + return bvec_nr_vecs(bip->bip_slab); 123 + } 124 + 117 125 /** 118 126 * bio_integrity_add_page - Attach integrity metadata 119 127 * @bio: bio to update ··· 137 129 struct bio_integrity_payload *bip = bio->bi_integrity; 138 130 struct bio_vec *iv; 139 131 140 - if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { 132 + if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { 141 133 printk(KERN_ERR "%s: bip_vec full\n", __func__); 142 134 return 0; 143 135 } ··· 234 226 } 235 227 EXPORT_SYMBOL(bio_integrity_tag_size); 236 228 237 - int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set) 229 + static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, 230 + int set) 238 231 { 239 232 struct bio_integrity_payload *bip = bio->bi_integrity; 240 233 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);

+10 -5

fs/bio.c

··· 611 611 struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, 612 612 struct bio_set *bs) 613 613 { 614 - unsigned nr_iovecs = 0; 615 614 struct bvec_iter iter; 616 615 struct bio_vec bv; 617 616 struct bio *bio; ··· 637 638 * __bio_clone_fast() anyways. 638 639 */ 639 640 640 - bio_for_each_segment(bv, bio_src, iter) 641 - nr_iovecs++; 642 - 643 - bio = bio_alloc_bioset(gfp_mask, nr_iovecs, bs); 641 + bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); 644 642 if (!bio) 645 643 return NULL; 646 644 ··· 646 650 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; 647 651 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; 648 652 653 + if (bio->bi_rw & REQ_DISCARD) 654 + goto integrity_clone; 655 + 656 + if (bio->bi_rw & REQ_WRITE_SAME) { 657 + bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; 658 + goto integrity_clone; 659 + } 660 + 649 661 bio_for_each_segment(bv, bio_src, iter) 650 662 bio->bi_io_vec[bio->bi_vcnt++] = bv; 651 663 664 + integrity_clone: 652 665 if (bio_integrity(bio_src)) { 653 666 int ret; 654 667

+12

include/linux/bio.h

··· 250 250 struct bio_vec bv; 251 251 struct bvec_iter iter; 252 252 253 + /* 254 + * We special case discard/write same, because they interpret bi_size 255 + * differently: 256 + */ 257 + 258 + if (bio->bi_rw & REQ_DISCARD) 259 + return 1; 260 + 261 + if (bio->bi_rw & REQ_WRITE_SAME) 262 + return 1; 263 + 253 264 bio_for_each_segment(bv, bio, iter) 254 265 segs++; 255 266 ··· 343 332 extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); 344 333 345 334 extern struct bio_set *fs_bio_set; 335 + unsigned int bio_integrity_tag_size(struct bio *bio); 346 336 347 337 static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) 348 338 {

+7 -2

include/linux/blk-mq.h

··· 83 83 */ 84 84 rq_timed_out_fn *timeout; 85 85 86 + softirq_done_fn *complete; 87 + 86 88 /* 87 89 * Override for hctx allocations (should probably go) 88 90 */ ··· 121 119 122 120 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); 123 121 124 - void blk_mq_insert_request(struct request_queue *, struct request *, bool); 122 + void blk_mq_insert_request(struct request_queue *, struct request *, 123 + bool, bool); 125 124 void blk_mq_run_queues(struct request_queue *q, bool async); 126 125 void blk_mq_free_request(struct request *rq); 127 126 bool blk_mq_can_queue(struct blk_mq_hw_ctx *); 128 - struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); 127 + struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); 129 128 struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); 130 129 struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); 131 130 ··· 135 132 void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); 136 133 137 134 void blk_mq_end_io(struct request *rq, int error); 135 + 136 + void blk_mq_complete_request(struct request *rq); 138 137 139 138 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); 140 139 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);

+3 -8

include/linux/blkdev.h

··· 98 98 struct list_head queuelist; 99 99 union { 100 100 struct call_single_data csd; 101 - struct work_struct mq_flush_data; 101 + struct work_struct mq_flush_work; 102 102 }; 103 103 104 104 struct request_queue *q; ··· 448 448 unsigned long flush_pending_since; 449 449 struct list_head flush_queue[2]; 450 450 struct list_head flush_data_in_flight; 451 - union { 452 - struct request flush_rq; 453 - struct { 454 - spinlock_t mq_flush_lock; 455 - struct work_struct mq_flush_work; 456 - }; 457 - }; 451 + struct request *flush_rq; 452 + spinlock_t mq_flush_lock; 458 453 459 454 struct mutex sysfs_lock; 460 455

+14 -20

include/xen/interface/io/blkif.h

··· 113 113 * it's less than the number provided by the backend. The indirect_grefs field 114 114 * in blkif_request_indirect should be filled by the frontend with the 115 115 * grant references of the pages that are holding the indirect segments. 116 - * This pages are filled with an array of blkif_request_segment_aligned 117 - * that hold the information about the segments. The number of indirect 118 - * pages to use is determined by the maximum number of segments 119 - * a indirect request contains. Every indirect page can contain a maximum 120 - * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)), 121 - * so to calculate the number of indirect pages to use we have to do 122 - * ceil(indirect_segments/512). 116 + * These pages are filled with an array of blkif_request_segment that hold the 117 + * information about the segments. The number of indirect pages to use is 118 + * determined by the number of segments an indirect request contains. Every 119 + * indirect page can contain a maximum of 120 + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to 121 + * calculate the number of indirect pages to use we have to do 122 + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). 123 123 * 124 124 * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* 125 125 * create the "feature-max-indirect-segments" node! ··· 135 135 136 136 #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 137 137 138 - struct blkif_request_segment_aligned { 139 - grant_ref_t gref; /* reference to I/O buffer frame */ 140 - /* @first_sect: first sector in frame to transfer (inclusive). */ 141 - /* @last_sect: last sector in frame to transfer (inclusive). 
*/ 142 - uint8_t first_sect, last_sect; 143 - uint16_t _pad; /* padding to make it 8 bytes, so it's cache-aligned */ 144 - } __attribute__((__packed__)); 138 + struct blkif_request_segment { 139 + grant_ref_t gref; /* reference to I/O buffer frame */ 140 + /* @first_sect: first sector in frame to transfer (inclusive). */ 141 + /* @last_sect: last sector in frame to transfer (inclusive). */ 142 + uint8_t first_sect, last_sect; 143 + }; 145 144 146 145 struct blkif_request_rw { 147 146 uint8_t nr_segments; /* number of segments */ ··· 150 151 #endif 151 152 uint64_t id; /* private guest value, echoed in resp */ 152 153 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 153 - struct blkif_request_segment { 154 - grant_ref_t gref; /* reference to I/O buffer frame */ 155 - /* @first_sect: first sector in frame to transfer (inclusive). */ 156 - /* @last_sect: last sector in frame to transfer (inclusive). */ 157 - uint8_t first_sect, last_sect; 158 - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 154 + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 159 155 } __attribute__((__packed__)); 160 156 161 157 struct blkif_request_discard {

+2 -5

lib/percpu_ida.c

··· 54 54 /* 55 55 * Try to steal tags from a remote cpu's percpu freelist. 56 56 * 57 - * We first check how many percpu freelists have tags - we don't steal tags 58 - * unless enough percpu freelists have tags on them that it's possible more than 59 - * half the total tags could be stuck on remote percpu freelists. 57 + * We first check how many percpu freelists have tags 60 58 * 61 59 * Then we iterate through the cpus until we find some tags - we don't attempt 62 60 * to find the "best" cpu to steal from, to keep cacheline bouncing to a ··· 67 69 struct percpu_ida_cpu *remote; 68 70 69 71 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); 70 - cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2; 71 - cpus_have_tags--) { 72 + cpus_have_tags; cpus_have_tags--) { 72 73 cpu = cpumask_next(cpu, &pool->cpus_have_tags); 73 74 74 75 if (cpu >= nr_cpu_ids) {

Configure Feed

Configure Feed