Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
block: add blk_run_queue_async
block: blk_delay_queue() should use kblockd workqueue
md: fix up raid1/raid10 unplugging.
md: incorporate new plugging into raid5.
md: provide generic support for handling unplug callbacks.
md - remove old plugging code.
md/dm - remove remains of plug_fn callback.
md: use new plugging interface for RAID IO.
block: drop queue lock before calling __blk_run_queue() for kblockd punt
Revert "block: add callback function for unplug notification"
block: Enhance new plugging support to support general callbacks

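The headline change in this merge is that the old per-queue unplug callback (blk_queue_unplugged() / unplugged_fn, removed below) is replaced by per-task plug callbacks: struct blk_plug grows a cb_list, and any code running while a plug is held can chain a struct blk_plug_cb onto it to be invoked from blk_flush_plug_list(). The following sketch shows how a stacked driver might use the new interface, modelled on mddev_check_plugged() in the md.c hunk further down; struct my_dev, my_unplug, my_check_plugged and kick_pending_io are illustrative names, not part of this merge.

#include <linux/blkdev.h>
#include <linux/sched.h>
#include <linux/slab.h>

struct my_dev;                                  /* hypothetical driver state */
static void kick_pending_io(struct my_dev *dev); /* hypothetical dispatch helper */

struct my_plug_cb {
        struct blk_plug_cb cb;  /* generic part, linked on plug->cb_list */
        struct my_dev *dev;
};

/* Runs from blk_flush_plug_list() when the plugging task unplugs. */
static void my_unplug(struct blk_plug_cb *cb)
{
        struct my_plug_cb *mcb = container_of(cb, struct my_plug_cb, cb);

        kick_pending_io(mcb->dev);
        kfree(mcb);
}

/*
 * Returns 1 if an unplug callback is now queued on the current task's
 * plug, 0 if the caller must kick the device itself.
 */
static int my_check_plugged(struct my_dev *dev)
{
        struct blk_plug *plug = current->plug;
        struct my_plug_cb *mcb;

        if (!plug)
                return 0;

        mcb = kmalloc(sizeof(*mcb), GFP_ATOMIC);
        if (!mcb)
                return 0;

        mcb->dev = dev;
        mcb->cb.callback = my_unplug;
        list_add(&mcb->cb.list, &plug->cb_list);
        return 1;
}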
18 files changed: +186 -187
+65 -18
block/blk-core.c
···
 
         q = container_of(work, struct request_queue, delay_work.work);
         spin_lock_irq(q->queue_lock);
-        __blk_run_queue(q, false);
+        __blk_run_queue(q);
         spin_unlock_irq(q->queue_lock);
 }
 
···
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-        schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
+        queue_delayed_work(kblockd_workqueue, &q->delay_work,
+                           msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
···
         WARN_ON(!irqs_disabled());
 
         queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-        __blk_run_queue(q, false);
+        __blk_run_queue(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
···
  * Description:
  *    See @blk_run_queue. This variant must be called with the queue lock
  *    held and interrupts disabled.
- *
  */
-void __blk_run_queue(struct request_queue *q, bool force_kblockd)
+void __blk_run_queue(struct request_queue *q)
 {
         if (unlikely(blk_queue_stopped(q)))
                 return;
···
          * Only recurse once to avoid overrunning the stack, let the unplug
          * handling reinvoke the handler shortly if we already got there.
          */
-        if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+        if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
                 q->request_fn(q);
                 queue_flag_clear(QUEUE_FLAG_REENTER, q);
         } else
                 queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
 EXPORT_SYMBOL(__blk_run_queue);
+
+/**
+ * blk_run_queue_async - run a single device queue in workqueue context
+ * @q: The queue to run
+ *
+ * Description:
+ *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
+ *    of us.
+ */
+void blk_run_queue_async(struct request_queue *q)
+{
+        if (likely(!blk_queue_stopped(q)))
+                queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+}
 
 /**
  * blk_run_queue - run a single device queue
···
         unsigned long flags;
 
         spin_lock_irqsave(q->queue_lock, flags);
-        __blk_run_queue(q, false);
+        __blk_run_queue(q);
         spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
···
                 blk_queue_end_tag(q, rq);
 
         add_acct_request(q, rq, where);
-        __blk_run_queue(q, false);
+        __blk_run_queue(q);
         spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
···
         } else {
                 spin_lock_irq(q->queue_lock);
                 add_acct_request(q, req, where);
-                __blk_run_queue(q, false);
+                __blk_run_queue(q);
 out_unlock:
                 spin_unlock_irq(q->queue_lock);
         }
···
 
         plug->magic = PLUG_MAGIC;
         INIT_LIST_HEAD(&plug->list);
+        INIT_LIST_HEAD(&plug->cb_list);
         plug->should_sort = 0;
 
         /*
···
  */
 static void queue_unplugged(struct request_queue *q, unsigned int depth,
                             bool from_schedule)
+        __releases(q->queue_lock)
 {
         trace_block_unplug(q, depth, !from_schedule);
-        __blk_run_queue(q, from_schedule);
 
-        if (q->unplugged_fn)
-                q->unplugged_fn(q);
+        /*
+         * If we are punting this to kblockd, then we can safely drop
+         * the queue_lock before waking kblockd (which needs to take
+         * this lock).
+         */
+        if (from_schedule) {
+                spin_unlock(q->queue_lock);
+                blk_run_queue_async(q);
+        } else {
+                __blk_run_queue(q);
+                spin_unlock(q->queue_lock);
+        }
+
+}
+
+static void flush_plug_callbacks(struct blk_plug *plug)
+{
+        LIST_HEAD(callbacks);
+
+        if (list_empty(&plug->cb_list))
+                return;
+
+        list_splice_init(&plug->cb_list, &callbacks);
+
+        while (!list_empty(&callbacks)) {
+                struct blk_plug_cb *cb = list_first_entry(&callbacks,
+                                                          struct blk_plug_cb,
+                                                          list);
+                list_del(&cb->list);
+                cb->callback(cb);
+        }
 }
 
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
···
 
         BUG_ON(plug->magic != PLUG_MAGIC);
 
+        flush_plug_callbacks(plug);
         if (list_empty(&plug->list))
                 return;
 
···
                 BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
                 BUG_ON(!rq->q);
                 if (rq->q != q) {
-                        if (q) {
+                        /*
+                         * This drops the queue lock
+                         */
+                        if (q)
                                 queue_unplugged(q, depth, from_schedule);
-                                spin_unlock(q->queue_lock);
-                        }
                         q = rq->q;
                         depth = 0;
                         spin_lock(q->queue_lock);
···
                 depth++;
         }
 
-        if (q) {
+        /*
+         * This drops the queue lock
+         */
+        if (q)
                 queue_unplugged(q, depth, from_schedule);
-                spin_unlock(q->queue_lock);
-        }
 
         local_irq_restore(flags);
 }
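The net effect of the blk-core.c hunks above is that the bool force_kblockd argument is gone, and the two behaviours it multiplexed become separate entry points with different locking contracts. A minimal sketch of the resulting calling conventions, for block-layer-internal code (blk_run_queue_async() is only declared in block/blk.h at this point, per the hunk below); example_run is an illustrative name:

#include <linux/blkdev.h>
#include "blk.h"        /* declares blk_run_queue_async() */

static void example_run(struct request_queue *q)
{
        /*
         * Synchronous path: must be entered with q->queue_lock held and
         * interrupts disabled; may invoke q->request_fn() directly.
         */
        spin_lock_irq(q->queue_lock);
        __blk_run_queue(q);
        spin_unlock_irq(q->queue_lock);

        /*
         * Asynchronous path: no locking requirement here; kblockd takes
         * the queue lock itself.  Replaces the old __blk_run_queue(q, true).
         */
        blk_run_queue_async(q);
}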
+1 -1
block/blk-exec.c
···
         WARN_ON(irqs_disabled());
         spin_lock_irq(q->queue_lock);
         __elv_add_request(q, rq, where);
-        __blk_run_queue(q, false);
+        __blk_run_queue(q);
         /* the queue is stopped so it won't be plugged+unplugged */
         if (rq->cmd_type == REQ_TYPE_PM_RESUME)
                 q->request_fn(q);
+2 -2
block/blk-flush.c
···
          * request_fn may confuse the driver.  Always use kblockd.
          */
         if (queued)
-                __blk_run_queue(q, true);
+                blk_run_queue_async(q);
 }
 
 /**
···
          * the comment in flush_end_io().
          */
         if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
-                __blk_run_queue(q, true);
+                blk_run_queue_async(q);
 }
 
 /**
-16
block/blk-settings.c
···
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
-/**
- * blk_queue_unplugged - register a callback for an unplug event
- * @q: the request queue for the device
- * @fn: the function to call
- *
- * Some stacked drivers may need to know when IO is dispatched on an
- * unplug event. By registrering a callback here, they will be notified
- * when someone flushes their on-stack queue plug. The function will be
- * called with the queue lock held.
- */
-void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn)
-{
-        q->unplugged_fn = fn;
-}
-EXPORT_SYMBOL(blk_queue_unplugged);
-
 static int __init blk_settings_init(void)
 {
         blk_max_low_pfn = max_low_pfn - 1;
+1
block/blk.h
···
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
 void __generic_unplug_device(struct request_queue *);
+void blk_run_queue_async(struct request_queue *q);
 
 /*
  * Internal atomic flags for request handling
+3 -3
block/cfq-iosched.c
···
                     cfqd->busy_queues > 1) {
                         cfq_del_timer(cfqd, cfqq);
                         cfq_clear_cfqq_wait_request(cfqq);
-                        __blk_run_queue(cfqd->queue, false);
+                        __blk_run_queue(cfqd->queue);
                 } else {
                         cfq_blkiocg_update_idle_time_stats(
                                         &cfqq->cfqg->blkg);
···
                  * this new queue is RT and the current one is BE
                  */
                 cfq_preempt_queue(cfqd, cfqq);
-                __blk_run_queue(cfqd->queue, false);
+                __blk_run_queue(cfqd->queue);
         }
 }
 
···
         struct request_queue *q = cfqd->queue;
 
         spin_lock_irq(q->queue_lock);
-        __blk_run_queue(cfqd->queue, false);
+        __blk_run_queue(cfqd->queue);
         spin_unlock_irq(q->queue_lock);
 }
 
+2 -2
block/elevator.c
···
          */
         elv_drain_elevator(q);
         while (q->rq.elvpriv) {
-                __blk_run_queue(q, false);
+                __blk_run_queue(q);
                 spin_unlock_irq(q->queue_lock);
                 msleep(10);
                 spin_lock_irq(q->queue_lock);
···
                  * with anything.  There's no point in delaying queue
                  * processing.
                  */
-                __blk_run_queue(q, false);
+                __blk_run_queue(q);
                 break;
 
         case ELEVATOR_INSERT_SORT_MERGE:
-8
drivers/md/dm-raid.c
···
         return md_raid5_congested(&rs->md, bits);
 }
 
-static void raid_unplug(struct dm_target_callbacks *cb)
-{
-        struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
-
-        md_raid5_kick_device(rs->md.private);
-}
-
 /*
  * Construct a RAID4/5/6 mapping:
  * Args:
···
         }
 
         rs->callbacks.congested_fn = raid_is_congested;
-        rs->callbacks.unplug_fn = raid_unplug;
         dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
         return 0;
+46 -41
drivers/md/md.c
···
 
 /* Support for plugging.
  * This mirrors the plugging support in request_queue, but does not
- * require having a whole queue
+ * require having a whole queue or request structures.
+ * We allocate an md_plug_cb for each md device and each thread it gets
+ * plugged on.  This links to the private plug_handle structure in the
+ * personality data where we keep a count of the number of outstanding
+ * plugs so other code can see if a plug is active.
  */
-static void plugger_work(struct work_struct *work)
-{
-        struct plug_handle *plug =
-                container_of(work, struct plug_handle, unplug_work);
-        plug->unplug_fn(plug);
-}
-static void plugger_timeout(unsigned long data)
-{
-        struct plug_handle *plug = (void *)data;
-        kblockd_schedule_work(NULL, &plug->unplug_work);
-}
-void plugger_init(struct plug_handle *plug,
-                  void (*unplug_fn)(struct plug_handle *))
-{
-        plug->unplug_flag = 0;
-        plug->unplug_fn = unplug_fn;
-        init_timer(&plug->unplug_timer);
-        plug->unplug_timer.function = plugger_timeout;
-        plug->unplug_timer.data = (unsigned long)plug;
-        INIT_WORK(&plug->unplug_work, plugger_work);
-}
-EXPORT_SYMBOL_GPL(plugger_init);
+struct md_plug_cb {
+        struct blk_plug_cb cb;
+        mddev_t *mddev;
+};
 
-void plugger_set_plug(struct plug_handle *plug)
+static void plugger_unplug(struct blk_plug_cb *cb)
 {
-        if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag))
-                mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1);
+        struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb);
+        if (atomic_dec_and_test(&mdcb->mddev->plug_cnt))
+                md_wakeup_thread(mdcb->mddev->thread);
+        kfree(mdcb);
 }
-EXPORT_SYMBOL_GPL(plugger_set_plug);
 
-int plugger_remove_plug(struct plug_handle *plug)
+/* Check that an unplug wakeup will come shortly.
+ * If not, wakeup the md thread immediately
+ */
+int mddev_check_plugged(mddev_t *mddev)
 {
-        if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) {
-                del_timer(&plug->unplug_timer);
-                return 1;
-        } else
+        struct blk_plug *plug = current->plug;
+        struct md_plug_cb *mdcb;
+
+        if (!plug)
                 return 0;
-}
-EXPORT_SYMBOL_GPL(plugger_remove_plug);
 
+        list_for_each_entry(mdcb, &plug->cb_list, cb.list) {
+                if (mdcb->cb.callback == plugger_unplug &&
+                    mdcb->mddev == mddev) {
+                        /* Already on the list, move to top */
+                        if (mdcb != list_first_entry(&plug->cb_list,
+                                                     struct md_plug_cb,
+                                                     cb.list))
+                                list_move(&mdcb->cb.list, &plug->cb_list);
+                        return 1;
+                }
+        }
+        /* Not currently on the callback list */
+        mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC);
+        if (!mdcb)
+                return 0;
+
+        mdcb->mddev = mddev;
+        mdcb->cb.callback = plugger_unplug;
+        atomic_inc(&mddev->plug_cnt);
+        list_add(&mdcb->cb.list, &plug->cb_list);
+        return 1;
+}
+EXPORT_SYMBOL_GPL(mddev_check_plugged);
 
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
···
         atomic_set(&mddev->active, 1);
         atomic_set(&mddev->openers, 0);
         atomic_set(&mddev->active_io, 0);
+        atomic_set(&mddev->plug_cnt, 0);
         spin_lock_init(&mddev->write_lock);
         atomic_set(&mddev->flush_pending, 0);
         init_waitqueue_head(&mddev->sb_wait);
···
         mddev->bitmap_info.chunksize = 0;
         mddev->bitmap_info.daemon_sleep = 0;
         mddev->bitmap_info.max_write_behind = 0;
-        mddev->plug = NULL;
 }
 
 static void __md_stop_writes(mddev_t *mddev)
···
         return 0;
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
-
-void md_unplug(mddev_t *mddev)
-{
-        if (mddev->plug)
-                mddev->plug->unplug_fn(mddev->plug);
-}
 
 #define SYNC_MARKS 10
 #define SYNC_MARK_STEP (3*HZ)
+4 -22
drivers/md/md.h
···
 typedef struct mddev_s mddev_t;
 typedef struct mdk_rdev_s mdk_rdev_t;
 
-/* generic plugging support - like that provided with request_queue,
- * but does not require a request_queue
- */
-struct plug_handle {
-        void (*unplug_fn)(struct plug_handle *);
-        struct timer_list unplug_timer;
-        struct work_struct unplug_work;
-        unsigned long unplug_flag;
-};
-#define PLUGGED_FLAG 1
-void plugger_init(struct plug_handle *plug,
-                  void (*unplug_fn)(struct plug_handle *));
-void plugger_set_plug(struct plug_handle *plug);
-int plugger_remove_plug(struct plug_handle *plug);
-static inline void plugger_flush(struct plug_handle *plug)
-{
-        del_timer_sync(&plug->unplug_timer);
-        cancel_work_sync(&plug->unplug_work);
-}
-
 /*
  * MD's 'extended' device
  */
···
         int delta_disks, new_level, new_layout;
         int new_chunk_sectors;
 
+        atomic_t plug_cnt;      /* If device is expecting
+                                 * more bios soon.
+                                 */
         struct mdk_thread_s *thread;      /* management thread */
         struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */
         sector_t curr_resync;             /* last block scheduled */
···
         struct list_head all_mddevs;
 
         struct attribute_group *to_remove;
-        struct plug_handle *plug; /* if used by personality */
 
         struct bio_set *bio_set;
···
 extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
 extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
 extern void restore_bitmap_write_access(struct file *file);
-extern void md_unplug(mddev_t *mddev);
 
 extern void mddev_init(mddev_t *mddev);
 extern int md_run(mddev_t *mddev);
···
                                    mddev_t *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                    mddev_t *mddev);
+extern int mddev_check_plugged(mddev_t *mddev);
 #endif /* _MD_MD_H */
+14 -15
drivers/md/raid1.c
···
         spin_unlock_irq(&conf->device_lock);
 }
 
-static void md_kick_device(mddev_t *mddev)
-{
-        blk_flush_plug(current);
-        md_wakeup_thread(mddev->thread);
-}
-
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
···
 
         /* Wait until no block IO is waiting */
         wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-                            conf->resync_lock, md_kick_device(conf->mddev));
+                            conf->resync_lock, );
 
         /* block any new IO from starting */
         conf->barrier++;
···
         /* Now wait for all pending IO to complete */
         wait_event_lock_irq(conf->wait_barrier,
                             !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-                            conf->resync_lock, md_kick_device(conf->mddev));
+                            conf->resync_lock, );
 
         spin_unlock_irq(&conf->resync_lock);
 }
···
                 conf->nr_waiting++;
                 wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
                                     conf->resync_lock,
-                                    md_kick_device(conf->mddev));
+                                    );
                 conf->nr_waiting--;
         }
         conf->nr_pending++;
···
         wait_event_lock_irq(conf->wait_barrier,
                             conf->nr_pending == conf->nr_queued+1,
                             conf->resync_lock,
-                            ({ flush_pending_writes(conf);
-                               md_kick_device(conf->mddev); }));
+                            flush_pending_writes(conf));
         spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(conf_t *conf)
···
         const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
         const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
         mdk_rdev_t *blocked_rdev;
+        int plugged;
 
         /*
          * Register the new request and wait if the reconstruction
···
          * inc refcount on their rdev.  Record them by setting
          * bios[x] to bio
          */
+        plugged = mddev_check_plugged(mddev);
+
         disks = conf->raid_disks;
 retry_write:
         blocked_rdev = NULL;
···
         /* In case raid1d snuck in to freeze_array */
         wake_up(&conf->wait_barrier);
 
-        if (do_sync || !bitmap)
+        if (do_sync || !bitmap || !plugged)
                 md_wakeup_thread(mddev->thread);
 
         return 0;
···
         conf_t *conf = mddev->private;
         struct list_head *head = &conf->retry_list;
         mdk_rdev_t *rdev;
+        struct blk_plug plug;
 
         md_check_recovery(mddev);
-
+
+        blk_start_plug(&plug);
         for (;;) {
                 char b[BDEVNAME_SIZE];
 
-                flush_pending_writes(conf);
+                if (atomic_read(&mddev->plug_cnt) == 0)
+                        flush_pending_writes(conf);
 
                 spin_lock_irqsave(&conf->device_lock, flags);
                 if (list_empty(head)) {
···
                 }
                 cond_resched();
         }
+        blk_finish_plug(&plug);
 }
···
 
         md_unregister_thread(mddev->thread);
         mddev->thread = NULL;
-        blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
         if (conf->r1bio_pool)
                 mempool_destroy(conf->r1bio_pool);
         kfree(conf->mirrors);
+13 -14
drivers/md/raid10.c
···
         spin_unlock_irq(&conf->device_lock);
 }
 
-static void md_kick_device(mddev_t *mddev)
-{
-        blk_flush_plug(current);
-        md_wakeup_thread(mddev->thread);
-}
-
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
···
 
         /* Wait until no block IO is waiting (unless 'force') */
         wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-                            conf->resync_lock, md_kick_device(conf->mddev));
+                            conf->resync_lock, );
 
         /* block any new IO from starting */
         conf->barrier++;
 
-        /* No wait for all pending IO to complete */
+        /* Now wait for all pending IO to complete */
         wait_event_lock_irq(conf->wait_barrier,
                             !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-                            conf->resync_lock, md_kick_device(conf->mddev));
+                            conf->resync_lock, );
 
         spin_unlock_irq(&conf->resync_lock);
 }
···
                 conf->nr_waiting++;
                 wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
                                     conf->resync_lock,
-                                    md_kick_device(conf->mddev));
+                                    );
                 conf->nr_waiting--;
         }
         conf->nr_pending++;
···
         wait_event_lock_irq(conf->wait_barrier,
                             conf->nr_pending == conf->nr_queued+1,
                             conf->resync_lock,
-                            ({ flush_pending_writes(conf);
-                               md_kick_device(conf->mddev); }));
+                            flush_pending_writes(conf));
+
         spin_unlock_irq(&conf->resync_lock);
 }
 
···
         const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
         unsigned long flags;
         mdk_rdev_t *blocked_rdev;
+        int plugged;
 
         if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                 md_flush_request(mddev, bio);
···
          * inc refcount on their rdev.  Record them by setting
          * bios[x] to bio
          */
+        plugged = mddev_check_plugged(mddev);
+
         raid10_find_phys(conf, r10_bio);
 retry_write:
         blocked_rdev = NULL;
···
         /* In case raid10d snuck in to freeze_array */
         wake_up(&conf->wait_barrier);
 
-        if (do_sync || !mddev->bitmap)
+        if (do_sync || !mddev->bitmap || !plugged)
                 md_wakeup_thread(mddev->thread);
-
         return 0;
 }
···
         conf_t *conf = mddev->private;
         struct list_head *head = &conf->retry_list;
         mdk_rdev_t *rdev;
+        struct blk_plug plug;
 
         md_check_recovery(mddev);
 
+        blk_start_plug(&plug);
         for (;;) {
                 char b[BDEVNAME_SIZE];
 
···
                 }
                 cond_resched();
         }
+        blk_finish_plug(&plug);
 }
+26 -35
drivers/md/raid5.c
···
 *
 * We group bitmap updates into batches.  Each batch has a number.
 * We may write out several batches at once, but that isn't very important.
-* conf->bm_write is the number of the last batch successfully written.
-* conf->bm_flush is the number of the last batch that was closed to
+* conf->seq_write is the number of the last batch successfully written.
+* conf->seq_flush is the number of the last batch that was closed to
 *    new additions.
 * When we discover that we will need to write to any block in a stripe
 * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
-* the number of the batch it will be in. This is bm_flush+1.
+* the number of the batch it will be in. This is seq_flush+1.
 * When we are ready to do a write, if that batch hasn't been written yet,
 *   we plug the array and queue the stripe for later.
 * When an unplug happens, we increment bm_flush, thus closing the current
···
                 BUG_ON(!list_empty(&sh->lru));
                 BUG_ON(atomic_read(&conf->active_stripes)==0);
                 if (test_bit(STRIPE_HANDLE, &sh->state)) {
-                        if (test_bit(STRIPE_DELAYED, &sh->state)) {
+                        if (test_bit(STRIPE_DELAYED, &sh->state))
                                 list_add_tail(&sh->lru, &conf->delayed_list);
-                                plugger_set_plug(&conf->plug);
-                        } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-                                   sh->bm_seq - conf->seq_write > 0) {
+                        else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+                                 sh->bm_seq - conf->seq_write > 0)
                                 list_add_tail(&sh->lru, &conf->bitmap_list);
-                                plugger_set_plug(&conf->plug);
-                        } else {
+                        else {
                                 clear_bit(STRIPE_BIT_DELAY, &sh->state);
                                 list_add_tail(&sh->lru, &conf->handle_list);
                         }
···
                                     < (conf->max_nr_stripes *3/4)
                                     || !conf->inactive_blocked),
                                     conf->device_lock,
-                                    md_raid5_kick_device(conf));
+                                    );
                         conf->inactive_blocked = 0;
                 } else
                         init_stripe(sh, sector, previous);
···
                 wait_event_lock_irq(conf->wait_for_stripe,
                                     !list_empty(&conf->inactive_list),
                                     conf->device_lock,
-                                    blk_flush_plug(current));
+                                    );
                 osh = get_free_stripe(conf);
                 spin_unlock_irq(&conf->device_lock);
                 atomic_set(&nsh->count, 1);
···
                         atomic_inc(&conf->preread_active_stripes);
                         list_add_tail(&sh->lru, &conf->hold_list);
                 }
-        } else
-                plugger_set_plug(&conf->plug);
+        }
 }
 
 static void activate_bit_delay(raid5_conf_t *conf)
···
                 atomic_inc(&sh->count);
                 __release_stripe(conf, sh);
         }
-}
-
-void md_raid5_kick_device(raid5_conf_t *conf)
-{
-        blk_flush_plug(current);
-        raid5_activate_delayed(conf);
-        md_wakeup_thread(conf->mddev->thread);
-}
-EXPORT_SYMBOL_GPL(md_raid5_kick_device);
-
-static void raid5_unplug(struct plug_handle *plug)
-{
-        raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
-
-        md_raid5_kick_device(conf);
 }
 
 int md_raid5_congested(mddev_t *mddev, int bits)
···
         struct stripe_head *sh;
         const int rw = bio_data_dir(bi);
         int remaining;
+        int plugged;
 
         if (unlikely(bi->bi_rw & REQ_FLUSH)) {
                 md_flush_request(mddev, bi);
···
         bi->bi_next = NULL;
         bi->bi_phys_segments = 1;  /* over-loaded to count active stripes */
 
+        plugged = mddev_check_plugged(mddev);
         for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
                 DEFINE_WAIT(w);
                 int disks, data_disks;
···
                                  * add failed due to overlap.  Flush everything
                                  * and wait a while
                                  */
-                                md_raid5_kick_device(conf);
+                                md_wakeup_thread(mddev->thread);
                                 release_stripe(sh);
                                 schedule();
                                 goto retry;
···
                 }
 
         }
+        if (!plugged)
+                md_wakeup_thread(mddev->thread);
+
         spin_lock_irq(&conf->device_lock);
         remaining = raid5_dec_bi_phys_segments(bi);
         spin_unlock_irq(&conf->device_lock);
···
         struct stripe_head *sh;
         raid5_conf_t *conf = mddev->private;
         int handled;
+        struct blk_plug plug;
 
         pr_debug("+++ raid5d active\n");
 
         md_check_recovery(mddev);
 
+        blk_start_plug(&plug);
         handled = 0;
         spin_lock_irq(&conf->device_lock);
         while (1) {
                 struct bio *bio;
 
-                if (conf->seq_flush != conf->seq_write) {
-                        int seq = conf->seq_flush;
+                if (atomic_read(&mddev->plug_cnt) == 0 &&
+                    !list_empty(&conf->bitmap_list)) {
+                        /* Now is a good time to flush some bitmap updates */
+                        conf->seq_flush++;
                         spin_unlock_irq(&conf->device_lock);
                         bitmap_unplug(mddev->bitmap);
                         spin_lock_irq(&conf->device_lock);
-                        conf->seq_write = seq;
+                        conf->seq_write = conf->seq_flush;
                         activate_bit_delay(conf);
                 }
+                if (atomic_read(&mddev->plug_cnt) == 0)
+                        raid5_activate_delayed(conf);
 
                 while ((bio = remove_bio_from_retry(conf))) {
                         int ok;
···
         spin_unlock_irq(&conf->device_lock);
 
         async_tx_issue_pending_all();
+        blk_finish_plug(&plug);
 
         pr_debug("--- raid5d inactive\n");
 }
···
                 mdname(mddev));
         md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
-        plugger_init(&conf->plug, raid5_unplug);
-        mddev->plug = &conf->plug;
         if (mddev->queue) {
                 int chunk_size;
                 /* read-ahead size must cover two whole stripes, which
···
         mddev->thread = NULL;
         if (mddev->queue)
                 mddev->queue->backing_dev_info.congested_fn = NULL;
-        plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
         free_conf(conf);
         mddev->private = NULL;
         mddev->to_remove = &raid5_attrs_group;
-2
drivers/md/raid5.h
···
          * Cleared when a sync completes.
          */
 
-        struct plug_handle plug;
-
         /* per cpu variables */
         struct raid5_percpu {
                 struct page *spare_page; /* Used when checking P/Q in raid6 */
+1 -1
drivers/scsi/scsi_lib.c
···
                                &sdev->request_queue->queue_flags);
         if (flagset)
                 queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
-        __blk_run_queue(sdev->request_queue, false);
+        __blk_run_queue(sdev->request_queue);
         if (flagset)
                 queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
         spin_unlock(sdev->request_queue->queue_lock);
+1 -1
drivers/scsi/scsi_transport_fc.c
···
                   !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
         if (flagset)
                 queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
-        __blk_run_queue(rport->rqst_q, false);
+        __blk_run_queue(rport->rqst_q);
         if (flagset)
                 queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
         spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
+7 -5
include/linux/blkdev.h
···
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
-typedef void (unplugged_fn) (struct request_queue *);
 
 struct bio_vec;
 struct bvec_merge_data {
···
         rq_timed_out_fn *rq_timed_out_fn;
         dma_drain_needed_fn *dma_drain_needed;
         lld_busy_fn *lld_busy_fn;
-        unplugged_fn *unplugged_fn;
 
         /*
          * Dispatch queue sorting
···
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
-extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
+extern void __blk_run_queue(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
                            struct rq_map_data *, void __user *, unsigned long,
···
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
-extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
···
 struct blk_plug {
         unsigned long magic;
         struct list_head list;
+        struct list_head cb_list;
         unsigned int should_sort;
+};
+struct blk_plug_cb {
+        struct list_head list;
+        void (*callback)(struct blk_plug_cb *);
 };
 
 extern void blk_start_plug(struct blk_plug *);
···
 {
         struct blk_plug *plug = tsk->plug;
 
-        return plug && !list_empty(&plug->list);
+        return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
 }
 
 /*
-1
include/linux/device-mapper.h
···
 struct dm_target_callbacks {
         struct list_head list;
         int (*congested_fn) (struct dm_target_callbacks *, int);
-        void (*unplug_fn)(struct dm_target_callbacks *);
 };
 
 int dm_register_target(struct target_type *t);