Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:
"This fixes several bugs, three of which are marked for stable:

- an initialization issue fixed by Ming

- a bio clone race issue fixed by me

- an async tx flush issue fixed by Ofer

- other cleanups"

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
MD: fix warnning for UP case
md/raid5: add thread_group worker async_tx_issue_pending_all
md: simplify code with bio_io_error
md/raid1: fix writebehind bio clone
md: raid1-10: move raid1/raid10 common code into raid1-10.c
md: raid1/raid10: initialize bvec table via bio_add_page()
md: remove 'idx' from 'struct resync_pages'

+115 -126
+1 -1
drivers/md/md.c
··· 2287 2287 2288 2288 static bool set_in_sync(struct mddev *mddev) 2289 2289 { 2290 - WARN_ON_ONCE(!spin_is_locked(&mddev->lock)); 2290 + WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock)); 2291 2291 if (!mddev->in_sync) { 2292 2292 mddev->sync_checkers++; 2293 2293 spin_unlock(&mddev->lock);
-54
drivers/md/md.h
··· 731 731 !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) 732 732 mddev->queue->limits.max_write_zeroes_sectors = 0; 733 733 } 734 - 735 - /* Maximum size of each resync request */ 736 - #define RESYNC_BLOCK_SIZE (64*1024) 737 - #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) 738 - 739 - /* for managing resync I/O pages */ 740 - struct resync_pages { 741 - unsigned idx; /* for get/put page from the pool */ 742 - void *raid_bio; 743 - struct page *pages[RESYNC_PAGES]; 744 - }; 745 - 746 - static inline int resync_alloc_pages(struct resync_pages *rp, 747 - gfp_t gfp_flags) 748 - { 749 - int i; 750 - 751 - for (i = 0; i < RESYNC_PAGES; i++) { 752 - rp->pages[i] = alloc_page(gfp_flags); 753 - if (!rp->pages[i]) 754 - goto out_free; 755 - } 756 - 757 - return 0; 758 - 759 - out_free: 760 - while (--i >= 0) 761 - put_page(rp->pages[i]); 762 - return -ENOMEM; 763 - } 764 - 765 - static inline void resync_free_pages(struct resync_pages *rp) 766 - { 767 - int i; 768 - 769 - for (i = 0; i < RESYNC_PAGES; i++) 770 - put_page(rp->pages[i]); 771 - } 772 - 773 - static inline void resync_get_all_pages(struct resync_pages *rp) 774 - { 775 - int i; 776 - 777 - for (i = 0; i < RESYNC_PAGES; i++) 778 - get_page(rp->pages[i]); 779 - } 780 - 781 - static inline struct page *resync_fetch_page(struct resync_pages *rp, 782 - unsigned idx) 783 - { 784 - if (WARN_ON_ONCE(idx >= RESYNC_PAGES)) 785 - return NULL; 786 - return rp->pages[idx]; 787 - } 788 734 #endif /* _MD_MD_H */
+81
drivers/md/raid1-10.c
··· 1 + /* Maximum size of each resync request */ 2 + #define RESYNC_BLOCK_SIZE (64*1024) 3 + #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) 4 + 5 + /* for managing resync I/O pages */ 6 + struct resync_pages { 7 + void *raid_bio; 8 + struct page *pages[RESYNC_PAGES]; 9 + }; 10 + 11 + static inline int resync_alloc_pages(struct resync_pages *rp, 12 + gfp_t gfp_flags) 13 + { 14 + int i; 15 + 16 + for (i = 0; i < RESYNC_PAGES; i++) { 17 + rp->pages[i] = alloc_page(gfp_flags); 18 + if (!rp->pages[i]) 19 + goto out_free; 20 + } 21 + 22 + return 0; 23 + 24 + out_free: 25 + while (--i >= 0) 26 + put_page(rp->pages[i]); 27 + return -ENOMEM; 28 + } 29 + 30 + static inline void resync_free_pages(struct resync_pages *rp) 31 + { 32 + int i; 33 + 34 + for (i = 0; i < RESYNC_PAGES; i++) 35 + put_page(rp->pages[i]); 36 + } 37 + 38 + static inline void resync_get_all_pages(struct resync_pages *rp) 39 + { 40 + int i; 41 + 42 + for (i = 0; i < RESYNC_PAGES; i++) 43 + get_page(rp->pages[i]); 44 + } 45 + 46 + static inline struct page *resync_fetch_page(struct resync_pages *rp, 47 + unsigned idx) 48 + { 49 + if (WARN_ON_ONCE(idx >= RESYNC_PAGES)) 50 + return NULL; 51 + return rp->pages[idx]; 52 + } 53 + 54 + /* 55 + * 'strct resync_pages' stores actual pages used for doing the resync 56 + * IO, and it is per-bio, so make .bi_private points to it. 
57 + */ 58 + static inline struct resync_pages *get_resync_pages(struct bio *bio) 59 + { 60 + return bio->bi_private; 61 + } 62 + 63 + /* generally called after bio_reset() for reseting bvec */ 64 + static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp, 65 + int size) 66 + { 67 + int idx = 0; 68 + 69 + /* initialize bvec table again */ 70 + do { 71 + struct page *page = resync_fetch_page(rp, idx); 72 + int len = min_t(int, size, PAGE_SIZE); 73 + 74 + /* 75 + * won't fail because the vec table is big 76 + * enough to hold all these pages 77 + */ 78 + bio_add_page(bio, page, len, 0); 79 + size -= len; 80 + } while (idx++ < RESYNC_PAGES && size > 0); 81 + }
+20 -48
drivers/md/raid1.c
··· 81 81 #define raid1_log(md, fmt, args...) \ 82 82 do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) 83 83 84 - /* 85 - * 'strct resync_pages' stores actual pages used for doing the resync 86 - * IO, and it is per-bio, so make .bi_private points to it. 87 - */ 88 - static inline struct resync_pages *get_resync_pages(struct bio *bio) 89 - { 90 - return bio->bi_private; 91 - } 84 + #include "raid1-10.c" 92 85 93 86 /* 94 87 * for resync bio, r1bio pointer can be retrieved from the per-bio ··· 163 170 resync_get_all_pages(rp); 164 171 } 165 172 166 - rp->idx = 0; 167 173 rp->raid_bio = r1_bio; 168 174 bio->bi_private = rp; 169 175 } ··· 484 492 } 485 493 486 494 if (behind) { 487 - /* we release behind master bio when all write are done */ 488 - if (r1_bio->behind_master_bio == bio) 489 - to_put = NULL; 490 - 491 495 if (test_bit(WriteMostly, &rdev->flags)) 492 496 atomic_dec(&r1_bio->behind_remaining); 493 497 ··· 790 802 bio->bi_next = NULL; 791 803 bio->bi_bdev = rdev->bdev; 792 804 if (test_bit(Faulty, &rdev->flags)) { 793 - bio->bi_status = BLK_STS_IOERR; 794 - bio_endio(bio); 805 + bio_io_error(bio); 795 806 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 796 807 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 797 808 /* Just ignore it */ ··· 1075 1088 wake_up(&conf->wait_barrier); 1076 1089 } 1077 1090 1078 - static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, 1091 + static void alloc_behind_master_bio(struct r1bio *r1_bio, 1079 1092 struct bio *bio) 1080 1093 { 1081 1094 int size = bio->bi_iter.bi_size; ··· 1085 1098 1086 1099 behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev); 1087 1100 if (!behind_bio) 1088 - goto fail; 1101 + return; 1089 1102 1090 1103 /* discard op, we don't support writezero/writesame yet */ 1091 - if (!bio_has_data(bio)) 1104 + if (!bio_has_data(bio)) { 1105 + behind_bio->bi_iter.bi_size = size; 1092 1106 goto skip_copy; 1107 + } 1093 1108 1094 1109 while (i < vcnt 
&& size) { 1095 1110 struct page *page; ··· 1112 1123 r1_bio->behind_master_bio = behind_bio;; 1113 1124 set_bit(R1BIO_BehindIO, &r1_bio->state); 1114 1125 1115 - return behind_bio; 1126 + return; 1116 1127 1117 1128 free_pages: 1118 1129 pr_debug("%dB behind alloc failed, doing sync I/O\n", 1119 1130 bio->bi_iter.bi_size); 1120 1131 bio_free_pages(behind_bio); 1121 - fail: 1122 - return behind_bio; 1132 + bio_put(behind_bio); 1123 1133 } 1124 1134 1125 1135 struct raid1_plug_cb { ··· 1471 1483 (atomic_read(&bitmap->behind_writes) 1472 1484 < mddev->bitmap_info.max_write_behind) && 1473 1485 !waitqueue_active(&bitmap->behind_wait)) { 1474 - mbio = alloc_behind_master_bio(r1_bio, bio); 1486 + alloc_behind_master_bio(r1_bio, bio); 1475 1487 } 1476 1488 1477 1489 bitmap_startwrite(bitmap, r1_bio->sector, ··· 1481 1493 first_clone = 0; 1482 1494 } 1483 1495 1484 - if (!mbio) { 1485 - if (r1_bio->behind_master_bio) 1486 - mbio = bio_clone_fast(r1_bio->behind_master_bio, 1487 - GFP_NOIO, 1488 - mddev->bio_set); 1489 - else 1490 - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); 1491 - } 1496 + if (r1_bio->behind_master_bio) 1497 + mbio = bio_clone_fast(r1_bio->behind_master_bio, 1498 + GFP_NOIO, mddev->bio_set); 1499 + else 1500 + mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); 1492 1501 1493 1502 if (r1_bio->behind_master_bio) { 1494 1503 if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) ··· 2071 2086 /* Fix variable parts of all bios */ 2072 2087 vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9); 2073 2088 for (i = 0; i < conf->raid_disks * 2; i++) { 2074 - int j; 2075 - int size; 2076 2089 blk_status_t status; 2077 - struct bio_vec *bi; 2078 2090 struct bio *b = r1_bio->bios[i]; 2079 2091 struct resync_pages *rp = get_resync_pages(b); 2080 2092 if (b->bi_end_io != end_sync_read) ··· 2080 2098 status = b->bi_status; 2081 2099 bio_reset(b); 2082 2100 b->bi_status = status; 2083 - b->bi_vcnt = vcnt; 2084 - b->bi_iter.bi_size = 
r1_bio->sectors << 9; 2085 2101 b->bi_iter.bi_sector = r1_bio->sector + 2086 2102 conf->mirrors[i].rdev->data_offset; 2087 2103 b->bi_bdev = conf->mirrors[i].rdev->bdev; ··· 2087 2107 rp->raid_bio = r1_bio; 2088 2108 b->bi_private = rp; 2089 2109 2090 - size = b->bi_iter.bi_size; 2091 - bio_for_each_segment_all(bi, b, j) { 2092 - bi->bv_offset = 0; 2093 - if (size > PAGE_SIZE) 2094 - bi->bv_len = PAGE_SIZE; 2095 - else 2096 - bi->bv_len = size; 2097 - size -= PAGE_SIZE; 2098 - } 2110 + /* initialize bvec table again */ 2111 + md_bio_reset_resync_pages(b, rp, r1_bio->sectors << 9); 2099 2112 } 2100 2113 for (primary = 0; primary < conf->raid_disks * 2; primary++) 2101 2114 if (r1_bio->bios[primary]->bi_end_io == end_sync_read && ··· 2339 2366 wbio = bio_clone_fast(r1_bio->behind_master_bio, 2340 2367 GFP_NOIO, 2341 2368 mddev->bio_set); 2342 - /* We really need a _all clone */ 2343 - wbio->bi_iter = (struct bvec_iter){ 0 }; 2344 2369 } else { 2345 2370 wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, 2346 2371 mddev->bio_set); ··· 2590 2619 int good_sectors = RESYNC_SECTORS; 2591 2620 int min_bad = 0; /* number of sectors that are bad in all devices */ 2592 2621 int idx = sector_to_idx(sector_nr); 2622 + int page_idx = 0; 2593 2623 2594 2624 if (!conf->r1buf_pool) 2595 2625 if (init_resync(conf)) ··· 2818 2846 bio = r1_bio->bios[i]; 2819 2847 rp = get_resync_pages(bio); 2820 2848 if (bio->bi_end_io) { 2821 - page = resync_fetch_page(rp, rp->idx++); 2849 + page = resync_fetch_page(rp, page_idx); 2822 2850 2823 2851 /* 2824 2852 * won't fail because the vec table is big ··· 2830 2858 nr_sectors += len>>9; 2831 2859 sector_nr += len>>9; 2832 2860 sync_blocks -= (len>>9); 2833 - } while (get_resync_pages(r1_bio->bios[disk]->bi_private)->idx < RESYNC_PAGES); 2861 + } while (++page_idx < RESYNC_PAGES); 2834 2862 2835 2863 r1_bio->sectors = nr_sectors; 2836 2864
+8 -17
drivers/md/raid10.c
··· 110 110 #define raid10_log(md, fmt, args...) \ 111 111 do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) 112 112 113 - /* 114 - * 'strct resync_pages' stores actual pages used for doing the resync 115 - * IO, and it is per-bio, so make .bi_private points to it. 116 - */ 117 - static inline struct resync_pages *get_resync_pages(struct bio *bio) 118 - { 119 - return bio->bi_private; 120 - } 113 + #include "raid1-10.c" 121 114 122 115 /* 123 116 * for resync bio, r10bio pointer can be retrieved from the per-bio ··· 214 221 resync_get_all_pages(rp); 215 222 } 216 223 217 - rp->idx = 0; 218 224 rp->raid_bio = r10_bio; 219 225 bio->bi_private = rp; 220 226 if (rbio) { ··· 905 913 bio->bi_next = NULL; 906 914 bio->bi_bdev = rdev->bdev; 907 915 if (test_bit(Faulty, &rdev->flags)) { 908 - bio->bi_status = BLK_STS_IOERR; 909 - bio_endio(bio); 916 + bio_io_error(bio); 910 917 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 911 918 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 912 919 /* Just ignore it */ ··· 1089 1098 bio->bi_next = NULL; 1090 1099 bio->bi_bdev = rdev->bdev; 1091 1100 if (test_bit(Faulty, &rdev->flags)) { 1092 - bio->bi_status = BLK_STS_IOERR; 1093 - bio_endio(bio); 1101 + bio_io_error(bio); 1094 1102 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 1095 1103 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 1096 1104 /* Just ignore it */ ··· 2077 2087 rp = get_resync_pages(tbio); 2078 2088 bio_reset(tbio); 2079 2089 2080 - tbio->bi_vcnt = vcnt; 2081 - tbio->bi_iter.bi_size = fbio->bi_iter.bi_size; 2090 + md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size); 2091 + 2082 2092 rp->raid_bio = r10_bio; 2083 2093 tbio->bi_private = rp; 2084 2094 tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; ··· 2843 2853 sector_t sectors_skipped = 0; 2844 2854 int chunks_skipped = 0; 2845 2855 sector_t chunk_mask = conf->geo.chunk_mask; 2856 + int page_idx = 0; 2846 2857 2847 2858 if (!conf->r10buf_pool) 2848 
2859 if (init_resync(conf)) ··· 3346 3355 break; 3347 3356 for (bio= biolist ; bio ; bio=bio->bi_next) { 3348 3357 struct resync_pages *rp = get_resync_pages(bio); 3349 - page = resync_fetch_page(rp, rp->idx++); 3358 + page = resync_fetch_page(rp, page_idx); 3350 3359 /* 3351 3360 * won't fail because the vec table is big enough 3352 3361 * to hold all these pages ··· 3355 3364 } 3356 3365 nr_sectors += len>>9; 3357 3366 sector_nr += len>>9; 3358 - } while (get_resync_pages(biolist)->idx < RESYNC_PAGES); 3367 + } while (++page_idx < RESYNC_PAGES); 3359 3368 r10_bio->sectors = nr_sectors; 3360 3369 3361 3370 while (biolist) {
+5 -6
drivers/md/raid5.c
··· 3381 3381 sh->dev[i].sector + STRIPE_SECTORS) { 3382 3382 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 3383 3383 3384 - bi->bi_status = BLK_STS_IOERR; 3385 3384 md_write_end(conf->mddev); 3386 - bio_endio(bi); 3385 + bio_io_error(bi); 3387 3386 bi = nextbi; 3388 3387 } 3389 3388 if (bitmap_end) ··· 3402 3403 sh->dev[i].sector + STRIPE_SECTORS) { 3403 3404 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 3404 3405 3405 - bi->bi_status = BLK_STS_IOERR; 3406 3406 md_write_end(conf->mddev); 3407 - bio_endio(bi); 3407 + bio_io_error(bi); 3408 3408 bi = bi2; 3409 3409 } 3410 3410 ··· 3427 3429 struct bio *nextbi = 3428 3430 r5_next_bio(bi, sh->dev[i].sector); 3429 3431 3430 - bi->bi_status = BLK_STS_IOERR; 3431 - bio_endio(bi); 3432 + bio_io_error(bi); 3432 3433 bi = nextbi; 3433 3434 } 3434 3435 } ··· 6234 6237 pr_debug("%d stripes handled\n", handled); 6235 6238 6236 6239 spin_unlock_irq(&conf->device_lock); 6240 + 6241 + async_tx_issue_pending_all(); 6237 6242 blk_finish_plug(&plug); 6238 6243 6239 6244 pr_debug("--- raid5worker inactive\n");