Merge tag 'md/4.2-fixes' of git://neil.brown.name/md

+17 -11

drivers/md/bitmap.c

··· 494 494 bitmap_super_t *sb; 495 495 unsigned long chunksize, daemon_sleep, write_behind; 496 496 497 - bitmap->storage.sb_page = alloc_page(GFP_KERNEL); 497 + bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 498 498 if (bitmap->storage.sb_page == NULL) 499 499 return -ENOMEM; 500 500 bitmap->storage.sb_page->index = 0; ··· 541 541 sb->state = cpu_to_le32(bitmap->flags); 542 542 bitmap->events_cleared = bitmap->mddev->events; 543 543 sb->events_cleared = cpu_to_le64(bitmap->mddev->events); 544 + bitmap->mddev->bitmap_info.nodes = 0; 544 545 545 546 kunmap_atomic(sb); 546 547 ··· 559 558 unsigned long sectors_reserved = 0; 560 559 int err = -EINVAL; 561 560 struct page *sb_page; 561 + loff_t offset = bitmap->mddev->bitmap_info.offset; 562 562 563 563 if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { 564 564 chunksize = 128 * 1024 * 1024; ··· 586 584 bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); 587 585 /* to 4k blocks */ 588 586 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); 589 - bitmap->mddev->bitmap_info.offset += bitmap->cluster_slot * (bm_blocks << 3); 587 + offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3)); 590 588 pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, 591 - bitmap->cluster_slot, (unsigned long long)bitmap->mddev->bitmap_info.offset); 589 + bitmap->cluster_slot, offset); 592 590 } 593 591 594 592 if (bitmap->storage.file) { ··· 599 597 bitmap, bytes, sb_page); 600 598 } else { 601 599 err = read_sb_page(bitmap->mddev, 602 - bitmap->mddev->bitmap_info.offset, 600 + offset, 603 601 sb_page, 604 602 0, sizeof(bitmap_super_t)); 605 603 } ··· 613 611 daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; 614 612 write_behind = le32_to_cpu(sb->write_behind); 615 613 sectors_reserved = le32_to_cpu(sb->sectors_reserved); 616 - nodes = le32_to_cpu(sb->nodes); 617 - strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); 614 + /* XXX: This is a hack to ensure that we don't use clustering 615 + * in case: 616 + * - dm-raid is in use and 617 + * - the nodes written in bitmap_sb is erroneous. 618 + */ 619 + if (!bitmap->mddev->sync_super) { 620 + nodes = le32_to_cpu(sb->nodes); 621 + strlcpy(bitmap->mddev->bitmap_info.cluster_name, 622 + sb->cluster_name, 64); 623 + } 618 624 619 625 /* verify that the bitmap-specific fields are valid */ 620 626 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) ··· 681 671 kunmap_atomic(sb); 682 672 /* Assiging chunksize is required for "re_read" */ 683 673 bitmap->mddev->bitmap_info.chunksize = chunksize; 684 - if (nodes && (bitmap->cluster_slot < 0)) { 674 + if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { 685 675 err = md_setup_cluster(bitmap->mddev, nodes); 686 676 if (err) { 687 677 pr_err("%s: Could not setup cluster service (%d)\n", ··· 1875 1865 1876 1866 if (IS_ERR(bitmap)) 1877 1867 return PTR_ERR(bitmap); 1878 - 1879 - rv = bitmap_read_sb(bitmap); 1880 - if (rv) 1881 - goto err; 1882 1868 1883 1869 rv = bitmap_init_from_disk(bitmap, 0); 1884 1870 if (rv)

+11 -1

drivers/md/md-cluster.c

··· 44 44 45 45 /* md_cluster_info flags */ 46 46 #define MD_CLUSTER_WAITING_FOR_NEWDISK 1 47 + #define MD_CLUSTER_SUSPEND_READ_BALANCING 2 47 48 48 49 49 50 struct md_cluster_info { ··· 276 275 277 276 static void recover_prep(void *arg) 278 277 { 278 + struct mddev *mddev = arg; 279 + struct md_cluster_info *cinfo = mddev->cluster_info; 280 + set_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state); 279 281 } 280 282 281 283 static void recover_slot(void *arg, struct dlm_slot *slot) ··· 311 307 312 308 cinfo->slot_number = our_slot; 313 309 complete(&cinfo->completion); 310 + clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state); 314 311 } 315 312 316 313 static const struct dlm_lockspace_ops md_ls_ops = { ··· 821 816 resync_send(mddev, RESYNCING, 0, 0); 822 817 } 823 818 824 - static int area_resyncing(struct mddev *mddev, sector_t lo, sector_t hi) 819 + static int area_resyncing(struct mddev *mddev, int direction, 820 + sector_t lo, sector_t hi) 825 821 { 826 822 struct md_cluster_info *cinfo = mddev->cluster_info; 827 823 int ret = 0; 828 824 struct suspend_info *s; 825 + 826 + if ((direction == READ) && 827 + test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state)) 828 + return 1; 829 829 830 830 spin_lock_irq(&cinfo->suspend_lock); 831 831 if (list_empty(&cinfo->suspend_list))

+1 -1

drivers/md/md-cluster.h

··· 18 18 int (*metadata_update_start)(struct mddev *mddev); 19 19 int (*metadata_update_finish)(struct mddev *mddev); 20 20 int (*metadata_update_cancel)(struct mddev *mddev); 21 - int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi); 21 + int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi); 22 22 int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev); 23 23 int (*add_new_disk_finish)(struct mddev *mddev); 24 24 int (*new_disk_ack)(struct mddev *mddev, bool ack);

+3 -1

drivers/md/md.c

··· 5382 5382 { 5383 5383 struct md_personality *pers = mddev->pers; 5384 5384 mddev_detach(mddev); 5385 + /* Ensure ->event_work is done */ 5386 + flush_workqueue(md_misc_wq); 5385 5387 spin_lock(&mddev->lock); 5386 5388 mddev->ready = 0; 5387 5389 mddev->pers = NULL; ··· 7439 7437 err = request_module("md-cluster"); 7440 7438 if (err) { 7441 7439 pr_err("md-cluster module not found.\n"); 7442 - return err; 7440 + return -ENOENT; 7443 7441 } 7444 7442 7445 7443 spin_lock(&pers_lock);

+5 -4

drivers/md/raid1.c

··· 336 336 spin_lock_irqsave(&conf->device_lock, flags); 337 337 if (r1_bio->mddev->degraded == conf->raid_disks || 338 338 (r1_bio->mddev->degraded == conf->raid_disks-1 && 339 - !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))) 339 + test_bit(In_sync, &conf->mirrors[mirror].rdev->flags))) 340 340 uptodate = 1; 341 341 spin_unlock_irqrestore(&conf->device_lock, flags); 342 342 } ··· 541 541 542 542 if ((conf->mddev->recovery_cp < this_sector + sectors) || 543 543 (mddev_is_clustered(conf->mddev) && 544 - md_cluster_ops->area_resyncing(conf->mddev, this_sector, 544 + md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector, 545 545 this_sector + sectors))) 546 546 choose_first = 1; 547 547 else ··· 1111 1111 ((bio_end_sector(bio) > mddev->suspend_lo && 1112 1112 bio->bi_iter.bi_sector < mddev->suspend_hi) || 1113 1113 (mddev_is_clustered(mddev) && 1114 - md_cluster_ops->area_resyncing(mddev, bio->bi_iter.bi_sector, bio_end_sector(bio))))) { 1114 + md_cluster_ops->area_resyncing(mddev, WRITE, 1115 + bio->bi_iter.bi_sector, bio_end_sector(bio))))) { 1115 1116 /* As the suspend_* range is controlled by 1116 1117 * userspace, we want an interruptible 1117 1118 * wait. ··· 1125 1124 if (bio_end_sector(bio) <= mddev->suspend_lo || 1126 1125 bio->bi_iter.bi_sector >= mddev->suspend_hi || 1127 1126 (mddev_is_clustered(mddev) && 1128 - !md_cluster_ops->area_resyncing(mddev, 1127 + !md_cluster_ops->area_resyncing(mddev, WRITE, 1129 1128 bio->bi_iter.bi_sector, bio_end_sector(bio)))) 1130 1129 break; 1131 1130 schedule();

+4 -1

drivers/md/raid10.c

··· 3556 3556 /* far_copies must be 1 */ 3557 3557 conf->prev.stride = conf->dev_sectors; 3558 3558 } 3559 + conf->reshape_safe = conf->reshape_progress; 3559 3560 spin_lock_init(&conf->device_lock); 3560 3561 INIT_LIST_HEAD(&conf->retry_list); 3561 3562 ··· 3761 3760 } 3762 3761 conf->offset_diff = min_offset_diff; 3763 3762 3764 - conf->reshape_safe = conf->reshape_progress; 3765 3763 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 3766 3764 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 3767 3765 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); ··· 4103 4103 conf->reshape_progress = size; 4104 4104 } else 4105 4105 conf->reshape_progress = 0; 4106 + conf->reshape_safe = conf->reshape_progress; 4106 4107 spin_unlock_irq(&conf->device_lock); 4107 4108 4108 4109 if (mddev->delta_disks && mddev->bitmap) { ··· 4171 4170 rdev->new_data_offset = rdev->data_offset; 4172 4171 smp_wmb(); 4173 4172 conf->reshape_progress = MaxSector; 4173 + conf->reshape_safe = MaxSector; 4174 4174 mddev->reshape_position = MaxSector; 4175 4175 spin_unlock_irq(&conf->device_lock); 4176 4176 return ret; ··· 4526 4524 md_finish_reshape(conf->mddev); 4527 4525 smp_wmb(); 4528 4526 conf->reshape_progress = MaxSector; 4527 + conf->reshape_safe = MaxSector; 4529 4528 spin_unlock_irq(&conf->device_lock); 4530 4529 4531 4530 /* read-ahead size must cover two whole stripes, which is

+28 -7

drivers/md/raid5.c

··· 2162 2162 if (!sc) 2163 2163 return -ENOMEM; 2164 2164 2165 + /* Need to ensure auto-resizing doesn't interfere */ 2166 + mutex_lock(&conf->cache_size_mutex); 2167 + 2165 2168 for (i = conf->max_nr_stripes; i; i--) { 2166 2169 nsh = alloc_stripe(sc, GFP_KERNEL); 2167 2170 if (!nsh) ··· 2181 2178 kmem_cache_free(sc, nsh); 2182 2179 } 2183 2180 kmem_cache_destroy(sc); 2181 + mutex_unlock(&conf->cache_size_mutex); 2184 2182 return -ENOMEM; 2185 2183 } 2186 2184 /* Step 2 - Must use GFP_NOIO now. ··· 2228 2224 } else 2229 2225 err = -ENOMEM; 2230 2226 2227 + mutex_unlock(&conf->cache_size_mutex); 2231 2228 /* Step 4, return new stripes to service */ 2232 2229 while(!list_empty(&newstripes)) { 2233 2230 nsh = list_entry(newstripes.next, struct stripe_head, lru); ··· 4066 4061 &first_bad, &bad_sectors)) 4067 4062 set_bit(R5_ReadRepl, &dev->flags); 4068 4063 else { 4069 - if (rdev) 4064 + if (rdev && !test_bit(Faulty, &rdev->flags)) 4070 4065 set_bit(R5_NeedReplace, &dev->flags); 4066 + else 4067 + clear_bit(R5_NeedReplace, &dev->flags); 4071 4068 rdev = rcu_dereference(conf->disks[i].rdev); 4072 4069 clear_bit(R5_ReadRepl, &dev->flags); 4073 4070 } ··· 5864 5857 pr_debug("%d stripes handled\n", handled); 5865 5858 5866 5859 spin_unlock_irq(&conf->device_lock); 5867 - if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) { 5860 + if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && 5861 + mutex_trylock(&conf->cache_size_mutex)) { 5868 5862 grow_one_stripe(conf, __GFP_NOWARN); 5869 5863 /* Set flag even if allocation failed. This helps 5870 5864 * slow down allocation requests when mem is short 5871 5865 */ 5872 5866 set_bit(R5_DID_ALLOC, &conf->cache_state); 5867 + mutex_unlock(&conf->cache_size_mutex); 5873 5868 } 5874 5869 5875 5870 async_tx_issue_pending_all(); ··· 5903 5894 return -EINVAL; 5904 5895 5905 5896 conf->min_nr_stripes = size; 5897 + mutex_lock(&conf->cache_size_mutex); 5906 5898 while (size < conf->max_nr_stripes && 5907 5899 drop_one_stripe(conf)) 5908 5900 ; 5901 + mutex_unlock(&conf->cache_size_mutex); 5909 5902 5910 5903 5911 5904 err = md_allow_write(mddev); 5912 5905 if (err) 5913 5906 return err; 5914 5907 5908 + mutex_lock(&conf->cache_size_mutex); 5915 5909 while (size > conf->max_nr_stripes) 5916 5910 if (!grow_one_stripe(conf, GFP_KERNEL)) 5917 5911 break; 5912 + mutex_unlock(&conf->cache_size_mutex); 5918 5913 5919 5914 return 0; 5920 5915 } ··· 6384 6371 struct shrink_control *sc) 6385 6372 { 6386 6373 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); 6387 - int ret = 0; 6388 - while (ret < sc->nr_to_scan) { 6389 - if (drop_one_stripe(conf) == 0) 6390 - return SHRINK_STOP; 6391 - ret++; 6374 + unsigned long ret = SHRINK_STOP; 6375 + 6376 + if (mutex_trylock(&conf->cache_size_mutex)) { 6377 + ret= 0; 6378 + while (ret < sc->nr_to_scan) { 6379 + if (drop_one_stripe(conf) == 0) { 6380 + ret = SHRINK_STOP; 6381 + break; 6382 + } 6383 + ret++; 6384 + } 6385 + mutex_unlock(&conf->cache_size_mutex); 6392 6386 } 6393 6387 return ret; 6394 6388 } ··· 6464 6444 goto abort; 6465 6445 spin_lock_init(&conf->device_lock); 6466 6446 seqcount_init(&conf->gen_lock); 6447 + mutex_init(&conf->cache_size_mutex); 6467 6448 init_waitqueue_head(&conf->wait_for_quiescent); 6468 6449 for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { 6469 6450 init_waitqueue_head(&conf->wait_for_stripe[i]);

+2 -1

drivers/md/raid5.h

··· 482 482 */ 483 483 int active_name; 484 484 char cache_name[2][32]; 485 - struct kmem_cache *slab_cache; /* for allocating stripes */ 485 + struct kmem_cache *slab_cache; /* for allocating stripes */ 486 + struct mutex cache_size_mutex; /* Protect changes to cache size */ 486 487 487 488 int seq_flush, seq_write; 488 489 int quiesce;

Configure Feed

Configure Feed