Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"The important part of this pull is Filipe's set of fixes for btrfs
device replacement. Filipe fixed a few issues seen on the list and a
number he found on his own"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: deal with duplicates during extent_map insertion in btrfs_get_extent
Btrfs: fix race between device replace and read repair
Btrfs: fix race between device replace and discard
Btrfs: fix race between device replace and chunk allocation
Btrfs: fix race setting block group back to RW mode during device replace
Btrfs: fix unprotected assignment of the left cursor for device replace
Btrfs: fix race setting block group readonly during device replace
Btrfs: fix race between device replace and block group removal
Btrfs: fix race between readahead and device replace/removal

+103 -18
+6
fs/btrfs/extent-tree.c
··· 2042 2042 struct btrfs_bio *bbio = NULL; 2043 2043 2044 2044 2045 + /* 2046 + * Avoid races with device replace and make sure our bbio has devices 2047 + * associated to its stripes that don't go away while we are discarding. 2048 + */ 2049 + btrfs_bio_counter_inc_blocked(root->fs_info); 2045 2050 /* Tell the block device(s) that the sectors can be discarded */ 2046 2051 ret = btrfs_map_block(root->fs_info, REQ_DISCARD, 2047 2052 bytenr, &num_bytes, &bbio, 0); ··· 2079 2074 } 2080 2075 btrfs_put_bbio(bbio); 2081 2076 } 2077 + btrfs_bio_counter_dec(root->fs_info); 2082 2078 2083 2079 if (actual_bytes) 2084 2080 *actual_bytes = discarded_bytes;
+10
fs/btrfs/extent_io.c
··· 2025 2025 bio->bi_iter.bi_size = 0; 2026 2026 map_length = length; 2027 2027 2028 + /* 2029 + * Avoid races with device replace and make sure our bbio has devices 2030 + * associated to its stripes that don't go away while we are doing the 2031 + * read repair operation. 2032 + */ 2033 + btrfs_bio_counter_inc_blocked(fs_info); 2028 2034 ret = btrfs_map_block(fs_info, WRITE, logical, 2029 2035 &map_length, &bbio, mirror_num); 2030 2036 if (ret) { 2037 + btrfs_bio_counter_dec(fs_info); 2031 2038 bio_put(bio); 2032 2039 return -EIO; 2033 2040 } ··· 2044 2037 dev = bbio->stripes[mirror_num-1].dev; 2045 2038 btrfs_put_bbio(bbio); 2046 2039 if (!dev || !dev->bdev || !dev->writeable) { 2040 + btrfs_bio_counter_dec(fs_info); 2047 2041 bio_put(bio); 2048 2042 return -EIO; 2049 2043 } ··· 2053 2045 2054 2046 if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { 2055 2047 /* try to remap that extent elsewhere? */ 2048 + btrfs_bio_counter_dec(fs_info); 2056 2049 bio_put(bio); 2057 2050 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); 2058 2051 return -EIO; ··· 2063 2054 "read error corrected: ino %llu off %llu (dev %s sector %llu)", 2064 2055 btrfs_ino(inode), start, 2065 2056 rcu_str_deref(dev->name), sector); 2057 + btrfs_bio_counter_dec(fs_info); 2066 2058 bio_put(bio); 2067 2059 return 0; 2068 2060 }
+12 -1
fs/btrfs/inode.c
··· 6979 6979 * existing will always be non-NULL, since there must be 6980 6980 * extent causing the -EEXIST. 6981 6981 */ 6982 - if (start >= extent_map_end(existing) || 6982 + if (existing->start == em->start && 6983 + extent_map_end(existing) == extent_map_end(em) && 6984 + em->block_start == existing->block_start) { 6985 + /* 6986 + * these two extents are the same, it happens 6987 + * with inlines especially 6988 + */ 6989 + free_extent_map(em); 6990 + em = existing; 6991 + err = 0; 6992 + 6993 + } else if (start >= extent_map_end(existing) || 6983 6994 start <= existing->start) { 6984 6995 /* 6985 6996 * The existing extent map is the one nearest to
+5 -1
fs/btrfs/ordered-data.c
··· 718 718 return count; 719 719 } 720 720 721 - void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, 721 + int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, 722 722 const u64 range_start, const u64 range_len) 723 723 { 724 724 struct btrfs_root *root; 725 725 struct list_head splice; 726 726 int done; 727 + int total_done = 0; 727 728 728 729 INIT_LIST_HEAD(&splice); 729 730 ··· 743 742 done = btrfs_wait_ordered_extents(root, nr, 744 743 range_start, range_len); 745 744 btrfs_put_fs_root(root); 745 + total_done += done; 746 746 747 747 spin_lock(&fs_info->ordered_root_lock); 748 748 if (nr != -1) { ··· 754 752 list_splice_tail(&splice, &fs_info->ordered_roots); 755 753 spin_unlock(&fs_info->ordered_root_lock); 756 754 mutex_unlock(&fs_info->ordered_operations_mutex); 755 + 756 + return total_done; 757 757 } 758 758 759 759 /*
+1 -1
fs/btrfs/ordered-data.h
··· 199 199 u32 *sum, int len); 200 200 int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, 201 201 const u64 range_start, const u64 range_len); 202 - void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, 202 + int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, 203 203 const u64 range_start, const u64 range_len); 204 204 void btrfs_get_logged_extents(struct inode *inode, 205 205 struct list_head *logged_list,
+2
fs/btrfs/reada.c
··· 761 761 762 762 do { 763 763 enqueued = 0; 764 + mutex_lock(&fs_devices->device_list_mutex); 764 765 list_for_each_entry(device, &fs_devices->devices, dev_list) { 765 766 if (atomic_read(&device->reada_in_flight) < 766 767 MAX_IN_FLIGHT) 767 768 enqueued += reada_start_machine_dev(fs_info, 768 769 device); 769 770 } 771 + mutex_unlock(&fs_devices->device_list_mutex); 770 772 total += enqueued; 771 773 } while (enqueued && total < 10000); 772 774
+47 -3
fs/btrfs/scrub.c
··· 3582 3582 */ 3583 3583 scrub_pause_on(fs_info); 3584 3584 ret = btrfs_inc_block_group_ro(root, cache); 3585 + if (!ret && is_dev_replace) { 3586 + /* 3587 + * If we are doing a device replace wait for any tasks 3588 + * that started dellaloc right before we set the block 3589 + * group to RO mode, as they might have just allocated 3590 + * an extent from it or decided they could do a nocow 3591 + * write. And if any such tasks did that, wait for their 3592 + * ordered extents to complete and then commit the 3593 + * current transaction, so that we can later see the new 3594 + * extent items in the extent tree - the ordered extents 3595 + * create delayed data references (for cow writes) when 3596 + * they complete, which will be run and insert the 3597 + * corresponding extent items into the extent tree when 3598 + * we commit the transaction they used when running 3599 + * inode.c:btrfs_finish_ordered_io(). We later use 3600 + * the commit root of the extent tree to find extents 3601 + * to copy from the srcdev into the tgtdev, and we don't 3602 + * want to miss any new extents. 
3603 + */ 3604 + btrfs_wait_block_group_reservations(cache); 3605 + btrfs_wait_nocow_writers(cache); 3606 + ret = btrfs_wait_ordered_roots(fs_info, -1, 3607 + cache->key.objectid, 3608 + cache->key.offset); 3609 + if (ret > 0) { 3610 + struct btrfs_trans_handle *trans; 3611 + 3612 + trans = btrfs_join_transaction(root); 3613 + if (IS_ERR(trans)) 3614 + ret = PTR_ERR(trans); 3615 + else 3616 + ret = btrfs_commit_transaction(trans, 3617 + root); 3618 + if (ret) { 3619 + scrub_pause_off(fs_info); 3620 + btrfs_put_block_group(cache); 3621 + break; 3622 + } 3623 + } 3624 + } 3585 3625 scrub_pause_off(fs_info); 3586 3626 3587 3627 if (ret == 0) { ··· 3642 3602 break; 3643 3603 } 3644 3604 3605 + btrfs_dev_replace_lock(&fs_info->dev_replace, 1); 3645 3606 dev_replace->cursor_right = found_key.offset + length; 3646 3607 dev_replace->cursor_left = found_key.offset; 3647 3608 dev_replace->item_needs_writeback = 1; 3609 + btrfs_dev_replace_unlock(&fs_info->dev_replace, 1); 3648 3610 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, 3649 3611 found_key.offset, cache, is_dev_replace); 3650 3612 ··· 3681 3639 atomic_set(&sctx->wr_ctx.flush_all_writes, 0); 3682 3640 3683 3641 scrub_pause_off(fs_info); 3642 + 3643 + btrfs_dev_replace_lock(&fs_info->dev_replace, 1); 3644 + dev_replace->cursor_left = dev_replace->cursor_right; 3645 + dev_replace->item_needs_writeback = 1; 3646 + btrfs_dev_replace_unlock(&fs_info->dev_replace, 1); 3684 3647 3685 3648 if (ro_set) 3686 3649 btrfs_dec_block_group_ro(root, cache); ··· 3724 3677 ret = -ENOMEM; 3725 3678 break; 3726 3679 } 3727 - 3728 - dev_replace->cursor_left = dev_replace->cursor_right; 3729 - dev_replace->item_needs_writeback = 1; 3730 3680 skip: 3731 3681 key.offset = found_key.offset + length; 3732 3682 btrfs_release_path(path);
+20 -12
fs/btrfs/volumes.c
··· 2761 2761 u64 dev_extent_len = 0; 2762 2762 u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 2763 2763 int i, ret = 0; 2764 + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2764 2765 2765 2766 /* Just in case */ 2766 2767 root = root->fs_info->chunk_root; ··· 2788 2787 check_system_chunk(trans, extent_root, map->type); 2789 2788 unlock_chunks(root->fs_info->chunk_root); 2790 2789 2790 + /* 2791 + * Take the device list mutex to prevent races with the final phase of 2792 + * a device replace operation that replaces the device object associated 2793 + * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()). 2794 + */ 2795 + mutex_lock(&fs_devices->device_list_mutex); 2791 2796 for (i = 0; i < map->num_stripes; i++) { 2792 2797 struct btrfs_device *device = map->stripes[i].dev; 2793 2798 ret = btrfs_free_dev_extent(trans, device, 2794 2799 map->stripes[i].physical, 2795 2800 &dev_extent_len); 2796 2801 if (ret) { 2802 + mutex_unlock(&fs_devices->device_list_mutex); 2797 2803 btrfs_abort_transaction(trans, root, ret); 2798 2804 goto out; 2799 2805 } ··· 2819 2811 if (map->stripes[i].dev) { 2820 2812 ret = btrfs_update_device(trans, map->stripes[i].dev); 2821 2813 if (ret) { 2814 + mutex_unlock(&fs_devices->device_list_mutex); 2822 2815 btrfs_abort_transaction(trans, root, ret); 2823 2816 goto out; 2824 2817 } 2825 2818 } 2826 2819 } 2820 + mutex_unlock(&fs_devices->device_list_mutex); 2821 + 2827 2822 ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset); 2828 2823 if (ret) { 2829 2824 btrfs_abort_transaction(trans, root, ret); ··· 5773 5762 } 5774 5763 } 5775 5764 if (found) { 5776 - if (physical_of_found + map->stripe_len <= 5777 - dev_replace->cursor_left) { 5778 - struct btrfs_bio_stripe *tgtdev_stripe = 5779 - bbio->stripes + num_stripes; 5765 + struct btrfs_bio_stripe *tgtdev_stripe = 5766 + bbio->stripes + num_stripes; 5780 5767 5781 - tgtdev_stripe->physical = physical_of_found; 5782 - tgtdev_stripe->length = 5783 
- bbio->stripes[index_srcdev].length; 5784 - tgtdev_stripe->dev = dev_replace->tgtdev; 5785 - bbio->tgtdev_map[index_srcdev] = num_stripes; 5768 + tgtdev_stripe->physical = physical_of_found; 5769 + tgtdev_stripe->length = 5770 + bbio->stripes[index_srcdev].length; 5771 + tgtdev_stripe->dev = dev_replace->tgtdev; 5772 + bbio->tgtdev_map[index_srcdev] = num_stripes; 5786 5773 5787 - tgtdev_indexes++; 5788 - num_stripes++; 5789 - } 5774 + tgtdev_indexes++; 5775 + num_stripes++; 5790 5776 } 5791 5777 } 5792 5778