Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"Miao Xie has been very busy, fixing races and ENOSPC problems and many
other small but important pieces.

Alexandre Oliva discovered some problems with how our error handling
was interacting with the block layer and for now has disabled our
partial handling of sub-page writes. The real sub-page work is in a
series of patches from IBM that we still need to integrate and test.
The code Alexandre has turned off was really incomplete.

Josef has more error handling fixes and an important fix for the new
skinny extent format.

This also has my fix for the tracepoint crash from late in 3.9. It's
the first stage in a larger cleanup to get rid of btrfs_bio and make
a proper bioset for all the items we need to tack into the bio. For
now the bioset only holds our mirror_num and stripe_index, but for the
next merge window I'll shuffle more in."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits)
Btrfs: use a btrfs bioset instead of abusing bio internals
Btrfs: make sure roots are assigned before freeing their nodes
Btrfs: explicitly use global_block_rsv for quota_tree
btrfs: do away with non-whole_page extent I/O
Btrfs: don't invoke btrfs_invalidate_inodes() in the spin lock context
Btrfs: remove BUG_ON() in btrfs_read_fs_tree_no_radix()
Btrfs: pause the space balance when remounting to R/O
Btrfs: fix unprotected root node of the subvolume's inode rb-tree
Btrfs: fix accessing a freed tree root
Btrfs: return errno if possible when we fail to allocate memory
Btrfs: update the global reserve if it is empty
Btrfs: don't steal the reserved space from the global reserve if their space type is different
Btrfs: optimize the error handle of use_block_rsv()
Btrfs: don't use global block reservation for inode cache truncation
Btrfs: don't abort the current transaction if there is no enough space for inode cache
Correct allowed raid levels on balance.
Btrfs: fix possible memory leak in replace_path()
Btrfs: fix possible memory leak in the find_parent_nodes()
Btrfs: don't allow device replace on RAID5/RAID6
Btrfs: handle running extent ops with skinny metadata
...

+303 -248
+2 -1
fs/btrfs/backref.c
··· 918 918 ref->parent, bsz, 0); 919 919 if (!eb || !extent_buffer_uptodate(eb)) { 920 920 free_extent_buffer(eb); 921 - return -EIO; 921 + ret = -EIO; 922 + goto out; 922 923 } 923 924 ret = find_extent_in_eb(eb, bytenr, 924 925 *extent_item_pos, &eie);
+1 -1
fs/btrfs/check-integrity.c
··· 1700 1700 unsigned int j; 1701 1701 DECLARE_COMPLETION_ONSTACK(complete); 1702 1702 1703 - bio = bio_alloc(GFP_NOFS, num_pages - i); 1703 + bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i); 1704 1704 if (!bio) { 1705 1705 printk(KERN_INFO 1706 1706 "btrfsic: bio_alloc() for %u pages failed!\n",
+3 -1
fs/btrfs/ctree.c
··· 951 951 BUG_ON(ret); /* -ENOMEM */ 952 952 } 953 953 if (new_flags != 0) { 954 + int level = btrfs_header_level(buf); 955 + 954 956 ret = btrfs_set_disk_extent_flags(trans, root, 955 957 buf->start, 956 958 buf->len, 957 - new_flags, 0); 959 + new_flags, level, 0); 958 960 if (ret) 959 961 return ret; 960 962 }
+4 -4
fs/btrfs/ctree.h
··· 88 88 /* holds checksums of all the data extents */ 89 89 #define BTRFS_CSUM_TREE_OBJECTID 7ULL 90 90 91 - /* for storing balance parameters in the root tree */ 92 - #define BTRFS_BALANCE_OBJECTID -4ULL 93 - 94 91 /* holds quota configuration and tracking */ 95 92 #define BTRFS_QUOTA_TREE_OBJECTID 8ULL 93 + 94 + /* for storing balance parameters in the root tree */ 95 + #define BTRFS_BALANCE_OBJECTID -4ULL 96 96 97 97 /* orhpan objectid for tracking unlinked/truncated files */ 98 98 #define BTRFS_ORPHAN_OBJECTID -5ULL ··· 3075 3075 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3076 3076 struct btrfs_root *root, 3077 3077 u64 bytenr, u64 num_bytes, u64 flags, 3078 - int is_data); 3078 + int level, int is_data); 3079 3079 int btrfs_free_extent(struct btrfs_trans_handle *trans, 3080 3080 struct btrfs_root *root, 3081 3081 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+1
fs/btrfs/delayed-ref.h
··· 60 60 struct btrfs_delayed_extent_op { 61 61 struct btrfs_disk_key key; 62 62 u64 flags_to_set; 63 + int level; 63 64 unsigned int update_key:1; 64 65 unsigned int update_flags:1; 65 66 unsigned int is_data:1;
+5
fs/btrfs/dev-replace.c
··· 313 313 struct btrfs_device *tgt_device = NULL; 314 314 struct btrfs_device *src_device = NULL; 315 315 316 + if (btrfs_fs_incompat(fs_info, RAID56)) { 317 + pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n"); 318 + return -EINVAL; 319 + } 320 + 316 321 switch (args->start.cont_reading_from_srcdev_mode) { 317 322 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: 318 323 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
+30 -22
fs/btrfs/disk-io.c
··· 152 152 { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, 153 153 { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, 154 154 { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, 155 - { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, 155 + { .id = BTRFS_QUOTA_TREE_OBJECTID, .name_stem = "quota" }, 156 156 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, 157 157 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, 158 158 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, ··· 1513 1513 } 1514 1514 1515 1515 root->commit_root = btrfs_root_node(root); 1516 - BUG_ON(!root->node); /* -ENOMEM */ 1517 1516 out: 1518 1517 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1519 1518 root->ref_cows = 1; ··· 1987 1988 { 1988 1989 free_extent_buffer(info->tree_root->node); 1989 1990 free_extent_buffer(info->tree_root->commit_root); 1990 - free_extent_buffer(info->dev_root->node); 1991 - free_extent_buffer(info->dev_root->commit_root); 1992 - free_extent_buffer(info->extent_root->node); 1993 - free_extent_buffer(info->extent_root->commit_root); 1994 - free_extent_buffer(info->csum_root->node); 1995 - free_extent_buffer(info->csum_root->commit_root); 1991 + info->tree_root->node = NULL; 1992 + info->tree_root->commit_root = NULL; 1993 + 1994 + if (info->dev_root) { 1995 + free_extent_buffer(info->dev_root->node); 1996 + free_extent_buffer(info->dev_root->commit_root); 1997 + info->dev_root->node = NULL; 1998 + info->dev_root->commit_root = NULL; 1999 + } 2000 + if (info->extent_root) { 2001 + free_extent_buffer(info->extent_root->node); 2002 + free_extent_buffer(info->extent_root->commit_root); 2003 + info->extent_root->node = NULL; 2004 + info->extent_root->commit_root = NULL; 2005 + } 2006 + if (info->csum_root) { 2007 + free_extent_buffer(info->csum_root->node); 2008 + free_extent_buffer(info->csum_root->commit_root); 2009 + info->csum_root->node = NULL; 2010 + info->csum_root->commit_root = NULL; 2011 + } 1996 2012 if 
(info->quota_root) { 1997 2013 free_extent_buffer(info->quota_root->node); 1998 2014 free_extent_buffer(info->quota_root->commit_root); 1999 - } 2000 - 2001 - info->tree_root->node = NULL; 2002 - info->tree_root->commit_root = NULL; 2003 - info->dev_root->node = NULL; 2004 - info->dev_root->commit_root = NULL; 2005 - info->extent_root->node = NULL; 2006 - info->extent_root->commit_root = NULL; 2007 - info->csum_root->node = NULL; 2008 - info->csum_root->commit_root = NULL; 2009 - if (info->quota_root) { 2010 2015 info->quota_root->node = NULL; 2011 2016 info->quota_root->commit_root = NULL; 2012 2017 } 2013 - 2014 2018 if (chunk_root) { 2015 2019 free_extent_buffer(info->chunk_root->node); 2016 2020 free_extent_buffer(info->chunk_root->commit_root); ··· 3130 3128 * caller 3131 3129 */ 3132 3130 device->flush_bio = NULL; 3133 - bio = bio_alloc(GFP_NOFS, 0); 3131 + bio = btrfs_io_bio_alloc(GFP_NOFS, 0); 3134 3132 if (!bio) 3135 3133 return -ENOMEM; 3136 3134 ··· 3661 3659 ordered_operations); 3662 3660 3663 3661 list_del_init(&btrfs_inode->ordered_operations); 3662 + spin_unlock(&root->fs_info->ordered_extent_lock); 3664 3663 3665 3664 btrfs_invalidate_inodes(btrfs_inode->root); 3665 + 3666 + spin_lock(&root->fs_info->ordered_extent_lock); 3666 3667 } 3667 3668 3668 3669 spin_unlock(&root->fs_info->ordered_extent_lock); ··· 3787 3782 list_del_init(&btrfs_inode->delalloc_inodes); 3788 3783 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, 3789 3784 &btrfs_inode->runtime_flags); 3785 + spin_unlock(&root->fs_info->delalloc_lock); 3790 3786 3791 3787 btrfs_invalidate_inodes(btrfs_inode->root); 3788 + 3789 + spin_lock(&root->fs_info->delalloc_lock); 3792 3790 } 3793 3791 3794 3792 spin_unlock(&root->fs_info->delalloc_lock); ··· 3816 3808 while (start <= end) { 3817 3809 eb = btrfs_find_tree_block(root, start, 3818 3810 root->leafsize); 3819 - start += eb->len; 3811 + start += root->leafsize; 3820 3812 if (!eb) 3821 3813 continue; 3822 3814 wait_on_extent_buffer_writeback(eb);
+51 -45
fs/btrfs/extent-tree.c
··· 2070 2070 u32 item_size; 2071 2071 int ret; 2072 2072 int err = 0; 2073 - int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2074 - node->type == BTRFS_SHARED_BLOCK_REF_KEY); 2073 + int metadata = !extent_op->is_data; 2075 2074 2076 2075 if (trans->aborted) 2077 2076 return 0; ··· 2085 2086 key.objectid = node->bytenr; 2086 2087 2087 2088 if (metadata) { 2088 - struct btrfs_delayed_tree_ref *tree_ref; 2089 - 2090 - tree_ref = btrfs_delayed_node_to_tree_ref(node); 2091 2089 key.type = BTRFS_METADATA_ITEM_KEY; 2092 - key.offset = tree_ref->level; 2090 + key.offset = extent_op->level; 2093 2091 } else { 2094 2092 key.type = BTRFS_EXTENT_ITEM_KEY; 2095 2093 key.offset = node->num_bytes; ··· 2715 2719 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2716 2720 struct btrfs_root *root, 2717 2721 u64 bytenr, u64 num_bytes, u64 flags, 2718 - int is_data) 2722 + int level, int is_data) 2719 2723 { 2720 2724 struct btrfs_delayed_extent_op *extent_op; 2721 2725 int ret; ··· 2728 2732 extent_op->update_flags = 1; 2729 2733 extent_op->update_key = 0; 2730 2734 extent_op->is_data = is_data ? 
1 : 0; 2735 + extent_op->level = level; 2731 2736 2732 2737 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, 2733 2738 num_bytes, extent_op); ··· 3106 3109 WARN_ON(ret); 3107 3110 3108 3111 if (i_size_read(inode) > 0) { 3112 + ret = btrfs_check_trunc_cache_free_space(root, 3113 + &root->fs_info->global_block_rsv); 3114 + if (ret) 3115 + goto out_put; 3116 + 3109 3117 ret = btrfs_truncate_free_space_cache(root, trans, path, 3110 3118 inode); 3111 3119 if (ret) ··· 4564 4562 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 4565 4563 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; 4566 4564 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 4565 + if (fs_info->quota_root) 4566 + fs_info->quota_root->block_rsv = &fs_info->global_block_rsv; 4567 4567 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 4568 4568 4569 4569 update_global_block_rsv(fs_info); ··· 6655 6651 struct btrfs_block_rsv *block_rsv; 6656 6652 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 6657 6653 int ret; 6654 + bool global_updated = false; 6658 6655 6659 6656 block_rsv = get_block_rsv(trans, root); 6660 6657 6661 - if (block_rsv->size == 0) { 6662 - ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6663 - BTRFS_RESERVE_NO_FLUSH); 6664 - /* 6665 - * If we couldn't reserve metadata bytes try and use some from 6666 - * the global reserve. 
6667 - */ 6668 - if (ret && block_rsv != global_rsv) { 6669 - ret = block_rsv_use_bytes(global_rsv, blocksize); 6670 - if (!ret) 6671 - return global_rsv; 6672 - return ERR_PTR(ret); 6673 - } else if (ret) { 6674 - return ERR_PTR(ret); 6675 - } 6676 - return block_rsv; 6677 - } 6678 - 6658 + if (unlikely(block_rsv->size == 0)) 6659 + goto try_reserve; 6660 + again: 6679 6661 ret = block_rsv_use_bytes(block_rsv, blocksize); 6680 6662 if (!ret) 6681 6663 return block_rsv; 6682 - if (ret && !block_rsv->failfast) { 6683 - if (btrfs_test_opt(root, ENOSPC_DEBUG)) { 6684 - static DEFINE_RATELIMIT_STATE(_rs, 6685 - DEFAULT_RATELIMIT_INTERVAL * 10, 6686 - /*DEFAULT_RATELIMIT_BURST*/ 1); 6687 - if (__ratelimit(&_rs)) 6688 - WARN(1, KERN_DEBUG 6689 - "btrfs: block rsv returned %d\n", ret); 6690 - } 6691 - ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6692 - BTRFS_RESERVE_NO_FLUSH); 6693 - if (!ret) { 6694 - return block_rsv; 6695 - } else if (ret && block_rsv != global_rsv) { 6696 - ret = block_rsv_use_bytes(global_rsv, blocksize); 6697 - if (!ret) 6698 - return global_rsv; 6699 - } 6664 + 6665 + if (block_rsv->failfast) 6666 + return ERR_PTR(ret); 6667 + 6668 + if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) { 6669 + global_updated = true; 6670 + update_global_block_rsv(root->fs_info); 6671 + goto again; 6700 6672 } 6701 6673 6702 - return ERR_PTR(-ENOSPC); 6674 + if (btrfs_test_opt(root, ENOSPC_DEBUG)) { 6675 + static DEFINE_RATELIMIT_STATE(_rs, 6676 + DEFAULT_RATELIMIT_INTERVAL * 10, 6677 + /*DEFAULT_RATELIMIT_BURST*/ 1); 6678 + if (__ratelimit(&_rs)) 6679 + WARN(1, KERN_DEBUG 6680 + "btrfs: block rsv returned %d\n", ret); 6681 + } 6682 + try_reserve: 6683 + ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6684 + BTRFS_RESERVE_NO_FLUSH); 6685 + if (!ret) 6686 + return block_rsv; 6687 + /* 6688 + * If we couldn't reserve metadata bytes try and use some from 6689 + * the global reserve if its space type is the same as the global 6690 + * 
reservation. 6691 + */ 6692 + if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL && 6693 + block_rsv->space_info == global_rsv->space_info) { 6694 + ret = block_rsv_use_bytes(global_rsv, blocksize); 6695 + if (!ret) 6696 + return global_rsv; 6697 + } 6698 + return ERR_PTR(ret); 6703 6699 } 6704 6700 6705 6701 static void unuse_block_rsv(struct btrfs_fs_info *fs_info, ··· 6767 6763 extent_op->update_key = 1; 6768 6764 extent_op->update_flags = 1; 6769 6765 extent_op->is_data = 0; 6766 + extent_op->level = level; 6770 6767 6771 6768 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, 6772 6769 ins.objectid, ··· 6939 6934 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 6940 6935 BUG_ON(ret); /* -ENOMEM */ 6941 6936 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 6942 - eb->len, flag, 0); 6937 + eb->len, flag, 6938 + btrfs_header_level(eb), 0); 6943 6939 BUG_ON(ret); /* -ENOMEM */ 6944 6940 wc->flags[level] |= flag; 6945 6941 }
+73 -65
fs/btrfs/extent_io.c
··· 23 23 24 24 static struct kmem_cache *extent_state_cache; 25 25 static struct kmem_cache *extent_buffer_cache; 26 + static struct bio_set *btrfs_bioset; 26 27 27 28 #ifdef CONFIG_BTRFS_DEBUG 28 29 static LIST_HEAD(buffers); ··· 126 125 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); 127 126 if (!extent_buffer_cache) 128 127 goto free_state_cache; 128 + 129 + btrfs_bioset = bioset_create(BIO_POOL_SIZE, 130 + offsetof(struct btrfs_io_bio, bio)); 131 + if (!btrfs_bioset) 132 + goto free_buffer_cache; 129 133 return 0; 134 + 135 + free_buffer_cache: 136 + kmem_cache_destroy(extent_buffer_cache); 137 + extent_buffer_cache = NULL; 130 138 131 139 free_state_cache: 132 140 kmem_cache_destroy(extent_state_cache); 141 + extent_state_cache = NULL; 133 142 return -ENOMEM; 134 143 } 135 144 ··· 156 145 kmem_cache_destroy(extent_state_cache); 157 146 if (extent_buffer_cache) 158 147 kmem_cache_destroy(extent_buffer_cache); 148 + if (btrfs_bioset) 149 + bioset_free(btrfs_bioset); 159 150 } 160 151 161 152 void extent_io_tree_init(struct extent_io_tree *tree, ··· 1961 1948 } 1962 1949 1963 1950 /* 1964 - * helper function to unlock a page if all the extents in the tree 1965 - * for that page are unlocked 1966 - */ 1967 - static void check_page_locked(struct extent_io_tree *tree, struct page *page) 1968 - { 1969 - u64 start = page_offset(page); 1970 - u64 end = start + PAGE_CACHE_SIZE - 1; 1971 - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) 1972 - unlock_page(page); 1973 - } 1974 - 1975 - /* 1976 - * helper function to end page writeback if all the extents 1977 - * in the tree for that page are done with writeback 1978 - */ 1979 - static void check_page_writeback(struct extent_io_tree *tree, 1980 - struct page *page) 1981 - { 1982 - end_page_writeback(page); 1983 - } 1984 - 1985 - /* 1986 1951 * When IO fails, either with EIO or csum verification fails, we 1987 1952 * try other mirrors that might have a good copy of the data. 
This 1988 1953 * io_failure_record is used to record state as we go through all the ··· 2037 2046 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num)) 2038 2047 return 0; 2039 2048 2040 - bio = bio_alloc(GFP_NOFS, 1); 2049 + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 2041 2050 if (!bio) 2042 2051 return -EIO; 2043 2052 bio->bi_private = &compl; ··· 2327 2336 return -EIO; 2328 2337 } 2329 2338 2330 - bio = bio_alloc(GFP_NOFS, 1); 2339 + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 2331 2340 if (!bio) { 2332 2341 free_io_failure(inode, failrec, 0); 2333 2342 return -EIO; ··· 2389 2398 struct extent_io_tree *tree; 2390 2399 u64 start; 2391 2400 u64 end; 2392 - int whole_page; 2393 2401 2394 2402 do { 2395 2403 struct page *page = bvec->bv_page; 2396 2404 tree = &BTRFS_I(page->mapping->host)->io_tree; 2397 2405 2398 - start = page_offset(page) + bvec->bv_offset; 2399 - end = start + bvec->bv_len - 1; 2406 + /* We always issue full-page reads, but if some block 2407 + * in a page fails to read, blk_update_request() will 2408 + * advance bv_offset and adjust bv_len to compensate. 2409 + * Print a warning for nonzero offsets, and an error 2410 + * if they don't add up to a full page. */ 2411 + if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) 2412 + printk("%s page write in btrfs with offset %u and length %u\n", 2413 + bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE 2414 + ? 
KERN_ERR "partial" : KERN_INFO "incomplete", 2415 + bvec->bv_offset, bvec->bv_len); 2400 2416 2401 - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 2402 - whole_page = 1; 2403 - else 2404 - whole_page = 0; 2417 + start = page_offset(page); 2418 + end = start + bvec->bv_offset + bvec->bv_len - 1; 2405 2419 2406 2420 if (--bvec >= bio->bi_io_vec) 2407 2421 prefetchw(&bvec->bv_page->flags); ··· 2414 2418 if (end_extent_writepage(page, err, start, end)) 2415 2419 continue; 2416 2420 2417 - if (whole_page) 2418 - end_page_writeback(page); 2419 - else 2420 - check_page_writeback(tree, page); 2421 + end_page_writeback(page); 2421 2422 } while (bvec >= bio->bi_io_vec); 2422 2423 2423 2424 bio_put(bio); ··· 2439 2446 struct extent_io_tree *tree; 2440 2447 u64 start; 2441 2448 u64 end; 2442 - int whole_page; 2443 2449 int mirror; 2444 2450 int ret; 2445 2451 ··· 2449 2457 struct page *page = bvec->bv_page; 2450 2458 struct extent_state *cached = NULL; 2451 2459 struct extent_state *state; 2460 + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 2452 2461 2453 2462 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2454 - "mirror=%ld\n", (u64)bio->bi_sector, err, 2455 - (long int)bio->bi_bdev); 2463 + "mirror=%lu\n", (u64)bio->bi_sector, err, 2464 + io_bio->mirror_num); 2456 2465 tree = &BTRFS_I(page->mapping->host)->io_tree; 2457 2466 2458 - start = page_offset(page) + bvec->bv_offset; 2459 - end = start + bvec->bv_len - 1; 2467 + /* We always issue full-page reads, but if some block 2468 + * in a page fails to read, blk_update_request() will 2469 + * advance bv_offset and adjust bv_len to compensate. 2470 + * Print a warning for nonzero offsets, and an error 2471 + * if they don't add up to a full page. */ 2472 + if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) 2473 + printk("%s page read in btrfs with offset %u and length %u\n", 2474 + bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE 2475 + ? 
KERN_ERR "partial" : KERN_INFO "incomplete", 2476 + bvec->bv_offset, bvec->bv_len); 2460 2477 2461 - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 2462 - whole_page = 1; 2463 - else 2464 - whole_page = 0; 2478 + start = page_offset(page); 2479 + end = start + bvec->bv_offset + bvec->bv_len - 1; 2465 2480 2466 2481 if (++bvec <= bvec_end) 2467 2482 prefetchw(&bvec->bv_page->flags); ··· 2484 2485 } 2485 2486 spin_unlock(&tree->lock); 2486 2487 2487 - mirror = (int)(unsigned long)bio->bi_bdev; 2488 + mirror = io_bio->mirror_num; 2488 2489 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2489 2490 ret = tree->ops->readpage_end_io_hook(page, start, end, 2490 2491 state, mirror); ··· 2527 2528 } 2528 2529 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); 2529 2530 2530 - if (whole_page) { 2531 - if (uptodate) { 2532 - SetPageUptodate(page); 2533 - } else { 2534 - ClearPageUptodate(page); 2535 - SetPageError(page); 2536 - } 2537 - unlock_page(page); 2531 + if (uptodate) { 2532 + SetPageUptodate(page); 2538 2533 } else { 2539 - if (uptodate) { 2540 - check_page_uptodate(tree, page); 2541 - } else { 2542 - ClearPageUptodate(page); 2543 - SetPageError(page); 2544 - } 2545 - check_page_locked(tree, page); 2534 + ClearPageUptodate(page); 2535 + SetPageError(page); 2546 2536 } 2537 + unlock_page(page); 2547 2538 } while (bvec <= bvec_end); 2548 2539 2549 2540 bio_put(bio); 2550 2541 } 2551 2542 2543 + /* 2544 + * this allocates from the btrfs_bioset. 
We're returning a bio right now 2545 + * but you can call btrfs_io_bio for the appropriate container_of magic 2546 + */ 2552 2547 struct bio * 2553 2548 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 2554 2549 gfp_t gfp_flags) 2555 2550 { 2556 2551 struct bio *bio; 2557 2552 2558 - bio = bio_alloc(gfp_flags, nr_vecs); 2553 + bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); 2559 2554 2560 2555 if (bio == NULL && (current->flags & PF_MEMALLOC)) { 2561 - while (!bio && (nr_vecs /= 2)) 2562 - bio = bio_alloc(gfp_flags, nr_vecs); 2556 + while (!bio && (nr_vecs /= 2)) { 2557 + bio = bio_alloc_bioset(gfp_flags, 2558 + nr_vecs, btrfs_bioset); 2559 + } 2563 2560 } 2564 2561 2565 2562 if (bio) { ··· 2565 2570 } 2566 2571 return bio; 2567 2572 } 2573 + 2574 + struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) 2575 + { 2576 + return bio_clone_bioset(bio, gfp_mask, btrfs_bioset); 2577 + } 2578 + 2579 + 2580 + /* this also allocates from the btrfs_bioset */ 2581 + struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) 2582 + { 2583 + return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); 2584 + } 2585 + 2568 2586 2569 2587 static int __must_check submit_one_bio(int rw, struct bio *bio, 2570 2588 int mirror_num, unsigned long bio_flags) ··· 3996 3988 last_for_get_extent = isize; 3997 3989 } 3998 3990 3999 - lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3991 + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0, 4000 3992 &cached_state); 4001 3993 4002 3994 em = get_extent_skip_holes(inode, start, last_for_get_extent, ··· 4083 4075 out_free: 4084 4076 free_extent_map(em); 4085 4077 out: 4086 - unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len, 4078 + unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, 4087 4079 &cached_state, GFP_NOFS); 4088 4080 return ret; 4089 4081 }
+2
fs/btrfs/extent_io.h
··· 336 336 struct bio * 337 337 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 338 338 gfp_t gfp_flags); 339 + struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); 340 + struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); 339 341 340 342 struct btrfs_fs_info; 341 343
+20 -23
fs/btrfs/free-space-cache.c
··· 197 197 block_group->key.objectid); 198 198 } 199 199 200 - int btrfs_truncate_free_space_cache(struct btrfs_root *root, 201 - struct btrfs_trans_handle *trans, 202 - struct btrfs_path *path, 203 - struct inode *inode) 200 + int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, 201 + struct btrfs_block_rsv *rsv) 204 202 { 205 - struct btrfs_block_rsv *rsv; 206 203 u64 needed_bytes; 207 - loff_t oldsize; 208 - int ret = 0; 209 - 210 - rsv = trans->block_rsv; 211 - trans->block_rsv = &root->fs_info->global_block_rsv; 204 + int ret; 212 205 213 206 /* 1 for slack space, 1 for updating the inode */ 214 207 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + 215 208 btrfs_calc_trans_metadata_size(root, 1); 216 209 217 - spin_lock(&trans->block_rsv->lock); 218 - if (trans->block_rsv->reserved < needed_bytes) { 219 - spin_unlock(&trans->block_rsv->lock); 220 - trans->block_rsv = rsv; 221 - return -ENOSPC; 222 - } 223 - spin_unlock(&trans->block_rsv->lock); 210 + spin_lock(&rsv->lock); 211 + if (rsv->reserved < needed_bytes) 212 + ret = -ENOSPC; 213 + else 214 + ret = 0; 215 + spin_unlock(&rsv->lock); 216 + return 0; 217 + } 218 + 219 + int btrfs_truncate_free_space_cache(struct btrfs_root *root, 220 + struct btrfs_trans_handle *trans, 221 + struct btrfs_path *path, 222 + struct inode *inode) 223 + { 224 + loff_t oldsize; 225 + int ret = 0; 224 226 225 227 oldsize = i_size_read(inode); 226 228 btrfs_i_size_write(inode, 0); ··· 234 232 */ 235 233 ret = btrfs_truncate_inode_items(trans, root, inode, 236 234 0, BTRFS_EXTENT_DATA_KEY); 237 - 238 235 if (ret) { 239 - trans->block_rsv = rsv; 240 236 btrfs_abort_transaction(trans, root, ret); 241 237 return ret; 242 238 } ··· 242 242 ret = btrfs_update_inode(trans, root, inode); 243 243 if (ret) 244 244 btrfs_abort_transaction(trans, root, ret); 245 - trans->block_rsv = rsv; 246 245 247 246 return ret; 248 247 } ··· 919 920 920 921 /* Make sure we can fit our crcs into the first page */ 921 922 if 
(io_ctl.check_crcs && 922 - (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { 923 - WARN_ON(1); 923 + (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) 924 924 goto out_nospc; 925 - } 926 925 927 926 io_ctl_set_generation(&io_ctl, trans->transid); 928 927
+2
fs/btrfs/free-space-cache.h
··· 54 54 struct btrfs_block_group_cache *block_group, 55 55 struct btrfs_path *path); 56 56 57 + int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, 58 + struct btrfs_block_rsv *rsv); 57 59 int btrfs_truncate_free_space_cache(struct btrfs_root *root, 58 60 struct btrfs_trans_handle *trans, 59 61 struct btrfs_path *path,
+5 -3
fs/btrfs/inode-map.c
··· 429 429 num_bytes = trans->bytes_reserved; 430 430 /* 431 431 * 1 item for inode item insertion if need 432 - * 3 items for inode item update (in the worst case) 432 + * 4 items for inode item update (in the worst case) 433 + * 1 items for slack space if we need do truncation 433 434 * 1 item for free space object 434 435 * 3 items for pre-allocation 435 436 */ 436 - trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); 437 + trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10); 437 438 ret = btrfs_block_rsv_add(root, trans->block_rsv, 438 439 trans->bytes_reserved, 439 440 BTRFS_RESERVE_NO_FLUSH); ··· 469 468 if (i_size_read(inode) > 0) { 470 469 ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 471 470 if (ret) { 472 - btrfs_abort_transaction(trans, root, ret); 471 + if (ret != -ENOSPC) 472 + btrfs_abort_transaction(trans, root, ret); 473 473 goto out_put; 474 474 } 475 475 }
+54 -29
fs/btrfs/inode.c
··· 715 715 async_extent->ram_size - 1, 0); 716 716 717 717 em = alloc_extent_map(); 718 - if (!em) 718 + if (!em) { 719 + ret = -ENOMEM; 719 720 goto out_free_reserve; 721 + } 720 722 em->start = async_extent->start; 721 723 em->len = async_extent->ram_size; 722 724 em->orig_start = em->start; ··· 925 923 } 926 924 927 925 em = alloc_extent_map(); 928 - if (!em) 926 + if (!em) { 927 + ret = -ENOMEM; 929 928 goto out_reserve; 929 + } 930 930 em->start = start; 931 931 em->orig_start = em->start; 932 932 ram_size = ins.offset; ··· 4728 4724 btrfs_end_transaction(trans, root); 4729 4725 btrfs_btree_balance_dirty(root); 4730 4726 no_delete: 4727 + btrfs_remove_delayed_node(inode); 4731 4728 clear_inode(inode); 4732 4729 return; 4733 4730 } ··· 4844 4839 struct rb_node **p; 4845 4840 struct rb_node *parent; 4846 4841 u64 ino = btrfs_ino(inode); 4847 - again: 4848 - p = &root->inode_tree.rb_node; 4849 - parent = NULL; 4850 4842 4851 4843 if (inode_unhashed(inode)) 4852 4844 return; 4853 - 4845 + again: 4846 + parent = NULL; 4854 4847 spin_lock(&root->inode_lock); 4848 + p = &root->inode_tree.rb_node; 4855 4849 while (*p) { 4856 4850 parent = *p; 4857 4851 entry = rb_entry(parent, struct btrfs_inode, rb_node); ··· 6932 6928 /* IO errors */ 6933 6929 int errors; 6934 6930 6931 + /* orig_bio is our btrfs_io_bio */ 6935 6932 struct bio *orig_bio; 6933 + 6934 + /* dio_bio came from fs/direct-io.c */ 6935 + struct bio *dio_bio; 6936 6936 }; 6937 6937 6938 6938 static void btrfs_endio_direct_read(struct bio *bio, int err) ··· 6946 6938 struct bio_vec *bvec = bio->bi_io_vec; 6947 6939 struct inode *inode = dip->inode; 6948 6940 struct btrfs_root *root = BTRFS_I(inode)->root; 6941 + struct bio *dio_bio; 6949 6942 u64 start; 6950 6943 6951 6944 start = dip->logical_offset; ··· 6986 6977 6987 6978 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6988 6979 dip->logical_offset + dip->bytes - 1); 6989 - bio->bi_private = dip->private; 6980 + dio_bio = dip->dio_bio; 6990 
6981 6991 6982 kfree(dip); 6992 6983 6993 6984 /* If we had a csum failure make sure to clear the uptodate flag */ 6994 6985 if (err) 6995 - clear_bit(BIO_UPTODATE, &bio->bi_flags); 6996 - dio_end_io(bio, err); 6986 + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); 6987 + dio_end_io(dio_bio, err); 6988 + bio_put(bio); 6997 6989 } 6998 6990 6999 6991 static void btrfs_endio_direct_write(struct bio *bio, int err) ··· 7005 6995 struct btrfs_ordered_extent *ordered = NULL; 7006 6996 u64 ordered_offset = dip->logical_offset; 7007 6997 u64 ordered_bytes = dip->bytes; 6998 + struct bio *dio_bio; 7008 6999 int ret; 7009 7000 7010 7001 if (err) ··· 7033 7022 goto again; 7034 7023 } 7035 7024 out_done: 7036 - bio->bi_private = dip->private; 7025 + dio_bio = dip->dio_bio; 7037 7026 7038 7027 kfree(dip); 7039 7028 7040 7029 /* If we had an error make sure to clear the uptodate flag */ 7041 7030 if (err) 7042 - clear_bit(BIO_UPTODATE, &bio->bi_flags); 7043 - dio_end_io(bio, err); 7031 + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); 7032 + dio_end_io(dio_bio, err); 7033 + bio_put(bio); 7044 7034 } 7045 7035 7046 7036 static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, ··· 7077 7065 if (!atomic_dec_and_test(&dip->pending_bios)) 7078 7066 goto out; 7079 7067 7080 - if (dip->errors) 7068 + if (dip->errors) { 7081 7069 bio_io_error(dip->orig_bio); 7082 - else { 7083 - set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); 7070 + } else { 7071 + set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags); 7084 7072 bio_endio(dip->orig_bio, 0); 7085 7073 } 7086 7074 out: ··· 7255 7243 return 0; 7256 7244 } 7257 7245 7258 - static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, 7259 - loff_t file_offset) 7246 + static void btrfs_submit_direct(int rw, struct bio *dio_bio, 7247 + struct inode *inode, loff_t file_offset) 7260 7248 { 7261 7249 struct btrfs_root *root = BTRFS_I(inode)->root; 7262 7250 struct btrfs_dio_private *dip; 7263 - struct bio_vec *bvec = 
bio->bi_io_vec; 7251 + struct bio_vec *bvec = dio_bio->bi_io_vec; 7252 + struct bio *io_bio; 7264 7253 int skip_sum; 7265 7254 int write = rw & REQ_WRITE; 7266 7255 int ret = 0; 7267 7256 7268 7257 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 7269 7258 7270 - dip = kmalloc(sizeof(*dip), GFP_NOFS); 7271 - if (!dip) { 7259 + io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); 7260 + 7261 + if (!io_bio) { 7272 7262 ret = -ENOMEM; 7273 7263 goto free_ordered; 7274 7264 } 7275 7265 7276 - dip->private = bio->bi_private; 7266 + dip = kmalloc(sizeof(*dip), GFP_NOFS); 7267 + if (!dip) { 7268 + ret = -ENOMEM; 7269 + goto free_io_bio; 7270 + } 7271 + 7272 + dip->private = dio_bio->bi_private; 7273 + io_bio->bi_private = dio_bio->bi_private; 7277 7274 dip->inode = inode; 7278 7275 dip->logical_offset = file_offset; 7279 7276 ··· 7290 7269 do { 7291 7270 dip->bytes += bvec->bv_len; 7292 7271 bvec++; 7293 - } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); 7272 + } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1)); 7294 7273 7295 - dip->disk_bytenr = (u64)bio->bi_sector << 9; 7296 - bio->bi_private = dip; 7274 + dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; 7275 + io_bio->bi_private = dip; 7297 7276 dip->errors = 0; 7298 - dip->orig_bio = bio; 7277 + dip->orig_bio = io_bio; 7278 + dip->dio_bio = dio_bio; 7299 7279 atomic_set(&dip->pending_bios, 0); 7300 7280 7301 7281 if (write) 7302 - bio->bi_end_io = btrfs_endio_direct_write; 7282 + io_bio->bi_end_io = btrfs_endio_direct_write; 7303 7283 else 7304 - bio->bi_end_io = btrfs_endio_direct_read; 7284 + io_bio->bi_end_io = btrfs_endio_direct_read; 7305 7285 7306 7286 ret = btrfs_submit_direct_hook(rw, dip, skip_sum); 7307 7287 if (!ret) 7308 7288 return; 7289 + 7290 + free_io_bio: 7291 + bio_put(io_bio); 7292 + 7309 7293 free_ordered: 7310 7294 /* 7311 7295 * If this is a write, we need to clean up the reserved space and kill ··· 7326 7300 btrfs_put_ordered_extent(ordered); 7327 7301 
btrfs_put_ordered_extent(ordered); 7328 7302 } 7329 - bio_endio(bio, ret); 7303 + bio_endio(dio_bio, ret); 7330 7304 } 7331 7305 7332 7306 static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, ··· 8005 7979 inode_tree_del(inode); 8006 7980 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 8007 7981 free: 8008 - btrfs_remove_delayed_node(inode); 8009 7982 call_rcu(&inode->i_rcu, btrfs_i_callback); 8010 7983 } 8011 7984
+5 -5
fs/btrfs/ioctl.c
··· 1801 1801 item_off = btrfs_item_ptr_offset(leaf, i); 1802 1802 item_len = btrfs_item_size_nr(leaf, i); 1803 1803 1804 - if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1804 + btrfs_item_key_to_cpu(leaf, key, i); 1805 + if (!key_in_sk(key, sk)) 1806 + continue; 1807 + 1808 + if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1805 1809 item_len = 0; 1806 1810 1807 1811 if (sizeof(sh) + item_len + *sk_offset > ··· 1813 1809 ret = 1; 1814 1810 goto overflow; 1815 1811 } 1816 - 1817 - btrfs_item_key_to_cpu(leaf, key, i); 1818 - if (!key_in_sk(key, sk)) 1819 - continue; 1820 1812 1821 1813 sh.objectid = key->objectid; 1822 1814 sh.offset = key->offset;
+1 -1
fs/btrfs/raid56.c
··· 1050 1050 } 1051 1051 1052 1052 /* put a new bio on the list */ 1053 - bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1); 1053 + bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1); 1054 1054 if (!bio) 1055 1055 return -ENOMEM; 1056 1056
+6 -1
fs/btrfs/relocation.c
··· 1773 1773 if (!eb || !extent_buffer_uptodate(eb)) { 1774 1774 ret = (!eb) ? -ENOMEM : -EIO; 1775 1775 free_extent_buffer(eb); 1776 - return ret; 1776 + break; 1777 1777 } 1778 1778 btrfs_tree_lock(eb); 1779 1779 if (cow) { ··· 3350 3350 } 3351 3351 3352 3352 truncate: 3353 + ret = btrfs_check_trunc_cache_free_space(root, 3354 + &fs_info->global_block_rsv); 3355 + if (ret) 3356 + goto out; 3357 + 3353 3358 path = btrfs_alloc_path(); 3354 3359 if (!path) { 3355 3360 ret = -ENOMEM;
+5 -5
fs/btrfs/scrub.c
··· 1296 1296 } 1297 1297 1298 1298 WARN_ON(!page->page); 1299 - bio = bio_alloc(GFP_NOFS, 1); 1299 + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 1300 1300 if (!bio) { 1301 1301 page->io_error = 1; 1302 1302 sblock->no_io_error_seen = 0; ··· 1431 1431 return -EIO; 1432 1432 } 1433 1433 1434 - bio = bio_alloc(GFP_NOFS, 1); 1434 + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 1435 1435 if (!bio) 1436 1436 return -EIO; 1437 1437 bio->bi_bdev = page_bad->dev->bdev; ··· 1522 1522 sbio->dev = wr_ctx->tgtdev; 1523 1523 bio = sbio->bio; 1524 1524 if (!bio) { 1525 - bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); 1525 + bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); 1526 1526 if (!bio) { 1527 1527 mutex_unlock(&wr_ctx->wr_lock); 1528 1528 return -ENOMEM; ··· 1930 1930 sbio->dev = spage->dev; 1931 1931 bio = sbio->bio; 1932 1932 if (!bio) { 1933 - bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); 1933 + bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); 1934 1934 if (!bio) 1935 1935 return -ENOMEM; 1936 1936 sbio->bio = bio; ··· 3307 3307 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); 3308 3308 return -EIO; 3309 3309 } 3310 - bio = bio_alloc(GFP_NOFS, 1); 3310 + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 3311 3311 if (!bio) { 3312 3312 spin_lock(&sctx->stat_lock); 3313 3313 sctx->stat.malloc_errors++;
+1
fs/btrfs/super.c
··· 1263 1263 1264 1264 btrfs_dev_replace_suspend_for_unmount(fs_info); 1265 1265 btrfs_scrub_cancel(fs_info); 1266 + btrfs_pause_balance(fs_info); 1266 1267 1267 1268 ret = btrfs_commit_super(root); 1268 1269 if (ret)
+12 -42
fs/btrfs/volumes.c
··· 3120 3120 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; 3121 3121 if (num_devices == 1) 3122 3122 allowed |= BTRFS_BLOCK_GROUP_DUP; 3123 - else if (num_devices < 4) 3123 + else if (num_devices > 1) 3124 3124 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); 3125 - else 3126 - allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | 3127 - BTRFS_BLOCK_GROUP_RAID10 | 3128 - BTRFS_BLOCK_GROUP_RAID5 | 3129 - BTRFS_BLOCK_GROUP_RAID6); 3130 - 3125 + if (num_devices > 2) 3126 + allowed |= BTRFS_BLOCK_GROUP_RAID5; 3127 + if (num_devices > 3) 3128 + allowed |= (BTRFS_BLOCK_GROUP_RAID10 | 3129 + BTRFS_BLOCK_GROUP_RAID6); 3131 3130 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3132 3131 (!alloc_profile_is_valid(bctl->data.target, 1) || 3133 3132 (bctl->data.target & ~allowed))) { ··· 5018 5019 return 0; 5019 5020 } 5020 5021 5021 - static void *merge_stripe_index_into_bio_private(void *bi_private, 5022 - unsigned int stripe_index) 5023 - { 5024 - /* 5025 - * with single, dup, RAID0, RAID1 and RAID10, stripe_index is 5026 - * at most 1. 5027 - * The alternative solution (instead of stealing bits from the 5028 - * pointer) would be to allocate an intermediate structure 5029 - * that contains the old private pointer plus the stripe_index. 
5030 - */ 5031 - BUG_ON((((uintptr_t)bi_private) & 3) != 0); 5032 - BUG_ON(stripe_index > 3); 5033 - return (void *)(((uintptr_t)bi_private) | stripe_index); 5034 - } 5035 - 5036 - static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private) 5037 - { 5038 - return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3)); 5039 - } 5040 - 5041 - static unsigned int extract_stripe_index_from_bio_private(void *bi_private) 5042 - { 5043 - return (unsigned int)((uintptr_t)bi_private) & 3; 5044 - } 5045 - 5046 5022 static void btrfs_end_bio(struct bio *bio, int err) 5047 5023 { 5048 - struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); 5024 + struct btrfs_bio *bbio = bio->bi_private; 5049 5025 int is_orig_bio = 0; 5050 5026 5051 5027 if (err) { 5052 5028 atomic_inc(&bbio->error); 5053 5029 if (err == -EIO || err == -EREMOTEIO) { 5054 5030 unsigned int stripe_index = 5055 - extract_stripe_index_from_bio_private( 5056 - bio->bi_private); 5031 + btrfs_io_bio(bio)->stripe_index; 5057 5032 struct btrfs_device *dev; 5058 5033 5059 5034 BUG_ON(stripe_index >= bbio->num_stripes); ··· 5057 5084 } 5058 5085 bio->bi_private = bbio->private; 5059 5086 bio->bi_end_io = bbio->end_io; 5060 - bio->bi_bdev = (struct block_device *) 5061 - (unsigned long)bbio->mirror_num; 5087 + btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5062 5088 /* only send an error to the higher layers if it is 5063 5089 * beyond the tolerance of the btrfs bio 5064 5090 */ ··· 5183 5211 struct btrfs_device *dev = bbio->stripes[dev_nr].dev; 5184 5212 5185 5213 bio->bi_private = bbio; 5186 - bio->bi_private = merge_stripe_index_into_bio_private( 5187 - bio->bi_private, (unsigned int)dev_nr); 5214 + btrfs_io_bio(bio)->stripe_index = dev_nr; 5188 5215 bio->bi_end_io = btrfs_end_bio; 5189 5216 bio->bi_sector = physical >> 9; 5190 5217 #ifdef DEBUG ··· 5244 5273 if (atomic_dec_and_test(&bbio->stripes_pending)) { 5245 5274 bio->bi_private = bbio->private; 5246 5275 bio->bi_end_io 
= bbio->end_io; 5247 - bio->bi_bdev = (struct block_device *) 5248 - (unsigned long)bbio->mirror_num; 5276 + btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5249 5277 bio->bi_sector = logical >> 9; 5250 5278 kfree(bbio); 5251 5279 bio_endio(bio, -EIO); ··· 5322 5352 } 5323 5353 5324 5354 if (dev_nr < total_devs - 1) { 5325 - bio = bio_clone(first_bio, GFP_NOFS); 5355 + bio = btrfs_bio_clone(first_bio, GFP_NOFS); 5326 5356 BUG_ON(!bio); /* -ENOMEM */ 5327 5357 } else { 5328 5358 bio = first_bio;
+20
fs/btrfs/volumes.h
··· 152 152 int rotating; 153 153 }; 154 154 155 + /* 156 + * we need the mirror number and stripe index to be passed around 157 + * the call chain while we are processing end_io (especially errors). 158 + * Really, what we need is a btrfs_bio structure that has this info 159 + * and is properly sized with its stripe array, but we're not there 160 + * quite yet. We have our own btrfs bioset, and all of the bios 161 + * we allocate are actually btrfs_io_bios. We'll cram as much of 162 + * struct btrfs_bio as we can into this over time. 163 + */ 164 + struct btrfs_io_bio { 165 + unsigned long mirror_num; 166 + unsigned long stripe_index; 167 + struct bio bio; 168 + }; 169 + 170 + static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio) 171 + { 172 + return container_of(bio, struct btrfs_io_bio, bio); 173 + } 174 + 155 175 struct btrfs_bio_stripe { 156 176 struct btrfs_device *dev; 157 177 u64 physical;