Merge tag 'for-4.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

+6 -7

fs/btrfs/acl.c

··· 30 30 name = XATTR_NAME_POSIX_ACL_DEFAULT; 31 31 break; 32 32 default: 33 - BUG(); 33 + return ERR_PTR(-EINVAL); 34 34 } 35 35 36 - size = btrfs_getxattr(inode, name, "", 0); 36 + size = btrfs_getxattr(inode, name, NULL, 0); 37 37 if (size > 0) { 38 38 value = kzalloc(size, GFP_KERNEL); 39 39 if (!value) 40 40 return ERR_PTR(-ENOMEM); 41 41 size = btrfs_getxattr(inode, name, value, size); 42 42 } 43 - if (size > 0) { 43 + if (size > 0) 44 44 acl = posix_acl_from_xattr(&init_user_ns, value, size); 45 - } else if (size == -ERANGE || size == -ENODATA || size == 0) { 45 + else if (size == -ENODATA || size == 0) 46 46 acl = NULL; 47 - } else { 48 - acl = ERR_PTR(-EIO); 49 - } 47 + else 48 + acl = ERR_PTR(size); 50 49 kfree(value); 51 50 52 51 return acl;

+3 -3

fs/btrfs/backref.c

··· 925 925 type = btrfs_get_extent_inline_ref_type(leaf, iref, 926 926 BTRFS_REF_TYPE_ANY); 927 927 if (type == BTRFS_REF_TYPE_INVALID) 928 - return -EINVAL; 928 + return -EUCLEAN; 929 929 930 930 offset = btrfs_extent_inline_ref_offset(leaf, iref); 931 931 ··· 1793 1793 *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref, 1794 1794 BTRFS_REF_TYPE_ANY); 1795 1795 if (*out_type == BTRFS_REF_TYPE_INVALID) 1796 - return -EINVAL; 1796 + return -EUCLEAN; 1797 1797 1798 1798 *ptr += btrfs_extent_inline_ref_size(*out_type); 1799 1799 WARN_ON(*ptr > end); ··· 2225 2225 2226 2226 fspath = init_data_container(total_bytes); 2227 2227 if (IS_ERR(fspath)) 2228 - return (void *)fspath; 2228 + return ERR_CAST(fspath); 2229 2229 2230 2230 ifp = kmalloc(sizeof(*ifp), GFP_KERNEL); 2231 2231 if (!ifp) {

+1 -1

fs/btrfs/btrfs_inode.h

··· 178 178 struct btrfs_delayed_node *delayed_node; 179 179 180 180 /* File creation time. */ 181 - struct timespec i_otime; 181 + struct timespec64 i_otime; 182 182 183 183 /* Hook into fs_info->delayed_iputs */ 184 184 struct list_head delayed_iput;

+7 -2

fs/btrfs/check-integrity.c

··· 1539 1539 } 1540 1540 1541 1541 device = multi->stripes[0].dev; 1542 - block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev); 1542 + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) || 1543 + !device->bdev || !device->name) 1544 + block_ctx_out->dev = NULL; 1545 + else 1546 + block_ctx_out->dev = btrfsic_dev_state_lookup( 1547 + device->bdev->bd_dev); 1543 1548 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1544 1549 block_ctx_out->start = bytenr; 1545 1550 block_ctx_out->len = len; ··· 1629 1624 bio = btrfs_io_bio_alloc(num_pages - i); 1630 1625 bio_set_dev(bio, block_ctx->dev->bdev); 1631 1626 bio->bi_iter.bi_sector = dev_bytenr >> 9; 1632 - bio_set_op_attrs(bio, REQ_OP_READ, 0); 1627 + bio->bi_opf = REQ_OP_READ; 1633 1628 1634 1629 for (j = i; j < num_pages; j++) { 1635 1630 ret = bio_add_page(bio, block_ctx->pagev[j],

+5 -13

fs/btrfs/compression.c

··· 5 5 6 6 #include <linux/kernel.h> 7 7 #include <linux/bio.h> 8 - #include <linux/buffer_head.h> 9 8 #include <linux/file.h> 10 9 #include <linux/fs.h> 11 10 #include <linux/pagemap.h> ··· 13 14 #include <linux/init.h> 14 15 #include <linux/string.h> 15 16 #include <linux/backing-dev.h> 16 - #include <linux/mpage.h> 17 - #include <linux/swap.h> 18 17 #include <linux/writeback.h> 19 - #include <linux/bit_spinlock.h> 20 18 #include <linux/slab.h> 21 19 #include <linux/sched/mm.h> 22 20 #include <linux/log2.h> ··· 299 303 struct bio *bio = NULL; 300 304 struct compressed_bio *cb; 301 305 unsigned long bytes_left; 302 - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 303 306 int pg_index = 0; 304 307 struct page *page; 305 308 u64 first_byte = disk_start; ··· 337 342 page = compressed_pages[pg_index]; 338 343 page->mapping = inode->i_mapping; 339 344 if (bio->bi_iter.bi_size) 340 - submit = io_tree->ops->merge_bio_hook(page, 0, 341 - PAGE_SIZE, 342 - bio, 0); 345 + submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, bio, 0); 343 346 344 347 page->mapping = NULL; 345 348 if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < ··· 606 613 cb->len = bio->bi_iter.bi_size; 607 614 608 615 comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); 609 - bio_set_op_attrs (comp_bio, REQ_OP_READ, 0); 616 + comp_bio->bi_opf = REQ_OP_READ; 610 617 comp_bio->bi_private = cb; 611 618 comp_bio->bi_end_io = end_compressed_bio_read; 612 619 refcount_set(&cb->pending_bios, 1); ··· 619 626 page->index = em_start >> PAGE_SHIFT; 620 627 621 628 if (comp_bio->bi_iter.bi_size) 622 - submit = tree->ops->merge_bio_hook(page, 0, 623 - PAGE_SIZE, 624 - comp_bio, 0); 629 + submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, 630 + comp_bio, 0); 625 631 626 632 page->mapping = NULL; 627 633 if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < ··· 652 660 } 653 661 654 662 comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); 655 - bio_set_op_attrs(comp_bio, REQ_OP_READ, 0); 663 + comp_bio->bi_opf = 
REQ_OP_READ; 656 664 comp_bio->bi_private = cb; 657 665 comp_bio->bi_end_io = end_compressed_bio_read; 658 666

+10 -43

fs/btrfs/ctree.c

··· 888 888 btrfs_root_last_snapshot(&root->root_item) || 889 889 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 890 890 return 1; 891 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 892 - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && 893 - btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 894 - return 1; 895 - #endif 891 + 896 892 return 0; 897 893 } 898 894 ··· 3124 3128 * higher levels 3125 3129 * 3126 3130 */ 3127 - static void fixup_low_keys(struct btrfs_fs_info *fs_info, 3128 - struct btrfs_path *path, 3131 + static void fixup_low_keys(struct btrfs_path *path, 3129 3132 struct btrfs_disk_key *key, int level) 3130 3133 { 3131 3134 int i; ··· 3176 3181 btrfs_set_item_key(eb, &disk_key, slot); 3177 3182 btrfs_mark_buffer_dirty(eb); 3178 3183 if (slot == 0) 3179 - fixup_low_keys(fs_info, path, &disk_key, 1); 3184 + fixup_low_keys(path, &disk_key, 1); 3180 3185 } 3181 3186 3182 3187 /* ··· 3354 3359 3355 3360 root_add_used(root, fs_info->nodesize); 3356 3361 3357 - memzero_extent_buffer(c, 0, sizeof(struct btrfs_header)); 3358 3362 btrfs_set_header_nritems(c, 1); 3359 - btrfs_set_header_level(c, level); 3360 - btrfs_set_header_bytenr(c, c->start); 3361 - btrfs_set_header_generation(c, trans->transid); 3362 - btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); 3363 - btrfs_set_header_owner(c, root->root_key.objectid); 3364 - 3365 - write_extent_buffer_fsid(c, fs_info->fsid); 3366 - write_extent_buffer_chunk_tree_uuid(c, fs_info->chunk_tree_uuid); 3367 - 3368 3363 btrfs_set_node_key(c, &lower_key, 0); 3369 3364 btrfs_set_node_blockptr(c, 0, lower->start); 3370 3365 lower_gen = btrfs_header_generation(lower); ··· 3483 3498 return PTR_ERR(split); 3484 3499 3485 3500 root_add_used(root, fs_info->nodesize); 3486 - 3487 - memzero_extent_buffer(split, 0, sizeof(struct btrfs_header)); 3488 - btrfs_set_header_level(split, btrfs_header_level(c)); 3489 - btrfs_set_header_bytenr(split, split->start); 3490 - btrfs_set_header_generation(split, trans->transid); 3491 - 
btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); 3492 - btrfs_set_header_owner(split, root->root_key.objectid); 3493 - write_extent_buffer_fsid(split, fs_info->fsid); 3494 - write_extent_buffer_chunk_tree_uuid(split, fs_info->chunk_tree_uuid); 3501 + ASSERT(btrfs_header_level(c) == level); 3495 3502 3496 3503 ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid); 3497 3504 if (ret) { ··· 3922 3945 clean_tree_block(fs_info, right); 3923 3946 3924 3947 btrfs_item_key(right, &disk_key, 0); 3925 - fixup_low_keys(fs_info, path, &disk_key, 1); 3948 + fixup_low_keys(path, &disk_key, 1); 3926 3949 3927 3950 /* then fixup the leaf pointer in the path */ 3928 3951 if (path->slots[0] < push_items) { ··· 4269 4292 4270 4293 root_add_used(root, fs_info->nodesize); 4271 4294 4272 - memzero_extent_buffer(right, 0, sizeof(struct btrfs_header)); 4273 - btrfs_set_header_bytenr(right, right->start); 4274 - btrfs_set_header_generation(right, trans->transid); 4275 - btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); 4276 - btrfs_set_header_owner(right, root->root_key.objectid); 4277 - btrfs_set_header_level(right, 0); 4278 - write_extent_buffer_fsid(right, fs_info->fsid); 4279 - write_extent_buffer_chunk_tree_uuid(right, fs_info->chunk_tree_uuid); 4280 - 4281 4295 if (split == 0) { 4282 4296 if (mid <= slot) { 4283 4297 btrfs_set_header_nritems(right, 0); ··· 4288 4320 path->nodes[0] = right; 4289 4321 path->slots[0] = 0; 4290 4322 if (path->slots[1] == 0) 4291 - fixup_low_keys(fs_info, path, &disk_key, 1); 4323 + fixup_low_keys(path, &disk_key, 1); 4292 4324 } 4293 4325 /* 4294 4326 * We create a new leaf 'right' for the required ins_len and ··· 4610 4642 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 4611 4643 btrfs_set_item_key(leaf, &disk_key, slot); 4612 4644 if (slot == 0) 4613 - fixup_low_keys(fs_info, path, &disk_key, 1); 4645 + fixup_low_keys(path, &disk_key, 1); 4614 4646 } 4615 4647 4616 4648 item = btrfs_item_nr(slot); ··· 
4712 4744 4713 4745 if (path->slots[0] == 0) { 4714 4746 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 4715 - fixup_low_keys(fs_info, path, &disk_key, 1); 4747 + fixup_low_keys(path, &disk_key, 1); 4716 4748 } 4717 4749 btrfs_unlock_up_safe(path, 1); 4718 4750 ··· 4854 4886 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, 4855 4887 int level, int slot) 4856 4888 { 4857 - struct btrfs_fs_info *fs_info = root->fs_info; 4858 4889 struct extent_buffer *parent = path->nodes[level]; 4859 4890 u32 nritems; 4860 4891 int ret; ··· 4886 4919 struct btrfs_disk_key disk_key; 4887 4920 4888 4921 btrfs_node_key(parent, &disk_key, 0); 4889 - fixup_low_keys(fs_info, path, &disk_key, level + 1); 4922 + fixup_low_keys(path, &disk_key, level + 1); 4890 4923 } 4891 4924 btrfs_mark_buffer_dirty(parent); 4892 4925 } ··· 4989 5022 struct btrfs_disk_key disk_key; 4990 5023 4991 5024 btrfs_item_key(leaf, &disk_key, 0); 4992 - fixup_low_keys(fs_info, path, &disk_key, 1); 5025 + fixup_low_keys(path, &disk_key, 1); 4993 5026 } 4994 5027 4995 5028 /* delete the leaf if it is mostly empty */

+33 -56

fs/btrfs/ctree.h

··· 55 55 56 56 #define BTRFS_OLDEST_GENERATION 0ULL 57 57 58 - #define BTRFS_COMPAT_EXTENT_TREE_V0 59 - 60 58 /* 61 59 * the max metadata block size. This limit is somewhat artificial, 62 60 * but the memmove costs go through the roof for larger blocks. ··· 83 85 #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) 84 86 85 87 #define BTRFS_DIRTY_METADATA_THRESH SZ_32M 88 + 89 + /* 90 + * Use large batch size to reduce overhead of metadata updates. On the reader 91 + * side, we only read it when we are close to ENOSPC and the read overhead is 92 + * mostly related to the number of CPUs, so it is OK to use arbitrary large 93 + * value here. 94 + */ 95 + #define BTRFS_TOTAL_BYTES_PINNED_BATCH SZ_128M 86 96 87 97 #define BTRFS_MAX_EXTENT_SIZE SZ_128M 88 98 ··· 348 342 sizeof(struct btrfs_item)) 349 343 struct btrfs_dev_replace { 350 344 u64 replace_state; /* see #define above */ 351 - u64 time_started; /* seconds since 1-Jan-1970 */ 352 - u64 time_stopped; /* seconds since 1-Jan-1970 */ 345 + time64_t time_started; /* seconds since 1-Jan-1970 */ 346 + time64_t time_stopped; /* seconds since 1-Jan-1970 */ 353 347 atomic64_t num_write_errors; 354 348 atomic64_t num_uncorrectable_read_errors; 355 349 ··· 365 359 struct btrfs_device *srcdev; 366 360 struct btrfs_device *tgtdev; 367 361 368 - pid_t lock_owner; 369 - atomic_t nesting_level; 370 362 struct mutex lock_finishing_cancel_unmount; 371 363 rwlock_t lock; 372 364 atomic_t read_locks; ··· 1217 1213 u64 defrag_trans_start; 1218 1214 struct btrfs_key defrag_progress; 1219 1215 struct btrfs_key defrag_max; 1220 - char *name; 1221 1216 1222 1217 /* the dirty list is only used by non-reference counted roots */ 1223 1218 struct list_head dirty_list; ··· 2431 2428 return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; 2432 2429 } 2433 2430 2434 - /* this returns the number of file bytes represented by the inline item. 
2435 - * If an item is compressed, this is the uncompressed size 2436 - */ 2437 - static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb, 2438 - int slot, 2439 - const struct btrfs_file_extent_item *fi) 2440 - { 2441 - struct btrfs_map_token token; 2442 - 2443 - btrfs_init_map_token(&token); 2444 - /* 2445 - * return the space used on disk if this item isn't 2446 - * compressed or encoded 2447 - */ 2448 - if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 && 2449 - btrfs_token_file_extent_encryption(eb, fi, &token) == 0 && 2450 - btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) { 2451 - return btrfs_file_extent_inline_item_len(eb, 2452 - btrfs_item_nr(slot)); 2453 - } 2454 - 2455 - /* otherwise use the ram bytes field */ 2456 - return btrfs_token_file_extent_ram_bytes(eb, fi, &token); 2457 - } 2458 - 2459 - 2460 2431 /* btrfs_dev_stats_item */ 2461 2432 static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, 2462 2433 const struct btrfs_dev_stats_item *ptr, ··· 2653 2676 u64 offset, u64 ram_bytes, 2654 2677 struct btrfs_key *ins); 2655 2678 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 2656 - struct btrfs_fs_info *fs_info, 2657 2679 u64 root_objectid, u64 owner, u64 offset, 2658 2680 struct btrfs_key *ins); 2659 2681 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, ··· 2692 2716 int btrfs_read_block_groups(struct btrfs_fs_info *info); 2693 2717 int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr); 2694 2718 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 2695 - struct btrfs_fs_info *fs_info, u64 bytes_used, 2696 - u64 type, u64 chunk_offset, u64 size); 2719 + u64 bytes_used, u64 type, u64 chunk_offset, 2720 + u64 size); 2697 2721 void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info); 2698 2722 struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( 2699 2723 struct btrfs_fs_info *fs_info, 2700 2724 const u64 
chunk_offset); 2701 2725 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 2702 - struct btrfs_fs_info *fs_info, u64 group_start, 2703 - struct extent_map *em); 2726 + u64 group_start, struct extent_map *em); 2704 2727 void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); 2705 2728 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache); 2706 2729 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache); ··· 2761 2786 unsigned short type); 2762 2787 void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, 2763 2788 struct btrfs_block_rsv *rsv); 2764 - void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv); 2765 2789 int btrfs_block_rsv_add(struct btrfs_root *root, 2766 2790 struct btrfs_block_rsv *block_rsv, u64 num_bytes, 2767 2791 enum btrfs_reserve_flush_enum flush); ··· 2777 2803 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, 2778 2804 struct btrfs_block_rsv *block_rsv, 2779 2805 u64 num_bytes); 2780 - int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info, 2781 - struct btrfs_block_group_cache *cache); 2806 + int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache); 2782 2807 void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache); 2783 2808 void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2784 2809 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); ··· 2785 2812 u64 start, u64 end); 2786 2813 int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, 2787 2814 u64 num_bytes, u64 *actual_bytes); 2788 - int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 2789 - struct btrfs_fs_info *fs_info, u64 type); 2815 + int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type); 2790 2816 int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range); 2791 2817 2792 2818 int btrfs_init_space_info(struct btrfs_fs_info *fs_info); ··· 2794 2822 int btrfs_start_write_no_snapshotting(struct 
btrfs_root *root); 2795 2823 void btrfs_end_write_no_snapshotting(struct btrfs_root *root); 2796 2824 void btrfs_wait_for_snapshot_creation(struct btrfs_root *root); 2797 - void check_system_chunk(struct btrfs_trans_handle *trans, 2798 - struct btrfs_fs_info *fs_info, const u64 type); 2825 + void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type); 2799 2826 u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 2800 2827 u64 start, u64 end); 2828 + void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg); 2801 2829 2802 2830 /* ctree.c */ 2803 2831 int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, ··· 2983 3011 int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); 2984 3012 2985 3013 /* root-item.c */ 2986 - int btrfs_add_root_ref(struct btrfs_trans_handle *trans, 2987 - struct btrfs_fs_info *fs_info, 2988 - u64 root_id, u64 ref_id, u64 dirid, u64 sequence, 2989 - const char *name, int name_len); 2990 - int btrfs_del_root_ref(struct btrfs_trans_handle *trans, 2991 - struct btrfs_fs_info *fs_info, 2992 - u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, 2993 - const char *name, int name_len); 3014 + int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, 3015 + u64 ref_id, u64 dirid, u64 sequence, const char *name, 3016 + int name_len); 3017 + int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, 3018 + u64 ref_id, u64 dirid, u64 *sequence, const char *name, 3019 + int name_len); 2994 3020 int btrfs_del_root(struct btrfs_trans_handle *trans, 2995 - struct btrfs_fs_info *fs_info, const struct btrfs_key *key); 3021 + const struct btrfs_key *key); 2996 3022 int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2997 3023 const struct btrfs_key *key, 2998 3024 struct btrfs_root_item *item); ··· 3166 3196 int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 3167 3197 size_t size, struct bio *bio, 3168 3198 unsigned long 
bio_flags); 3169 - void btrfs_set_range_writeback(void *private_data, u64 start, u64 end); 3199 + void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end); 3170 3200 vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf); 3171 3201 int btrfs_readpage(struct file *file, struct page *page); 3172 3202 void btrfs_evict_inode(struct inode *inode); ··· 3422 3452 #ifdef CONFIG_BTRFS_ASSERT 3423 3453 3424 3454 __cold 3425 - static inline void assfail(char *expr, char *file, int line) 3455 + static inline void assfail(const char *expr, const char *file, int line) 3426 3456 { 3427 3457 pr_err("assertion failed: %s, file: %s, line: %d\n", 3428 3458 expr, file, line); ··· 3434 3464 #else 3435 3465 #define ASSERT(expr) ((void)0) 3436 3466 #endif 3467 + 3468 + __cold 3469 + static inline void btrfs_print_v0_err(struct btrfs_fs_info *fs_info) 3470 + { 3471 + btrfs_err(fs_info, 3472 + "Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel"); 3473 + } 3437 3474 3438 3475 __printf(5, 6) 3439 3476 __cold

+6 -8

fs/btrfs/delayed-inode.c

··· 1222 1222 1223 1223 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode) 1224 1224 { 1225 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1225 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 1226 1226 struct btrfs_trans_handle *trans; 1227 1227 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); 1228 1228 struct btrfs_path *path; ··· 1418 1418 1419 1419 /* Will return 0 or -ENOMEM */ 1420 1420 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, 1421 - struct btrfs_fs_info *fs_info, 1422 1421 const char *name, int name_len, 1423 1422 struct btrfs_inode *dir, 1424 1423 struct btrfs_disk_key *disk_key, u8 type, ··· 1457 1458 */ 1458 1459 BUG_ON(ret); 1459 1460 1460 - 1461 1461 mutex_lock(&delayed_node->mutex); 1462 1462 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); 1463 1463 if (unlikely(ret)) { 1464 - btrfs_err(fs_info, 1464 + btrfs_err(trans->fs_info, 1465 1465 "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", 1466 1466 name_len, name, delayed_node->root->objectid, 1467 1467 delayed_node->inode_id, ret); ··· 1493 1495 } 1494 1496 1495 1497 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, 1496 - struct btrfs_fs_info *fs_info, 1497 1498 struct btrfs_inode *dir, u64 index) 1498 1499 { 1499 1500 struct btrfs_delayed_node *node; ··· 1508 1511 item_key.type = BTRFS_DIR_INDEX_KEY; 1509 1512 item_key.offset = index; 1510 1513 1511 - ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key); 1514 + ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node, 1515 + &item_key); 1512 1516 if (!ret) 1513 1517 goto end; 1514 1518 ··· 1531 1533 mutex_lock(&node->mutex); 1532 1534 ret = __btrfs_add_delayed_deletion_item(node, item); 1533 1535 if (unlikely(ret)) { 1534 - btrfs_err(fs_info, 1536 + btrfs_err(trans->fs_info, 1535 1537 "err add delayed dir index 
item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", 1536 1538 index, node->root->objectid, node->inode_id, ret); 1537 1539 BUG(); ··· 1835 1837 1836 1838 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode) 1837 1839 { 1838 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1840 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 1839 1841 struct btrfs_delayed_node *delayed_node; 1840 1842 1841 1843 /*

-2

fs/btrfs/delayed-inode.h

··· 86 86 } 87 87 88 88 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, 89 - struct btrfs_fs_info *fs_info, 90 89 const char *name, int name_len, 91 90 struct btrfs_inode *dir, 92 91 struct btrfs_disk_key *disk_key, u8 type, 93 92 u64 index); 94 93 95 94 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, 96 - struct btrfs_fs_info *fs_info, 97 95 struct btrfs_inode *dir, u64 index); 98 96 99 97 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);

+21 -22

fs/btrfs/delayed-ref.c

··· 709 709 * to make sure the delayed ref is eventually processed before this 710 710 * transaction commits. 711 711 */ 712 - int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 713 - struct btrfs_trans_handle *trans, 712 + int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, 714 713 u64 bytenr, u64 num_bytes, u64 parent, 715 714 u64 ref_root, int level, int action, 716 715 struct btrfs_delayed_extent_op *extent_op, 717 716 int *old_ref_mod, int *new_ref_mod) 718 717 { 718 + struct btrfs_fs_info *fs_info = trans->fs_info; 719 719 struct btrfs_delayed_tree_ref *ref; 720 720 struct btrfs_delayed_ref_head *head_ref; 721 721 struct btrfs_delayed_ref_root *delayed_refs; ··· 730 730 if (!ref) 731 731 return -ENOMEM; 732 732 733 + head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); 734 + if (!head_ref) { 735 + kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); 736 + return -ENOMEM; 737 + } 738 + 739 + if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 740 + is_fstree(ref_root)) { 741 + record = kmalloc(sizeof(*record), GFP_NOFS); 742 + if (!record) { 743 + kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); 744 + kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 745 + return -ENOMEM; 746 + } 747 + } 748 + 733 749 if (parent) 734 750 ref_type = BTRFS_SHARED_BLOCK_REF_KEY; 735 751 else 736 752 ref_type = BTRFS_TREE_BLOCK_REF_KEY; 753 + 737 754 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes, 738 755 ref_root, action, ref_type); 739 756 ref->root = ref_root; 740 757 ref->parent = parent; 741 758 ref->level = level; 742 - 743 - head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); 744 - if (!head_ref) 745 - goto free_ref; 746 - 747 - if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 748 - is_fstree(ref_root)) { 749 - record = kmalloc(sizeof(*record), GFP_NOFS); 750 - if (!record) 751 - goto free_head_ref; 752 - } 753 759 754 760 init_delayed_ref_head(head_ref, record, bytenr, 
num_bytes, 755 761 ref_root, 0, action, false, is_system); ··· 785 779 btrfs_qgroup_trace_extent_post(fs_info, record); 786 780 787 781 return 0; 788 - 789 - free_head_ref: 790 - kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 791 - free_ref: 792 - kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); 793 - 794 - return -ENOMEM; 795 782 } 796 783 797 784 /* 798 785 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref. 799 786 */ 800 - int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, 801 - struct btrfs_trans_handle *trans, 787 + int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, 802 788 u64 bytenr, u64 num_bytes, 803 789 u64 parent, u64 ref_root, 804 790 u64 owner, u64 offset, u64 reserved, int action, 805 791 int *old_ref_mod, int *new_ref_mod) 806 792 { 793 + struct btrfs_fs_info *fs_info = trans->fs_info; 807 794 struct btrfs_delayed_data_ref *ref; 808 795 struct btrfs_delayed_ref_head *head_ref; 809 796 struct btrfs_delayed_ref_root *delayed_refs;

+2 -4

fs/btrfs/delayed-ref.h

··· 234 234 kmem_cache_free(btrfs_delayed_ref_head_cachep, head); 235 235 } 236 236 237 - int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 238 - struct btrfs_trans_handle *trans, 237 + int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, 239 238 u64 bytenr, u64 num_bytes, u64 parent, 240 239 u64 ref_root, int level, int action, 241 240 struct btrfs_delayed_extent_op *extent_op, 242 241 int *old_ref_mod, int *new_ref_mod); 243 - int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, 244 - struct btrfs_trans_handle *trans, 242 + int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, 245 243 u64 bytenr, u64 num_bytes, 246 244 u64 parent, u64 ref_root, 247 245 u64 owner, u64 offset, u64 reserved, int action,

+15 -14

fs/btrfs/dev-replace.c

··· 6 6 #include <linux/sched.h> 7 7 #include <linux/bio.h> 8 8 #include <linux/slab.h> 9 - #include <linux/buffer_head.h> 10 9 #include <linux/blkdev.h> 11 - #include <linux/random.h> 12 - #include <linux/iocontext.h> 13 - #include <linux/capability.h> 14 10 #include <linux/kthread.h> 15 11 #include <linux/math64.h> 16 - #include <asm/div64.h> 17 12 #include "ctree.h" 18 13 #include "extent_map.h" 19 14 #include "disk-io.h" ··· 460 465 * go to the tgtdev as well (refer to btrfs_map_block()). 461 466 */ 462 467 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; 463 - dev_replace->time_started = get_seconds(); 468 + dev_replace->time_started = ktime_get_real_seconds(); 464 469 dev_replace->cursor_left = 0; 465 470 dev_replace->committed_cursor_left = 0; 466 471 dev_replace->cursor_left_last_write_of_item = 0; ··· 506 511 dev_replace->srcdev = NULL; 507 512 dev_replace->tgtdev = NULL; 508 513 btrfs_dev_replace_write_unlock(dev_replace); 509 - btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); 514 + btrfs_destroy_dev_replace_tgtdev(tgt_device); 510 515 return ret; 511 516 } 512 517 ··· 613 618 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; 614 619 dev_replace->tgtdev = NULL; 615 620 dev_replace->srcdev = NULL; 616 - dev_replace->time_stopped = get_seconds(); 621 + dev_replace->time_stopped = ktime_get_real_seconds(); 617 622 dev_replace->item_needs_writeback = 1; 618 623 619 624 /* replace old device with new one in mapping tree */ ··· 632 637 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 633 638 btrfs_rm_dev_replace_blocked(fs_info); 634 639 if (tgt_device) 635 - btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); 640 + btrfs_destroy_dev_replace_tgtdev(tgt_device); 636 641 btrfs_rm_dev_replace_unblocked(fs_info); 637 642 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 638 643 ··· 658 663 tgt_device->commit_total_bytes = src_device->commit_total_bytes; 659 664 tgt_device->commit_bytes_used = src_device->bytes_used; 660 665 661 - 
btrfs_assign_next_active_device(fs_info, src_device, tgt_device); 666 + btrfs_assign_next_active_device(src_device, tgt_device); 662 667 663 668 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); 664 669 fs_info->fs_devices->rw_devices++; ··· 667 672 668 673 btrfs_rm_dev_replace_blocked(fs_info); 669 674 670 - btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device); 675 + btrfs_rm_dev_replace_remove_srcdev(src_device); 671 676 672 677 btrfs_rm_dev_replace_unblocked(fs_info); 678 + 679 + /* 680 + * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will 681 + * update on-disk dev stats value during commit transaction 682 + */ 683 + atomic_inc(&tgt_device->dev_stats_ccnt); 673 684 674 685 /* 675 686 * this is again a consistent state where no dev_replace procedure ··· 808 807 break; 809 808 } 810 809 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; 811 - dev_replace->time_stopped = get_seconds(); 810 + dev_replace->time_stopped = ktime_get_real_seconds(); 812 811 dev_replace->item_needs_writeback = 1; 813 812 btrfs_dev_replace_write_unlock(dev_replace); 814 813 btrfs_scrub_cancel(fs_info); ··· 827 826 btrfs_dev_name(tgt_device)); 828 827 829 828 if (tgt_device) 830 - btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); 829 + btrfs_destroy_dev_replace_tgtdev(tgt_device); 831 830 832 831 leave: 833 832 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); ··· 849 848 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 850 849 dev_replace->replace_state = 851 850 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; 852 - dev_replace->time_stopped = get_seconds(); 851 + dev_replace->time_stopped = ktime_get_real_seconds(); 853 852 dev_replace->item_needs_writeback = 1; 854 853 btrfs_info(fs_info, "suspending dev_replace for unmount"); 855 854 break;

+2 -2

fs/btrfs/dir-item.c

··· 160 160 } 161 161 btrfs_release_path(path); 162 162 163 - ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name, 164 - name_len, dir, &disk_key, type, index); 163 + ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir, 164 + &disk_key, type, index); 165 165 out_free: 166 166 btrfs_free_path(path); 167 167 if (ret)

+30 -83

fs/btrfs/disk-io.c

··· 5 5 6 6 #include <linux/fs.h> 7 7 #include <linux/blkdev.h> 8 - #include <linux/scatterlist.h> 9 - #include <linux/swap.h> 10 8 #include <linux/radix-tree.h> 11 9 #include <linux/writeback.h> 12 10 #include <linux/buffer_head.h> ··· 52 54 53 55 static const struct extent_io_ops btree_extent_io_ops; 54 56 static void end_workqueue_fn(struct btrfs_work *work); 55 - static void free_fs_root(struct btrfs_root *root); 56 57 static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 57 58 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 58 59 struct btrfs_fs_info *fs_info); ··· 105 108 */ 106 109 struct async_submit_bio { 107 110 void *private_data; 108 - struct btrfs_fs_info *fs_info; 109 111 struct bio *bio; 110 112 extent_submit_bio_start_t *submit_bio_start; 111 - extent_submit_bio_done_t *submit_bio_done; 112 113 int mirror_num; 113 - unsigned long bio_flags; 114 114 /* 115 115 * bio_offset is optional, can be used if the pages in the bio 116 116 * can't tell us where in the file the bio should go ··· 206 212 struct page *page, size_t pg_offset, u64 start, u64 len, 207 213 int create) 208 214 { 209 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 215 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 210 216 struct extent_map_tree *em_tree = &inode->extent_tree; 211 217 struct extent_map *em; 212 218 int ret; ··· 609 615 610 616 found_start = btrfs_header_bytenr(eb); 611 617 if (found_start != eb->start) { 612 - btrfs_err_rl(fs_info, "bad tree block start %llu %llu", 613 - found_start, eb->start); 618 + btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu", 619 + eb->start, found_start); 614 620 ret = -EIO; 615 621 goto err; 616 622 } ··· 622 628 } 623 629 found_level = btrfs_header_level(eb); 624 630 if (found_level >= BTRFS_MAX_LEVEL) { 625 - btrfs_err(fs_info, "bad tree block level %d", 626 - (int)btrfs_header_level(eb)); 631 + btrfs_err(fs_info, "bad tree block level %d on %llu", 632 + 
(int)btrfs_header_level(eb), eb->start); 627 633 ret = -EIO; 628 634 goto err; 629 635 } ··· 773 779 return; 774 780 } 775 781 776 - async->submit_bio_done(async->private_data, async->bio, async->mirror_num); 782 + btrfs_submit_bio_done(async->private_data, async->bio, async->mirror_num); 777 783 } 778 784 779 785 static void run_one_async_free(struct btrfs_work *work) ··· 787 793 blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 788 794 int mirror_num, unsigned long bio_flags, 789 795 u64 bio_offset, void *private_data, 790 - extent_submit_bio_start_t *submit_bio_start, 791 - extent_submit_bio_done_t *submit_bio_done) 796 + extent_submit_bio_start_t *submit_bio_start) 792 797 { 793 798 struct async_submit_bio *async; 794 799 ··· 796 803 return BLK_STS_RESOURCE; 797 804 798 805 async->private_data = private_data; 799 - async->fs_info = fs_info; 800 806 async->bio = bio; 801 807 async->mirror_num = mirror_num; 802 808 async->submit_bio_start = submit_bio_start; 803 - async->submit_bio_done = submit_bio_done; 804 809 805 810 btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, 806 811 run_one_async_done, run_one_async_free); 807 812 808 - async->bio_flags = bio_flags; 809 813 async->bio_offset = bio_offset; 810 814 811 815 async->status = 0; ··· 839 849 * submission context. Just jump into btrfs_map_bio 840 850 */ 841 851 return btree_csum_one_bio(bio); 842 - } 843 - 844 - static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio, 845 - int mirror_num) 846 - { 847 - struct inode *inode = private_data; 848 - blk_status_t ret; 849 - 850 - /* 851 - * when we're called for a write, we're already in the async 852 - * submission context. 
Just jump into btrfs_map_bio 853 - */ 854 - ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1); 855 - if (ret) { 856 - bio->bi_status = ret; 857 - bio_endio(bio); 858 - } 859 - return ret; 860 852 } 861 853 862 854 static int check_async_write(struct btrfs_inode *bi) ··· 883 911 */ 884 912 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, 885 913 bio_offset, private_data, 886 - btree_submit_bio_start, 887 - btree_submit_bio_done); 914 + btree_submit_bio_start); 888 915 } 889 916 890 917 if (ret) ··· 932 961 933 962 fs_info = BTRFS_I(mapping->host)->root->fs_info; 934 963 /* this is a bit racy, but that's ok */ 935 - ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, 936 - BTRFS_DIRTY_METADATA_THRESH); 964 + ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, 965 + BTRFS_DIRTY_METADATA_THRESH, 966 + fs_info->dirty_metadata_batch); 937 967 if (ret < 0) 938 968 return 0; 939 969 } ··· 1153 1181 root->highest_objectid = 0; 1154 1182 root->nr_delalloc_inodes = 0; 1155 1183 root->nr_ordered_extents = 0; 1156 - root->name = NULL; 1157 1184 root->inode_tree = RB_ROOT; 1158 1185 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1159 1186 root->block_rsv = NULL; ··· 1263 1292 goto fail; 1264 1293 } 1265 1294 1266 - memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); 1267 - btrfs_set_header_bytenr(leaf, leaf->start); 1268 - btrfs_set_header_generation(leaf, trans->transid); 1269 - btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 1270 - btrfs_set_header_owner(leaf, objectid); 1271 1295 root->node = leaf; 1272 - 1273 - write_extent_buffer_fsid(leaf, fs_info->fsid); 1274 - write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); 1275 1296 btrfs_mark_buffer_dirty(leaf); 1276 1297 1277 1298 root->commit_root = btrfs_root_node(root); ··· 1337 1374 return ERR_CAST(leaf); 1338 1375 } 1339 1376 1340 - memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); 1341 - btrfs_set_header_bytenr(leaf, 
leaf->start); 1342 - btrfs_set_header_generation(leaf, trans->transid); 1343 - btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 1344 - btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); 1345 1377 root->node = leaf; 1346 1378 1347 - write_extent_buffer_fsid(root->node, fs_info->fsid); 1348 1379 btrfs_mark_buffer_dirty(root->node); 1349 1380 btrfs_tree_unlock(root->node); 1350 1381 return root; ··· 1503 1546 1504 1547 return 0; 1505 1548 fail: 1506 - /* the caller is responsible to call free_fs_root */ 1549 + /* The caller is responsible to call btrfs_free_fs_root */ 1507 1550 return ret; 1508 1551 } 1509 1552 ··· 1608 1651 ret = btrfs_insert_fs_root(fs_info, root); 1609 1652 if (ret) { 1610 1653 if (ret == -EEXIST) { 1611 - free_fs_root(root); 1654 + btrfs_free_fs_root(root); 1612 1655 goto again; 1613 1656 } 1614 1657 goto fail; 1615 1658 } 1616 1659 return root; 1617 1660 fail: 1618 - free_fs_root(root); 1661 + btrfs_free_fs_root(root); 1619 1662 return ERR_PTR(ret); 1620 1663 } 1621 1664 ··· 1760 1803 struct btrfs_trans_handle *trans; 1761 1804 struct btrfs_transaction *cur; 1762 1805 u64 transid; 1763 - unsigned long now; 1806 + time64_t now; 1764 1807 unsigned long delay; 1765 1808 bool cannot_commit; 1766 1809 ··· 1776 1819 goto sleep; 1777 1820 } 1778 1821 1779 - now = get_seconds(); 1822 + now = ktime_get_seconds(); 1780 1823 if (cur->state < TRANS_STATE_BLOCKED && 1781 1824 !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && 1782 1825 (now < cur->start_time || ··· 2153 2196 2154 2197 static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) 2155 2198 { 2156 - fs_info->dev_replace.lock_owner = 0; 2157 - atomic_set(&fs_info->dev_replace.nesting_level, 0); 2158 2199 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); 2159 2200 rwlock_init(&fs_info->dev_replace.lock); 2160 2201 atomic_set(&fs_info->dev_replace.read_locks, 0); ··· 3030 3075 fs_info->generation = generation; 3031 3076 fs_info->last_trans_committed 
= generation; 3032 3077 3078 + ret = btrfs_verify_dev_extents(fs_info); 3079 + if (ret) { 3080 + btrfs_err(fs_info, 3081 + "failed to verify dev extents against chunks: %d", 3082 + ret); 3083 + goto fail_block_groups; 3084 + } 3033 3085 ret = btrfs_recover_balance(fs_info); 3034 3086 if (ret) { 3035 3087 btrfs_err(fs_info, "failed to recover balance: %d", ret); ··· 3837 3875 __btrfs_remove_free_space_cache(root->free_ino_pinned); 3838 3876 if (root->free_ino_ctl) 3839 3877 __btrfs_remove_free_space_cache(root->free_ino_ctl); 3840 - free_fs_root(root); 3878 + btrfs_free_fs_root(root); 3841 3879 } 3842 3880 3843 - static void free_fs_root(struct btrfs_root *root) 3881 + void btrfs_free_fs_root(struct btrfs_root *root) 3844 3882 { 3845 3883 iput(root->ino_cache_inode); 3846 3884 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); ··· 3852 3890 free_extent_buffer(root->commit_root); 3853 3891 kfree(root->free_ino_ctl); 3854 3892 kfree(root->free_ino_pinned); 3855 - kfree(root->name); 3856 3893 btrfs_put_fs_root(root); 3857 - } 3858 - 3859 - void btrfs_free_fs_root(struct btrfs_root *root) 3860 - { 3861 - free_fs_root(root); 3862 3894 } 3863 3895 3864 3896 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) ··· 4060 4104 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 4061 4105 /* 4062 4106 * This is a fast path so only do this check if we have sanity tests 4063 - * enabled. Normal people shouldn't be marking dummy buffers as dirty 4107 + * enabled. Normal people shouldn't be using unmapped buffers as dirty 4064 4108 * outside of the sanity tests. 
4065 4109 */ 4066 - if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags))) 4110 + if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags))) 4067 4111 return; 4068 4112 #endif 4069 4113 root = BTRFS_I(buf->pages[0]->mapping->host)->root; ··· 4106 4150 if (flush_delayed) 4107 4151 btrfs_balance_delayed_items(fs_info); 4108 4152 4109 - ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, 4110 - BTRFS_DIRTY_METADATA_THRESH); 4153 + ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, 4154 + BTRFS_DIRTY_METADATA_THRESH, 4155 + fs_info->dirty_metadata_batch); 4111 4156 if (ret > 0) { 4112 4157 balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping); 4113 4158 } ··· 4520 4563 return 0; 4521 4564 } 4522 4565 4523 - static struct btrfs_fs_info *btree_fs_info(void *private_data) 4524 - { 4525 - struct inode *inode = private_data; 4526 - return btrfs_sb(inode->i_sb); 4527 - } 4528 - 4529 4566 static const struct extent_io_ops btree_extent_io_ops = { 4530 4567 /* mandatory callbacks */ 4531 4568 .submit_bio_hook = btree_submit_bio_hook, 4532 4569 .readpage_end_io_hook = btree_readpage_end_io_hook, 4533 - /* note we're sharing with inode.c for the merge bio hook */ 4534 - .merge_bio_hook = btrfs_merge_bio_hook, 4535 4570 .readpage_io_failed_hook = btree_io_failed_hook, 4536 - .set_range_writeback = btrfs_set_range_writeback, 4537 - .tree_fs_info = btree_fs_info, 4538 4571 4539 4572 /* optional callbacks */ 4540 4573 };

+3 -2

fs/btrfs/disk-io.h

··· 120 120 blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 121 121 int mirror_num, unsigned long bio_flags, 122 122 u64 bio_offset, void *private_data, 123 - extent_submit_bio_start_t *submit_bio_start, 124 - extent_submit_bio_done_t *submit_bio_done); 123 + extent_submit_bio_start_t *submit_bio_start); 124 + blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, 125 + int mirror_num); 125 126 int btrfs_write_tree_block(struct extent_buffer *buf); 126 127 void btrfs_wait_tree_block_writeback(struct extent_buffer *buf); 127 128 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,

+390 -493

fs/btrfs/extent-tree.c

··· 52 52 }; 53 53 54 54 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 55 - struct btrfs_fs_info *fs_info, 56 - struct btrfs_delayed_ref_node *node, u64 parent, 57 - u64 root_objectid, u64 owner_objectid, 58 - u64 owner_offset, int refs_to_drop, 59 - struct btrfs_delayed_extent_op *extra_op); 55 + struct btrfs_delayed_ref_node *node, u64 parent, 56 + u64 root_objectid, u64 owner_objectid, 57 + u64 owner_offset, int refs_to_drop, 58 + struct btrfs_delayed_extent_op *extra_op); 60 59 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 61 60 struct extent_buffer *leaf, 62 61 struct btrfs_extent_item *ei); 63 62 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 64 - struct btrfs_fs_info *fs_info, 65 63 u64 parent, u64 root_objectid, 66 64 u64 flags, u64 owner, u64 offset, 67 65 struct btrfs_key *ins, int ref_mod); 68 66 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 69 67 struct btrfs_delayed_ref_node *node, 70 68 struct btrfs_delayed_extent_op *extent_op); 71 - static int do_chunk_alloc(struct btrfs_trans_handle *trans, 72 - struct btrfs_fs_info *fs_info, u64 flags, 69 + static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, 73 70 int force); 74 71 static int find_next_key(struct btrfs_path *path, int level, 75 72 struct btrfs_key *key); ··· 217 220 return 0; 218 221 } 219 222 220 - static void free_excluded_extents(struct btrfs_fs_info *fs_info, 221 - struct btrfs_block_group_cache *cache) 223 + static void free_excluded_extents(struct btrfs_block_group_cache *cache) 222 224 { 225 + struct btrfs_fs_info *fs_info = cache->fs_info; 223 226 u64 start, end; 224 227 225 228 start = cache->key.objectid; ··· 231 234 start, end, EXTENT_UPTODATE); 232 235 } 233 236 234 - static int exclude_super_stripes(struct btrfs_fs_info *fs_info, 235 - struct btrfs_block_group_cache *cache) 237 + static int exclude_super_stripes(struct btrfs_block_group_cache *cache) 236 238 { 239 + 
struct btrfs_fs_info *fs_info = cache->fs_info; 237 240 u64 bytenr; 238 241 u64 *logical; 239 242 int stripe_len; ··· 555 558 caching_ctl->progress = (u64)-1; 556 559 557 560 up_read(&fs_info->commit_root_sem); 558 - free_excluded_extents(fs_info, block_group); 561 + free_excluded_extents(block_group); 559 562 mutex_unlock(&caching_ctl->mutex); 560 563 561 564 wake_up(&caching_ctl->wait); ··· 663 666 wake_up(&caching_ctl->wait); 664 667 if (ret == 1) { 665 668 put_caching_control(caching_ctl); 666 - free_excluded_extents(fs_info, cache); 669 + free_excluded_extents(cache); 667 670 return 0; 668 671 } 669 672 } else { ··· 755 758 756 759 space_info = __find_space_info(fs_info, flags); 757 760 ASSERT(space_info); 758 - percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); 761 + percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes, 762 + BTRFS_TOTAL_BYTES_PINNED_BATCH); 759 763 } 760 764 761 765 /* ··· 868 870 num_refs = btrfs_extent_refs(leaf, ei); 869 871 extent_flags = btrfs_extent_flags(leaf, ei); 870 872 } else { 871 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 872 - struct btrfs_extent_item_v0 *ei0; 873 - BUG_ON(item_size != sizeof(*ei0)); 874 - ei0 = btrfs_item_ptr(leaf, path->slots[0], 875 - struct btrfs_extent_item_v0); 876 - num_refs = btrfs_extent_refs_v0(leaf, ei0); 877 - /* FIXME: this isn't correct for data */ 878 - extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 879 - #else 880 - BUG(); 881 - #endif 873 + ret = -EINVAL; 874 + btrfs_print_v0_err(fs_info); 875 + if (trans) 876 + btrfs_abort_transaction(trans, ret); 877 + else 878 + btrfs_handle_fs_error(fs_info, ret, NULL); 879 + 880 + goto out_free; 882 881 } 882 + 883 883 BUG_ON(num_refs == 0); 884 884 } else { 885 885 num_refs = 0; ··· 1035 1039 * tree block info structure. 
1036 1040 */ 1037 1041 1038 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1039 - static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 1040 - struct btrfs_fs_info *fs_info, 1041 - struct btrfs_path *path, 1042 - u64 owner, u32 extra_size) 1043 - { 1044 - struct btrfs_root *root = fs_info->extent_root; 1045 - struct btrfs_extent_item *item; 1046 - struct btrfs_extent_item_v0 *ei0; 1047 - struct btrfs_extent_ref_v0 *ref0; 1048 - struct btrfs_tree_block_info *bi; 1049 - struct extent_buffer *leaf; 1050 - struct btrfs_key key; 1051 - struct btrfs_key found_key; 1052 - u32 new_size = sizeof(*item); 1053 - u64 refs; 1054 - int ret; 1055 - 1056 - leaf = path->nodes[0]; 1057 - BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 1058 - 1059 - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1060 - ei0 = btrfs_item_ptr(leaf, path->slots[0], 1061 - struct btrfs_extent_item_v0); 1062 - refs = btrfs_extent_refs_v0(leaf, ei0); 1063 - 1064 - if (owner == (u64)-1) { 1065 - while (1) { 1066 - if (path->slots[0] >= btrfs_header_nritems(leaf)) { 1067 - ret = btrfs_next_leaf(root, path); 1068 - if (ret < 0) 1069 - return ret; 1070 - BUG_ON(ret > 0); /* Corruption */ 1071 - leaf = path->nodes[0]; 1072 - } 1073 - btrfs_item_key_to_cpu(leaf, &found_key, 1074 - path->slots[0]); 1075 - BUG_ON(key.objectid != found_key.objectid); 1076 - if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 1077 - path->slots[0]++; 1078 - continue; 1079 - } 1080 - ref0 = btrfs_item_ptr(leaf, path->slots[0], 1081 - struct btrfs_extent_ref_v0); 1082 - owner = btrfs_ref_objectid_v0(leaf, ref0); 1083 - break; 1084 - } 1085 - } 1086 - btrfs_release_path(path); 1087 - 1088 - if (owner < BTRFS_FIRST_FREE_OBJECTID) 1089 - new_size += sizeof(*bi); 1090 - 1091 - new_size -= sizeof(*ei0); 1092 - ret = btrfs_search_slot(trans, root, &key, path, 1093 - new_size + extra_size, 1); 1094 - if (ret < 0) 1095 - return ret; 1096 - BUG_ON(ret); /* Corruption */ 1097 - 1098 - btrfs_extend_item(fs_info, path, 
new_size); 1099 - 1100 - leaf = path->nodes[0]; 1101 - item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1102 - btrfs_set_extent_refs(leaf, item, refs); 1103 - /* FIXME: get real generation */ 1104 - btrfs_set_extent_generation(leaf, item, 0); 1105 - if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1106 - btrfs_set_extent_flags(leaf, item, 1107 - BTRFS_EXTENT_FLAG_TREE_BLOCK | 1108 - BTRFS_BLOCK_FLAG_FULL_BACKREF); 1109 - bi = (struct btrfs_tree_block_info *)(item + 1); 1110 - /* FIXME: get first key of the block */ 1111 - memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi)); 1112 - btrfs_set_tree_block_level(leaf, bi, (int)owner); 1113 - } else { 1114 - btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 1115 - } 1116 - btrfs_mark_buffer_dirty(leaf); 1117 - return 0; 1118 - } 1119 - #endif 1120 - 1121 1042 /* 1122 1043 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required, 1123 1044 * is_data == BTRFS_REF_TYPE_DATA, data type is required, ··· 1129 1216 } 1130 1217 1131 1218 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 1132 - struct btrfs_fs_info *fs_info, 1133 1219 struct btrfs_path *path, 1134 1220 u64 bytenr, u64 parent, 1135 1221 u64 root_objectid, 1136 1222 u64 owner, u64 offset) 1137 1223 { 1138 - struct btrfs_root *root = fs_info->extent_root; 1224 + struct btrfs_root *root = trans->fs_info->extent_root; 1139 1225 struct btrfs_key key; 1140 1226 struct btrfs_extent_data_ref *ref; 1141 1227 struct extent_buffer *leaf; ··· 1163 1251 if (parent) { 1164 1252 if (!ret) 1165 1253 return 0; 1166 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1167 - key.type = BTRFS_EXTENT_REF_V0_KEY; 1168 - btrfs_release_path(path); 1169 - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1170 - if (ret < 0) { 1171 - err = ret; 1172 - goto fail; 1173 - } 1174 - if (!ret) 1175 - return 0; 1176 - #endif 1177 1254 goto fail; 1178 1255 } 1179 1256 ··· 1205 1304 } 1206 1305 1207 1306 static noinline int 
insert_extent_data_ref(struct btrfs_trans_handle *trans, 1208 - struct btrfs_fs_info *fs_info, 1209 1307 struct btrfs_path *path, 1210 1308 u64 bytenr, u64 parent, 1211 1309 u64 root_objectid, u64 owner, 1212 1310 u64 offset, int refs_to_add) 1213 1311 { 1214 - struct btrfs_root *root = fs_info->extent_root; 1312 + struct btrfs_root *root = trans->fs_info->extent_root; 1215 1313 struct btrfs_key key; 1216 1314 struct extent_buffer *leaf; 1217 1315 u32 size; ··· 1284 1384 } 1285 1385 1286 1386 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1287 - struct btrfs_fs_info *fs_info, 1288 1387 struct btrfs_path *path, 1289 1388 int refs_to_drop, int *last_ref) 1290 1389 { ··· 1305 1406 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1306 1407 struct btrfs_shared_data_ref); 1307 1408 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1308 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1309 - } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1310 - struct btrfs_extent_ref_v0 *ref0; 1311 - ref0 = btrfs_item_ptr(leaf, path->slots[0], 1312 - struct btrfs_extent_ref_v0); 1313 - num_refs = btrfs_ref_count_v0(leaf, ref0); 1314 - #endif 1409 + } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { 1410 + btrfs_print_v0_err(trans->fs_info); 1411 + btrfs_abort_transaction(trans, -EINVAL); 1412 + return -EINVAL; 1315 1413 } else { 1316 1414 BUG(); 1317 1415 } ··· 1317 1421 num_refs -= refs_to_drop; 1318 1422 1319 1423 if (num_refs == 0) { 1320 - ret = btrfs_del_item(trans, fs_info->extent_root, path); 1424 + ret = btrfs_del_item(trans, trans->fs_info->extent_root, path); 1321 1425 *last_ref = 1; 1322 1426 } else { 1323 1427 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1324 1428 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1325 1429 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 1326 1430 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 1327 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1328 - else { 1329 - struct btrfs_extent_ref_v0 *ref0; 1330 - ref0 = 
btrfs_item_ptr(leaf, path->slots[0], 1331 - struct btrfs_extent_ref_v0); 1332 - btrfs_set_ref_count_v0(leaf, ref0, num_refs); 1333 - } 1334 - #endif 1335 1431 btrfs_mark_buffer_dirty(leaf); 1336 1432 } 1337 1433 return ret; ··· 1341 1453 1342 1454 leaf = path->nodes[0]; 1343 1455 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1456 + 1457 + BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 1344 1458 if (iref) { 1345 1459 /* 1346 1460 * If type is invalid, we should have bailed out earlier than ··· 1365 1475 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1366 1476 struct btrfs_shared_data_ref); 1367 1477 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1368 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1369 - } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1370 - struct btrfs_extent_ref_v0 *ref0; 1371 - ref0 = btrfs_item_ptr(leaf, path->slots[0], 1372 - struct btrfs_extent_ref_v0); 1373 - num_refs = btrfs_ref_count_v0(leaf, ref0); 1374 - #endif 1375 1478 } else { 1376 1479 WARN_ON(1); 1377 1480 } ··· 1372 1489 } 1373 1490 1374 1491 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 1375 - struct btrfs_fs_info *fs_info, 1376 1492 struct btrfs_path *path, 1377 1493 u64 bytenr, u64 parent, 1378 1494 u64 root_objectid) 1379 1495 { 1380 - struct btrfs_root *root = fs_info->extent_root; 1496 + struct btrfs_root *root = trans->fs_info->extent_root; 1381 1497 struct btrfs_key key; 1382 1498 int ret; 1383 1499 ··· 1392 1510 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1393 1511 if (ret > 0) 1394 1512 ret = -ENOENT; 1395 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1396 - if (ret == -ENOENT && parent) { 1397 - btrfs_release_path(path); 1398 - key.type = BTRFS_EXTENT_REF_V0_KEY; 1399 - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1400 - if (ret > 0) 1401 - ret = -ENOENT; 1402 - } 1403 - #endif 1404 1513 return ret; 1405 1514 } 1406 1515 1407 1516 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 1408 - struct 
btrfs_fs_info *fs_info, 1409 1517 struct btrfs_path *path, 1410 1518 u64 bytenr, u64 parent, 1411 1519 u64 root_objectid) ··· 1412 1540 key.offset = root_objectid; 1413 1541 } 1414 1542 1415 - ret = btrfs_insert_empty_item(trans, fs_info->extent_root, 1543 + ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root, 1416 1544 path, &key, 0); 1417 1545 btrfs_release_path(path); 1418 1546 return ret; ··· 1471 1599 */ 1472 1600 static noinline_for_stack 1473 1601 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1474 - struct btrfs_fs_info *fs_info, 1475 1602 struct btrfs_path *path, 1476 1603 struct btrfs_extent_inline_ref **ref_ret, 1477 1604 u64 bytenr, u64 num_bytes, 1478 1605 u64 parent, u64 root_objectid, 1479 1606 u64 owner, u64 offset, int insert) 1480 1607 { 1608 + struct btrfs_fs_info *fs_info = trans->fs_info; 1481 1609 struct btrfs_root *root = fs_info->extent_root; 1482 1610 struct btrfs_key key; 1483 1611 struct extent_buffer *leaf; ··· 1507 1635 extra_size = -1; 1508 1636 1509 1637 /* 1510 - * Owner is our parent level, so we can just add one to get the level 1511 - * for the block we are interested in. 1638 + * Owner is our level, so we can just add one to get the level for the 1639 + * block we are interested in. 
1512 1640 */ 1513 1641 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) { 1514 1642 key.type = BTRFS_METADATA_ITEM_KEY; ··· 1556 1684 1557 1685 leaf = path->nodes[0]; 1558 1686 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1559 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1560 - if (item_size < sizeof(*ei)) { 1561 - if (!insert) { 1562 - err = -ENOENT; 1563 - goto out; 1564 - } 1565 - ret = convert_extent_item_v0(trans, fs_info, path, owner, 1566 - extra_size); 1567 - if (ret < 0) { 1568 - err = ret; 1569 - goto out; 1570 - } 1571 - leaf = path->nodes[0]; 1572 - item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1687 + if (unlikely(item_size < sizeof(*ei))) { 1688 + err = -EINVAL; 1689 + btrfs_print_v0_err(fs_info); 1690 + btrfs_abort_transaction(trans, err); 1691 + goto out; 1573 1692 } 1574 - #endif 1575 - BUG_ON(item_size < sizeof(*ei)); 1576 1693 1577 1694 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1578 1695 flags = btrfs_extent_flags(leaf, ei); ··· 1588 1727 iref = (struct btrfs_extent_inline_ref *)ptr; 1589 1728 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed); 1590 1729 if (type == BTRFS_REF_TYPE_INVALID) { 1591 - err = -EINVAL; 1730 + err = -EUCLEAN; 1592 1731 goto out; 1593 1732 } 1594 1733 ··· 1724 1863 } 1725 1864 1726 1865 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1727 - struct btrfs_fs_info *fs_info, 1728 1866 struct btrfs_path *path, 1729 1867 struct btrfs_extent_inline_ref **ref_ret, 1730 1868 u64 bytenr, u64 num_bytes, u64 parent, ··· 1731 1871 { 1732 1872 int ret; 1733 1873 1734 - ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret, 1735 - bytenr, num_bytes, parent, 1736 - root_objectid, owner, offset, 0); 1874 + ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr, 1875 + num_bytes, parent, root_objectid, 1876 + owner, offset, 0); 1737 1877 if (ret != -ENOENT) 1738 1878 return ret; 1739 1879 ··· 1741 1881 *ref_ret = NULL; 1742 1882 1743 1883 if (owner 
< BTRFS_FIRST_FREE_OBJECTID) { 1744 - ret = lookup_tree_block_ref(trans, fs_info, path, bytenr, 1745 - parent, root_objectid); 1884 + ret = lookup_tree_block_ref(trans, path, bytenr, parent, 1885 + root_objectid); 1746 1886 } else { 1747 - ret = lookup_extent_data_ref(trans, fs_info, path, bytenr, 1748 - parent, root_objectid, owner, 1749 - offset); 1887 + ret = lookup_extent_data_ref(trans, path, bytenr, parent, 1888 + root_objectid, owner, offset); 1750 1889 } 1751 1890 return ret; 1752 1891 } ··· 1754 1895 * helper to update/remove inline back ref 1755 1896 */ 1756 1897 static noinline_for_stack 1757 - void update_inline_extent_backref(struct btrfs_fs_info *fs_info, 1758 - struct btrfs_path *path, 1898 + void update_inline_extent_backref(struct btrfs_path *path, 1759 1899 struct btrfs_extent_inline_ref *iref, 1760 1900 int refs_to_mod, 1761 1901 struct btrfs_delayed_extent_op *extent_op, 1762 1902 int *last_ref) 1763 1903 { 1764 - struct extent_buffer *leaf; 1904 + struct extent_buffer *leaf = path->nodes[0]; 1905 + struct btrfs_fs_info *fs_info = leaf->fs_info; 1765 1906 struct btrfs_extent_item *ei; 1766 1907 struct btrfs_extent_data_ref *dref = NULL; 1767 1908 struct btrfs_shared_data_ref *sref = NULL; ··· 1772 1913 int type; 1773 1914 u64 refs; 1774 1915 1775 - leaf = path->nodes[0]; 1776 1916 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1777 1917 refs = btrfs_extent_refs(leaf, ei); 1778 1918 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); ··· 1823 1965 1824 1966 static noinline_for_stack 1825 1967 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1826 - struct btrfs_fs_info *fs_info, 1827 1968 struct btrfs_path *path, 1828 1969 u64 bytenr, u64 num_bytes, u64 parent, 1829 1970 u64 root_objectid, u64 owner, ··· 1832 1975 struct btrfs_extent_inline_ref *iref; 1833 1976 int ret; 1834 1977 1835 - ret = lookup_inline_extent_backref(trans, fs_info, path, &iref, 1836 - bytenr, num_bytes, parent, 1837 - root_objectid, 
owner, offset, 1); 1978 + ret = lookup_inline_extent_backref(trans, path, &iref, bytenr, 1979 + num_bytes, parent, root_objectid, 1980 + owner, offset, 1); 1838 1981 if (ret == 0) { 1839 1982 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1840 - update_inline_extent_backref(fs_info, path, iref, 1841 - refs_to_add, extent_op, NULL); 1983 + update_inline_extent_backref(path, iref, refs_to_add, 1984 + extent_op, NULL); 1842 1985 } else if (ret == -ENOENT) { 1843 - setup_inline_extent_backref(fs_info, path, iref, parent, 1986 + setup_inline_extent_backref(trans->fs_info, path, iref, parent, 1844 1987 root_objectid, owner, offset, 1845 1988 refs_to_add, extent_op); 1846 1989 ret = 0; ··· 1849 1992 } 1850 1993 1851 1994 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1852 - struct btrfs_fs_info *fs_info, 1853 1995 struct btrfs_path *path, 1854 1996 u64 bytenr, u64 parent, u64 root_objectid, 1855 1997 u64 owner, u64 offset, int refs_to_add) ··· 1856 2000 int ret; 1857 2001 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1858 2002 BUG_ON(refs_to_add != 1); 1859 - ret = insert_tree_block_ref(trans, fs_info, path, bytenr, 1860 - parent, root_objectid); 2003 + ret = insert_tree_block_ref(trans, path, bytenr, parent, 2004 + root_objectid); 1861 2005 } else { 1862 - ret = insert_extent_data_ref(trans, fs_info, path, bytenr, 1863 - parent, root_objectid, 1864 - owner, offset, refs_to_add); 2006 + ret = insert_extent_data_ref(trans, path, bytenr, parent, 2007 + root_objectid, owner, offset, 2008 + refs_to_add); 1865 2009 } 1866 2010 return ret; 1867 2011 } 1868 2012 1869 2013 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1870 - struct btrfs_fs_info *fs_info, 1871 2014 struct btrfs_path *path, 1872 2015 struct btrfs_extent_inline_ref *iref, 1873 2016 int refs_to_drop, int is_data, int *last_ref) ··· 1875 2020 1876 2021 BUG_ON(!is_data && refs_to_drop != 1); 1877 2022 if (iref) { 1878 - update_inline_extent_backref(fs_info, path, iref, 1879 - 
-refs_to_drop, NULL, last_ref); 2023 + update_inline_extent_backref(path, iref, -refs_to_drop, NULL, 2024 + last_ref); 1880 2025 } else if (is_data) { 1881 - ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop, 2026 + ret = remove_extent_data_ref(trans, path, refs_to_drop, 1882 2027 last_ref); 1883 2028 } else { 1884 2029 *last_ref = 1; 1885 - ret = btrfs_del_item(trans, fs_info->extent_root, path); 2030 + ret = btrfs_del_item(trans, trans->fs_info->extent_root, path); 1886 2031 } 1887 2032 return ret; 1888 2033 } ··· 2040 2185 owner, offset, BTRFS_ADD_DELAYED_REF); 2041 2186 2042 2187 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 2043 - ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 2188 + ret = btrfs_add_delayed_tree_ref(trans, bytenr, 2044 2189 num_bytes, parent, 2045 2190 root_objectid, (int)owner, 2046 2191 BTRFS_ADD_DELAYED_REF, NULL, 2047 2192 &old_ref_mod, &new_ref_mod); 2048 2193 } else { 2049 - ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 2194 + ret = btrfs_add_delayed_data_ref(trans, bytenr, 2050 2195 num_bytes, parent, 2051 2196 root_objectid, owner, offset, 2052 2197 0, BTRFS_ADD_DELAYED_REF, ··· 2062 2207 return ret; 2063 2208 } 2064 2209 2210 + /* 2211 + * __btrfs_inc_extent_ref - insert backreference for a given extent 2212 + * 2213 + * @trans: Handle of transaction 2214 + * 2215 + * @node: The delayed ref node used to get the bytenr/length for 2216 + * extent whose references are incremented. 2217 + * 2218 + * @parent: If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/ 2219 + * BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical 2220 + * bytenr of the parent block. Since new extents are always 2221 + * created with indirect references, this will only be the case 2222 + * when relocating a shared extent. In that case, root_objectid 2223 + * will be BTRFS_TREE_RELOC_OBJECTID. 
Otherwise, parent must 2224 + * be 0 2225 + * 2226 + * @root_objectid: The id of the root where this modification has originated, 2227 + * this can be either one of the well-known metadata trees or 2228 + * the subvolume id which references this extent. 2229 + * 2230 + * @owner: For data extents it is the inode number of the owning file. 2231 + * For metadata extents this parameter holds the level in the 2232 + * tree of the extent. 2233 + * 2234 + * @offset: For metadata extents the offset is ignored and is currently 2235 + * always passed as 0. For data extents it is the fileoffset 2236 + * this extent belongs to. 2237 + * 2238 + * @refs_to_add Number of references to add 2239 + * 2240 + * @extent_op Pointer to a structure, holding information necessary when 2241 + * updating a tree block's flags 2242 + * 2243 + */ 2065 2244 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 2066 - struct btrfs_fs_info *fs_info, 2067 2245 struct btrfs_delayed_ref_node *node, 2068 2246 u64 parent, u64 root_objectid, 2069 2247 u64 owner, u64 offset, int refs_to_add, ··· 2118 2230 path->reada = READA_FORWARD; 2119 2231 path->leave_spinning = 1; 2120 2232 /* this will setup the path even if it fails to insert the back ref */ 2121 - ret = insert_inline_extent_backref(trans, fs_info, path, bytenr, 2122 - num_bytes, parent, root_objectid, 2123 - owner, offset, 2124 - refs_to_add, extent_op); 2233 + ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes, 2234 + parent, root_objectid, owner, 2235 + offset, refs_to_add, extent_op); 2125 2236 if ((ret < 0 && ret != -EAGAIN) || !ret) 2126 2237 goto out; 2127 2238 ··· 2143 2256 path->reada = READA_FORWARD; 2144 2257 path->leave_spinning = 1; 2145 2258 /* now insert the actual backref */ 2146 - ret = insert_extent_backref(trans, fs_info, path, bytenr, parent, 2147 - root_objectid, owner, offset, refs_to_add); 2259 + ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid, 2260 + owner, offset, 
refs_to_add); 2148 2261 if (ret) 2149 2262 btrfs_abort_transaction(trans, ret); 2150 2263 out: ··· 2153 2266 } 2154 2267 2155 2268 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 2156 - struct btrfs_fs_info *fs_info, 2157 2269 struct btrfs_delayed_ref_node *node, 2158 2270 struct btrfs_delayed_extent_op *extent_op, 2159 2271 int insert_reserved) ··· 2169 2283 ins.type = BTRFS_EXTENT_ITEM_KEY; 2170 2284 2171 2285 ref = btrfs_delayed_node_to_data_ref(node); 2172 - trace_run_delayed_data_ref(fs_info, node, ref, node->action); 2286 + trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action); 2173 2287 2174 2288 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 2175 2289 parent = ref->parent; ··· 2178 2292 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2179 2293 if (extent_op) 2180 2294 flags |= extent_op->flags_to_set; 2181 - ret = alloc_reserved_file_extent(trans, fs_info, 2182 - parent, ref_root, flags, 2183 - ref->objectid, ref->offset, 2184 - &ins, node->ref_mod); 2295 + ret = alloc_reserved_file_extent(trans, parent, ref_root, 2296 + flags, ref->objectid, 2297 + ref->offset, &ins, 2298 + node->ref_mod); 2185 2299 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2186 - ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent, 2187 - ref_root, ref->objectid, 2188 - ref->offset, node->ref_mod, 2189 - extent_op); 2300 + ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, 2301 + ref->objectid, ref->offset, 2302 + node->ref_mod, extent_op); 2190 2303 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2191 - ret = __btrfs_free_extent(trans, fs_info, node, parent, 2304 + ret = __btrfs_free_extent(trans, node, parent, 2192 2305 ref_root, ref->objectid, 2193 2306 ref->offset, node->ref_mod, 2194 2307 extent_op); ··· 2216 2331 } 2217 2332 2218 2333 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 2219 - struct btrfs_fs_info *fs_info, 2220 2334 struct btrfs_delayed_ref_head *head, 2221 2335 struct 
btrfs_delayed_extent_op *extent_op) 2222 2336 { 2337 + struct btrfs_fs_info *fs_info = trans->fs_info; 2223 2338 struct btrfs_key key; 2224 2339 struct btrfs_path *path; 2225 2340 struct btrfs_extent_item *ei; ··· 2285 2400 2286 2401 leaf = path->nodes[0]; 2287 2402 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2288 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2289 - if (item_size < sizeof(*ei)) { 2290 - ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0); 2291 - if (ret < 0) { 2292 - err = ret; 2293 - goto out; 2294 - } 2295 - leaf = path->nodes[0]; 2296 - item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2403 + 2404 + if (unlikely(item_size < sizeof(*ei))) { 2405 + err = -EINVAL; 2406 + btrfs_print_v0_err(fs_info); 2407 + btrfs_abort_transaction(trans, err); 2408 + goto out; 2297 2409 } 2298 - #endif 2299 - BUG_ON(item_size < sizeof(*ei)); 2410 + 2300 2411 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2301 2412 __run_delayed_extent_op(extent_op, leaf, ei); 2302 2413 ··· 2303 2422 } 2304 2423 2305 2424 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 2306 - struct btrfs_fs_info *fs_info, 2307 2425 struct btrfs_delayed_ref_node *node, 2308 2426 struct btrfs_delayed_extent_op *extent_op, 2309 2427 int insert_reserved) ··· 2313 2433 u64 ref_root = 0; 2314 2434 2315 2435 ref = btrfs_delayed_node_to_tree_ref(node); 2316 - trace_run_delayed_tree_ref(fs_info, node, ref, node->action); 2436 + trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action); 2317 2437 2318 2438 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2319 2439 parent = ref->parent; 2320 2440 ref_root = ref->root; 2321 2441 2322 2442 if (node->ref_mod != 1) { 2323 - btrfs_err(fs_info, 2443 + btrfs_err(trans->fs_info, 2324 2444 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", 2325 2445 node->bytenr, node->ref_mod, node->action, ref_root, 2326 2446 parent); ··· 2330 2450 BUG_ON(!extent_op || 
!extent_op->update_flags); 2331 2451 ret = alloc_reserved_tree_block(trans, node, extent_op); 2332 2452 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2333 - ret = __btrfs_inc_extent_ref(trans, fs_info, node, 2334 - parent, ref_root, 2335 - ref->level, 0, 1, 2336 - extent_op); 2453 + ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, 2454 + ref->level, 0, 1, extent_op); 2337 2455 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2338 - ret = __btrfs_free_extent(trans, fs_info, node, 2339 - parent, ref_root, 2456 + ret = __btrfs_free_extent(trans, node, parent, ref_root, 2340 2457 ref->level, 0, 1, extent_op); 2341 2458 } else { 2342 2459 BUG(); ··· 2343 2466 2344 2467 /* helper function to actually process a single delayed ref entry */ 2345 2468 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 2346 - struct btrfs_fs_info *fs_info, 2347 2469 struct btrfs_delayed_ref_node *node, 2348 2470 struct btrfs_delayed_extent_op *extent_op, 2349 2471 int insert_reserved) ··· 2351 2475 2352 2476 if (trans->aborted) { 2353 2477 if (insert_reserved) 2354 - btrfs_pin_extent(fs_info, node->bytenr, 2478 + btrfs_pin_extent(trans->fs_info, node->bytenr, 2355 2479 node->num_bytes, 1); 2356 2480 return 0; 2357 2481 } 2358 2482 2359 2483 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2360 2484 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2361 - ret = run_delayed_tree_ref(trans, fs_info, node, extent_op, 2485 + ret = run_delayed_tree_ref(trans, node, extent_op, 2362 2486 insert_reserved); 2363 2487 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 2364 2488 node->type == BTRFS_SHARED_DATA_REF_KEY) 2365 - ret = run_delayed_data_ref(trans, fs_info, node, extent_op, 2489 + ret = run_delayed_data_ref(trans, node, extent_op, 2366 2490 insert_reserved); 2367 2491 else 2368 2492 BUG(); ··· 2404 2528 } 2405 2529 2406 2530 static int cleanup_extent_op(struct btrfs_trans_handle *trans, 2407 - struct btrfs_fs_info *fs_info, 2408 2531 struct btrfs_delayed_ref_head 
*head) 2409 2532 { 2410 2533 struct btrfs_delayed_extent_op *extent_op = head->extent_op; ··· 2417 2542 return 0; 2418 2543 } 2419 2544 spin_unlock(&head->lock); 2420 - ret = run_delayed_extent_op(trans, fs_info, head, extent_op); 2545 + ret = run_delayed_extent_op(trans, head, extent_op); 2421 2546 btrfs_free_delayed_extent_op(extent_op); 2422 2547 return ret ? ret : 1; 2423 2548 } 2424 2549 2425 2550 static int cleanup_ref_head(struct btrfs_trans_handle *trans, 2426 - struct btrfs_fs_info *fs_info, 2427 2551 struct btrfs_delayed_ref_head *head) 2428 2552 { 2553 + 2554 + struct btrfs_fs_info *fs_info = trans->fs_info; 2429 2555 struct btrfs_delayed_ref_root *delayed_refs; 2430 2556 int ret; 2431 2557 2432 2558 delayed_refs = &trans->transaction->delayed_refs; 2433 2559 2434 - ret = cleanup_extent_op(trans, fs_info, head); 2560 + ret = cleanup_extent_op(trans, head); 2435 2561 if (ret < 0) { 2436 2562 unselect_delayed_ref_head(delayed_refs, head); 2437 2563 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); ··· 2474 2598 flags = BTRFS_BLOCK_GROUP_METADATA; 2475 2599 space_info = __find_space_info(fs_info, flags); 2476 2600 ASSERT(space_info); 2477 - percpu_counter_add(&space_info->total_bytes_pinned, 2478 - -head->num_bytes); 2601 + percpu_counter_add_batch(&space_info->total_bytes_pinned, 2602 + -head->num_bytes, 2603 + BTRFS_TOTAL_BYTES_PINNED_BATCH); 2479 2604 2480 2605 if (head->is_data) { 2481 2606 spin_lock(&delayed_refs->lock); ··· 2582 2705 * up and move on to the next ref_head. 2583 2706 */ 2584 2707 if (!ref) { 2585 - ret = cleanup_ref_head(trans, fs_info, locked_ref); 2708 + ret = cleanup_ref_head(trans, locked_ref); 2586 2709 if (ret > 0 ) { 2587 2710 /* We dropped our lock, we need to loop. 
*/ 2588 2711 ret = 0; ··· 2629 2752 locked_ref->extent_op = NULL; 2630 2753 spin_unlock(&locked_ref->lock); 2631 2754 2632 - ret = run_one_delayed_ref(trans, fs_info, ref, extent_op, 2755 + ret = run_one_delayed_ref(trans, ref, extent_op, 2633 2756 must_insert_reserved); 2634 2757 2635 2758 btrfs_free_delayed_extent_op(extent_op); ··· 3104 3227 3105 3228 ret = 1; 3106 3229 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3107 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3108 - if (item_size < sizeof(*ei)) { 3109 - WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3110 - goto out; 3111 - } 3112 - #endif 3113 3230 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 3114 3231 3115 3232 if (item_size != sizeof(*ei) + ··· 3931 4060 struct btrfs_space_info *found; 3932 4061 int factor; 3933 4062 3934 - if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 3935 - BTRFS_BLOCK_GROUP_RAID10)) 3936 - factor = 2; 3937 - else 3938 - factor = 1; 4063 + factor = btrfs_bg_type_to_factor(flags); 3939 4064 3940 4065 found = __find_space_info(info, flags); 3941 4066 ASSERT(found); ··· 4156 4289 if (IS_ERR(trans)) 4157 4290 return PTR_ERR(trans); 4158 4291 4159 - ret = do_chunk_alloc(trans, fs_info, alloc_target, 4292 + ret = do_chunk_alloc(trans, alloc_target, 4160 4293 CHUNK_ALLOC_NO_FORCE); 4161 4294 btrfs_end_transaction(trans); 4162 4295 if (ret < 0) { ··· 4176 4309 * allocation, and no removed chunk in current transaction, 4177 4310 * don't bother committing the transaction. 
4178 4311 */ 4179 - have_pinned_space = percpu_counter_compare( 4312 + have_pinned_space = __percpu_counter_compare( 4180 4313 &data_sinfo->total_bytes_pinned, 4181 - used + bytes - data_sinfo->total_bytes); 4314 + used + bytes - data_sinfo->total_bytes, 4315 + BTRFS_TOTAL_BYTES_PINNED_BATCH); 4182 4316 spin_unlock(&data_sinfo->lock); 4183 4317 4184 4318 /* commit the current transaction and try again */ ··· 4226 4358 data_sinfo->flags, bytes, 1); 4227 4359 spin_unlock(&data_sinfo->lock); 4228 4360 4229 - return ret; 4361 + return 0; 4230 4362 } 4231 4363 4232 4364 int btrfs_check_data_free_space(struct inode *inode, ··· 4379 4511 * for allocating a chunk, otherwise if it's false, reserve space necessary for 4380 4512 * removing a chunk. 4381 4513 */ 4382 - void check_system_chunk(struct btrfs_trans_handle *trans, 4383 - struct btrfs_fs_info *fs_info, u64 type) 4514 + void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) 4384 4515 { 4516 + struct btrfs_fs_info *fs_info = trans->fs_info; 4385 4517 struct btrfs_space_info *info; 4386 4518 u64 left; 4387 4519 u64 thresh; ··· 4420 4552 * the paths we visit in the chunk tree (they were already COWed 4421 4553 * or created in the current transaction for example). 4422 4554 */ 4423 - ret = btrfs_alloc_chunk(trans, fs_info, flags); 4555 + ret = btrfs_alloc_chunk(trans, flags); 4424 4556 } 4425 4557 4426 4558 if (!ret) { ··· 4441 4573 * - return 1 if it successfully allocates a chunk, 4442 4574 * - return errors including -ENOSPC otherwise. 
4443 4575 */ 4444 - static int do_chunk_alloc(struct btrfs_trans_handle *trans, 4445 - struct btrfs_fs_info *fs_info, u64 flags, int force) 4576 + static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, 4577 + int force) 4446 4578 { 4579 + struct btrfs_fs_info *fs_info = trans->fs_info; 4447 4580 struct btrfs_space_info *space_info; 4448 - int wait_for_alloc = 0; 4581 + bool wait_for_alloc = false; 4582 + bool should_alloc = false; 4449 4583 int ret = 0; 4450 4584 4451 4585 /* Don't re-enter if we're already allocating a chunk */ ··· 4457 4587 space_info = __find_space_info(fs_info, flags); 4458 4588 ASSERT(space_info); 4459 4589 4460 - again: 4461 - spin_lock(&space_info->lock); 4462 - if (force < space_info->force_alloc) 4463 - force = space_info->force_alloc; 4464 - if (space_info->full) { 4465 - if (should_alloc_chunk(fs_info, space_info, force)) 4466 - ret = -ENOSPC; 4467 - else 4468 - ret = 0; 4469 - spin_unlock(&space_info->lock); 4470 - return ret; 4471 - } 4590 + do { 4591 + spin_lock(&space_info->lock); 4592 + if (force < space_info->force_alloc) 4593 + force = space_info->force_alloc; 4594 + should_alloc = should_alloc_chunk(fs_info, space_info, force); 4595 + if (space_info->full) { 4596 + /* No more free physical space */ 4597 + if (should_alloc) 4598 + ret = -ENOSPC; 4599 + else 4600 + ret = 0; 4601 + spin_unlock(&space_info->lock); 4602 + return ret; 4603 + } else if (!should_alloc) { 4604 + spin_unlock(&space_info->lock); 4605 + return 0; 4606 + } else if (space_info->chunk_alloc) { 4607 + /* 4608 + * Someone is already allocating, so we need to block 4609 + * until this someone is finished and then loop to 4610 + * recheck if we should continue with our allocation 4611 + * attempt. 
4612 + */ 4613 + wait_for_alloc = true; 4614 + spin_unlock(&space_info->lock); 4615 + mutex_lock(&fs_info->chunk_mutex); 4616 + mutex_unlock(&fs_info->chunk_mutex); 4617 + } else { 4618 + /* Proceed with allocation */ 4619 + space_info->chunk_alloc = 1; 4620 + wait_for_alloc = false; 4621 + spin_unlock(&space_info->lock); 4622 + } 4472 4623 4473 - if (!should_alloc_chunk(fs_info, space_info, force)) { 4474 - spin_unlock(&space_info->lock); 4475 - return 0; 4476 - } else if (space_info->chunk_alloc) { 4477 - wait_for_alloc = 1; 4478 - } else { 4479 - space_info->chunk_alloc = 1; 4480 - } 4481 - 4482 - spin_unlock(&space_info->lock); 4624 + cond_resched(); 4625 + } while (wait_for_alloc); 4483 4626 4484 4627 mutex_lock(&fs_info->chunk_mutex); 4485 - 4486 - /* 4487 - * The chunk_mutex is held throughout the entirety of a chunk 4488 - * allocation, so once we've acquired the chunk_mutex we know that the 4489 - * other guy is done and we need to recheck and see if we should 4490 - * allocate. 4491 - */ 4492 - if (wait_for_alloc) { 4493 - mutex_unlock(&fs_info->chunk_mutex); 4494 - wait_for_alloc = 0; 4495 - cond_resched(); 4496 - goto again; 4497 - } 4498 - 4499 4628 trans->allocating_chunk = true; 4500 4629 4501 4630 /* ··· 4520 4651 * Check if we have enough space in SYSTEM chunk because we may need 4521 4652 * to update devices. 4522 4653 */ 4523 - check_system_chunk(trans, fs_info, flags); 4654 + check_system_chunk(trans, flags); 4524 4655 4525 - ret = btrfs_alloc_chunk(trans, fs_info, flags); 4656 + ret = btrfs_alloc_chunk(trans, flags); 4526 4657 trans->allocating_chunk = false; 4527 4658 4528 4659 spin_lock(&space_info->lock); ··· 4572 4703 u64 space_size; 4573 4704 u64 avail; 4574 4705 u64 used; 4706 + int factor; 4575 4707 4576 4708 /* Don't overcommit when in mixed mode. 
*/ 4577 4709 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) ··· 4607 4737 * doesn't include the parity drive, so we don't have to 4608 4738 * change the math 4609 4739 */ 4610 - if (profile & (BTRFS_BLOCK_GROUP_DUP | 4611 - BTRFS_BLOCK_GROUP_RAID1 | 4612 - BTRFS_BLOCK_GROUP_RAID10)) 4613 - avail >>= 1; 4740 + factor = btrfs_bg_type_to_factor(profile); 4741 + avail = div_u64(avail, factor); 4614 4742 4615 4743 /* 4616 4744 * If we aren't flushing all things, let us overcommit up to ··· 4780 4912 return 0; 4781 4913 4782 4914 /* See if there is enough pinned space to make this reservation */ 4783 - if (percpu_counter_compare(&space_info->total_bytes_pinned, 4784 - bytes) >= 0) 4915 + if (__percpu_counter_compare(&space_info->total_bytes_pinned, 4916 + bytes, 4917 + BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) 4785 4918 goto commit; 4786 4919 4787 4920 /* ··· 4799 4930 bytes -= delayed_rsv->size; 4800 4931 spin_unlock(&delayed_rsv->lock); 4801 4932 4802 - if (percpu_counter_compare(&space_info->total_bytes_pinned, 4803 - bytes) < 0) { 4933 + if (__percpu_counter_compare(&space_info->total_bytes_pinned, 4934 + bytes, 4935 + BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) { 4804 4936 return -ENOSPC; 4805 4937 } 4806 4938 ··· 4854 4984 ret = PTR_ERR(trans); 4855 4985 break; 4856 4986 } 4857 - ret = do_chunk_alloc(trans, fs_info, 4987 + ret = do_chunk_alloc(trans, 4858 4988 btrfs_metadata_alloc_profile(fs_info), 4859 4989 CHUNK_ALLOC_NO_FORCE); 4860 4990 btrfs_end_transaction(trans); ··· 5529 5659 kfree(rsv); 5530 5660 } 5531 5661 5532 - void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv) 5533 - { 5534 - kfree(rsv); 5535 - } 5536 - 5537 5662 int btrfs_block_rsv_add(struct btrfs_root *root, 5538 5663 struct btrfs_block_rsv *block_rsv, u64 num_bytes, 5539 5664 enum btrfs_reserve_flush_enum flush) ··· 5884 6019 5885 6020 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) 5886 6021 { 5887 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 6022 + 
struct btrfs_fs_info *fs_info = inode->root->fs_info; 5888 6023 unsigned nr_extents; 5889 6024 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 5890 6025 int ret = 0; ··· 5957 6092 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, 5958 6093 bool qgroup_free) 5959 6094 { 5960 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 6095 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 5961 6096 5962 6097 num_bytes = ALIGN(num_bytes, fs_info->sectorsize); 5963 6098 spin_lock(&inode->lock); ··· 5986 6121 void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, 5987 6122 bool qgroup_free) 5988 6123 { 5989 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 6124 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 5990 6125 unsigned num_extents; 5991 6126 5992 6127 spin_lock(&inode->lock); ··· 6084 6219 cache = btrfs_lookup_block_group(info, bytenr); 6085 6220 if (!cache) 6086 6221 return -ENOENT; 6087 - if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | 6088 - BTRFS_BLOCK_GROUP_RAID1 | 6089 - BTRFS_BLOCK_GROUP_RAID10)) 6090 - factor = 2; 6091 - else 6092 - factor = 1; 6222 + factor = btrfs_bg_type_to_factor(cache->flags); 6223 + 6093 6224 /* 6094 6225 * If this block group has free space cache written out, we 6095 6226 * need to make sure to load it if we are removing space. 
This ··· 6129 6268 trace_btrfs_space_reservation(info, "pinned", 6130 6269 cache->space_info->flags, 6131 6270 num_bytes, 1); 6132 - percpu_counter_add(&cache->space_info->total_bytes_pinned, 6133 - num_bytes); 6271 + percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, 6272 + num_bytes, 6273 + BTRFS_TOTAL_BYTES_PINNED_BATCH); 6134 6274 set_extent_dirty(info->pinned_extents, 6135 6275 bytenr, bytenr + num_bytes - 1, 6136 6276 GFP_NOFS | __GFP_NOFAIL); ··· 6141 6279 if (list_empty(&cache->dirty_list)) { 6142 6280 list_add_tail(&cache->dirty_list, 6143 6281 &trans->transaction->dirty_bgs); 6144 - trans->transaction->num_dirty_bgs++; 6282 + trans->transaction->num_dirty_bgs++; 6145 6283 btrfs_get_block_group(cache); 6146 6284 } 6147 6285 spin_unlock(&trans->transaction->dirty_bgs_lock); ··· 6152 6290 * dirty list to avoid races between cleaner kthread and space 6153 6291 * cache writeout. 6154 6292 */ 6155 - if (!alloc && old_val == 0) { 6156 - spin_lock(&info->unused_bgs_lock); 6157 - if (list_empty(&cache->bg_list)) { 6158 - btrfs_get_block_group(cache); 6159 - trace_btrfs_add_unused_block_group(cache); 6160 - list_add_tail(&cache->bg_list, 6161 - &info->unused_bgs); 6162 - } 6163 - spin_unlock(&info->unused_bgs_lock); 6164 - } 6293 + if (!alloc && old_val == 0) 6294 + btrfs_mark_bg_unused(cache); 6165 6295 6166 6296 btrfs_put_block_group(cache); 6167 6297 total -= num_bytes; ··· 6201 6347 6202 6348 trace_btrfs_space_reservation(fs_info, "pinned", 6203 6349 cache->space_info->flags, num_bytes, 1); 6204 - percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes); 6350 + percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, 6351 + num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); 6205 6352 set_extent_dirty(fs_info->pinned_extents, bytenr, 6206 6353 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 6207 6354 return 0; ··· 6566 6711 trace_btrfs_space_reservation(fs_info, "pinned", 6567 6712 space_info->flags, len, 0); 6568 6713 
space_info->max_extent_size = 0; 6569 - percpu_counter_add(&space_info->total_bytes_pinned, -len); 6714 + percpu_counter_add_batch(&space_info->total_bytes_pinned, 6715 + -len, BTRFS_TOTAL_BYTES_PINNED_BATCH); 6570 6716 if (cache->ro) { 6571 6717 space_info->bytes_readonly += len; 6572 6718 readonly = true; ··· 6671 6815 } 6672 6816 6673 6817 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 6674 - struct btrfs_fs_info *info, 6675 - struct btrfs_delayed_ref_node *node, u64 parent, 6676 - u64 root_objectid, u64 owner_objectid, 6677 - u64 owner_offset, int refs_to_drop, 6678 - struct btrfs_delayed_extent_op *extent_op) 6818 + struct btrfs_delayed_ref_node *node, u64 parent, 6819 + u64 root_objectid, u64 owner_objectid, 6820 + u64 owner_offset, int refs_to_drop, 6821 + struct btrfs_delayed_extent_op *extent_op) 6679 6822 { 6823 + struct btrfs_fs_info *info = trans->fs_info; 6680 6824 struct btrfs_key key; 6681 6825 struct btrfs_path *path; 6682 6826 struct btrfs_root *extent_root = info->extent_root; ··· 6708 6852 if (is_data) 6709 6853 skinny_metadata = false; 6710 6854 6711 - ret = lookup_extent_backref(trans, info, path, &iref, 6712 - bytenr, num_bytes, parent, 6713 - root_objectid, owner_objectid, 6855 + ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes, 6856 + parent, root_objectid, owner_objectid, 6714 6857 owner_offset); 6715 6858 if (ret == 0) { 6716 6859 extent_slot = path->slots[0]; ··· 6732 6877 break; 6733 6878 extent_slot--; 6734 6879 } 6735 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 6736 - item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 6737 - if (found_extent && item_size < sizeof(*ei)) 6738 - found_extent = 0; 6739 - #endif 6880 + 6740 6881 if (!found_extent) { 6741 6882 BUG_ON(iref); 6742 - ret = remove_extent_backref(trans, info, path, NULL, 6883 + ret = remove_extent_backref(trans, path, NULL, 6743 6884 refs_to_drop, 6744 6885 is_data, &last_ref); 6745 6886 if (ret) { ··· 6808 6957 6809 6958 leaf = 
path->nodes[0]; 6810 6959 item_size = btrfs_item_size_nr(leaf, extent_slot); 6811 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 6812 - if (item_size < sizeof(*ei)) { 6813 - BUG_ON(found_extent || extent_slot != path->slots[0]); 6814 - ret = convert_extent_item_v0(trans, info, path, owner_objectid, 6815 - 0); 6816 - if (ret < 0) { 6817 - btrfs_abort_transaction(trans, ret); 6818 - goto out; 6819 - } 6820 - 6821 - btrfs_release_path(path); 6822 - path->leave_spinning = 1; 6823 - 6824 - key.objectid = bytenr; 6825 - key.type = BTRFS_EXTENT_ITEM_KEY; 6826 - key.offset = num_bytes; 6827 - 6828 - ret = btrfs_search_slot(trans, extent_root, &key, path, 6829 - -1, 1); 6830 - if (ret) { 6831 - btrfs_err(info, 6832 - "umm, got %d back from search, was looking for %llu", 6833 - ret, bytenr); 6834 - btrfs_print_leaf(path->nodes[0]); 6835 - } 6836 - if (ret < 0) { 6837 - btrfs_abort_transaction(trans, ret); 6838 - goto out; 6839 - } 6840 - 6841 - extent_slot = path->slots[0]; 6842 - leaf = path->nodes[0]; 6843 - item_size = btrfs_item_size_nr(leaf, extent_slot); 6960 + if (unlikely(item_size < sizeof(*ei))) { 6961 + ret = -EINVAL; 6962 + btrfs_print_v0_err(info); 6963 + btrfs_abort_transaction(trans, ret); 6964 + goto out; 6844 6965 } 6845 - #endif 6846 - BUG_ON(item_size < sizeof(*ei)); 6847 6966 ei = btrfs_item_ptr(leaf, extent_slot, 6848 6967 struct btrfs_extent_item); 6849 6968 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && ··· 6849 7028 btrfs_mark_buffer_dirty(leaf); 6850 7029 } 6851 7030 if (found_extent) { 6852 - ret = remove_extent_backref(trans, info, path, 6853 - iref, refs_to_drop, 6854 - is_data, &last_ref); 7031 + ret = remove_extent_backref(trans, path, iref, 7032 + refs_to_drop, is_data, 7033 + &last_ref); 6855 7034 if (ret) { 6856 7035 btrfs_abort_transaction(trans, ret); 6857 7036 goto out; ··· 6993 7172 root->root_key.objectid, 6994 7173 btrfs_header_level(buf), 0, 6995 7174 BTRFS_DROP_DELAYED_REF); 6996 - ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start, 
7175 + ret = btrfs_add_delayed_tree_ref(trans, buf->start, 6997 7176 buf->len, parent, 6998 7177 root->root_key.objectid, 6999 7178 btrfs_header_level(buf), ··· 7072 7251 old_ref_mod = new_ref_mod = 0; 7073 7252 ret = 0; 7074 7253 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 7075 - ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 7254 + ret = btrfs_add_delayed_tree_ref(trans, bytenr, 7076 7255 num_bytes, parent, 7077 7256 root_objectid, (int)owner, 7078 7257 BTRFS_DROP_DELAYED_REF, NULL, 7079 7258 &old_ref_mod, &new_ref_mod); 7080 7259 } else { 7081 - ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 7260 + ret = btrfs_add_delayed_data_ref(trans, bytenr, 7082 7261 num_bytes, parent, 7083 7262 root_objectid, owner, offset, 7084 7263 0, BTRFS_DROP_DELAYED_REF, ··· 7355 7534 * for the proper type. 7356 7535 */ 7357 7536 if (!block_group_bits(block_group, flags)) { 7358 - u64 extra = BTRFS_BLOCK_GROUP_DUP | 7537 + u64 extra = BTRFS_BLOCK_GROUP_DUP | 7359 7538 BTRFS_BLOCK_GROUP_RAID1 | 7360 7539 BTRFS_BLOCK_GROUP_RAID5 | 7361 7540 BTRFS_BLOCK_GROUP_RAID6 | ··· 7559 7738 goto loop; 7560 7739 } 7561 7740 checks: 7562 - search_start = ALIGN(offset, fs_info->stripesize); 7741 + search_start = round_up(offset, fs_info->stripesize); 7563 7742 7564 7743 /* move on to the next group */ 7565 7744 if (search_start + num_bytes > ··· 7571 7750 if (offset < search_start) 7572 7751 btrfs_add_free_space(block_group, offset, 7573 7752 search_start - offset); 7574 - BUG_ON(offset > search_start); 7575 7753 7576 7754 ret = btrfs_add_reserved_bytes(block_group, ram_bytes, 7577 7755 num_bytes, delalloc); ··· 7646 7826 goto out; 7647 7827 } 7648 7828 7649 - ret = do_chunk_alloc(trans, fs_info, flags, 7650 - CHUNK_ALLOC_FORCE); 7829 + ret = do_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE); 7651 7830 7652 7831 /* 7653 7832 * If we can't allocate a new chunk we've already looped ··· 7872 8053 } 7873 8054 7874 8055 static int alloc_reserved_file_extent(struct btrfs_trans_handle 
*trans, 7875 - struct btrfs_fs_info *fs_info, 7876 8056 u64 parent, u64 root_objectid, 7877 8057 u64 flags, u64 owner, u64 offset, 7878 8058 struct btrfs_key *ins, int ref_mod) 7879 8059 { 8060 + struct btrfs_fs_info *fs_info = trans->fs_info; 7880 8061 int ret; 7881 8062 struct btrfs_extent_item *extent_item; 7882 8063 struct btrfs_extent_inline_ref *iref; ··· 8050 8231 u64 offset, u64 ram_bytes, 8051 8232 struct btrfs_key *ins) 8052 8233 { 8053 - struct btrfs_fs_info *fs_info = root->fs_info; 8054 8234 int ret; 8055 8235 8056 8236 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); ··· 8058 8240 root->root_key.objectid, owner, offset, 8059 8241 BTRFS_ADD_DELAYED_EXTENT); 8060 8242 8061 - ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, 8243 + ret = btrfs_add_delayed_data_ref(trans, ins->objectid, 8062 8244 ins->offset, 0, 8063 8245 root->root_key.objectid, owner, 8064 8246 offset, ram_bytes, ··· 8072 8254 * space cache bits as well 8073 8255 */ 8074 8256 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 8075 - struct btrfs_fs_info *fs_info, 8076 8257 u64 root_objectid, u64 owner, u64 offset, 8077 8258 struct btrfs_key *ins) 8078 8259 { 8260 + struct btrfs_fs_info *fs_info = trans->fs_info; 8079 8261 int ret; 8080 8262 struct btrfs_block_group_cache *block_group; 8081 8263 struct btrfs_space_info *space_info; ··· 8103 8285 spin_unlock(&block_group->lock); 8104 8286 spin_unlock(&space_info->lock); 8105 8287 8106 - ret = alloc_reserved_file_extent(trans, fs_info, 0, root_objectid, 8107 - 0, owner, offset, ins, 1); 8288 + ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner, 8289 + offset, ins, 1); 8108 8290 btrfs_put_block_group(block_group); 8109 8291 return ret; 8110 8292 } 8111 8293 8112 8294 static struct extent_buffer * 8113 8295 btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, 8114 - u64 bytenr, int level) 8296 + u64 bytenr, int level, u64 owner) 8115 8297 { 8116 8298 
struct btrfs_fs_info *fs_info = root->fs_info; 8117 8299 struct extent_buffer *buf; ··· 8120 8302 if (IS_ERR(buf)) 8121 8303 return buf; 8122 8304 8123 - btrfs_set_header_generation(buf, trans->transid); 8124 8305 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 8125 8306 btrfs_tree_lock(buf); 8126 8307 clean_tree_block(fs_info, buf); ··· 8128 8311 btrfs_set_lock_blocking(buf); 8129 8312 set_extent_buffer_uptodate(buf); 8130 8313 8314 + memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header)); 8315 + btrfs_set_header_level(buf, level); 8316 + btrfs_set_header_bytenr(buf, buf->start); 8317 + btrfs_set_header_generation(buf, trans->transid); 8318 + btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV); 8319 + btrfs_set_header_owner(buf, owner); 8320 + write_extent_buffer_fsid(buf, fs_info->fsid); 8321 + write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid); 8131 8322 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 8132 8323 buf->log_index = root->log_transid % 2; 8133 8324 /* ··· 8244 8419 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 8245 8420 if (btrfs_is_testing(fs_info)) { 8246 8421 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, 8247 - level); 8422 + level, root_objectid); 8248 8423 if (!IS_ERR(buf)) 8249 8424 root->alloc_bytenr += blocksize; 8250 8425 return buf; ··· 8260 8435 if (ret) 8261 8436 goto out_unuse; 8262 8437 8263 - buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); 8438 + buf = btrfs_init_new_buffer(trans, root, ins.objectid, level, 8439 + root_objectid); 8264 8440 if (IS_ERR(buf)) { 8265 8441 ret = PTR_ERR(buf); 8266 8442 goto out_free_reserved; ··· 8293 8467 btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, 8294 8468 root_objectid, level, 0, 8295 8469 BTRFS_ADD_DELAYED_EXTENT); 8296 - ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid, 8470 + ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, 8297 8471 ins.offset, parent, 8298 8472 root_objectid, level, 
8299 8473 BTRFS_ADD_DELAYED_EXTENT, ··· 8325 8499 int keep_locks; 8326 8500 int reada_slot; 8327 8501 int reada_count; 8328 - int for_reloc; 8329 8502 }; 8330 8503 8331 8504 #define DROP_REFERENCE 1 ··· 8644 8819 } 8645 8820 8646 8821 if (need_account) { 8647 - ret = btrfs_qgroup_trace_subtree(trans, root, next, 8822 + ret = btrfs_qgroup_trace_subtree(trans, next, 8648 8823 generation, level - 1); 8649 8824 if (ret) { 8650 8825 btrfs_err_rl(fs_info, ··· 8744 8919 else 8745 8920 ret = btrfs_dec_ref(trans, root, eb, 0); 8746 8921 BUG_ON(ret); /* -ENOMEM */ 8747 - ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, eb); 8922 + ret = btrfs_qgroup_trace_leaf_items(trans, eb); 8748 8923 if (ret) { 8749 8924 btrfs_err_rl(fs_info, 8750 8925 "error %d accounting leaf items. Quota is out of sync, rescan required.", ··· 8961 9136 wc->stage = DROP_REFERENCE; 8962 9137 wc->update_ref = update_ref; 8963 9138 wc->keep_locks = 0; 8964 - wc->for_reloc = for_reloc; 8965 9139 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info); 8966 9140 8967 9141 while (1) { ··· 9023 9199 if (err) 9024 9200 goto out_end_trans; 9025 9201 9026 - ret = btrfs_del_root(trans, fs_info, &root->root_key); 9202 + ret = btrfs_del_root(trans, &root->root_key); 9027 9203 if (ret) { 9028 9204 btrfs_abort_transaction(trans, ret); 9029 9205 err = ret; ··· 9126 9302 wc->stage = DROP_REFERENCE; 9127 9303 wc->update_ref = 0; 9128 9304 wc->keep_locks = 1; 9129 - wc->for_reloc = 1; 9130 9305 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info); 9131 9306 9132 9307 while (1) { ··· 9240 9417 return ret; 9241 9418 } 9242 9419 9243 - int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info, 9244 - struct btrfs_block_group_cache *cache) 9420 + int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache) 9245 9421 9246 9422 { 9423 + struct btrfs_fs_info *fs_info = cache->fs_info; 9247 9424 struct btrfs_trans_handle *trans; 9248 9425 u64 alloc_flags; 9249 9426 int ret; ··· 9277 9454 */ 9278 9455 alloc_flags = 
update_block_group_flags(fs_info, cache->flags); 9279 9456 if (alloc_flags != cache->flags) { 9280 - ret = do_chunk_alloc(trans, fs_info, alloc_flags, 9457 + ret = do_chunk_alloc(trans, alloc_flags, 9281 9458 CHUNK_ALLOC_FORCE); 9282 9459 /* 9283 9460 * ENOSPC is allowed here, we may have enough space ··· 9294 9471 if (!ret) 9295 9472 goto out; 9296 9473 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); 9297 - ret = do_chunk_alloc(trans, fs_info, alloc_flags, 9298 - CHUNK_ALLOC_FORCE); 9474 + ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); 9299 9475 if (ret < 0) 9300 9476 goto out; 9301 9477 ret = inc_block_group_ro(cache, 0); ··· 9302 9480 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { 9303 9481 alloc_flags = update_block_group_flags(fs_info, cache->flags); 9304 9482 mutex_lock(&fs_info->chunk_mutex); 9305 - check_system_chunk(trans, fs_info, alloc_flags); 9483 + check_system_chunk(trans, alloc_flags); 9306 9484 mutex_unlock(&fs_info->chunk_mutex); 9307 9485 } 9308 9486 mutex_unlock(&fs_info->ro_block_group_mutex); ··· 9311 9489 return ret; 9312 9490 } 9313 9491 9314 - int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 9315 - struct btrfs_fs_info *fs_info, u64 type) 9492 + int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) 9316 9493 { 9317 - u64 alloc_flags = get_alloc_profile(fs_info, type); 9494 + u64 alloc_flags = get_alloc_profile(trans->fs_info, type); 9318 9495 9319 - return do_chunk_alloc(trans, fs_info, alloc_flags, CHUNK_ALLOC_FORCE); 9496 + return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); 9320 9497 } 9321 9498 9322 9499 /* ··· 9341 9520 continue; 9342 9521 } 9343 9522 9344 - if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | 9345 - BTRFS_BLOCK_GROUP_RAID10 | 9346 - BTRFS_BLOCK_GROUP_DUP)) 9347 - factor = 2; 9348 - else 9349 - factor = 1; 9350 - 9523 + factor = btrfs_bg_type_to_factor(block_group->flags); 9351 9524 free_bytes += (block_group->key.offset - 9352 9525 
btrfs_block_group_used(&block_group->item)) * 9353 9526 factor; ··· 9532 9717 int ret = 0; 9533 9718 struct btrfs_key found_key; 9534 9719 struct extent_buffer *leaf; 9720 + struct btrfs_block_group_item bg; 9721 + u64 flags; 9535 9722 int slot; 9536 9723 9537 9724 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); ··· 9568 9751 "logical %llu len %llu found bg but no related chunk", 9569 9752 found_key.objectid, found_key.offset); 9570 9753 ret = -ENOENT; 9754 + } else if (em->start != found_key.objectid || 9755 + em->len != found_key.offset) { 9756 + btrfs_err(fs_info, 9757 + "block group %llu len %llu mismatch with chunk %llu len %llu", 9758 + found_key.objectid, found_key.offset, 9759 + em->start, em->len); 9760 + ret = -EUCLEAN; 9571 9761 } else { 9572 - ret = 0; 9762 + read_extent_buffer(leaf, &bg, 9763 + btrfs_item_ptr_offset(leaf, slot), 9764 + sizeof(bg)); 9765 + flags = btrfs_block_group_flags(&bg) & 9766 + BTRFS_BLOCK_GROUP_TYPE_MASK; 9767 + 9768 + if (flags != (em->map_lookup->type & 9769 + BTRFS_BLOCK_GROUP_TYPE_MASK)) { 9770 + btrfs_err(fs_info, 9771 + "block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", 9772 + found_key.objectid, 9773 + found_key.offset, flags, 9774 + (BTRFS_BLOCK_GROUP_TYPE_MASK & 9775 + em->map_lookup->type)); 9776 + ret = -EUCLEAN; 9777 + } else { 9778 + ret = 0; 9779 + } 9573 9780 } 9574 9781 free_extent_map(em); 9575 9782 goto out; ··· 9688 9847 */ 9689 9848 if (block_group->cached == BTRFS_CACHE_NO || 9690 9849 block_group->cached == BTRFS_CACHE_ERROR) 9691 - free_excluded_extents(info, block_group); 9850 + free_excluded_extents(block_group); 9692 9851 9693 9852 btrfs_remove_free_space_cache(block_group); 9694 9853 ASSERT(block_group->cached != BTRFS_CACHE_STARTED); ··· 9844 10003 return cache; 9845 10004 } 9846 10005 10006 + 10007 + /* 10008 + * Iterate all chunks and verify that each of them has the corresponding block 10009 + * group 10010 + */ 10011 + static int 
check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) 10012 + { 10013 + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 10014 + struct extent_map *em; 10015 + struct btrfs_block_group_cache *bg; 10016 + u64 start = 0; 10017 + int ret = 0; 10018 + 10019 + while (1) { 10020 + read_lock(&map_tree->map_tree.lock); 10021 + /* 10022 + * lookup_extent_mapping will return the first extent map 10023 + * intersecting the range, so setting @len to 1 is enough to 10024 + * get the first chunk. 10025 + */ 10026 + em = lookup_extent_mapping(&map_tree->map_tree, start, 1); 10027 + read_unlock(&map_tree->map_tree.lock); 10028 + if (!em) 10029 + break; 10030 + 10031 + bg = btrfs_lookup_block_group(fs_info, em->start); 10032 + if (!bg) { 10033 + btrfs_err(fs_info, 10034 + "chunk start=%llu len=%llu doesn't have corresponding block group", 10035 + em->start, em->len); 10036 + ret = -EUCLEAN; 10037 + free_extent_map(em); 10038 + break; 10039 + } 10040 + if (bg->key.objectid != em->start || 10041 + bg->key.offset != em->len || 10042 + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != 10043 + (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { 10044 + btrfs_err(fs_info, 10045 + "chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", 10046 + em->start, em->len, 10047 + em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, 10048 + bg->key.objectid, bg->key.offset, 10049 + bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); 10050 + ret = -EUCLEAN; 10051 + free_extent_map(em); 10052 + btrfs_put_block_group(bg); 10053 + break; 10054 + } 10055 + start = em->start + em->len; 10056 + free_extent_map(em); 10057 + btrfs_put_block_group(bg); 10058 + } 10059 + return ret; 10060 + } 10061 + 9847 10062 int btrfs_read_block_groups(struct btrfs_fs_info *info) 9848 10063 { 9849 10064 struct btrfs_path *path; ··· 9986 10089 * info has super bytes accounted for, otherwise we'll think 9987 10090 * we have more space than we actually do. 
9988 10091 */ 9989 - ret = exclude_super_stripes(info, cache); 10092 + ret = exclude_super_stripes(cache); 9990 10093 if (ret) { 9991 10094 /* 9992 10095 * We may have excluded something, so call this just in 9993 10096 * case. 9994 10097 */ 9995 - free_excluded_extents(info, cache); 10098 + free_excluded_extents(cache); 9996 10099 btrfs_put_block_group(cache); 9997 10100 goto error; 9998 10101 } ··· 10007 10110 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 10008 10111 cache->last_byte_to_unpin = (u64)-1; 10009 10112 cache->cached = BTRFS_CACHE_FINISHED; 10010 - free_excluded_extents(info, cache); 10113 + free_excluded_extents(cache); 10011 10114 } else if (btrfs_block_group_used(&cache->item) == 0) { 10012 10115 cache->last_byte_to_unpin = (u64)-1; 10013 10116 cache->cached = BTRFS_CACHE_FINISHED; 10014 10117 add_new_free_space(cache, found_key.objectid, 10015 10118 found_key.objectid + 10016 10119 found_key.offset); 10017 - free_excluded_extents(info, cache); 10120 + free_excluded_extents(cache); 10018 10121 } 10019 10122 10020 10123 ret = btrfs_add_block_group_cache(info, cache); ··· 10037 10140 if (btrfs_chunk_readonly(info, cache->key.objectid)) { 10038 10141 inc_block_group_ro(cache, 1); 10039 10142 } else if (btrfs_block_group_used(&cache->item) == 0) { 10040 - spin_lock(&info->unused_bgs_lock); 10041 - /* Should always be true but just in case. 
*/ 10042 - if (list_empty(&cache->bg_list)) { 10043 - btrfs_get_block_group(cache); 10044 - trace_btrfs_add_unused_block_group(cache); 10045 - list_add_tail(&cache->bg_list, 10046 - &info->unused_bgs); 10047 - } 10048 - spin_unlock(&info->unused_bgs_lock); 10143 + ASSERT(list_empty(&cache->bg_list)); 10144 + btrfs_mark_bg_unused(cache); 10049 10145 } 10050 10146 } 10051 10147 ··· 10066 10176 10067 10177 btrfs_add_raid_kobjects(info); 10068 10178 init_global_block_rsv(info); 10069 - ret = 0; 10179 + ret = check_chunk_block_group_mappings(info); 10070 10180 error: 10071 10181 btrfs_free_path(path); 10072 10182 return ret; ··· 10096 10206 sizeof(item)); 10097 10207 if (ret) 10098 10208 btrfs_abort_transaction(trans, ret); 10099 - ret = btrfs_finish_chunk_alloc(trans, fs_info, key.objectid, 10100 - key.offset); 10209 + ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset); 10101 10210 if (ret) 10102 10211 btrfs_abort_transaction(trans, ret); 10103 10212 add_block_group_free_space(trans, block_group); ··· 10107 10218 trans->can_flush_pending_bgs = can_flush_pending_bgs; 10108 10219 } 10109 10220 10110 - int btrfs_make_block_group(struct btrfs_trans_handle *trans, 10111 - struct btrfs_fs_info *fs_info, u64 bytes_used, 10221 + int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, 10112 10222 u64 type, u64 chunk_offset, u64 size) 10113 10223 { 10224 + struct btrfs_fs_info *fs_info = trans->fs_info; 10114 10225 struct btrfs_block_group_cache *cache; 10115 10226 int ret; 10116 10227 ··· 10129 10240 cache->last_byte_to_unpin = (u64)-1; 10130 10241 cache->cached = BTRFS_CACHE_FINISHED; 10131 10242 cache->needs_free_space = 1; 10132 - ret = exclude_super_stripes(fs_info, cache); 10243 + ret = exclude_super_stripes(cache); 10133 10244 if (ret) { 10134 10245 /* 10135 10246 * We may have excluded something, so call this just in 10136 10247 * case. 
10137 10248 */ 10138 - free_excluded_extents(fs_info, cache); 10249 + free_excluded_extents(cache); 10139 10250 btrfs_put_block_group(cache); 10140 10251 return ret; 10141 10252 } 10142 10253 10143 10254 add_new_free_space(cache, chunk_offset, chunk_offset + size); 10144 10255 10145 - free_excluded_extents(fs_info, cache); 10256 + free_excluded_extents(cache); 10146 10257 10147 10258 #ifdef CONFIG_BTRFS_DEBUG 10148 10259 if (btrfs_should_fragment_free_space(cache)) { ··· 10200 10311 } 10201 10312 10202 10313 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 10203 - struct btrfs_fs_info *fs_info, u64 group_start, 10204 - struct extent_map *em) 10314 + u64 group_start, struct extent_map *em) 10205 10315 { 10316 + struct btrfs_fs_info *fs_info = trans->fs_info; 10206 10317 struct btrfs_root *root = fs_info->extent_root; 10207 10318 struct btrfs_path *path; 10208 10319 struct btrfs_block_group_cache *block_group; ··· 10226 10337 * Free the reserved super bytes from this block group before 10227 10338 * remove it. 
10228 10339 */ 10229 - free_excluded_extents(fs_info, block_group); 10340 + free_excluded_extents(block_group); 10230 10341 btrfs_free_ref_tree_range(fs_info, block_group->key.objectid, 10231 10342 block_group->key.offset); 10232 10343 10233 10344 memcpy(&key, &block_group->key, sizeof(key)); 10234 10345 index = btrfs_bg_flags_to_raid_index(block_group->flags); 10235 - if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 10236 - BTRFS_BLOCK_GROUP_RAID1 | 10237 - BTRFS_BLOCK_GROUP_RAID10)) 10238 - factor = 2; 10239 - else 10240 - factor = 1; 10346 + factor = btrfs_bg_type_to_factor(block_group->flags); 10241 10347 10242 10348 /* make sure this block group isn't part of an allocation cluster */ 10243 10349 cluster = &fs_info->data_alloc_cluster; ··· 10571 10687 /* Don't want to race with allocators so take the groups_sem */ 10572 10688 down_write(&space_info->groups_sem); 10573 10689 spin_lock(&block_group->lock); 10574 - if (block_group->reserved || 10690 + if (block_group->reserved || block_group->pinned || 10575 10691 btrfs_block_group_used(&block_group->item) || 10576 10692 block_group->ro || 10577 10693 list_is_singular(&block_group->list)) { ··· 10648 10764 10649 10765 space_info->bytes_pinned -= block_group->pinned; 10650 10766 space_info->bytes_readonly += block_group->pinned; 10651 - percpu_counter_add(&space_info->total_bytes_pinned, 10652 - -block_group->pinned); 10767 + percpu_counter_add_batch(&space_info->total_bytes_pinned, 10768 + -block_group->pinned, 10769 + BTRFS_TOTAL_BYTES_PINNED_BATCH); 10653 10770 block_group->pinned = 0; 10654 10771 10655 10772 spin_unlock(&block_group->lock); ··· 10667 10782 * Btrfs_remove_chunk will abort the transaction if things go 10668 10783 * horribly wrong. 
10669 10784 */ 10670 - ret = btrfs_remove_chunk(trans, fs_info, 10671 - block_group->key.objectid); 10785 + ret = btrfs_remove_chunk(trans, block_group->key.objectid); 10672 10786 10673 10787 if (ret) { 10674 10788 if (trimming) ··· 10949 11065 wait_var_event(&root->will_be_snapshotted, 10950 11066 !atomic_read(&root->will_be_snapshotted)); 10951 11067 } 11068 + } 11069 + 11070 + void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg) 11071 + { 11072 + struct btrfs_fs_info *fs_info = bg->fs_info; 11073 + 11074 + spin_lock(&fs_info->unused_bgs_lock); 11075 + if (list_empty(&bg->bg_list)) { 11076 + btrfs_get_block_group(bg); 11077 + trace_btrfs_add_unused_block_group(bg); 11078 + list_add_tail(&bg->bg_list, &fs_info->unused_bgs); 11079 + } 11080 + spin_unlock(&fs_info->unused_bgs_lock); 10952 11081 }

+67 -89

fs/btrfs/extent_io.c

··· 140 140 141 141 static void flush_write_bio(struct extent_page_data *epd); 142 142 143 - static inline struct btrfs_fs_info * 144 - tree_fs_info(struct extent_io_tree *tree) 145 - { 146 - if (tree->ops) 147 - return tree->ops->tree_fs_info(tree->private_data); 148 - return NULL; 149 - } 150 - 151 143 int __init extent_io_init(void) 152 144 { 153 145 extent_state_cache = kmem_cache_create("btrfs_extent_state", ··· 556 564 557 565 static void extent_io_tree_panic(struct extent_io_tree *tree, int err) 558 566 { 559 - btrfs_panic(tree_fs_info(tree), err, 560 - "Locking error: Extent tree was modified by another thread while locked."); 567 + struct inode *inode = tree->private_data; 568 + 569 + btrfs_panic(btrfs_sb(inode->i_sb), err, 570 + "locking error: extent tree was modified by another thread while locked"); 561 571 } 562 572 563 573 /* ··· 1380 1386 } 1381 1387 } 1382 1388 1383 - /* 1384 - * helper function to set both pages and extents in the tree writeback 1385 - */ 1386 - static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) 1387 - { 1388 - tree->ops->set_range_writeback(tree->private_data, start, end); 1389 - } 1390 - 1391 1389 /* find the first state struct with 'bits' set after 'start', and 1392 1390 * return it. tree->lock must be held. 
NULL will returned if 1393 1391 * nothing was found after 'start' ··· 2045 2059 struct extent_buffer *eb, int mirror_num) 2046 2060 { 2047 2061 u64 start = eb->start; 2048 - unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); 2062 + int i, num_pages = num_extent_pages(eb); 2049 2063 int ret = 0; 2050 2064 2051 2065 if (sb_rdonly(fs_info->sb)) ··· 2384 2398 start - page_offset(page), 2385 2399 (int)phy_offset, failed_bio->bi_end_io, 2386 2400 NULL); 2387 - bio_set_op_attrs(bio, REQ_OP_READ, read_mode); 2401 + bio->bi_opf = REQ_OP_READ | read_mode; 2388 2402 2389 2403 btrfs_debug(btrfs_sb(inode->i_sb), 2390 2404 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", ··· 2776 2790 else 2777 2791 contig = bio_end_sector(bio) == sector; 2778 2792 2779 - if (tree->ops && tree->ops->merge_bio_hook(page, offset, 2780 - page_size, bio, bio_flags)) 2793 + if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size, 2794 + bio, bio_flags)) 2781 2795 can_merge = false; 2782 2796 2783 2797 if (prev_bio_flags != bio_flags || !contig || !can_merge || ··· 3408 3422 continue; 3409 3423 } 3410 3424 3411 - set_range_writeback(tree, cur, cur + iosize - 1); 3425 + btrfs_set_range_writeback(tree, cur, cur + iosize - 1); 3412 3426 if (!PageWriteback(page)) { 3413 3427 btrfs_err(BTRFS_I(inode)->root->fs_info, 3414 3428 "page %lu not writeback, cur %llu end %llu", ··· 3524 3538 struct btrfs_fs_info *fs_info, 3525 3539 struct extent_page_data *epd) 3526 3540 { 3527 - unsigned long i, num_pages; 3541 + int i, num_pages; 3528 3542 int flush = 0; 3529 3543 int ret = 0; 3530 3544 ··· 3574 3588 if (!ret) 3575 3589 return ret; 3576 3590 3577 - num_pages = num_extent_pages(eb->start, eb->len); 3591 + num_pages = num_extent_pages(eb); 3578 3592 for (i = 0; i < num_pages; i++) { 3579 3593 struct page *p = eb->pages[i]; 3580 3594 ··· 3698 3712 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; 3699 3713 u64 offset = eb->start; 3700 3714 
u32 nritems; 3701 - unsigned long i, num_pages; 3715 + int i, num_pages; 3702 3716 unsigned long start, end; 3703 3717 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; 3704 3718 int ret = 0; 3705 3719 3706 3720 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); 3707 - num_pages = num_extent_pages(eb->start, eb->len); 3721 + num_pages = num_extent_pages(eb); 3708 3722 atomic_set(&eb->io_pages, num_pages); 3709 3723 3710 3724 /* set btree blocks beyond nritems with 0 to avoid stale content. */ ··· 4629 4643 } 4630 4644 4631 4645 /* 4632 - * Helper for releasing extent buffer page. 4646 + * Release all pages attached to the extent buffer. 4633 4647 */ 4634 - static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) 4648 + static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb) 4635 4649 { 4636 - unsigned long index; 4637 - struct page *page; 4638 - int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); 4650 + int i; 4651 + int num_pages; 4652 + int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); 4639 4653 4640 4654 BUG_ON(extent_buffer_under_io(eb)); 4641 4655 4642 - index = num_extent_pages(eb->start, eb->len); 4643 - if (index == 0) 4644 - return; 4656 + num_pages = num_extent_pages(eb); 4657 + for (i = 0; i < num_pages; i++) { 4658 + struct page *page = eb->pages[i]; 4645 4659 4646 - do { 4647 - index--; 4648 - page = eb->pages[index]; 4649 4660 if (!page) 4650 4661 continue; 4651 4662 if (mapped) ··· 4674 4691 4675 4692 /* One for when we allocated the page */ 4676 4693 put_page(page); 4677 - } while (index != 0); 4694 + } 4678 4695 } 4679 4696 4680 4697 /* ··· 4682 4699 */ 4683 4700 static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) 4684 4701 { 4685 - btrfs_release_extent_buffer_page(eb); 4702 + btrfs_release_extent_buffer_pages(eb); 4686 4703 __free_extent_buffer(eb); 4687 4704 } 4688 4705 ··· 4726 4743 4727 4744 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) 
4728 4745 { 4729 - unsigned long i; 4746 + int i; 4730 4747 struct page *p; 4731 4748 struct extent_buffer *new; 4732 - unsigned long num_pages = num_extent_pages(src->start, src->len); 4749 + int num_pages = num_extent_pages(src); 4733 4750 4734 4751 new = __alloc_extent_buffer(src->fs_info, src->start, src->len); 4735 4752 if (new == NULL) ··· 4749 4766 } 4750 4767 4751 4768 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); 4752 - set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); 4769 + set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags); 4753 4770 4754 4771 return new; 4755 4772 } ··· 4758 4775 u64 start, unsigned long len) 4759 4776 { 4760 4777 struct extent_buffer *eb; 4761 - unsigned long num_pages; 4762 - unsigned long i; 4763 - 4764 - num_pages = num_extent_pages(start, len); 4778 + int num_pages; 4779 + int i; 4765 4780 4766 4781 eb = __alloc_extent_buffer(fs_info, start, len); 4767 4782 if (!eb) 4768 4783 return NULL; 4769 4784 4785 + num_pages = num_extent_pages(eb); 4770 4786 for (i = 0; i < num_pages; i++) { 4771 4787 eb->pages[i] = alloc_page(GFP_NOFS); 4772 4788 if (!eb->pages[i]) ··· 4773 4791 } 4774 4792 set_extent_buffer_uptodate(eb); 4775 4793 btrfs_set_header_nritems(eb, 0); 4776 - set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); 4794 + set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); 4777 4795 4778 4796 return eb; 4779 4797 err: ··· 4825 4843 static void mark_extent_buffer_accessed(struct extent_buffer *eb, 4826 4844 struct page *accessed) 4827 4845 { 4828 - unsigned long num_pages, i; 4846 + int num_pages, i; 4829 4847 4830 4848 check_buffer_tree_ref(eb); 4831 4849 4832 - num_pages = num_extent_pages(eb->start, eb->len); 4850 + num_pages = num_extent_pages(eb); 4833 4851 for (i = 0; i < num_pages; i++) { 4834 4852 struct page *p = eb->pages[i]; 4835 4853 ··· 4926 4944 u64 start) 4927 4945 { 4928 4946 unsigned long len = fs_info->nodesize; 4929 - unsigned long num_pages = num_extent_pages(start, len); 4930 - unsigned long i; 4947 + int num_pages; 4948 + int i; 4931 
4949 unsigned long index = start >> PAGE_SHIFT; 4932 4950 struct extent_buffer *eb; 4933 4951 struct extent_buffer *exists = NULL; ··· 4949 4967 if (!eb) 4950 4968 return ERR_PTR(-ENOMEM); 4951 4969 4970 + num_pages = num_extent_pages(eb); 4952 4971 for (i = 0; i < num_pages; i++, index++) { 4953 4972 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); 4954 4973 if (!p) { ··· 4992 5009 uptodate = 0; 4993 5010 4994 5011 /* 4995 - * see below about how we avoid a nasty race with release page 4996 - * and why we unlock later 5012 + * We can't unlock the pages just yet since the extent buffer 5013 + * hasn't been properly inserted in the radix tree, this 5014 + * opens a race with btree_releasepage which can free a page 5015 + * while we are still filling in all pages for the buffer and 5016 + * we could crash. 4997 5017 */ 4998 5018 } 4999 5019 if (uptodate) ··· 5025 5039 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); 5026 5040 5027 5041 /* 5028 - * there is a race where release page may have 5029 - * tried to find this extent buffer in the radix 5030 - * but failed. It will tell the VM it is safe to 5031 - * reclaim the, and it will clear the page private bit. 5032 - * We must make sure to set the page private bit properly 5033 - * after the extent buffer is in the radix tree so 5034 - * it doesn't get lost 5042 + * Now it's safe to unlock the pages because any calls to 5043 + * btree_releasepage will correctly detect that a page belongs to a 5044 + * live buffer and won't free them prematurely. 
5035 5045 */ 5036 - SetPageChecked(eb->pages[0]); 5037 - for (i = 1; i < num_pages; i++) { 5038 - p = eb->pages[i]; 5039 - ClearPageChecked(p); 5040 - unlock_page(p); 5041 - } 5042 - unlock_page(eb->pages[0]); 5046 + for (i = 0; i < num_pages; i++) 5047 + unlock_page(eb->pages[i]); 5043 5048 return eb; 5044 5049 5045 5050 free_eb: ··· 5052 5075 __free_extent_buffer(eb); 5053 5076 } 5054 5077 5055 - /* Expects to have eb->eb_lock already held */ 5056 5078 static int release_extent_buffer(struct extent_buffer *eb) 5057 5079 { 5080 + lockdep_assert_held(&eb->refs_lock); 5081 + 5058 5082 WARN_ON(atomic_read(&eb->refs) == 0); 5059 5083 if (atomic_dec_and_test(&eb->refs)) { 5060 5084 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) { ··· 5072 5094 } 5073 5095 5074 5096 /* Should be safe to release our pages at this point */ 5075 - btrfs_release_extent_buffer_page(eb); 5097 + btrfs_release_extent_buffer_pages(eb); 5076 5098 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 5077 - if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) { 5099 + if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) { 5078 5100 __free_extent_buffer(eb); 5079 5101 return 1; 5080 5102 } ··· 5105 5127 5106 5128 spin_lock(&eb->refs_lock); 5107 5129 if (atomic_read(&eb->refs) == 2 && 5108 - test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) 5130 + test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) 5109 5131 atomic_dec(&eb->refs); 5110 5132 5111 5133 if (atomic_read(&eb->refs) == 2 && ··· 5137 5159 5138 5160 void clear_extent_buffer_dirty(struct extent_buffer *eb) 5139 5161 { 5140 - unsigned long i; 5141 - unsigned long num_pages; 5162 + int i; 5163 + int num_pages; 5142 5164 struct page *page; 5143 5165 5144 - num_pages = num_extent_pages(eb->start, eb->len); 5166 + num_pages = num_extent_pages(eb); 5145 5167 5146 5168 for (i = 0; i < num_pages; i++) { 5147 5169 page = eb->pages[i]; ··· 5167 5189 5168 5190 int set_extent_buffer_dirty(struct extent_buffer *eb) 5169 5191 { 5170 - unsigned long i; 
5171 - unsigned long num_pages; 5192 + int i; 5193 + int num_pages; 5172 5194 int was_dirty = 0; 5173 5195 5174 5196 check_buffer_tree_ref(eb); 5175 5197 5176 5198 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 5177 5199 5178 - num_pages = num_extent_pages(eb->start, eb->len); 5200 + num_pages = num_extent_pages(eb); 5179 5201 WARN_ON(atomic_read(&eb->refs) == 0); 5180 5202 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); 5181 5203 ··· 5186 5208 5187 5209 void clear_extent_buffer_uptodate(struct extent_buffer *eb) 5188 5210 { 5189 - unsigned long i; 5211 + int i; 5190 5212 struct page *page; 5191 - unsigned long num_pages; 5213 + int num_pages; 5192 5214 5193 5215 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 5194 - num_pages = num_extent_pages(eb->start, eb->len); 5216 + num_pages = num_extent_pages(eb); 5195 5217 for (i = 0; i < num_pages; i++) { 5196 5218 page = eb->pages[i]; 5197 5219 if (page) ··· 5201 5223 5202 5224 void set_extent_buffer_uptodate(struct extent_buffer *eb) 5203 5225 { 5204 - unsigned long i; 5226 + int i; 5205 5227 struct page *page; 5206 - unsigned long num_pages; 5228 + int num_pages; 5207 5229 5208 5230 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 5209 - num_pages = num_extent_pages(eb->start, eb->len); 5231 + num_pages = num_extent_pages(eb); 5210 5232 for (i = 0; i < num_pages; i++) { 5211 5233 page = eb->pages[i]; 5212 5234 SetPageUptodate(page); ··· 5216 5238 int read_extent_buffer_pages(struct extent_io_tree *tree, 5217 5239 struct extent_buffer *eb, int wait, int mirror_num) 5218 5240 { 5219 - unsigned long i; 5241 + int i; 5220 5242 struct page *page; 5221 5243 int err; 5222 5244 int ret = 0; 5223 5245 int locked_pages = 0; 5224 5246 int all_uptodate = 1; 5225 - unsigned long num_pages; 5247 + int num_pages; 5226 5248 unsigned long num_reads = 0; 5227 5249 struct bio *bio = NULL; 5228 5250 unsigned long bio_flags = 0; ··· 5230 5252 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 5231 5253 return 0; 
5232 5254 5233 - num_pages = num_extent_pages(eb->start, eb->len); 5255 + num_pages = num_extent_pages(eb); 5234 5256 for (i = 0; i < num_pages; i++) { 5235 5257 page = eb->pages[i]; 5236 5258 if (wait == WAIT_NONE) { ··· 5554 5576 struct extent_buffer *src) 5555 5577 { 5556 5578 int i; 5557 - unsigned num_pages; 5579 + int num_pages; 5558 5580 5559 5581 ASSERT(dst->len == src->len); 5560 5582 5561 - num_pages = num_extent_pages(dst->start, dst->len); 5583 + num_pages = num_extent_pages(dst); 5562 5584 for (i = 0; i < num_pages; i++) 5563 5585 copy_page(page_address(dst->pages[i]), 5564 5586 page_address(src->pages[i]));

+4 -12

fs/btrfs/extent_io.h

··· 46 46 #define EXTENT_BUFFER_STALE 6 47 47 #define EXTENT_BUFFER_WRITEBACK 7 48 48 #define EXTENT_BUFFER_READ_ERR 8 /* read IO error */ 49 - #define EXTENT_BUFFER_DUMMY 9 49 + #define EXTENT_BUFFER_UNMAPPED 9 50 50 #define EXTENT_BUFFER_IN_TREE 10 51 51 #define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ 52 52 ··· 92 92 typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, 93 93 struct bio *bio, u64 bio_offset); 94 94 95 - typedef blk_status_t (extent_submit_bio_done_t)(void *private_data, 96 - struct bio *bio, int mirror_num); 97 - 98 95 struct extent_io_ops { 99 96 /* 100 97 * The following callbacks must be allways defined, the function ··· 101 104 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, 102 105 struct page *page, u64 start, u64 end, 103 106 int mirror); 104 - int (*merge_bio_hook)(struct page *page, unsigned long offset, 105 - size_t size, struct bio *bio, 106 - unsigned long bio_flags); 107 107 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); 108 - struct btrfs_fs_info *(*tree_fs_info)(void *private_data); 109 - void (*set_range_writeback)(void *private_data, u64 start, u64 end); 110 108 111 109 /* 112 110 * Optional hooks, called if the pointer is not NULL ··· 432 440 int mirror_num); 433 441 void wait_on_extent_buffer_writeback(struct extent_buffer *eb); 434 442 435 - static inline unsigned long num_extent_pages(u64 start, u64 len) 443 + static inline int num_extent_pages(const struct extent_buffer *eb) 436 444 { 437 - return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 438 - (start >> PAGE_SHIFT); 445 + return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) - 446 + (eb->start >> PAGE_SHIFT); 439 447 } 440 448 441 449 static inline void extent_buffer_get(struct extent_buffer *eb)

+2 -2

fs/btrfs/file-item.c

··· 922 922 const bool new_inline, 923 923 struct extent_map *em) 924 924 { 925 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 925 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 926 926 struct btrfs_root *root = inode->root; 927 927 struct extent_buffer *leaf = path->nodes[0]; 928 928 const int slot = path->slots[0]; ··· 942 942 btrfs_file_extent_num_bytes(leaf, fi); 943 943 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 944 944 size_t size; 945 - size = btrfs_file_extent_inline_len(leaf, slot, fi); 945 + size = btrfs_file_extent_ram_bytes(leaf, fi); 946 946 extent_end = ALIGN(extent_start + size, 947 947 fs_info->sectorsize); 948 948 }

+21 -107

fs/btrfs/file.c

··· 5 5 6 6 #include <linux/fs.h> 7 7 #include <linux/pagemap.h> 8 - #include <linux/highmem.h> 9 8 #include <linux/time.h> 10 9 #include <linux/init.h> 11 10 #include <linux/string.h> 12 11 #include <linux/backing-dev.h> 13 - #include <linux/mpage.h> 14 12 #include <linux/falloc.h> 15 - #include <linux/swap.h> 16 13 #include <linux/writeback.h> 17 14 #include <linux/compat.h> 18 15 #include <linux/slab.h> ··· 80 83 static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, 81 84 struct inode_defrag *defrag) 82 85 { 83 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 86 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 84 87 struct inode_defrag *entry; 85 88 struct rb_node **p; 86 89 struct rb_node *parent = NULL; ··· 132 135 int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, 133 136 struct btrfs_inode *inode) 134 137 { 135 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 136 138 struct btrfs_root *root = inode->root; 139 + struct btrfs_fs_info *fs_info = root->fs_info; 137 140 struct inode_defrag *defrag; 138 141 u64 transid; 139 142 int ret; ··· 182 185 static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, 183 186 struct inode_defrag *defrag) 184 187 { 185 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 188 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 186 189 int ret; 187 190 188 191 if (!__need_auto_defrag(fs_info)) ··· 830 833 btrfs_file_extent_num_bytes(leaf, fi); 831 834 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 832 835 extent_end = key.offset + 833 - btrfs_file_extent_inline_len(leaf, 834 - path->slots[0], fi); 836 + btrfs_file_extent_ram_bytes(leaf, fi); 835 837 } else { 836 838 /* can't happen */ 837 839 BUG(); ··· 1129 1133 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 1130 1134 struct btrfs_inode *inode, u64 start, u64 end) 1131 1135 { 1132 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1136 + struct 
btrfs_fs_info *fs_info = trans->fs_info; 1133 1137 struct btrfs_root *root = inode->root; 1134 1138 struct extent_buffer *leaf; 1135 1139 struct btrfs_path *path; ··· 1466 1470 u64 *lockstart, u64 *lockend, 1467 1471 struct extent_state **cached_state) 1468 1472 { 1469 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1473 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 1470 1474 u64 start_pos; 1471 1475 u64 last_pos; 1472 1476 int i; ··· 1522 1526 static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, 1523 1527 size_t *write_bytes) 1524 1528 { 1525 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1529 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 1526 1530 struct btrfs_root *root = inode->root; 1527 1531 struct btrfs_ordered_extent *ordered; 1528 1532 u64 lockstart, lockend; ··· 1565 1569 return ret; 1566 1570 } 1567 1571 1568 - static noinline ssize_t __btrfs_buffered_write(struct file *file, 1569 - struct iov_iter *i, 1570 - loff_t pos) 1572 + static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, 1573 + struct iov_iter *i) 1571 1574 { 1575 + struct file *file = iocb->ki_filp; 1576 + loff_t pos = iocb->ki_pos; 1572 1577 struct inode *inode = file_inode(file); 1573 1578 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1574 1579 struct btrfs_root *root = BTRFS_I(inode)->root; ··· 1801 1804 { 1802 1805 struct file *file = iocb->ki_filp; 1803 1806 struct inode *inode = file_inode(file); 1804 - loff_t pos = iocb->ki_pos; 1807 + loff_t pos; 1805 1808 ssize_t written; 1806 1809 ssize_t written_buffered; 1807 1810 loff_t endbyte; ··· 1812 1815 if (written < 0 || !iov_iter_count(from)) 1813 1816 return written; 1814 1817 1815 - pos += written; 1816 - written_buffered = __btrfs_buffered_write(file, from, pos); 1818 + pos = iocb->ki_pos; 1819 + written_buffered = btrfs_buffered_write(iocb, from); 1817 1820 if (written_buffered < 0) { 1818 1821 err = written_buffered; 1819 1822 goto 
out; ··· 1950 1953 if (iocb->ki_flags & IOCB_DIRECT) { 1951 1954 num_written = __btrfs_direct_write(iocb, from); 1952 1955 } else { 1953 - num_written = __btrfs_buffered_write(file, from, pos); 1956 + num_written = btrfs_buffered_write(iocb, from); 1954 1957 if (num_written > 0) 1955 1958 iocb->ki_pos = pos + num_written; 1956 1959 if (clean_page) ··· 2039 2042 struct btrfs_trans_handle *trans; 2040 2043 struct btrfs_log_ctx ctx; 2041 2044 int ret = 0, err; 2042 - bool full_sync = false; 2043 2045 u64 len; 2044 2046 2045 2047 /* ··· 2062 2066 2063 2067 inode_lock(inode); 2064 2068 atomic_inc(&root->log_batch); 2065 - full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 2066 - &BTRFS_I(inode)->runtime_flags); 2069 + 2067 2070 /* 2068 - * We might have have had more pages made dirty after calling 2069 - * start_ordered_ops and before acquiring the inode's i_mutex. 2071 + * We have to do this here to avoid the priority inversion of waiting on 2072 + * IO of a lower priority task while holding a transaction open. 2070 2073 */ 2071 - if (full_sync) { 2072 - /* 2073 - * For a full sync, we need to make sure any ordered operations 2074 - * start and finish before we start logging the inode, so that 2075 - * all extents are persisted and the respective file extent 2076 - * items are in the fs/subvol btree. 2077 - */ 2078 - ret = btrfs_wait_ordered_range(inode, start, len); 2079 - } else { 2080 - /* 2081 - * Start any new ordered operations before starting to log the 2082 - * inode. We will wait for them to finish in btrfs_sync_log(). 2083 - * 2084 - * Right before acquiring the inode's mutex, we might have new 2085 - * writes dirtying pages, which won't immediately start the 2086 - * respective ordered operations - that is done through the 2087 - * fill_delalloc callbacks invoked from the writepage and 2088 - * writepages address space operations. So make sure we start 2089 - * all ordered operations before starting to log our inode.
Not 2090 - * doing this means that while logging the inode, writeback 2091 - * could start and invoke writepage/writepages, which would call 2092 - * the fill_delalloc callbacks (cow_file_range, 2093 - * submit_compressed_extents). These callbacks add first an 2094 - * extent map to the modified list of extents and then create 2095 - * the respective ordered operation, which means in 2096 - * tree-log.c:btrfs_log_inode() we might capture all existing 2097 - * ordered operations (with btrfs_get_logged_extents()) before 2098 - * the fill_delalloc callback adds its ordered operation, and by 2099 - * the time we visit the modified list of extent maps (with 2100 - * btrfs_log_changed_extents()), we see and process the extent 2101 - * map they created. We then use the extent map to construct a 2102 - * file extent item for logging without waiting for the 2103 - * respective ordered operation to finish - this file extent 2104 - * item points to a disk location that might not have yet been 2105 - * written to, containing random data - so after a crash a log 2106 - * replay will make our inode have file extent items that point 2107 - * to disk locations containing invalid data, as we returned 2108 - * success to userspace without waiting for the respective 2109 - * ordered operation to finish, because it wasn't captured by 2110 - * btrfs_get_logged_extents(). 2111 - */ 2112 - ret = start_ordered_ops(inode, start, end); 2113 - } 2074 + ret = btrfs_wait_ordered_range(inode, start, len); 2114 2075 if (ret) { 2115 2076 inode_unlock(inode); 2116 2077 goto out; 2117 2078 } 2118 2079 atomic_inc(&root->log_batch); 2119 2080 2120 - /* 2121 - * If the last transaction that changed this file was before the current 2122 - * transaction and we have the full sync flag set in our inode, we can 2123 - * bail out now without any syncing. 2124 - * 2125 - * Note that we can't bail out if the full sync flag isn't set. 
This is 2126 - * because when the full sync flag is set we start all ordered extents 2127 - * and wait for them to fully complete - when they complete they update 2128 - * the inode's last_trans field through: 2129 - * 2130 - * btrfs_finish_ordered_io() -> 2131 - * btrfs_update_inode_fallback() -> 2132 - * btrfs_update_inode() -> 2133 - * btrfs_set_inode_last_trans() 2134 - * 2135 - * So we are sure that last_trans is up to date and can do this check to 2136 - * bail out safely. For the fast path, when the full sync flag is not 2137 - * set in our inode, we can not do it because we start only our ordered 2138 - * extents and don't wait for them to complete (that is when 2139 - * btrfs_finish_ordered_io runs), so here at this point their last_trans 2140 - * value might be less than or equals to fs_info->last_trans_committed, 2141 - * and setting a speculative last_trans for an inode when a buffered 2142 - * write is made (such as fs_info->generation + 1 for example) would not 2143 - * be reliable since after setting the value and before fsync is called 2144 - * any number of transactions can start and commit (transaction kthread 2145 - * commits the current transaction periodically), and a transaction 2146 - * commit does not start nor waits for ordered extents to complete. 
2147 - */ 2148 2081 smp_mb(); 2149 2082 if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || 2150 - (full_sync && BTRFS_I(inode)->last_trans <= 2151 - fs_info->last_trans_committed) || 2152 - (!btrfs_have_ordered_extents_in_range(inode, start, len) && 2153 - BTRFS_I(inode)->last_trans 2154 - <= fs_info->last_trans_committed)) { 2083 + BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) { 2155 2084 /* 2156 2085 * We've had everything committed since the last time we were 2157 2086 * modified so clear this flag in case it was set for whatever ··· 2160 2239 goto out; 2161 2240 } 2162 2241 } 2163 - if (!full_sync) { 2164 - ret = btrfs_wait_ordered_range(inode, start, len); 2165 - if (ret) { 2166 - btrfs_end_transaction(trans); 2167 - goto out; 2168 - } 2169 - } 2170 2242 ret = btrfs_commit_transaction(trans); 2171 2243 } else { 2172 2244 ret = btrfs_end_transaction(trans); ··· 2224 2310 struct btrfs_inode *inode, 2225 2311 struct btrfs_path *path, u64 offset, u64 end) 2226 2312 { 2227 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 2313 + struct btrfs_fs_info *fs_info = trans->fs_info; 2228 2314 struct btrfs_root *root = inode->root; 2229 2315 struct extent_buffer *leaf; 2230 2316 struct btrfs_file_extent_item *fi;

+6 -13

fs/btrfs/free-space-cache.c

··· 71 71 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 72 72 if (IS_ERR(inode)) 73 73 return inode; 74 - if (is_bad_inode(inode)) { 75 - iput(inode); 76 - return ERR_PTR(-ENOENT); 77 - } 78 74 79 75 mapping_set_gfp_mask(inode->i_mapping, 80 76 mapping_gfp_constraint(inode->i_mapping, ··· 296 300 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) 297 301 check_crcs = 1; 298 302 299 - /* Make sure we can fit our crcs into the first page */ 303 + /* Make sure we can fit our crcs and generation into the first page */ 300 304 if (write && check_crcs && 301 - (num_pages * sizeof(u32)) >= PAGE_SIZE) 305 + (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) 302 306 return -ENOSPC; 303 307 304 308 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); ··· 543 547 io_ctl_map_page(io_ctl, 0); 544 548 } 545 549 546 - memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 550 + copy_page(io_ctl->cur, bitmap); 547 551 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 548 552 if (io_ctl->index < io_ctl->num_pages) 549 553 io_ctl_map_page(io_ctl, 0); ··· 603 607 if (ret) 604 608 return ret; 605 609 606 - memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); 610 + copy_page(entry->bitmap, io_ctl->cur); 607 611 io_ctl_unmap_page(io_ctl); 608 612 609 613 return 0; ··· 651 655 struct btrfs_free_space_ctl *ctl, 652 656 struct btrfs_path *path, u64 offset) 653 657 { 654 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 658 + struct btrfs_fs_info *fs_info = root->fs_info; 655 659 struct btrfs_free_space_header *header; 656 660 struct extent_buffer *leaf; 657 661 struct btrfs_io_ctl io_ctl; ··· 1119 1123 { 1120 1124 int ret; 1121 1125 struct inode *inode = io_ctl->inode; 1122 - struct btrfs_fs_info *fs_info; 1123 1126 1124 1127 if (!inode) 1125 1128 return 0; 1126 - 1127 - fs_info = btrfs_sb(inode->i_sb); 1128 1129 1129 1130 /* Flush the dirty pages in the cache file. 
*/ 1130 1131 ret = flush_dirty_cache(inode); ··· 1138 1145 BTRFS_I(inode)->generation = 0; 1139 1146 if (block_group) { 1140 1147 #ifdef DEBUG 1141 - btrfs_err(fs_info, 1148 + btrfs_err(root->fs_info, 1142 1149 "failed to write free space cache for block group %llu", 1143 1150 block_group->key.objectid); 1144 1151 #endif

+1 -1

fs/btrfs/free-space-tree.c

··· 1236 1236 if (ret) 1237 1237 goto abort; 1238 1238 1239 - ret = btrfs_del_root(trans, fs_info, &free_space_root->root_key); 1239 + ret = btrfs_del_root(trans, &free_space_root->root_key); 1240 1240 if (ret) 1241 1241 goto abort; 1242 1242

+4 -8

fs/btrfs/inode-map.c

··· 3 3 * Copyright (C) 2007 Oracle. All rights reserved. 4 4 */ 5 5 6 - #include <linux/delay.h> 7 6 #include <linux/kthread.h> 8 7 #include <linux/pagemap.h> 9 8 ··· 243 244 return; 244 245 245 246 while (1) { 246 - bool add_to_ctl = true; 247 - 248 247 spin_lock(rbroot_lock); 249 248 n = rb_first(rbroot); 250 249 if (!n) { ··· 254 257 BUG_ON(info->bitmap); /* Logic error */ 255 258 256 259 if (info->offset > root->ino_cache_progress) 257 - add_to_ctl = false; 258 - else if (info->offset + info->bytes > root->ino_cache_progress) 259 - count = root->ino_cache_progress - info->offset + 1; 260 + count = 0; 260 261 else 261 - count = info->bytes; 262 + count = min(root->ino_cache_progress - info->offset + 1, 263 + info->bytes); 262 264 263 265 rb_erase(&info->offset_index, rbroot); 264 266 spin_unlock(rbroot_lock); 265 - if (add_to_ctl) 267 + if (count) 266 268 __btrfs_add_free_space(root->fs_info, ctl, 267 269 info->offset, count); 268 270 kmem_cache_free(btrfs_free_space_cachep, info);

+54 -107

fs/btrfs/inode.c

··· 14 14 #include <linux/init.h> 15 15 #include <linux/string.h> 16 16 #include <linux/backing-dev.h> 17 - #include <linux/mpage.h> 18 - #include <linux/swap.h> 19 17 #include <linux/writeback.h> 20 18 #include <linux/compat.h> 21 - #include <linux/bit_spinlock.h> 22 19 #include <linux/xattr.h> 23 20 #include <linux/posix_acl.h> 24 21 #include <linux/falloc.h> 25 22 #include <linux/slab.h> 26 23 #include <linux/ratelimit.h> 27 - #include <linux/mount.h> 28 24 #include <linux/btrfs.h> 29 25 #include <linux/blkdev.h> 30 26 #include <linux/posix_acl_xattr.h> ··· 1439 1443 nocow = 1; 1440 1444 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 1441 1445 extent_end = found_key.offset + 1442 - btrfs_file_extent_inline_len(leaf, 1443 - path->slots[0], fi); 1446 + btrfs_file_extent_ram_bytes(leaf, fi); 1444 1447 extent_end = ALIGN(extent_end, 1445 1448 fs_info->sectorsize); 1446 1449 } else { ··· 1747 1752 void __btrfs_del_delalloc_inode(struct btrfs_root *root, 1748 1753 struct btrfs_inode *inode) 1749 1754 { 1750 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1755 + struct btrfs_fs_info *fs_info = root->fs_info; 1751 1756 1752 1757 if (!list_empty(&inode->delalloc_inodes)) { 1753 1758 list_del_init(&inode->delalloc_inodes); ··· 1898 1903 } 1899 1904 1900 1905 /* 1901 - * extent_io.c merge_bio_hook, this must check the chunk tree to make sure 1902 - * we don't create bios that span stripes or chunks 1906 + * Merge bio hook, this must check the chunk tree to make sure we don't create 1907 + * bios that span stripes or chunks 1903 1908 * 1904 1909 * return 1 if page cannot be merged to bio 1905 1910 * return 0 if page can be merged to bio ··· 1957 1962 * At IO completion time the cums attached on the ordered extent record 1958 1963 * are inserted into the btree 1959 1964 */ 1960 - static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, 1965 + blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, 1961 1966 int 
mirror_num) 1962 1967 { 1963 1968 struct inode *inode = private_data; ··· 2030 2035 /* we're doing a write, do the async checksumming */ 2031 2036 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, 2032 2037 bio_offset, inode, 2033 - btrfs_submit_bio_start, 2034 - btrfs_submit_bio_done); 2038 + btrfs_submit_bio_start); 2035 2039 goto out; 2036 2040 } else if (!skip_sum) { 2037 2041 ret = btrfs_csum_one_bio(inode, bio, 0, 0); ··· 3604 3610 filled = true; 3605 3611 3606 3612 path = btrfs_alloc_path(); 3607 - if (!path) { 3608 - ret = -ENOMEM; 3609 - goto make_bad; 3610 - } 3613 + if (!path) 3614 + return -ENOMEM; 3611 3615 3612 3616 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 3613 3617 3614 3618 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3615 3619 if (ret) { 3616 - if (ret > 0) 3617 - ret = -ENOENT; 3618 - goto make_bad; 3620 + btrfs_free_path(path); 3621 + return ret; 3619 3622 } 3620 3623 3621 3624 leaf = path->nodes[0]; ··· 3765 3774 3766 3775 btrfs_sync_inode_flags_to_i_flags(inode); 3767 3776 return 0; 3768 - 3769 - make_bad: 3770 - btrfs_free_path(path); 3771 - make_bad_inode(inode); 3772 - return ret; 3773 3777 } 3774 3778 3775 3779 /* ··· 3970 3984 goto err; 3971 3985 } 3972 3986 skip_backref: 3973 - ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index); 3987 + ret = btrfs_delete_delayed_dir_index(trans, dir, index); 3974 3988 if (ret) { 3975 3989 btrfs_abort_transaction(trans, ret); 3976 3990 goto err; ··· 4073 4087 } 4074 4088 4075 4089 static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, 4076 - struct btrfs_root *root, 4077 - struct inode *dir, u64 objectid, 4078 - const char *name, int name_len) 4090 + struct inode *dir, u64 objectid, 4091 + const char *name, int name_len) 4079 4092 { 4080 - struct btrfs_fs_info *fs_info = root->fs_info; 4093 + struct btrfs_root *root = BTRFS_I(dir)->root; 4081 4094 struct btrfs_path *path; 4082 4095 struct extent_buffer *leaf; 4083 4096 struct 
btrfs_dir_item *di; ··· 4109 4124 } 4110 4125 btrfs_release_path(path); 4111 4126 4112 - ret = btrfs_del_root_ref(trans, fs_info, objectid, 4113 - root->root_key.objectid, dir_ino, 4114 - &index, name, name_len); 4127 + ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid, 4128 + dir_ino, &index, name, name_len); 4115 4129 if (ret < 0) { 4116 4130 if (ret != -ENOENT) { 4117 4131 btrfs_abort_transaction(trans, ret); ··· 4129 4145 4130 4146 leaf = path->nodes[0]; 4131 4147 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4132 - btrfs_release_path(path); 4133 4148 index = key.offset; 4134 4149 } 4135 4150 btrfs_release_path(path); 4136 4151 4137 - ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index); 4152 + ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index); 4138 4153 if (ret) { 4139 4154 btrfs_abort_transaction(trans, ret); 4140 4155 goto out; ··· 4226 4243 prev = node; 4227 4244 entry = rb_entry(node, struct btrfs_inode, rb_node); 4228 4245 4229 - if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode))) 4246 + if (objectid < btrfs_ino(entry)) 4230 4247 node = node->rb_left; 4231 - else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode))) 4248 + else if (objectid > btrfs_ino(entry)) 4232 4249 node = node->rb_right; 4233 4250 else 4234 4251 break; ··· 4236 4253 if (!node) { 4237 4254 while (prev) { 4238 4255 entry = rb_entry(prev, struct btrfs_inode, rb_node); 4239 - if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) { 4256 + if (objectid <= btrfs_ino(entry)) { 4240 4257 node = prev; 4241 4258 break; 4242 4259 } ··· 4245 4262 } 4246 4263 while (node) { 4247 4264 entry = rb_entry(node, struct btrfs_inode, rb_node); 4248 - objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1; 4265 + objectid = btrfs_ino(entry) + 1; 4249 4266 inode = igrab(&entry->vfs_inode); 4250 4267 if (inode) { 4251 4268 spin_unlock(&root->inode_lock); ··· 4326 4343 4327 4344 btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); 4328 4345 4329 - ret = 
btrfs_unlink_subvol(trans, root, dir, 4330 - dest->root_key.objectid, 4331 - dentry->d_name.name, 4332 - dentry->d_name.len); 4346 + ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid, 4347 + dentry->d_name.name, dentry->d_name.len); 4333 4348 if (ret) { 4334 4349 err = ret; 4335 4350 btrfs_abort_transaction(trans, ret); ··· 4422 4441 return PTR_ERR(trans); 4423 4442 4424 4443 if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 4425 - err = btrfs_unlink_subvol(trans, root, dir, 4444 + err = btrfs_unlink_subvol(trans, dir, 4426 4445 BTRFS_I(inode)->location.objectid, 4427 4446 dentry->d_name.name, 4428 4447 dentry->d_name.len); ··· 4624 4643 BTRFS_I(inode), leaf, fi, 4625 4644 found_key.offset); 4626 4645 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 4627 - item_end += btrfs_file_extent_inline_len(leaf, 4628 - path->slots[0], fi); 4646 + item_end += btrfs_file_extent_ram_bytes(leaf, 4647 + fi); 4629 4648 4630 4649 trace_btrfs_truncate_show_fi_inline( 4631 4650 BTRFS_I(inode), leaf, fi, path->slots[0], ··· 5596 5615 parent = *p; 5597 5616 entry = rb_entry(parent, struct btrfs_inode, rb_node); 5598 5617 5599 - if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode))) 5618 + if (ino < btrfs_ino(entry)) 5600 5619 p = &parent->rb_left; 5601 - else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode))) 5620 + else if (ino > btrfs_ino(entry)) 5602 5621 p = &parent->rb_right; 5603 5622 else { 5604 5623 WARN_ON(!(entry->vfs_inode.i_state & ··· 5689 5708 int ret; 5690 5709 5691 5710 ret = btrfs_read_locked_inode(inode); 5692 - if (!is_bad_inode(inode)) { 5711 + if (!ret) { 5693 5712 inode_tree_add(inode); 5694 5713 unlock_new_inode(inode); 5695 5714 if (new) 5696 5715 *new = 1; 5697 5716 } else { 5698 - unlock_new_inode(inode); 5699 - iput(inode); 5700 - ASSERT(ret < 0); 5701 - inode = ERR_PTR(ret < 0 ? 
ret : -ESTALE); 5717 + iget_failed(inode); 5718 + /* 5719 + * ret > 0 can come from btrfs_search_slot called by 5720 + * btrfs_read_locked_inode, this means the inode item 5721 + * was not found. 5722 + */ 5723 + if (ret > 0) 5724 + ret = -ENOENT; 5725 + inode = ERR_PTR(ret); 5702 5726 } 5703 5727 } 5704 5728 ··· 5731 5745 inode->i_mtime = current_time(inode); 5732 5746 inode->i_atime = inode->i_mtime; 5733 5747 inode->i_ctime = inode->i_mtime; 5734 - BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime); 5748 + BTRFS_I(inode)->i_otime = inode->i_mtime; 5735 5749 5736 5750 return inode; 5737 5751 } ··· 6010 6024 if (put) 6011 6025 btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list); 6012 6026 btrfs_free_path(path); 6013 - return ret; 6014 - } 6015 - 6016 - int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) 6017 - { 6018 - struct btrfs_root *root = BTRFS_I(inode)->root; 6019 - struct btrfs_trans_handle *trans; 6020 - int ret = 0; 6021 - bool nolock = false; 6022 - 6023 - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) 6024 - return 0; 6025 - 6026 - if (btrfs_fs_closing(root->fs_info) && 6027 - btrfs_is_free_space_inode(BTRFS_I(inode))) 6028 - nolock = true; 6029 - 6030 - if (wbc->sync_mode == WB_SYNC_ALL) { 6031 - if (nolock) 6032 - trans = btrfs_join_transaction_nolock(root); 6033 - else 6034 - trans = btrfs_join_transaction(root); 6035 - if (IS_ERR(trans)) 6036 - return PTR_ERR(trans); 6037 - ret = btrfs_commit_transaction(trans); 6038 - } 6039 6027 return ret; 6040 6028 } 6041 6029 ··· 6311 6351 inode->i_mtime = current_time(inode); 6312 6352 inode->i_atime = inode->i_mtime; 6313 6353 inode->i_ctime = inode->i_mtime; 6314 - BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime); 6354 + BTRFS_I(inode)->i_otime = inode->i_mtime; 6315 6355 6316 6356 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 6317 6357 struct btrfs_inode_item); ··· 6380 6420 struct btrfs_inode *parent_inode, struct 
btrfs_inode *inode, 6381 6421 const char *name, int name_len, int add_backref, u64 index) 6382 6422 { 6383 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 6384 6423 int ret = 0; 6385 6424 struct btrfs_key key; 6386 6425 struct btrfs_root *root = parent_inode->root; ··· 6395 6436 } 6396 6437 6397 6438 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { 6398 - ret = btrfs_add_root_ref(trans, fs_info, key.objectid, 6439 + ret = btrfs_add_root_ref(trans, key.objectid, 6399 6440 root->root_key.objectid, parent_ino, 6400 6441 index, name, name_len); 6401 6442 } else if (add_backref) { ··· 6431 6472 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { 6432 6473 u64 local_index; 6433 6474 int err; 6434 - err = btrfs_del_root_ref(trans, fs_info, key.objectid, 6475 + err = btrfs_del_root_ref(trans, key.objectid, 6435 6476 root->root_key.objectid, parent_ino, 6436 6477 &local_index, name, name_len); 6437 6478 ··· 6791 6832 size_t pg_offset, u64 start, u64 len, 6792 6833 int create) 6793 6834 { 6794 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 6835 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 6795 6836 int ret; 6796 6837 int err = 0; 6797 6838 u64 extent_start = 0; ··· 6887 6928 extent_start); 6888 6929 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 6889 6930 size_t size; 6890 - size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); 6931 + 6932 + size = btrfs_file_extent_ram_bytes(leaf, item); 6891 6933 extent_end = ALIGN(extent_start + size, 6892 6934 fs_info->sectorsize); 6893 6935 ··· 6939 6979 if (new_inline) 6940 6980 goto out; 6941 6981 6942 - size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); 6982 + size = btrfs_file_extent_ram_bytes(leaf, item); 6943 6983 extent_offset = page_offset(page) + pg_offset - extent_start; 6944 6984 copy_size = min_t(u64, PAGE_SIZE - pg_offset, 6945 6985 size - extent_offset); ··· 7810 7850 isector >>= inode->i_sb->s_blocksize_bits; 7811 7851 bio = 
btrfs_create_repair_bio(inode, failed_bio, failrec, page, 7812 7852 pgoff, isector, repair_endio, repair_arg); 7813 - bio_set_op_attrs(bio, REQ_OP_READ, read_mode); 7853 + bio->bi_opf = REQ_OP_READ | read_mode; 7814 7854 7815 7855 btrfs_debug(BTRFS_I(inode)->root->fs_info, 7816 7856 "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", ··· 8244 8284 if (write && async_submit) { 8245 8285 ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0, 8246 8286 file_offset, inode, 8247 - btrfs_submit_bio_start_direct_io, 8248 - btrfs_submit_bio_done); 8287 + btrfs_submit_bio_start_direct_io); 8249 8288 goto err; 8250 8289 } else if (write) { 8251 8290 /* ··· 9484 9525 /* src is a subvolume */ 9485 9526 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { 9486 9527 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; 9487 - ret = btrfs_unlink_subvol(trans, root, old_dir, 9488 - root_objectid, 9528 + ret = btrfs_unlink_subvol(trans, old_dir, root_objectid, 9489 9529 old_dentry->d_name.name, 9490 9530 old_dentry->d_name.len); 9491 9531 } else { /* src is an inode */ ··· 9503 9545 /* dest is a subvolume */ 9504 9546 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { 9505 9547 root_objectid = BTRFS_I(new_inode)->root->root_key.objectid; 9506 - ret = btrfs_unlink_subvol(trans, dest, new_dir, 9507 - root_objectid, 9548 + ret = btrfs_unlink_subvol(trans, new_dir, root_objectid, 9508 9549 new_dentry->d_name.name, 9509 9550 new_dentry->d_name.len); 9510 9551 } else { /* dest is an inode */ ··· 9763 9806 9764 9807 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { 9765 9808 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; 9766 - ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, 9809 + ret = btrfs_unlink_subvol(trans, old_dir, root_objectid, 9767 9810 old_dentry->d_name.name, 9768 9811 old_dentry->d_name.len); 9769 9812 } else { ··· 9785 9828 if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == 9786 9829 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 9787 
9830 root_objectid = BTRFS_I(new_inode)->location.objectid; 9788 - ret = btrfs_unlink_subvol(trans, dest, new_dir, 9789 - root_objectid, 9831 + ret = btrfs_unlink_subvol(trans, new_dir, root_objectid, 9790 9832 new_dentry->d_name.name, 9791 9833 new_dentry->d_name.len); 9792 9834 BUG_ON(new_inode->i_nlink == 0); ··· 10407 10451 return -EAGAIN; 10408 10452 } 10409 10453 10410 - static struct btrfs_fs_info *iotree_fs_info(void *private_data) 10411 - { 10412 - struct inode *inode = private_data; 10413 - return btrfs_sb(inode->i_sb); 10414 - } 10415 - 10416 10454 static void btrfs_check_extent_io_range(void *private_data, const char *caller, 10417 10455 u64 start, u64 end) 10418 10456 { ··· 10421 10471 } 10422 10472 } 10423 10473 10424 - void btrfs_set_range_writeback(void *private_data, u64 start, u64 end) 10474 + void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) 10425 10475 { 10426 - struct inode *inode = private_data; 10476 + struct inode *inode = tree->private_data; 10427 10477 unsigned long index = start >> PAGE_SHIFT; 10428 10478 unsigned long end_index = end >> PAGE_SHIFT; 10429 10479 struct page *page; ··· 10479 10529 /* mandatory callbacks */ 10480 10530 .submit_bio_hook = btrfs_submit_bio_hook, 10481 10531 .readpage_end_io_hook = btrfs_readpage_end_io_hook, 10482 - .merge_bio_hook = btrfs_merge_bio_hook, 10483 10532 .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, 10484 - .tree_fs_info = iotree_fs_info, 10485 - .set_range_writeback = btrfs_set_range_writeback, 10486 10533 10487 10534 /* optional callbacks */ 10488 10535 .fill_delalloc = run_delalloc_range,

+22 -47

fs/btrfs/ioctl.c

··· 5 5 6 6 #include <linux/kernel.h> 7 7 #include <linux/bio.h> 8 - #include <linux/buffer_head.h> 9 8 #include <linux/file.h> 10 9 #include <linux/fs.h> 11 10 #include <linux/fsnotify.h> 12 11 #include <linux/pagemap.h> 13 12 #include <linux/highmem.h> 14 13 #include <linux/time.h> 15 - #include <linux/init.h> 16 14 #include <linux/string.h> 17 15 #include <linux/backing-dev.h> 18 16 #include <linux/mount.h> 19 - #include <linux/mpage.h> 20 17 #include <linux/namei.h> 21 - #include <linux/swap.h> 22 18 #include <linux/writeback.h> 23 19 #include <linux/compat.h> 24 - #include <linux/bit_spinlock.h> 25 20 #include <linux/security.h> 26 21 #include <linux/xattr.h> 27 22 #include <linux/mm.h> ··· 601 606 trans->block_rsv = &block_rsv; 602 607 trans->bytes_reserved = block_rsv.size; 603 608 604 - ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit); 609 + ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit); 605 610 if (ret) 606 611 goto fail; 607 612 ··· 611 616 goto fail; 612 617 } 613 618 614 - memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); 615 - btrfs_set_header_bytenr(leaf, leaf->start); 616 - btrfs_set_header_generation(leaf, trans->transid); 617 - btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 618 - btrfs_set_header_owner(leaf, objectid); 619 - 620 - write_extent_buffer_fsid(leaf, fs_info->fsid); 621 - write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); 622 619 btrfs_mark_buffer_dirty(leaf); 623 620 624 621 inode_item = &root_item->inode; ··· 698 711 ret = btrfs_update_inode(trans, root, dir); 699 712 BUG_ON(ret); 700 713 701 - ret = btrfs_add_root_ref(trans, fs_info, 702 - objectid, root->root_key.objectid, 714 + ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid, 703 715 btrfs_ino(BTRFS_I(dir)), index, name, namelen); 704 716 BUG_ON(ret); 705 717 ··· 2493 2507 static noinline int btrfs_ioctl_ino_lookup(struct file *file, 2494 2508 void __user *argp) 2495 2509 { 2496 - struct 
btrfs_ioctl_ino_lookup_args *args; 2497 - struct inode *inode; 2510 + struct btrfs_ioctl_ino_lookup_args *args; 2511 + struct inode *inode; 2498 2512 int ret = 0; 2499 2513 2500 2514 args = memdup_user(argp, sizeof(*args)); ··· 2927 2941 ret = btrfs_defrag_root(root); 2928 2942 break; 2929 2943 case S_IFREG: 2930 - if (!(file->f_mode & FMODE_WRITE)) { 2931 - ret = -EINVAL; 2944 + /* 2945 + * Note that this does not check the file descriptor for write 2946 + * access. This prevents defragmenting executables that are 2947 + * running and allows defrag on files open in read-only mode. 2948 + */ 2949 + if (!capable(CAP_SYS_ADMIN) && 2950 + inode_permission(inode, MAY_WRITE)) { 2951 + ret = -EPERM; 2932 2952 goto out; 2933 2953 } 2934 2954 ··· 3157 3165 di_args->total_bytes = btrfs_device_get_total_bytes(dev); 3158 3166 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 3159 3167 if (dev->name) { 3160 - struct rcu_string *name; 3161 - 3162 - name = rcu_dereference(dev->name); 3163 - strncpy(di_args->path, name->str, sizeof(di_args->path) - 1); 3168 + strncpy(di_args->path, rcu_str_deref(dev->name), 3169 + sizeof(di_args->path) - 1); 3164 3170 di_args->path[sizeof(di_args->path) - 1] = 0; 3165 3171 } else { 3166 3172 di_args->path[0] = '\0'; ··· 5108 5118 struct inode *inode = file_inode(file); 5109 5119 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5110 5120 struct btrfs_ioctl_quota_ctl_args *sa; 5111 - struct btrfs_trans_handle *trans = NULL; 5112 5121 int ret; 5113 - int err; 5114 5122 5115 5123 if (!capable(CAP_SYS_ADMIN)) 5116 5124 return -EPERM; ··· 5124 5136 } 5125 5137 5126 5138 down_write(&fs_info->subvol_sem); 5127 - trans = btrfs_start_transaction(fs_info->tree_root, 2); 5128 - if (IS_ERR(trans)) { 5129 - ret = PTR_ERR(trans); 5130 - goto out; 5131 - } 5132 5139 5133 5140 switch (sa->cmd) { 5134 5141 case BTRFS_QUOTA_CTL_ENABLE: 5135 - ret = btrfs_quota_enable(trans, fs_info); 5142 + ret = btrfs_quota_enable(fs_info); 5136 5143 break; 5137 5144 
case BTRFS_QUOTA_CTL_DISABLE: 5138 - ret = btrfs_quota_disable(trans, fs_info); 5145 + ret = btrfs_quota_disable(fs_info); 5139 5146 break; 5140 5147 default: 5141 5148 ret = -EINVAL; 5142 5149 break; 5143 5150 } 5144 5151 5145 - err = btrfs_commit_transaction(trans); 5146 - if (err && !ret) 5147 - ret = err; 5148 - out: 5149 5152 kfree(sa); 5150 5153 up_write(&fs_info->subvol_sem); 5151 5154 drop_write: ··· 5174 5195 } 5175 5196 5176 5197 if (sa->assign) { 5177 - ret = btrfs_add_qgroup_relation(trans, fs_info, 5178 - sa->src, sa->dst); 5198 + ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst); 5179 5199 } else { 5180 - ret = btrfs_del_qgroup_relation(trans, fs_info, 5181 - sa->src, sa->dst); 5200 + ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst); 5182 5201 } 5183 5202 5184 5203 /* update qgroup status and info */ 5185 - err = btrfs_run_qgroups(trans, fs_info); 5204 + err = btrfs_run_qgroups(trans); 5186 5205 if (err < 0) 5187 5206 btrfs_handle_fs_error(fs_info, err, 5188 5207 "failed to update qgroup status and info"); ··· 5198 5221 static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) 5199 5222 { 5200 5223 struct inode *inode = file_inode(file); 5201 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5202 5224 struct btrfs_root *root = BTRFS_I(inode)->root; 5203 5225 struct btrfs_ioctl_qgroup_create_args *sa; 5204 5226 struct btrfs_trans_handle *trans; ··· 5229 5253 } 5230 5254 5231 5255 if (sa->create) { 5232 - ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid); 5256 + ret = btrfs_create_qgroup(trans, sa->qgroupid); 5233 5257 } else { 5234 - ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid); 5258 + ret = btrfs_remove_qgroup(trans, sa->qgroupid); 5235 5259 } 5236 5260 5237 5261 err = btrfs_end_transaction(trans); ··· 5248 5272 static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) 5249 5273 { 5250 5274 struct inode *inode = file_inode(file); 5251 - struct btrfs_fs_info *fs_info = 
btrfs_sb(inode->i_sb); 5252 5275 struct btrfs_root *root = BTRFS_I(inode)->root; 5253 5276 struct btrfs_ioctl_qgroup_limit_args *sa; 5254 5277 struct btrfs_trans_handle *trans; ··· 5280 5305 qgroupid = root->root_key.objectid; 5281 5306 } 5282 5307 5283 - ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim); 5308 + ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim); 5284 5309 5285 5310 err = btrfs_end_transaction(trans); 5286 5311 if (err && !ret)

-138

fs/btrfs/ordered-data.c

··· 6 6 #include <linux/slab.h> 7 7 #include <linux/blkdev.h> 8 8 #include <linux/writeback.h> 9 - #include <linux/pagevec.h> 10 9 #include "ctree.h" 11 10 #include "transaction.h" 12 11 #include "btrfs_inode.h" ··· 420 421 return ret == 0; 421 422 } 422 423 423 - /* Needs to either be called under a log transaction or the log_mutex */ 424 - void btrfs_get_logged_extents(struct btrfs_inode *inode, 425 - struct list_head *logged_list, 426 - const loff_t start, 427 - const loff_t end) 428 - { 429 - struct btrfs_ordered_inode_tree *tree; 430 - struct btrfs_ordered_extent *ordered; 431 - struct rb_node *n; 432 - struct rb_node *prev; 433 - 434 - tree = &inode->ordered_tree; 435 - spin_lock_irq(&tree->lock); 436 - n = __tree_search(&tree->tree, end, &prev); 437 - if (!n) 438 - n = prev; 439 - for (; n; n = rb_prev(n)) { 440 - ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); 441 - if (ordered->file_offset > end) 442 - continue; 443 - if (entry_end(ordered) <= start) 444 - break; 445 - if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) 446 - continue; 447 - list_add(&ordered->log_list, logged_list); 448 - refcount_inc(&ordered->refs); 449 - } 450 - spin_unlock_irq(&tree->lock); 451 - } 452 - 453 - void btrfs_put_logged_extents(struct list_head *logged_list) 454 - { 455 - struct btrfs_ordered_extent *ordered; 456 - 457 - while (!list_empty(logged_list)) { 458 - ordered = list_first_entry(logged_list, 459 - struct btrfs_ordered_extent, 460 - log_list); 461 - list_del_init(&ordered->log_list); 462 - btrfs_put_ordered_extent(ordered); 463 - } 464 - } 465 - 466 - void btrfs_submit_logged_extents(struct list_head *logged_list, 467 - struct btrfs_root *log) 468 - { 469 - int index = log->log_transid % 2; 470 - 471 - spin_lock_irq(&log->log_extents_lock[index]); 472 - list_splice_tail(logged_list, &log->logged_list[index]); 473 - spin_unlock_irq(&log->log_extents_lock[index]); 474 - } 475 - 476 - void btrfs_wait_logged_extents(struct btrfs_trans_handle 
*trans, 477 - struct btrfs_root *log, u64 transid) 478 - { 479 - struct btrfs_ordered_extent *ordered; 480 - int index = transid % 2; 481 - 482 - spin_lock_irq(&log->log_extents_lock[index]); 483 - while (!list_empty(&log->logged_list[index])) { 484 - struct inode *inode; 485 - ordered = list_first_entry(&log->logged_list[index], 486 - struct btrfs_ordered_extent, 487 - log_list); 488 - list_del_init(&ordered->log_list); 489 - inode = ordered->inode; 490 - spin_unlock_irq(&log->log_extents_lock[index]); 491 - 492 - if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && 493 - !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { 494 - u64 start = ordered->file_offset; 495 - u64 end = ordered->file_offset + ordered->len - 1; 496 - 497 - WARN_ON(!inode); 498 - filemap_fdatawrite_range(inode->i_mapping, start, end); 499 - } 500 - wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, 501 - &ordered->flags)); 502 - 503 - /* 504 - * In order to keep us from losing our ordered extent 505 - * information when committing the transaction we have to make 506 - * sure that any logged extents are completed when we go to 507 - * commit the transaction. To do this we simply increase the 508 - * current transactions pending_ordered counter and decrement it 509 - * when the ordered extent completes. 
510 - */ 511 - if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) { 512 - struct btrfs_ordered_inode_tree *tree; 513 - 514 - tree = &BTRFS_I(inode)->ordered_tree; 515 - spin_lock_irq(&tree->lock); 516 - if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) { 517 - set_bit(BTRFS_ORDERED_PENDING, &ordered->flags); 518 - atomic_inc(&trans->transaction->pending_ordered); 519 - } 520 - spin_unlock_irq(&tree->lock); 521 - } 522 - btrfs_put_ordered_extent(ordered); 523 - spin_lock_irq(&log->log_extents_lock[index]); 524 - } 525 - spin_unlock_irq(&log->log_extents_lock[index]); 526 - } 527 - 528 - void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid) 529 - { 530 - struct btrfs_ordered_extent *ordered; 531 - int index = transid % 2; 532 - 533 - spin_lock_irq(&log->log_extents_lock[index]); 534 - while (!list_empty(&log->logged_list[index])) { 535 - ordered = list_first_entry(&log->logged_list[index], 536 - struct btrfs_ordered_extent, 537 - log_list); 538 - list_del_init(&ordered->log_list); 539 - spin_unlock_irq(&log->log_extents_lock[index]); 540 - btrfs_put_ordered_extent(ordered); 541 - spin_lock_irq(&log->log_extents_lock[index]); 542 - } 543 - spin_unlock_irq(&log->log_extents_lock[index]); 544 - } 545 - 546 424 /* 547 425 * used to drop a reference on an ordered extent. This will free 548 426 * the extent if the last reference is dropped ··· 787 911 refcount_inc(&entry->refs); 788 912 spin_unlock_irq(&tree->lock); 789 913 return entry; 790 - } 791 - 792 - bool btrfs_have_ordered_extents_in_range(struct inode *inode, 793 - u64 file_offset, 794 - u64 len) 795 - { 796 - struct btrfs_ordered_extent *oe; 797 - 798 - oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len); 799 - if (oe) { 800 - btrfs_put_ordered_extent(oe); 801 - return true; 802 - } 803 - return false; 804 914 } 805 915 806 916 /*

+3 -20

fs/btrfs/ordered-data.h

··· 54 54 #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent 55 55 * has done its due diligence in updating 56 56 * the isize. */ 57 - #define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered 58 - ordered extent */ 59 - #define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ 57 + #define BTRFS_ORDERED_TRUNCATED 8 /* Set when we have to truncate an extent */ 60 58 61 - #define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent 62 - * in the logging code. */ 63 - #define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to 59 + #define BTRFS_ORDERED_PENDING 9 /* We are waiting for this ordered extent to 64 60 * complete in the current transaction. */ 65 - #define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */ 61 + #define BTRFS_ORDERED_REGULAR 10 /* Regular IO for COW */ 66 62 67 63 struct btrfs_ordered_extent { 68 64 /* logical offset in the file */ ··· 178 182 struct btrfs_inode *inode, 179 183 u64 file_offset, 180 184 u64 len); 181 - bool btrfs_have_ordered_extents_in_range(struct inode *inode, 182 - u64 file_offset, 183 - u64 len); 184 185 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 185 186 struct btrfs_ordered_extent *ordered); 186 187 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, ··· 186 193 const u64 range_start, const u64 range_len); 187 194 u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, 188 195 const u64 range_start, const u64 range_len); 189 - void btrfs_get_logged_extents(struct btrfs_inode *inode, 190 - struct list_head *logged_list, 191 - const loff_t start, 192 - const loff_t end); 193 - void btrfs_put_logged_extents(struct list_head *logged_list); 194 - void btrfs_submit_logged_extents(struct list_head *logged_list, 195 - struct btrfs_root *log); 196 - void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, 197 - struct btrfs_root *log, u64 transid); 198 - void 
btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); 199 196 int __init ordered_data_init(void); 200 197 void __cold ordered_data_exit(void); 201 198

+7 -32

fs/btrfs/print-tree.c

··· 52 52 u64 offset; 53 53 int ref_index = 0; 54 54 55 - if (item_size < sizeof(*ei)) { 56 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 57 - struct btrfs_extent_item_v0 *ei0; 58 - BUG_ON(item_size != sizeof(*ei0)); 59 - ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0); 60 - pr_info("\t\textent refs %u\n", 61 - btrfs_extent_refs_v0(eb, ei0)); 62 - return; 63 - #else 64 - BUG(); 65 - #endif 55 + if (unlikely(item_size < sizeof(*ei))) { 56 + btrfs_print_v0_err(eb->fs_info); 57 + btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL); 66 58 } 67 59 68 60 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); ··· 124 132 } 125 133 WARN_ON(ptr > end); 126 134 } 127 - 128 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 129 - static void print_extent_ref_v0(struct extent_buffer *eb, int slot) 130 - { 131 - struct btrfs_extent_ref_v0 *ref0; 132 - 133 - ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); 134 - printk("\t\textent back ref root %llu gen %llu owner %llu num_refs %lu\n", 135 - btrfs_ref_root_v0(eb, ref0), 136 - btrfs_ref_generation_v0(eb, ref0), 137 - btrfs_ref_objectid_v0(eb, ref0), 138 - (unsigned long)btrfs_ref_count_v0(eb, ref0)); 139 - } 140 - #endif 141 135 142 136 static void print_uuid_item(struct extent_buffer *l, unsigned long offset, 143 137 u32 item_size) ··· 245 267 struct btrfs_file_extent_item); 246 268 if (btrfs_file_extent_type(l, fi) == 247 269 BTRFS_FILE_EXTENT_INLINE) { 248 - pr_info("\t\tinline extent data size %u\n", 249 - btrfs_file_extent_inline_len(l, i, fi)); 270 + pr_info("\t\tinline extent data size %llu\n", 271 + btrfs_file_extent_ram_bytes(l, fi)); 250 272 break; 251 273 } 252 274 pr_info("\t\textent data disk bytenr %llu nr %llu\n", ··· 258 280 btrfs_file_extent_ram_bytes(l, fi)); 259 281 break; 260 282 case BTRFS_EXTENT_REF_V0_KEY: 261 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 262 - print_extent_ref_v0(l, i); 263 - #else 264 - BUG(); 265 - #endif 283 + btrfs_print_v0_err(fs_info); 284 + btrfs_handle_fs_error(fs_info, -EINVAL, NULL); 266 
285 break; 267 286 case BTRFS_BLOCK_GROUP_ITEM_KEY: 268 287 bi = btrfs_item_ptr(l, i,

+156 -114

fs/btrfs/qgroup.c

··· 530 530 fs_info->qgroup_ulist = NULL; 531 531 } 532 532 533 - static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 534 - struct btrfs_root *quota_root, 535 - u64 src, u64 dst) 533 + static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, 534 + u64 dst) 536 535 { 537 536 int ret; 537 + struct btrfs_root *quota_root = trans->fs_info->quota_root; 538 538 struct btrfs_path *path; 539 539 struct btrfs_key key; 540 540 ··· 554 554 return ret; 555 555 } 556 556 557 - static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 558 - struct btrfs_root *quota_root, 559 - u64 src, u64 dst) 557 + static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, 558 + u64 dst) 560 559 { 561 560 int ret; 561 + struct btrfs_root *quota_root = trans->fs_info->quota_root; 562 562 struct btrfs_path *path; 563 563 struct btrfs_key key; 564 564 ··· 653 653 return ret; 654 654 } 655 655 656 - static int del_qgroup_item(struct btrfs_trans_handle *trans, 657 - struct btrfs_root *quota_root, u64 qgroupid) 656 + static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid) 658 657 { 659 658 int ret; 659 + struct btrfs_root *quota_root = trans->fs_info->quota_root; 660 660 struct btrfs_path *path; 661 661 struct btrfs_key key; 662 662 ··· 700 700 } 701 701 702 702 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 703 - struct btrfs_root *root, 704 703 struct btrfs_qgroup *qgroup) 705 704 { 705 + struct btrfs_root *quota_root = trans->fs_info->quota_root; 706 706 struct btrfs_path *path; 707 707 struct btrfs_key key; 708 708 struct extent_buffer *l; ··· 718 718 if (!path) 719 719 return -ENOMEM; 720 720 721 - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 721 + ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 722 722 if (ret > 0) 723 723 ret = -ENOENT; 724 724 ··· 742 742 } 743 743 744 744 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 745 - struct 
btrfs_root *root, 746 745 struct btrfs_qgroup *qgroup) 747 746 { 747 + struct btrfs_fs_info *fs_info = trans->fs_info; 748 + struct btrfs_root *quota_root = fs_info->quota_root; 748 749 struct btrfs_path *path; 749 750 struct btrfs_key key; 750 751 struct extent_buffer *l; ··· 753 752 int ret; 754 753 int slot; 755 754 756 - if (btrfs_is_testing(root->fs_info)) 755 + if (btrfs_is_testing(fs_info)) 757 756 return 0; 758 757 759 758 key.objectid = 0; ··· 764 763 if (!path) 765 764 return -ENOMEM; 766 765 767 - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 766 + ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 768 767 if (ret > 0) 769 768 ret = -ENOENT; 770 769 ··· 787 786 return ret; 788 787 } 789 788 790 - static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 791 - struct btrfs_fs_info *fs_info, 792 - struct btrfs_root *root) 789 + static int update_qgroup_status_item(struct btrfs_trans_handle *trans) 793 790 { 791 + struct btrfs_fs_info *fs_info = trans->fs_info; 792 + struct btrfs_root *quota_root = fs_info->quota_root; 794 793 struct btrfs_path *path; 795 794 struct btrfs_key key; 796 795 struct extent_buffer *l; ··· 806 805 if (!path) 807 806 return -ENOMEM; 808 807 809 - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 808 + ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 810 809 if (ret > 0) 811 810 ret = -ENOENT; 812 811 ··· 876 875 return ret; 877 876 } 878 877 879 - int btrfs_quota_enable(struct btrfs_trans_handle *trans, 880 - struct btrfs_fs_info *fs_info) 878 + int btrfs_quota_enable(struct btrfs_fs_info *fs_info) 881 879 { 882 880 struct btrfs_root *quota_root; 883 881 struct btrfs_root *tree_root = fs_info->tree_root; ··· 886 886 struct btrfs_key key; 887 887 struct btrfs_key found_key; 888 888 struct btrfs_qgroup *qgroup = NULL; 889 + struct btrfs_trans_handle *trans = NULL; 889 890 int ret = 0; 890 891 int slot; 891 892 ··· 894 893 if (fs_info->quota_root) 895 894 goto out; 896 895 896 + /* 
897 + * 1 for quota root item 898 + * 1 for BTRFS_QGROUP_STATUS item 899 + * 900 + * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items 901 + * per subvolume. However those are not currently reserved since it 902 + * would be a lot of overkill. 903 + */ 904 + trans = btrfs_start_transaction(tree_root, 2); 905 + if (IS_ERR(trans)) { 906 + ret = PTR_ERR(trans); 907 + trans = NULL; 908 + goto out; 909 + } 910 + 897 911 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 898 912 if (!fs_info->qgroup_ulist) { 899 913 ret = -ENOMEM; 914 + btrfs_abort_transaction(trans, ret); 900 915 goto out; 901 916 } 902 917 ··· 923 906 BTRFS_QUOTA_TREE_OBJECTID); 924 907 if (IS_ERR(quota_root)) { 925 908 ret = PTR_ERR(quota_root); 909 + btrfs_abort_transaction(trans, ret); 926 910 goto out; 927 911 } 928 912 929 913 path = btrfs_alloc_path(); 930 914 if (!path) { 931 915 ret = -ENOMEM; 916 + btrfs_abort_transaction(trans, ret); 932 917 goto out_free_root; 933 918 } 934 919 ··· 940 921 941 922 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 942 923 sizeof(*ptr)); 943 - if (ret) 924 + if (ret) { 925 + btrfs_abort_transaction(trans, ret); 944 926 goto out_free_path; 927 + } 945 928 946 929 leaf = path->nodes[0]; 947 930 ptr = btrfs_item_ptr(leaf, path->slots[0], ··· 965 944 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 966 945 if (ret > 0) 967 946 goto out_add_root; 968 - if (ret < 0) 947 + if (ret < 0) { 948 + btrfs_abort_transaction(trans, ret); 969 949 goto out_free_path; 970 - 950 + } 971 951 972 952 while (1) { 973 953 slot = path->slots[0]; ··· 978 956 if (found_key.type == BTRFS_ROOT_REF_KEY) { 979 957 ret = add_qgroup_item(trans, quota_root, 980 958 found_key.offset); 981 - if (ret) 959 + if (ret) { 960 + btrfs_abort_transaction(trans, ret); 982 961 goto out_free_path; 962 + } 983 963 984 964 qgroup = add_qgroup_rb(fs_info, found_key.offset); 985 965 if (IS_ERR(qgroup)) { 986 966 ret = PTR_ERR(qgroup); 967 + btrfs_abort_transaction(trans, 
ret); 987 968 goto out_free_path; 988 969 } 989 970 } 990 971 ret = btrfs_next_item(tree_root, path); 991 - if (ret < 0) 972 + if (ret < 0) { 973 + btrfs_abort_transaction(trans, ret); 992 974 goto out_free_path; 975 + } 993 976 if (ret) 994 977 break; 995 978 } ··· 1002 975 out_add_root: 1003 976 btrfs_release_path(path); 1004 977 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 1005 - if (ret) 978 + if (ret) { 979 + btrfs_abort_transaction(trans, ret); 1006 980 goto out_free_path; 981 + } 1007 982 1008 983 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 1009 984 if (IS_ERR(qgroup)) { 1010 985 ret = PTR_ERR(qgroup); 986 + btrfs_abort_transaction(trans, ret); 1011 987 goto out_free_path; 1012 988 } 1013 989 spin_lock(&fs_info->qgroup_lock); 1014 990 fs_info->quota_root = quota_root; 1015 991 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1016 992 spin_unlock(&fs_info->qgroup_lock); 993 + 994 + ret = btrfs_commit_transaction(trans); 995 + if (ret) { 996 + trans = NULL; 997 + goto out_free_path; 998 + } 999 + 1017 1000 ret = qgroup_rescan_init(fs_info, 0, 1); 1018 1001 if (!ret) { 1019 1002 qgroup_rescan_zero_tracking(fs_info); ··· 1043 1006 if (ret) { 1044 1007 ulist_free(fs_info->qgroup_ulist); 1045 1008 fs_info->qgroup_ulist = NULL; 1009 + if (trans) 1010 + btrfs_end_transaction(trans); 1046 1011 } 1047 1012 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1048 1013 return ret; 1049 1014 } 1050 1015 1051 - int btrfs_quota_disable(struct btrfs_trans_handle *trans, 1052 - struct btrfs_fs_info *fs_info) 1016 + int btrfs_quota_disable(struct btrfs_fs_info *fs_info) 1053 1017 { 1054 1018 struct btrfs_root *quota_root; 1019 + struct btrfs_trans_handle *trans = NULL; 1055 1020 int ret = 0; 1056 1021 1057 1022 mutex_lock(&fs_info->qgroup_ioctl_lock); 1058 1023 if (!fs_info->quota_root) 1059 1024 goto out; 1025 + 1026 + /* 1027 + * 1 For the root item 1028 + * 1029 + * We should also reserve enough items for the quota tree deletion in 1030 + * 
btrfs_clean_quota_tree but this is not done. 1031 + */ 1032 + trans = btrfs_start_transaction(fs_info->tree_root, 1); 1033 + if (IS_ERR(trans)) { 1034 + ret = PTR_ERR(trans); 1035 + goto out; 1036 + } 1037 + 1060 1038 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1061 1039 btrfs_qgroup_wait_for_completion(fs_info, false); 1062 1040 spin_lock(&fs_info->qgroup_lock); ··· 1083 1031 btrfs_free_qgroup_config(fs_info); 1084 1032 1085 1033 ret = btrfs_clean_quota_tree(trans, quota_root); 1086 - if (ret) 1087 - goto out; 1034 + if (ret) { 1035 + btrfs_abort_transaction(trans, ret); 1036 + goto end_trans; 1037 + } 1088 1038 1089 - ret = btrfs_del_root(trans, fs_info, &quota_root->root_key); 1090 - if (ret) 1091 - goto out; 1039 + ret = btrfs_del_root(trans, &quota_root->root_key); 1040 + if (ret) { 1041 + btrfs_abort_transaction(trans, ret); 1042 + goto end_trans; 1043 + } 1092 1044 1093 1045 list_del(&quota_root->dirty_list); 1094 1046 ··· 1104 1048 free_extent_buffer(quota_root->node); 1105 1049 free_extent_buffer(quota_root->commit_root); 1106 1050 kfree(quota_root); 1051 + 1052 + end_trans: 1053 + ret = btrfs_end_transaction(trans); 1107 1054 out: 1108 1055 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1109 1056 return ret; ··· 1236 1177 return ret; 1237 1178 } 1238 1179 1239 - int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1240 - struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1180 + int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1181 + u64 dst) 1241 1182 { 1183 + struct btrfs_fs_info *fs_info = trans->fs_info; 1242 1184 struct btrfs_root *quota_root; 1243 1185 struct btrfs_qgroup *parent; 1244 1186 struct btrfs_qgroup *member; ··· 1276 1216 } 1277 1217 } 1278 1218 1279 - ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1219 + ret = add_qgroup_relation_item(trans, src, dst); 1280 1220 if (ret) 1281 1221 goto out; 1282 1222 1283 - ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1223 + ret = 
add_qgroup_relation_item(trans, dst, src); 1284 1224 if (ret) { 1285 - del_qgroup_relation_item(trans, quota_root, src, dst); 1225 + del_qgroup_relation_item(trans, src, dst); 1286 1226 goto out; 1287 1227 } 1288 1228 ··· 1300 1240 return ret; 1301 1241 } 1302 1242 1303 - static int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1304 - struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1243 + static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1244 + u64 dst) 1305 1245 { 1246 + struct btrfs_fs_info *fs_info = trans->fs_info; 1306 1247 struct btrfs_root *quota_root; 1307 1248 struct btrfs_qgroup *parent; 1308 1249 struct btrfs_qgroup *member; ··· 1337 1276 ret = -ENOENT; 1338 1277 goto out; 1339 1278 exist: 1340 - ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1341 - err = del_qgroup_relation_item(trans, quota_root, dst, src); 1279 + ret = del_qgroup_relation_item(trans, src, dst); 1280 + err = del_qgroup_relation_item(trans, dst, src); 1342 1281 if (err && !ret) 1343 1282 ret = err; 1344 1283 ··· 1351 1290 return ret; 1352 1291 } 1353 1292 1354 - int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1355 - struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1293 + int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1294 + u64 dst) 1356 1295 { 1296 + struct btrfs_fs_info *fs_info = trans->fs_info; 1357 1297 int ret = 0; 1358 1298 1359 1299 mutex_lock(&fs_info->qgroup_ioctl_lock); 1360 - ret = __del_qgroup_relation(trans, fs_info, src, dst); 1300 + ret = __del_qgroup_relation(trans, src, dst); 1361 1301 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1362 1302 1363 1303 return ret; 1364 1304 } 1365 1305 1366 - int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1367 - struct btrfs_fs_info *fs_info, u64 qgroupid) 1306 + int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) 1368 1307 { 1308 + struct btrfs_fs_info *fs_info = trans->fs_info; 1369 1309 struct btrfs_root 
*quota_root; 1370 1310 struct btrfs_qgroup *qgroup; 1371 1311 int ret = 0; ··· 1398 1336 return ret; 1399 1337 } 1400 1338 1401 - int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1402 - struct btrfs_fs_info *fs_info, u64 qgroupid) 1339 + int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) 1403 1340 { 1341 + struct btrfs_fs_info *fs_info = trans->fs_info; 1404 1342 struct btrfs_root *quota_root; 1405 1343 struct btrfs_qgroup *qgroup; 1406 1344 struct btrfs_qgroup_list *list; ··· 1424 1362 goto out; 1425 1363 } 1426 1364 } 1427 - ret = del_qgroup_item(trans, quota_root, qgroupid); 1365 + ret = del_qgroup_item(trans, qgroupid); 1428 1366 if (ret && ret != -ENOENT) 1429 1367 goto out; 1430 1368 1431 1369 while (!list_empty(&qgroup->groups)) { 1432 1370 list = list_first_entry(&qgroup->groups, 1433 1371 struct btrfs_qgroup_list, next_group); 1434 - ret = __del_qgroup_relation(trans, fs_info, 1435 - qgroupid, 1436 - list->group->qgroupid); 1372 + ret = __del_qgroup_relation(trans, qgroupid, 1373 + list->group->qgroupid); 1437 1374 if (ret) 1438 1375 goto out; 1439 1376 } ··· 1445 1384 return ret; 1446 1385 } 1447 1386 1448 - int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1449 - struct btrfs_fs_info *fs_info, u64 qgroupid, 1387 + int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, 1450 1388 struct btrfs_qgroup_limit *limit) 1451 1389 { 1390 + struct btrfs_fs_info *fs_info = trans->fs_info; 1452 1391 struct btrfs_root *quota_root; 1453 1392 struct btrfs_qgroup *qgroup; 1454 1393 int ret = 0; ··· 1512 1451 1513 1452 spin_unlock(&fs_info->qgroup_lock); 1514 1453 1515 - ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1454 + ret = update_qgroup_limit_item(trans, qgroup); 1516 1455 if (ret) { 1517 1456 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1518 1457 btrfs_info(fs_info, "unable to update quota limit for %llu", ··· 1580 1519 return 0; 1581 1520 } 1582 1521 1583 - int 
btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1584 - struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1585 - gfp_t gfp_flag) 1522 + int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, 1523 + u64 num_bytes, gfp_t gfp_flag) 1586 1524 { 1525 + struct btrfs_fs_info *fs_info = trans->fs_info; 1587 1526 struct btrfs_qgroup_extent_record *record; 1588 1527 struct btrfs_delayed_ref_root *delayed_refs; 1589 1528 int ret; ··· 1591 1530 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1592 1531 || bytenr == 0 || num_bytes == 0) 1593 1532 return 0; 1594 - if (WARN_ON(trans == NULL)) 1595 - return -EINVAL; 1596 1533 record = kmalloc(sizeof(*record), gfp_flag); 1597 1534 if (!record) 1598 1535 return -ENOMEM; ··· 1611 1552 } 1612 1553 1613 1554 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1614 - struct btrfs_fs_info *fs_info, 1615 1555 struct extent_buffer *eb) 1616 1556 { 1557 + struct btrfs_fs_info *fs_info = trans->fs_info; 1617 1558 int nr = btrfs_header_nritems(eb); 1618 1559 int i, extent_type, ret; 1619 1560 struct btrfs_key key; ··· 1643 1584 1644 1585 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1645 1586 1646 - ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1647 - num_bytes, GFP_NOFS); 1587 + ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes, 1588 + GFP_NOFS); 1648 1589 if (ret) 1649 1590 return ret; 1650 1591 } ··· 1714 1655 } 1715 1656 1716 1657 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1717 - struct btrfs_root *root, 1718 1658 struct extent_buffer *root_eb, 1719 1659 u64 root_gen, int root_level) 1720 1660 { 1721 - struct btrfs_fs_info *fs_info = root->fs_info; 1661 + struct btrfs_fs_info *fs_info = trans->fs_info; 1722 1662 int ret = 0; 1723 1663 int level; 1724 1664 struct extent_buffer *eb = root_eb; ··· 1736 1678 } 1737 1679 1738 1680 if (root_level == 0) { 1739 - ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1681 + ret = 
btrfs_qgroup_trace_leaf_items(trans, root_eb); 1740 1682 goto out; 1741 1683 } 1742 1684 ··· 1794 1736 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1795 1737 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 1796 1738 1797 - ret = btrfs_qgroup_trace_extent(trans, fs_info, 1798 - child_bytenr, 1739 + ret = btrfs_qgroup_trace_extent(trans, child_bytenr, 1799 1740 fs_info->nodesize, 1800 1741 GFP_NOFS); 1801 1742 if (ret) ··· 1802 1745 } 1803 1746 1804 1747 if (level == 0) { 1805 - ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, 1806 - path->nodes[level]); 1748 + ret = btrfs_qgroup_trace_leaf_items(trans, 1749 + path->nodes[level]); 1807 1750 if (ret) 1808 1751 goto out; 1809 1752 ··· 2038 1981 return is_fstree(unode->val); 2039 1982 } 2040 1983 2041 - int 2042 - btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 2043 - struct btrfs_fs_info *fs_info, 2044 - u64 bytenr, u64 num_bytes, 2045 - struct ulist *old_roots, struct ulist *new_roots) 1984 + int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, 1985 + u64 num_bytes, struct ulist *old_roots, 1986 + struct ulist *new_roots) 2046 1987 { 1988 + struct btrfs_fs_info *fs_info = trans->fs_info; 2047 1989 struct ulist *qgroups = NULL; 2048 1990 struct ulist *tmp = NULL; 2049 1991 u64 seq; ··· 2172 2116 ulist_del(record->old_roots, qgroup_to_skip, 2173 2117 0); 2174 2118 } 2175 - ret = btrfs_qgroup_account_extent(trans, fs_info, 2176 - record->bytenr, record->num_bytes, 2177 - record->old_roots, new_roots); 2119 + ret = btrfs_qgroup_account_extent(trans, record->bytenr, 2120 + record->num_bytes, 2121 + record->old_roots, 2122 + new_roots); 2178 2123 record->old_roots = NULL; 2179 2124 new_roots = NULL; 2180 2125 } ··· 2193 2136 /* 2194 2137 * called from commit_transaction. Writes all changed qgroups to disk. 
2195 2138 */ 2196 - int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2197 - struct btrfs_fs_info *fs_info) 2139 + int btrfs_run_qgroups(struct btrfs_trans_handle *trans) 2198 2140 { 2141 + struct btrfs_fs_info *fs_info = trans->fs_info; 2199 2142 struct btrfs_root *quota_root = fs_info->quota_root; 2200 2143 int ret = 0; 2201 2144 ··· 2209 2152 struct btrfs_qgroup, dirty); 2210 2153 list_del_init(&qgroup->dirty); 2211 2154 spin_unlock(&fs_info->qgroup_lock); 2212 - ret = update_qgroup_info_item(trans, quota_root, qgroup); 2155 + ret = update_qgroup_info_item(trans, qgroup); 2213 2156 if (ret) 2214 2157 fs_info->qgroup_flags |= 2215 2158 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2216 - ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2159 + ret = update_qgroup_limit_item(trans, qgroup); 2217 2160 if (ret) 2218 2161 fs_info->qgroup_flags |= 2219 2162 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; ··· 2225 2168 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2226 2169 spin_unlock(&fs_info->qgroup_lock); 2227 2170 2228 - ret = update_qgroup_status_item(trans, fs_info, quota_root); 2171 + ret = update_qgroup_status_item(trans); 2229 2172 if (ret) 2230 2173 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2231 2174 ··· 2238 2181 * cause a transaction abort so we take extra care here to only error 2239 2182 * when a readonly fs is a reasonable outcome. 
2240 2183 */ 2241 - int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2242 - struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2243 - struct btrfs_qgroup_inherit *inherit) 2184 + int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, 2185 + u64 objectid, struct btrfs_qgroup_inherit *inherit) 2244 2186 { 2245 2187 int ret = 0; 2246 2188 int i; 2247 2189 u64 *i_qgroups; 2190 + struct btrfs_fs_info *fs_info = trans->fs_info; 2248 2191 struct btrfs_root *quota_root = fs_info->quota_root; 2249 2192 struct btrfs_qgroup *srcgroup; 2250 2193 struct btrfs_qgroup *dstgroup; ··· 2286 2229 if (ret) 2287 2230 goto out; 2288 2231 2289 - if (srcid) { 2290 - struct btrfs_root *srcroot; 2291 - struct btrfs_key srckey; 2292 - 2293 - srckey.objectid = srcid; 2294 - srckey.type = BTRFS_ROOT_ITEM_KEY; 2295 - srckey.offset = (u64)-1; 2296 - srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2297 - if (IS_ERR(srcroot)) { 2298 - ret = PTR_ERR(srcroot); 2299 - goto out; 2300 - } 2301 - 2302 - level_size = fs_info->nodesize; 2303 - } 2304 - 2305 2232 /* 2306 2233 * add qgroup to all inherited groups 2307 2234 */ ··· 2294 2253 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2295 2254 if (*i_qgroups == 0) 2296 2255 continue; 2297 - ret = add_qgroup_relation_item(trans, quota_root, 2298 - objectid, *i_qgroups); 2256 + ret = add_qgroup_relation_item(trans, objectid, 2257 + *i_qgroups); 2299 2258 if (ret && ret != -EEXIST) 2300 2259 goto out; 2301 - ret = add_qgroup_relation_item(trans, quota_root, 2302 - *i_qgroups, objectid); 2260 + ret = add_qgroup_relation_item(trans, *i_qgroups, 2261 + objectid); 2303 2262 if (ret && ret != -EEXIST) 2304 2263 goto out; 2305 2264 } ··· 2322 2281 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2323 2282 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2324 2283 2325 - ret = update_qgroup_limit_item(trans, quota_root, dstgroup); 2284 + ret = update_qgroup_limit_item(trans, dstgroup); 2326 2285 if (ret) { 2327 2286 
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2328 2287 btrfs_info(fs_info, ··· 2342 2301 * our counts don't go crazy, so at this point the only 2343 2302 * difference between the two roots should be the root node. 2344 2303 */ 2304 + level_size = fs_info->nodesize; 2345 2305 dstgroup->rfer = srcgroup->rfer; 2346 2306 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2347 2307 dstgroup->excl = level_size; ··· 2640 2598 * returns < 0 on error, 0 when more leafs are to be scanned. 2641 2599 * returns 1 when done. 2642 2600 */ 2643 - static int 2644 - qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2645 - struct btrfs_trans_handle *trans) 2601 + static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, 2602 + struct btrfs_path *path) 2646 2603 { 2604 + struct btrfs_fs_info *fs_info = trans->fs_info; 2647 2605 struct btrfs_key found; 2648 2606 struct extent_buffer *scratch_leaf = NULL; 2649 2607 struct ulist *roots = NULL; ··· 2711 2669 if (ret < 0) 2712 2670 goto out; 2713 2671 /* For rescan, just pass old_roots as NULL */ 2714 - ret = btrfs_qgroup_account_extent(trans, fs_info, 2715 - found.objectid, num_bytes, NULL, roots); 2672 + ret = btrfs_qgroup_account_extent(trans, found.objectid, 2673 + num_bytes, NULL, roots); 2716 2674 if (ret < 0) 2717 2675 goto out; 2718 2676 } ··· 2758 2716 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 2759 2717 err = -EINTR; 2760 2718 } else { 2761 - err = qgroup_rescan_leaf(fs_info, path, trans); 2719 + err = qgroup_rescan_leaf(trans, path); 2762 2720 } 2763 2721 if (err > 0) 2764 2722 btrfs_commit_transaction(trans); ··· 2793 2751 err); 2794 2752 goto done; 2795 2753 } 2796 - ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); 2754 + ret = update_qgroup_status_item(trans); 2797 2755 if (ret < 0) { 2798 2756 err = ret; 2799 2757 btrfs_err(fs_info, "fail to update qgroup status: %d", err);

+17 -29

fs/btrfs/qgroup.h

··· 141 141 #define QGROUP_RELEASE (1<<1) 142 142 #define QGROUP_FREE (1<<2) 143 143 144 - int btrfs_quota_enable(struct btrfs_trans_handle *trans, 145 - struct btrfs_fs_info *fs_info); 146 - int btrfs_quota_disable(struct btrfs_trans_handle *trans, 147 - struct btrfs_fs_info *fs_info); 144 + int btrfs_quota_enable(struct btrfs_fs_info *fs_info); 145 + int btrfs_quota_disable(struct btrfs_fs_info *fs_info); 148 146 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); 149 147 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); 150 148 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 151 149 bool interruptible); 152 - int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 153 - struct btrfs_fs_info *fs_info, u64 src, u64 dst); 154 - int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 155 - struct btrfs_fs_info *fs_info, u64 src, u64 dst); 156 - int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 157 - struct btrfs_fs_info *fs_info, u64 qgroupid); 158 - int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 159 - struct btrfs_fs_info *fs_info, u64 qgroupid); 160 - int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 161 - struct btrfs_fs_info *fs_info, u64 qgroupid, 150 + int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 151 + u64 dst); 152 + int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 153 + u64 dst); 154 + int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid); 155 + int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid); 156 + int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, 162 157 struct btrfs_qgroup_limit *limit); 163 158 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); 164 159 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); ··· 212 217 * Return <0 for error, like memory allocation failure or invalid parameter 213 218 * (NULL trans) 214 219 */ 215 - int 
btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 216 - struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 217 - gfp_t gfp_flag); 220 + int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, 221 + u64 num_bytes, gfp_t gfp_flag); 218 222 219 223 /* 220 224 * Inform qgroup to trace all leaf items of data ··· 222 228 * Return <0 for error(ENOMEM) 223 229 */ 224 230 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 225 - struct btrfs_fs_info *fs_info, 226 231 struct extent_buffer *eb); 227 232 /* 228 233 * Inform qgroup to trace a whole subtree, including all its child tree ··· 234 241 * Return <0 for error(ENOMEM or tree search error) 235 242 */ 236 243 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 237 - struct btrfs_root *root, 238 244 struct extent_buffer *root_eb, 239 245 u64 root_gen, int root_level); 240 - int 241 - btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 242 - struct btrfs_fs_info *fs_info, 243 - u64 bytenr, u64 num_bytes, 244 - struct ulist *old_roots, struct ulist *new_roots); 246 + int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, 247 + u64 num_bytes, struct ulist *old_roots, 248 + struct ulist *new_roots); 245 249 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans); 246 - int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 247 - struct btrfs_fs_info *fs_info); 248 - int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 249 - struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 250 - struct btrfs_qgroup_inherit *inherit); 250 + int btrfs_run_qgroups(struct btrfs_trans_handle *trans); 251 + int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, 252 + u64 objectid, struct btrfs_qgroup_inherit *inherit); 251 253 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 252 254 u64 ref_root, u64 num_bytes, 253 255 enum btrfs_qgroup_rsv_type type);

+36 -77

fs/btrfs/raid56.c

··· 5 5 */ 6 6 7 7 #include <linux/sched.h> 8 - #include <linux/wait.h> 9 8 #include <linux/bio.h> 10 9 #include <linux/slab.h> 11 - #include <linux/buffer_head.h> 12 10 #include <linux/blkdev.h> 13 - #include <linux/random.h> 14 - #include <linux/iocontext.h> 15 - #include <linux/capability.h> 16 - #include <linux/ratelimit.h> 17 - #include <linux/kthread.h> 18 11 #include <linux/raid/pq.h> 19 12 #include <linux/hash.h> 20 13 #include <linux/list_sort.h> 21 14 #include <linux/raid/xor.h> 22 15 #include <linux/mm.h> 23 - #include <asm/div64.h> 24 16 #include "ctree.h" 25 - #include "extent_map.h" 26 17 #include "disk-io.h" 27 - #include "transaction.h" 28 - #include "print-tree.h" 29 18 #include "volumes.h" 30 19 #include "raid56.h" 31 20 #include "async-thread.h" 32 - #include "check-integrity.h" 33 - #include "rcu-string.h" 34 21 35 22 /* set when additional merges to this rbio are not allowed */ 36 23 #define RBIO_RMW_LOCKED_BIT 1 ··· 162 175 static noinline void finish_rmw(struct btrfs_raid_bio *rbio); 163 176 static void rmw_work(struct btrfs_work *work); 164 177 static void read_rebuild_work(struct btrfs_work *work); 165 - static void async_rmw_stripe(struct btrfs_raid_bio *rbio); 166 - static void async_read_rebuild(struct btrfs_raid_bio *rbio); 167 178 static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio); 168 179 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed); 169 180 static void __free_raid_bio(struct btrfs_raid_bio *rbio); ··· 170 185 171 186 static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, 172 187 int need_check); 173 - static void async_scrub_parity(struct btrfs_raid_bio *rbio); 188 + static void scrub_parity_work(struct btrfs_work *work); 189 + 190 + static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func) 191 + { 192 + btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL); 193 + btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); 194 + } 174 
195 175 196 /* 176 197 * the stripe hash table is used for locking, and to collect ··· 251 260 s = kmap(rbio->bio_pages[i]); 252 261 d = kmap(rbio->stripe_pages[i]); 253 262 254 - memcpy(d, s, PAGE_SIZE); 263 + copy_page(d, s); 255 264 256 265 kunmap(rbio->bio_pages[i]); 257 266 kunmap(rbio->stripe_pages[i]); ··· 507 516 } 508 517 509 518 /* 510 - * returns true if the bio list inside this rbio 511 - * covers an entire stripe (no rmw required). 512 - * Must be called with the bio list lock held, or 513 - * at a time when you know it is impossible to add 514 - * new bios into the list 519 + * Returns true if the bio list inside this rbio covers an entire stripe (no 520 + * rmw required). 515 521 */ 516 - static int __rbio_is_full(struct btrfs_raid_bio *rbio) 517 - { 518 - unsigned long size = rbio->bio_list_bytes; 519 - int ret = 1; 520 - 521 - if (size != rbio->nr_data * rbio->stripe_len) 522 - ret = 0; 523 - 524 - BUG_ON(size > rbio->nr_data * rbio->stripe_len); 525 - return ret; 526 - } 527 - 528 522 static int rbio_is_full(struct btrfs_raid_bio *rbio) 529 523 { 530 524 unsigned long flags; 531 - int ret; 525 + unsigned long size = rbio->bio_list_bytes; 526 + int ret = 1; 532 527 533 528 spin_lock_irqsave(&rbio->bio_list_lock, flags); 534 - ret = __rbio_is_full(rbio); 529 + if (size != rbio->nr_data * rbio->stripe_len) 530 + ret = 0; 531 + BUG_ON(size > rbio->nr_data * rbio->stripe_len); 535 532 spin_unlock_irqrestore(&rbio->bio_list_lock, flags); 533 + 536 534 return ret; 537 535 } 538 536 ··· 792 812 spin_unlock_irqrestore(&h->lock, flags); 793 813 794 814 if (next->operation == BTRFS_RBIO_READ_REBUILD) 795 - async_read_rebuild(next); 815 + start_async_work(next, read_rebuild_work); 796 816 else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) { 797 817 steal_rbio(rbio, next); 798 - async_read_rebuild(next); 818 + start_async_work(next, read_rebuild_work); 799 819 } else if (next->operation == BTRFS_RBIO_WRITE) { 800 820 steal_rbio(rbio, next); 801 - 
async_rmw_stripe(next); 821 + start_async_work(next, rmw_work); 802 822 } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { 803 823 steal_rbio(rbio, next); 804 - async_scrub_parity(next); 824 + start_async_work(next, scrub_parity_work); 805 825 } 806 826 807 827 goto done_nolock; ··· 1255 1275 pointers); 1256 1276 } else { 1257 1277 /* raid5 */ 1258 - memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); 1278 + copy_page(pointers[nr_data], pointers[0]); 1259 1279 run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 1260 1280 } 1261 1281 ··· 1323 1343 1324 1344 bio->bi_private = rbio; 1325 1345 bio->bi_end_io = raid_write_end_io; 1326 - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 1346 + bio->bi_opf = REQ_OP_WRITE; 1327 1347 1328 1348 submit_bio(bio); 1329 1349 } ··· 1488 1508 rbio_orig_end_io(rbio, BLK_STS_IOERR); 1489 1509 } 1490 1510 1491 - static void async_rmw_stripe(struct btrfs_raid_bio *rbio) 1492 - { 1493 - btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL); 1494 - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); 1495 - } 1496 - 1497 - static void async_read_rebuild(struct btrfs_raid_bio *rbio) 1498 - { 1499 - btrfs_init_work(&rbio->work, btrfs_rmw_helper, 1500 - read_rebuild_work, NULL, NULL); 1501 - 1502 - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); 1503 - } 1504 - 1505 1511 /* 1506 1512 * the stripe must be locked by the caller. 
It will 1507 1513 * unlock after all the writes are done ··· 1565 1599 1566 1600 bio->bi_private = rbio; 1567 1601 bio->bi_end_io = raid_rmw_end_io; 1568 - bio_set_op_attrs(bio, REQ_OP_READ, 0); 1602 + bio->bi_opf = REQ_OP_READ; 1569 1603 1570 1604 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 1571 1605 ··· 1618 1652 1619 1653 ret = lock_stripe_add(rbio); 1620 1654 if (ret == 0) 1621 - async_rmw_stripe(rbio); 1655 + start_async_work(rbio, rmw_work); 1622 1656 return 0; 1623 1657 } 1624 1658 ··· 1686 1720 list_del_init(&cur->plug_list); 1687 1721 1688 1722 if (rbio_is_full(cur)) { 1723 + int ret; 1724 + 1689 1725 /* we have a full stripe, send it down */ 1690 - full_stripe_write(cur); 1726 + ret = full_stripe_write(cur); 1727 + BUG_ON(ret); 1691 1728 continue; 1692 1729 } 1693 1730 if (last) { ··· 1910 1941 BUG_ON(failb != -1); 1911 1942 pstripe: 1912 1943 /* Copy parity block into failed block to start with */ 1913 - memcpy(pointers[faila], 1914 - pointers[rbio->nr_data], 1915 - PAGE_SIZE); 1944 + copy_page(pointers[faila], pointers[rbio->nr_data]); 1916 1945 1917 1946 /* rearrange the pointer array */ 1918 1947 p = pointers[faila]; ··· 2112 2145 2113 2146 bio->bi_private = rbio; 2114 2147 bio->bi_end_io = raid_recover_end_io; 2115 - bio_set_op_attrs(bio, REQ_OP_READ, 0); 2148 + bio->bi_opf = REQ_OP_READ; 2116 2149 2117 2150 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 2118 2151 ··· 2415 2448 pointers); 2416 2449 } else { 2417 2450 /* raid5 */ 2418 - memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); 2451 + copy_page(pointers[nr_data], pointers[0]); 2419 2452 run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 2420 2453 } 2421 2454 ··· 2423 2456 p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 2424 2457 parity = kmap(p); 2425 2458 if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) 2426 - memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE); 2459 + copy_page(parity, pointers[rbio->scrubp]); 2427 2460 else 2428 2461 /* Parity is 
right, needn't writeback */ 2429 2462 bitmap_clear(rbio->dbitmap, pagenr, 1); ··· 2484 2517 2485 2518 bio->bi_private = rbio; 2486 2519 bio->bi_end_io = raid_write_end_io; 2487 - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 2520 + bio->bi_opf = REQ_OP_WRITE; 2488 2521 2489 2522 submit_bio(bio); 2490 2523 } ··· 2666 2699 2667 2700 bio->bi_private = rbio; 2668 2701 bio->bi_end_io = raid56_parity_scrub_end_io; 2669 - bio_set_op_attrs(bio, REQ_OP_READ, 0); 2702 + bio->bi_opf = REQ_OP_READ; 2670 2703 2671 2704 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 2672 2705 ··· 2695 2728 raid56_parity_scrub_stripe(rbio); 2696 2729 } 2697 2730 2698 - static void async_scrub_parity(struct btrfs_raid_bio *rbio) 2699 - { 2700 - btrfs_init_work(&rbio->work, btrfs_rmw_helper, 2701 - scrub_parity_work, NULL, NULL); 2702 - 2703 - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); 2704 - } 2705 - 2706 2731 void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) 2707 2732 { 2708 2733 if (!lock_stripe_add(rbio)) 2709 - async_scrub_parity(rbio); 2734 + start_async_work(rbio, scrub_parity_work); 2710 2735 } 2711 2736 2712 2737 /* The following code is used for dev replace of a missing RAID 5/6 device. */ ··· 2740 2781 void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio) 2741 2782 { 2742 2783 if (!lock_stripe_add(rbio)) 2743 - async_read_rebuild(rbio); 2784 + start_async_work(rbio, read_rebuild_work); 2744 2785 }

+1 -2

fs/btrfs/reada.c

··· 7 7 #include <linux/pagemap.h> 8 8 #include <linux/writeback.h> 9 9 #include <linux/blkdev.h> 10 - #include <linux/rbtree.h> 11 10 #include <linux/slab.h> 12 11 #include <linux/workqueue.h> 13 12 #include "ctree.h" ··· 354 355 dev = bbio->stripes[nzones].dev; 355 356 356 357 /* cannot read ahead on missing device. */ 357 - if (!dev->bdev) 358 + if (!dev->bdev) 358 359 continue; 359 360 360 361 zone = reada_find_zone(dev, logical, bbio);

+42 -174

fs/btrfs/relocation.c

··· 586 586 return btrfs_get_fs_root(fs_info, &key, false); 587 587 } 588 588 589 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 590 - static noinline_for_stack 591 - struct btrfs_root *find_tree_root(struct reloc_control *rc, 592 - struct extent_buffer *leaf, 593 - struct btrfs_extent_ref_v0 *ref0) 594 - { 595 - struct btrfs_root *root; 596 - u64 root_objectid = btrfs_ref_root_v0(leaf, ref0); 597 - u64 generation = btrfs_ref_generation_v0(leaf, ref0); 598 - 599 - BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID); 600 - 601 - root = read_fs_root(rc->extent_root->fs_info, root_objectid); 602 - BUG_ON(IS_ERR(root)); 603 - 604 - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && 605 - generation != btrfs_root_generation(&root->root_item)) 606 - return NULL; 607 - 608 - return root; 609 - } 610 - #endif 611 - 612 589 static noinline_for_stack 613 590 int find_inline_backref(struct extent_buffer *leaf, int slot, 614 591 unsigned long *ptr, unsigned long *end) ··· 598 621 btrfs_item_key_to_cpu(leaf, &key, slot); 599 622 600 623 item_size = btrfs_item_size_nr(leaf, slot); 601 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 602 624 if (item_size < sizeof(*ei)) { 603 - WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 625 + btrfs_print_v0_err(leaf->fs_info); 626 + btrfs_handle_fs_error(leaf->fs_info, -EINVAL, NULL); 604 627 return 1; 605 628 } 606 - #endif 607 629 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 608 630 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 609 631 BTRFS_EXTENT_FLAG_TREE_BLOCK)); ··· 768 792 type = btrfs_get_extent_inline_ref_type(eb, iref, 769 793 BTRFS_REF_TYPE_BLOCK); 770 794 if (type == BTRFS_REF_TYPE_INVALID) { 771 - err = -EINVAL; 795 + err = -EUCLEAN; 772 796 goto out; 773 797 } 774 798 key.type = type; ··· 787 811 goto next; 788 812 } 789 813 790 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 791 - if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || 792 - key.type == BTRFS_EXTENT_REF_V0_KEY) { 793 - if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 794 - struct 
btrfs_extent_ref_v0 *ref0; 795 - ref0 = btrfs_item_ptr(eb, path1->slots[0], 796 - struct btrfs_extent_ref_v0); 797 - if (key.objectid == key.offset) { 798 - root = find_tree_root(rc, eb, ref0); 799 - if (root && !should_ignore_root(root)) 800 - cur->root = root; 801 - else 802 - list_add(&cur->list, &useless); 803 - break; 804 - } 805 - if (is_cowonly_root(btrfs_ref_root_v0(eb, 806 - ref0))) 807 - cur->cowonly = 1; 808 - } 809 - #else 810 - ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); 811 814 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 812 - #endif 813 815 if (key.objectid == key.offset) { 814 816 /* 815 817 * only root blocks of reloc trees use ··· 830 876 edge->node[UPPER] = upper; 831 877 832 878 goto next; 879 + } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { 880 + err = -EINVAL; 881 + btrfs_print_v0_err(rc->extent_root->fs_info); 882 + btrfs_handle_fs_error(rc->extent_root->fs_info, err, 883 + NULL); 884 + goto out; 833 885 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 834 886 goto next; 835 887 } ··· 1281 1321 struct mapping_node *node = NULL; 1282 1322 struct reloc_control *rc = fs_info->reloc_ctl; 1283 1323 1284 - spin_lock(&rc->reloc_root_tree.lock); 1285 - rb_node = tree_search(&rc->reloc_root_tree.rb_root, 1286 - root->node->start); 1287 - if (rb_node) { 1288 - node = rb_entry(rb_node, struct mapping_node, rb_node); 1289 - rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 1324 + if (rc) { 1325 + spin_lock(&rc->reloc_root_tree.lock); 1326 + rb_node = tree_search(&rc->reloc_root_tree.rb_root, 1327 + root->node->start); 1328 + if (rb_node) { 1329 + node = rb_entry(rb_node, struct mapping_node, rb_node); 1330 + rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 1331 + } 1332 + spin_unlock(&rc->reloc_root_tree.lock); 1333 + if (!node) 1334 + return; 1335 + BUG_ON((struct btrfs_root *)node->data != root); 1290 1336 } 1291 - spin_unlock(&rc->reloc_root_tree.lock); 1292 - 1293 - if (!node) 1294 - return; 1295 - BUG_ON((struct 
btrfs_root *)node->data != root); 1296 1337 1297 1338 spin_lock(&fs_info->trans_lock); 1298 1339 list_del_init(&root->root_list); ··· 1879 1918 * and tree block numbers, if current trans doesn't free 1880 1919 * data reloc tree inode. 1881 1920 */ 1882 - ret = btrfs_qgroup_trace_subtree(trans, src, parent, 1921 + ret = btrfs_qgroup_trace_subtree(trans, parent, 1883 1922 btrfs_header_generation(parent), 1884 1923 btrfs_header_level(parent)); 1885 1924 if (ret < 0) 1886 1925 break; 1887 - ret = btrfs_qgroup_trace_subtree(trans, dest, 1888 - path->nodes[level], 1926 + ret = btrfs_qgroup_trace_subtree(trans, path->nodes[level], 1889 1927 btrfs_header_generation(path->nodes[level]), 1890 1928 btrfs_header_level(path->nodes[level])); 1891 1929 if (ret < 0) ··· 3293 3333 return 0; 3294 3334 } 3295 3335 3296 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3297 - static int get_ref_objectid_v0(struct reloc_control *rc, 3298 - struct btrfs_path *path, 3299 - struct btrfs_key *extent_key, 3300 - u64 *ref_objectid, int *path_change) 3301 - { 3302 - struct btrfs_key key; 3303 - struct extent_buffer *leaf; 3304 - struct btrfs_extent_ref_v0 *ref0; 3305 - int ret; 3306 - int slot; 3307 - 3308 - leaf = path->nodes[0]; 3309 - slot = path->slots[0]; 3310 - while (1) { 3311 - if (slot >= btrfs_header_nritems(leaf)) { 3312 - ret = btrfs_next_leaf(rc->extent_root, path); 3313 - if (ret < 0) 3314 - return ret; 3315 - BUG_ON(ret > 0); 3316 - leaf = path->nodes[0]; 3317 - slot = path->slots[0]; 3318 - if (path_change) 3319 - *path_change = 1; 3320 - } 3321 - btrfs_item_key_to_cpu(leaf, &key, slot); 3322 - if (key.objectid != extent_key->objectid) 3323 - return -ENOENT; 3324 - 3325 - if (key.type != BTRFS_EXTENT_REF_V0_KEY) { 3326 - slot++; 3327 - continue; 3328 - } 3329 - ref0 = btrfs_item_ptr(leaf, slot, 3330 - struct btrfs_extent_ref_v0); 3331 - *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); 3332 - break; 3333 - } 3334 - return 0; 3335 - } 3336 - #endif 3337 - 3338 3336 /* 3339 3337 * helper to 
add a tree block to the list. 3340 3338 * the major work is getting the generation and level of the block ··· 3325 3407 level = (int)extent_key->offset; 3326 3408 } 3327 3409 generation = btrfs_extent_generation(eb, ei); 3410 + } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) { 3411 + btrfs_print_v0_err(eb->fs_info); 3412 + btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL); 3413 + return -EINVAL; 3328 3414 } else { 3329 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3330 - u64 ref_owner; 3331 - int ret; 3332 - 3333 - BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3334 - ret = get_ref_objectid_v0(rc, path, extent_key, 3335 - &ref_owner, NULL); 3336 - if (ret < 0) 3337 - return ret; 3338 - BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 3339 - level = (int)ref_owner; 3340 - /* FIXME: get real generation */ 3341 - generation = 0; 3342 - #else 3343 3415 BUG(); 3344 - #endif 3345 3416 } 3346 3417 3347 3418 btrfs_release_path(path); ··· 3470 3563 key.offset = 0; 3471 3564 3472 3565 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3473 - if (IS_ERR(inode) || is_bad_inode(inode)) { 3474 - if (!IS_ERR(inode)) 3475 - iput(inode); 3566 + if (IS_ERR(inode)) 3476 3567 return -ENOENT; 3477 - } 3478 3568 3479 3569 truncate: 3480 3570 ret = btrfs_check_trunc_cache_free_space(fs_info, ··· 3685 3781 eb = path->nodes[0]; 3686 3782 ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 3687 3783 end = ptr + btrfs_item_size_nr(eb, path->slots[0]); 3688 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3689 - if (ptr + sizeof(struct btrfs_extent_item_v0) == end) 3690 - ptr = end; 3691 - else 3692 - #endif 3693 - ptr += sizeof(struct btrfs_extent_item); 3784 + ptr += sizeof(struct btrfs_extent_item); 3694 3785 3695 3786 while (ptr < end) { 3696 3787 iref = (struct btrfs_extent_inline_ref *)ptr; ··· 3700 3801 ret = find_data_references(rc, extent_key, 3701 3802 eb, dref, blocks); 3702 3803 } else { 3703 - ret = -EINVAL; 3804 + ret = -EUCLEAN; 3704 3805 btrfs_err(rc->extent_root->fs_info, 3705 
3806 "extent %llu slot %d has an invalid inline ref type", 3706 3807 eb->start, path->slots[0]); ··· 3731 3832 if (key.objectid != extent_key->objectid) 3732 3833 break; 3733 3834 3734 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3735 - if (key.type == BTRFS_SHARED_DATA_REF_KEY || 3736 - key.type == BTRFS_EXTENT_REF_V0_KEY) { 3737 - #else 3738 - BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 3739 3835 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3740 - #endif 3741 3836 ret = __add_tree_block(rc, key.offset, blocksize, 3742 3837 blocks); 3743 3838 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { ··· 3739 3846 struct btrfs_extent_data_ref); 3740 3847 ret = find_data_references(rc, extent_key, 3741 3848 eb, dref, blocks); 3849 + } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { 3850 + btrfs_print_v0_err(eb->fs_info); 3851 + btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL); 3852 + ret = -EINVAL; 3742 3853 } else { 3743 3854 ret = 0; 3744 3855 } ··· 3981 4084 flags = btrfs_extent_flags(path->nodes[0], ei); 3982 4085 ret = check_extent_flags(flags); 3983 4086 BUG_ON(ret); 3984 - 4087 + } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) { 4088 + err = -EINVAL; 4089 + btrfs_print_v0_err(trans->fs_info); 4090 + btrfs_abort_transaction(trans, err); 4091 + break; 3985 4092 } else { 3986 - #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3987 - u64 ref_owner; 3988 - int path_change = 0; 3989 - 3990 - BUG_ON(item_size != 3991 - sizeof(struct btrfs_extent_item_v0)); 3992 - ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, 3993 - &path_change); 3994 - if (ret < 0) { 3995 - err = ret; 3996 - break; 3997 - } 3998 - if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) 3999 - flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; 4000 - else 4001 - flags = BTRFS_EXTENT_FLAG_DATA; 4002 - 4003 - if (path_change) { 4004 - btrfs_release_path(path); 4005 - 4006 - path->search_commit_root = 1; 4007 - path->skip_locking = 1; 4008 - ret = btrfs_search_slot(NULL, rc->extent_root, 4009 - &key, path, 0, 0); 
4010 - if (ret < 0) { 4011 - err = ret; 4012 - break; 4013 - } 4014 - BUG_ON(ret > 0); 4015 - } 4016 - #else 4017 4093 BUG(); 4018 - #endif 4019 4094 } 4020 4095 4021 4096 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { ··· 4038 4169 } 4039 4170 } 4040 4171 if (trans && progress && err == -ENOSPC) { 4041 - ret = btrfs_force_chunk_alloc(trans, fs_info, 4042 - rc->block_group->flags); 4172 + ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags); 4043 4173 if (ret == 1) { 4044 4174 err = 0; 4045 4175 progress = 0; ··· 4152 4284 key.type = BTRFS_INODE_ITEM_KEY; 4153 4285 key.offset = 0; 4154 4286 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 4155 - BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 4287 + BUG_ON(IS_ERR(inode)); 4156 4288 BTRFS_I(inode)->index_cnt = group->key.objectid; 4157 4289 4158 4290 err = btrfs_orphan_add(trans, BTRFS_I(inode)); ··· 4243 4375 rc->block_group = btrfs_lookup_block_group(fs_info, group_start); 4244 4376 BUG_ON(!rc->block_group); 4245 4377 4246 - ret = btrfs_inc_block_group_ro(fs_info, rc->block_group); 4378 + ret = btrfs_inc_block_group_ro(rc->block_group); 4247 4379 if (ret) { 4248 4380 err = ret; 4249 4381 goto out;

+10 -12

fs/btrfs/root-tree.c

··· 320 320 321 321 /* drop the root item for 'key' from the tree root */ 322 322 int btrfs_del_root(struct btrfs_trans_handle *trans, 323 - struct btrfs_fs_info *fs_info, const struct btrfs_key *key) 323 + const struct btrfs_key *key) 324 324 { 325 - struct btrfs_root *root = fs_info->tree_root; 325 + struct btrfs_root *root = trans->fs_info->tree_root; 326 326 struct btrfs_path *path; 327 327 int ret; 328 328 ··· 341 341 return ret; 342 342 } 343 343 344 - int btrfs_del_root_ref(struct btrfs_trans_handle *trans, 345 - struct btrfs_fs_info *fs_info, 346 - u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, 347 - const char *name, int name_len) 344 + int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, 345 + u64 ref_id, u64 dirid, u64 *sequence, const char *name, 346 + int name_len) 348 347 349 348 { 350 - struct btrfs_root *tree_root = fs_info->tree_root; 349 + struct btrfs_root *tree_root = trans->fs_info->tree_root; 351 350 struct btrfs_path *path; 352 351 struct btrfs_root_ref *ref; 353 352 struct extent_buffer *leaf; ··· 412 413 * 413 414 * Will return 0, -ENOMEM, or anything from the CoW path 414 415 */ 415 - int btrfs_add_root_ref(struct btrfs_trans_handle *trans, 416 - struct btrfs_fs_info *fs_info, 417 - u64 root_id, u64 ref_id, u64 dirid, u64 sequence, 418 - const char *name, int name_len) 416 + int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, 417 + u64 ref_id, u64 dirid, u64 sequence, const char *name, 418 + int name_len) 419 419 { 420 - struct btrfs_root *tree_root = fs_info->tree_root; 420 + struct btrfs_root *tree_root = trans->fs_info->tree_root; 421 421 struct btrfs_key key; 422 422 int ret; 423 423 struct btrfs_path *path;

+7 -672

fs/btrfs/scrub.c

··· 188 188 refcount_t refs; 189 189 }; 190 190 191 - struct scrub_fixup_nodatasum { 192 - struct scrub_ctx *sctx; 193 - struct btrfs_device *dev; 194 - u64 logical; 195 - struct btrfs_root *root; 196 - struct btrfs_work work; 197 - int mirror_num; 198 - }; 199 - 200 - struct scrub_nocow_inode { 201 - u64 inum; 202 - u64 offset; 203 - u64 root; 204 - struct list_head list; 205 - }; 206 - 207 - struct scrub_copy_nocow_ctx { 208 - struct scrub_ctx *sctx; 209 - u64 logical; 210 - u64 len; 211 - int mirror_num; 212 - u64 physical_for_dev_replace; 213 - struct list_head inodes; 214 - struct btrfs_work work; 215 - }; 216 - 217 191 struct scrub_warning { 218 192 struct btrfs_path *path; 219 193 u64 extent_item_size; ··· 206 232 207 233 static void scrub_pending_bio_inc(struct scrub_ctx *sctx); 208 234 static void scrub_pending_bio_dec(struct scrub_ctx *sctx); 209 - static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); 210 - static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); 211 235 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); 212 236 static int scrub_setup_recheck_block(struct scrub_block *original_sblock, 213 237 struct scrub_block *sblocks_for_recheck); ··· 249 277 static void scrub_wr_submit(struct scrub_ctx *sctx); 250 278 static void scrub_wr_bio_end_io(struct bio *bio); 251 279 static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); 252 - static int write_page_nocow(struct scrub_ctx *sctx, 253 - u64 physical_for_dev_replace, struct page *page); 254 - static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, 255 - struct scrub_copy_nocow_ctx *ctx); 256 - static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 257 - int mirror_num, u64 physical_for_dev_replace); 258 - static void copy_nocow_pages_worker(struct btrfs_work *work); 259 280 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 260 281 static void scrub_blocked_if_needed(struct 
btrfs_fs_info *fs_info); 261 282 static void scrub_put_ctx(struct scrub_ctx *sctx); ··· 518 553 out: 519 554 btrfs_put_block_group(bg_cache); 520 555 return ret; 521 - } 522 - 523 - /* 524 - * used for workers that require transaction commits (i.e., for the 525 - * NOCOW case) 526 - */ 527 - static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) 528 - { 529 - struct btrfs_fs_info *fs_info = sctx->fs_info; 530 - 531 - refcount_inc(&sctx->refs); 532 - /* 533 - * increment scrubs_running to prevent cancel requests from 534 - * completing as long as a worker is running. we must also 535 - * increment scrubs_paused to prevent deadlocking on pause 536 - * requests used for transactions commits (as the worker uses a 537 - * transaction context). it is safe to regard the worker 538 - * as paused for all matters practical. effectively, we only 539 - * avoid cancellation requests from completing. 540 - */ 541 - mutex_lock(&fs_info->scrub_lock); 542 - atomic_inc(&fs_info->scrubs_running); 543 - atomic_inc(&fs_info->scrubs_paused); 544 - mutex_unlock(&fs_info->scrub_lock); 545 - 546 - /* 547 - * check if @scrubs_running=@scrubs_paused condition 548 - * inside wait_event() is not an atomic operation. 549 - * which means we may inc/dec @scrub_running/paused 550 - * at any time. Let's wake up @scrub_pause_wait as 551 - * much as we can to let commit transaction blocked less. 
552 - */ 553 - wake_up(&fs_info->scrub_pause_wait); 554 - 555 - atomic_inc(&sctx->workers_pending); 556 - } 557 - 558 - /* used for workers that require transaction commits */ 559 - static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx) 560 - { 561 - struct btrfs_fs_info *fs_info = sctx->fs_info; 562 - 563 - /* 564 - * see scrub_pending_trans_workers_inc() why we're pretending 565 - * to be paused in the scrub counters 566 - */ 567 - mutex_lock(&fs_info->scrub_lock); 568 - atomic_dec(&fs_info->scrubs_running); 569 - atomic_dec(&fs_info->scrubs_paused); 570 - mutex_unlock(&fs_info->scrub_lock); 571 - atomic_dec(&sctx->workers_pending); 572 - wake_up(&fs_info->scrub_pause_wait); 573 - wake_up(&sctx->list_wait); 574 - scrub_put_ctx(sctx); 575 556 } 576 557 577 558 static void scrub_free_csums(struct scrub_ctx *sctx) ··· 793 882 btrfs_free_path(path); 794 883 } 795 884 796 - static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) 797 - { 798 - struct page *page = NULL; 799 - unsigned long index; 800 - struct scrub_fixup_nodatasum *fixup = fixup_ctx; 801 - int ret; 802 - int corrected = 0; 803 - struct btrfs_key key; 804 - struct inode *inode = NULL; 805 - struct btrfs_fs_info *fs_info; 806 - u64 end = offset + PAGE_SIZE - 1; 807 - struct btrfs_root *local_root; 808 - int srcu_index; 809 - 810 - key.objectid = root; 811 - key.type = BTRFS_ROOT_ITEM_KEY; 812 - key.offset = (u64)-1; 813 - 814 - fs_info = fixup->root->fs_info; 815 - srcu_index = srcu_read_lock(&fs_info->subvol_srcu); 816 - 817 - local_root = btrfs_read_fs_root_no_name(fs_info, &key); 818 - if (IS_ERR(local_root)) { 819 - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); 820 - return PTR_ERR(local_root); 821 - } 822 - 823 - key.type = BTRFS_INODE_ITEM_KEY; 824 - key.objectid = inum; 825 - key.offset = 0; 826 - inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); 827 - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); 828 - if (IS_ERR(inode)) 829 - return 
PTR_ERR(inode); 830 - 831 - index = offset >> PAGE_SHIFT; 832 - 833 - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 834 - if (!page) { 835 - ret = -ENOMEM; 836 - goto out; 837 - } 838 - 839 - if (PageUptodate(page)) { 840 - if (PageDirty(page)) { 841 - /* 842 - * we need to write the data to the defect sector. the 843 - * data that was in that sector is not in memory, 844 - * because the page was modified. we must not write the 845 - * modified page to that sector. 846 - * 847 - * TODO: what could be done here: wait for the delalloc 848 - * runner to write out that page (might involve 849 - * COW) and see whether the sector is still 850 - * referenced afterwards. 851 - * 852 - * For the meantime, we'll treat this error 853 - * incorrectable, although there is a chance that a 854 - * later scrub will find the bad sector again and that 855 - * there's no dirty page in memory, then. 856 - */ 857 - ret = -EIO; 858 - goto out; 859 - } 860 - ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE, 861 - fixup->logical, page, 862 - offset - page_offset(page), 863 - fixup->mirror_num); 864 - unlock_page(page); 865 - corrected = !ret; 866 - } else { 867 - /* 868 - * we need to get good data first. the general readpage path 869 - * will call repair_io_failure for us, we just have to make 870 - * sure we read the bad mirror. 
871 - */ 872 - ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, 873 - EXTENT_DAMAGED); 874 - if (ret) { 875 - /* set_extent_bits should give proper error */ 876 - WARN_ON(ret > 0); 877 - if (ret > 0) 878 - ret = -EFAULT; 879 - goto out; 880 - } 881 - 882 - ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page, 883 - btrfs_get_extent, 884 - fixup->mirror_num); 885 - wait_on_page_locked(page); 886 - 887 - corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, 888 - end, EXTENT_DAMAGED, 0, NULL); 889 - if (!corrected) 890 - clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, 891 - EXTENT_DAMAGED); 892 - } 893 - 894 - out: 895 - if (page) 896 - put_page(page); 897 - 898 - iput(inode); 899 - 900 - if (ret < 0) 901 - return ret; 902 - 903 - if (ret == 0 && corrected) { 904 - /* 905 - * we only need to call readpage for one of the inodes belonging 906 - * to this extent. so make iterate_extent_inodes stop 907 - */ 908 - return 1; 909 - } 910 - 911 - return -EIO; 912 - } 913 - 914 - static void scrub_fixup_nodatasum(struct btrfs_work *work) 915 - { 916 - struct btrfs_fs_info *fs_info; 917 - int ret; 918 - struct scrub_fixup_nodatasum *fixup; 919 - struct scrub_ctx *sctx; 920 - struct btrfs_trans_handle *trans = NULL; 921 - struct btrfs_path *path; 922 - int uncorrectable = 0; 923 - 924 - fixup = container_of(work, struct scrub_fixup_nodatasum, work); 925 - sctx = fixup->sctx; 926 - fs_info = fixup->root->fs_info; 927 - 928 - path = btrfs_alloc_path(); 929 - if (!path) { 930 - spin_lock(&sctx->stat_lock); 931 - ++sctx->stat.malloc_errors; 932 - spin_unlock(&sctx->stat_lock); 933 - uncorrectable = 1; 934 - goto out; 935 - } 936 - 937 - trans = btrfs_join_transaction(fixup->root); 938 - if (IS_ERR(trans)) { 939 - uncorrectable = 1; 940 - goto out; 941 - } 942 - 943 - /* 944 - * the idea is to trigger a regular read through the standard path. 
we 945 - * read a page from the (failed) logical address by specifying the 946 - * corresponding copynum of the failed sector. thus, that readpage is 947 - * expected to fail. 948 - * that is the point where on-the-fly error correction will kick in 949 - * (once it's finished) and rewrite the failed sector if a good copy 950 - * can be found. 951 - */ 952 - ret = iterate_inodes_from_logical(fixup->logical, fs_info, path, 953 - scrub_fixup_readpage, fixup, false); 954 - if (ret < 0) { 955 - uncorrectable = 1; 956 - goto out; 957 - } 958 - WARN_ON(ret != 1); 959 - 960 - spin_lock(&sctx->stat_lock); 961 - ++sctx->stat.corrected_errors; 962 - spin_unlock(&sctx->stat_lock); 963 - 964 - out: 965 - if (trans && !IS_ERR(trans)) 966 - btrfs_end_transaction(trans); 967 - if (uncorrectable) { 968 - spin_lock(&sctx->stat_lock); 969 - ++sctx->stat.uncorrectable_errors; 970 - spin_unlock(&sctx->stat_lock); 971 - btrfs_dev_replace_stats_inc( 972 - &fs_info->dev_replace.num_uncorrectable_read_errors); 973 - btrfs_err_rl_in_rcu(fs_info, 974 - "unable to fixup (nodatasum) error at logical %llu on dev %s", 975 - fixup->logical, rcu_str_deref(fixup->dev->name)); 976 - } 977 - 978 - btrfs_free_path(path); 979 - kfree(fixup); 980 - 981 - scrub_pending_trans_workers_dec(sctx); 982 - } 983 - 984 885 static inline void scrub_get_recover(struct scrub_recover *recover) 985 886 { 986 887 refcount_inc(&recover->refs); ··· 983 1260 984 1261 if (sctx->readonly) { 985 1262 ASSERT(!sctx->is_dev_replace); 986 - goto out; 987 - } 988 - 989 - /* 990 - * NOTE: Even for nodatasum case, it's still possible that it's a 991 - * compressed data extent, thus scrub_fixup_nodatasum(), which write 992 - * inode page cache onto disk, could cause serious data corruption. 993 - * 994 - * So here we could only read from disk, and hope our recovery could 995 - * reach disk before the newer write. 
996 - */ 997 - if (0 && !is_metadata && !have_csum) { 998 - struct scrub_fixup_nodatasum *fixup_nodatasum; 999 - 1000 - WARN_ON(sctx->is_dev_replace); 1001 - 1002 - /* 1003 - * !is_metadata and !have_csum, this means that the data 1004 - * might not be COWed, that it might be modified 1005 - * concurrently. The general strategy to work on the 1006 - * commit root does not help in the case when COW is not 1007 - * used. 1008 - */ 1009 - fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS); 1010 - if (!fixup_nodatasum) 1011 - goto did_not_correct_error; 1012 - fixup_nodatasum->sctx = sctx; 1013 - fixup_nodatasum->dev = dev; 1014 - fixup_nodatasum->logical = logical; 1015 - fixup_nodatasum->root = fs_info->extent_root; 1016 - fixup_nodatasum->mirror_num = failed_mirror_index + 1; 1017 - scrub_pending_trans_workers_inc(sctx); 1018 - btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, 1019 - scrub_fixup_nodatasum, NULL, NULL); 1020 - btrfs_queue_work(fs_info->scrub_workers, 1021 - &fixup_nodatasum->work); 1022 1263 goto out; 1023 1264 } 1024 1265 ··· 1553 1866 bio = btrfs_io_bio_alloc(1); 1554 1867 bio_set_dev(bio, page_bad->dev->bdev); 1555 1868 bio->bi_iter.bi_sector = page_bad->physical >> 9; 1556 - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 1869 + bio->bi_opf = REQ_OP_WRITE; 1557 1870 1558 1871 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); 1559 1872 if (PAGE_SIZE != ret) { ··· 1648 1961 bio->bi_end_io = scrub_wr_bio_end_io; 1649 1962 bio_set_dev(bio, sbio->dev->bdev); 1650 1963 bio->bi_iter.bi_sector = sbio->physical >> 9; 1651 - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 1964 + bio->bi_opf = REQ_OP_WRITE; 1652 1965 sbio->status = 0; 1653 1966 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 1654 1967 spage->physical_for_dev_replace || ··· 2048 2361 bio->bi_end_io = scrub_bio_end_io; 2049 2362 bio_set_dev(bio, sbio->dev->bdev); 2050 2363 bio->bi_iter.bi_sector = sbio->physical >> 9; 2051 - bio_set_op_attrs(bio, REQ_OP_READ, 0); 2364 + 
bio->bi_opf = REQ_OP_READ; 2052 2365 sbio->status = 0; 2053 2366 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 2054 2367 spage->physical || ··· 2487 2800 have_csum = scrub_find_csum(sctx, logical, csum); 2488 2801 if (have_csum == 0) 2489 2802 ++sctx->stat.no_csum; 2490 - if (0 && sctx->is_dev_replace && !have_csum) { 2491 - ret = copy_nocow_pages(sctx, logical, l, 2492 - mirror_num, 2493 - physical_for_dev_replace); 2494 - goto behind_scrub_pages; 2495 - } 2496 2803 } 2497 2804 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, 2498 2805 mirror_num, have_csum ? csum : NULL, 0, 2499 2806 physical_for_dev_replace); 2500 - behind_scrub_pages: 2501 2807 if (ret) 2502 2808 return ret; 2503 2809 len -= l; ··· 3543 3863 * -> btrfs_scrub_pause() 3544 3864 */ 3545 3865 scrub_pause_on(fs_info); 3546 - ret = btrfs_inc_block_group_ro(fs_info, cache); 3866 + ret = btrfs_inc_block_group_ro(cache); 3547 3867 if (!ret && is_dev_replace) { 3548 3868 /* 3549 3869 * If we are doing a device replace wait for any tasks ··· 3662 3982 if (!cache->removed && !cache->ro && cache->reserved == 0 && 3663 3983 btrfs_block_group_used(&cache->item) == 0) { 3664 3984 spin_unlock(&cache->lock); 3665 - spin_lock(&fs_info->unused_bgs_lock); 3666 - if (list_empty(&cache->bg_list)) { 3667 - btrfs_get_block_group(cache); 3668 - trace_btrfs_add_unused_block_group(cache); 3669 - list_add_tail(&cache->bg_list, 3670 - &fs_info->unused_bgs); 3671 - } 3672 - spin_unlock(&fs_info->unused_bgs_lock); 3985 + btrfs_mark_bg_unused(cache); 3673 3986 } else { 3674 3987 spin_unlock(&cache->lock); 3675 3988 } ··· 3745 4072 if (!fs_info->scrub_wr_completion_workers) 3746 4073 goto fail_scrub_wr_completion_workers; 3747 4074 3748 - fs_info->scrub_nocow_workers = 3749 - btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0); 3750 - if (!fs_info->scrub_nocow_workers) 3751 - goto fail_scrub_nocow_workers; 3752 4075 fs_info->scrub_parity_workers = 3753 4076 btrfs_alloc_workqueue(fs_info, 
"scrubparity", flags, 3754 4077 max_active, 2); ··· 3755 4086 return 0; 3756 4087 3757 4088 fail_scrub_parity_workers: 3758 - btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); 3759 - fail_scrub_nocow_workers: 3760 4089 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); 3761 4090 fail_scrub_wr_completion_workers: 3762 4091 btrfs_destroy_workqueue(fs_info->scrub_workers); ··· 3767 4100 if (--fs_info->scrub_workers_refcnt == 0) { 3768 4101 btrfs_destroy_workqueue(fs_info->scrub_workers); 3769 4102 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); 3770 - btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); 3771 4103 btrfs_destroy_workqueue(fs_info->scrub_parity_workers); 3772 4104 } 3773 4105 WARN_ON(fs_info->scrub_workers_refcnt < 0); ··· 3779 4113 struct scrub_ctx *sctx; 3780 4114 int ret; 3781 4115 struct btrfs_device *dev; 3782 - struct rcu_string *name; 3783 4116 3784 4117 if (btrfs_fs_closing(fs_info)) 3785 4118 return -EINVAL; ··· 3832 4167 if (!is_dev_replace && !readonly && 3833 4168 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { 3834 4169 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 3835 - rcu_read_lock(); 3836 - name = rcu_dereference(dev->name); 3837 - btrfs_err(fs_info, "scrub: device %s is not writable", 3838 - name->str); 3839 - rcu_read_unlock(); 4170 + btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable", 4171 + rcu_str_deref(dev->name)); 3840 4172 return -EROFS; 3841 4173 } 3842 4174 ··· 4020 4358 *extent_mirror_num = bbio->mirror_num; 4021 4359 *extent_dev = bbio->stripes[0].dev; 4022 4360 btrfs_put_bbio(bbio); 4023 - } 4024 - 4025 - static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 4026 - int mirror_num, u64 physical_for_dev_replace) 4027 - { 4028 - struct scrub_copy_nocow_ctx *nocow_ctx; 4029 - struct btrfs_fs_info *fs_info = sctx->fs_info; 4030 - 4031 - nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS); 4032 - if (!nocow_ctx) { 4033 - spin_lock(&sctx->stat_lock); 
4034 - sctx->stat.malloc_errors++; 4035 - spin_unlock(&sctx->stat_lock); 4036 - return -ENOMEM; 4037 - } 4038 - 4039 - scrub_pending_trans_workers_inc(sctx); 4040 - 4041 - nocow_ctx->sctx = sctx; 4042 - nocow_ctx->logical = logical; 4043 - nocow_ctx->len = len; 4044 - nocow_ctx->mirror_num = mirror_num; 4045 - nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; 4046 - btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, 4047 - copy_nocow_pages_worker, NULL, NULL); 4048 - INIT_LIST_HEAD(&nocow_ctx->inodes); 4049 - btrfs_queue_work(fs_info->scrub_nocow_workers, 4050 - &nocow_ctx->work); 4051 - 4052 - return 0; 4053 - } 4054 - 4055 - static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx) 4056 - { 4057 - struct scrub_copy_nocow_ctx *nocow_ctx = ctx; 4058 - struct scrub_nocow_inode *nocow_inode; 4059 - 4060 - nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS); 4061 - if (!nocow_inode) 4062 - return -ENOMEM; 4063 - nocow_inode->inum = inum; 4064 - nocow_inode->offset = offset; 4065 - nocow_inode->root = root; 4066 - list_add_tail(&nocow_inode->list, &nocow_ctx->inodes); 4067 - return 0; 4068 - } 4069 - 4070 - #define COPY_COMPLETE 1 4071 - 4072 - static void copy_nocow_pages_worker(struct btrfs_work *work) 4073 - { 4074 - struct scrub_copy_nocow_ctx *nocow_ctx = 4075 - container_of(work, struct scrub_copy_nocow_ctx, work); 4076 - struct scrub_ctx *sctx = nocow_ctx->sctx; 4077 - struct btrfs_fs_info *fs_info = sctx->fs_info; 4078 - struct btrfs_root *root = fs_info->extent_root; 4079 - u64 logical = nocow_ctx->logical; 4080 - u64 len = nocow_ctx->len; 4081 - int mirror_num = nocow_ctx->mirror_num; 4082 - u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; 4083 - int ret; 4084 - struct btrfs_trans_handle *trans = NULL; 4085 - struct btrfs_path *path; 4086 - int not_written = 0; 4087 - 4088 - path = btrfs_alloc_path(); 4089 - if (!path) { 4090 - spin_lock(&sctx->stat_lock); 4091 - sctx->stat.malloc_errors++; 4092 - 
spin_unlock(&sctx->stat_lock); 4093 - not_written = 1; 4094 - goto out; 4095 - } 4096 - 4097 - trans = btrfs_join_transaction(root); 4098 - if (IS_ERR(trans)) { 4099 - not_written = 1; 4100 - goto out; 4101 - } 4102 - 4103 - ret = iterate_inodes_from_logical(logical, fs_info, path, 4104 - record_inode_for_nocow, nocow_ctx, false); 4105 - if (ret != 0 && ret != -ENOENT) { 4106 - btrfs_warn(fs_info, 4107 - "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d", 4108 - logical, physical_for_dev_replace, len, mirror_num, 4109 - ret); 4110 - not_written = 1; 4111 - goto out; 4112 - } 4113 - 4114 - btrfs_end_transaction(trans); 4115 - trans = NULL; 4116 - while (!list_empty(&nocow_ctx->inodes)) { 4117 - struct scrub_nocow_inode *entry; 4118 - entry = list_first_entry(&nocow_ctx->inodes, 4119 - struct scrub_nocow_inode, 4120 - list); 4121 - list_del_init(&entry->list); 4122 - ret = copy_nocow_pages_for_inode(entry->inum, entry->offset, 4123 - entry->root, nocow_ctx); 4124 - kfree(entry); 4125 - if (ret == COPY_COMPLETE) { 4126 - ret = 0; 4127 - break; 4128 - } else if (ret) { 4129 - break; 4130 - } 4131 - } 4132 - out: 4133 - while (!list_empty(&nocow_ctx->inodes)) { 4134 - struct scrub_nocow_inode *entry; 4135 - entry = list_first_entry(&nocow_ctx->inodes, 4136 - struct scrub_nocow_inode, 4137 - list); 4138 - list_del_init(&entry->list); 4139 - kfree(entry); 4140 - } 4141 - if (trans && !IS_ERR(trans)) 4142 - btrfs_end_transaction(trans); 4143 - if (not_written) 4144 - btrfs_dev_replace_stats_inc(&fs_info->dev_replace. 
4145 - num_uncorrectable_read_errors); 4146 - 4147 - btrfs_free_path(path); 4148 - kfree(nocow_ctx); 4149 - 4150 - scrub_pending_trans_workers_dec(sctx); 4151 - } 4152 - 4153 - static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len, 4154 - u64 logical) 4155 - { 4156 - struct extent_state *cached_state = NULL; 4157 - struct btrfs_ordered_extent *ordered; 4158 - struct extent_io_tree *io_tree; 4159 - struct extent_map *em; 4160 - u64 lockstart = start, lockend = start + len - 1; 4161 - int ret = 0; 4162 - 4163 - io_tree = &inode->io_tree; 4164 - 4165 - lock_extent_bits(io_tree, lockstart, lockend, &cached_state); 4166 - ordered = btrfs_lookup_ordered_range(inode, lockstart, len); 4167 - if (ordered) { 4168 - btrfs_put_ordered_extent(ordered); 4169 - ret = 1; 4170 - goto out_unlock; 4171 - } 4172 - 4173 - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 4174 - if (IS_ERR(em)) { 4175 - ret = PTR_ERR(em); 4176 - goto out_unlock; 4177 - } 4178 - 4179 - /* 4180 - * This extent does not actually cover the logical extent anymore, 4181 - * move on to the next inode. 
4182 - */ 4183 - if (em->block_start > logical || 4184 - em->block_start + em->block_len < logical + len || 4185 - test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 4186 - free_extent_map(em); 4187 - ret = 1; 4188 - goto out_unlock; 4189 - } 4190 - free_extent_map(em); 4191 - 4192 - out_unlock: 4193 - unlock_extent_cached(io_tree, lockstart, lockend, &cached_state); 4194 - return ret; 4195 - } 4196 - 4197 - static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, 4198 - struct scrub_copy_nocow_ctx *nocow_ctx) 4199 - { 4200 - struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info; 4201 - struct btrfs_key key; 4202 - struct inode *inode; 4203 - struct page *page; 4204 - struct btrfs_root *local_root; 4205 - struct extent_io_tree *io_tree; 4206 - u64 physical_for_dev_replace; 4207 - u64 nocow_ctx_logical; 4208 - u64 len = nocow_ctx->len; 4209 - unsigned long index; 4210 - int srcu_index; 4211 - int ret = 0; 4212 - int err = 0; 4213 - 4214 - key.objectid = root; 4215 - key.type = BTRFS_ROOT_ITEM_KEY; 4216 - key.offset = (u64)-1; 4217 - 4218 - srcu_index = srcu_read_lock(&fs_info->subvol_srcu); 4219 - 4220 - local_root = btrfs_read_fs_root_no_name(fs_info, &key); 4221 - if (IS_ERR(local_root)) { 4222 - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); 4223 - return PTR_ERR(local_root); 4224 - } 4225 - 4226 - key.type = BTRFS_INODE_ITEM_KEY; 4227 - key.objectid = inum; 4228 - key.offset = 0; 4229 - inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); 4230 - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); 4231 - if (IS_ERR(inode)) 4232 - return PTR_ERR(inode); 4233 - 4234 - /* Avoid truncate/dio/punch hole.. 
*/ 4235 - inode_lock(inode); 4236 - inode_dio_wait(inode); 4237 - 4238 - physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; 4239 - io_tree = &BTRFS_I(inode)->io_tree; 4240 - nocow_ctx_logical = nocow_ctx->logical; 4241 - 4242 - ret = check_extent_to_block(BTRFS_I(inode), offset, len, 4243 - nocow_ctx_logical); 4244 - if (ret) { 4245 - ret = ret > 0 ? 0 : ret; 4246 - goto out; 4247 - } 4248 - 4249 - while (len >= PAGE_SIZE) { 4250 - index = offset >> PAGE_SHIFT; 4251 - again: 4252 - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 4253 - if (!page) { 4254 - btrfs_err(fs_info, "find_or_create_page() failed"); 4255 - ret = -ENOMEM; 4256 - goto out; 4257 - } 4258 - 4259 - if (PageUptodate(page)) { 4260 - if (PageDirty(page)) 4261 - goto next_page; 4262 - } else { 4263 - ClearPageError(page); 4264 - err = extent_read_full_page(io_tree, page, 4265 - btrfs_get_extent, 4266 - nocow_ctx->mirror_num); 4267 - if (err) { 4268 - ret = err; 4269 - goto next_page; 4270 - } 4271 - 4272 - lock_page(page); 4273 - /* 4274 - * If the page has been remove from the page cache, 4275 - * the data on it is meaningless, because it may be 4276 - * old one, the new data may be written into the new 4277 - * page in the page cache. 4278 - */ 4279 - if (page->mapping != inode->i_mapping) { 4280 - unlock_page(page); 4281 - put_page(page); 4282 - goto again; 4283 - } 4284 - if (!PageUptodate(page)) { 4285 - ret = -EIO; 4286 - goto next_page; 4287 - } 4288 - } 4289 - 4290 - ret = check_extent_to_block(BTRFS_I(inode), offset, len, 4291 - nocow_ctx_logical); 4292 - if (ret) { 4293 - ret = ret > 0 ? 
0 : ret; 4294 - goto next_page; 4295 - } 4296 - 4297 - err = write_page_nocow(nocow_ctx->sctx, 4298 - physical_for_dev_replace, page); 4299 - if (err) 4300 - ret = err; 4301 - next_page: 4302 - unlock_page(page); 4303 - put_page(page); 4304 - 4305 - if (ret) 4306 - break; 4307 - 4308 - offset += PAGE_SIZE; 4309 - physical_for_dev_replace += PAGE_SIZE; 4310 - nocow_ctx_logical += PAGE_SIZE; 4311 - len -= PAGE_SIZE; 4312 - } 4313 - ret = COPY_COMPLETE; 4314 - out: 4315 - inode_unlock(inode); 4316 - iput(inode); 4317 - return ret; 4318 - } 4319 - 4320 - static int write_page_nocow(struct scrub_ctx *sctx, 4321 - u64 physical_for_dev_replace, struct page *page) 4322 - { 4323 - struct bio *bio; 4324 - struct btrfs_device *dev; 4325 - 4326 - dev = sctx->wr_tgtdev; 4327 - if (!dev) 4328 - return -EIO; 4329 - if (!dev->bdev) { 4330 - btrfs_warn_rl(dev->fs_info, 4331 - "scrub write_page_nocow(bdev == NULL) is unexpected"); 4332 - return -EIO; 4333 - } 4334 - bio = btrfs_io_bio_alloc(1); 4335 - bio->bi_iter.bi_size = 0; 4336 - bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; 4337 - bio_set_dev(bio, dev->bdev); 4338 - bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; 4339 - /* bio_add_page won't fail on a freshly allocated bio */ 4340 - bio_add_page(bio, page, PAGE_SIZE, 0); 4341 - 4342 - if (btrfsic_submit_bio_wait(bio)) { 4343 - bio_put(bio); 4344 - btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); 4345 - return -EIO; 4346 - } 4347 - 4348 - bio_put(bio); 4349 - return 0; 4350 4361 }

+147 -25

fs/btrfs/send.c

··· 100 100 u64 cur_inode_rdev; 101 101 u64 cur_inode_last_extent; 102 102 u64 cur_inode_next_write_offset; 103 + bool ignore_cur_inode; 103 104 104 105 u64 send_progress; 105 106 ··· 1501 1500 BUG_ON(compression); 1502 1501 1503 1502 off = btrfs_file_extent_inline_start(ei); 1504 - len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei); 1503 + len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); 1505 1504 1506 1505 ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); 1507 1506 ··· 5007 5006 u64 len; 5008 5007 int ret = 0; 5009 5008 5009 + /* 5010 + * A hole that starts at EOF or beyond it. Since we do not yet support 5011 + * fallocate (for extent preallocation and hole punching), sending a 5012 + * write of zeroes starting at EOF or beyond would later require issuing 5013 + * a truncate operation which would undo the write and achieve nothing. 5014 + */ 5015 + if (offset >= sctx->cur_inode_size) 5016 + return 0; 5017 + 5010 5018 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) 5011 5019 return send_update_extent(sctx, offset, end - offset); 5012 5020 ··· 5170 5160 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 5171 5161 type = btrfs_file_extent_type(leaf, ei); 5172 5162 if (type == BTRFS_FILE_EXTENT_INLINE) { 5173 - ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); 5163 + ext_len = btrfs_file_extent_ram_bytes(leaf, ei); 5174 5164 ext_len = PAGE_ALIGN(ext_len); 5175 5165 } else { 5176 5166 ext_len = btrfs_file_extent_num_bytes(leaf, ei); ··· 5246 5236 struct btrfs_file_extent_item); 5247 5237 type = btrfs_file_extent_type(path->nodes[0], ei); 5248 5238 if (type == BTRFS_FILE_EXTENT_INLINE) { 5249 - len = btrfs_file_extent_inline_len(path->nodes[0], 5250 - path->slots[0], ei); 5239 + len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); 5251 5240 /* 5252 5241 * it is possible the inline item won't cover the whole page, 5253 5242 * but there may be items after this page. 
Make ··· 5384 5375 } 5385 5376 5386 5377 if (right_type == BTRFS_FILE_EXTENT_INLINE) { 5387 - right_len = btrfs_file_extent_inline_len(eb, slot, ei); 5378 + right_len = btrfs_file_extent_ram_bytes(eb, ei); 5388 5379 right_len = PAGE_ALIGN(right_len); 5389 5380 } else { 5390 5381 right_len = btrfs_file_extent_num_bytes(eb, ei); ··· 5505 5496 struct btrfs_file_extent_item); 5506 5497 type = btrfs_file_extent_type(path->nodes[0], fi); 5507 5498 if (type == BTRFS_FILE_EXTENT_INLINE) { 5508 - u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5509 - path->slots[0], fi); 5499 + u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); 5510 5500 extent_end = ALIGN(key.offset + size, 5511 5501 sctx->send_root->fs_info->sectorsize); 5512 5502 } else { ··· 5568 5560 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 5569 5561 if (btrfs_file_extent_type(leaf, fi) == 5570 5562 BTRFS_FILE_EXTENT_INLINE) { 5571 - u64 size = btrfs_file_extent_inline_len(leaf, slot, fi); 5563 + u64 size = btrfs_file_extent_ram_bytes(leaf, fi); 5572 5564 5573 5565 extent_end = ALIGN(key.offset + size, 5574 5566 root->fs_info->sectorsize); ··· 5614 5606 struct btrfs_file_extent_item); 5615 5607 type = btrfs_file_extent_type(path->nodes[0], fi); 5616 5608 if (type == BTRFS_FILE_EXTENT_INLINE) { 5617 - u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5618 - path->slots[0], fi); 5609 + u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); 5619 5610 extent_end = ALIGN(key->offset + size, 5620 5611 sctx->send_root->fs_info->sectorsize); 5621 5612 } else { ··· 5806 5799 int pending_move = 0; 5807 5800 int refs_processed = 0; 5808 5801 5802 + if (sctx->ignore_cur_inode) 5803 + return 0; 5804 + 5809 5805 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 5810 5806 &refs_processed); 5811 5807 if (ret < 0) ··· 5927 5917 return ret; 5928 5918 } 5929 5919 5920 + struct parent_paths_ctx { 5921 + struct list_head *refs; 5922 + struct send_ctx *sctx; 5923 + }; 5924 
+ 5925 + static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name, 5926 + void *ctx) 5927 + { 5928 + struct parent_paths_ctx *ppctx = ctx; 5929 + 5930 + return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx, 5931 + ppctx->refs); 5932 + } 5933 + 5934 + /* 5935 + * Issue unlink operations for all paths of the current inode found in the 5936 + * parent snapshot. 5937 + */ 5938 + static int btrfs_unlink_all_paths(struct send_ctx *sctx) 5939 + { 5940 + LIST_HEAD(deleted_refs); 5941 + struct btrfs_path *path; 5942 + struct btrfs_key key; 5943 + struct parent_paths_ctx ctx; 5944 + int ret; 5945 + 5946 + path = alloc_path_for_send(); 5947 + if (!path) 5948 + return -ENOMEM; 5949 + 5950 + key.objectid = sctx->cur_ino; 5951 + key.type = BTRFS_INODE_REF_KEY; 5952 + key.offset = 0; 5953 + ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); 5954 + if (ret < 0) 5955 + goto out; 5956 + 5957 + ctx.refs = &deleted_refs; 5958 + ctx.sctx = sctx; 5959 + 5960 + while (true) { 5961 + struct extent_buffer *eb = path->nodes[0]; 5962 + int slot = path->slots[0]; 5963 + 5964 + if (slot >= btrfs_header_nritems(eb)) { 5965 + ret = btrfs_next_leaf(sctx->parent_root, path); 5966 + if (ret < 0) 5967 + goto out; 5968 + else if (ret > 0) 5969 + break; 5970 + continue; 5971 + } 5972 + 5973 + btrfs_item_key_to_cpu(eb, &key, slot); 5974 + if (key.objectid != sctx->cur_ino) 5975 + break; 5976 + if (key.type != BTRFS_INODE_REF_KEY && 5977 + key.type != BTRFS_INODE_EXTREF_KEY) 5978 + break; 5979 + 5980 + ret = iterate_inode_ref(sctx->parent_root, path, &key, 1, 5981 + record_parent_ref, &ctx); 5982 + if (ret < 0) 5983 + goto out; 5984 + 5985 + path->slots[0]++; 5986 + } 5987 + 5988 + while (!list_empty(&deleted_refs)) { 5989 + struct recorded_ref *ref; 5990 + 5991 + ref = list_first_entry(&deleted_refs, struct recorded_ref, list); 5992 + ret = send_unlink(sctx, ref->full_path); 5993 + if (ret < 0) 5994 + goto out; 5995 + fs_path_free(ref->full_path); 
5996 + list_del(&ref->list); 5997 + kfree(ref); 5998 + } 5999 + ret = 0; 6000 + out: 6001 + btrfs_free_path(path); 6002 + if (ret) 6003 + __free_recorded_refs(&deleted_refs); 6004 + return ret; 6005 + } 6006 + 5930 6007 static int changed_inode(struct send_ctx *sctx, 5931 6008 enum btrfs_compare_tree_result result) 5932 6009 { ··· 6028 5931 sctx->cur_inode_new_gen = 0; 6029 5932 sctx->cur_inode_last_extent = (u64)-1; 6030 5933 sctx->cur_inode_next_write_offset = 0; 5934 + sctx->ignore_cur_inode = false; 6031 5935 6032 5936 /* 6033 5937 * Set send_progress to current inode. This will tell all get_cur_xxx ··· 6067 5969 if (left_gen != right_gen && 6068 5970 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 6069 5971 sctx->cur_inode_new_gen = 1; 5972 + } 5973 + 5974 + /* 5975 + * Normally we do not find inodes with a link count of zero (orphans) 5976 + * because the most common case is to create a snapshot and use it 5977 + * for a send operation. However other less common use cases involve 5978 + * using a subvolume and send it after turning it to RO mode just 5979 + * after deleting all hard links of a file while holding an open 5980 + * file descriptor against it or turning a RO snapshot into RW mode, 5981 + * keep an open file descriptor against a file, delete it and then 5982 + * turn the snapshot back to RO mode before using it for a send 5983 + * operation. So if we find such cases, ignore the inode and all its 5984 + * items completely if it's a new inode, or if it's a changed inode 5985 + * make sure all its previous paths (from the parent snapshot) are all 5986 + * unlinked and all other the inode items are ignored. 
5987 + */ 5988 + if (result == BTRFS_COMPARE_TREE_NEW || 5989 + result == BTRFS_COMPARE_TREE_CHANGED) { 5990 + u32 nlinks; 5991 + 5992 + nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii); 5993 + if (nlinks == 0) { 5994 + sctx->ignore_cur_inode = true; 5995 + if (result == BTRFS_COMPARE_TREE_CHANGED) 5996 + ret = btrfs_unlink_all_paths(sctx); 5997 + goto out; 5998 + } 6070 5999 } 6071 6000 6072 6001 if (result == BTRFS_COMPARE_TREE_NEW) { ··· 6434 6309 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 6435 6310 goto out; 6436 6311 6437 - if (key->type == BTRFS_INODE_ITEM_KEY) 6312 + if (key->type == BTRFS_INODE_ITEM_KEY) { 6438 6313 ret = changed_inode(sctx, result); 6439 - else if (key->type == BTRFS_INODE_REF_KEY || 6440 - key->type == BTRFS_INODE_EXTREF_KEY) 6441 - ret = changed_ref(sctx, result); 6442 - else if (key->type == BTRFS_XATTR_ITEM_KEY) 6443 - ret = changed_xattr(sctx, result); 6444 - else if (key->type == BTRFS_EXTENT_DATA_KEY) 6445 - ret = changed_extent(sctx, result); 6314 + } else if (!sctx->ignore_cur_inode) { 6315 + if (key->type == BTRFS_INODE_REF_KEY || 6316 + key->type == BTRFS_INODE_EXTREF_KEY) 6317 + ret = changed_ref(sctx, result); 6318 + else if (key->type == BTRFS_XATTR_ITEM_KEY) 6319 + ret = changed_xattr(sctx, result); 6320 + else if (key->type == BTRFS_EXTENT_DATA_KEY) 6321 + ret = changed_extent(sctx, result); 6322 + } 6446 6323 6447 6324 out: 6448 6325 return ret; ··· 6455 6328 int ret; 6456 6329 struct btrfs_root *send_root = sctx->send_root; 6457 6330 struct btrfs_key key; 6458 - struct btrfs_key found_key; 6459 6331 struct btrfs_path *path; 6460 6332 struct extent_buffer *eb; 6461 6333 int slot; ··· 6476 6350 while (1) { 6477 6351 eb = path->nodes[0]; 6478 6352 slot = path->slots[0]; 6479 - btrfs_item_key_to_cpu(eb, &found_key, slot); 6353 + btrfs_item_key_to_cpu(eb, &key, slot); 6480 6354 6481 - ret = changed_cb(path, NULL, &found_key, 6355 + ret = changed_cb(path, NULL, &key, 6482 6356 BTRFS_COMPARE_TREE_NEW, sctx); 6483 
6357 if (ret < 0) 6484 6358 goto out; 6485 - 6486 - key.objectid = found_key.objectid; 6487 - key.type = found_key.type; 6488 - key.offset = found_key.offset + 1; 6489 6359 6490 6360 ret = btrfs_next_item(send_root, path); 6491 6361 if (ret < 0)

-1

fs/btrfs/struct-funcs.c

+62 -53

fs/btrfs/super.c

··· 5 5 6 6 #include <linux/blkdev.h> 7 7 #include <linux/module.h> 8 - #include <linux/buffer_head.h> 9 8 #include <linux/fs.h> 10 9 #include <linux/pagemap.h> 11 10 #include <linux/highmem.h> ··· 14 15 #include <linux/string.h> 15 16 #include <linux/backing-dev.h> 16 17 #include <linux/mount.h> 17 - #include <linux/mpage.h> 18 - #include <linux/swap.h> 19 18 #include <linux/writeback.h> 20 19 #include <linux/statfs.h> 21 20 #include <linux/compat.h> ··· 465 468 case Opt_subvolrootid: 466 469 case Opt_device: 467 470 /* 468 - * These are parsed by btrfs_parse_subvol_options 469 - * and btrfs_parse_early_options 470 - * and can be happily ignored here. 471 + * These are parsed by btrfs_parse_subvol_options or 472 + * btrfs_parse_device_options and can be ignored here. 471 473 */ 472 474 break; 473 475 case Opt_nodatasum: ··· 756 760 case Opt_recovery: 757 761 btrfs_warn(info, 758 762 "'recovery' is deprecated, use 'usebackuproot' instead"); 763 + /* fall through */ 759 764 case Opt_usebackuproot: 760 765 btrfs_info(info, 761 766 "trying to use backup root at mount time"); ··· 882 885 * All other options will be parsed on much later in the mount process and 883 886 * only when we need to allocate a new super block. 
884 887 */ 885 - static int btrfs_parse_early_options(const char *options, fmode_t flags, 886 - void *holder, struct btrfs_fs_devices **fs_devices) 888 + static int btrfs_parse_device_options(const char *options, fmode_t flags, 889 + void *holder) 887 890 { 888 891 substring_t args[MAX_OPT_ARGS]; 889 892 char *device_name, *opts, *orig, *p; 893 + struct btrfs_device *device = NULL; 890 894 int error = 0; 895 + 896 + lockdep_assert_held(&uuid_mutex); 891 897 892 898 if (!options) 893 899 return 0; ··· 917 917 error = -ENOMEM; 918 918 goto out; 919 919 } 920 - error = btrfs_scan_one_device(device_name, 921 - flags, holder, fs_devices); 920 + device = btrfs_scan_one_device(device_name, flags, 921 + holder); 922 922 kfree(device_name); 923 - if (error) 923 + if (IS_ERR(device)) { 924 + error = PTR_ERR(device); 924 925 goto out; 926 + } 925 927 } 926 928 } 927 929 ··· 937 935 * 938 936 * The value is later passed to mount_subvol() 939 937 */ 940 - static int btrfs_parse_subvol_options(const char *options, fmode_t flags, 941 - char **subvol_name, u64 *subvol_objectid) 938 + static int btrfs_parse_subvol_options(const char *options, char **subvol_name, 939 + u64 *subvol_objectid) 942 940 { 943 941 substring_t args[MAX_OPT_ARGS]; 944 942 char *opts, *orig, *p; ··· 950 948 951 949 /* 952 950 * strsep changes the string, duplicate it because 953 - * btrfs_parse_early_options gets called later 951 + * btrfs_parse_device_options gets called later 954 952 */ 955 953 opts = kstrdup(options, GFP_KERNEL); 956 954 if (!opts) ··· 1519 1517 { 1520 1518 struct block_device *bdev = NULL; 1521 1519 struct super_block *s; 1520 + struct btrfs_device *device = NULL; 1522 1521 struct btrfs_fs_devices *fs_devices = NULL; 1523 1522 struct btrfs_fs_info *fs_info = NULL; 1524 1523 struct security_mnt_opts new_sec_opts; ··· 1529 1526 if (!(flags & SB_RDONLY)) 1530 1527 mode |= FMODE_WRITE; 1531 1528 1532 - error = btrfs_parse_early_options(data, mode, fs_type, 1533 - &fs_devices); 1534 - if 
(error) { 1535 - return ERR_PTR(error); 1536 - } 1537 - 1538 1529 security_init_mnt_opts(&new_sec_opts); 1539 1530 if (data) { 1540 1531 error = parse_security_options(data, &new_sec_opts); 1541 1532 if (error) 1542 1533 return ERR_PTR(error); 1543 1534 } 1544 - 1545 - error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); 1546 - if (error) 1547 - goto error_sec_opts; 1548 1535 1549 1536 /* 1550 1537 * Setup a dummy root and fs_info for test/set super. This is because ··· 1548 1555 goto error_sec_opts; 1549 1556 } 1550 1557 1551 - fs_info->fs_devices = fs_devices; 1552 - 1553 1558 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1554 1559 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1555 1560 security_init_mnt_opts(&fs_info->security_opts); ··· 1556 1565 goto error_fs_info; 1557 1566 } 1558 1567 1568 + mutex_lock(&uuid_mutex); 1569 + error = btrfs_parse_device_options(data, mode, fs_type); 1570 + if (error) { 1571 + mutex_unlock(&uuid_mutex); 1572 + goto error_fs_info; 1573 + } 1574 + 1575 + device = btrfs_scan_one_device(device_name, mode, fs_type); 1576 + if (IS_ERR(device)) { 1577 + mutex_unlock(&uuid_mutex); 1578 + error = PTR_ERR(device); 1579 + goto error_fs_info; 1580 + } 1581 + 1582 + fs_devices = device->fs_devices; 1583 + fs_info->fs_devices = fs_devices; 1584 + 1559 1585 error = btrfs_open_devices(fs_devices, mode, fs_type); 1586 + mutex_unlock(&uuid_mutex); 1560 1587 if (error) 1561 1588 goto error_fs_info; 1562 1589 ··· 1659 1650 if (!(flags & SB_RDONLY)) 1660 1651 mode |= FMODE_WRITE; 1661 1652 1662 - error = btrfs_parse_subvol_options(data, mode, 1663 - &subvol_name, &subvol_objectid); 1653 + error = btrfs_parse_subvol_options(data, &subvol_name, 1654 + &subvol_objectid); 1664 1655 if (error) { 1665 1656 kfree(subvol_name); 1666 1657 return ERR_PTR(error); ··· 2107 2098 btrfs_account_ro_block_groups_free_space(found); 2108 2099 2109 2100 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { 2110 - if 
(!list_empty(&found->block_groups[i])) { 2111 - switch (i) { 2112 - case BTRFS_RAID_DUP: 2113 - case BTRFS_RAID_RAID1: 2114 - case BTRFS_RAID_RAID10: 2115 - factor = 2; 2116 - } 2117 - } 2101 + if (!list_empty(&found->block_groups[i])) 2102 + factor = btrfs_bg_type_to_factor( 2103 + btrfs_raid_array[i].bg_flag); 2118 2104 } 2119 2105 } 2120 2106 ··· 2226 2222 unsigned long arg) 2227 2223 { 2228 2224 struct btrfs_ioctl_vol_args *vol; 2229 - struct btrfs_fs_devices *fs_devices; 2225 + struct btrfs_device *device = NULL; 2230 2226 int ret = -ENOTTY; 2231 2227 2232 2228 if (!capable(CAP_SYS_ADMIN)) ··· 2238 2234 2239 2235 switch (cmd) { 2240 2236 case BTRFS_IOC_SCAN_DEV: 2241 - ret = btrfs_scan_one_device(vol->name, FMODE_READ, 2242 - &btrfs_root_fs_type, &fs_devices); 2237 + mutex_lock(&uuid_mutex); 2238 + device = btrfs_scan_one_device(vol->name, FMODE_READ, 2239 + &btrfs_root_fs_type); 2240 + ret = PTR_ERR_OR_ZERO(device); 2241 + mutex_unlock(&uuid_mutex); 2243 2242 break; 2244 2243 case BTRFS_IOC_DEVICES_READY: 2245 - ret = btrfs_scan_one_device(vol->name, FMODE_READ, 2246 - &btrfs_root_fs_type, &fs_devices); 2247 - if (ret) 2244 + mutex_lock(&uuid_mutex); 2245 + device = btrfs_scan_one_device(vol->name, FMODE_READ, 2246 + &btrfs_root_fs_type); 2247 + if (IS_ERR(device)) { 2248 + mutex_unlock(&uuid_mutex); 2249 + ret = PTR_ERR(device); 2248 2250 break; 2249 - ret = !(fs_devices->num_devices == fs_devices->total_devices); 2251 + } 2252 + ret = !(device->fs_devices->num_devices == 2253 + device->fs_devices->total_devices); 2254 + mutex_unlock(&uuid_mutex); 2250 2255 break; 2251 2256 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 2252 2257 ret = btrfs_ioctl_get_supported_features((void __user*)arg); ··· 2303 2290 struct btrfs_fs_devices *cur_devices; 2304 2291 struct btrfs_device *dev, *first_dev = NULL; 2305 2292 struct list_head *head; 2306 - struct rcu_string *name; 2307 2293 2308 2294 /* 2309 2295 * Lightweight locking of the devices. 
We should not need ··· 2326 2314 cur_devices = cur_devices->seed; 2327 2315 } 2328 2316 2329 - if (first_dev) { 2330 - name = rcu_dereference(first_dev->name); 2331 - seq_escape(m, name->str, " \t\n\\"); 2332 - } else { 2317 + if (first_dev) 2318 + seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\"); 2319 + else 2333 2320 WARN_ON(1); 2334 - } 2335 2321 rcu_read_unlock(); 2336 2322 return 0; 2337 2323 } ··· 2341 2331 .sync_fs = btrfs_sync_fs, 2342 2332 .show_options = btrfs_show_options, 2343 2333 .show_devname = btrfs_show_devname, 2344 - .write_inode = btrfs_write_inode, 2345 2334 .alloc_inode = btrfs_alloc_inode, 2346 2335 .destroy_inode = btrfs_destroy_inode, 2347 2336 .statfs = btrfs_statfs, ··· 2378 2369 2379 2370 static void __init btrfs_print_mod_info(void) 2380 2371 { 2381 - pr_info("Btrfs loaded, crc32c=%s" 2372 + static const char options[] = "" 2382 2373 #ifdef CONFIG_BTRFS_DEBUG 2383 2374 ", debug=on" 2384 2375 #endif ··· 2391 2382 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 2392 2383 ", ref-verify=on" 2393 2384 #endif 2394 - "\n", 2395 - crc32c_impl()); 2385 + ; 2386 + pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options); 2396 2387 } 2397 2388 2398 2389 static int __init init_btrfs_fs(void)

-2

fs/btrfs/sysfs.c

··· 7 7 #include <linux/slab.h> 8 8 #include <linux/spinlock.h> 9 9 #include <linux/completion.h> 10 - #include <linux/buffer_head.h> 11 10 #include <linux/kobject.h> 12 11 #include <linux/bug.h> 13 - #include <linux/genhd.h> 14 12 #include <linux/debugfs.h> 15 13 16 14 #include "ctree.h"

+12 -12

fs/btrfs/tests/qgroup-tests.c

··· 216 216 btrfs_init_dummy_trans(&trans, fs_info); 217 217 218 218 test_msg("qgroup basic add"); 219 - ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID); 219 + ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID); 220 220 if (ret) { 221 221 test_err("couldn't create a qgroup %d", ret); 222 222 return ret; ··· 249 249 return ret; 250 250 } 251 251 252 - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 253 - nodesize, old_roots, new_roots); 252 + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, 253 + new_roots); 254 254 if (ret) { 255 255 test_err("couldn't account space for a qgroup %d", ret); 256 256 return ret; ··· 285 285 return ret; 286 286 } 287 287 288 - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 289 - nodesize, old_roots, new_roots); 288 + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, 289 + new_roots); 290 290 if (ret) { 291 291 test_err("couldn't account space for a qgroup %d", ret); 292 292 return -EINVAL; ··· 322 322 * We have BTRFS_FS_TREE_OBJECTID created already from the 323 323 * previous test. 
324 324 */ 325 - ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID); 325 + ret = btrfs_create_qgroup(&trans, BTRFS_FIRST_FREE_OBJECTID); 326 326 if (ret) { 327 327 test_err("couldn't create a qgroup %d", ret); 328 328 return ret; ··· 350 350 return ret; 351 351 } 352 352 353 - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 354 - nodesize, old_roots, new_roots); 353 + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, 354 + new_roots); 355 355 if (ret) { 356 356 test_err("couldn't account space for a qgroup %d", ret); 357 357 return ret; ··· 385 385 return ret; 386 386 } 387 387 388 - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 389 - nodesize, old_roots, new_roots); 388 + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, 389 + new_roots); 390 390 if (ret) { 391 391 test_err("couldn't account space for a qgroup %d", ret); 392 392 return ret; ··· 426 426 return ret; 427 427 } 428 428 429 - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 430 - nodesize, old_roots, new_roots); 429 + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, 430 + new_roots); 431 431 if (ret) { 432 432 test_err("couldn't account space for a qgroup %d", ret); 433 433 return ret;

+5 -6

fs/btrfs/transaction.c

··· 241 241 refcount_set(&cur_trans->use_count, 2); 242 242 atomic_set(&cur_trans->pending_ordered, 0); 243 243 cur_trans->flags = 0; 244 - cur_trans->start_time = get_seconds(); 244 + cur_trans->start_time = ktime_get_seconds(); 245 245 246 246 memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); 247 247 ··· 680 680 681 681 trans = start_transaction(root, 0, TRANS_ATTACH, 682 682 BTRFS_RESERVE_NO_FLUSH, true); 683 - if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT) 683 + if (trans == ERR_PTR(-ENOENT)) 684 684 btrfs_wait_for_commit(root->fs_info, 0); 685 685 686 686 return trans; ··· 1152 1152 ret = btrfs_run_dev_replace(trans, fs_info); 1153 1153 if (ret) 1154 1154 return ret; 1155 - ret = btrfs_run_qgroups(trans, fs_info); 1155 + ret = btrfs_run_qgroups(trans); 1156 1156 if (ret) 1157 1157 return ret; 1158 1158 ··· 1355 1355 goto out; 1356 1356 1357 1357 /* Now qgroup are all updated, we can inherit it to new qgroups */ 1358 - ret = btrfs_qgroup_inherit(trans, fs_info, 1359 - src->root_key.objectid, dst_objectid, 1358 + ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid, 1360 1359 inherit); 1361 1360 if (ret < 0) 1362 1361 goto out; ··· 1573 1574 /* 1574 1575 * insert root back/forward references 1575 1576 */ 1576 - ret = btrfs_add_root_ref(trans, fs_info, objectid, 1577 + ret = btrfs_add_root_ref(trans, objectid, 1577 1578 parent_root->root_key.objectid, 1578 1579 btrfs_ino(BTRFS_I(parent_inode)), index, 1579 1580 dentry->d_name.name, dentry->d_name.len);

+1 -1

fs/btrfs/transaction.h

··· 48 48 int aborted; 49 49 struct list_head list; 50 50 struct extent_io_tree dirty_pages; 51 - unsigned long start_time; 51 + time64_t start_time; 52 52 wait_queue_head_t writer_wait; 53 53 wait_queue_head_t commit_wait; 54 54 wait_queue_head_t pending_wait;

+114 -1

fs/btrfs/tree-checker.c

··· 19 19 #include "tree-checker.h" 20 20 #include "disk-io.h" 21 21 #include "compression.h" 22 + #include "volumes.h" 22 23 23 24 /* 24 25 * Error message should follow the following format: ··· 354 353 return 0; 355 354 } 356 355 356 + __printf(4, 5) 357 + __cold 358 + static void block_group_err(const struct btrfs_fs_info *fs_info, 359 + const struct extent_buffer *eb, int slot, 360 + const char *fmt, ...) 361 + { 362 + struct btrfs_key key; 363 + struct va_format vaf; 364 + va_list args; 365 + 366 + btrfs_item_key_to_cpu(eb, &key, slot); 367 + va_start(args, fmt); 368 + 369 + vaf.fmt = fmt; 370 + vaf.va = &args; 371 + 372 + btrfs_crit(fs_info, 373 + "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", 374 + btrfs_header_level(eb) == 0 ? "leaf" : "node", 375 + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, 376 + key.objectid, key.offset, &vaf); 377 + va_end(args); 378 + } 379 + 380 + static int check_block_group_item(struct btrfs_fs_info *fs_info, 381 + struct extent_buffer *leaf, 382 + struct btrfs_key *key, int slot) 383 + { 384 + struct btrfs_block_group_item bgi; 385 + u32 item_size = btrfs_item_size_nr(leaf, slot); 386 + u64 flags; 387 + u64 type; 388 + 389 + /* 390 + * Here we don't really care about alignment since extent allocator can 391 + * handle it. We care more about the size, as if one block group is 392 + * larger than maximum size, it must be some obvious corruption. 
393 + */ 394 + if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { 395 + block_group_err(fs_info, leaf, slot, 396 + "invalid block group size, have %llu expect (0, %llu]", 397 + key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); 398 + return -EUCLEAN; 399 + } 400 + 401 + if (item_size != sizeof(bgi)) { 402 + block_group_err(fs_info, leaf, slot, 403 + "invalid item size, have %u expect %zu", 404 + item_size, sizeof(bgi)); 405 + return -EUCLEAN; 406 + } 407 + 408 + read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), 409 + sizeof(bgi)); 410 + if (btrfs_block_group_chunk_objectid(&bgi) != 411 + BTRFS_FIRST_CHUNK_TREE_OBJECTID) { 412 + block_group_err(fs_info, leaf, slot, 413 + "invalid block group chunk objectid, have %llu expect %llu", 414 + btrfs_block_group_chunk_objectid(&bgi), 415 + BTRFS_FIRST_CHUNK_TREE_OBJECTID); 416 + return -EUCLEAN; 417 + } 418 + 419 + if (btrfs_block_group_used(&bgi) > key->offset) { 420 + block_group_err(fs_info, leaf, slot, 421 + "invalid block group used, have %llu expect [0, %llu)", 422 + btrfs_block_group_used(&bgi), key->offset); 423 + return -EUCLEAN; 424 + } 425 + 426 + flags = btrfs_block_group_flags(&bgi); 427 + if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { 428 + block_group_err(fs_info, leaf, slot, 429 + "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", 430 + flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, 431 + hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); 432 + return -EUCLEAN; 433 + } 434 + 435 + type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; 436 + if (type != BTRFS_BLOCK_GROUP_DATA && 437 + type != BTRFS_BLOCK_GROUP_METADATA && 438 + type != BTRFS_BLOCK_GROUP_SYSTEM && 439 + type != (BTRFS_BLOCK_GROUP_METADATA | 440 + BTRFS_BLOCK_GROUP_DATA)) { 441 + block_group_err(fs_info, leaf, slot, 442 + "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", 443 + type, hweight64(type), 444 + BTRFS_BLOCK_GROUP_DATA, 
BTRFS_BLOCK_GROUP_METADATA, 445 + BTRFS_BLOCK_GROUP_SYSTEM, 446 + BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); 447 + return -EUCLEAN; 448 + } 449 + return 0; 450 + } 451 + 357 452 /* 358 453 * Common point to switch the item-specific validation. 359 454 */ ··· 470 373 case BTRFS_DIR_INDEX_KEY: 471 374 case BTRFS_XATTR_ITEM_KEY: 472 375 ret = check_dir_item(fs_info, leaf, key, slot); 376 + break; 377 + case BTRFS_BLOCK_GROUP_ITEM_KEY: 378 + ret = check_block_group_item(fs_info, leaf, key, slot); 473 379 break; 474 380 } 475 381 return ret; ··· 496 396 * skip this check for relocation trees. 497 397 */ 498 398 if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { 399 + u64 owner = btrfs_header_owner(leaf); 499 400 struct btrfs_root *check_root; 500 401 501 - key.objectid = btrfs_header_owner(leaf); 402 + /* These trees must never be empty */ 403 + if (owner == BTRFS_ROOT_TREE_OBJECTID || 404 + owner == BTRFS_CHUNK_TREE_OBJECTID || 405 + owner == BTRFS_EXTENT_TREE_OBJECTID || 406 + owner == BTRFS_DEV_TREE_OBJECTID || 407 + owner == BTRFS_FS_TREE_OBJECTID || 408 + owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { 409 + generic_err(fs_info, leaf, 0, 410 + "invalid root, root %llu must never be empty", 411 + owner); 412 + return -EUCLEAN; 413 + } 414 + key.objectid = owner; 502 415 key.type = BTRFS_ROOT_ITEM_KEY; 503 416 key.offset = (u64)-1; 504 417

+91 -179

fs/btrfs/tree-log.c

··· 545 545 key.type = BTRFS_INODE_ITEM_KEY; 546 546 key.offset = 0; 547 547 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 548 - if (IS_ERR(inode)) { 548 + if (IS_ERR(inode)) 549 549 inode = NULL; 550 - } else if (is_bad_inode(inode)) { 551 - iput(inode); 552 - inode = NULL; 553 - } 554 550 return inode; 555 551 } 556 552 ··· 593 597 if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 594 598 nbytes = 0; 595 599 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 596 - size = btrfs_file_extent_inline_len(eb, slot, item); 600 + size = btrfs_file_extent_ram_bytes(eb, item); 597 601 nbytes = btrfs_file_extent_ram_bytes(eb, item); 598 602 extent_end = ALIGN(start + size, 599 603 fs_info->sectorsize); ··· 681 685 * as the owner of the file extent changed from log tree 682 686 * (doesn't affect qgroup) to fs/file tree(affects qgroup) 683 687 */ 684 - ret = btrfs_qgroup_trace_extent(trans, fs_info, 688 + ret = btrfs_qgroup_trace_extent(trans, 685 689 btrfs_file_extent_disk_bytenr(eb, item), 686 690 btrfs_file_extent_disk_num_bytes(eb, item), 687 691 GFP_NOFS); ··· 711 715 * allocation tree 712 716 */ 713 717 ret = btrfs_alloc_logged_file_extent(trans, 714 - fs_info, 715 718 root->root_key.objectid, 716 719 key->objectid, offset, &ins); 717 720 if (ret) ··· 1286 1291 return ret; 1287 1292 } 1288 1293 1294 + static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir, 1295 + const u8 ref_type, const char *name, 1296 + const int namelen) 1297 + { 1298 + struct btrfs_key key; 1299 + struct btrfs_path *path; 1300 + const u64 parent_id = btrfs_ino(BTRFS_I(dir)); 1301 + int ret; 1302 + 1303 + path = btrfs_alloc_path(); 1304 + if (!path) 1305 + return -ENOMEM; 1306 + 1307 + key.objectid = btrfs_ino(BTRFS_I(inode)); 1308 + key.type = ref_type; 1309 + if (key.type == BTRFS_INODE_REF_KEY) 1310 + key.offset = parent_id; 1311 + else 1312 + key.offset = btrfs_extref_hash(parent_id, name, namelen); 1313 + 1314 + ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, 
&key, path, 0, 0); 1315 + if (ret < 0) 1316 + goto out; 1317 + if (ret > 0) { 1318 + ret = 0; 1319 + goto out; 1320 + } 1321 + if (key.type == BTRFS_INODE_EXTREF_KEY) 1322 + ret = btrfs_find_name_in_ext_backref(path->nodes[0], 1323 + path->slots[0], parent_id, 1324 + name, namelen, NULL); 1325 + else 1326 + ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], 1327 + name, namelen, NULL); 1328 + 1329 + out: 1330 + btrfs_free_path(path); 1331 + return ret; 1332 + } 1333 + 1289 1334 /* 1290 1335 * replay one inode back reference item found in the log tree. 1291 1336 * eb, slot and key refer to the buffer and key found in the log tree. ··· 1434 1399 goto out; 1435 1400 } 1436 1401 } 1402 + 1403 + /* 1404 + * If a reference item already exists for this inode 1405 + * with the same parent and name, but different index, 1406 + * drop it and the corresponding directory index entries 1407 + * from the parent before adding the new reference item 1408 + * and dir index entries, otherwise we would fail with 1409 + * -EEXIST returned from btrfs_add_link() below. 1410 + */ 1411 + ret = btrfs_inode_ref_exists(inode, dir, key->type, 1412 + name, namelen); 1413 + if (ret > 0) { 1414 + ret = btrfs_unlink_inode(trans, root, 1415 + BTRFS_I(dir), 1416 + BTRFS_I(inode), 1417 + name, namelen); 1418 + /* 1419 + * If we dropped the link count to 0, bump it so 1420 + * that later the iput() on the inode will not 1421 + * free it. We will fixup the link count later. 
1422 + */ 1423 + if (!ret && inode->i_nlink == 0) 1424 + inc_nlink(inode); 1425 + } 1426 + if (ret < 0) 1427 + goto out; 1437 1428 1438 1429 /* insert our name */ 1439 1430 ret = btrfs_add_link(trans, BTRFS_I(dir), ··· 2181 2120 dir_key->offset, 2182 2121 name, name_len, 0); 2183 2122 } 2184 - if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) { 2123 + if (!log_di || log_di == ERR_PTR(-ENOENT)) { 2185 2124 btrfs_dir_item_key_to_cpu(eb, di, &location); 2186 2125 btrfs_release_path(path); 2187 2126 btrfs_release_path(log_path); ··· 2994 2933 /* bail out if we need to do a full commit */ 2995 2934 if (btrfs_need_log_full_commit(fs_info, trans)) { 2996 2935 ret = -EAGAIN; 2997 - btrfs_free_logged_extents(log, log_transid); 2998 2936 mutex_unlock(&root->log_mutex); 2999 2937 goto out; 3000 2938 } ··· 3011 2951 if (ret) { 3012 2952 blk_finish_plug(&plug); 3013 2953 btrfs_abort_transaction(trans, ret); 3014 - btrfs_free_logged_extents(log, log_transid); 3015 2954 btrfs_set_log_full_commit(fs_info, trans); 3016 2955 mutex_unlock(&root->log_mutex); 3017 2956 goto out; ··· 3061 3002 goto out; 3062 3003 } 3063 3004 btrfs_wait_tree_log_extents(log, mark); 3064 - btrfs_free_logged_extents(log, log_transid); 3065 3005 mutex_unlock(&log_root_tree->log_mutex); 3066 3006 ret = -EAGAIN; 3067 3007 goto out; ··· 3078 3020 if (atomic_read(&log_root_tree->log_commit[index2])) { 3079 3021 blk_finish_plug(&plug); 3080 3022 ret = btrfs_wait_tree_log_extents(log, mark); 3081 - btrfs_wait_logged_extents(trans, log, log_transid); 3082 3023 wait_log_commit(log_root_tree, 3083 3024 root_log_ctx.log_transid); 3084 3025 mutex_unlock(&log_root_tree->log_mutex); ··· 3102 3045 if (btrfs_need_log_full_commit(fs_info, trans)) { 3103 3046 blk_finish_plug(&plug); 3104 3047 btrfs_wait_tree_log_extents(log, mark); 3105 - btrfs_free_logged_extents(log, log_transid); 3106 3048 mutex_unlock(&log_root_tree->log_mutex); 3107 3049 ret = -EAGAIN; 3108 3050 goto out_wake_log_root; ··· 3114 3058 if 
(ret) { 3115 3059 btrfs_set_log_full_commit(fs_info, trans); 3116 3060 btrfs_abort_transaction(trans, ret); 3117 - btrfs_free_logged_extents(log, log_transid); 3118 3061 mutex_unlock(&log_root_tree->log_mutex); 3119 3062 goto out_wake_log_root; 3120 3063 } ··· 3123 3068 EXTENT_NEW | EXTENT_DIRTY); 3124 3069 if (ret) { 3125 3070 btrfs_set_log_full_commit(fs_info, trans); 3126 - btrfs_free_logged_extents(log, log_transid); 3127 3071 mutex_unlock(&log_root_tree->log_mutex); 3128 3072 goto out_wake_log_root; 3129 3073 } 3130 - btrfs_wait_logged_extents(trans, log, log_transid); 3131 3074 3132 3075 btrfs_set_super_log_root(fs_info->super_for_commit, 3133 3076 log_root_tree->node->start); ··· 3211 3158 clear_extent_bits(&log->dirty_log_pages, start, end, 3212 3159 EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); 3213 3160 } 3214 - 3215 - /* 3216 - * We may have short-circuited the log tree with the full commit logic 3217 - * and left ordered extents on our list, so clear these out to keep us 3218 - * from leaking inodes and memory. 
3219 - */ 3220 - btrfs_free_logged_extents(log, 0); 3221 - btrfs_free_logged_extents(log, 1); 3222 3161 3223 3162 free_extent_buffer(log->node); 3224 3163 kfree(log); ··· 3801 3756 int start_slot, int nr, int inode_only, 3802 3757 u64 logged_isize) 3803 3758 { 3804 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 3759 + struct btrfs_fs_info *fs_info = trans->fs_info; 3805 3760 unsigned long src_offset; 3806 3761 unsigned long dst_offset; 3807 3762 struct btrfs_root *log = inode->root->log_root; ··· 3982 3937 struct btrfs_file_extent_item); 3983 3938 if (btrfs_file_extent_type(src, extent) == 3984 3939 BTRFS_FILE_EXTENT_INLINE) { 3985 - len = btrfs_file_extent_inline_len(src, 3986 - src_path->slots[0], 3987 - extent); 3940 + len = btrfs_file_extent_ram_bytes(src, extent); 3988 3941 *last_extent = ALIGN(key.offset + len, 3989 3942 fs_info->sectorsize); 3990 3943 } else { ··· 4047 4004 extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); 4048 4005 if (btrfs_file_extent_type(src, extent) == 4049 4006 BTRFS_FILE_EXTENT_INLINE) { 4050 - len = btrfs_file_extent_inline_len(src, i, extent); 4007 + len = btrfs_file_extent_ram_bytes(src, extent); 4051 4008 extent_end = ALIGN(key.offset + len, 4052 4009 fs_info->sectorsize); 4053 4010 } else { ··· 4121 4078 return 0; 4122 4079 } 4123 4080 4124 - static int wait_ordered_extents(struct btrfs_trans_handle *trans, 4125 - struct inode *inode, 4126 - struct btrfs_root *root, 4127 - const struct extent_map *em, 4128 - const struct list_head *logged_list, 4129 - bool *ordered_io_error) 4081 + static int log_extent_csums(struct btrfs_trans_handle *trans, 4082 + struct btrfs_inode *inode, 4083 + struct btrfs_root *log_root, 4084 + const struct extent_map *em) 4130 4085 { 4131 - struct btrfs_fs_info *fs_info = root->fs_info; 4132 - struct btrfs_ordered_extent *ordered; 4133 - struct btrfs_root *log = root->log_root; 4134 - u64 mod_start = em->mod_start; 4135 - u64 mod_len = em->mod_len; 4136 - const bool 
skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 4137 4086 u64 csum_offset; 4138 4087 u64 csum_len; 4139 4088 LIST_HEAD(ordered_sums); 4140 4089 int ret = 0; 4141 4090 4142 - *ordered_io_error = false; 4143 - 4144 - if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || 4091 + if (inode->flags & BTRFS_INODE_NODATASUM || 4092 + test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || 4145 4093 em->block_start == EXTENT_MAP_HOLE) 4146 4094 return 0; 4147 4095 4148 - /* 4149 - * Wait far any ordered extent that covers our extent map. If it 4150 - * finishes without an error, first check and see if our csums are on 4151 - * our outstanding ordered extents. 4152 - */ 4153 - list_for_each_entry(ordered, logged_list, log_list) { 4154 - struct btrfs_ordered_sum *sum; 4155 - 4156 - if (!mod_len) 4157 - break; 4158 - 4159 - if (ordered->file_offset + ordered->len <= mod_start || 4160 - mod_start + mod_len <= ordered->file_offset) 4161 - continue; 4162 - 4163 - if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && 4164 - !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) && 4165 - !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { 4166 - const u64 start = ordered->file_offset; 4167 - const u64 end = ordered->file_offset + ordered->len - 1; 4168 - 4169 - WARN_ON(ordered->inode != inode); 4170 - filemap_fdatawrite_range(inode->i_mapping, start, end); 4171 - } 4172 - 4173 - wait_event(ordered->wait, 4174 - (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) || 4175 - test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))); 4176 - 4177 - if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { 4178 - /* 4179 - * Clear the AS_EIO/AS_ENOSPC flags from the inode's 4180 - * i_mapping flags, so that the next fsync won't get 4181 - * an outdated io error too. 
4182 - */ 4183 - filemap_check_errors(inode->i_mapping); 4184 - *ordered_io_error = true; 4185 - break; 4186 - } 4187 - /* 4188 - * We are going to copy all the csums on this ordered extent, so 4189 - * go ahead and adjust mod_start and mod_len in case this 4190 - * ordered extent has already been logged. 4191 - */ 4192 - if (ordered->file_offset > mod_start) { 4193 - if (ordered->file_offset + ordered->len >= 4194 - mod_start + mod_len) 4195 - mod_len = ordered->file_offset - mod_start; 4196 - /* 4197 - * If we have this case 4198 - * 4199 - * |--------- logged extent ---------| 4200 - * |----- ordered extent ----| 4201 - * 4202 - * Just don't mess with mod_start and mod_len, we'll 4203 - * just end up logging more csums than we need and it 4204 - * will be ok. 4205 - */ 4206 - } else { 4207 - if (ordered->file_offset + ordered->len < 4208 - mod_start + mod_len) { 4209 - mod_len = (mod_start + mod_len) - 4210 - (ordered->file_offset + ordered->len); 4211 - mod_start = ordered->file_offset + 4212 - ordered->len; 4213 - } else { 4214 - mod_len = 0; 4215 - } 4216 - } 4217 - 4218 - if (skip_csum) 4219 - continue; 4220 - 4221 - /* 4222 - * To keep us from looping for the above case of an ordered 4223 - * extent that falls inside of the logged extent. 4224 - */ 4225 - if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, 4226 - &ordered->flags)) 4227 - continue; 4228 - 4229 - list_for_each_entry(sum, &ordered->list, list) { 4230 - ret = btrfs_csum_file_blocks(trans, log, sum); 4231 - if (ret) 4232 - break; 4233 - } 4234 - } 4235 - 4236 - if (*ordered_io_error || !mod_len || ret || skip_csum) 4237 - return ret; 4238 - 4096 + /* If we're compressed we have to save the entire range of csums. 
*/ 4239 4097 if (em->compress_type) { 4240 4098 csum_offset = 0; 4241 4099 csum_len = max(em->block_len, em->orig_block_len); 4242 4100 } else { 4243 - csum_offset = mod_start - em->start; 4244 - csum_len = mod_len; 4101 + csum_offset = em->mod_start - em->start; 4102 + csum_len = em->mod_len; 4245 4103 } 4246 4104 4247 4105 /* block start is already adjusted for the file extent offset. */ 4248 - ret = btrfs_lookup_csums_range(fs_info->csum_root, 4106 + ret = btrfs_lookup_csums_range(trans->fs_info->csum_root, 4249 4107 em->block_start + csum_offset, 4250 4108 em->block_start + csum_offset + 4251 4109 csum_len - 1, &ordered_sums, 0); ··· 4158 4214 struct btrfs_ordered_sum, 4159 4215 list); 4160 4216 if (!ret) 4161 - ret = btrfs_csum_file_blocks(trans, log, sums); 4217 + ret = btrfs_csum_file_blocks(trans, log_root, sums); 4162 4218 list_del(&sums->list); 4163 4219 kfree(sums); 4164 4220 } ··· 4170 4226 struct btrfs_inode *inode, struct btrfs_root *root, 4171 4227 const struct extent_map *em, 4172 4228 struct btrfs_path *path, 4173 - const struct list_head *logged_list, 4174 4229 struct btrfs_log_ctx *ctx) 4175 4230 { 4176 4231 struct btrfs_root *log = root->log_root; ··· 4181 4238 u64 block_len; 4182 4239 int ret; 4183 4240 int extent_inserted = 0; 4184 - bool ordered_io_err = false; 4185 4241 4186 - ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em, 4187 - logged_list, &ordered_io_err); 4242 + ret = log_extent_csums(trans, inode, log, em); 4188 4243 if (ret) 4189 4244 return ret; 4190 - 4191 - if (ordered_io_err) { 4192 - ctx->io_err = -EIO; 4193 - return ctx->io_err; 4194 - } 4195 4245 4196 4246 btrfs_init_map_token(&token); 4197 4247 ··· 4360 4424 struct btrfs_root *root, 4361 4425 struct btrfs_inode *inode, 4362 4426 struct btrfs_path *path, 4363 - struct list_head *logged_list, 4364 4427 struct btrfs_log_ctx *ctx, 4365 4428 const u64 start, 4366 4429 const u64 end) ··· 4415 4480 } 4416 4481 4417 4482 list_sort(NULL, &extents, extent_cmp); 4418 - 
btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); 4419 - /* 4420 - * Some ordered extents started by fsync might have completed 4421 - * before we could collect them into the list logged_list, which 4422 - * means they're gone, not in our logged_list nor in the inode's 4423 - * ordered tree. We want the application/user space to know an 4424 - * error happened while attempting to persist file data so that 4425 - * it can take proper action. If such error happened, we leave 4426 - * without writing to the log tree and the fsync must report the 4427 - * file data write error and not commit the current transaction. 4428 - */ 4429 - ret = filemap_check_errors(inode->vfs_inode.i_mapping); 4430 - if (ret) 4431 - ctx->io_err = ret; 4432 4483 process: 4433 4484 while (!list_empty(&extents)) { 4434 4485 em = list_entry(extents.next, struct extent_map, list); ··· 4433 4512 4434 4513 write_unlock(&tree->lock); 4435 4514 4436 - ret = log_one_extent(trans, inode, root, em, path, logged_list, 4437 - ctx); 4515 + ret = log_one_extent(trans, inode, root, em, path, ctx); 4438 4516 write_lock(&tree->lock); 4439 4517 clear_em_logging(tree, em); 4440 4518 free_extent_map(em); ··· 4632 4712 4633 4713 if (btrfs_file_extent_type(leaf, extent) == 4634 4714 BTRFS_FILE_EXTENT_INLINE) { 4635 - len = btrfs_file_extent_inline_len(leaf, 4636 - path->slots[0], 4637 - extent); 4715 + len = btrfs_file_extent_ram_bytes(leaf, extent); 4638 4716 ASSERT(len == i_size || 4639 4717 (len == fs_info->sectorsize && 4640 4718 btrfs_file_extent_compression(leaf, extent) != ··· 4816 4898 struct btrfs_key min_key; 4817 4899 struct btrfs_key max_key; 4818 4900 struct btrfs_root *log = root->log_root; 4819 - LIST_HEAD(logged_list); 4820 4901 u64 last_extent = 0; 4821 4902 int err = 0; 4822 4903 int ret; ··· 5011 5094 * we don't need to do more work nor fallback to 5012 5095 * a transaction commit. 
5013 5096 */ 5014 - if (IS_ERR(other_inode) && 5015 - PTR_ERR(other_inode) == -ENOENT) { 5097 + if (other_inode == ERR_PTR(-ENOENT)) { 5016 5098 goto next_key; 5017 5099 } else if (IS_ERR(other_inode)) { 5018 5100 err = PTR_ERR(other_inode); ··· 5151 5235 } 5152 5236 if (fast_search) { 5153 5237 ret = btrfs_log_changed_extents(trans, root, inode, dst_path, 5154 - &logged_list, ctx, start, end); 5238 + ctx, start, end); 5155 5239 if (ret) { 5156 5240 err = ret; 5157 5241 goto out_unlock; ··· 5202 5286 inode->last_log_commit = inode->last_sub_trans; 5203 5287 spin_unlock(&inode->lock); 5204 5288 out_unlock: 5205 - if (unlikely(err)) 5206 - btrfs_put_logged_extents(&logged_list); 5207 - else 5208 - btrfs_submit_logged_extents(&logged_list, log); 5209 5289 mutex_unlock(&inode->log_mutex); 5210 5290 5211 5291 btrfs_free_path(path); ··· 5497 5585 struct btrfs_inode *inode, 5498 5586 struct btrfs_log_ctx *ctx) 5499 5587 { 5500 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 5588 + struct btrfs_fs_info *fs_info = trans->fs_info; 5501 5589 int ret; 5502 5590 struct btrfs_path *path; 5503 5591 struct btrfs_key key; ··· 6032 6120 struct btrfs_inode *inode, struct btrfs_inode *old_dir, 6033 6121 struct dentry *parent) 6034 6122 { 6035 - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 6123 + struct btrfs_fs_info *fs_info = trans->fs_info; 6036 6124 6037 6125 /* 6038 6126 * this will force the logging code to walk the dentry chain

+377 -234

fs/btrfs/volumes.c

··· 8 8 #include <linux/slab.h> 9 9 #include <linux/buffer_head.h> 10 10 #include <linux/blkdev.h> 11 - #include <linux/iocontext.h> 12 - #include <linux/capability.h> 13 11 #include <linux/ratelimit.h> 14 12 #include <linux/kthread.h> 15 13 #include <linux/raid/pq.h> 16 14 #include <linux/semaphore.h> 17 15 #include <linux/uuid.h> 18 16 #include <linux/list_sort.h> 19 - #include <asm/div64.h> 20 17 #include "ctree.h" 21 18 #include "extent_map.h" 22 19 #include "disk-io.h" ··· 631 634 * devices. 632 635 */ 633 636 static void btrfs_free_stale_devices(const char *path, 634 - struct btrfs_device *skip_dev) 637 + struct btrfs_device *skip_device) 635 638 { 636 - struct btrfs_fs_devices *fs_devs, *tmp_fs_devs; 637 - struct btrfs_device *dev, *tmp_dev; 639 + struct btrfs_fs_devices *fs_devices, *tmp_fs_devices; 640 + struct btrfs_device *device, *tmp_device; 638 641 639 - list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) { 640 - 641 - if (fs_devs->opened) 642 + list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) { 643 + mutex_lock(&fs_devices->device_list_mutex); 644 + if (fs_devices->opened) { 645 + mutex_unlock(&fs_devices->device_list_mutex); 642 646 continue; 647 + } 643 648 644 - list_for_each_entry_safe(dev, tmp_dev, 645 - &fs_devs->devices, dev_list) { 649 + list_for_each_entry_safe(device, tmp_device, 650 + &fs_devices->devices, dev_list) { 646 651 int not_found = 0; 647 652 648 - if (skip_dev && skip_dev == dev) 653 + if (skip_device && skip_device == device) 649 654 continue; 650 - if (path && !dev->name) 655 + if (path && !device->name) 651 656 continue; 652 657 653 658 rcu_read_lock(); 654 659 if (path) 655 - not_found = strcmp(rcu_str_deref(dev->name), 660 + not_found = strcmp(rcu_str_deref(device->name), 656 661 path); 657 662 rcu_read_unlock(); 658 663 if (not_found) 659 664 continue; 660 665 661 666 /* delete the stale device */ 662 - if (fs_devs->num_devices == 1) { 663 - btrfs_sysfs_remove_fsid(fs_devs); 664 - 
list_del(&fs_devs->fs_list); 665 - free_fs_devices(fs_devs); 667 + fs_devices->num_devices--; 668 + list_del(&device->dev_list); 669 + btrfs_free_device(device); 670 + 671 + if (fs_devices->num_devices == 0) 666 672 break; 667 - } else { 668 - fs_devs->num_devices--; 669 - list_del(&dev->dev_list); 670 - btrfs_free_device(dev); 671 - } 673 + } 674 + mutex_unlock(&fs_devices->device_list_mutex); 675 + if (fs_devices->num_devices == 0) { 676 + btrfs_sysfs_remove_fsid(fs_devices); 677 + list_del(&fs_devices->fs_list); 678 + free_fs_devices(fs_devices); 672 679 } 673 680 } 674 681 } ··· 751 750 * error pointer when failed 752 751 */ 753 752 static noinline struct btrfs_device *device_list_add(const char *path, 754 - struct btrfs_super_block *disk_super) 753 + struct btrfs_super_block *disk_super, 754 + bool *new_device_added) 755 755 { 756 756 struct btrfs_device *device; 757 757 struct btrfs_fs_devices *fs_devices; ··· 766 764 if (IS_ERR(fs_devices)) 767 765 return ERR_CAST(fs_devices); 768 766 767 + mutex_lock(&fs_devices->device_list_mutex); 769 768 list_add(&fs_devices->fs_list, &fs_uuids); 770 769 771 770 device = NULL; 772 771 } else { 772 + mutex_lock(&fs_devices->device_list_mutex); 773 773 device = find_device(fs_devices, devid, 774 774 disk_super->dev_item.uuid); 775 775 } 776 776 777 777 if (!device) { 778 - if (fs_devices->opened) 778 + if (fs_devices->opened) { 779 + mutex_unlock(&fs_devices->device_list_mutex); 779 780 return ERR_PTR(-EBUSY); 781 + } 780 782 781 783 device = btrfs_alloc_device(NULL, &devid, 782 784 disk_super->dev_item.uuid); 783 785 if (IS_ERR(device)) { 786 + mutex_unlock(&fs_devices->device_list_mutex); 784 787 /* we can safely leave the fs_devices entry around */ 785 788 return device; 786 789 } ··· 793 786 name = rcu_string_strdup(path, GFP_NOFS); 794 787 if (!name) { 795 788 btrfs_free_device(device); 789 + mutex_unlock(&fs_devices->device_list_mutex); 796 790 return ERR_PTR(-ENOMEM); 797 791 } 798 792 
rcu_assign_pointer(device->name, name); 799 793 800 - mutex_lock(&fs_devices->device_list_mutex); 801 794 list_add_rcu(&device->dev_list, &fs_devices->devices); 802 795 fs_devices->num_devices++; 803 - mutex_unlock(&fs_devices->device_list_mutex); 804 796 805 797 device->fs_devices = fs_devices; 806 - btrfs_free_stale_devices(path, device); 798 + *new_device_added = true; 807 799 808 800 if (disk_super->label[0]) 809 801 pr_info("BTRFS: device label %s devid %llu transid %llu %s\n", ··· 846 840 * with larger generation number or the last-in if 847 841 * generation are equal. 848 842 */ 843 + mutex_unlock(&fs_devices->device_list_mutex); 849 844 return ERR_PTR(-EEXIST); 850 845 } 851 846 852 847 name = rcu_string_strdup(path, GFP_NOFS); 853 - if (!name) 848 + if (!name) { 849 + mutex_unlock(&fs_devices->device_list_mutex); 854 850 return ERR_PTR(-ENOMEM); 851 + } 855 852 rcu_string_free(device->name); 856 853 rcu_assign_pointer(device->name, name); 857 854 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { ··· 874 865 875 866 fs_devices->total_devices = btrfs_super_num_devices(disk_super); 876 867 868 + mutex_unlock(&fs_devices->device_list_mutex); 877 869 return device; 878 870 } 879 871 ··· 1014 1004 blkdev_put(device->bdev, device->mode); 1015 1005 } 1016 1006 1017 - static void btrfs_prepare_close_one_device(struct btrfs_device *device) 1007 + static void btrfs_close_one_device(struct btrfs_device *device) 1018 1008 { 1019 1009 struct btrfs_fs_devices *fs_devices = device->fs_devices; 1020 1010 struct btrfs_device *new_device; ··· 1032 1022 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) 1033 1023 fs_devices->missing_devices--; 1034 1024 1025 + btrfs_close_bdev(device); 1026 + 1035 1027 new_device = btrfs_alloc_device(NULL, &device->devid, 1036 1028 device->uuid); 1037 1029 BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ ··· 1047 1035 1048 1036 list_replace_rcu(&device->dev_list, &new_device->dev_list); 1049 1037 new_device->fs_devices = 
device->fs_devices; 1038 + 1039 + call_rcu(&device->rcu, free_device_rcu); 1050 1040 } 1051 1041 1052 1042 static int close_fs_devices(struct btrfs_fs_devices *fs_devices) 1053 1043 { 1054 1044 struct btrfs_device *device, *tmp; 1055 - struct list_head pending_put; 1056 - 1057 - INIT_LIST_HEAD(&pending_put); 1058 1045 1059 1046 if (--fs_devices->opened > 0) 1060 1047 return 0; 1061 1048 1062 1049 mutex_lock(&fs_devices->device_list_mutex); 1063 1050 list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) { 1064 - btrfs_prepare_close_one_device(device); 1065 - list_add(&device->dev_list, &pending_put); 1051 + btrfs_close_one_device(device); 1066 1052 } 1067 1053 mutex_unlock(&fs_devices->device_list_mutex); 1068 - 1069 - /* 1070 - * btrfs_show_devname() is using the device_list_mutex, 1071 - * sometimes call to blkdev_put() leads vfs calling 1072 - * into this func. So do put outside of device_list_mutex, 1073 - * as of now. 1074 - */ 1075 - while (!list_empty(&pending_put)) { 1076 - device = list_first_entry(&pending_put, 1077 - struct btrfs_device, dev_list); 1078 - list_del(&device->dev_list); 1079 - btrfs_close_bdev(device); 1080 - call_rcu(&device->rcu, free_device_rcu); 1081 - } 1082 1054 1083 1055 WARN_ON(fs_devices->open_devices); 1084 1056 WARN_ON(fs_devices->rw_devices); ··· 1142 1146 { 1143 1147 int ret; 1144 1148 1145 - mutex_lock(&uuid_mutex); 1149 + lockdep_assert_held(&uuid_mutex); 1150 + 1146 1151 mutex_lock(&fs_devices->device_list_mutex); 1147 1152 if (fs_devices->opened) { 1148 1153 fs_devices->opened++; ··· 1153 1156 ret = open_fs_devices(fs_devices, flags, holder); 1154 1157 } 1155 1158 mutex_unlock(&fs_devices->device_list_mutex); 1156 - mutex_unlock(&uuid_mutex); 1157 1159 1158 1160 return ret; 1159 1161 } ··· 1213 1217 * and we are not allowed to call set_blocksize during the scan. 
The superblock 1214 1218 * is read via pagecache 1215 1219 */ 1216 - int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 1217 - struct btrfs_fs_devices **fs_devices_ret) 1220 + struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, 1221 + void *holder) 1218 1222 { 1219 1223 struct btrfs_super_block *disk_super; 1220 - struct btrfs_device *device; 1224 + bool new_device_added = false; 1225 + struct btrfs_device *device = NULL; 1221 1226 struct block_device *bdev; 1222 1227 struct page *page; 1223 - int ret = 0; 1224 1228 u64 bytenr; 1229 + 1230 + lockdep_assert_held(&uuid_mutex); 1225 1231 1226 1232 /* 1227 1233 * we would like to check all the supers, but that would make ··· 1236 1238 1237 1239 bdev = blkdev_get_by_path(path, flags, holder); 1238 1240 if (IS_ERR(bdev)) 1239 - return PTR_ERR(bdev); 1241 + return ERR_CAST(bdev); 1240 1242 1241 1243 if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) { 1242 - ret = -EINVAL; 1244 + device = ERR_PTR(-EINVAL); 1243 1245 goto error_bdev_put; 1244 1246 } 1245 1247 1246 - mutex_lock(&uuid_mutex); 1247 - device = device_list_add(path, disk_super); 1248 - if (IS_ERR(device)) 1249 - ret = PTR_ERR(device); 1250 - else 1251 - *fs_devices_ret = device->fs_devices; 1252 - mutex_unlock(&uuid_mutex); 1248 + device = device_list_add(path, disk_super, &new_device_added); 1249 + if (!IS_ERR(device)) { 1250 + if (new_device_added) 1251 + btrfs_free_stale_devices(path, device); 1252 + } 1253 1253 1254 1254 btrfs_release_disk_super(page); 1255 1255 1256 1256 error_bdev_put: 1257 1257 blkdev_put(bdev, flags); 1258 1258 1259 - return ret; 1260 - } 1261 - 1262 - /* helper to account the used device space in the range */ 1263 - int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 1264 - u64 end, u64 *length) 1265 - { 1266 - struct btrfs_key key; 1267 - struct btrfs_root *root = device->fs_info->dev_root; 1268 - struct btrfs_dev_extent *dev_extent; 1269 - struct 
btrfs_path *path; 1270 - u64 extent_end; 1271 - int ret; 1272 - int slot; 1273 - struct extent_buffer *l; 1274 - 1275 - *length = 0; 1276 - 1277 - if (start >= device->total_bytes || 1278 - test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) 1279 - return 0; 1280 - 1281 - path = btrfs_alloc_path(); 1282 - if (!path) 1283 - return -ENOMEM; 1284 - path->reada = READA_FORWARD; 1285 - 1286 - key.objectid = device->devid; 1287 - key.offset = start; 1288 - key.type = BTRFS_DEV_EXTENT_KEY; 1289 - 1290 - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1291 - if (ret < 0) 1292 - goto out; 1293 - if (ret > 0) { 1294 - ret = btrfs_previous_item(root, path, key.objectid, key.type); 1295 - if (ret < 0) 1296 - goto out; 1297 - } 1298 - 1299 - while (1) { 1300 - l = path->nodes[0]; 1301 - slot = path->slots[0]; 1302 - if (slot >= btrfs_header_nritems(l)) { 1303 - ret = btrfs_next_leaf(root, path); 1304 - if (ret == 0) 1305 - continue; 1306 - if (ret < 0) 1307 - goto out; 1308 - 1309 - break; 1310 - } 1311 - btrfs_item_key_to_cpu(l, &key, slot); 1312 - 1313 - if (key.objectid < device->devid) 1314 - goto next; 1315 - 1316 - if (key.objectid > device->devid) 1317 - break; 1318 - 1319 - if (key.type != BTRFS_DEV_EXTENT_KEY) 1320 - goto next; 1321 - 1322 - dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 1323 - extent_end = key.offset + btrfs_dev_extent_length(l, 1324 - dev_extent); 1325 - if (key.offset <= start && extent_end > end) { 1326 - *length = end - start + 1; 1327 - break; 1328 - } else if (key.offset <= start && extent_end > start) 1329 - *length += extent_end - start; 1330 - else if (key.offset > start && extent_end <= end) 1331 - *length += extent_end - key.offset; 1332 - else if (key.offset > start && key.offset <= end) { 1333 - *length += end - key.offset + 1; 1334 - break; 1335 - } else if (key.offset > end) 1336 - break; 1337 - 1338 - next: 1339 - path->slots[0]++; 1340 - } 1341 - ret = 0; 1342 - out: 1343 - btrfs_free_path(path); 1344 - 
return ret; 1259 + return device; 1345 1260 } 1346 1261 1347 1262 static int contains_pending_extent(struct btrfs_transaction *transaction, ··· 1666 1755 * the btrfs_device struct should be fully filled in 1667 1756 */ 1668 1757 static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, 1669 - struct btrfs_fs_info *fs_info, 1670 1758 struct btrfs_device *device) 1671 1759 { 1672 - struct btrfs_root *root = fs_info->chunk_root; 1673 1760 int ret; 1674 1761 struct btrfs_path *path; 1675 1762 struct btrfs_dev_item *dev_item; ··· 1683 1774 key.type = BTRFS_DEV_ITEM_KEY; 1684 1775 key.offset = device->devid; 1685 1776 1686 - ret = btrfs_insert_empty_item(trans, root, path, &key, 1687 - sizeof(*dev_item)); 1777 + ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path, 1778 + &key, sizeof(*dev_item)); 1688 1779 if (ret) 1689 1780 goto out; 1690 1781 ··· 1709 1800 ptr = btrfs_device_uuid(dev_item); 1710 1801 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 1711 1802 ptr = btrfs_device_fsid(dev_item); 1712 - write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE); 1803 + write_extent_buffer(leaf, trans->fs_info->fsid, ptr, BTRFS_FSID_SIZE); 1713 1804 btrfs_mark_buffer_dirty(leaf); 1714 1805 1715 1806 ret = 0; ··· 1833 1924 * where this function called, there should be always be another device (or 1834 1925 * this_dev) which is active. 
1835 1926 */ 1836 - void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, 1837 - struct btrfs_device *device, struct btrfs_device *this_dev) 1927 + void btrfs_assign_next_active_device(struct btrfs_device *device, 1928 + struct btrfs_device *this_dev) 1838 1929 { 1930 + struct btrfs_fs_info *fs_info = device->fs_info; 1839 1931 struct btrfs_device *next_device; 1840 1932 1841 1933 if (this_dev) ··· 1939 2029 1940 2030 cur_devices->num_devices--; 1941 2031 cur_devices->total_devices--; 2032 + /* Update total_devices of the parent fs_devices if it's seed */ 2033 + if (cur_devices != fs_devices) 2034 + fs_devices->total_devices--; 1942 2035 1943 2036 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) 1944 2037 cur_devices->missing_devices--; 1945 2038 1946 - btrfs_assign_next_active_device(fs_info, device, NULL); 2039 + btrfs_assign_next_active_device(device, NULL); 1947 2040 1948 2041 if (device->bdev) { 1949 2042 cur_devices->open_devices--; ··· 1997 2084 goto out; 1998 2085 } 1999 2086 2000 - void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, 2001 - struct btrfs_device *srcdev) 2087 + void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) 2002 2088 { 2003 2089 struct btrfs_fs_devices *fs_devices; 2004 2090 2005 - lockdep_assert_held(&fs_info->fs_devices->device_list_mutex); 2091 + lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex); 2006 2092 2007 2093 /* 2008 2094 * in case of fs with no seed, srcdev->fs_devices will point ··· 2063 2151 } 2064 2152 } 2065 2153 2066 - void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, 2067 - struct btrfs_device *tgtdev) 2154 + void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) 2068 2155 { 2069 - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 2156 + struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices; 2070 2157 2071 2158 WARN_ON(!tgtdev); 2072 2159 mutex_lock(&fs_devices->device_list_mutex); ··· 2077 
2166 2078 2167 fs_devices->num_devices--; 2079 2168 2080 - btrfs_assign_next_active_device(fs_info, tgtdev, NULL); 2169 + btrfs_assign_next_active_device(tgtdev, NULL); 2081 2170 2082 2171 list_del_rcu(&tgtdev->dev_list); 2083 2172 ··· 2208 2297 INIT_LIST_HEAD(&seed_devices->alloc_list); 2209 2298 mutex_init(&seed_devices->device_list_mutex); 2210 2299 2211 - mutex_lock(&fs_info->fs_devices->device_list_mutex); 2300 + mutex_lock(&fs_devices->device_list_mutex); 2212 2301 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, 2213 2302 synchronize_rcu); 2214 2303 list_for_each_entry(device, &seed_devices->devices, dev_list) ··· 2228 2317 generate_random_uuid(fs_devices->fsid); 2229 2318 memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 2230 2319 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 2231 - mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2320 + mutex_unlock(&fs_devices->device_list_mutex); 2232 2321 2233 2322 super_flags = btrfs_super_flags(disk_super) & 2234 2323 ~BTRFS_SUPER_FLAG_SEEDING; ··· 2318 2407 struct btrfs_trans_handle *trans; 2319 2408 struct btrfs_device *device; 2320 2409 struct block_device *bdev; 2321 - struct list_head *devices; 2322 2410 struct super_block *sb = fs_info->sb; 2323 2411 struct rcu_string *name; 2324 - u64 tmp; 2412 + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 2413 + u64 orig_super_total_bytes; 2414 + u64 orig_super_num_devices; 2325 2415 int seeding_dev = 0; 2326 2416 int ret = 0; 2327 2417 bool unlocked = false; 2328 2418 2329 - if (sb_rdonly(sb) && !fs_info->fs_devices->seeding) 2419 + if (sb_rdonly(sb) && !fs_devices->seeding) 2330 2420 return -EROFS; 2331 2421 2332 2422 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, ··· 2335 2423 if (IS_ERR(bdev)) 2336 2424 return PTR_ERR(bdev); 2337 2425 2338 - if (fs_info->fs_devices->seeding) { 2426 + if (fs_devices->seeding) { 2339 2427 seeding_dev = 1; 2340 2428 down_write(&sb->s_umount); 2341 2429 
mutex_lock(&uuid_mutex); ··· 2343 2431 2344 2432 filemap_write_and_wait(bdev->bd_inode->i_mapping); 2345 2433 2346 - devices = &fs_info->fs_devices->devices; 2347 - 2348 - mutex_lock(&fs_info->fs_devices->device_list_mutex); 2349 - list_for_each_entry(device, devices, dev_list) { 2434 + mutex_lock(&fs_devices->device_list_mutex); 2435 + list_for_each_entry(device, &fs_devices->devices, dev_list) { 2350 2436 if (device->bdev == bdev) { 2351 2437 ret = -EEXIST; 2352 2438 mutex_unlock( 2353 - &fs_info->fs_devices->device_list_mutex); 2439 + &fs_devices->device_list_mutex); 2354 2440 goto error; 2355 2441 } 2356 2442 } 2357 - mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2443 + mutex_unlock(&fs_devices->device_list_mutex); 2358 2444 2359 2445 device = btrfs_alloc_device(fs_info, NULL, NULL); 2360 2446 if (IS_ERR(device)) { ··· 2401 2491 } 2402 2492 } 2403 2493 2404 - device->fs_devices = fs_info->fs_devices; 2494 + device->fs_devices = fs_devices; 2405 2495 2406 - mutex_lock(&fs_info->fs_devices->device_list_mutex); 2496 + mutex_lock(&fs_devices->device_list_mutex); 2407 2497 mutex_lock(&fs_info->chunk_mutex); 2408 - list_add_rcu(&device->dev_list, &fs_info->fs_devices->devices); 2409 - list_add(&device->dev_alloc_list, 2410 - &fs_info->fs_devices->alloc_list); 2411 - fs_info->fs_devices->num_devices++; 2412 - fs_info->fs_devices->open_devices++; 2413 - fs_info->fs_devices->rw_devices++; 2414 - fs_info->fs_devices->total_devices++; 2415 - fs_info->fs_devices->total_rw_bytes += device->total_bytes; 2498 + list_add_rcu(&device->dev_list, &fs_devices->devices); 2499 + list_add(&device->dev_alloc_list, &fs_devices->alloc_list); 2500 + fs_devices->num_devices++; 2501 + fs_devices->open_devices++; 2502 + fs_devices->rw_devices++; 2503 + fs_devices->total_devices++; 2504 + fs_devices->total_rw_bytes += device->total_bytes; 2416 2505 2417 2506 atomic64_add(device->total_bytes, &fs_info->free_chunk_space); 2418 2507 2419 2508 if (!blk_queue_nonrot(q)) 2420 - 
fs_info->fs_devices->rotating = 1; 2509 + fs_devices->rotating = 1; 2421 2510 2422 - tmp = btrfs_super_total_bytes(fs_info->super_copy); 2511 + orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy); 2423 2512 btrfs_set_super_total_bytes(fs_info->super_copy, 2424 - round_down(tmp + device->total_bytes, fs_info->sectorsize)); 2513 + round_down(orig_super_total_bytes + device->total_bytes, 2514 + fs_info->sectorsize)); 2425 2515 2426 - tmp = btrfs_super_num_devices(fs_info->super_copy); 2427 - btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1); 2516 + orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy); 2517 + btrfs_set_super_num_devices(fs_info->super_copy, 2518 + orig_super_num_devices + 1); 2428 2519 2429 2520 /* add sysfs device entry */ 2430 - btrfs_sysfs_add_device_link(fs_info->fs_devices, device); 2521 + btrfs_sysfs_add_device_link(fs_devices, device); 2431 2522 2432 2523 /* 2433 2524 * we've got more storage, clear any full flags on the space ··· 2437 2526 btrfs_clear_space_info_full(fs_info); 2438 2527 2439 2528 mutex_unlock(&fs_info->chunk_mutex); 2440 - mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2529 + mutex_unlock(&fs_devices->device_list_mutex); 2441 2530 2442 2531 if (seeding_dev) { 2443 2532 mutex_lock(&fs_info->chunk_mutex); ··· 2449 2538 } 2450 2539 } 2451 2540 2452 - ret = btrfs_add_dev_item(trans, fs_info, device); 2541 + ret = btrfs_add_dev_item(trans, device); 2453 2542 if (ret) { 2454 2543 btrfs_abort_transaction(trans, ret); 2455 2544 goto error_sysfs; ··· 2469 2558 */ 2470 2559 snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", 2471 2560 fs_info->fsid); 2472 - if (kobject_rename(&fs_info->fs_devices->fsid_kobj, fsid_buf)) 2561 + if (kobject_rename(&fs_devices->fsid_kobj, fsid_buf)) 2473 2562 btrfs_warn(fs_info, 2474 2563 "sysfs: failed to create fsid for sprout"); 2475 2564 } ··· 2504 2593 return ret; 2505 2594 2506 2595 error_sysfs: 2507 - btrfs_sysfs_rm_device_link(fs_info->fs_devices, 
device); 2596 + btrfs_sysfs_rm_device_link(fs_devices, device); 2597 + mutex_lock(&fs_info->fs_devices->device_list_mutex); 2598 + mutex_lock(&fs_info->chunk_mutex); 2599 + list_del_rcu(&device->dev_list); 2600 + list_del(&device->dev_alloc_list); 2601 + fs_info->fs_devices->num_devices--; 2602 + fs_info->fs_devices->open_devices--; 2603 + fs_info->fs_devices->rw_devices--; 2604 + fs_info->fs_devices->total_devices--; 2605 + fs_info->fs_devices->total_rw_bytes -= device->total_bytes; 2606 + atomic64_sub(device->total_bytes, &fs_info->free_chunk_space); 2607 + btrfs_set_super_total_bytes(fs_info->super_copy, 2608 + orig_super_total_bytes); 2609 + btrfs_set_super_num_devices(fs_info->super_copy, 2610 + orig_super_num_devices); 2611 + mutex_unlock(&fs_info->chunk_mutex); 2612 + mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2508 2613 error_trans: 2509 2614 if (seeding_dev) 2510 2615 sb->s_flags |= SB_RDONLY; ··· 2624 2697 return btrfs_update_device(trans, device); 2625 2698 } 2626 2699 2627 - static int btrfs_free_chunk(struct btrfs_trans_handle *trans, 2628 - struct btrfs_fs_info *fs_info, u64 chunk_offset) 2700 + static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) 2629 2701 { 2702 + struct btrfs_fs_info *fs_info = trans->fs_info; 2630 2703 struct btrfs_root *root = fs_info->chunk_root; 2631 2704 int ret; 2632 2705 struct btrfs_path *path; ··· 2735 2808 return em; 2736 2809 } 2737 2810 2738 - int btrfs_remove_chunk(struct btrfs_trans_handle *trans, 2739 - struct btrfs_fs_info *fs_info, u64 chunk_offset) 2811 + int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) 2740 2812 { 2813 + struct btrfs_fs_info *fs_info = trans->fs_info; 2741 2814 struct extent_map *em; 2742 2815 struct map_lookup *map; 2743 2816 u64 dev_extent_len = 0; ··· 2756 2829 } 2757 2830 map = em->map_lookup; 2758 2831 mutex_lock(&fs_info->chunk_mutex); 2759 - check_system_chunk(trans, fs_info, map->type); 2832 + check_system_chunk(trans, 
map->type); 2760 2833 mutex_unlock(&fs_info->chunk_mutex); 2761 2834 2762 2835 /* ··· 2796 2869 } 2797 2870 mutex_unlock(&fs_devices->device_list_mutex); 2798 2871 2799 - ret = btrfs_free_chunk(trans, fs_info, chunk_offset); 2872 + ret = btrfs_free_chunk(trans, chunk_offset); 2800 2873 if (ret) { 2801 2874 btrfs_abort_transaction(trans, ret); 2802 2875 goto out; ··· 2812 2885 } 2813 2886 } 2814 2887 2815 - ret = btrfs_remove_block_group(trans, fs_info, chunk_offset, em); 2888 + ret = btrfs_remove_block_group(trans, chunk_offset, em); 2816 2889 if (ret) { 2817 2890 btrfs_abort_transaction(trans, ret); 2818 2891 goto out; ··· 2877 2950 * step two, delete the device extents and the 2878 2951 * chunk tree entries 2879 2952 */ 2880 - ret = btrfs_remove_chunk(trans, fs_info, chunk_offset); 2953 + ret = btrfs_remove_chunk(trans, chunk_offset); 2881 2954 btrfs_end_transaction(trans); 2882 2955 return ret; 2883 2956 } ··· 2986 3059 if (IS_ERR(trans)) 2987 3060 return PTR_ERR(trans); 2988 3061 2989 - ret = btrfs_force_chunk_alloc(trans, fs_info, 3062 + ret = btrfs_force_chunk_alloc(trans, 2990 3063 BTRFS_BLOCK_GROUP_DATA); 2991 3064 btrfs_end_transaction(trans); 2992 3065 if (ret < 0) ··· 4619 4692 4620 4693 if (type & BTRFS_BLOCK_GROUP_DATA) { 4621 4694 max_stripe_size = SZ_1G; 4622 - max_chunk_size = 10 * max_stripe_size; 4695 + max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; 4623 4696 if (!devs_max) 4624 4697 devs_max = BTRFS_MAX_DEVS(info); 4625 4698 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { ··· 4827 4900 refcount_inc(&em->refs); 4828 4901 write_unlock(&em_tree->lock); 4829 4902 4830 - ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes); 4903 + ret = btrfs_make_block_group(trans, 0, type, start, num_bytes); 4831 4904 if (ret) 4832 4905 goto error_del_extent; 4833 4906 ··· 4861 4934 } 4862 4935 4863 4936 int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, 4864 - struct btrfs_fs_info *fs_info, 4865 - u64 chunk_offset, u64 chunk_size) 4937 + 
u64 chunk_offset, u64 chunk_size) 4866 4938 { 4939 + struct btrfs_fs_info *fs_info = trans->fs_info; 4867 4940 struct btrfs_root *extent_root = fs_info->extent_root; 4868 4941 struct btrfs_root *chunk_root = fs_info->chunk_root; 4869 4942 struct btrfs_key key; ··· 4965 5038 * require modifying the chunk tree. This division is important for the 4966 5039 * bootstrap process of adding storage to a seed btrfs. 4967 5040 */ 4968 - int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 4969 - struct btrfs_fs_info *fs_info, u64 type) 5041 + int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type) 4970 5042 { 4971 5043 u64 chunk_offset; 4972 5044 4973 - lockdep_assert_held(&fs_info->chunk_mutex); 4974 - chunk_offset = find_next_chunk(fs_info); 5045 + lockdep_assert_held(&trans->fs_info->chunk_mutex); 5046 + chunk_offset = find_next_chunk(trans->fs_info); 4975 5047 return __btrfs_alloc_chunk(trans, chunk_offset, type); 4976 5048 } 4977 5049 ··· 5101 5175 /* 5102 5176 * There could be two corrupted data stripes, we need 5103 5177 * to loop retry in order to rebuild the correct data. 5104 - * 5178 + * 5105 5179 * Fail a stripe at a time on every retry except the 5106 5180 * stripe under reconstruction. 
5107 5181 */ ··· 6113 6187 btrfs_io_bio(bio)->stripe_index = dev_nr; 6114 6188 bio->bi_end_io = btrfs_end_bio; 6115 6189 bio->bi_iter.bi_sector = physical >> 9; 6116 - #ifdef DEBUG 6117 - { 6118 - struct rcu_string *name; 6119 - 6120 - rcu_read_lock(); 6121 - name = rcu_dereference(dev->name); 6122 - btrfs_debug(fs_info, 6123 - "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", 6124 - bio_op(bio), bio->bi_opf, 6125 - (u64)bio->bi_iter.bi_sector, 6126 - (u_long)dev->bdev->bd_dev, name->str, dev->devid, 6127 - bio->bi_iter.bi_size); 6128 - rcu_read_unlock(); 6129 - } 6130 - #endif 6190 + btrfs_debug_in_rcu(fs_info, 6191 + "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", 6192 + bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector, 6193 + (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid, 6194 + bio->bi_iter.bi_size); 6131 6195 bio_set_dev(bio, dev->bdev); 6132 6196 6133 6197 btrfs_bio_counter_inc_noblocked(fs_info); ··· 6319 6403 u16 num_stripes; 6320 6404 u16 sub_stripes; 6321 6405 u64 type; 6406 + u64 features; 6407 + bool mixed = false; 6322 6408 6323 6409 length = btrfs_chunk_length(leaf, chunk); 6324 6410 stripe_len = btrfs_chunk_stripe_len(leaf, chunk); ··· 6359 6441 btrfs_chunk_type(leaf, chunk)); 6360 6442 return -EIO; 6361 6443 } 6444 + 6445 + if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { 6446 + btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); 6447 + return -EIO; 6448 + } 6449 + 6450 + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && 6451 + (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { 6452 + btrfs_err(fs_info, 6453 + "system chunk with data or metadata type: 0x%llx", type); 6454 + return -EIO; 6455 + } 6456 + 6457 + features = btrfs_super_incompat_flags(fs_info->super_copy); 6458 + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 6459 + mixed = true; 6460 + 6461 + if (!mixed) { 6462 + if ((type & BTRFS_BLOCK_GROUP_METADATA) && 6463 + (type & BTRFS_BLOCK_GROUP_DATA)) { 
6464 + btrfs_err(fs_info, 6465 + "mixed chunk type in non-mixed mode: 0x%llx", type); 6466 + return -EIO; 6467 + } 6468 + } 6469 + 6362 6470 if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || 6363 6471 (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || 6364 6472 (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || ··· 6471 6527 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 6472 6528 map->type = btrfs_chunk_type(leaf, chunk); 6473 6529 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); 6530 + map->verified_stripes = 0; 6474 6531 for (i = 0; i < num_stripes; i++) { 6475 6532 map->stripes[i].physical = 6476 6533 btrfs_stripe_offset_nr(leaf, chunk, i); ··· 6508 6563 write_lock(&map_tree->map_tree.lock); 6509 6564 ret = add_extent_mapping(&map_tree->map_tree, em, 0); 6510 6565 write_unlock(&map_tree->map_tree.lock); 6511 - BUG_ON(ret); /* Tree corruption */ 6566 + if (ret < 0) { 6567 + btrfs_err(fs_info, 6568 + "failed to add chunk map, start=%llu len=%llu: %d", 6569 + em->start, em->len, ret); 6570 + } 6512 6571 free_extent_map(em); 6513 6572 6514 - return 0; 6573 + return ret; 6515 6574 } 6516 6575 6517 6576 static void fill_device_from_item(struct extent_buffer *leaf, ··· 7057 7108 } 7058 7109 7059 7110 static int update_dev_stat_item(struct btrfs_trans_handle *trans, 7060 - struct btrfs_fs_info *fs_info, 7061 7111 struct btrfs_device *device) 7062 7112 { 7113 + struct btrfs_fs_info *fs_info = trans->fs_info; 7063 7114 struct btrfs_root *dev_root = fs_info->dev_root; 7064 7115 struct btrfs_path *path; 7065 7116 struct btrfs_key key; ··· 7152 7203 */ 7153 7204 smp_rmb(); 7154 7205 7155 - ret = update_dev_stat_item(trans, fs_info, device); 7206 + ret = update_dev_stat_item(trans, device); 7156 7207 if (!ret) 7157 7208 atomic_sub(stats_cnt, &device->dev_stats_ccnt); 7158 7209 } ··· 7330 7381 fs_devices->fs_info = NULL; 7331 7382 fs_devices = fs_devices->seed; 7332 7383 } 7384 + } 7385 + 7386 + /* 7387 + * Multiplicity factor for simple 
profiles: DUP, RAID1-like and RAID10. 7388 + */ 7389 + int btrfs_bg_type_to_factor(u64 flags) 7390 + { 7391 + if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 7392 + BTRFS_BLOCK_GROUP_RAID10)) 7393 + return 2; 7394 + return 1; 7395 + } 7396 + 7397 + 7398 + static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) 7399 + { 7400 + int index = btrfs_bg_flags_to_raid_index(type); 7401 + int ncopies = btrfs_raid_array[index].ncopies; 7402 + int data_stripes; 7403 + 7404 + switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { 7405 + case BTRFS_BLOCK_GROUP_RAID5: 7406 + data_stripes = num_stripes - 1; 7407 + break; 7408 + case BTRFS_BLOCK_GROUP_RAID6: 7409 + data_stripes = num_stripes - 2; 7410 + break; 7411 + default: 7412 + data_stripes = num_stripes / ncopies; 7413 + break; 7414 + } 7415 + return div_u64(chunk_len, data_stripes); 7416 + } 7417 + 7418 + static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, 7419 + u64 chunk_offset, u64 devid, 7420 + u64 physical_offset, u64 physical_len) 7421 + { 7422 + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; 7423 + struct extent_map *em; 7424 + struct map_lookup *map; 7425 + u64 stripe_len; 7426 + bool found = false; 7427 + int ret = 0; 7428 + int i; 7429 + 7430 + read_lock(&em_tree->lock); 7431 + em = lookup_extent_mapping(em_tree, chunk_offset, 1); 7432 + read_unlock(&em_tree->lock); 7433 + 7434 + if (!em) { 7435 + btrfs_err(fs_info, 7436 + "dev extent physical offset %llu on devid %llu doesn't have corresponding chunk", 7437 + physical_offset, devid); 7438 + ret = -EUCLEAN; 7439 + goto out; 7440 + } 7441 + 7442 + map = em->map_lookup; 7443 + stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes); 7444 + if (physical_len != stripe_len) { 7445 + btrfs_err(fs_info, 7446 + "dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu", 7447 + physical_offset, devid, em->start, physical_len, 7448 + stripe_len); 7449 + ret = 
-EUCLEAN; 7450 + goto out; 7451 + } 7452 + 7453 + for (i = 0; i < map->num_stripes; i++) { 7454 + if (map->stripes[i].dev->devid == devid && 7455 + map->stripes[i].physical == physical_offset) { 7456 + found = true; 7457 + if (map->verified_stripes >= map->num_stripes) { 7458 + btrfs_err(fs_info, 7459 + "too many dev extents for chunk %llu found", 7460 + em->start); 7461 + ret = -EUCLEAN; 7462 + goto out; 7463 + } 7464 + map->verified_stripes++; 7465 + break; 7466 + } 7467 + } 7468 + if (!found) { 7469 + btrfs_err(fs_info, 7470 + "dev extent physical offset %llu devid %llu has no corresponding chunk", 7471 + physical_offset, devid); 7472 + ret = -EUCLEAN; 7473 + } 7474 + out: 7475 + free_extent_map(em); 7476 + return ret; 7477 + } 7478 + 7479 + static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) 7480 + { 7481 + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; 7482 + struct extent_map *em; 7483 + struct rb_node *node; 7484 + int ret = 0; 7485 + 7486 + read_lock(&em_tree->lock); 7487 + for (node = rb_first(&em_tree->map); node; node = rb_next(node)) { 7488 + em = rb_entry(node, struct extent_map, rb_node); 7489 + if (em->map_lookup->num_stripes != 7490 + em->map_lookup->verified_stripes) { 7491 + btrfs_err(fs_info, 7492 + "chunk %llu has missing dev extent, have %d expect %d", 7493 + em->start, em->map_lookup->verified_stripes, 7494 + em->map_lookup->num_stripes); 7495 + ret = -EUCLEAN; 7496 + goto out; 7497 + } 7498 + } 7499 + out: 7500 + read_unlock(&em_tree->lock); 7501 + return ret; 7502 + } 7503 + 7504 + /* 7505 + * Ensure that all dev extents are mapped to correct chunk, otherwise 7506 + * later chunk allocation/free would cause unexpected behavior. 7507 + * 7508 + * NOTE: This will iterate through the whole device tree, which should be of 7509 + * the same size level as the chunk tree. This slightly increases mount time. 
7510 + */ 7511 + int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) 7512 + { 7513 + struct btrfs_path *path; 7514 + struct btrfs_root *root = fs_info->dev_root; 7515 + struct btrfs_key key; 7516 + int ret = 0; 7517 + 7518 + key.objectid = 1; 7519 + key.type = BTRFS_DEV_EXTENT_KEY; 7520 + key.offset = 0; 7521 + 7522 + path = btrfs_alloc_path(); 7523 + if (!path) 7524 + return -ENOMEM; 7525 + 7526 + path->reada = READA_FORWARD; 7527 + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 7528 + if (ret < 0) 7529 + goto out; 7530 + 7531 + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 7532 + ret = btrfs_next_item(root, path); 7533 + if (ret < 0) 7534 + goto out; 7535 + /* No dev extents at all? Not good */ 7536 + if (ret > 0) { 7537 + ret = -EUCLEAN; 7538 + goto out; 7539 + } 7540 + } 7541 + while (1) { 7542 + struct extent_buffer *leaf = path->nodes[0]; 7543 + struct btrfs_dev_extent *dext; 7544 + int slot = path->slots[0]; 7545 + u64 chunk_offset; 7546 + u64 physical_offset; 7547 + u64 physical_len; 7548 + u64 devid; 7549 + 7550 + btrfs_item_key_to_cpu(leaf, &key, slot); 7551 + if (key.type != BTRFS_DEV_EXTENT_KEY) 7552 + break; 7553 + devid = key.objectid; 7554 + physical_offset = key.offset; 7555 + 7556 + dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); 7557 + chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext); 7558 + physical_len = btrfs_dev_extent_length(leaf, dext); 7559 + 7560 + ret = verify_one_dev_extent(fs_info, chunk_offset, devid, 7561 + physical_offset, physical_len); 7562 + if (ret < 0) 7563 + goto out; 7564 + ret = btrfs_next_item(root, path); 7565 + if (ret < 0) 7566 + goto out; 7567 + if (ret > 0) { 7568 + ret = 0; 7569 + break; 7570 + } 7571 + } 7572 + 7573 + /* Ensure all chunks have corresponding dev extents */ 7574 + ret = verify_chunk_dev_extent_mapping(fs_info); 7575 + out: 7576 + btrfs_free_path(path); 7577 + return ret; 7333 7578 }

+15 -16

fs/btrfs/volumes.h

··· 11 11 #include <linux/btrfs.h> 12 12 #include "async-thread.h" 13 13 14 + #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) 15 + 14 16 extern struct mutex uuid_mutex; 15 17 16 18 #define BTRFS_STRIPE_LEN SZ_64K ··· 345 343 u64 stripe_len; 346 344 int num_stripes; 347 345 int sub_stripes; 346 + int verified_stripes; /* For mount time dev extent verification */ 348 347 struct btrfs_bio_stripe stripes[]; 349 348 }; 350 349 ··· 385 382 } 386 383 } 387 384 388 - int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 389 - u64 end, u64 *length); 390 385 void btrfs_get_bbio(struct btrfs_bio *bbio); 391 386 void btrfs_put_bbio(struct btrfs_bio *bbio); 392 387 int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ··· 397 396 u64 physical, u64 **logical, int *naddrs, int *stripe_len); 398 397 int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); 399 398 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); 400 - int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 401 - struct btrfs_fs_info *fs_info, u64 type); 399 + int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type); 402 400 void btrfs_mapping_init(struct btrfs_mapping_tree *tree); 403 401 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); 404 402 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 405 403 int mirror_num, int async_submit); 406 404 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 407 405 fmode_t flags, void *holder); 408 - int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 409 - struct btrfs_fs_devices **fs_devices_ret); 406 + struct btrfs_device *btrfs_scan_one_device(const char *path, 407 + fmode_t flags, void *holder); 410 408 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); 411 409 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step); 412 - void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, 413 - struct btrfs_device 
*device, struct btrfs_device *this_dev); 410 + void btrfs_assign_next_active_device(struct btrfs_device *device, 411 + struct btrfs_device *this_dev); 414 412 int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, 415 413 const char *device_path, 416 414 struct btrfs_device **device); ··· 453 453 int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); 454 454 int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, 455 455 struct btrfs_fs_info *fs_info); 456 - void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, 457 - struct btrfs_device *srcdev); 456 + void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); 458 457 void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, 459 458 struct btrfs_device *srcdev); 460 - void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, 461 - struct btrfs_device *tgtdev); 459 + void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev); 462 460 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); 463 461 int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, 464 462 u64 logical, u64 len); 465 463 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, 466 464 u64 logical); 467 465 int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, 468 - struct btrfs_fs_info *fs_info, 469 - u64 chunk_offset, u64 chunk_size); 470 - int btrfs_remove_chunk(struct btrfs_trans_handle *trans, 471 - struct btrfs_fs_info *fs_info, u64 chunk_offset); 466 + u64 chunk_offset, u64 chunk_size); 467 + int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset); 472 468 473 469 static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, 474 470 int index) ··· 555 559 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); 556 560 bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, 557 561 struct btrfs_device *failing_dev); 562 + 563 + int btrfs_bg_type_to_factor(u64 flags); 564 + int 
btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); 558 565 559 566 #endif

+1 -2

include/trace/events/btrfs.h

··· 374 374 __entry->extent_type = btrfs_file_extent_type(l, fi); 375 375 __entry->compression = btrfs_file_extent_compression(l, fi); 376 376 __entry->extent_start = start; 377 - __entry->extent_end = (start + btrfs_file_extent_inline_len(l, slot, fi)); 377 + __entry->extent_end = (start + btrfs_file_extent_ram_bytes(l, fi)); 378 378 ), 379 379 380 380 TP_printk_btrfs( ··· 433 433 { (1 << BTRFS_ORDERED_DIRECT), "DIRECT" }, \ 434 434 { (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \ 435 435 { (1 << BTRFS_ORDERED_UPDATED_ISIZE), "UPDATED_ISIZE" }, \ 436 - { (1 << BTRFS_ORDERED_LOGGED_CSUM), "LOGGED_CSUM" }, \ 437 436 { (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" }) 438 437 439 438

Configure Feed

Configure Feed