Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"This has Mark Fasheh's patches to fix quota accounting during subvol
deletion, which we've been working on for a while now. The patch is
pretty small but it's a key fix.

Otherwise it's a random assortment"

* 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
btrfs: fix balance range usage filters in 4.4-rc
btrfs: qgroup: account shared subtree during snapshot delete
Btrfs: use btrfs_get_fs_root in resolve_indirect_ref
btrfs: qgroup: fix quota disable during rescan
Btrfs: fix race between cleaner kthread and space cache writeout
Btrfs: fix scrub preventing unused block groups from being deleted
Btrfs: fix race between scrub and block group deletion
btrfs: fix rcu warning during device replace
btrfs: Continue replace when set_block_ro failed
btrfs: fix clashing number of the enhanced balance usage filter
Btrfs: fix the number of transaction units needed to remove a block group
Btrfs: use global reserve when deleting unused block group after ENOSPC
Btrfs: tests: checking for NULL instead of IS_ERR()
btrfs: fix signed overflows in btrfs_sync_file

+219 -66
+1 -1
fs/btrfs/backref.c
··· 355 355 356 356 index = srcu_read_lock(&fs_info->subvol_srcu); 357 357 358 - root = btrfs_read_fs_root_no_name(fs_info, &root_key); 358 + root = btrfs_get_fs_root(fs_info, &root_key, false); 359 359 if (IS_ERR(root)) { 360 360 srcu_read_unlock(&fs_info->subvol_srcu, index); 361 361 ret = PTR_ERR(root);
+4
fs/btrfs/ctree.h
··· 3416 3416 struct btrfs_block_group_cache *btrfs_lookup_block_group( 3417 3417 struct btrfs_fs_info *info, 3418 3418 u64 bytenr); 3419 + void btrfs_get_block_group(struct btrfs_block_group_cache *cache); 3419 3420 void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 3420 3421 int get_block_group_index(struct btrfs_block_group_cache *cache); 3421 3422 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, ··· 3480 3479 struct btrfs_root *root, u64 bytes_used, 3481 3480 u64 type, u64 chunk_objectid, u64 chunk_offset, 3482 3481 u64 size); 3482 + struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( 3483 + struct btrfs_fs_info *fs_info, 3484 + const u64 chunk_offset); 3483 3485 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 3484 3486 struct btrfs_root *root, u64 group_start, 3485 3487 struct extent_map *em);
+100 -23
fs/btrfs/extent-tree.c
··· 124 124 return (cache->flags & bits) == bits; 125 125 } 126 126 127 - static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 127 + void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 128 128 { 129 129 atomic_inc(&cache->count); 130 130 } ··· 5915 5915 set_extent_dirty(info->pinned_extents, 5916 5916 bytenr, bytenr + num_bytes - 1, 5917 5917 GFP_NOFS | __GFP_NOFAIL); 5918 - /* 5919 - * No longer have used bytes in this block group, queue 5920 - * it for deletion. 5921 - */ 5922 - if (old_val == 0) { 5923 - spin_lock(&info->unused_bgs_lock); 5924 - if (list_empty(&cache->bg_list)) { 5925 - btrfs_get_block_group(cache); 5926 - list_add_tail(&cache->bg_list, 5927 - &info->unused_bgs); 5928 - } 5929 - spin_unlock(&info->unused_bgs_lock); 5930 - } 5931 5918 } 5932 5919 5933 5920 spin_lock(&trans->transaction->dirty_bgs_lock); ··· 5925 5938 btrfs_get_block_group(cache); 5926 5939 } 5927 5940 spin_unlock(&trans->transaction->dirty_bgs_lock); 5941 + 5942 + /* 5943 + * No longer have used bytes in this block group, queue it for 5944 + * deletion. We do this after adding the block group to the 5945 + * dirty list to avoid races between cleaner kthread and space 5946 + * cache writeout. 5947 + */ 5948 + if (!alloc && old_val == 0) { 5949 + spin_lock(&info->unused_bgs_lock); 5950 + if (list_empty(&cache->bg_list)) { 5951 + btrfs_get_block_group(cache); 5952 + list_add_tail(&cache->bg_list, 5953 + &info->unused_bgs); 5954 + } 5955 + spin_unlock(&info->unused_bgs_lock); 5956 + } 5928 5957 5929 5958 btrfs_put_block_group(cache); 5930 5959 total -= num_bytes; ··· 8108 8105 } 8109 8106 8110 8107 /* 8111 - * TODO: Modify related function to add related node/leaf to dirty_extent_root, 8112 - * for later qgroup accounting. 8113 - * 8114 - * Current, this function does nothing. 8108 + * These may not be seen by the usual inc/dec ref code so we have to 8109 + * add them here. 8115 8110 */ 8111 + static int record_one_subtree_extent(struct btrfs_trans_handle *trans, 8112 + struct btrfs_root *root, u64 bytenr, 8113 + u64 num_bytes) 8114 + { 8115 + struct btrfs_qgroup_extent_record *qrecord; 8116 + struct btrfs_delayed_ref_root *delayed_refs; 8117 + 8118 + qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); 8119 + if (!qrecord) 8120 + return -ENOMEM; 8121 + 8122 + qrecord->bytenr = bytenr; 8123 + qrecord->num_bytes = num_bytes; 8124 + qrecord->old_roots = NULL; 8125 + 8126 + delayed_refs = &trans->transaction->delayed_refs; 8127 + spin_lock(&delayed_refs->lock); 8128 + if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord)) 8129 + kfree(qrecord); 8130 + spin_unlock(&delayed_refs->lock); 8131 + 8132 + return 0; 8133 + } 8134 + 8116 8135 static int account_leaf_items(struct btrfs_trans_handle *trans, 8117 8136 struct btrfs_root *root, 8118 8137 struct extent_buffer *eb) 8119 8138 { 8120 8139 int nr = btrfs_header_nritems(eb); 8121 - int i, extent_type; 8140 + int i, extent_type, ret; 8122 8141 struct btrfs_key key; 8123 8142 struct btrfs_file_extent_item *fi; 8124 8143 u64 bytenr, num_bytes; 8144 + 8145 + /* We can be called directly from walk_up_proc() */ 8146 + if (!root->fs_info->quota_enabled) 8147 + return 0; 8125 8148 8126 8149 for (i = 0; i < nr; i++) { 8127 8150 btrfs_item_key_to_cpu(eb, &key, i); ··· 8167 8138 continue; 8168 8139 8169 8140 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 8141 + 8142 + ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); 8143 + if (ret) 8144 + return ret; 8170 8145 } 8171 8146 return 0; 8172 8147 } ··· 8239 8206 8240 8207 /* 8241 8208 * root_eb is the subtree root and is locked before this function is called. 8242 - * TODO: Modify this function to mark all (including complete shared node) 8243 - * to dirty_extent_root to allow it get accounted in qgroup. 8244 8209 */ 8245 8210 static int account_shared_subtree(struct btrfs_trans_handle *trans, 8246 8211 struct btrfs_root *root, ··· 8316 8285 btrfs_tree_read_lock(eb); 8317 8286 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 8318 8287 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 8288 + 8289 + ret = record_one_subtree_extent(trans, root, child_bytenr, 8290 + root->nodesize); 8291 + if (ret) 8292 + goto out; 8319 8293 } 8320 8294 8321 8295 if (level == 0) { ··· 10292 10256 return ret; 10293 10257 } 10294 10258 10259 + struct btrfs_trans_handle * 10260 + btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, 10261 + const u64 chunk_offset) 10262 + { 10263 + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; 10264 + struct extent_map *em; 10265 + struct map_lookup *map; 10266 + unsigned int num_items; 10267 + 10268 + read_lock(&em_tree->lock); 10269 + em = lookup_extent_mapping(em_tree, chunk_offset, 1); 10270 + read_unlock(&em_tree->lock); 10271 + ASSERT(em && em->start == chunk_offset); 10272 + 10273 + /* 10274 + * We need to reserve 3 + N units from the metadata space info in order 10275 + * to remove a block group (done at btrfs_remove_chunk() and at 10276 + * btrfs_remove_block_group()), which are used for: 10277 + * 10278 + * 1 unit for adding the free space inode's orphan (located in the tree 10279 + * of tree roots). 10280 + * 1 unit for deleting the block group item (located in the extent 10281 + * tree). 10282 + * 1 unit for deleting the free space item (located in tree of tree 10283 + * roots). 10284 + * N units for deleting N device extent items corresponding to each 10285 + * stripe (located in the device tree). 10286 + * 10287 + * In order to remove a block group we also need to reserve units in the 10288 + * system space info in order to update the chunk tree (update one or 10289 + * more device items and remove one chunk item), but this is done at 10290 + * btrfs_remove_chunk() through a call to check_system_chunk(). 10291 + */ 10292 + map = (struct map_lookup *)em->bdev; 10293 + num_items = 3 + map->num_stripes; 10294 + free_extent_map(em); 10295 + 10296 + return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, 10297 + num_items, 1); 10298 + } 10299 + 10295 10300 /* 10296 10301 * Process the unused_bgs list and remove any that don't have any allocated 10297 10302 * space inside of them. ··· 10399 10322 * Want to do this before we do anything else so we can recover 10400 10323 * properly if we fail to join the transaction. 10401 10324 */ 10402 - /* 1 for btrfs_orphan_reserve_metadata() */ 10403 - trans = btrfs_start_transaction(root, 1); 10325 + trans = btrfs_start_trans_remove_block_group(fs_info, 10326 + block_group->key.objectid); 10404 10327 if (IS_ERR(trans)) { 10405 10328 btrfs_dec_block_group_ro(root, block_group); 10406 10329 ret = PTR_ERR(trans);
+7 -3
fs/btrfs/file.c
··· 1882 1882 struct btrfs_log_ctx ctx; 1883 1883 int ret = 0; 1884 1884 bool full_sync = 0; 1885 - const u64 len = end - start + 1; 1885 + u64 len; 1886 1886 1887 + /* 1888 + * The range length can be represented by u64, we have to do the typecasts 1889 + * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() 1890 + */ 1891 + len = (u64)end - (u64)start + 1; 1887 1892 trace_btrfs_sync_file(file, datasync); 1888 1893 1889 1894 /* ··· 2076 2071 } 2077 2072 } 2078 2073 if (!full_sync) { 2079 - ret = btrfs_wait_ordered_range(inode, start, 2080 - end - start + 1); 2074 + ret = btrfs_wait_ordered_range(inode, start, len); 2081 2075 if (ret) { 2082 2076 btrfs_end_transaction(trans, root); 2083 2077 goto out;
+1 -23
fs/btrfs/inode.c
··· 4046 4046 */ 4047 4047 static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) 4048 4048 { 4049 - struct btrfs_trans_handle *trans; 4050 4049 struct btrfs_root *root = BTRFS_I(dir)->root; 4051 - int ret; 4052 4050 4053 4051 /* 4054 4052 * 1 for the possible orphan item ··· 4055 4057 * 1 for the inode ref 4056 4058 * 1 for the inode 4057 4059 */ 4058 - trans = btrfs_start_transaction(root, 5); 4059 - if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 4060 - return trans; 4061 - 4062 - if (PTR_ERR(trans) == -ENOSPC) { 4063 - u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); 4064 - 4065 - trans = btrfs_start_transaction(root, 0); 4066 - if (IS_ERR(trans)) 4067 - return trans; 4068 - ret = btrfs_cond_migrate_bytes(root->fs_info, 4069 - &root->fs_info->trans_block_rsv, 4070 - num_bytes, 5); 4071 - if (ret) { 4072 - btrfs_end_transaction(trans, root); 4073 - return ERR_PTR(ret); 4074 - } 4075 - trans->block_rsv = &root->fs_info->trans_block_rsv; 4076 - trans->bytes_reserved = num_bytes; 4077 - } 4078 - return trans; 4060 + return btrfs_start_transaction_fallback_global_rsv(root, 5, 5); 4079 4061 } 4080 4062 4081 4063 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+4 -1
fs/btrfs/qgroup.c
··· 993 993 mutex_lock(&fs_info->qgroup_ioctl_lock); 994 994 if (!fs_info->quota_root) 995 995 goto out; 996 - spin_lock(&fs_info->qgroup_lock); 997 996 fs_info->quota_enabled = 0; 998 997 fs_info->pending_quota_state = 0; 998 + btrfs_qgroup_wait_for_completion(fs_info); 999 + spin_lock(&fs_info->qgroup_lock); 999 1000 quota_root = fs_info->quota_root; 1000 1001 fs_info->quota_root = NULL; 1001 1002 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; ··· 1461 1460 struct rb_node *parent_node = NULL; 1462 1461 struct btrfs_qgroup_extent_record *entry; 1463 1462 u64 bytenr = record->bytenr; 1463 + 1464 + assert_spin_locked(&delayed_refs->lock); 1464 1465 1465 1466 while (*p) { 1466 1467 parent_node = *p;
+56 -6
fs/btrfs/scrub.c
··· 3432 3432 static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, 3433 3433 struct btrfs_device *scrub_dev, 3434 3434 u64 chunk_offset, u64 length, 3435 - u64 dev_offset, int is_dev_replace) 3435 + u64 dev_offset, 3436 + struct btrfs_block_group_cache *cache, 3437 + int is_dev_replace) 3436 3438 { 3437 3439 struct btrfs_mapping_tree *map_tree = 3438 3440 &sctx->dev_root->fs_info->mapping_tree; ··· 3447 3445 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); 3448 3446 read_unlock(&map_tree->map_tree.lock); 3449 3447 3450 - if (!em) 3451 - return -EINVAL; 3448 + if (!em) { 3449 + /* 3450 + * Might have been an unused block group deleted by the cleaner 3451 + * kthread or relocation. 3452 + */ 3453 + spin_lock(&cache->lock); 3454 + if (!cache->removed) 3455 + ret = -EINVAL; 3456 + spin_unlock(&cache->lock); 3457 + 3458 + return ret; 3459 + } 3452 3460 3453 3461 map = (struct map_lookup *)em->bdev; 3454 3462 if (em->start != chunk_offset) ··· 3495 3483 u64 length; 3496 3484 u64 chunk_offset; 3497 3485 int ret = 0; 3486 + int ro_set; 3498 3487 int slot; 3499 3488 struct extent_buffer *l; 3500 3489 struct btrfs_key key; ··· 3581 3568 scrub_pause_on(fs_info); 3582 3569 ret = btrfs_inc_block_group_ro(root, cache); 3583 3570 scrub_pause_off(fs_info); 3584 - if (ret) { 3571 + 3572 + if (ret == 0) { 3573 + ro_set = 1; 3574 + } else if (ret == -ENOSPC) { 3575 + /* 3576 + * btrfs_inc_block_group_ro return -ENOSPC when it 3577 + * failed in creating new chunk for metadata. 3578 + * It is not a problem for scrub/replace, because 3579 + * metadata are always cowed, and our scrub paused 3580 + * commit_transactions. 3581 + */ 3582 + ro_set = 0; 3583 + } else { 3584 + btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n", 3585 + ret); 3585 3586 btrfs_put_block_group(cache); 3586 3587 break; 3587 3588 } ··· 3604 3577 dev_replace->cursor_left = found_key.offset; 3605 3578 dev_replace->item_needs_writeback = 1; 3606 3579 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, 3607 - found_key.offset, is_dev_replace); 3580 + found_key.offset, cache, is_dev_replace); 3608 3581 3609 3582 /* 3610 3583 * flush, submit all pending read and write bios, afterwards ··· 3638 3611 3639 3612 scrub_pause_off(fs_info); 3640 3613 3641 - btrfs_dec_block_group_ro(root, cache); 3614 + if (ro_set) 3615 + btrfs_dec_block_group_ro(root, cache); 3616 + 3617 + /* 3618 + * We might have prevented the cleaner kthread from deleting 3619 + * this block group if it was already unused because we raced 3620 + * and set it to RO mode first. So add it back to the unused 3621 + * list, otherwise it might not ever be deleted unless a manual 3622 + * balance is triggered or it becomes used and unused again. 3623 + */ 3624 + spin_lock(&cache->lock); 3625 + if (!cache->removed && !cache->ro && cache->reserved == 0 && 3626 + btrfs_block_group_used(&cache->item) == 0) { 3627 + spin_unlock(&cache->lock); 3628 + spin_lock(&fs_info->unused_bgs_lock); 3629 + if (list_empty(&cache->bg_list)) { 3630 + btrfs_get_block_group(cache); 3631 + list_add_tail(&cache->bg_list, 3632 + &fs_info->unused_bgs); 3633 + } 3634 + spin_unlock(&fs_info->unused_bgs_lock); 3635 + } else { 3636 + spin_unlock(&cache->lock); 3637 + } 3642 3638 3643 3639 btrfs_put_block_group(cache); 3644 3640 if (ret)
+3 -1
fs/btrfs/tests/free-space-tests.c
··· 898 898 } 899 899 900 900 root = btrfs_alloc_dummy_root(); 901 - if (!root) 901 + if (IS_ERR(root)) { 902 + ret = PTR_ERR(root); 902 903 goto out; 904 + } 903 905 904 906 root->fs_info = btrfs_alloc_dummy_fs_info(); 905 907 if (!root->fs_info)
+32
fs/btrfs/transaction.c
··· 592 592 return start_transaction(root, num_items, TRANS_START, 593 593 BTRFS_RESERVE_FLUSH_ALL); 594 594 } 595 + struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( 596 + struct btrfs_root *root, 597 + unsigned int num_items, 598 + int min_factor) 599 + { 600 + struct btrfs_trans_handle *trans; 601 + u64 num_bytes; 602 + int ret; 603 + 604 + trans = btrfs_start_transaction(root, num_items); 605 + if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 606 + return trans; 607 + 608 + trans = btrfs_start_transaction(root, 0); 609 + if (IS_ERR(trans)) 610 + return trans; 611 + 612 + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); 613 + ret = btrfs_cond_migrate_bytes(root->fs_info, 614 + &root->fs_info->trans_block_rsv, 615 + num_bytes, 616 + min_factor); 617 + if (ret) { 618 + btrfs_end_transaction(trans, root); 619 + return ERR_PTR(ret); 620 + } 621 + 622 + trans->block_rsv = &root->fs_info->trans_block_rsv; 623 + trans->bytes_reserved = num_bytes; 624 + 625 + return trans; 626 + } 595 627 596 628 struct btrfs_trans_handle *btrfs_start_transaction_lflush( 597 629 struct btrfs_root *root,
+4
fs/btrfs/transaction.h
··· 185 185 struct btrfs_root *root); 186 186 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 187 187 unsigned int num_items); 188 + struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( 189 + struct btrfs_root *root, 190 + unsigned int num_items, 191 + int min_factor); 188 192 struct btrfs_trans_handle *btrfs_start_transaction_lflush( 189 193 struct btrfs_root *root, 190 194 unsigned int num_items);
+6 -7
fs/btrfs/volumes.c
··· 1973 1973 if (srcdev->writeable) { 1974 1974 fs_devices->rw_devices--; 1975 1975 /* zero out the old super if it is writable */ 1976 - btrfs_scratch_superblocks(srcdev->bdev, 1977 - rcu_str_deref(srcdev->name)); 1976 + btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); 1978 1977 } 1979 1978 1980 1979 if (srcdev->bdev) ··· 2023 2024 btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); 2024 2025 2025 2026 if (tgtdev->bdev) { 2026 - btrfs_scratch_superblocks(tgtdev->bdev, 2027 - rcu_str_deref(tgtdev->name)); 2027 + btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); 2028 2028 fs_info->fs_devices->open_devices--; 2029 2029 } 2030 2030 fs_info->fs_devices->num_devices--; ··· 2851 2853 if (ret) 2852 2854 return ret; 2853 2855 2854 - trans = btrfs_start_transaction(root, 0); 2856 + trans = btrfs_start_trans_remove_block_group(root->fs_info, 2857 + chunk_offset); 2855 2858 if (IS_ERR(trans)) { 2856 2859 ret = PTR_ERR(trans); 2857 2860 btrfs_std_error(root->fs_info, ret, NULL); ··· 3122 3123 return 1; 3123 3124 } 3124 3125 3125 - static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, 3126 + static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, 3126 3127 struct btrfs_balance_args *bargs) 3127 3128 { 3128 3129 struct btrfs_block_group_cache *cache; ··· 3155 3156 return ret; 3156 3157 } 3157 3158 3158 - static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, 3159 + static int chunk_usage_filter(struct btrfs_fs_info *fs_info, 3159 3160 u64 chunk_offset, struct btrfs_balance_args *bargs) 3160 3161 { 3161 3162 struct btrfs_block_group_cache *cache;
+1 -1
fs/btrfs/volumes.h
··· 382 382 #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) 383 383 #define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6) 384 384 #define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7) 385 - #define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 8) 385 + #define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10) 386 386 387 387 #define BTRFS_BALANCE_ARGS_MASK \ 388 388 (BTRFS_BALANCE_ARGS_PROFILES | \