Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"We've queued up a few different fixes in here. These range from
enospc corners to fsync and quota fixes, and a few targeted at error
handling for corrupt metadata/fuzzing"

* 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: fix lockdep warning on deadlock against an inode's log mutex
Btrfs: detect corruption when non-root leaf has zero item
Btrfs: check btree node's nritems
btrfs: don't create or leak aliased root while cleaning up orphans
Btrfs: fix em leak in find_first_block_group
btrfs: do not background blkdev_put()
Btrfs: clarify do_chunk_alloc()'s return value
btrfs: fix fsfreeze hang caused by delayed iputs deal
btrfs: update btrfs_space_info's bytes_may_use timely
btrfs: divide btrfs_update_reserved_bytes() into two functions
btrfs: use correct offset for reloc_inode in prealloc_file_extent_cluster()
btrfs: qgroup: Fix qgroup incorrectness caused by log replay
btrfs: relocation: Fix leaking qgroups numbers on data extents
btrfs: qgroup: Refactor btrfs_qgroup_insert_dirty_extent()
btrfs: waiting on qgroup rescan should not always be interruptible
btrfs: properly track when rescan worker is running
btrfs: flush_space: treat return value of do_chunk_alloc properly
Btrfs: add ASSERT for block group's memory leak
btrfs: backref: Fix soft lockup in __merge_refs function
Btrfs: fix memory leak of reloc_root

+473 -181
+1
fs/btrfs/backref.c
··· 589 589 590 590 list_del(&ref2->list); 591 591 kmem_cache_free(btrfs_prelim_ref_cache, ref2); 592 + cond_resched(); 592 593 } 593 594 594 595 }
+4 -1
fs/btrfs/ctree.h
··· 1028 1028 struct btrfs_workqueue *qgroup_rescan_workers; 1029 1029 struct completion qgroup_rescan_completion; 1030 1030 struct btrfs_work qgroup_rescan_work; 1031 + bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */ 1031 1032 1032 1033 /* filesystem state */ 1033 1034 unsigned long fs_state; ··· 1080 1079 struct list_head pinned_chunks; 1081 1080 1082 1081 int creating_free_space_tree; 1082 + /* Used to record internally whether fs has been frozen */ 1083 + int fs_frozen; 1083 1084 }; 1084 1085 1085 1086 struct btrfs_subvolume_writers { ··· 2581 2578 struct btrfs_root *root, 2582 2579 u64 root_objectid, u64 owner, u64 offset, 2583 2580 struct btrfs_key *ins); 2584 - int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, 2581 + int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, 2585 2582 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 2586 2583 struct btrfs_key *ins, int is_data, int delalloc); 2587 2584 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+2 -5
fs/btrfs/delayed-ref.c
··· 541 541 struct btrfs_delayed_ref_head *existing; 542 542 struct btrfs_delayed_ref_head *head_ref = NULL; 543 543 struct btrfs_delayed_ref_root *delayed_refs; 544 - struct btrfs_qgroup_extent_record *qexisting; 545 544 int count_mod = 1; 546 545 int must_insert_reserved = 0; 547 546 ··· 605 606 qrecord->num_bytes = num_bytes; 606 607 qrecord->old_roots = NULL; 607 608 608 - qexisting = btrfs_qgroup_insert_dirty_extent(fs_info, 609 - delayed_refs, 610 - qrecord); 611 - if (qexisting) 609 + if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info, 610 + delayed_refs, qrecord)) 612 611 kfree(qrecord); 613 612 } 614 613
+51 -5
fs/btrfs/disk-io.c
··· 559 559 u32 nritems = btrfs_header_nritems(leaf); 560 560 int slot; 561 561 562 - if (nritems == 0) 562 + if (nritems == 0) { 563 + struct btrfs_root *check_root; 564 + 565 + key.objectid = btrfs_header_owner(leaf); 566 + key.type = BTRFS_ROOT_ITEM_KEY; 567 + key.offset = (u64)-1; 568 + 569 + check_root = btrfs_get_fs_root(root->fs_info, &key, false); 570 + /* 571 + * The only reason we also check NULL here is that during 572 + * open_ctree() some roots has not yet been set up. 573 + */ 574 + if (!IS_ERR_OR_NULL(check_root)) { 575 + /* if leaf is the root, then it's fine */ 576 + if (leaf->start != 577 + btrfs_root_bytenr(&check_root->root_item)) { 578 + CORRUPT("non-root leaf's nritems is 0", 579 + leaf, root, 0); 580 + return -EIO; 581 + } 582 + } 563 583 return 0; 584 + } 564 585 565 586 /* Check the 0 item */ 566 587 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != ··· 630 609 } 631 610 } 632 611 612 + return 0; 613 + } 614 + 615 + static int check_node(struct btrfs_root *root, struct extent_buffer *node) 616 + { 617 + unsigned long nr = btrfs_header_nritems(node); 618 + 619 + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { 620 + btrfs_crit(root->fs_info, 621 + "corrupt node: block %llu root %llu nritems %lu", 622 + node->start, root->objectid, nr); 623 + return -EIO; 624 + } 633 625 return 0; 634 626 } 635 627 ··· 715 681 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 716 682 ret = -EIO; 717 683 } 684 + 685 + if (found_level > 0 && check_node(root, eb)) 686 + ret = -EIO; 718 687 719 688 if (!ret) 720 689 set_extent_buffer_uptodate(eb); ··· 1655 1618 return ret; 1656 1619 } 1657 1620 1658 - static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, 1659 - u64 root_id) 1621 + struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, 1622 + u64 root_id) 1660 1623 { 1661 1624 struct btrfs_root *root; 1662 1625 ··· 2335 2298 fs_info->quota_enabled = 0; 2336 2299 fs_info->pending_quota_state = 0; 2337 2300 fs_info->qgroup_ulist = NULL; 2301 + fs_info->qgroup_rescan_running = false; 2338 2302 mutex_init(&fs_info->qgroup_rescan_lock); 2339 2303 } 2340 2304 ··· 2662 2624 atomic_set(&fs_info->qgroup_op_seq, 0); 2663 2625 atomic_set(&fs_info->reada_works_cnt, 0); 2664 2626 atomic64_set(&fs_info->tree_mod_seq, 0); 2627 + fs_info->fs_frozen = 0; 2665 2628 fs_info->sb = sb; 2666 2629 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; 2667 2630 fs_info->metadata_ratio = 0; ··· 3778 3739 if (btrfs_root_refs(&root->root_item) == 0) 3779 3740 synchronize_srcu(&fs_info->subvol_srcu); 3780 3741 3781 - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) 3742 + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 3782 3743 btrfs_free_log(NULL, root); 3744 + if (root->reloc_root) { 3745 + free_extent_buffer(root->reloc_root->node); 3746 + free_extent_buffer(root->reloc_root->commit_root); 3747 + btrfs_put_fs_root(root->reloc_root); 3748 + root->reloc_root = NULL; 3749 + } 3750 + } 3783 3751 3784 3752 if (root->free_ino_pinned) 3785 3753 __btrfs_remove_free_space_cache(root->free_ino_pinned); ··· 3897 3851 smp_mb(); 3898 3852 3899 3853 /* wait for the qgroup rescan worker to stop */ 3900 - btrfs_qgroup_wait_for_completion(fs_info); 3854 + btrfs_qgroup_wait_for_completion(fs_info, false); 3901 3855 3902 3856 /* wait for the uuid_scan task to finish */ 3903 3857 down(&fs_info->uuid_tree_rescan_sem);
+2
fs/btrfs/disk-io.h
··· 68 68 struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, 69 69 struct btrfs_key *location); 70 70 int btrfs_init_fs_root(struct btrfs_root *root); 71 + struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, 72 + u64 root_id); 71 73 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, 72 74 struct btrfs_root *root); 73 75 void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
+86 -99
fs/btrfs/extent-tree.c
··· 60 60 CHUNK_ALLOC_FORCE = 2, 61 61 }; 62 62 63 - /* 64 - * Control how reservations are dealt with. 65 - * 66 - * RESERVE_FREE - freeing a reservation. 67 - * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for 68 - * ENOSPC accounting 69 - * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update 70 - * bytes_may_use as the ENOSPC accounting is done elsewhere 71 - */ 72 - enum { 73 - RESERVE_FREE = 0, 74 - RESERVE_ALLOC = 1, 75 - RESERVE_ALLOC_NO_ACCOUNT = 2, 76 - }; 77 - 78 63 static int update_block_group(struct btrfs_trans_handle *trans, 79 64 struct btrfs_root *root, u64 bytenr, 80 65 u64 num_bytes, int alloc); ··· 89 104 struct btrfs_key *key); 90 105 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 91 106 int dump_block_groups); 92 - static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 93 - u64 num_bytes, int reserve, 94 - int delalloc); 107 + static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, 108 + u64 ram_bytes, u64 num_bytes, int delalloc); 109 + static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, 110 + u64 num_bytes, int delalloc); 95 111 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 96 112 u64 num_bytes); 97 113 int btrfs_pin_extent(struct btrfs_root *root, ··· 3487 3501 dcs = BTRFS_DC_SETUP; 3488 3502 else if (ret == -ENOSPC) 3489 3503 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); 3490 - btrfs_free_reserved_data_space(inode, 0, num_pages); 3491 3504 3492 3505 out_put: 3493 3506 iput(inode); ··· 4457 4472 } 4458 4473 } 4459 4474 4475 + /* 4476 + * If force is CHUNK_ALLOC_FORCE: 4477 + * - return 1 if it successfully allocates a chunk, 4478 + * - return errors including -ENOSPC otherwise. 4479 + * If force is NOT CHUNK_ALLOC_FORCE: 4480 + * - return 0 if it doesn't need to allocate a new chunk, 4481 + * - return 1 if it successfully allocates a chunk, 4482 + * - return errors including -ENOSPC otherwise. 4483 + */ 4460 4484 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 4461 4485 struct btrfs_root *extent_root, u64 flags, int force) 4462 4486 { ··· 4876 4882 btrfs_get_alloc_profile(root, 0), 4877 4883 CHUNK_ALLOC_NO_FORCE); 4878 4884 btrfs_end_transaction(trans, root); 4879 - if (ret == -ENOSPC) 4885 + if (ret > 0 || ret == -ENOSPC) 4880 4886 ret = 0; 4881 4887 break; 4882 4888 case COMMIT_TRANS: ··· 6491 6497 } 6492 6498 6493 6499 /** 6494 - * btrfs_update_reserved_bytes - update the block_group and space info counters 6500 + * btrfs_add_reserved_bytes - update the block_group and space info counters 6495 6501 * @cache: The cache we are manipulating 6502 + * @ram_bytes: The number of bytes of file content, and will be same to 6503 + * @num_bytes except for the compress path. 6496 6504 * @num_bytes: The number of bytes in question 6497 - * @reserve: One of the reservation enums 6498 6505 * @delalloc: The blocks are allocated for the delalloc write 6499 6506 * 6500 - * This is called by the allocator when it reserves space, or by somebody who is 6501 - * freeing space that was never actually used on disk. For example if you 6502 - * reserve some space for a new leaf in transaction A and before transaction A 6503 - * commits you free that leaf, you call this with reserve set to 0 in order to 6504 - * clear the reservation. 6505 - * 6506 - * Metadata reservations should be called with RESERVE_ALLOC so we do the proper 6507 + * This is called by the allocator when it reserves space. Metadata 6508 + * reservations should be called with RESERVE_ALLOC so we do the proper 6507 6509 * ENOSPC accounting. For data we handle the reservation through clearing the 6508 6510 * delalloc bits in the io_tree. We have to do this since we could end up 6509 6511 * allocating less disk space for the amount of data we have reserved in the ··· 6509 6519 * make the reservation and return -EAGAIN, otherwise this function always 6510 6520 * succeeds. 6511 6521 */ 6512 - static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 6513 - u64 num_bytes, int reserve, int delalloc) 6522 + static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, 6523 + u64 ram_bytes, u64 num_bytes, int delalloc) 6514 6524 { 6515 6525 struct btrfs_space_info *space_info = cache->space_info; 6516 6526 int ret = 0; 6517 6527 6518 6528 spin_lock(&space_info->lock); 6519 6529 spin_lock(&cache->lock); 6520 - if (reserve != RESERVE_FREE) { 6521 - if (cache->ro) { 6522 - ret = -EAGAIN; 6523 - } else { 6524 - cache->reserved += num_bytes; 6525 - space_info->bytes_reserved += num_bytes; 6526 - if (reserve == RESERVE_ALLOC) { 6527 - trace_btrfs_space_reservation(cache->fs_info, 6528 - "space_info", space_info->flags, 6529 - num_bytes, 0); 6530 - space_info->bytes_may_use -= num_bytes; 6531 - } 6532 - 6533 - if (delalloc) 6534 - cache->delalloc_bytes += num_bytes; 6535 - } 6530 + if (cache->ro) { 6531 + ret = -EAGAIN; 6536 6532 } else { 6537 - if (cache->ro) 6538 - space_info->bytes_readonly += num_bytes; 6539 - cache->reserved -= num_bytes; 6540 - space_info->bytes_reserved -= num_bytes; 6533 + cache->reserved += num_bytes; 6534 + space_info->bytes_reserved += num_bytes; 6541 6535 6536 + trace_btrfs_space_reservation(cache->fs_info, 6537 + "space_info", space_info->flags, 6538 + ram_bytes, 0); 6539 + space_info->bytes_may_use -= ram_bytes; 6542 6540 if (delalloc) 6543 - cache->delalloc_bytes -= num_bytes; 6541 + cache->delalloc_bytes += num_bytes; 6544 6542 } 6545 6543 spin_unlock(&cache->lock); 6546 6544 spin_unlock(&space_info->lock); 6547 6545 return ret; 6548 6546 } 6549 6547 6548 + /** 6549 + * btrfs_free_reserved_bytes - update the block_group and space info counters 6550 + * @cache: The cache we are manipulating 6551 + * @num_bytes: The number of bytes in question 6552 + * @delalloc: The blocks are allocated for the delalloc write 6553 + * 6554 + * This is called by somebody who is freeing space that was never actually used 6555 + * on disk. For example if you reserve some space for a new leaf in transaction 6556 + * A and before transaction A commits you free that leaf, you call this with 6557 + * reserve set to 0 in order to clear the reservation. 6558 + */ 6559 + 6560 + static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, 6561 + u64 num_bytes, int delalloc) 6562 + { 6563 + struct btrfs_space_info *space_info = cache->space_info; 6564 + int ret = 0; 6565 + 6566 + spin_lock(&space_info->lock); 6567 + spin_lock(&cache->lock); 6568 + if (cache->ro) 6569 + space_info->bytes_readonly += num_bytes; 6570 + cache->reserved -= num_bytes; 6571 + space_info->bytes_reserved -= num_bytes; 6572 + 6573 + if (delalloc) 6574 + cache->delalloc_bytes -= num_bytes; 6575 + spin_unlock(&cache->lock); 6576 + spin_unlock(&space_info->lock); 6577 + return ret; 6578 + } 6550 6579 void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 6551 6580 struct btrfs_root *root) 6552 6581 { ··· 7200 7191 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 7201 7192 7202 7193 btrfs_add_free_space(cache, buf->start, buf->len); 7203 - btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); 7194 + btrfs_free_reserved_bytes(cache, buf->len, 0); 7204 7195 btrfs_put_block_group(cache); 7205 7196 trace_btrfs_reserved_extent_free(root, buf->start, buf->len); 7206 7197 pin = 0; ··· 7425 7416 * the free space extent currently. 7426 7417 */ 7427 7418 static noinline int find_free_extent(struct btrfs_root *orig_root, 7428 - u64 num_bytes, u64 empty_size, 7429 - u64 hint_byte, struct btrfs_key *ins, 7430 - u64 flags, int delalloc) 7419 + u64 ram_bytes, u64 num_bytes, u64 empty_size, 7420 + u64 hint_byte, struct btrfs_key *ins, 7421 + u64 flags, int delalloc) 7431 7422 { 7432 7423 int ret = 0; 7433 7424 struct btrfs_root *root = orig_root->fs_info->extent_root; ··· 7439 7430 struct btrfs_space_info *space_info; 7440 7431 int loop = 0; 7441 7432 int index = __get_raid_index(flags); 7442 - int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? 7443 - RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 7444 7433 bool failed_cluster_refill = false; 7445 7434 bool failed_alloc = false; 7446 7435 bool use_cluster = true; ··· 7770 7763 search_start - offset); 7771 7764 BUG_ON(offset > search_start); 7772 7765 7773 - ret = btrfs_update_reserved_bytes(block_group, num_bytes, 7774 - alloc_type, delalloc); 7766 + ret = btrfs_add_reserved_bytes(block_group, ram_bytes, 7767 + num_bytes, delalloc); 7775 7768 if (ret == -EAGAIN) { 7776 7769 btrfs_add_free_space(block_group, offset, num_bytes); 7777 7770 goto loop; ··· 7943 7936 up_read(&info->groups_sem); 7944 7937 } 7945 7938 7946 - int btrfs_reserve_extent(struct btrfs_root *root, 7939 + int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, 7947 7940 u64 num_bytes, u64 min_alloc_size, 7948 7941 u64 empty_size, u64 hint_byte, 7949 7942 struct btrfs_key *ins, int is_data, int delalloc) ··· 7955 7948 flags = btrfs_get_alloc_profile(root, is_data); 7956 7949 again: 7957 7950 WARN_ON(num_bytes < root->sectorsize); 7958 - ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, 7959 - flags, delalloc); 7951 + ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, 7952 + hint_byte, ins, flags, delalloc); 7960 7953 if (!ret && !is_data) { 7961 7954 btrfs_dec_block_group_reservations(root->fs_info, 7962 7955 ins->objectid); ··· 7965 7958 num_bytes = min(num_bytes >> 1, ins->offset); 7966 7959 num_bytes = round_down(num_bytes, root->sectorsize); 7967 7960 num_bytes = max(num_bytes, min_alloc_size); 7961 + ram_bytes = num_bytes; 7968 7962 if (num_bytes == min_alloc_size) 7969 7963 final_tried = true; 7970 7964 goto again; ··· 8003 7995 if (btrfs_test_opt(root->fs_info, DISCARD)) 8004 7996 ret = btrfs_discard_extent(root, start, len, NULL); 8005 7997 btrfs_add_free_space(cache, start, len); 8006 - btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); 7998 + btrfs_free_reserved_bytes(cache, len, delalloc); 8007 7999 trace_btrfs_reserved_extent_free(root, start, len); 8008 8000 } 8009 8001 ··· 8231 8223 if (!block_group) 8232 8224 return -EINVAL; 8233 8225 8234 - ret = btrfs_update_reserved_bytes(block_group, ins->offset, 8235 - RESERVE_ALLOC_NO_ACCOUNT, 0); 8226 + ret = btrfs_add_reserved_bytes(block_group, ins->offset, 8227 + ins->offset, 0); 8236 8228 BUG_ON(ret); /* logic error */ 8237 8229 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 8238 8230 0, owner, offset, ins, 1); ··· 8376 8368 if (IS_ERR(block_rsv)) 8377 8369 return ERR_CAST(block_rsv); 8378 8370 8379 - ret = btrfs_reserve_extent(root, blocksize, blocksize, 8371 + ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize, 8380 8372 empty_size, hint, &ins, 0, 0); 8381 8373 if (ret) 8382 8374 goto out_unuse; ··· 8529 8521 wc->reada_slot = slot; 8530 8522 } 8531 8523 8532 - /* 8533 - * These may not be seen by the usual inc/dec ref code so we have to 8534 - * add them here. 8535 - */ 8536 - static int record_one_subtree_extent(struct btrfs_trans_handle *trans, 8537 - struct btrfs_root *root, u64 bytenr, 8538 - u64 num_bytes) 8539 - { 8540 - struct btrfs_qgroup_extent_record *qrecord; 8541 - struct btrfs_delayed_ref_root *delayed_refs; 8542 - 8543 - qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); 8544 - if (!qrecord) 8545 - return -ENOMEM; 8546 - 8547 - qrecord->bytenr = bytenr; 8548 - qrecord->num_bytes = num_bytes; 8549 - qrecord->old_roots = NULL; 8550 - 8551 - delayed_refs = &trans->transaction->delayed_refs; 8552 - spin_lock(&delayed_refs->lock); 8553 - if (btrfs_qgroup_insert_dirty_extent(trans->fs_info, 8554 - delayed_refs, qrecord)) 8555 - kfree(qrecord); 8556 - spin_unlock(&delayed_refs->lock); 8557 - 8558 - return 0; 8559 - } 8560 - 8561 8524 static int account_leaf_items(struct btrfs_trans_handle *trans, 8562 8525 struct btrfs_root *root, 8563 8526 struct extent_buffer *eb) ··· 8562 8583 8563 8584 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 8564 8585 8565 - ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); 8586 + ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, 8587 + bytenr, num_bytes, GFP_NOFS); 8566 8588 if (ret) 8567 8589 return ret; 8568 8590 } ··· 8712 8732 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 8713 8733 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 8714 8734 8715 - ret = record_one_subtree_extent(trans, root, child_bytenr, 8716 - root->nodesize); 8735 + ret = btrfs_qgroup_insert_dirty_extent(trans, 8736 + root->fs_info, child_bytenr, 8737 + root->nodesize, GFP_NOFS); 8717 8738 if (ret) 8718 8739 goto out; 8719 8740 } ··· 9887 9906 } else { 9888 9907 ret = 0; 9889 9908 } 9909 + free_extent_map(em); 9890 9910 goto out; 9891 9911 } 9892 9912 path->slots[0]++; ··· 9924 9942 block_group->iref = 0; 9925 9943 block_group->inode = NULL; 9926 9944 spin_unlock(&block_group->lock); 9945 + ASSERT(block_group->io_ctl.inode == NULL); 9927 9946 iput(inode); 9928 9947 last = block_group->key.objectid + block_group->key.offset; 9929 9948 btrfs_put_block_group(block_group); ··· 9982 9999 free_excluded_extents(info->extent_root, block_group); 9983 10000 9984 10001 btrfs_remove_free_space_cache(block_group); 10002 + ASSERT(list_empty(&block_group->dirty_list)); 10003 + ASSERT(list_empty(&block_group->io_list)); 10004 + ASSERT(list_empty(&block_group->bg_list)); 10005 + ASSERT(atomic_read(&block_group->count) == 1); 9985 10006 btrfs_put_block_group(block_group); 9986 10007 9987 10008 spin_lock(&info->block_group_cache_lock);
+1
fs/btrfs/extent_io.h
··· 20 20 #define EXTENT_DAMAGED (1U << 14) 21 21 #define EXTENT_NORESERVE (1U << 15) 22 22 #define EXTENT_QGROUP_RESERVED (1U << 16) 23 + #define EXTENT_CLEAR_DATA_RESV (1U << 17) 23 24 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 24 25 #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 25 26
+16 -12
fs/btrfs/file.c
··· 2070 2070 } 2071 2071 trans->sync = true; 2072 2072 2073 - btrfs_init_log_ctx(&ctx); 2073 + btrfs_init_log_ctx(&ctx, inode); 2074 2074 2075 2075 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); 2076 2076 if (ret < 0) { ··· 2675 2675 2676 2676 alloc_start = round_down(offset, blocksize); 2677 2677 alloc_end = round_up(offset + len, blocksize); 2678 + cur_offset = alloc_start; 2678 2679 2679 2680 /* Make sure we aren't being give some crap mode */ 2680 2681 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) ··· 2768 2767 2769 2768 /* First, check if we exceed the qgroup limit */ 2770 2769 INIT_LIST_HEAD(&reserve_list); 2771 - cur_offset = alloc_start; 2772 2770 while (1) { 2773 2771 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 2774 2772 alloc_end - cur_offset, 0); ··· 2794 2794 last_byte - cur_offset); 2795 2795 if (ret < 0) 2796 2796 break; 2797 + } else { 2798 + /* 2799 + * Do not need to reserve unwritten extent for this 2800 + * range, free reserved data space first, otherwise 2801 + * it'll result in false ENOSPC error. 2802 + */ 2803 + btrfs_free_reserved_data_space(inode, cur_offset, 2804 + last_byte - cur_offset); 2797 2805 } 2798 2806 free_extent_map(em); 2799 2807 cur_offset = last_byte; ··· 2819 2811 range->start, 2820 2812 range->len, 1 << inode->i_blkbits, 2821 2813 offset + len, &alloc_hint); 2814 + else 2815 + btrfs_free_reserved_data_space(inode, range->start, 2816 + range->len); 2822 2817 list_del(&range->list); 2823 2818 kfree(range); 2824 2819 } ··· 2856 2845 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 2857 2846 &cached_state, GFP_KERNEL); 2858 2847 out: 2859 - /* 2860 - * As we waited the extent range, the data_rsv_map must be empty 2861 - * in the range, as written data range will be released from it. 2862 - * And for prealloacted extent, it will also be released when 2863 - * its metadata is written. 2864 - * So this is completely used as cleanup. 2865 - */ 2866 - btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start); 2867 2848 inode_unlock(inode); 2868 2849 /* Let go of our reservation. */ 2869 - btrfs_free_reserved_data_space(inode, alloc_start, 2870 - alloc_end - alloc_start); 2850 + if (ret != 0) 2851 + btrfs_free_reserved_data_space(inode, alloc_start, 2852 + alloc_end - cur_offset); 2871 2853 return ret; 2872 2854 } 2873 2855
+1 -2
fs/btrfs/inode-map.c
··· 495 495 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, 496 496 prealloc, prealloc, &alloc_hint); 497 497 if (ret) { 498 - btrfs_delalloc_release_space(inode, 0, prealloc); 498 + btrfs_delalloc_release_metadata(inode, prealloc); 499 499 goto out_put; 500 500 } 501 - btrfs_free_reserved_data_space(inode, 0, prealloc); 502 501 503 502 ret = btrfs_write_out_ino_cache(root, trans, path, inode); 504 503 out_put:
+27 -10
fs/btrfs/inode.c
··· 566 566 PAGE_SET_WRITEBACK | 567 567 page_error_op | 568 568 PAGE_END_WRITEBACK); 569 + btrfs_free_reserved_data_space_noquota(inode, start, 570 + end - start + 1); 569 571 goto free_pages_out; 570 572 } 571 573 } ··· 744 742 lock_extent(io_tree, async_extent->start, 745 743 async_extent->start + async_extent->ram_size - 1); 746 744 747 - ret = btrfs_reserve_extent(root, 745 + ret = btrfs_reserve_extent(root, async_extent->ram_size, 748 746 async_extent->compressed_size, 749 747 async_extent->compressed_size, 750 748 0, alloc_hint, &ins, 1, 1); ··· 971 969 EXTENT_DEFRAG, PAGE_UNLOCK | 972 970 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | 973 971 PAGE_END_WRITEBACK); 974 - 972 + btrfs_free_reserved_data_space_noquota(inode, start, 973 + end - start + 1); 975 974 *nr_written = *nr_written + 976 975 (end - start + PAGE_SIZE) / PAGE_SIZE; 977 976 *page_started = 1; ··· 992 989 unsigned long op; 993 990 994 991 cur_alloc_size = disk_num_bytes; 995 - ret = btrfs_reserve_extent(root, cur_alloc_size, 992 + ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, 996 993 root->sectorsize, 0, alloc_hint, 997 994 &ins, 1, 1); 998 995 if (ret < 0) ··· 1492 1489 extent_clear_unlock_delalloc(inode, cur_offset, 1493 1490 cur_offset + num_bytes - 1, 1494 1491 locked_page, EXTENT_LOCKED | 1495 - EXTENT_DELALLOC, PAGE_UNLOCK | 1496 - PAGE_SET_PRIVATE2); 1492 + EXTENT_DELALLOC | 1493 + EXTENT_CLEAR_DATA_RESV, 1494 + PAGE_UNLOCK | PAGE_SET_PRIVATE2); 1495 + 1497 1496 if (!nolock && nocow) 1498 1497 btrfs_end_write_no_snapshoting(root); 1499 1498 cur_offset = extent_end; ··· 1812 1807 return; 1813 1808 1814 1809 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1815 - && do_list && !(state->state & EXTENT_NORESERVE)) 1810 + && do_list && !(state->state & EXTENT_NORESERVE) 1811 + && (*bits & (EXTENT_DO_ACCOUNTING | 1812 + EXTENT_CLEAR_DATA_RESV))) 1816 1813 btrfs_free_reserved_data_space_noquota(inode, 1817 1814 state->start, len); 1818 1815 ··· 7258 7251 int ret; 7259 7252 7260 7253 alloc_hint = get_extent_allocation_hint(inode, start, len); 7261 - ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, 7254 + ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0, 7262 7255 alloc_hint, &ins, 1, 1); 7263 7256 if (ret) 7264 7257 return ERR_PTR(ret); ··· 7758 7751 ret = PTR_ERR(em2); 7759 7752 goto unlock_err; 7760 7753 } 7754 + /* 7755 + * For inode marked NODATACOW or extent marked PREALLOC, 7756 + * use the existing or preallocated extent, so does not 7757 + * need to adjust btrfs_space_info's bytes_may_use. 7758 + */ 7759 + btrfs_free_reserved_data_space_noquota(inode, 7760 + start, len); 7761 7761 goto unlock; 7762 7762 } 7763 7763 } ··· 7799 7785 i_size_write(inode, start + len); 7800 7786 7801 7787 adjust_dio_outstanding_extents(inode, dio_data, len); 7802 - btrfs_free_reserved_data_space(inode, start, len); 7803 7788 WARN_ON(dio_data->reserve < len); 7804 7789 dio_data->reserve -= len; 7805 7790 dio_data->unsubmitted_oe_range_end = start + len; ··· 10319 10306 u64 last_alloc = (u64)-1; 10320 10307 int ret = 0; 10321 10308 bool own_trans = true; 10309 + u64 end = start + num_bytes - 1; 10322 10310 10323 10311 if (trans) 10324 10312 own_trans = false; ··· 10341 10327 * sized chunks. 10342 10328 */ 10343 10329 cur_bytes = min(cur_bytes, last_alloc); 10344 - ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, 10345 - *alloc_hint, &ins, 1, 0); 10330 + ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, 10331 + min_size, 0, *alloc_hint, &ins, 1, 0); 10346 10332 if (ret) { 10347 10333 if (own_trans) 10348 10334 btrfs_end_transaction(trans, root); ··· 10428 10414 if (own_trans) 10429 10415 btrfs_end_transaction(trans, root); 10430 10416 } 10417 + if (cur_offset < end) 10418 + btrfs_free_reserved_data_space(inode, cur_offset, 10419 + end - cur_offset + 1); 10431 10420 return ret; 10432 10421 } 10433 10422
+1 -1
fs/btrfs/ioctl.c
··· 5084 5084 if (!capable(CAP_SYS_ADMIN)) 5085 5085 return -EPERM; 5086 5086 5087 - return btrfs_qgroup_wait_for_completion(root->fs_info); 5087 + return btrfs_qgroup_wait_for_completion(root->fs_info, true); 5088 5088 } 5089 5089 5090 5090 static long _btrfs_ioctl_set_received_subvol(struct file *file,
+52 -10
fs/btrfs/qgroup.c
··· 995 995 goto out; 996 996 fs_info->quota_enabled = 0; 997 997 fs_info->pending_quota_state = 0; 998 - btrfs_qgroup_wait_for_completion(fs_info); 998 + btrfs_qgroup_wait_for_completion(fs_info, false); 999 999 spin_lock(&fs_info->qgroup_lock); 1000 1000 quota_root = fs_info->quota_root; 1001 1001 fs_info->quota_root = NULL; ··· 1453 1453 return ret; 1454 1454 } 1455 1455 1456 - struct btrfs_qgroup_extent_record * 1457 - btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, 1458 - struct btrfs_delayed_ref_root *delayed_refs, 1459 - struct btrfs_qgroup_extent_record *record) 1456 + int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, 1457 + struct btrfs_delayed_ref_root *delayed_refs, 1458 + struct btrfs_qgroup_extent_record *record) 1460 1459 { 1461 1460 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1462 1461 struct rb_node *parent_node = NULL; ··· 1474 1475 else if (bytenr > entry->bytenr) 1475 1476 p = &(*p)->rb_right; 1476 1477 else 1477 - return entry; 1478 + return 1; 1478 1479 } 1479 1480 1480 1481 rb_link_node(&record->node, parent_node, p); 1481 1482 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1482 - return NULL; 1483 + return 0; 1484 + } 1485 + 1486 + int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, 1487 + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1488 + gfp_t gfp_flag) 1489 + { 1490 + struct btrfs_qgroup_extent_record *record; 1491 + struct btrfs_delayed_ref_root *delayed_refs; 1492 + int ret; 1493 + 1494 + if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0) 1495 + return 0; 1496 + if (WARN_ON(trans == NULL)) 1497 + return -EINVAL; 1498 + record = kmalloc(sizeof(*record), gfp_flag); 1499 + if (!record) 1500 + return -ENOMEM; 1501 + 1502 + delayed_refs = &trans->transaction->delayed_refs; 1503 + record->bytenr = bytenr; 1504 + record->num_bytes = num_bytes; 1505 + record->old_roots = NULL; 1506 + 1507 + spin_lock(&delayed_refs->lock); 1508 + ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs, 1509 + record); 1510 + spin_unlock(&delayed_refs->lock); 1511 + if (ret > 0) 1512 + kfree(record); 1513 + return 0; 1483 1514 } 1484 1515 1485 1516 #define UPDATE_NEW 0 ··· 2332 2303 int err = -ENOMEM; 2333 2304 int ret = 0; 2334 2305 2306 + mutex_lock(&fs_info->qgroup_rescan_lock); 2307 + fs_info->qgroup_rescan_running = true; 2308 + mutex_unlock(&fs_info->qgroup_rescan_lock); 2309 + 2335 2310 path = btrfs_alloc_path(); 2336 2311 if (!path) 2337 2312 goto out; ··· 2402 2369 } 2403 2370 2404 2371 done: 2372 + mutex_lock(&fs_info->qgroup_rescan_lock); 2373 + fs_info->qgroup_rescan_running = false; 2374 + mutex_unlock(&fs_info->qgroup_rescan_lock); 2405 2375 complete_all(&fs_info->qgroup_rescan_completion); 2406 2376 } 2407 2377 ··· 2523 2487 return 0; 2524 2488 } 2525 2489 2526 - int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) 2490 + int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 2491 + bool interruptible) 2527 2492 { 2528 2493 int running; 2529 2494 int ret = 0; 2530 2495 2531 2496 mutex_lock(&fs_info->qgroup_rescan_lock); 2532 2497 spin_lock(&fs_info->qgroup_lock); 2533 - running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2498 + running = fs_info->qgroup_rescan_running; 2534 2499 spin_unlock(&fs_info->qgroup_lock); 2535 2500 mutex_unlock(&fs_info->qgroup_rescan_lock); 2536 2501 2537 - if (running) 2502 + if (!running) 2503 + return 0; 2504 + 2505 + if (interruptible) 2538 2506 ret = wait_for_completion_interruptible( 2539 2507 &fs_info->qgroup_rescan_completion); 2508 + else 2509 + wait_for_completion(&fs_info->qgroup_rescan_completion); 2540 2510 2541 2511 return ret; 2542 2512 }
+31 -5
fs/btrfs/qgroup.h
··· 46 46 struct btrfs_fs_info *fs_info); 47 47 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); 48 48 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); 49 - int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); 49 + int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 50 + bool interruptible); 50 51 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 51 52 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 52 53 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, ··· 64 63 struct btrfs_delayed_extent_op; 65 64 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, 66 65 struct btrfs_fs_info *fs_info); 67 - struct btrfs_qgroup_extent_record * 68 - btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, 69 - struct btrfs_delayed_ref_root *delayed_refs, 70 - struct btrfs_qgroup_extent_record *record); 66 + /* 67 + * Insert one dirty extent record into @delayed_refs, informing qgroup to 68 + * account that extent at commit trans time. 69 + * 70 + * No lock version, caller must acquire delayed ref lock and allocate memory. 71 + * 72 + * Return 0 for success insert 73 + * Return >0 for existing record, caller can free @record safely. 74 + * Error is not possible 75 + */ 76 + int btrfs_qgroup_insert_dirty_extent_nolock( 77 + struct btrfs_fs_info *fs_info, 78 + struct btrfs_delayed_ref_root *delayed_refs, 79 + struct btrfs_qgroup_extent_record *record); 80 + 81 + /* 82 + * Insert one dirty extent record into @delayed_refs, informing qgroup to 83 + * account that extent at commit trans time. 84 + * 85 + * Better encapsulated version. 86 + * 87 + * Return 0 if the operation is done. 88 + * Return <0 for error, like memory allocation failure or invalid parameter 89 + * (NULL trans) 90 + */ 91 + int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, 92 + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 93 + gfp_t gfp_flag); 94 + 71 95 int 72 96 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 73 97 struct btrfs_fs_info *fs_info,
+116 -10
fs/btrfs/relocation.c
··· 31 31 #include "async-thread.h" 32 32 #include "free-space-cache.h" 33 33 #include "inode-map.h" 34 + #include "qgroup.h" 34 35 35 36 /* 36 37 * backref_node, mapping_node and tree_block start with this ··· 3038 3037 u64 num_bytes; 3039 3038 int nr = 0; 3040 3039 int ret = 0; 3040 + u64 prealloc_start = cluster->start - offset; 3041 + u64 prealloc_end = cluster->end - offset; 3042 + u64 cur_offset; 3041 3043 3042 3044 BUG_ON(cluster->start != cluster->boundary[0]); 3043 3045 inode_lock(inode); 3044 3046 3045 - ret = btrfs_check_data_free_space(inode, cluster->start, 3046 - cluster->end + 1 - cluster->start); 3047 + ret = btrfs_check_data_free_space(inode, prealloc_start, 3048 + prealloc_end + 1 - prealloc_start); 3047 3049 if (ret) 3048 3050 goto out; 3049 3051 3052 + cur_offset = prealloc_start; 3050 3053 while (nr < cluster->nr) { 3051 3054 start = cluster->boundary[nr] - offset; 3052 3055 if (nr + 1 < cluster->nr) ··· 3060 3055 3061 3056 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 3062 3057 num_bytes = end + 1 - start; 3058 + if (cur_offset < start) 3059 + btrfs_free_reserved_data_space(inode, cur_offset, 3060 + start - cur_offset); 3063 3061 ret = btrfs_prealloc_file_range(inode, 0, start, 3064 3062 num_bytes, num_bytes, 3065 3063 end + 1, &alloc_hint); 3064 + cur_offset = end + 1; 3066 3065 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 3067 3066 if (ret) 3068 3067 break; 3069 3068 nr++; 3070 3069 } 3071 - btrfs_free_reserved_data_space(inode, cluster->start, 3072 - cluster->end + 1 - cluster->start); 3070 + if (cur_offset < prealloc_end) 3071 + btrfs_free_reserved_data_space(inode, cur_offset, 3072 + prealloc_end + 1 - cur_offset); 3073 3073 out: 3074 3074 inode_unlock(inode); 3075 3075 return ret; ··· 3926 3916 return 0; 3927 3917 } 3928 3918 3919 + /* 3920 + * Qgroup fixer for data chunk relocation. 3921 + * The data relocation is done in the following steps 3922 + * 1) Copy data extents into data reloc tree 3923 + * 2) Create tree reloc tree(special snapshot) for related subvolumes 3924 + * 3) Modify file extents in tree reloc tree 3925 + * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks 3926 + * 3927 + * The problem is, data and tree reloc tree are not accounted to qgroup, 3928 + * and 4) will only info qgroup to track tree blocks change, not file extents 3929 + * in the tree blocks. 3930 + * 3931 + * The good news is, related data extents are all in data reloc tree, so we 3932 + * only need to info qgroup to track all file extents in data reloc tree 3933 + * before commit trans. 3934 + */ 3935 + static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans, 3936 + struct reloc_control *rc) 3937 + { 3938 + struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3939 + struct inode *inode = rc->data_inode; 3940 + struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; 3941 + struct btrfs_path *path; 3942 + struct btrfs_key key; 3943 + int ret = 0; 3944 + 3945 + if (!fs_info->quota_enabled) 3946 + return 0; 3947 + 3948 + /* 3949 + * Only for stage where we update data pointers the qgroup fix is 3950 + * valid. 3951 + * For MOVING_DATA stage, we will miss the timing of swapping tree 3952 + * blocks, and won't fix it. 3953 + */ 3954 + if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) 3955 + return 0; 3956 + 3957 + path = btrfs_alloc_path(); 3958 + if (!path) 3959 + return -ENOMEM; 3960 + key.objectid = btrfs_ino(inode); 3961 + key.type = BTRFS_EXTENT_DATA_KEY; 3962 + key.offset = 0; 3963 + 3964 + ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); 3965 + if (ret < 0) 3966 + goto out; 3967 + 3968 + lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); 3969 + while (1) { 3970 + struct btrfs_file_extent_item *fi; 3971 + 3972 + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 3973 + if (key.objectid > btrfs_ino(inode)) 3974 + break; 3975 + if (key.type != BTRFS_EXTENT_DATA_KEY) 3976 + goto next; 3977 + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 3978 + struct btrfs_file_extent_item); 3979 + if (btrfs_file_extent_type(path->nodes[0], fi) != 3980 + BTRFS_FILE_EXTENT_REG) 3981 + goto next; 3982 + ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, 3983 + btrfs_file_extent_disk_bytenr(path->nodes[0], fi), 3984 + btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), 3985 + GFP_NOFS); 3986 + if (ret < 0) 3987 + break; 3988 + next: 3989 + ret = btrfs_next_item(data_reloc_root, path); 3990 + if (ret < 0) 3991 + break; 3992 + if (ret > 0) { 3993 + ret = 0; 3994 + break; 3995 + } 3996 + } 3997 + unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1); 3998 + out: 3999 + btrfs_free_path(path); 4000 + return ret; 4001 + } 4002 + 3929 4003 static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3930 4004 { 3931 4005 struct rb_root blocks = RB_ROOT; ··· 4196 4102 4197 4103 /* get rid of pinned extents */ 4198 4104 trans = btrfs_join_transaction(rc->extent_root); 4199 - if (IS_ERR(trans)) 4105 + if (IS_ERR(trans)) { 4200 4106 err = PTR_ERR(trans); 4201 - else 4202 - btrfs_commit_transaction(trans, rc->extent_root); 4107 + goto out_free; 4108 + } 4109 + err = qgroup_fix_relocated_data_extents(trans, rc); 4110 + if (err < 0) { 4111 + btrfs_abort_transaction(trans, err); 4112 + goto out_free; 4113 + } 4114 + btrfs_commit_transaction(trans, rc->extent_root); 4203 4115 out_free: 4204 4116 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 4205 4117 btrfs_free_path(path); ··· 4568 4468 unset_reloc_control(rc); 4569 4469 4570 4470 trans = btrfs_join_transaction(rc->extent_root); 4571 - if (IS_ERR(trans)) 4471 + if (IS_ERR(trans)) { 4572 4472 err = PTR_ERR(trans); 4573 - else 4574 - err = btrfs_commit_transaction(trans, rc->extent_root); 4473 + goto out_free; 4474 + } 4475 + err = qgroup_fix_relocated_data_extents(trans, rc); 4476 + if (err < 0) { 4477 + btrfs_abort_transaction(trans, err); 4478 + goto out_free; 4479 + } 4480 + err = btrfs_commit_transaction(trans, rc->extent_root); 4575 4481 out_free: 4576 4482 kfree(rc); 4577 4483 out:
+18 -9
fs/btrfs/root-tree.c
··· 272 272 root_key.objectid = key.offset; 273 273 key.offset++; 274 274 275 + /* 276 + * The root might have been inserted already, as before we look 277 + * for orphan roots, log replay might have happened, which 278 + * triggers a transaction commit and qgroup accounting, which 279 + * in turn reads and inserts fs roots while doing backref 280 + * walking. 281 + */ 282 + root = btrfs_lookup_fs_root(tree_root->fs_info, 283 + root_key.objectid); 284 + if (root) { 285 + WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, 286 + &root->state)); 287 + if (btrfs_root_refs(&root->root_item) == 0) 288 + btrfs_add_dead_root(root); 289 + continue; 290 + } 291 + 275 292 root = btrfs_read_fs_root(tree_root, &root_key); 276 293 err = PTR_ERR_OR_ZERO(root); 277 294 if (err && err != -ENOENT) { ··· 327 310 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); 328 311 329 312 err = btrfs_insert_fs_root(root->fs_info, root); 330 - /* 331 - * The root might have been inserted already, as before we look 332 - * for orphan roots, log replay might have happened, which 333 - * triggers a transaction commit and qgroup accounting, which 334 - * in turn reads and inserts fs roots while doing backref 335 - * walking. 336 - */ 337 - if (err == -EEXIST) 338 - err = 0; 339 313 if (err) { 314 + BUG_ON(err == -EEXIST); 340 315 btrfs_free_fs_root(root); 341 316 break; 342 317 }
+16
fs/btrfs/super.c
··· 2241 2241 struct btrfs_trans_handle *trans; 2242 2242 struct btrfs_root *root = btrfs_sb(sb)->tree_root; 2243 2243 2244 + root->fs_info->fs_frozen = 1; 2245 + /* 2246 + * We don't need a barrier here, we'll wait for any transaction that 2247 + * could be in progress on other threads (and do delayed iputs that 2248 + * we want to avoid on a frozen filesystem), or do the commit 2249 + * ourselves. 2250 + */ 2244 2251 trans = btrfs_attach_transaction_barrier(root); 2245 2252 if (IS_ERR(trans)) { 2246 2253 /* no transaction, don't bother */ ··· 2256 2249 return PTR_ERR(trans); 2257 2250 } 2258 2251 return btrfs_commit_transaction(trans, root); 2252 + } 2253 + 2254 + static int btrfs_unfreeze(struct super_block *sb) 2255 + { 2256 + struct btrfs_root *root = btrfs_sb(sb)->tree_root; 2257 + 2258 + root->fs_info->fs_frozen = 0; 2259 + return 0; 2259 2260 } 2260 2261 2261 2262 static int btrfs_show_devname(struct seq_file *m, struct dentry *root) ··· 2314 2299 .statfs = btrfs_statfs, 2315 2300 .remount_fs = btrfs_remount, 2316 2301 .freeze_fs = btrfs_freeze, 2302 + .unfreeze_fs = btrfs_unfreeze, 2317 2303 }; 2318 2304 2319 2305 static const struct file_operations btrfs_ctl_fops = {
+6 -1
fs/btrfs/transaction.c
··· 2278 2278 2279 2279 kmem_cache_free(btrfs_trans_handle_cachep, trans); 2280 2280 2281 + /* 2282 + * If fs has been frozen, we can not handle delayed iputs, otherwise 2283 + * it'll result in deadlock about SB_FREEZE_FS. 2284 + */ 2281 2285 if (current != root->fs_info->transaction_kthread && 2282 - current != root->fs_info->cleaner_kthread) 2286 + current != root->fs_info->cleaner_kthread && 2287 + !root->fs_info->fs_frozen) 2283 2288 btrfs_run_delayed_iputs(root); 2284 2289 2285 2290 return ret;
+19 -2
fs/btrfs/tree-log.c
··· 27 27 #include "backref.h" 28 28 #include "hash.h" 29 29 #include "compression.h" 30 + #include "qgroup.h" 30 31 31 32 /* magic values for the inode_only field in btrfs_log_inode: 32 33 * ··· 680 679 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 681 680 ins.type = BTRFS_EXTENT_ITEM_KEY; 682 681 offset = key->offset - btrfs_file_extent_offset(eb, item); 682 + 683 + /* 684 + * Manually record dirty extent, as here we did a shallow 685 + * file extent item copy and skip normal backref update, 686 + * but modifying extent tree all by ourselves. 687 + * So need to manually record dirty extent for qgroup, 688 + * as the owner of the file extent changed from log tree 689 + * (doesn't affect qgroup) to fs/file tree(affects qgroup) 690 + */ 691 + ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, 692 + btrfs_file_extent_disk_bytenr(eb, item), 693 + btrfs_file_extent_disk_num_bytes(eb, item), 694 + GFP_NOFS); 695 + if (ret < 0) 696 + goto out; 683 697 684 698 if (ins.objectid > 0) { 685 699 u64 csum_start; ··· 2823 2807 */ 2824 2808 mutex_unlock(&root->log_mutex); 2825 2809 2826 - btrfs_init_log_ctx(&root_log_ctx); 2810 + btrfs_init_log_ctx(&root_log_ctx, NULL); 2827 2811 2828 2812 mutex_lock(&log_root_tree->log_mutex); 2829 2813 atomic_inc(&log_root_tree->log_batch); ··· 4757 4741 if (ret < 0) { 4758 4742 err = ret; 4759 4743 goto out_unlock; 4760 - } else if (ret > 0) { 4744 + } else if (ret > 0 && ctx && 4745 + other_ino != btrfs_ino(ctx->inode)) { 4761 4746 struct btrfs_key inode_key; 4762 4747 struct inode *other_inode; 4763 4748
+4 -1
fs/btrfs/tree-log.h
··· 30 30 int log_transid; 31 31 int io_err; 32 32 bool log_new_dentries; 33 + struct inode *inode; 33 34 struct list_head list; 34 35 }; 35 36 36 - static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) 37 + static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, 38 + struct inode *inode) 37 39 { 38 40 ctx->log_ret = 0; 39 41 ctx->log_transid = 0; 40 42 ctx->io_err = 0; 41 43 ctx->log_new_dentries = false; 44 + ctx->inode = inode; 42 45 INIT_LIST_HEAD(&ctx->list); 43 46 } 44 47
+19 -8
fs/btrfs/volumes.c
··· 834 834 struct btrfs_device *device; 835 835 836 836 device = container_of(work, struct btrfs_device, rcu_work); 837 - 838 - if (device->bdev) 839 - blkdev_put(device->bdev, device->mode); 840 - 841 837 rcu_string_free(device->name); 842 838 kfree(device); 843 839 } ··· 846 850 847 851 INIT_WORK(&device->rcu_work, __free_device); 848 852 schedule_work(&device->rcu_work); 853 + } 854 + 855 + static void btrfs_close_bdev(struct btrfs_device *device) 856 + { 857 + if (device->bdev && device->writeable) { 858 + sync_blockdev(device->bdev); 859 + invalidate_bdev(device->bdev); 860 + } 861 + 862 + if (device->bdev) 863 + blkdev_put(device->bdev, device->mode); 849 864 } 850 865 851 866 static void btrfs_close_one_device(struct btrfs_device *device) ··· 877 870 if (device->missing) 878 871 fs_devices->missing_devices--; 879 872 880 - if (device->bdev && device->writeable) { 881 - sync_blockdev(device->bdev); 882 - invalidate_bdev(device->bdev); 883 - } 873 + btrfs_close_bdev(device); 884 874 885 875 new_device = btrfs_alloc_device(NULL, &device->devid, 886 876 device->uuid); ··· 1936 1932 btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); 1937 1933 } 1938 1934 1935 + btrfs_close_bdev(device); 1936 + 1939 1937 call_rcu(&device->rcu, free_device); 1940 1938 1941 1939 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; ··· 2031 2025 /* zero out the old super if it is writable */ 2032 2026 btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); 2033 2027 } 2028 + 2029 + btrfs_close_bdev(srcdev); 2030 + 2034 2031 call_rcu(&srcdev->rcu, free_device); 2035 2032 2036 2033 /* ··· 2089 2080 * the device_list_mutex lock. 2090 2081 */ 2091 2082 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); 2083 + 2084 + btrfs_close_bdev(tgtdev); 2092 2085 call_rcu(&tgtdev->rcu, free_device); 2093 2086 } 2094 2087