Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"This fixes some lockups in btrfs reported with rc1. It probably has
some performance impact because it is backing off our spinning locks
more often and switching to a blocking lock. I'll be able to nail
that down next week, but for now I want to get the lockups taken care
of.

Otherwise, some more stack reduction and assorted fixes."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: fix wrong error handle when the device is missing or is not writeable
Btrfs: fix deadlock when mounting a degraded fs
Btrfs: use bio_endio_nodec instead of open code
Btrfs: fix NULL pointer crash when running balance and scrub concurrently
btrfs: Skip scrubbing removed chunks to avoid -ENOENT.
Btrfs: fix broken free space cache after the system crashed
Btrfs: make free space cache write out functions more readable
Btrfs: remove unused wait queue in struct extent_buffer
Btrfs: fix deadlocks with trylock on tree nodes

+360 -173
+11 -2
fs/btrfs/ctree.h
··· 1259 1259 spinlock_t lock; 1260 1260 u64 pinned; 1261 1261 u64 reserved; 1262 + u64 delalloc_bytes; 1262 1263 u64 bytes_super; 1263 1264 u64 flags; 1264 1265 u64 sectorsize; 1265 1266 u64 cache_generation; 1267 + 1268 + /* 1269 + * It is just used for the delayed data space allocation because 1270 + * only the data space allocation and the relative metadata update 1271 + * can be done cross the transaction. 1272 + */ 1273 + struct rw_semaphore data_rwsem; 1266 1274 1267 1275 /* for raid56, this is a full stripe, without parity */ 1268 1276 unsigned long full_stripe_len; ··· 3324 3316 struct btrfs_key *ins); 3325 3317 int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, 3326 3318 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3327 - struct btrfs_key *ins, int is_data); 3319 + struct btrfs_key *ins, int is_data, int delalloc); 3328 3320 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3329 3321 struct extent_buffer *buf, int full_backref, int no_quota); 3330 3322 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ··· 3338 3330 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 3339 3331 u64 owner, u64 offset, int no_quota); 3340 3332 3341 - int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 3333 + int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len, 3334 + int delalloc); 3342 3335 int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 3343 3336 u64 start, u64 len); 3344 3337 void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+112 -31
fs/btrfs/extent-tree.c
··· 105 105 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 106 106 int dump_block_groups); 107 107 static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 108 - u64 num_bytes, int reserve); 108 + u64 num_bytes, int reserve, 109 + int delalloc); 109 110 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 110 111 u64 num_bytes); 111 112 int btrfs_pin_extent(struct btrfs_root *root, ··· 3261 3260 3262 3261 spin_lock(&block_group->lock); 3263 3262 if (block_group->cached != BTRFS_CACHE_FINISHED || 3264 - !btrfs_test_opt(root, SPACE_CACHE)) { 3263 + !btrfs_test_opt(root, SPACE_CACHE) || 3264 + block_group->delalloc_bytes) { 3265 3265 /* 3266 3266 * don't bother trying to write stuff out _if_ 3267 3267 * a) we're not cached, ··· 5615 5613 * @cache: The cache we are manipulating 5616 5614 * @num_bytes: The number of bytes in question 5617 5615 * @reserve: One of the reservation enums 5616 + * @delalloc: The blocks are allocated for the delalloc write 5618 5617 * 5619 5618 * This is called by the allocator when it reserves space, or by somebody who is 5620 5619 * freeing space that was never actually used on disk. For example if you ··· 5634 5631 * succeeds. 
5635 5632 */ 5636 5633 static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 5637 - u64 num_bytes, int reserve) 5634 + u64 num_bytes, int reserve, int delalloc) 5638 5635 { 5639 5636 struct btrfs_space_info *space_info = cache->space_info; 5640 5637 int ret = 0; ··· 5653 5650 num_bytes, 0); 5654 5651 space_info->bytes_may_use -= num_bytes; 5655 5652 } 5653 + 5654 + if (delalloc) 5655 + cache->delalloc_bytes += num_bytes; 5656 5656 } 5657 5657 } else { 5658 5658 if (cache->ro) 5659 5659 space_info->bytes_readonly += num_bytes; 5660 5660 cache->reserved -= num_bytes; 5661 5661 space_info->bytes_reserved -= num_bytes; 5662 + 5663 + if (delalloc) 5664 + cache->delalloc_bytes -= num_bytes; 5662 5665 } 5663 5666 spin_unlock(&cache->lock); 5664 5667 spin_unlock(&space_info->lock); ··· 6215 6206 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 6216 6207 6217 6208 btrfs_add_free_space(cache, buf->start, buf->len); 6218 - btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); 6209 + btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); 6219 6210 trace_btrfs_reserved_extent_free(root, buf->start, buf->len); 6220 6211 pin = 0; 6221 6212 } ··· 6374 6365 LOOP_NO_EMPTY_SIZE = 3, 6375 6366 }; 6376 6367 6368 + static inline void 6369 + btrfs_lock_block_group(struct btrfs_block_group_cache *cache, 6370 + int delalloc) 6371 + { 6372 + if (delalloc) 6373 + down_read(&cache->data_rwsem); 6374 + } 6375 + 6376 + static inline void 6377 + btrfs_grab_block_group(struct btrfs_block_group_cache *cache, 6378 + int delalloc) 6379 + { 6380 + btrfs_get_block_group(cache); 6381 + if (delalloc) 6382 + down_read(&cache->data_rwsem); 6383 + } 6384 + 6385 + static struct btrfs_block_group_cache * 6386 + btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, 6387 + struct btrfs_free_cluster *cluster, 6388 + int delalloc) 6389 + { 6390 + struct btrfs_block_group_cache *used_bg; 6391 + bool locked = false; 6392 + again: 6393 + 
spin_lock(&cluster->refill_lock); 6394 + if (locked) { 6395 + if (used_bg == cluster->block_group) 6396 + return used_bg; 6397 + 6398 + up_read(&used_bg->data_rwsem); 6399 + btrfs_put_block_group(used_bg); 6400 + } 6401 + 6402 + used_bg = cluster->block_group; 6403 + if (!used_bg) 6404 + return NULL; 6405 + 6406 + if (used_bg == block_group) 6407 + return used_bg; 6408 + 6409 + btrfs_get_block_group(used_bg); 6410 + 6411 + if (!delalloc) 6412 + return used_bg; 6413 + 6414 + if (down_read_trylock(&used_bg->data_rwsem)) 6415 + return used_bg; 6416 + 6417 + spin_unlock(&cluster->refill_lock); 6418 + down_read(&used_bg->data_rwsem); 6419 + locked = true; 6420 + goto again; 6421 + } 6422 + 6423 + static inline void 6424 + btrfs_release_block_group(struct btrfs_block_group_cache *cache, 6425 + int delalloc) 6426 + { 6427 + if (delalloc) 6428 + up_read(&cache->data_rwsem); 6429 + btrfs_put_block_group(cache); 6430 + } 6431 + 6377 6432 /* 6378 6433 * walks the btree of allocated extents and find a hole of a given size. 
6379 6434 * The key ins is changed to record the hole: ··· 6452 6379 static noinline int find_free_extent(struct btrfs_root *orig_root, 6453 6380 u64 num_bytes, u64 empty_size, 6454 6381 u64 hint_byte, struct btrfs_key *ins, 6455 - u64 flags) 6382 + u64 flags, int delalloc) 6456 6383 { 6457 6384 int ret = 0; 6458 6385 struct btrfs_root *root = orig_root->fs_info->extent_root; ··· 6540 6467 up_read(&space_info->groups_sem); 6541 6468 } else { 6542 6469 index = get_block_group_index(block_group); 6470 + btrfs_lock_block_group(block_group, delalloc); 6543 6471 goto have_block_group; 6544 6472 } 6545 6473 } else if (block_group) { ··· 6555 6481 u64 offset; 6556 6482 int cached; 6557 6483 6558 - btrfs_get_block_group(block_group); 6484 + btrfs_grab_block_group(block_group, delalloc); 6559 6485 search_start = block_group->key.objectid; 6560 6486 6561 6487 /* ··· 6603 6529 * the refill lock keeps out other 6604 6530 * people trying to start a new cluster 6605 6531 */ 6606 - spin_lock(&last_ptr->refill_lock); 6607 - used_block_group = last_ptr->block_group; 6608 - if (used_block_group != block_group && 6609 - (!used_block_group || 6610 - used_block_group->ro || 6611 - !block_group_bits(used_block_group, flags))) 6532 + used_block_group = btrfs_lock_cluster(block_group, 6533 + last_ptr, 6534 + delalloc); 6535 + if (!used_block_group) 6612 6536 goto refill_cluster; 6613 6537 6614 - if (used_block_group != block_group) 6615 - btrfs_get_block_group(used_block_group); 6538 + if (used_block_group != block_group && 6539 + (used_block_group->ro || 6540 + !block_group_bits(used_block_group, flags))) 6541 + goto release_cluster; 6616 6542 6617 6543 offset = btrfs_alloc_from_cluster(used_block_group, 6618 6544 last_ptr, ··· 6626 6552 used_block_group, 6627 6553 search_start, num_bytes); 6628 6554 if (used_block_group != block_group) { 6629 - btrfs_put_block_group(block_group); 6555 + btrfs_release_block_group(block_group, 6556 + delalloc); 6630 6557 block_group = used_block_group; 
6631 6558 } 6632 6559 goto checks; 6633 6560 } 6634 6561 6635 6562 WARN_ON(last_ptr->block_group != used_block_group); 6636 - if (used_block_group != block_group) 6637 - btrfs_put_block_group(used_block_group); 6638 - refill_cluster: 6563 + release_cluster: 6639 6564 /* If we are on LOOP_NO_EMPTY_SIZE, we can't 6640 6565 * set up a new clusters, so lets just skip it 6641 6566 * and let the allocator find whatever block ··· 6651 6578 * succeeding in the unclustered 6652 6579 * allocation. */ 6653 6580 if (loop >= LOOP_NO_EMPTY_SIZE && 6654 - last_ptr->block_group != block_group) { 6581 + used_block_group != block_group) { 6655 6582 spin_unlock(&last_ptr->refill_lock); 6583 + btrfs_release_block_group(used_block_group, 6584 + delalloc); 6656 6585 goto unclustered_alloc; 6657 6586 } 6658 6587 ··· 6664 6589 */ 6665 6590 btrfs_return_cluster_to_free_space(NULL, last_ptr); 6666 6591 6592 + if (used_block_group != block_group) 6593 + btrfs_release_block_group(used_block_group, 6594 + delalloc); 6595 + refill_cluster: 6667 6596 if (loop >= LOOP_NO_EMPTY_SIZE) { 6668 6597 spin_unlock(&last_ptr->refill_lock); 6669 6598 goto unclustered_alloc; ··· 6775 6696 BUG_ON(offset > search_start); 6776 6697 6777 6698 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 6778 - alloc_type); 6699 + alloc_type, delalloc); 6779 6700 if (ret == -EAGAIN) { 6780 6701 btrfs_add_free_space(block_group, offset, num_bytes); 6781 6702 goto loop; ··· 6787 6708 6788 6709 trace_btrfs_reserve_extent(orig_root, block_group, 6789 6710 search_start, num_bytes); 6790 - btrfs_put_block_group(block_group); 6711 + btrfs_release_block_group(block_group, delalloc); 6791 6712 break; 6792 6713 loop: 6793 6714 failed_cluster_refill = false; 6794 6715 failed_alloc = false; 6795 6716 BUG_ON(index != get_block_group_index(block_group)); 6796 - btrfs_put_block_group(block_group); 6717 + btrfs_release_block_group(block_group, delalloc); 6797 6718 } 6798 6719 up_read(&space_info->groups_sem); 6799 6720 ··· 6906 
6827 int btrfs_reserve_extent(struct btrfs_root *root, 6907 6828 u64 num_bytes, u64 min_alloc_size, 6908 6829 u64 empty_size, u64 hint_byte, 6909 - struct btrfs_key *ins, int is_data) 6830 + struct btrfs_key *ins, int is_data, int delalloc) 6910 6831 { 6911 6832 bool final_tried = false; 6912 6833 u64 flags; ··· 6916 6837 again: 6917 6838 WARN_ON(num_bytes < root->sectorsize); 6918 6839 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, 6919 - flags); 6840 + flags, delalloc); 6920 6841 6921 6842 if (ret == -ENOSPC) { 6922 6843 if (!final_tried && ins->offset) { ··· 6941 6862 } 6942 6863 6943 6864 static int __btrfs_free_reserved_extent(struct btrfs_root *root, 6944 - u64 start, u64 len, int pin) 6865 + u64 start, u64 len, 6866 + int pin, int delalloc) 6945 6867 { 6946 6868 struct btrfs_block_group_cache *cache; 6947 6869 int ret = 0; ··· 6961 6881 pin_down_extent(root, cache, start, len, 1); 6962 6882 else { 6963 6883 btrfs_add_free_space(cache, start, len); 6964 - btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); 6884 + btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); 6965 6885 } 6966 6886 btrfs_put_block_group(cache); 6967 6887 ··· 6971 6891 } 6972 6892 6973 6893 int btrfs_free_reserved_extent(struct btrfs_root *root, 6974 - u64 start, u64 len) 6894 + u64 start, u64 len, int delalloc) 6975 6895 { 6976 - return __btrfs_free_reserved_extent(root, start, len, 0); 6896 + return __btrfs_free_reserved_extent(root, start, len, 0, delalloc); 6977 6897 } 6978 6898 6979 6899 int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 6980 6900 u64 start, u64 len) 6981 6901 { 6982 - return __btrfs_free_reserved_extent(root, start, len, 1); 6902 + return __btrfs_free_reserved_extent(root, start, len, 1, 0); 6983 6903 } 6984 6904 6985 6905 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ··· 7194 7114 return -EINVAL; 7195 7115 7196 7116 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 7197 - 
RESERVE_ALLOC_NO_ACCOUNT); 7117 + RESERVE_ALLOC_NO_ACCOUNT, 0); 7198 7118 BUG_ON(ret); /* logic error */ 7199 7119 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 7200 7120 0, owner, offset, ins, 1); ··· 7336 7256 return ERR_CAST(block_rsv); 7337 7257 7338 7258 ret = btrfs_reserve_extent(root, blocksize, blocksize, 7339 - empty_size, hint, &ins, 0); 7259 + empty_size, hint, &ins, 0, 0); 7340 7260 if (ret) { 7341 7261 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 7342 7262 return ERR_PTR(ret); ··· 8739 8659 start); 8740 8660 atomic_set(&cache->count, 1); 8741 8661 spin_lock_init(&cache->lock); 8662 + init_rwsem(&cache->data_rwsem); 8742 8663 INIT_LIST_HEAD(&cache->list); 8743 8664 INIT_LIST_HEAD(&cache->cluster_list); 8744 8665 INIT_LIST_HEAD(&cache->new_bg_list);
-1
fs/btrfs/extent_io.h
··· 158 158 * to unlock 159 159 */ 160 160 wait_queue_head_t read_lock_wq; 161 - wait_queue_head_t lock_wq; 162 161 struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; 163 162 #ifdef CONFIG_BTRFS_DEBUG 164 163 struct list_head leak_list;
+2
fs/btrfs/extent_map.c
··· 75 75 if (atomic_dec_and_test(&em->refs)) { 76 76 WARN_ON(extent_map_in_tree(em)); 77 77 WARN_ON(!list_empty(&em->list)); 78 + if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) 79 + kfree(em->bdev); 78 80 kmem_cache_free(extent_map_cache, em); 79 81 } 80 82 }
+1
fs/btrfs/extent_map.h
··· 15 15 #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ 16 16 #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ 17 17 #define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ 18 + #define EXTENT_FLAG_FS_MAPPING 6 /* filesystem extent mapping type */ 18 19 19 20 struct extent_map { 20 21 struct rb_node rb_node;
+127 -67
fs/btrfs/free-space-cache.c
··· 274 274 }; 275 275 276 276 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, 277 - struct btrfs_root *root) 277 + struct btrfs_root *root, int write) 278 278 { 279 + int num_pages; 280 + int check_crcs = 0; 281 + 282 + num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 283 + PAGE_CACHE_SHIFT; 284 + 285 + if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 286 + check_crcs = 1; 287 + 288 + /* Make sure we can fit our crcs into the first page */ 289 + if (write && check_crcs && 290 + (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) 291 + return -ENOSPC; 292 + 279 293 memset(io_ctl, 0, sizeof(struct io_ctl)); 280 - io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 281 - PAGE_CACHE_SHIFT; 282 - io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, 283 - GFP_NOFS); 294 + 295 + io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); 284 296 if (!io_ctl->pages) 285 297 return -ENOMEM; 298 + 299 + io_ctl->num_pages = num_pages; 286 300 io_ctl->root = root; 287 - if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 288 - io_ctl->check_crcs = 1; 301 + io_ctl->check_crcs = check_crcs; 302 + 289 303 return 0; 290 304 } 291 305 ··· 680 666 generation = btrfs_free_space_generation(leaf, header); 681 667 btrfs_release_path(path); 682 668 669 + if (!BTRFS_I(inode)->generation) { 670 + btrfs_info(root->fs_info, 671 + "The free space cache file (%llu) is invalid. 
skip it\n", 672 + offset); 673 + return 0; 674 + } 675 + 683 676 if (BTRFS_I(inode)->generation != generation) { 684 677 btrfs_err(root->fs_info, 685 678 "free space inode generation (%llu) " ··· 698 677 if (!num_entries) 699 678 return 0; 700 679 701 - ret = io_ctl_init(&io_ctl, inode, root); 680 + ret = io_ctl_init(&io_ctl, inode, root, 0); 702 681 if (ret) 703 682 return ret; 704 683 ··· 978 957 } 979 958 980 959 static noinline_for_stack int 981 - add_ioctl_entries(struct btrfs_root *root, 982 - struct inode *inode, 983 - struct btrfs_block_group_cache *block_group, 984 - struct io_ctl *io_ctl, 985 - struct extent_state **cached_state, 986 - struct list_head *bitmap_list, 987 - int *entries) 960 + write_pinned_extent_entries(struct btrfs_root *root, 961 + struct btrfs_block_group_cache *block_group, 962 + struct io_ctl *io_ctl, 963 + int *entries) 988 964 { 989 965 u64 start, extent_start, extent_end, len; 990 - struct list_head *pos, *n; 991 966 struct extent_io_tree *unpin = NULL; 992 967 int ret; 968 + 969 + if (!block_group) 970 + return 0; 993 971 994 972 /* 995 973 * We want to add any pinned extents to our free space cache ··· 999 979 */ 1000 980 unpin = root->fs_info->pinned_extents; 1001 981 1002 - if (block_group) 1003 - start = block_group->key.objectid; 982 + start = block_group->key.objectid; 1004 983 1005 - while (block_group && (start < block_group->key.objectid + 1006 - block_group->key.offset)) { 984 + while (start < block_group->key.objectid + block_group->key.offset) { 1007 985 ret = find_first_extent_bit(unpin, start, 1008 986 &extent_start, &extent_end, 1009 987 EXTENT_DIRTY, NULL); 1010 - if (ret) { 1011 - ret = 0; 1012 - break; 1013 - } 988 + if (ret) 989 + return 0; 1014 990 1015 991 /* This pinned extent is out of our range */ 1016 992 if (extent_start >= block_group->key.objectid + 1017 993 block_group->key.offset) 1018 - break; 994 + return 0; 1019 995 1020 996 extent_start = max(extent_start, start); 1021 997 extent_end = 
min(block_group->key.objectid + ··· 1021 1005 *entries += 1; 1022 1006 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1023 1007 if (ret) 1024 - goto out_nospc; 1008 + return -ENOSPC; 1025 1009 1026 1010 start = extent_end; 1027 1011 } 1012 + 1013 + return 0; 1014 + } 1015 + 1016 + static noinline_for_stack int 1017 + write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list) 1018 + { 1019 + struct list_head *pos, *n; 1020 + int ret; 1028 1021 1029 1022 /* Write out the bitmaps */ 1030 1023 list_for_each_safe(pos, n, bitmap_list) { ··· 1042 1017 1043 1018 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1044 1019 if (ret) 1045 - goto out_nospc; 1020 + return -ENOSPC; 1046 1021 list_del_init(&entry->list); 1047 1022 } 1048 1023 1049 - /* Zero out the rest of the pages just to make sure */ 1050 - io_ctl_zero_remaining_pages(io_ctl); 1024 + return 0; 1025 + } 1051 1026 1052 - ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages, 1053 - 0, i_size_read(inode), cached_state); 1054 - io_ctl_drop_pages(io_ctl); 1055 - unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1056 - i_size_read(inode) - 1, cached_state, GFP_NOFS); 1057 - 1058 - if (ret) 1059 - goto fail; 1027 + static int flush_dirty_cache(struct inode *inode) 1028 + { 1029 + int ret; 1060 1030 1061 1031 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1062 - if (ret) { 1032 + if (ret) 1063 1033 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1064 1034 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1065 1035 GFP_NOFS); 1066 - goto fail; 1067 - } 1068 - return 0; 1069 1036 1070 - fail: 1071 - return -1; 1072 - 1073 - out_nospc: 1074 - return -ENOSPC; 1037 + return ret; 1075 1038 } 1076 1039 1077 1040 static void noinline_for_stack ··· 1069 1056 struct list_head *bitmap_list) 1070 1057 { 1071 1058 struct list_head *pos, *n; 1059 + 1072 1060 list_for_each_safe(pos, n, bitmap_list) { 1073 1061 struct btrfs_free_space *entry = 1074 1062 list_entry(pos, struct 
btrfs_free_space, list); ··· 1102 1088 { 1103 1089 struct extent_state *cached_state = NULL; 1104 1090 struct io_ctl io_ctl; 1105 - struct list_head bitmap_list; 1091 + LIST_HEAD(bitmap_list); 1106 1092 int entries = 0; 1107 1093 int bitmaps = 0; 1108 1094 int ret; 1109 - int err = -1; 1110 - 1111 - INIT_LIST_HEAD(&bitmap_list); 1112 1095 1113 1096 if (!i_size_read(inode)) 1114 1097 return -1; 1115 1098 1116 - ret = io_ctl_init(&io_ctl, inode, root); 1099 + ret = io_ctl_init(&io_ctl, inode, root, 1); 1117 1100 if (ret) 1118 1101 return -1; 1102 + 1103 + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1104 + down_write(&block_group->data_rwsem); 1105 + spin_lock(&block_group->lock); 1106 + if (block_group->delalloc_bytes) { 1107 + block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1108 + spin_unlock(&block_group->lock); 1109 + up_write(&block_group->data_rwsem); 1110 + BTRFS_I(inode)->generation = 0; 1111 + ret = 0; 1112 + goto out; 1113 + } 1114 + spin_unlock(&block_group->lock); 1115 + } 1119 1116 1120 1117 /* Lock all pages first so we can lock the extent safely. 
*/ 1121 1118 io_ctl_prepare_pages(&io_ctl, inode, 0); ··· 1134 1109 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1135 1110 0, &cached_state); 1136 1111 1137 - 1138 - /* Make sure we can fit our crcs into the first page */ 1139 - if (io_ctl.check_crcs && 1140 - (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) 1141 - goto out_nospc; 1142 - 1143 1112 io_ctl_set_generation(&io_ctl, trans->transid); 1144 1113 1114 + /* Write out the extent entries in the free space cache */ 1145 1115 ret = write_cache_extent_entries(&io_ctl, ctl, 1146 1116 block_group, &entries, &bitmaps, 1147 1117 &bitmap_list); 1148 1118 if (ret) 1149 1119 goto out_nospc; 1150 1120 1151 - ret = add_ioctl_entries(root, inode, block_group, &io_ctl, 1152 - &cached_state, &bitmap_list, &entries); 1153 - 1154 - if (ret == -ENOSPC) 1121 + /* 1122 + * Some spaces that are freed in the current transaction are pinned, 1123 + * they will be added into free space cache after the transaction is 1124 + * committed, we shouldn't lose them. 1125 + */ 1126 + ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); 1127 + if (ret) 1155 1128 goto out_nospc; 1156 - else if (ret) 1129 + 1130 + /* At last, we write out all the bitmaps. */ 1131 + ret = write_bitmap_entries(&io_ctl, &bitmap_list); 1132 + if (ret) 1133 + goto out_nospc; 1134 + 1135 + /* Zero out the rest of the pages just to make sure */ 1136 + io_ctl_zero_remaining_pages(&io_ctl); 1137 + 1138 + /* Everything is written out, now we dirty the pages in the file. 
*/ 1139 + ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, 1140 + 0, i_size_read(inode), &cached_state); 1141 + if (ret) 1142 + goto out_nospc; 1143 + 1144 + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1145 + up_write(&block_group->data_rwsem); 1146 + /* 1147 + * Release the pages and unlock the extent, we will flush 1148 + * them out later 1149 + */ 1150 + io_ctl_drop_pages(&io_ctl); 1151 + 1152 + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1153 + i_size_read(inode) - 1, &cached_state, GFP_NOFS); 1154 + 1155 + /* Flush the dirty pages in the cache file. */ 1156 + ret = flush_dirty_cache(inode); 1157 + if (ret) 1157 1158 goto out; 1158 1159 1159 - err = update_cache_item(trans, root, inode, path, offset, 1160 + /* Update the cache item to tell everyone this cache file is valid. */ 1161 + ret = update_cache_item(trans, root, inode, path, offset, 1160 1162 entries, bitmaps); 1161 - 1162 1163 out: 1163 1164 io_ctl_free(&io_ctl); 1164 - if (err) { 1165 + if (ret) { 1165 1166 invalidate_inode_pages2(inode->i_mapping); 1166 1167 BTRFS_I(inode)->generation = 0; 1167 1168 } 1168 1169 btrfs_update_inode(trans, root, inode); 1169 - return err; 1170 + return ret; 1170 1171 1171 1172 out_nospc: 1172 - 1173 1173 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); 1174 + 1175 + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1176 + up_write(&block_group->data_rwsem); 1177 + 1174 1178 goto out; 1175 1179 } 1176 1180 ··· 1216 1162 1217 1163 spin_lock(&block_group->lock); 1218 1164 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1165 + spin_unlock(&block_group->lock); 1166 + return 0; 1167 + } 1168 + 1169 + if (block_group->delalloc_bytes) { 1170 + block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1219 1171 spin_unlock(&block_group->lock); 1220 1172 return 0; 1221 1173 }
+30 -11
fs/btrfs/inode.c
··· 693 693 ret = btrfs_reserve_extent(root, 694 694 async_extent->compressed_size, 695 695 async_extent->compressed_size, 696 - 0, alloc_hint, &ins, 1); 696 + 0, alloc_hint, &ins, 1, 1); 697 697 if (ret) { 698 698 int i; 699 699 ··· 794 794 out: 795 795 return ret; 796 796 out_free_reserve: 797 - btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 797 + btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); 798 798 out_free: 799 799 extent_clear_unlock_delalloc(inode, async_extent->start, 800 800 async_extent->start + ··· 917 917 cur_alloc_size = disk_num_bytes; 918 918 ret = btrfs_reserve_extent(root, cur_alloc_size, 919 919 root->sectorsize, 0, alloc_hint, 920 - &ins, 1); 920 + &ins, 1, 1); 921 921 if (ret < 0) 922 922 goto out_unlock; 923 923 ··· 995 995 return ret; 996 996 997 997 out_reserve: 998 - btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 998 + btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); 999 999 out_unlock: 1000 1000 extent_clear_unlock_delalloc(inode, start, end, locked_page, 1001 1001 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | ··· 2599 2599 return NULL; 2600 2600 } 2601 2601 2602 + static void btrfs_release_delalloc_bytes(struct btrfs_root *root, 2603 + u64 start, u64 len) 2604 + { 2605 + struct btrfs_block_group_cache *cache; 2606 + 2607 + cache = btrfs_lookup_block_group(root->fs_info, start); 2608 + ASSERT(cache); 2609 + 2610 + spin_lock(&cache->lock); 2611 + cache->delalloc_bytes -= len; 2612 + spin_unlock(&cache->lock); 2613 + 2614 + btrfs_put_block_group(cache); 2615 + } 2616 + 2602 2617 /* as ordered data IO finishes, this gets called so we can finish 2603 2618 * an ordered extent if the range of bytes in the file it covers are 2604 2619 * fully written. 
··· 2713 2698 logical_len, logical_len, 2714 2699 compress_type, 0, 0, 2715 2700 BTRFS_FILE_EXTENT_REG); 2701 + if (!ret) 2702 + btrfs_release_delalloc_bytes(root, 2703 + ordered_extent->start, 2704 + ordered_extent->disk_len); 2716 2705 } 2717 2706 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 2718 2707 ordered_extent->file_offset, ordered_extent->len, ··· 2769 2750 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && 2770 2751 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) 2771 2752 btrfs_free_reserved_extent(root, ordered_extent->start, 2772 - ordered_extent->disk_len); 2753 + ordered_extent->disk_len, 1); 2773 2754 } 2774 2755 2775 2756 ··· 6554 6535 6555 6536 alloc_hint = get_extent_allocation_hint(inode, start, len); 6556 6537 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, 6557 - alloc_hint, &ins, 1); 6538 + alloc_hint, &ins, 1, 1); 6558 6539 if (ret) 6559 6540 return ERR_PTR(ret); 6560 6541 6561 6542 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 6562 6543 ins.offset, ins.offset, ins.offset, 0); 6563 6544 if (IS_ERR(em)) { 6564 - btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6545 + btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); 6565 6546 return em; 6566 6547 } 6567 6548 6568 6549 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, 6569 6550 ins.offset, ins.offset, 0); 6570 6551 if (ret) { 6571 - btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6552 + btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); 6572 6553 free_extent_map(em); 6573 6554 return ERR_PTR(ret); 6574 6555 } ··· 7456 7437 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && 7457 7438 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) 7458 7439 btrfs_free_reserved_extent(root, ordered->start, 7459 - ordered->disk_len); 7440 + ordered->disk_len, 1); 7460 7441 btrfs_put_ordered_extent(ordered); 7461 7442 btrfs_put_ordered_extent(ordered); 7462 7443 } ··· 8827 8808 cur_bytes = 
min(num_bytes, 256ULL * 1024 * 1024); 8828 8809 cur_bytes = max(cur_bytes, min_size); 8829 8810 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, 8830 - *alloc_hint, &ins, 1); 8811 + *alloc_hint, &ins, 1, 0); 8831 8812 if (ret) { 8832 8813 if (own_trans) 8833 8814 btrfs_end_transaction(trans, root); ··· 8841 8822 BTRFS_FILE_EXTENT_PREALLOC); 8842 8823 if (ret) { 8843 8824 btrfs_free_reserved_extent(root, ins.objectid, 8844 - ins.offset); 8825 + ins.offset, 0); 8845 8826 btrfs_abort_transaction(trans, root, ret); 8846 8827 if (own_trans) 8847 8828 btrfs_end_transaction(trans, root);
+46 -34
fs/btrfs/locking.c
··· 33 33 */ 34 34 void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) 35 35 { 36 - if (eb->lock_nested) { 37 - read_lock(&eb->lock); 38 - if (eb->lock_nested && current->pid == eb->lock_owner) { 39 - read_unlock(&eb->lock); 40 - return; 41 - } 42 - read_unlock(&eb->lock); 43 - } 36 + /* 37 + * no lock is required. The lock owner may change if 38 + * we have a read lock, but it won't change to or away 39 + * from us. If we have the write lock, we are the owner 40 + * and it'll never change. 41 + */ 42 + if (eb->lock_nested && current->pid == eb->lock_owner) 43 + return; 44 44 if (rw == BTRFS_WRITE_LOCK) { 45 45 if (atomic_read(&eb->blocking_writers) == 0) { 46 46 WARN_ON(atomic_read(&eb->spinning_writers) != 1); ··· 65 65 */ 66 66 void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) 67 67 { 68 - if (eb->lock_nested) { 69 - read_lock(&eb->lock); 70 - if (eb->lock_nested && current->pid == eb->lock_owner) { 71 - read_unlock(&eb->lock); 72 - return; 73 - } 74 - read_unlock(&eb->lock); 75 - } 68 + /* 69 + * no lock is required. The lock owner may change if 70 + * we have a read lock, but it won't change to or away 71 + * from us. If we have the write lock, we are the owner 72 + * and it'll never change. 
73 + */ 74 + if (eb->lock_nested && current->pid == eb->lock_owner) 75 + return; 76 + 76 77 if (rw == BTRFS_WRITE_LOCK_BLOCKING) { 77 78 BUG_ON(atomic_read(&eb->blocking_writers) != 1); 78 79 write_lock(&eb->lock); ··· 100 99 void btrfs_tree_read_lock(struct extent_buffer *eb) 101 100 { 102 101 again: 102 + BUG_ON(!atomic_read(&eb->blocking_writers) && 103 + current->pid == eb->lock_owner); 104 + 103 105 read_lock(&eb->lock); 104 106 if (atomic_read(&eb->blocking_writers) && 105 107 current->pid == eb->lock_owner) { ··· 136 132 if (atomic_read(&eb->blocking_writers)) 137 133 return 0; 138 134 139 - read_lock(&eb->lock); 135 + if (!read_trylock(&eb->lock)) 136 + return 0; 137 + 140 138 if (atomic_read(&eb->blocking_writers)) { 141 139 read_unlock(&eb->lock); 142 140 return 0; ··· 157 151 if (atomic_read(&eb->blocking_writers) || 158 152 atomic_read(&eb->blocking_readers)) 159 153 return 0; 160 - write_lock(&eb->lock); 154 + 155 + if (!write_trylock(&eb->lock)) 156 + return 0; 157 + 161 158 if (atomic_read(&eb->blocking_writers) || 162 159 atomic_read(&eb->blocking_readers)) { 163 160 write_unlock(&eb->lock); ··· 177 168 */ 178 169 void btrfs_tree_read_unlock(struct extent_buffer *eb) 179 170 { 180 - if (eb->lock_nested) { 181 - read_lock(&eb->lock); 182 - if (eb->lock_nested && current->pid == eb->lock_owner) { 183 - eb->lock_nested = 0; 184 - read_unlock(&eb->lock); 185 - return; 186 - } 187 - read_unlock(&eb->lock); 171 + /* 172 + * if we're nested, we have the write lock. No new locking 173 + * is needed as long as we are the lock owner. 174 + * The write unlock will do a barrier for us, and the lock_nested 175 + * field only matters to the lock owner. 
176 + */ 177 + if (eb->lock_nested && current->pid == eb->lock_owner) { 178 + eb->lock_nested = 0; 179 + return; 188 180 } 189 181 btrfs_assert_tree_read_locked(eb); 190 182 WARN_ON(atomic_read(&eb->spinning_readers) == 0); ··· 199 189 */ 200 190 void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) 201 191 { 202 - if (eb->lock_nested) { 203 - read_lock(&eb->lock); 204 - if (eb->lock_nested && current->pid == eb->lock_owner) { 205 - eb->lock_nested = 0; 206 - read_unlock(&eb->lock); 207 - return; 208 - } 209 - read_unlock(&eb->lock); 192 + /* 193 + * if we're nested, we have the write lock. No new locking 194 + * is needed as long as we are the lock owner. 195 + * The write unlock will do a barrier for us, and the lock_nested 196 + * field only matters to the lock owner. 197 + */ 198 + if (eb->lock_nested && current->pid == eb->lock_owner) { 199 + eb->lock_nested = 0; 200 + return; 210 201 } 211 202 btrfs_assert_tree_read_locked(eb); 212 203 WARN_ON(atomic_read(&eb->blocking_readers) == 0); ··· 255 244 BUG_ON(blockers > 1); 256 245 257 246 btrfs_assert_tree_locked(eb); 247 + eb->lock_owner = 0; 258 248 atomic_dec(&eb->write_locks); 259 249 260 250 if (blockers) {
+9 -10
fs/btrfs/scrub.c
··· 2725 2725 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 2726 2726 length = btrfs_dev_extent_length(l, dev_extent); 2727 2727 2728 - if (found_key.offset + length <= start) { 2729 - key.offset = found_key.offset + length; 2730 - btrfs_release_path(path); 2731 - continue; 2732 - } 2728 + if (found_key.offset + length <= start) 2729 + goto skip; 2733 2730 2734 2731 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 2735 2732 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); ··· 2737 2740 * the chunk from going away while we scrub it 2738 2741 */ 2739 2742 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 2740 - if (!cache) { 2741 - ret = -ENOENT; 2742 - break; 2743 - } 2743 + 2744 + /* some chunks are removed but not committed to disk yet, 2745 + * continue scrubbing */ 2746 + if (!cache) 2747 + goto skip; 2748 + 2744 2749 dev_replace->cursor_right = found_key.offset + length; 2745 2750 dev_replace->cursor_left = found_key.offset; 2746 2751 dev_replace->item_needs_writeback = 1; ··· 2801 2802 2802 2803 dev_replace->cursor_left = dev_replace->cursor_right; 2803 2804 dev_replace->item_needs_writeback = 1; 2804 - 2805 + skip: 2805 2806 key.offset = found_key.offset + length; 2806 2807 btrfs_release_path(path); 2807 2808 }
+19 -17
fs/btrfs/volumes.c
··· 2543 2543 remove_extent_mapping(em_tree, em); 2544 2544 write_unlock(&em_tree->lock); 2545 2545 2546 - kfree(map); 2547 - em->bdev = NULL; 2548 - 2549 2546 /* once for the tree */ 2550 2547 free_extent_map(em); 2551 2548 /* once for us */ ··· 4298 4301 4299 4302 em = alloc_extent_map(); 4300 4303 if (!em) { 4304 + kfree(map); 4301 4305 ret = -ENOMEM; 4302 4306 goto error; 4303 4307 } 4308 + set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); 4304 4309 em->bdev = (struct block_device *)map; 4305 4310 em->start = start; 4306 4311 em->len = num_bytes; ··· 4345 4346 /* One for the tree reference */ 4346 4347 free_extent_map(em); 4347 4348 error: 4348 - kfree(map); 4349 4349 kfree(devices_info); 4350 4350 return ret; 4351 4351 } ··· 4556 4558 write_unlock(&tree->map_tree.lock); 4557 4559 if (!em) 4558 4560 break; 4559 - kfree(em->bdev); 4560 4561 /* once for us */ 4561 4562 free_extent_map(em); 4562 4563 /* once for the tree */ ··· 5359 5362 return 0; 5360 5363 } 5361 5364 5365 + static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) 5366 + { 5367 + if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) 5368 + bio_endio_nodec(bio, err); 5369 + else 5370 + bio_endio(bio, err); 5371 + kfree(bbio); 5372 + } 5373 + 5362 5374 static void btrfs_end_bio(struct bio *bio, int err) 5363 5375 { 5364 5376 struct btrfs_bio *bbio = bio->bi_private; ··· 5408 5402 bio = bbio->orig_bio; 5409 5403 } 5410 5404 5411 - /* 5412 - * We have original bio now. 
So increment bi_remaining to 5413 - * account for it in endio 5414 - */ 5415 - atomic_inc(&bio->bi_remaining); 5416 - 5417 5405 bio->bi_private = bbio->private; 5418 5406 bio->bi_end_io = bbio->end_io; 5419 5407 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; ··· 5424 5424 set_bit(BIO_UPTODATE, &bio->bi_flags); 5425 5425 err = 0; 5426 5426 } 5427 - kfree(bbio); 5428 5427 5429 - bio_endio(bio, err); 5428 + btrfs_end_bbio(bbio, bio, err); 5430 5429 } else if (!is_orig_bio) { 5431 5430 bio_put(bio); 5432 5431 } ··· 5588 5589 { 5589 5590 atomic_inc(&bbio->error); 5590 5591 if (atomic_dec_and_test(&bbio->stripes_pending)) { 5592 + /* Should be the original bio. */ 5593 + WARN_ON(bio != bbio->orig_bio); 5594 + 5591 5595 bio->bi_private = bbio->private; 5592 5596 bio->bi_end_io = bbio->end_io; 5593 5597 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5594 5598 bio->bi_iter.bi_sector = logical >> 9; 5595 - kfree(bbio); 5596 - bio_endio(bio, -EIO); 5599 + 5600 + btrfs_end_bbio(bbio, bio, -EIO); 5597 5601 } 5598 5602 } 5599 5603 ··· 5683 5681 BUG_ON(!bio); /* -ENOMEM */ 5684 5682 } else { 5685 5683 bio = first_bio; 5684 + bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; 5686 5685 } 5687 5686 5688 5687 submit_stripe_bio(root, bbio, bio, ··· 5825 5822 return -ENOMEM; 5826 5823 } 5827 5824 5825 + set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); 5828 5826 em->bdev = (struct block_device *)map; 5829 5827 em->start = logical; 5830 5828 em->len = length; ··· 5850 5846 map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, 5851 5847 uuid, NULL); 5852 5848 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { 5853 - kfree(map); 5854 5849 free_extent_map(em); 5855 5850 return -EIO; 5856 5851 } ··· 5857 5854 map->stripes[i].dev = 5858 5855 add_missing_dev(root, devid, uuid); 5859 5856 if (!map->stripes[i].dev) { 5860 - kfree(map); 5861 5857 free_extent_map(em); 5862 5858 return -EIO; 5863 5859 }
+3
fs/btrfs/volumes.h
··· 190 190 struct btrfs_bio; 191 191 typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); 192 192 193 + #define BTRFS_BIO_ORIG_BIO_SUBMITTED 0x1 194 + 193 195 struct btrfs_bio { 194 196 atomic_t stripes_pending; 195 197 struct btrfs_fs_info *fs_info; 196 198 bio_end_io_t *end_io; 197 199 struct bio *orig_bio; 200 + unsigned long flags; 198 201 void *private; 199 202 atomic_t error; 200 203 int max_errors;