Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"A few more btrfs fixes.

These range from corner cases Filipe found in the new free space cache
writeback to a grab bag of fixes from the list."

* 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: btrfs_release_extent_buffer_page didn't free pages of dummy extent
Btrfs: fill ->last_trans for delayed inode in btrfs_fill_inode.
btrfs: unlock i_mutex after attempting to delete subvolume during send
btrfs: check io_ctl_prepare_pages return in __btrfs_write_out_cache
btrfs: fix race on ENOMEM in alloc_extent_buffer
btrfs: handle ENOMEM in btrfs_alloc_tree_block
Btrfs: fix find_free_dev_extent() malfunction in case device tree has hole
Btrfs: don't check for delalloc_bytes in cache_save_setup
Btrfs: fix deadlock when starting writeback of bg caches
Btrfs: fix race between start dirty bg cache writeout and bg deletion

+119 -78
+2
fs/btrfs/delayed-inode.c
··· 1802 1802 set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); 1803 1803 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); 1804 1804 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); 1805 + BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item); 1806 + 1805 1807 inode->i_version = btrfs_stack_inode_sequence(inode_item); 1806 1808 inode->i_rdev = 0; 1807 1809 *rdev = btrfs_stack_inode_rdev(inode_item);
+57 -33
fs/btrfs/extent-tree.c
··· 3178 3178 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 3179 3179 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 3180 3180 btrfs_mark_buffer_dirty(leaf); 3181 - btrfs_release_path(path); 3182 3181 fail: 3182 + btrfs_release_path(path); 3183 3183 if (ret) 3184 3184 btrfs_abort_transaction(trans, root, ret); 3185 3185 return ret; ··· 3305 3305 3306 3306 spin_lock(&block_group->lock); 3307 3307 if (block_group->cached != BTRFS_CACHE_FINISHED || 3308 - !btrfs_test_opt(root, SPACE_CACHE) || 3309 - block_group->delalloc_bytes) { 3308 + !btrfs_test_opt(root, SPACE_CACHE)) { 3310 3309 /* 3311 3310 * don't bother trying to write stuff out _if_ 3312 3311 * a) we're not cached, ··· 3407 3408 int loops = 0; 3408 3409 3409 3410 spin_lock(&cur_trans->dirty_bgs_lock); 3410 - if (!list_empty(&cur_trans->dirty_bgs)) { 3411 - list_splice_init(&cur_trans->dirty_bgs, &dirty); 3411 + if (list_empty(&cur_trans->dirty_bgs)) { 3412 + spin_unlock(&cur_trans->dirty_bgs_lock); 3413 + return 0; 3412 3414 } 3415 + list_splice_init(&cur_trans->dirty_bgs, &dirty); 3413 3416 spin_unlock(&cur_trans->dirty_bgs_lock); 3414 3417 3415 3418 again: 3416 - if (list_empty(&dirty)) { 3417 - btrfs_free_path(path); 3418 - return 0; 3419 - } 3420 - 3421 3419 /* 3422 3420 * make sure all the block groups on our dirty list actually 3423 3421 * exist ··· 3427 3431 return -ENOMEM; 3428 3432 } 3429 3433 3434 + /* 3435 + * cache_write_mutex is here only to save us from balance or automatic 3436 + * removal of empty block groups deleting this block group while we are 3437 + * writing out the cache 3438 + */ 3439 + mutex_lock(&trans->transaction->cache_write_mutex); 3430 3440 while (!list_empty(&dirty)) { 3431 3441 cache = list_first_entry(&dirty, 3432 3442 struct btrfs_block_group_cache, 3433 3443 dirty_list); 3434 - 3435 - /* 3436 - * cache_write_mutex is here only to save us from balance 3437 - * deleting this block group while we are writing out the 3438 - * cache 3439 - */ 3440 - 
mutex_lock(&trans->transaction->cache_write_mutex); 3441 - 3442 3444 /* 3443 3445 * this can happen if something re-dirties a block 3444 3446 * group that is already under IO. Just wait for it to ··· 3489 3495 } 3490 3496 if (!ret) 3491 3497 ret = write_one_cache_group(trans, root, path, cache); 3492 - mutex_unlock(&trans->transaction->cache_write_mutex); 3493 3498 3494 3499 /* if its not on the io list, we need to put the block group */ 3495 3500 if (should_put) ··· 3496 3503 3497 3504 if (ret) 3498 3505 break; 3506 + 3507 + /* 3508 + * Avoid blocking other tasks for too long. It might even save 3509 + * us from writing caches for block groups that are going to be 3510 + * removed. 3511 + */ 3512 + mutex_unlock(&trans->transaction->cache_write_mutex); 3513 + mutex_lock(&trans->transaction->cache_write_mutex); 3499 3514 } 3515 + mutex_unlock(&trans->transaction->cache_write_mutex); 3500 3516 3501 3517 /* 3502 3518 * go through delayed refs for all the stuff we've just kicked off ··· 3516 3514 loops++; 3517 3515 spin_lock(&cur_trans->dirty_bgs_lock); 3518 3516 list_splice_init(&cur_trans->dirty_bgs, &dirty); 3517 + /* 3518 + * dirty_bgs_lock protects us from concurrent block group 3519 + * deletes too (not just cache_write_mutex). 3520 + */ 3521 + if (!list_empty(&dirty)) { 3522 + spin_unlock(&cur_trans->dirty_bgs_lock); 3523 + goto again; 3524 + } 3519 3525 spin_unlock(&cur_trans->dirty_bgs_lock); 3520 - goto again; 3521 3526 } 3522 3527 3523 3528 btrfs_free_path(path); ··· 7546 7537 * returns the key for the extent through ins, and a tree buffer for 7547 7538 * the first block of the extent through buf. 7548 7539 * 7549 - * returns the tree buffer or NULL. 7540 + * returns the tree buffer or an ERR_PTR on error. 
7550 7541 */ 7551 7542 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, 7552 7543 struct btrfs_root *root, ··· 7557 7548 struct btrfs_key ins; 7558 7549 struct btrfs_block_rsv *block_rsv; 7559 7550 struct extent_buffer *buf; 7551 + struct btrfs_delayed_extent_op *extent_op; 7560 7552 u64 flags = 0; 7561 7553 int ret; 7562 7554 u32 blocksize = root->nodesize; ··· 7578 7568 7579 7569 ret = btrfs_reserve_extent(root, blocksize, blocksize, 7580 7570 empty_size, hint, &ins, 0, 0); 7581 - if (ret) { 7582 - unuse_block_rsv(root->fs_info, block_rsv, blocksize); 7583 - return ERR_PTR(ret); 7584 - } 7571 + if (ret) 7572 + goto out_unuse; 7585 7573 7586 7574 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); 7587 - BUG_ON(IS_ERR(buf)); /* -ENOMEM */ 7575 + if (IS_ERR(buf)) { 7576 + ret = PTR_ERR(buf); 7577 + goto out_free_reserved; 7578 + } 7588 7579 7589 7580 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 7590 7581 if (parent == 0) ··· 7595 7584 BUG_ON(parent > 0); 7596 7585 7597 7586 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 7598 - struct btrfs_delayed_extent_op *extent_op; 7599 7587 extent_op = btrfs_alloc_delayed_extent_op(); 7600 - BUG_ON(!extent_op); /* -ENOMEM */ 7588 + if (!extent_op) { 7589 + ret = -ENOMEM; 7590 + goto out_free_buf; 7591 + } 7601 7592 if (key) 7602 7593 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 7603 7594 else ··· 7614 7601 extent_op->level = level; 7615 7602 7616 7603 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, 7617 - ins.objectid, 7618 - ins.offset, parent, root_objectid, 7619 - level, BTRFS_ADD_DELAYED_EXTENT, 7620 - extent_op, 0); 7621 - BUG_ON(ret); /* -ENOMEM */ 7604 + ins.objectid, ins.offset, 7605 + parent, root_objectid, level, 7606 + BTRFS_ADD_DELAYED_EXTENT, 7607 + extent_op, 0); 7608 + if (ret) 7609 + goto out_free_delayed; 7622 7610 } 7623 7611 return buf; 7612 + 7613 + out_free_delayed: 7614 + btrfs_free_delayed_extent_op(extent_op); 7615 + out_free_buf: 7616 + 
free_extent_buffer(buf); 7617 + out_free_reserved: 7618 + btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0); 7619 + out_unuse: 7620 + unuse_block_rsv(root->fs_info, block_rsv, blocksize); 7621 + return ERR_PTR(ret); 7624 7622 } 7625 7623 7626 7624 struct walk_control {
+28 -26
fs/btrfs/extent_io.c
··· 4560 4560 do { 4561 4561 index--; 4562 4562 page = eb->pages[index]; 4563 - if (page && mapped) { 4563 + if (!page) 4564 + continue; 4565 + if (mapped) 4564 4566 spin_lock(&page->mapping->private_lock); 4567 + /* 4568 + * We do this since we'll remove the pages after we've 4569 + * removed the eb from the radix tree, so we could race 4570 + * and have this page now attached to the new eb. So 4571 + * only clear page_private if it's still connected to 4572 + * this eb. 4573 + */ 4574 + if (PagePrivate(page) && 4575 + page->private == (unsigned long)eb) { 4576 + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); 4577 + BUG_ON(PageDirty(page)); 4578 + BUG_ON(PageWriteback(page)); 4565 4579 /* 4566 - * We do this since we'll remove the pages after we've 4567 - * removed the eb from the radix tree, so we could race 4568 - * and have this page now attached to the new eb. So 4569 - * only clear page_private if it's still connected to 4570 - * this eb. 4580 + * We need to make sure we haven't be attached 4581 + * to a new eb. 4571 4582 */ 4572 - if (PagePrivate(page) && 4573 - page->private == (unsigned long)eb) { 4574 - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); 4575 - BUG_ON(PageDirty(page)); 4576 - BUG_ON(PageWriteback(page)); 4577 - /* 4578 - * We need to make sure we haven't be attached 4579 - * to a new eb. 
4580 - */ 4581 - ClearPagePrivate(page); 4582 - set_page_private(page, 0); 4583 - /* One for the page private */ 4584 - page_cache_release(page); 4585 - } 4586 - spin_unlock(&page->mapping->private_lock); 4587 - 4588 - } 4589 - if (page) { 4590 - /* One for when we alloced the page */ 4583 + ClearPagePrivate(page); 4584 + set_page_private(page, 0); 4585 + /* One for the page private */ 4591 4586 page_cache_release(page); 4592 4587 } 4588 + 4589 + if (mapped) 4590 + spin_unlock(&page->mapping->private_lock); 4591 + 4592 + /* One for when we alloced the page */ 4593 + page_cache_release(page); 4593 4594 } while (index != 0); 4594 4595 } 4595 4596 ··· 4871 4870 mark_extent_buffer_accessed(exists, p); 4872 4871 goto free_eb; 4873 4872 } 4873 + exists = NULL; 4874 4874 4875 4875 /* 4876 4876 * Do this so attach doesn't complain and we need to ··· 4935 4933 return eb; 4936 4934 4937 4935 free_eb: 4936 + WARN_ON(!atomic_dec_and_test(&eb->refs)); 4938 4937 for (i = 0; i < num_pages; i++) { 4939 4938 if (eb->pages[i]) 4940 4939 unlock_page(eb->pages[i]); 4941 4940 } 4942 4941 4943 - WARN_ON(!atomic_dec_and_test(&eb->refs)); 4944 4942 btrfs_release_extent_buffer(eb); 4945 4943 return exists; 4946 4944 }
+6 -4
fs/btrfs/free-space-cache.c
··· 1218 1218 * 1219 1219 * This function writes out a free space cache struct to disk for quick recovery 1220 1220 * on mount. This will return 0 if it was successfull in writing the cache out, 1221 - * and -1 if it was not. 1221 + * or an errno if it was not. 1222 1222 */ 1223 1223 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1224 1224 struct btrfs_free_space_ctl *ctl, ··· 1235 1235 int must_iput = 0; 1236 1236 1237 1237 if (!i_size_read(inode)) 1238 - return -1; 1238 + return -EIO; 1239 1239 1240 1240 WARN_ON(io_ctl->pages); 1241 1241 ret = io_ctl_init(io_ctl, inode, root, 1); 1242 1242 if (ret) 1243 - return -1; 1243 + return ret; 1244 1244 1245 1245 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1246 1246 down_write(&block_group->data_rwsem); ··· 1258 1258 } 1259 1259 1260 1260 /* Lock all pages first so we can lock the extent safely. */ 1261 - io_ctl_prepare_pages(io_ctl, inode, 0); 1261 + ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1262 + if (ret) 1263 + goto out; 1262 1264 1263 1265 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1264 1266 0, &cached_state);
+13 -10
fs/btrfs/inode.c
··· 3632 3632 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); 3633 3633 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); 3634 3634 3635 - /* 3636 - * If we were modified in the current generation and evicted from memory 3637 - * and then re-read we need to do a full sync since we don't have any 3638 - * idea about which extents were modified before we were evicted from 3639 - * cache. 3640 - */ 3641 - if (BTRFS_I(inode)->last_trans == root->fs_info->generation) 3642 - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 3643 - &BTRFS_I(inode)->runtime_flags); 3644 - 3645 3635 inode->i_version = btrfs_inode_sequence(leaf, inode_item); 3646 3636 inode->i_generation = BTRFS_I(inode)->generation; 3647 3637 inode->i_rdev = 0; ··· 3641 3651 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 3642 3652 3643 3653 cache_index: 3654 + /* 3655 + * If we were modified in the current generation and evicted from memory 3656 + * and then re-read we need to do a full sync since we don't have any 3657 + * idea about which extents were modified before we were evicted from 3658 + * cache. 3659 + * 3660 + * This is required for both inode re-read from disk and delayed inode 3661 + * in delayed_nodes_tree. 3662 + */ 3663 + if (BTRFS_I(inode)->last_trans == root->fs_info->generation) 3664 + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 3665 + &BTRFS_I(inode)->runtime_flags); 3666 + 3644 3667 path->slots[0]++; 3645 3668 if (inode->i_nlink != 1 || 3646 3669 path->slots[0] >= btrfs_header_nritems(leaf))
+2 -1
fs/btrfs/ioctl.c
··· 2410 2410 "Attempt to delete subvolume %llu during send", 2411 2411 dest->root_key.objectid); 2412 2412 err = -EPERM; 2413 - goto out_dput; 2413 + goto out_unlock_inode; 2414 2414 } 2415 2415 2416 2416 d_invalidate(dentry); ··· 2505 2505 root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); 2506 2506 spin_unlock(&dest->root_item_lock); 2507 2507 } 2508 + out_unlock_inode: 2508 2509 mutex_unlock(&inode->i_mutex); 2509 2510 if (!err) { 2510 2511 shrink_dcache_sb(root->fs_info->sb);
+11 -4
fs/btrfs/volumes.c
··· 1058 1058 struct extent_map *em; 1059 1059 struct list_head *search_list = &trans->transaction->pending_chunks; 1060 1060 int ret = 0; 1061 + u64 physical_start = *start; 1061 1062 1062 1063 again: 1063 1064 list_for_each_entry(em, search_list, list) { ··· 1069 1068 for (i = 0; i < map->num_stripes; i++) { 1070 1069 if (map->stripes[i].dev != device) 1071 1070 continue; 1072 - if (map->stripes[i].physical >= *start + len || 1071 + if (map->stripes[i].physical >= physical_start + len || 1073 1072 map->stripes[i].physical + em->orig_block_len <= 1074 - *start) 1073 + physical_start) 1075 1074 continue; 1076 1075 *start = map->stripes[i].physical + 1077 1076 em->orig_block_len; ··· 1194 1193 */ 1195 1194 if (contains_pending_extent(trans, device, 1196 1195 &search_start, 1197 - hole_size)) 1198 - hole_size = 0; 1196 + hole_size)) { 1197 + if (key.offset >= search_start) { 1198 + hole_size = key.offset - search_start; 1199 + } else { 1200 + WARN_ON_ONCE(1); 1201 + hole_size = 0; 1202 + } 1203 + } 1199 1204 1200 1205 if (hole_size > max_hole_size) { 1201 1206 max_hole_start = search_start;