Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

- regression fix: transaction commit can run away due to the delayed ref
waiting heuristic; the heuristic is no longer necessary because of the
proper reservation mechanism introduced in 5.0

- regression fix: potential crash due to use-before-check of an ERR_PTR
return value

- fix for transaction abort during transaction commit that needs to
properly clean up pending block groups

- fix deadlock during b-tree node/leaf splitting: when this happens on
some of the fundamental trees, we must prevent new tree block
allocation from re-entering indirectly via the block group flushing path

- fix potential memory leak after errors during mount

* tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: On error always free subvol_name in btrfs_mount
btrfs: clean up pending block groups when transaction commit aborts
btrfs: fix potential oops in device_list_add
btrfs: don't end the transaction for delayed refs in throttle
Btrfs: fix deadlock when allocating tree block during leaf/node split

+71 -38
+50 -28
fs/btrfs/ctree.c
··· 968 968 return 0; 969 969 } 970 970 971 + static struct extent_buffer *alloc_tree_block_no_bg_flush( 972 + struct btrfs_trans_handle *trans, 973 + struct btrfs_root *root, 974 + u64 parent_start, 975 + const struct btrfs_disk_key *disk_key, 976 + int level, 977 + u64 hint, 978 + u64 empty_size) 979 + { 980 + struct btrfs_fs_info *fs_info = root->fs_info; 981 + struct extent_buffer *ret; 982 + 983 + /* 984 + * If we are COWing a node/leaf from the extent, chunk, device or free 985 + * space trees, make sure that we do not finish block group creation of 986 + * pending block groups. We do this to avoid a deadlock. 987 + * COWing can result in allocation of a new chunk, and flushing pending 988 + * block groups (btrfs_create_pending_block_groups()) can be triggered 989 + * when finishing allocation of a new chunk. Creation of a pending block 990 + * group modifies the extent, chunk, device and free space trees, 991 + * therefore we could deadlock with ourselves since we are holding a 992 + * lock on an extent buffer that btrfs_create_pending_block_groups() may 993 + * try to COW later. 994 + * For similar reasons, we also need to delay flushing pending block 995 + * groups when splitting a leaf or node, from one of those trees, since 996 + * we are holding a write lock on it and its parent or when inserting a 997 + * new root node for one of those trees. 998 + */ 999 + if (root == fs_info->extent_root || 1000 + root == fs_info->chunk_root || 1001 + root == fs_info->dev_root || 1002 + root == fs_info->free_space_root) 1003 + trans->can_flush_pending_bgs = false; 1004 + 1005 + ret = btrfs_alloc_tree_block(trans, root, parent_start, 1006 + root->root_key.objectid, disk_key, level, 1007 + hint, empty_size); 1008 + trans->can_flush_pending_bgs = true; 1009 + 1010 + return ret; 1011 + } 1012 + 971 1013 /* 972 1014 * does the dirty work in cow of a single block. The parent block (if 973 1015 * supplied) is updated to point to the new cow copy. 
The new buffer is marked ··· 1057 1015 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) 1058 1016 parent_start = parent->start; 1059 1017 1060 - /* 1061 - * If we are COWing a node/leaf from the extent, chunk, device or free 1062 - * space trees, make sure that we do not finish block group creation of 1063 - * pending block groups. We do this to avoid a deadlock. 1064 - * COWing can result in allocation of a new chunk, and flushing pending 1065 - * block groups (btrfs_create_pending_block_groups()) can be triggered 1066 - * when finishing allocation of a new chunk. Creation of a pending block 1067 - * group modifies the extent, chunk, device and free space trees, 1068 - * therefore we could deadlock with ourselves since we are holding a 1069 - * lock on an extent buffer that btrfs_create_pending_block_groups() may 1070 - * try to COW later. 1071 - */ 1072 - if (root == fs_info->extent_root || 1073 - root == fs_info->chunk_root || 1074 - root == fs_info->dev_root || 1075 - root == fs_info->free_space_root) 1076 - trans->can_flush_pending_bgs = false; 1077 - 1078 - cow = btrfs_alloc_tree_block(trans, root, parent_start, 1079 - root->root_key.objectid, &disk_key, level, 1080 - search_start, empty_size); 1081 - trans->can_flush_pending_bgs = true; 1018 + cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, 1019 + level, search_start, empty_size); 1082 1020 if (IS_ERR(cow)) 1083 1021 return PTR_ERR(cow); 1084 1022 ··· 3367 3345 else 3368 3346 btrfs_node_key(lower, &lower_key, 0); 3369 3347 3370 - c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 3371 - &lower_key, level, root->node->start, 0); 3348 + c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, 3349 + root->node->start, 0); 3372 3350 if (IS_ERR(c)) 3373 3351 return PTR_ERR(c); 3374 3352 ··· 3497 3475 mid = (c_nritems + 1) / 2; 3498 3476 btrfs_node_key(c, &disk_key, mid); 3499 3477 3500 - split = btrfs_alloc_tree_block(trans, root, 0, 
root->root_key.objectid, 3501 - &disk_key, level, c->start, 0); 3478 + split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, 3479 + c->start, 0); 3502 3480 if (IS_ERR(split)) 3503 3481 return PTR_ERR(split); 3504 3482 ··· 4282 4260 else 4283 4261 btrfs_item_key(l, &disk_key, mid); 4284 4262 4285 - right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 4286 - &disk_key, 0, l->start, 0); 4263 + right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, 4264 + l->start, 0); 4287 4265 if (IS_ERR(right)) 4288 4266 return PTR_ERR(right); 4289 4267
+3
fs/btrfs/super.c
··· 1621 1621 flags | SB_RDONLY, device_name, data); 1622 1622 if (IS_ERR(mnt_root)) { 1623 1623 root = ERR_CAST(mnt_root); 1624 + kfree(subvol_name); 1624 1625 goto out; 1625 1626 } 1626 1627 ··· 1631 1630 if (error < 0) { 1632 1631 root = ERR_PTR(error); 1633 1632 mntput(mnt_root); 1633 + kfree(subvol_name); 1634 1634 goto out; 1635 1635 } 1636 1636 } 1637 1637 } 1638 1638 if (IS_ERR(mnt_root)) { 1639 1639 root = ERR_CAST(mnt_root); 1640 + kfree(subvol_name); 1640 1641 goto out; 1641 1642 } 1642 1643
+16 -8
fs/btrfs/transaction.c
··· 850 850 851 851 btrfs_trans_release_chunk_metadata(trans); 852 852 853 - if (lock && should_end_transaction(trans) && 854 - READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { 855 - spin_lock(&info->trans_lock); 856 - if (cur_trans->state == TRANS_STATE_RUNNING) 857 - cur_trans->state = TRANS_STATE_BLOCKED; 858 - spin_unlock(&info->trans_lock); 859 - } 860 - 861 853 if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { 862 854 if (throttle) 863 855 return btrfs_commit_transaction(trans); ··· 1871 1879 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1872 1880 } 1873 1881 1882 + /* 1883 + * Release reserved delayed ref space of all pending block groups of the 1884 + * transaction and remove them from the list 1885 + */ 1886 + static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) 1887 + { 1888 + struct btrfs_fs_info *fs_info = trans->fs_info; 1889 + struct btrfs_block_group_cache *block_group, *tmp; 1890 + 1891 + list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { 1892 + btrfs_delayed_refs_rsv_release(fs_info, 1); 1893 + list_del_init(&block_group->bg_list); 1894 + } 1895 + } 1896 + 1874 1897 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) 1875 1898 { 1876 1899 /* ··· 2277 2270 btrfs_scrub_continue(fs_info); 2278 2271 cleanup_transaction: 2279 2272 btrfs_trans_release_metadata(trans); 2273 + btrfs_cleanup_pending_block_groups(trans); 2280 2274 btrfs_trans_release_chunk_metadata(trans); 2281 2275 trans->block_rsv = NULL; 2282 2276 btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
+2 -2
fs/btrfs/volumes.c
··· 957 957 else 958 958 fs_devices = alloc_fs_devices(disk_super->fsid, NULL); 959 959 960 - fs_devices->fsid_change = fsid_change_in_progress; 961 - 962 960 if (IS_ERR(fs_devices)) 963 961 return ERR_CAST(fs_devices); 962 + 963 + fs_devices->fsid_change = fsid_change_in_progress; 964 964 965 965 mutex_lock(&fs_devices->device_list_mutex); 966 966 list_add(&fs_devices->fs_list, &fs_uuids);