Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.19-rc7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs reverts from David Sterba:
"Due to a recent report [1] we need to revert the radix tree to xarray
conversion patches.

There's a problem with sleeping under a spinlock, when xa_insert could
allocate memory under pressure. We use GFP_NOFS, so this is a real
problem that we unfortunately did not discover during review.

I'm sorry to make such a change at rc6 time, but the revert is IMO the
safer option. There are patches to use a mutex instead of the spin locks,
but that would need more testing. The revert branch has been tested on
a few setups; all seem ok.

The conversion to xarray will be revisited in the future"

Link: https://lore.kernel.org/linux-btrfs/cover.1657097693.git.fdmanana@suse.com/ [1]

* tag 'for-5.19-rc7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
Revert "btrfs: turn delayed_nodes_tree into an XArray"
Revert "btrfs: turn name_cache radix tree into XArray in send_ctx"
Revert "btrfs: turn fs_info member buffer_radix into XArray"
Revert "btrfs: turn fs_roots_radix in btrfs_fs_info into an XArray"

+338 -254
+8 -10
fs/btrfs/ctree.h
··· 675 675 rwlock_t global_root_lock; 676 676 struct rb_root global_root_tree; 677 677 678 - /* The xarray that holds all the FS roots */ 679 - spinlock_t fs_roots_lock; 680 - struct xarray fs_roots; 678 + spinlock_t fs_roots_radix_lock; 679 + struct radix_tree_root fs_roots_radix; 681 680 682 681 /* block group cache stuff */ 683 682 rwlock_t block_group_cache_lock; ··· 994 995 995 996 struct btrfs_delayed_root *delayed_root; 996 997 997 - /* Extent buffer xarray */ 998 + /* Extent buffer radix tree */ 998 999 spinlock_t buffer_lock; 999 1000 /* Entries are eb->start / sectorsize */ 1000 - struct xarray extent_buffers; 1001 + struct radix_tree_root buffer_radix; 1001 1002 1002 1003 /* next backup root to be overwritten */ 1003 1004 int backup_root_index; ··· 1118 1119 */ 1119 1120 BTRFS_ROOT_SHAREABLE, 1120 1121 BTRFS_ROOT_TRACK_DIRTY, 1121 - /* The root is tracked in fs_info::fs_roots */ 1122 - BTRFS_ROOT_REGISTERED, 1122 + BTRFS_ROOT_IN_RADIX, 1123 1123 BTRFS_ROOT_ORPHAN_ITEM_INSERTED, 1124 1124 BTRFS_ROOT_DEFRAG_RUNNING, 1125 1125 BTRFS_ROOT_FORCE_COW, ··· 1222 1224 struct rb_root inode_tree; 1223 1225 1224 1226 /* 1225 - * Xarray that keeps track of delayed nodes of every inode, protected 1226 - * by inode_lock 1227 + * radix tree that keeps track of delayed nodes of every inode, 1228 + * protected by inode_lock 1227 1229 */ 1228 - struct xarray delayed_nodes; 1230 + struct radix_tree_root delayed_nodes_tree; 1229 1231 /* 1230 1232 * right now this just gets used so that a root has its own devid 1231 1233 * for stat. It may be used for more later
+45 -39
fs/btrfs/delayed-inode.c
··· 78 78 } 79 79 80 80 spin_lock(&root->inode_lock); 81 - node = xa_load(&root->delayed_nodes, ino); 81 + node = radix_tree_lookup(&root->delayed_nodes_tree, ino); 82 82 83 83 if (node) { 84 84 if (btrfs_inode->delayed_node) { ··· 90 90 91 91 /* 92 92 * It's possible that we're racing into the middle of removing 93 - * this node from the xarray. In this case, the refcount 93 + * this node from the radix tree. In this case, the refcount 94 94 * was zero and it should never go back to one. Just return 95 - * NULL like it was never in the xarray at all; our release 95 + * NULL like it was never in the radix at all; our release 96 96 * function is in the process of removing it. 97 97 * 98 98 * Some implementations of refcount_inc refuse to bump the ··· 100 100 * here, refcount_inc() may decide to just WARN_ONCE() instead 101 101 * of actually bumping the refcount. 102 102 * 103 - * If this node is properly in the xarray, we want to bump the 103 + * If this node is properly in the radix, we want to bump the 104 104 * refcount twice, once for the inode and once for this get 105 105 * operation. 
106 106 */ ··· 128 128 u64 ino = btrfs_ino(btrfs_inode); 129 129 int ret; 130 130 131 - do { 132 - node = btrfs_get_delayed_node(btrfs_inode); 133 - if (node) 134 - return node; 131 + again: 132 + node = btrfs_get_delayed_node(btrfs_inode); 133 + if (node) 134 + return node; 135 135 136 - node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS); 137 - if (!node) 138 - return ERR_PTR(-ENOMEM); 139 - btrfs_init_delayed_node(node, root, ino); 136 + node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS); 137 + if (!node) 138 + return ERR_PTR(-ENOMEM); 139 + btrfs_init_delayed_node(node, root, ino); 140 140 141 - /* Cached in the inode and can be accessed */ 142 - refcount_set(&node->refs, 2); 141 + /* cached in the btrfs inode and can be accessed */ 142 + refcount_set(&node->refs, 2); 143 143 144 - spin_lock(&root->inode_lock); 145 - ret = xa_insert(&root->delayed_nodes, ino, node, GFP_NOFS); 146 - if (ret) { 147 - spin_unlock(&root->inode_lock); 148 - kmem_cache_free(delayed_node_cache, node); 149 - if (ret != -EBUSY) 150 - return ERR_PTR(ret); 151 - } 152 - } while (ret); 144 + ret = radix_tree_preload(GFP_NOFS); 145 + if (ret) { 146 + kmem_cache_free(delayed_node_cache, node); 147 + return ERR_PTR(ret); 148 + } 149 + 150 + spin_lock(&root->inode_lock); 151 + ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); 152 + if (ret == -EEXIST) { 153 + spin_unlock(&root->inode_lock); 154 + kmem_cache_free(delayed_node_cache, node); 155 + radix_tree_preload_end(); 156 + goto again; 157 + } 153 158 btrfs_inode->delayed_node = node; 154 159 spin_unlock(&root->inode_lock); 160 + radix_tree_preload_end(); 155 161 156 162 return node; 157 163 } ··· 276 270 * back up. We can delete it now. 
277 271 */ 278 272 ASSERT(refcount_read(&delayed_node->refs) == 0); 279 - xa_erase(&root->delayed_nodes, delayed_node->inode_id); 273 + radix_tree_delete(&root->delayed_nodes_tree, 274 + delayed_node->inode_id); 280 275 spin_unlock(&root->inode_lock); 281 276 kmem_cache_free(delayed_node_cache, delayed_node); 282 277 } ··· 1870 1863 1871 1864 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) 1872 1865 { 1873 - unsigned long index = 0; 1874 - struct btrfs_delayed_node *delayed_node; 1866 + u64 inode_id = 0; 1875 1867 struct btrfs_delayed_node *delayed_nodes[8]; 1868 + int i, n; 1876 1869 1877 1870 while (1) { 1878 - int n = 0; 1879 - 1880 1871 spin_lock(&root->inode_lock); 1881 - if (xa_empty(&root->delayed_nodes)) { 1872 + n = radix_tree_gang_lookup(&root->delayed_nodes_tree, 1873 + (void **)delayed_nodes, inode_id, 1874 + ARRAY_SIZE(delayed_nodes)); 1875 + if (!n) { 1882 1876 spin_unlock(&root->inode_lock); 1883 - return; 1877 + break; 1884 1878 } 1885 1879 1886 - xa_for_each_start(&root->delayed_nodes, index, delayed_node, index) { 1880 + inode_id = delayed_nodes[n - 1]->inode_id + 1; 1881 + for (i = 0; i < n; i++) { 1887 1882 /* 1888 1883 * Don't increase refs in case the node is dead and 1889 1884 * about to be removed from the tree in the loop below 1890 1885 */ 1891 - if (refcount_inc_not_zero(&delayed_node->refs)) { 1892 - delayed_nodes[n] = delayed_node; 1893 - n++; 1894 - } 1895 - if (n >= ARRAY_SIZE(delayed_nodes)) 1896 - break; 1886 + if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) 1887 + delayed_nodes[i] = NULL; 1897 1888 } 1898 - index++; 1899 1889 spin_unlock(&root->inode_lock); 1900 1890 1901 - for (int i = 0; i < n; i++) { 1891 + for (i = 0; i < n; i++) { 1892 + if (!delayed_nodes[i]) 1893 + continue; 1902 1894 __btrfs_kill_delayed_node(delayed_nodes[i]); 1903 1895 btrfs_release_delayed_node(delayed_nodes[i]); 1904 1896 }
+103 -82
fs/btrfs/disk-io.c
··· 5 5 6 6 #include <linux/fs.h> 7 7 #include <linux/blkdev.h> 8 + #include <linux/radix-tree.h> 8 9 #include <linux/writeback.h> 9 10 #include <linux/workqueue.h> 10 11 #include <linux/kthread.h> ··· 486 485 uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur, 487 486 fs_info->nodesize); 488 487 489 - /* A dirty eb shouldn't disappear from extent_buffers */ 488 + /* A dirty eb shouldn't disappear from buffer_radix */ 490 489 if (WARN_ON(!eb)) 491 490 return -EUCLEAN; 492 491 ··· 1159 1158 root->nr_delalloc_inodes = 0; 1160 1159 root->nr_ordered_extents = 0; 1161 1160 root->inode_tree = RB_ROOT; 1162 - xa_init_flags(&root->delayed_nodes, GFP_ATOMIC); 1161 + INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1163 1162 1164 1163 btrfs_init_root_block_rsv(root); 1165 1164 ··· 1211 1210 btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks); 1212 1211 #ifdef CONFIG_BTRFS_DEBUG 1213 1212 INIT_LIST_HEAD(&root->leak_list); 1214 - spin_lock(&fs_info->fs_roots_lock); 1213 + spin_lock(&fs_info->fs_roots_radix_lock); 1215 1214 list_add_tail(&root->leak_list, &fs_info->allocated_roots); 1216 - spin_unlock(&fs_info->fs_roots_lock); 1215 + spin_unlock(&fs_info->fs_roots_radix_lock); 1217 1216 #endif 1218 1217 } 1219 1218 ··· 1660 1659 { 1661 1660 struct btrfs_root *root; 1662 1661 1663 - spin_lock(&fs_info->fs_roots_lock); 1664 - root = xa_load(&fs_info->fs_roots, (unsigned long)root_id); 1662 + spin_lock(&fs_info->fs_roots_radix_lock); 1663 + root = radix_tree_lookup(&fs_info->fs_roots_radix, 1664 + (unsigned long)root_id); 1665 1665 if (root) 1666 1666 root = btrfs_grab_root(root); 1667 - spin_unlock(&fs_info->fs_roots_lock); 1667 + spin_unlock(&fs_info->fs_roots_radix_lock); 1668 1668 return root; 1669 1669 } 1670 1670 ··· 1707 1705 { 1708 1706 int ret; 1709 1707 1710 - spin_lock(&fs_info->fs_roots_lock); 1711 - ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid, 1712 - root, GFP_NOFS); 1708 + ret = radix_tree_preload(GFP_NOFS); 1709 + if 
(ret) 1710 + return ret; 1711 + 1712 + spin_lock(&fs_info->fs_roots_radix_lock); 1713 + ret = radix_tree_insert(&fs_info->fs_roots_radix, 1714 + (unsigned long)root->root_key.objectid, 1715 + root); 1713 1716 if (ret == 0) { 1714 1717 btrfs_grab_root(root); 1715 - set_bit(BTRFS_ROOT_REGISTERED, &root->state); 1718 + set_bit(BTRFS_ROOT_IN_RADIX, &root->state); 1716 1719 } 1717 - spin_unlock(&fs_info->fs_roots_lock); 1720 + spin_unlock(&fs_info->fs_roots_radix_lock); 1721 + radix_tree_preload_end(); 1718 1722 1719 1723 return ret; 1720 1724 } ··· 2350 2342 btrfs_drew_lock_destroy(&root->snapshot_lock); 2351 2343 free_root_extent_buffers(root); 2352 2344 #ifdef CONFIG_BTRFS_DEBUG 2353 - spin_lock(&root->fs_info->fs_roots_lock); 2345 + spin_lock(&root->fs_info->fs_roots_radix_lock); 2354 2346 list_del_init(&root->leak_list); 2355 - spin_unlock(&root->fs_info->fs_roots_lock); 2347 + spin_unlock(&root->fs_info->fs_roots_radix_lock); 2356 2348 #endif 2357 2349 kfree(root); 2358 2350 } ··· 2360 2352 2361 2353 void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) 2362 2354 { 2363 - struct btrfs_root *root; 2364 - unsigned long index = 0; 2355 + int ret; 2356 + struct btrfs_root *gang[8]; 2357 + int i; 2365 2358 2366 2359 while (!list_empty(&fs_info->dead_roots)) { 2367 - root = list_entry(fs_info->dead_roots.next, 2368 - struct btrfs_root, root_list); 2369 - list_del(&root->root_list); 2360 + gang[0] = list_entry(fs_info->dead_roots.next, 2361 + struct btrfs_root, root_list); 2362 + list_del(&gang[0]->root_list); 2370 2363 2371 - if (test_bit(BTRFS_ROOT_REGISTERED, &root->state)) 2372 - btrfs_drop_and_free_fs_root(fs_info, root); 2373 - btrfs_put_root(root); 2364 + if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) 2365 + btrfs_drop_and_free_fs_root(fs_info, gang[0]); 2366 + btrfs_put_root(gang[0]); 2374 2367 } 2375 2368 2376 - xa_for_each(&fs_info->fs_roots, index, root) { 2377 - btrfs_drop_and_free_fs_root(fs_info, root); 2369 + while (1) { 2370 + ret = 
radix_tree_gang_lookup(&fs_info->fs_roots_radix, 2371 + (void **)gang, 0, 2372 + ARRAY_SIZE(gang)); 2373 + if (!ret) 2374 + break; 2375 + for (i = 0; i < ret; i++) 2376 + btrfs_drop_and_free_fs_root(fs_info, gang[i]); 2378 2377 } 2379 2378 } 2380 2379 ··· 3149 3134 3150 3135 void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) 3151 3136 { 3152 - xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC); 3153 - xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC); 3137 + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 3138 + INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); 3154 3139 INIT_LIST_HEAD(&fs_info->trans_list); 3155 3140 INIT_LIST_HEAD(&fs_info->dead_roots); 3156 3141 INIT_LIST_HEAD(&fs_info->delayed_iputs); ··· 3158 3143 INIT_LIST_HEAD(&fs_info->caching_block_groups); 3159 3144 spin_lock_init(&fs_info->delalloc_root_lock); 3160 3145 spin_lock_init(&fs_info->trans_lock); 3161 - spin_lock_init(&fs_info->fs_roots_lock); 3146 + spin_lock_init(&fs_info->fs_roots_radix_lock); 3162 3147 spin_lock_init(&fs_info->delayed_iput_lock); 3163 3148 spin_lock_init(&fs_info->defrag_inodes_lock); 3164 3149 spin_lock_init(&fs_info->super_lock); ··· 3389 3374 /* 3390 3375 * btrfs_find_orphan_roots() is responsible for finding all the dead 3391 3376 * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load 3392 - * them into the fs_info->fs_roots. This must be done before 3377 + * them into the fs_info->fs_roots_radix tree. This must be done before 3393 3378 * calling btrfs_orphan_cleanup() on the tree root. 
If we don't do it 3394 3379 * first, then btrfs_orphan_cleanup() will delete a dead root's orphan 3395 3380 * item before the root's tree is deleted - this means that if we unmount ··· 4514 4499 { 4515 4500 bool drop_ref = false; 4516 4501 4517 - spin_lock(&fs_info->fs_roots_lock); 4518 - xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid); 4519 - if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state)) 4502 + spin_lock(&fs_info->fs_roots_radix_lock); 4503 + radix_tree_delete(&fs_info->fs_roots_radix, 4504 + (unsigned long)root->root_key.objectid); 4505 + if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state)) 4520 4506 drop_ref = true; 4521 - spin_unlock(&fs_info->fs_roots_lock); 4507 + spin_unlock(&fs_info->fs_roots_radix_lock); 4522 4508 4523 4509 if (BTRFS_FS_ERROR(fs_info)) { 4524 4510 ASSERT(root->log_root == NULL); ··· 4535 4519 4536 4520 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) 4537 4521 { 4538 - struct btrfs_root *roots[8]; 4539 - unsigned long index = 0; 4540 - int i; 4522 + u64 root_objectid = 0; 4523 + struct btrfs_root *gang[8]; 4524 + int i = 0; 4541 4525 int err = 0; 4542 - int grabbed; 4526 + unsigned int ret = 0; 4543 4527 4544 4528 while (1) { 4545 - struct btrfs_root *root; 4546 - 4547 - spin_lock(&fs_info->fs_roots_lock); 4548 - if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) { 4549 - spin_unlock(&fs_info->fs_roots_lock); 4550 - return err; 4529 + spin_lock(&fs_info->fs_roots_radix_lock); 4530 + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, 4531 + (void **)gang, root_objectid, 4532 + ARRAY_SIZE(gang)); 4533 + if (!ret) { 4534 + spin_unlock(&fs_info->fs_roots_radix_lock); 4535 + break; 4551 4536 } 4537 + root_objectid = gang[ret - 1]->root_key.objectid + 1; 4552 4538 4553 - grabbed = 0; 4554 - xa_for_each_start(&fs_info->fs_roots, index, root, index) { 4555 - /* Avoid grabbing roots in dead_roots */ 4556 - if (btrfs_root_refs(&root->root_item) > 0) 4557 - roots[grabbed++] = 
btrfs_grab_root(root); 4558 - if (grabbed >= ARRAY_SIZE(roots)) 4559 - break; 4560 - } 4561 - spin_unlock(&fs_info->fs_roots_lock); 4562 - 4563 - for (i = 0; i < grabbed; i++) { 4564 - if (!roots[i]) 4539 + for (i = 0; i < ret; i++) { 4540 + /* Avoid to grab roots in dead_roots */ 4541 + if (btrfs_root_refs(&gang[i]->root_item) == 0) { 4542 + gang[i] = NULL; 4565 4543 continue; 4566 - index = roots[i]->root_key.objectid; 4567 - err = btrfs_orphan_cleanup(roots[i]); 4568 - if (err) 4569 - goto out; 4570 - btrfs_put_root(roots[i]); 4544 + } 4545 + /* grab all the search result for later use */ 4546 + gang[i] = btrfs_grab_root(gang[i]); 4571 4547 } 4572 - index++; 4548 + spin_unlock(&fs_info->fs_roots_radix_lock); 4549 + 4550 + for (i = 0; i < ret; i++) { 4551 + if (!gang[i]) 4552 + continue; 4553 + root_objectid = gang[i]->root_key.objectid; 4554 + err = btrfs_orphan_cleanup(gang[i]); 4555 + if (err) 4556 + break; 4557 + btrfs_put_root(gang[i]); 4558 + } 4559 + root_objectid++; 4573 4560 } 4574 4561 4575 - out: 4576 - /* Release the roots that remain uncleaned due to error */ 4577 - for (; i < grabbed; i++) { 4578 - if (roots[i]) 4579 - btrfs_put_root(roots[i]); 4562 + /* release the uncleaned roots due to error */ 4563 + for (; i < ret; i++) { 4564 + if (gang[i]) 4565 + btrfs_put_root(gang[i]); 4580 4566 } 4581 4567 return err; 4582 4568 } ··· 4897 4879 4898 4880 static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info) 4899 4881 { 4900 - unsigned long index = 0; 4901 - int grabbed = 0; 4902 - struct btrfs_root *roots[8]; 4882 + struct btrfs_root *gang[8]; 4883 + u64 root_objectid = 0; 4884 + int ret; 4903 4885 4904 - spin_lock(&fs_info->fs_roots_lock); 4905 - while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index, 4906 - ULONG_MAX, 8, XA_PRESENT))) { 4907 - for (int i = 0; i < grabbed; i++) 4908 - roots[i] = btrfs_grab_root(roots[i]); 4909 - spin_unlock(&fs_info->fs_roots_lock); 4886 + spin_lock(&fs_info->fs_roots_radix_lock); 4887 + while 
((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, 4888 + (void **)gang, root_objectid, 4889 + ARRAY_SIZE(gang))) != 0) { 4890 + int i; 4910 4891 4911 - for (int i = 0; i < grabbed; i++) { 4912 - if (!roots[i]) 4892 + for (i = 0; i < ret; i++) 4893 + gang[i] = btrfs_grab_root(gang[i]); 4894 + spin_unlock(&fs_info->fs_roots_radix_lock); 4895 + 4896 + for (i = 0; i < ret; i++) { 4897 + if (!gang[i]) 4913 4898 continue; 4914 - index = roots[i]->root_key.objectid; 4915 - btrfs_free_log(NULL, roots[i]); 4916 - btrfs_put_root(roots[i]); 4899 + root_objectid = gang[i]->root_key.objectid; 4900 + btrfs_free_log(NULL, gang[i]); 4901 + btrfs_put_root(gang[i]); 4917 4902 } 4918 - index++; 4919 - spin_lock(&fs_info->fs_roots_lock); 4903 + root_objectid++; 4904 + spin_lock(&fs_info->fs_roots_radix_lock); 4920 4905 } 4921 - spin_unlock(&fs_info->fs_roots_lock); 4906 + spin_unlock(&fs_info->fs_roots_radix_lock); 4922 4907 btrfs_free_log_root_tree(NULL, fs_info); 4923 4908 } 4924 4909
+1 -1
fs/btrfs/extent-tree.c
··· 5829 5829 btrfs_qgroup_convert_reserved_meta(root, INT_MAX); 5830 5830 btrfs_qgroup_free_meta_all_pertrans(root); 5831 5831 5832 - if (test_bit(BTRFS_ROOT_REGISTERED, &root->state)) 5832 + if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) 5833 5833 btrfs_add_dropped_root(trans, root); 5834 5834 else 5835 5835 btrfs_put_root(root);
+73 -47
fs/btrfs/extent_io.c
··· 2966 2966 } 2967 2967 2968 2968 /* 2969 - * Find extent buffer for a given bytenr. 2969 + * Find extent buffer for a givne bytenr. 2970 2970 * 2971 2971 * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking 2972 2972 * in endio context. ··· 2985 2985 return (struct extent_buffer *)page->private; 2986 2986 } 2987 2987 2988 - /* For subpage case, we need to lookup extent buffer xarray */ 2989 - eb = xa_load(&fs_info->extent_buffers, 2990 - bytenr >> fs_info->sectorsize_bits); 2988 + /* For subpage case, we need to lookup buffer radix tree */ 2989 + rcu_read_lock(); 2990 + eb = radix_tree_lookup(&fs_info->buffer_radix, 2991 + bytenr >> fs_info->sectorsize_bits); 2992 + rcu_read_unlock(); 2991 2993 ASSERT(eb); 2992 2994 return eb; 2993 2995 } ··· 4437 4435 struct extent_buffer *eb; 4438 4436 4439 4437 rcu_read_lock(); 4440 - eb = xa_load(&fs_info->extent_buffers, 4441 - start >> fs_info->sectorsize_bits); 4438 + eb = radix_tree_lookup(&fs_info->buffer_radix, 4439 + start >> fs_info->sectorsize_bits); 4442 4440 if (eb && atomic_inc_not_zero(&eb->refs)) { 4443 4441 rcu_read_unlock(); 4444 4442 return eb; ··· 6131 6129 if (!eb) 6132 6130 return ERR_PTR(-ENOMEM); 6133 6131 eb->fs_info = fs_info; 6134 - 6135 - do { 6136 - ret = xa_insert(&fs_info->extent_buffers, 6137 - start >> fs_info->sectorsize_bits, 6138 - eb, GFP_NOFS); 6139 - if (ret == -ENOMEM) { 6140 - exists = ERR_PTR(ret); 6132 + again: 6133 + ret = radix_tree_preload(GFP_NOFS); 6134 + if (ret) { 6135 + exists = ERR_PTR(ret); 6136 + goto free_eb; 6137 + } 6138 + spin_lock(&fs_info->buffer_lock); 6139 + ret = radix_tree_insert(&fs_info->buffer_radix, 6140 + start >> fs_info->sectorsize_bits, eb); 6141 + spin_unlock(&fs_info->buffer_lock); 6142 + radix_tree_preload_end(); 6143 + if (ret == -EEXIST) { 6144 + exists = find_extent_buffer(fs_info, start); 6145 + if (exists) 6141 6146 goto free_eb; 6142 - } 6143 - if (ret == -EBUSY) { 6144 - exists = find_extent_buffer(fs_info, start); 6145 - 
if (exists) 6146 - goto free_eb; 6147 - } 6148 - } while (ret); 6149 - 6147 + else 6148 + goto again; 6149 + } 6150 6150 check_buffer_tree_ref(eb); 6151 6151 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); 6152 6152 ··· 6323 6319 } 6324 6320 if (uptodate) 6325 6321 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 6322 + again: 6323 + ret = radix_tree_preload(GFP_NOFS); 6324 + if (ret) { 6325 + exists = ERR_PTR(ret); 6326 + goto free_eb; 6327 + } 6326 6328 6327 - do { 6328 - ret = xa_insert(&fs_info->extent_buffers, 6329 - start >> fs_info->sectorsize_bits, 6330 - eb, GFP_NOFS); 6331 - if (ret == -ENOMEM) { 6332 - exists = ERR_PTR(ret); 6329 + spin_lock(&fs_info->buffer_lock); 6330 + ret = radix_tree_insert(&fs_info->buffer_radix, 6331 + start >> fs_info->sectorsize_bits, eb); 6332 + spin_unlock(&fs_info->buffer_lock); 6333 + radix_tree_preload_end(); 6334 + if (ret == -EEXIST) { 6335 + exists = find_extent_buffer(fs_info, start); 6336 + if (exists) 6333 6337 goto free_eb; 6334 - } 6335 - if (ret == -EBUSY) { 6336 - exists = find_extent_buffer(fs_info, start); 6337 - if (exists) 6338 - goto free_eb; 6339 - } 6340 - } while (ret); 6341 - 6338 + else 6339 + goto again; 6340 + } 6342 6341 /* add one reference for the tree */ 6343 6342 check_buffer_tree_ref(eb); 6344 6343 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); ··· 6386 6379 6387 6380 spin_unlock(&eb->refs_lock); 6388 6381 6389 - xa_erase(&fs_info->extent_buffers, 6390 - eb->start >> fs_info->sectorsize_bits); 6382 + spin_lock(&fs_info->buffer_lock); 6383 + radix_tree_delete(&fs_info->buffer_radix, 6384 + eb->start >> fs_info->sectorsize_bits); 6385 + spin_unlock(&fs_info->buffer_lock); 6391 6386 } else { 6392 6387 spin_unlock(&eb->refs_lock); 6393 6388 } ··· 7334 7325 } 7335 7326 } 7336 7327 7328 + #define GANG_LOOKUP_SIZE 16 7337 7329 static struct extent_buffer *get_next_extent_buffer( 7338 7330 struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr) 7339 7331 { 7340 - struct extent_buffer *eb; 7341 - unsigned 
long index; 7332 + struct extent_buffer *gang[GANG_LOOKUP_SIZE]; 7333 + struct extent_buffer *found = NULL; 7342 7334 u64 page_start = page_offset(page); 7335 + u64 cur = page_start; 7343 7336 7344 7337 ASSERT(in_range(bytenr, page_start, PAGE_SIZE)); 7345 7338 lockdep_assert_held(&fs_info->buffer_lock); 7346 7339 7347 - xa_for_each_start(&fs_info->extent_buffers, index, eb, 7348 - page_start >> fs_info->sectorsize_bits) { 7349 - if (in_range(eb->start, page_start, PAGE_SIZE)) 7350 - return eb; 7351 - else if (eb->start >= page_start + PAGE_SIZE) 7352 - /* Already beyond page end */ 7353 - return NULL; 7340 + while (cur < page_start + PAGE_SIZE) { 7341 + int ret; 7342 + int i; 7343 + 7344 + ret = radix_tree_gang_lookup(&fs_info->buffer_radix, 7345 + (void **)gang, cur >> fs_info->sectorsize_bits, 7346 + min_t(unsigned int, GANG_LOOKUP_SIZE, 7347 + PAGE_SIZE / fs_info->nodesize)); 7348 + if (ret == 0) 7349 + goto out; 7350 + for (i = 0; i < ret; i++) { 7351 + /* Already beyond page end */ 7352 + if (gang[i]->start >= page_start + PAGE_SIZE) 7353 + goto out; 7354 + /* Found one */ 7355 + if (gang[i]->start >= bytenr) { 7356 + found = gang[i]; 7357 + goto out; 7358 + } 7359 + } 7360 + cur = gang[ret - 1]->start + gang[ret - 1]->len; 7354 7361 } 7355 - return NULL; 7362 + out: 7363 + return found; 7356 7364 } 7357 7365 7358 7366 static int try_release_subpage_extent_buffer(struct page *page)
+7 -8
fs/btrfs/inode.c
··· 3578 3578 u64 last_objectid = 0; 3579 3579 int ret = 0, nr_unlink = 0; 3580 3580 3581 - /* Bail out if the cleanup is already running. */ 3582 3581 if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state)) 3583 3582 return 0; 3584 3583 ··· 3660 3661 * 3661 3662 * btrfs_find_orphan_roots() ran before us, which has 3662 3663 * found all deleted roots and loaded them into 3663 - * fs_info->fs_roots. So here we can find if an 3664 + * fs_info->fs_roots_radix. So here we can find if an 3664 3665 * orphan item corresponds to a deleted root by looking 3665 - * up the root from that xarray. 3666 + * up the root from that radix tree. 3666 3667 */ 3667 3668 3668 - spin_lock(&fs_info->fs_roots_lock); 3669 - dead_root = xa_load(&fs_info->fs_roots, 3670 - (unsigned long)found_key.objectid); 3669 + spin_lock(&fs_info->fs_roots_radix_lock); 3670 + dead_root = radix_tree_lookup(&fs_info->fs_roots_radix, 3671 + (unsigned long)found_key.objectid); 3671 3672 if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0) 3672 3673 is_dead_root = 1; 3673 - spin_unlock(&fs_info->fs_roots_lock); 3674 + spin_unlock(&fs_info->fs_roots_radix_lock); 3674 3675 3675 3676 if (is_dead_root) { 3676 3677 /* prevent this orphan from being found again */ ··· 3910 3911 * cache. 3911 3912 * 3912 3913 * This is required for both inode re-read from disk and delayed inode 3913 - * in the delayed_nodes xarray. 3914 + * in delayed_nodes_tree. 3914 3915 */ 3915 3916 if (BTRFS_I(inode)->last_trans == fs_info->generation) 3916 3917 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+22 -18
fs/btrfs/send.c
··· 10 10 #include <linux/mount.h> 11 11 #include <linux/xattr.h> 12 12 #include <linux/posix_acl_xattr.h> 13 + #include <linux/radix-tree.h> 13 14 #include <linux/vmalloc.h> 14 15 #include <linux/string.h> 15 16 #include <linux/compat.h> ··· 128 127 struct list_head new_refs; 129 128 struct list_head deleted_refs; 130 129 131 - struct xarray name_cache; 130 + struct radix_tree_root name_cache; 132 131 struct list_head name_cache_list; 133 132 int name_cache_size; 134 133 ··· 269 268 struct name_cache_entry { 270 269 struct list_head list; 271 270 /* 272 - * On 32bit kernels, xarray has only 32bit indices, but we need to 273 - * handle 64bit inums. We use the lower 32bit of the 64bit inum to store 274 - * it in the tree. If more than one inum would fall into the same entry, 275 - * we use inum_aliases to store the additional entries. inum_aliases is 276 - * also used to store entries with the same inum but different generations. 271 + * radix_tree has only 32bit entries but we need to handle 64bit inums. 272 + * We use the lower 32bit of the 64bit inum to store it in the tree. If 273 + * more then one inum would fall into the same entry, we use radix_list 274 + * to store the additional entries. radix_list is also used to store 275 + * entries where two entries have the same inum but different 276 + * generations. 277 277 */ 278 - struct list_head inum_aliases; 278 + struct list_head radix_list; 279 279 u64 ino; 280 280 u64 gen; 281 281 u64 parent_ino; ··· 2026 2024 } 2027 2025 2028 2026 /* 2029 - * Insert a name cache entry. On 32bit kernels the xarray index is 32bit, 2027 + * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit, 2030 2028 * so we need to do some special handling in case we have clashes. This function 2031 - * takes care of this with the help of name_cache_entry::inum_aliases. 2029 + * takes care of this with the help of name_cache_entry::radix_list. 2032 2030 * In case of error, nce is kfreed. 
2033 2031 */ 2034 2032 static int name_cache_insert(struct send_ctx *sctx, ··· 2037 2035 int ret = 0; 2038 2036 struct list_head *nce_head; 2039 2037 2040 - nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino); 2038 + nce_head = radix_tree_lookup(&sctx->name_cache, 2039 + (unsigned long)nce->ino); 2041 2040 if (!nce_head) { 2042 2041 nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL); 2043 2042 if (!nce_head) { ··· 2047 2044 } 2048 2045 INIT_LIST_HEAD(nce_head); 2049 2046 2050 - ret = xa_insert(&sctx->name_cache, nce->ino, nce_head, GFP_KERNEL); 2047 + ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); 2051 2048 if (ret < 0) { 2052 2049 kfree(nce_head); 2053 2050 kfree(nce); 2054 2051 return ret; 2055 2052 } 2056 2053 } 2057 - list_add_tail(&nce->inum_aliases, nce_head); 2054 + list_add_tail(&nce->radix_list, nce_head); 2058 2055 list_add_tail(&nce->list, &sctx->name_cache_list); 2059 2056 sctx->name_cache_size++; 2060 2057 ··· 2066 2063 { 2067 2064 struct list_head *nce_head; 2068 2065 2069 - nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino); 2066 + nce_head = radix_tree_lookup(&sctx->name_cache, 2067 + (unsigned long)nce->ino); 2070 2068 if (!nce_head) { 2071 2069 btrfs_err(sctx->send_root->fs_info, 2072 2070 "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", 2073 2071 nce->ino, sctx->name_cache_size); 2074 2072 } 2075 2073 2076 - list_del(&nce->inum_aliases); 2074 + list_del(&nce->radix_list); 2077 2075 list_del(&nce->list); 2078 2076 sctx->name_cache_size--; 2079 2077 ··· 2082 2078 * We may not get to the final release of nce_head if the lookup fails 2083 2079 */ 2084 2080 if (nce_head && list_empty(nce_head)) { 2085 - xa_erase(&sctx->name_cache, (unsigned long)nce->ino); 2081 + radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); 2086 2082 kfree(nce_head); 2087 2083 } 2088 2084 } ··· 2093 2089 struct list_head *nce_head; 2094 2090 struct name_cache_entry *cur; 2095 2091 2096 - nce_head = 
xa_load(&sctx->name_cache, (unsigned long)ino); 2092 + nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino); 2097 2093 if (!nce_head) 2098 2094 return NULL; 2099 2095 2100 - list_for_each_entry(cur, nce_head, inum_aliases) { 2096 + list_for_each_entry(cur, nce_head, radix_list) { 2101 2097 if (cur->ino == ino && cur->gen == gen) 2102 2098 return cur; 2103 2099 } ··· 7522 7518 7523 7519 INIT_LIST_HEAD(&sctx->new_refs); 7524 7520 INIT_LIST_HEAD(&sctx->deleted_refs); 7525 - xa_init_flags(&sctx->name_cache, GFP_KERNEL); 7521 + INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL); 7526 7522 INIT_LIST_HEAD(&sctx->name_cache_list); 7527 7523 7528 7524 sctx->flags = arg->flags;
+20 -4
fs/btrfs/tests/btrfs-tests.c
··· 150 150 151 151 void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) 152 152 { 153 - unsigned long index; 154 - struct extent_buffer *eb; 153 + struct radix_tree_iter iter; 154 + void **slot; 155 155 struct btrfs_device *dev, *tmp; 156 156 157 157 if (!fs_info) ··· 163 163 164 164 test_mnt->mnt_sb->s_fs_info = NULL; 165 165 166 - xa_for_each(&fs_info->extent_buffers, index, eb) { 166 + spin_lock(&fs_info->buffer_lock); 167 + radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { 168 + struct extent_buffer *eb; 169 + 170 + eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); 171 + if (!eb) 172 + continue; 173 + /* Shouldn't happen but that kind of thinking creates CVE's */ 174 + if (radix_tree_exception(eb)) { 175 + if (radix_tree_deref_retry(eb)) 176 + slot = radix_tree_iter_retry(&iter); 177 + continue; 178 + } 179 + slot = radix_tree_iter_resume(slot, &iter); 180 + spin_unlock(&fs_info->buffer_lock); 167 181 free_extent_buffer_stale(eb); 182 + spin_lock(&fs_info->buffer_lock); 168 183 } 184 + spin_unlock(&fs_info->buffer_lock); 169 185 170 186 btrfs_mapping_tree_free(&fs_info->mapping_tree); 171 187 list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices, ··· 202 186 if (!root) 203 187 return; 204 188 /* Will be freed by btrfs_free_fs_roots */ 205 - if (WARN_ON(test_bit(BTRFS_ROOT_REGISTERED, &root->state))) 189 + if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) 206 190 return; 207 191 btrfs_global_root_delete(root); 208 192 btrfs_put_root(root);
+59 -45
fs/btrfs/transaction.c
··· 23 23 #include "space-info.h" 24 24 #include "zoned.h" 25 25 26 - #define BTRFS_ROOT_TRANS_TAG XA_MARK_0 26 + #define BTRFS_ROOT_TRANS_TAG 0 27 27 28 28 /* 29 29 * Transaction states and transitions ··· 437 437 */ 438 438 smp_wmb(); 439 439 440 - spin_lock(&fs_info->fs_roots_lock); 440 + spin_lock(&fs_info->fs_roots_radix_lock); 441 441 if (root->last_trans == trans->transid && !force) { 442 - spin_unlock(&fs_info->fs_roots_lock); 442 + spin_unlock(&fs_info->fs_roots_radix_lock); 443 443 return 0; 444 444 } 445 - xa_set_mark(&fs_info->fs_roots, 446 - (unsigned long)root->root_key.objectid, 447 - BTRFS_ROOT_TRANS_TAG); 448 - spin_unlock(&fs_info->fs_roots_lock); 445 + radix_tree_tag_set(&fs_info->fs_roots_radix, 446 + (unsigned long)root->root_key.objectid, 447 + BTRFS_ROOT_TRANS_TAG); 448 + spin_unlock(&fs_info->fs_roots_radix_lock); 449 449 root->last_trans = trans->transid; 450 450 451 451 /* this is pretty tricky. We don't want to ··· 487 487 spin_unlock(&cur_trans->dropped_roots_lock); 488 488 489 489 /* Make sure we don't try to update the root at commit time */ 490 - xa_clear_mark(&fs_info->fs_roots, 491 - (unsigned long)root->root_key.objectid, 492 - BTRFS_ROOT_TRANS_TAG); 490 + spin_lock(&fs_info->fs_roots_radix_lock); 491 + radix_tree_tag_clear(&fs_info->fs_roots_radix, 492 + (unsigned long)root->root_key.objectid, 493 + BTRFS_ROOT_TRANS_TAG); 494 + spin_unlock(&fs_info->fs_roots_radix_lock); 493 495 } 494 496 495 497 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, ··· 1404 1402 static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) 1405 1403 { 1406 1404 struct btrfs_fs_info *fs_info = trans->fs_info; 1407 - struct btrfs_root *root; 1408 - unsigned long index; 1405 + struct btrfs_root *gang[8]; 1406 + int i; 1407 + int ret; 1409 1408 1410 1409 /* 1411 1410 * At this point no one can be using this transaction to modify any tree ··· 1414 1411 */ 1415 1412 ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING); 1416 
1413 1417 - spin_lock(&fs_info->fs_roots_lock); 1418 - xa_for_each_marked(&fs_info->fs_roots, index, root, BTRFS_ROOT_TRANS_TAG) { 1419 - int ret; 1414 + spin_lock(&fs_info->fs_roots_radix_lock); 1415 + while (1) { 1416 + ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 1417 + (void **)gang, 0, 1418 + ARRAY_SIZE(gang), 1419 + BTRFS_ROOT_TRANS_TAG); 1420 + if (ret == 0) 1421 + break; 1422 + for (i = 0; i < ret; i++) { 1423 + struct btrfs_root *root = gang[i]; 1424 + int ret2; 1420 1425 1421 - /* 1422 - * At this point we can neither have tasks logging inodes 1423 - * from a root nor trying to commit a log tree. 1424 - */ 1425 - ASSERT(atomic_read(&root->log_writers) == 0); 1426 - ASSERT(atomic_read(&root->log_commit[0]) == 0); 1427 - ASSERT(atomic_read(&root->log_commit[1]) == 0); 1426 + /* 1427 + * At this point we can neither have tasks logging inodes 1428 + * from a root nor trying to commit a log tree. 1429 + */ 1430 + ASSERT(atomic_read(&root->log_writers) == 0); 1431 + ASSERT(atomic_read(&root->log_commit[0]) == 0); 1432 + ASSERT(atomic_read(&root->log_commit[1]) == 0); 1428 1433 1429 - xa_clear_mark(&fs_info->fs_roots, 1430 - (unsigned long)root->root_key.objectid, 1431 - BTRFS_ROOT_TRANS_TAG); 1432 - spin_unlock(&fs_info->fs_roots_lock); 1434 + radix_tree_tag_clear(&fs_info->fs_roots_radix, 1435 + (unsigned long)root->root_key.objectid, 1436 + BTRFS_ROOT_TRANS_TAG); 1437 + spin_unlock(&fs_info->fs_roots_radix_lock); 1433 1438 1434 - btrfs_free_log(trans, root); 1435 - ret = btrfs_update_reloc_root(trans, root); 1436 - if (ret) 1437 - return ret; 1439 + btrfs_free_log(trans, root); 1440 + ret2 = btrfs_update_reloc_root(trans, root); 1441 + if (ret2) 1442 + return ret2; 1438 1443 1439 - /* See comments in should_cow_block() */ 1440 - clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); 1441 - smp_mb__after_atomic(); 1444 + /* see comments in should_cow_block() */ 1445 + clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); 1446 + smp_mb__after_atomic(); 1442 1447 
1443 - if (root->commit_root != root->node) { 1444 - list_add_tail(&root->dirty_list, 1445 - &trans->transaction->switch_commits); 1446 - btrfs_set_root_node(&root->root_item, root->node); 1448 + if (root->commit_root != root->node) { 1449 + list_add_tail(&root->dirty_list, 1450 + &trans->transaction->switch_commits); 1451 + btrfs_set_root_node(&root->root_item, 1452 + root->node); 1453 + } 1454 + 1455 + ret2 = btrfs_update_root(trans, fs_info->tree_root, 1456 + &root->root_key, 1457 + &root->root_item); 1458 + if (ret2) 1459 + return ret2; 1460 + spin_lock(&fs_info->fs_roots_radix_lock); 1461 + btrfs_qgroup_free_meta_all_pertrans(root); 1447 1462 } 1448 - 1449 - ret = btrfs_update_root(trans, fs_info->tree_root, 1450 - &root->root_key, &root->root_item); 1451 - if (ret) 1452 - return ret; 1453 - spin_lock(&fs_info->fs_roots_lock); 1454 - btrfs_qgroup_free_meta_all_pertrans(root); 1455 1463 } 1456 - spin_unlock(&fs_info->fs_roots_lock); 1464 + spin_unlock(&fs_info->fs_roots_radix_lock); 1457 1465 return 0; 1458 1466 } 1459 1467