Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-6.5-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
"More fixes, some of them going back to older releases and there are
fixes for hangs in stress tests related to free space caching:

- fixes and progress tracking for hangs in free space caching, found
by test generic/475

- writeback fixes, write pages in integrity mode and skip writing
pages that have been written meanwhile

- properly clear end of extent range after an error

- relocation fixes:
- fix race between qgroup tree creation and relocation
- detect and report invalid reloc roots"

* tag 'for-6.5-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: set cache_block_group_error if we find an error
btrfs: reject invalid reloc tree root keys with stack dump
btrfs: exit gracefully if reloc roots don't match
btrfs: avoid race between qgroup tree creation and relocation
btrfs: properly clear end of the unreserved range in cow_file_range
btrfs: don't wait for writeback on clean pages in extent_write_cache_pages
btrfs: don't stop integrity writeback too early
btrfs: wait for actual caching progress during allocation

+99 -20
+15 -2
fs/btrfs/block-group.c
··· 441 441 u64 num_bytes) 442 442 { 443 443 struct btrfs_caching_control *caching_ctl; 444 + int progress; 444 445 445 446 caching_ctl = btrfs_get_caching_control(cache); 446 447 if (!caching_ctl) 447 448 return; 448 449 450 + /* 451 + * We've already failed to allocate from this block group, so even if 452 + * there's enough space in the block group it isn't contiguous enough to 453 + * allow for an allocation, so wait for at least the next wakeup tick, 454 + * or for the thing to be done. 455 + */ 456 + progress = atomic_read(&caching_ctl->progress); 457 + 449 458 wait_event(caching_ctl->wait, btrfs_block_group_done(cache) || 450 - (cache->free_space_ctl->free_space >= num_bytes)); 459 + (progress != atomic_read(&caching_ctl->progress) && 460 + (cache->free_space_ctl->free_space >= num_bytes))); 451 461 452 462 btrfs_put_caching_control(caching_ctl); 453 463 } ··· 812 802 813 803 if (total_found > CACHING_CTL_WAKE_UP) { 814 804 total_found = 0; 815 - if (wakeup) 805 + if (wakeup) { 806 + atomic_inc(&caching_ctl->progress); 816 807 wake_up(&caching_ctl->wait); 808 + } 817 809 } 818 810 } 819 811 path->slots[0]++; ··· 922 910 init_waitqueue_head(&caching_ctl->wait); 923 911 caching_ctl->block_group = cache; 924 912 refcount_set(&caching_ctl->count, 2); 913 + atomic_set(&caching_ctl->progress, 0); 925 914 btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); 926 915 927 916 spin_lock(&cache->lock);
+2
fs/btrfs/block-group.h
··· 90 90 wait_queue_head_t wait; 91 91 struct btrfs_work work; 92 92 struct btrfs_block_group *block_group; 93 + /* Track progress of caching during allocation. */ 94 + atomic_t progress; 93 95 refcount_t count; 94 96 }; 95 97
+12 -1
fs/btrfs/disk-io.c
··· 1103 1103 btrfs_drew_lock_init(&root->snapshot_lock); 1104 1104 1105 1105 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && 1106 - !btrfs_is_data_reloc_root(root)) { 1106 + !btrfs_is_data_reloc_root(root) && 1107 + is_fstree(root->root_key.objectid)) { 1107 1108 set_bit(BTRFS_ROOT_SHAREABLE, &root->state); 1108 1109 btrfs_check_and_init_root_item(&root->root_item); 1109 1110 } ··· 1301 1300 root = btrfs_get_global_root(fs_info, objectid); 1302 1301 if (root) 1303 1302 return root; 1303 + 1304 + /* 1305 + * If we're called for non-subvolume trees, and above function didn't 1306 + * find one, do not try to read it from disk. 1307 + * 1308 + * This is namely for free-space-tree and quota tree, which can change 1309 + * at runtime and should only be grabbed from fs_info. 1310 + */ 1311 + if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) 1312 + return ERR_PTR(-ENOENT); 1304 1313 again: 1305 1314 root = btrfs_lookup_fs_root(fs_info, objectid); 1306 1315 if (root) {
+4 -1
fs/btrfs/extent-tree.c
··· 4310 4310 ret = 0; 4311 4311 } 4312 4312 4313 - if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) 4313 + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) { 4314 + if (!cache_block_group_error) 4315 + cache_block_group_error = -EIO; 4314 4316 goto loop; 4317 + } 4315 4318 4316 4319 if (!find_free_extent_check_size_class(ffe_ctl, block_group)) 4317 4320 goto loop;
+10 -3
fs/btrfs/extent_io.c
··· 2145 2145 continue; 2146 2146 } 2147 2147 2148 + if (!folio_test_dirty(folio)) { 2149 + /* Someone wrote it for us. */ 2150 + folio_unlock(folio); 2151 + continue; 2152 + } 2153 + 2148 2154 if (wbc->sync_mode != WB_SYNC_NONE) { 2149 2155 if (folio_test_writeback(folio)) 2150 2156 submit_write_bio(bio_ctrl, 0); ··· 2170 2164 } 2171 2165 2172 2166 /* 2173 - * the filesystem may choose to bump up nr_to_write. 2167 + * The filesystem may choose to bump up nr_to_write. 2174 2168 * We have to make sure to honor the new nr_to_write 2175 - * at any time 2169 + * at any time. 2176 2170 */ 2177 - nr_to_write_done = wbc->nr_to_write <= 0; 2171 + nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE && 2172 + wbc->nr_to_write <= 0); 2178 2173 } 2179 2174 folio_batch_release(&fbatch); 2180 2175 cond_resched();
+5 -5
fs/btrfs/inode.c
··· 1654 1654 clear_bits, 1655 1655 page_ops); 1656 1656 start += cur_alloc_size; 1657 - if (start >= end) 1658 - return ret; 1659 1657 } 1660 1658 1661 1659 /* ··· 1662 1664 * space_info's bytes_may_use counter, reserved in 1663 1665 * btrfs_check_data_free_space(). 1664 1666 */ 1665 - extent_clear_unlock_delalloc(inode, start, end, locked_page, 1666 - clear_bits | EXTENT_CLEAR_DATA_RESV, 1667 - page_ops); 1667 + if (start < end) { 1668 + clear_bits |= EXTENT_CLEAR_DATA_RESV; 1669 + extent_clear_unlock_delalloc(inode, start, end, locked_page, 1670 + clear_bits, page_ops); 1671 + } 1668 1672 return ret; 1669 1673 } 1670 1674
+37 -8
fs/btrfs/relocation.c
··· 1916 1916 err = PTR_ERR(root); 1917 1917 break; 1918 1918 } 1919 - ASSERT(root->reloc_root == reloc_root); 1919 + 1920 + if (unlikely(root->reloc_root != reloc_root)) { 1921 + if (root->reloc_root) { 1922 + btrfs_err(fs_info, 1923 + "reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu", 1924 + root->root_key.objectid, 1925 + root->reloc_root->root_key.objectid, 1926 + root->reloc_root->root_key.type, 1927 + root->reloc_root->root_key.offset, 1928 + btrfs_root_generation( 1929 + &root->reloc_root->root_item), 1930 + reloc_root->root_key.objectid, 1931 + reloc_root->root_key.type, 1932 + reloc_root->root_key.offset, 1933 + btrfs_root_generation( 1934 + &reloc_root->root_item)); 1935 + } else { 1936 + btrfs_err(fs_info, 1937 + "reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu", 1938 + root->root_key.objectid, 1939 + reloc_root->root_key.objectid, 1940 + reloc_root->root_key.type, 1941 + reloc_root->root_key.offset, 1942 + btrfs_root_generation( 1943 + &reloc_root->root_item)); 1944 + } 1945 + list_add(&reloc_root->root_list, &reloc_roots); 1946 + btrfs_put_root(root); 1947 + btrfs_abort_transaction(trans, -EUCLEAN); 1948 + if (!err) 1949 + err = -EUCLEAN; 1950 + break; 1951 + } 1920 1952 1921 1953 /* 1922 1954 * set reference count to 1, so btrfs_recover_relocation ··· 2021 1989 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, 2022 1990 false); 2023 1991 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 2024 - if (IS_ERR(root)) { 1992 + if (WARN_ON(IS_ERR(root))) { 2025 1993 /* 2026 1994 * For recovery we read the fs roots on mount, 2027 1995 * and if we didn't find the root then we marked ··· 2030 1998 * memory. However there's no reason we can't 2031 1999 * handle the error properly here just in case. 
2032 2000 */ 2033 - ASSERT(0); 2034 2001 ret = PTR_ERR(root); 2035 2002 goto out; 2036 2003 } 2037 - if (root->reloc_root != reloc_root) { 2004 + if (WARN_ON(root->reloc_root != reloc_root)) { 2038 2005 /* 2039 - * This is actually impossible without something 2040 - * going really wrong (like weird race condition 2041 - * or cosmic rays). 2006 + * This can happen if on-disk metadata has some 2007 + * corruption, e.g. bad reloc tree key offset. 2042 2008 */ 2043 - ASSERT(0); 2044 2009 ret = -EINVAL; 2045 2010 goto out; 2046 2011 }
+14
fs/btrfs/tree-checker.c
··· 446 446 btrfs_item_key_to_cpu(leaf, &item_key, slot); 447 447 is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY); 448 448 449 + /* 450 + * Bad rootid for reloc trees. 451 + * 452 + * Reloc trees are only for subvolume trees, other trees only need 453 + * to be COWed to be relocated. 454 + */ 455 + if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID && 456 + !is_fstree(key->offset))) { 457 + generic_err(leaf, slot, 458 + "invalid reloc tree for root %lld, root id is not a subvolume tree", 459 + key->offset); 460 + return -EUCLEAN; 461 + } 462 + 449 463 /* No such tree id */ 450 464 if (unlikely(key->objectid == 0)) { 451 465 if (is_root_item)