Merge tag 'for-6.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

+1 -11

fs/btrfs/Kconfig

··· 62 62 config BTRFS_DEBUG 63 63 bool "Btrfs debugging support" 64 64 depends on BTRFS_FS 65 + select REF_TRACKER if STACKTRACE_SUPPORT 65 66 help 66 67 Enable run-time debugging support for the btrfs filesystem. 67 68 ··· 116 115 - extent tree v2 - complex rework of extent tracking 117 116 118 117 - large folio support 119 - 120 - If unsure, say N. 121 - 122 - config BTRFS_FS_REF_VERIFY 123 - bool "Btrfs with the ref verify tool compiled in" 124 - depends on BTRFS_FS 125 - default n 126 - help 127 - Enable run-time extent reference verification instrumentation. This 128 - is meant to be used by btrfs developers for tracking down extent 129 - reference problems or verifying they didn't break something. 130 118 131 119 If unsure, say N.

+1 -1

fs/btrfs/Makefile

··· 36 36 lru_cache.o raid-stripe-tree.o fiemap.o direct-io.o 37 37 38 38 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o 39 - btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o 39 + btrfs-$(CONFIG_BTRFS_DEBUG) += ref-verify.o 40 40 btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o 41 41 btrfs-$(CONFIG_FS_VERITY) += verity.o 42 42

+1 -1

fs/btrfs/accessors.c

··· 44 44 * gives us all the type checking. 45 45 * 46 46 * The extent buffer pages stored in the array folios may not form a contiguous 47 - * phyusical range, but the API functions assume the linear offset to the range 47 + * physical range, but the API functions assume the linear offset to the range 48 48 * from 0 to metadata node size. 49 49 */ 50 50

+13 -13

fs/btrfs/backref.c

··· 859 859 free_pref(ref); 860 860 return PTR_ERR(eb); 861 861 } 862 - if (!extent_buffer_uptodate(eb)) { 862 + if (unlikely(!extent_buffer_uptodate(eb))) { 863 863 free_pref(ref); 864 864 free_extent_buffer(eb); 865 865 return -EIO; ··· 1062 1062 iref = (struct btrfs_extent_inline_ref *)ptr; 1063 1063 type = btrfs_get_extent_inline_ref_type(leaf, iref, 1064 1064 BTRFS_REF_TYPE_ANY); 1065 - if (type == BTRFS_REF_TYPE_INVALID) 1065 + if (unlikely(type == BTRFS_REF_TYPE_INVALID)) 1066 1066 return -EUCLEAN; 1067 1067 1068 1068 offset = btrfs_extent_inline_ref_offset(leaf, iref); ··· 1422 1422 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1423 1423 if (ret < 0) 1424 1424 goto out; 1425 - if (ret == 0) { 1425 + if (unlikely(ret == 0)) { 1426 1426 /* 1427 1427 * Key with offset -1 found, there would have to exist an extent 1428 1428 * item with such offset, but this is out of the valid range. ··· 1614 1614 ret = PTR_ERR(eb); 1615 1615 goto out; 1616 1616 } 1617 - if (!extent_buffer_uptodate(eb)) { 1617 + if (unlikely(!extent_buffer_uptodate(eb))) { 1618 1618 free_extent_buffer(eb); 1619 1619 ret = -EIO; 1620 1620 goto out; ··· 1652 1652 * case. 1653 1653 */ 1654 1654 ASSERT(eie); 1655 - if (!eie) { 1655 + if (unlikely(!eie)) { 1656 1656 ret = -EUCLEAN; 1657 1657 goto out; 1658 1658 } ··· 1690 1690 * @ctx->bytenr and @ctx->extent_item_pos. The bytenr of the found leaves are 1691 1691 * added to the ulist at @ctx->refs, and that ulist is allocated by this 1692 1692 * function. The caller should free the ulist with free_leaf_list() if 1693 - * @ctx->ignore_extent_item_pos is false, otherwise a fimple ulist_free() is 1693 + * @ctx->ignore_extent_item_pos is false, otherwise a simple ulist_free() is 1694 1694 * enough. 1695 1695 * 1696 1696 * Returns 0 on success and < 0 on error. On error @ctx->refs is not allocated. 
··· 2215 2215 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2216 2216 if (ret < 0) 2217 2217 return ret; 2218 - if (ret == 0) { 2218 + if (unlikely(ret == 0)) { 2219 2219 /* 2220 2220 * Key with offset -1 found, there would have to exist an extent 2221 2221 * item with such offset, but this is out of the valid range. ··· 2312 2312 *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr); 2313 2313 *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref, 2314 2314 BTRFS_REF_TYPE_ANY); 2315 - if (*out_type == BTRFS_REF_TYPE_INVALID) 2315 + if (unlikely(*out_type == BTRFS_REF_TYPE_INVALID)) 2316 2316 return -EUCLEAN; 2317 2317 2318 2318 *ptr += btrfs_extent_inline_ref_size(*out_type); ··· 2868 2868 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2869 2869 if (ret < 0) 2870 2870 return ret; 2871 - if (ret == 0) { 2871 + if (unlikely(ret == 0)) { 2872 2872 /* 2873 2873 * Key with offset -1 found, there would have to exist an extent 2874 2874 * item with such offset, but this is out of the valid range. ··· 2876 2876 ret = -EUCLEAN; 2877 2877 goto release; 2878 2878 } 2879 - if (path->slots[0] == 0) { 2879 + if (unlikely(path->slots[0] == 0)) { 2880 2880 DEBUG_WARN(); 2881 2881 ret = -EUCLEAN; 2882 2882 goto release; ··· 3457 3457 if (ret < 0) 3458 3458 goto out; 3459 3459 /* No extra backref? 
This means the tree block is corrupted */ 3460 - if (ret > 0) { 3460 + if (unlikely(ret > 0)) { 3461 3461 ret = -EUCLEAN; 3462 3462 goto out; 3463 3463 } ··· 3500 3500 ((unsigned long)iter->cur_ptr); 3501 3501 type = btrfs_get_extent_inline_ref_type(eb, iref, 3502 3502 BTRFS_REF_TYPE_BLOCK); 3503 - if (type == BTRFS_REF_TYPE_INVALID) { 3503 + if (unlikely(type == BTRFS_REF_TYPE_INVALID)) { 3504 3504 ret = -EUCLEAN; 3505 3505 goto out; 3506 3506 } ··· 3612 3612 } 3613 3613 3614 3614 /* Sanity check, we shouldn't have any unchecked nodes */ 3615 - if (!upper->checked) { 3615 + if (unlikely(!upper->checked)) { 3616 3616 DEBUG_WARN("we should not have any unchecked nodes"); 3617 3617 return -EUCLEAN; 3618 3618 }

+2 -2

fs/btrfs/backref.h

··· 190 190 * It's very common to have several file extent items that point to the 191 191 * same extent (bytenr) but with different offsets and lengths. This 192 192 * typically happens for COW writes, partial writes into prealloc 193 - * extents, NOCOW writes after snapshoting a root, hole punching or 193 + * extents, NOCOW writes after snapshotting a root, hole punching or 194 194 * reflinking within the same file (less common perhaps). 195 195 * So keep a small cache with the lookup results for the extent pointed 196 196 * by the last few file extent items. This cache is checked, with a ··· 414 414 /* 415 415 * Whether this cache is for relocation 416 416 * 417 - * Reloction backref cache require more info for reloc root compared 417 + * Relocation backref cache require more info for reloc root compared 418 418 * to generic backref cache. 419 419 */ 420 420 bool is_reloc;

+40 -14

fs/btrfs/bio.c

··· 93 93 refcount_inc(&orig_bbio->ordered->refs); 94 94 bbio->ordered = orig_bbio->ordered; 95 95 } 96 + bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root; 96 97 atomic_inc(&orig_bbio->pending_ios); 97 98 return bbio; 98 99 } ··· 167 166 int mirror = repair_bbio->mirror_num; 168 167 169 168 if (repair_bbio->bio.bi_status || 170 - !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) { 169 + !btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) { 171 170 bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ); 172 171 repair_bbio->bio.bi_iter = repair_bbio->saved_iter; 173 172 ··· 204 203 */ 205 204 static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio, 206 205 u32 bio_offset, 207 - struct bio_vec *bv, 206 + phys_addr_t paddr, 208 207 struct btrfs_failed_bio *fbio) 209 208 { 210 209 struct btrfs_inode *inode = failed_bbio->inode; 211 210 struct btrfs_fs_info *fs_info = inode->root->fs_info; 211 + struct folio *folio = page_folio(phys_to_page(paddr)); 212 212 const u32 sectorsize = fs_info->sectorsize; 213 + const u32 foff = offset_in_folio(folio, paddr); 213 214 const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT); 214 215 struct btrfs_bio *repair_bbio; 215 216 struct bio *repair_bio; 216 217 int num_copies; 217 218 int mirror; 218 219 220 + ASSERT(foff + sectorsize <= folio_size(folio)); 219 221 btrfs_debug(fs_info, "repair read error: read error at %llu", 220 222 failed_bbio->file_offset + bio_offset); 221 223 ··· 241 237 repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, 242 238 &btrfs_repair_bioset); 243 239 repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector; 244 - __bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset); 240 + bio_add_folio_nofail(repair_bio, folio, sectorsize, foff); 245 241 246 242 repair_bbio = btrfs_bio(repair_bio); 247 243 btrfs_bio_init(repair_bbio, fs_info, NULL, fbio); ··· 262 258 struct bvec_iter *iter = &bbio->saved_iter; 263 259 blk_status_t status 
= bbio->bio.bi_status; 264 260 struct btrfs_failed_bio *fbio = NULL; 261 + phys_addr_t paddr; 265 262 u32 offset = 0; 266 263 267 264 /* Read-repair requires the inode field to be set by the submitter. */ ··· 280 275 /* Clear the I/O error. A failed repair will reset it. */ 281 276 bbio->bio.bi_status = BLK_STS_OK; 282 277 283 - while (iter->bi_size) { 284 - struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter); 285 - 286 - bv.bv_len = min(bv.bv_len, sectorsize); 287 - if (status || !btrfs_data_csum_ok(bbio, dev, offset, &bv)) 288 - fbio = repair_one_sector(bbio, offset, &bv, fbio); 289 - 290 - bio_advance_iter_single(&bbio->bio, iter, sectorsize); 278 + btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) { 279 + if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr)) 280 + fbio = repair_one_sector(bbio, offset, paddr, fbio); 291 281 offset += sectorsize; 292 282 } 293 - 294 283 if (bbio->csum != bbio->csum_inline) 295 284 kfree(bbio->csum); 296 285 ··· 779 780 return true; 780 781 } 781 782 783 + static void assert_bbio_alignment(struct btrfs_bio *bbio) 784 + { 785 + #ifdef CONFIG_BTRFS_ASSERT 786 + struct btrfs_fs_info *fs_info = bbio->fs_info; 787 + struct bio_vec bvec; 788 + struct bvec_iter iter; 789 + const u32 blocksize = fs_info->sectorsize; 790 + 791 + /* Metadata has no extra bs > ps alignment requirement. 
*/ 792 + if (!is_data_bbio(bbio)) 793 + return; 794 + 795 + bio_for_each_bvec(bvec, &bbio->bio, iter) 796 + ASSERT(IS_ALIGNED(bvec.bv_offset, blocksize) && 797 + IS_ALIGNED(bvec.bv_len, blocksize), 798 + "root=%llu inode=%llu logical=%llu length=%u index=%u bv_offset=%u bv_len=%u", 799 + btrfs_root_id(bbio->inode->root), 800 + btrfs_ino(bbio->inode), 801 + bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT, 802 + bbio->bio.bi_iter.bi_size, iter.bi_idx, 803 + bvec.bv_offset, 804 + bvec.bv_len); 805 + #endif 806 + } 807 + 782 808 void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num) 783 809 { 784 810 /* If bbio->inode is not populated, its file_offset must be 0. */ 785 811 ASSERT(bbio->inode || bbio->file_offset == 0); 812 + 813 + assert_bbio_alignment(bbio); 786 814 787 815 while (!btrfs_submit_chunk(bbio, mirror_num)) 788 816 ; ··· 849 823 if (ret < 0) 850 824 goto out_counter_dec; 851 825 852 - if (!smap.dev->bdev || 853 - !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state)) { 826 + if (unlikely(!smap.dev->bdev || 827 + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state))) { 854 828 ret = -EIO; 855 829 goto out_counter_dec; 856 830 }

+2

fs/btrfs/bio.h

··· 82 82 /* Save the first error status of split bio. */ 83 83 blk_status_t status; 84 84 85 + /* Use the commit root to look up csums (data read bio only). */ 86 + bool csum_search_commit_root; 85 87 /* 86 88 * This member must come last, bio_alloc_bioset will allocate enough 87 89 * bytes for entire btrfs_bio but relies on bio being last.

+15 -15

fs/btrfs/block-group.c

··· 1358 1358 * data in this block group. That check should be done by relocation routine, 1359 1359 * not this function. 1360 1360 */ 1361 - static int inc_block_group_ro(struct btrfs_block_group *cache, int force) 1361 + static int inc_block_group_ro(struct btrfs_block_group *cache, bool force) 1362 1362 { 1363 1363 struct btrfs_space_info *sinfo = cache->space_info; 1364 1364 u64 num_bytes; ··· 1971 1971 * called, which is where we will transfer a reserved extent's 1972 1972 * size from the "reserved" counter to the "used" counter - this 1973 1973 * happens when running delayed references. When we relocate the 1974 - * chunk below, relocation first flushes dellaloc, waits for 1974 + * chunk below, relocation first flushes delalloc, waits for 1975 1975 * ordered extent completion (which is where we create delayed 1976 1976 * references for data extents) and commits the current 1977 1977 * transaction (which runs delayed references), and only after ··· 2071 2071 return -ENOENT; 2072 2072 } 2073 2073 2074 - if (map->start != key->objectid || map->chunk_len != key->offset) { 2074 + if (unlikely(map->start != key->objectid || map->chunk_len != key->offset)) { 2075 2075 btrfs_err(fs_info, 2076 2076 "block group %llu len %llu mismatch with chunk %llu len %llu", 2077 2077 key->objectid, key->offset, map->start, map->chunk_len); ··· 2084 2084 flags = btrfs_stack_block_group_flags(&bg) & 2085 2085 BTRFS_BLOCK_GROUP_TYPE_MASK; 2086 2086 2087 - if (flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { 2087 + if (unlikely(flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK))) { 2088 2088 btrfs_err(fs_info, 2089 2089 "block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", 2090 2090 key->objectid, key->offset, flags, ··· 2245 2245 return ret; 2246 2246 2247 2247 /* Shouldn't have super stripes in sequential zones */ 2248 - if (zoned && nr) { 2248 + if (unlikely(zoned && nr)) { 2249 2249 kfree(logical); 2250 2250 btrfs_err(fs_info, 2251 2251 "zoned: 
block group %llu must not contain super block", ··· 2336 2336 break; 2337 2337 2338 2338 bg = btrfs_lookup_block_group(fs_info, map->start); 2339 - if (!bg) { 2339 + if (unlikely(!bg)) { 2340 2340 btrfs_err(fs_info, 2341 2341 "chunk start=%llu len=%llu doesn't have corresponding block group", 2342 2342 map->start, map->chunk_len); ··· 2344 2344 btrfs_free_chunk_map(map); 2345 2345 break; 2346 2346 } 2347 - if (bg->start != map->start || bg->length != map->chunk_len || 2348 - (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != 2349 - (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { 2347 + if (unlikely(bg->start != map->start || bg->length != map->chunk_len || 2348 + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != 2349 + (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK))) { 2350 2350 btrfs_err(fs_info, 2351 2351 "chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", 2352 2352 map->start, map->chunk_len, ··· 2839 2839 * space or none at all (due to no need to COW, extent buffers 2840 2840 * were already COWed in the current transaction and still 2841 2841 * unwritten, tree heights lower than the maximum possible 2842 - * height, etc). For data we generally reserve the axact amount 2842 + * height, etc). 
For data we generally reserve the exact amount 2843 2843 * of space we are going to allocate later, the exception is 2844 2844 * when using compression, as we must reserve space based on the 2845 2845 * uncompressed data size, because the compression is only done ··· 3248 3248 */ 3249 3249 BTRFS_I(inode)->generation = 0; 3250 3250 ret = btrfs_update_inode(trans, BTRFS_I(inode)); 3251 - if (ret) { 3251 + if (unlikely(ret)) { 3252 3252 /* 3253 3253 * So theoretically we could recover from this, simply set the 3254 3254 * super cache generation to 0 so we know to invalidate the ··· 3995 3995 struct btrfs_space_info *sys_space_info; 3996 3996 3997 3997 sys_space_info = btrfs_find_space_info(trans->fs_info, sys_flags); 3998 - if (!sys_space_info) { 3998 + if (unlikely(!sys_space_info)) { 3999 3999 ret = -EINVAL; 4000 4000 btrfs_abort_transaction(trans, ret); 4001 4001 goto out; ··· 4009 4009 } 4010 4010 4011 4011 ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg); 4012 - if (ret) { 4012 + if (unlikely(ret)) { 4013 4013 btrfs_abort_transaction(trans, ret); 4014 4014 goto out; 4015 4015 } 4016 4016 4017 4017 ret = btrfs_chunk_alloc_add_chunk_item(trans, bg); 4018 - if (ret) { 4018 + if (unlikely(ret)) { 4019 4019 btrfs_abort_transaction(trans, ret); 4020 4020 goto out; 4021 4021 } 4022 - } else if (ret) { 4022 + } else if (unlikely(ret)) { 4023 4023 btrfs_abort_transaction(trans, ret); 4024 4024 goto out; 4025 4025 }

+1 -1

fs/btrfs/block-group.h

··· 63 63 * CHUNK_ALLOC_FORCE means it must try to allocate one 64 64 * 65 65 * CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from 66 - * find_free_extent() that also activaes the zone 66 + * find_free_extent() that also activates the zone 67 67 */ 68 68 enum btrfs_chunk_alloc_enum { 69 69 CHUNK_ALLOC_NO_FORCE,

+9 -7

fs/btrfs/btrfs_inode.h

··· 537 537 538 538 /* We only allow BITS_PER_LONGS blocks for each bitmap. */ 539 539 #ifdef CONFIG_BTRFS_EXPERIMENTAL 540 - mapping_set_folio_order_range(inode->vfs_inode.i_mapping, 0, 541 - ilog2(((BITS_PER_LONG << inode->root->fs_info->sectorsize_bits) 542 - >> PAGE_SHIFT))); 540 + mapping_set_folio_order_range(inode->vfs_inode.i_mapping, 541 + inode->root->fs_info->block_min_order, 542 + inode->root->fs_info->block_max_order); 543 543 #endif 544 544 } 545 545 ··· 547 547 #define CSUM_FMT "0x%*phN" 548 548 #define CSUM_FMT_VALUE(size, bytes) size, bytes 549 549 550 - int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum, 551 - const u8 * const csum_expected); 550 + void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, 551 + u8 *dest); 552 + int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, 553 + const u8 * const csum_expected); 552 554 bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, 553 - u32 bio_offset, struct bio_vec *bv); 555 + u32 bio_offset, phys_addr_t paddr); 554 556 noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len, 555 557 struct btrfs_file_extent *file_extent, 556 558 bool nowait); ··· 565 563 const struct fscrypt_str *name); 566 564 int btrfs_add_link(struct btrfs_trans_handle *trans, 567 565 struct btrfs_inode *parent_inode, struct btrfs_inode *inode, 568 - const struct fscrypt_str *name, int add_backref, u64 index); 566 + const struct fscrypt_str *name, bool add_backref, u64 index); 569 567 int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry); 570 568 int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 end); 571 569

+148 -95

fs/btrfs/compression.c

··· 90 90 } 91 91 92 92 static int compression_compress_pages(int type, struct list_head *ws, 93 - struct address_space *mapping, u64 start, 93 + struct btrfs_inode *inode, u64 start, 94 94 struct folio **folios, unsigned long *out_folios, 95 95 unsigned long *total_in, unsigned long *total_out) 96 96 { 97 97 switch (type) { 98 98 case BTRFS_COMPRESS_ZLIB: 99 - return zlib_compress_folios(ws, mapping, start, folios, 99 + return zlib_compress_folios(ws, inode, start, folios, 100 100 out_folios, total_in, total_out); 101 101 case BTRFS_COMPRESS_LZO: 102 - return lzo_compress_folios(ws, mapping, start, folios, 102 + return lzo_compress_folios(ws, inode, start, folios, 103 103 out_folios, total_in, total_out); 104 104 case BTRFS_COMPRESS_ZSTD: 105 - return zstd_compress_folios(ws, mapping, start, folios, 105 + return zstd_compress_folios(ws, inode, start, folios, 106 106 out_folios, total_in, total_out); 107 107 case BTRFS_COMPRESS_NONE: 108 108 default: ··· 223 223 /* 224 224 * Common wrappers for page allocation from compression wrappers 225 225 */ 226 - struct folio *btrfs_alloc_compr_folio(void) 226 + struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info) 227 227 { 228 228 struct folio *folio = NULL; 229 + 230 + /* For bs > ps cases, no cached folio pool for now. */ 231 + if (fs_info->block_min_order) 232 + goto alloc; 229 233 230 234 spin_lock(&compr_pool.lock); 231 235 if (compr_pool.count > 0) { ··· 242 238 if (folio) 243 239 return folio; 244 240 245 - return folio_alloc(GFP_NOFS, 0); 241 + alloc: 242 + return folio_alloc(GFP_NOFS, fs_info->block_min_order); 246 243 } 247 244 248 245 void btrfs_free_compr_folio(struct folio *folio) 249 246 { 250 247 bool do_free = false; 248 + 249 + /* The folio is from bs > ps fs, no cached pool for now. 
*/ 250 + if (folio_order(folio)) 251 + goto free; 251 252 252 253 spin_lock(&compr_pool.lock); 253 254 if (compr_pool.count > compr_pool.thresh) { ··· 266 257 if (!do_free) 267 258 return; 268 259 260 + free: 269 261 ASSERT(folio_ref_count(folio) == 1); 270 262 folio_put(folio); 271 263 } ··· 354 344 355 345 static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb) 356 346 { 347 + struct btrfs_fs_info *fs_info = cb->bbio.fs_info; 357 348 struct bio *bio = &cb->bbio.bio; 358 349 u32 offset = 0; 359 350 360 351 while (offset < cb->compressed_len) { 352 + struct folio *folio; 361 353 int ret; 362 - u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE); 354 + u32 len = min_t(u32, cb->compressed_len - offset, 355 + btrfs_min_folio_size(fs_info)); 363 356 357 + folio = cb->compressed_folios[offset >> (PAGE_SHIFT + fs_info->block_min_order)]; 364 358 /* Maximum compressed extent is smaller than bio size limit. */ 365 - ret = bio_add_folio(bio, cb->compressed_folios[offset >> PAGE_SHIFT], 366 - len, 0); 359 + ret = bio_add_folio(bio, folio, len, 0); 367 360 ASSERT(ret); 368 361 offset += len; 369 362 } ··· 454 441 * subpage for now, until full compressed write is supported. 455 442 */ 456 443 if (fs_info->sectorsize < PAGE_SIZE) 444 + return 0; 445 + 446 + /* For bs > ps cases, we don't support readahead for compressed folios for now. 
*/ 447 + if (fs_info->block_min_order) 457 448 return 0; 458 449 459 450 end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; ··· 619 602 cb->compressed_len = compressed_len; 620 603 cb->compress_type = btrfs_extent_map_compression(em); 621 604 cb->orig_bbio = bbio; 605 + cb->bbio.csum_search_commit_root = bbio->csum_search_commit_root; 622 606 623 607 btrfs_free_extent_map(em); 624 608 625 - cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE); 609 + cb->nr_folios = DIV_ROUND_UP(compressed_len, btrfs_min_folio_size(fs_info)); 626 610 cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS); 627 611 if (!cb->compressed_folios) { 628 612 status = BLK_STS_RESOURCE; 629 613 goto out_free_bio; 630 614 } 631 615 632 - ret = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios); 616 + ret = btrfs_alloc_folio_array(cb->nr_folios, fs_info->block_min_order, 617 + cb->compressed_folios); 633 618 if (ret) { 634 619 status = BLK_STS_RESOURCE; 635 620 goto out_free_compressed_pages; ··· 706 687 struct list_head list; 707 688 }; 708 689 709 - static struct workspace_manager heuristic_wsm; 710 - 711 690 static void free_heuristic_ws(struct list_head *ws) 712 691 { 713 692 struct heuristic_ws *workspace; ··· 718 701 kfree(workspace); 719 702 } 720 703 721 - static struct list_head *alloc_heuristic_ws(void) 704 + static struct list_head *alloc_heuristic_ws(struct btrfs_fs_info *fs_info) 722 705 { 723 706 struct heuristic_ws *ws; 724 707 ··· 745 728 return ERR_PTR(-ENOMEM); 746 729 } 747 730 748 - const struct btrfs_compress_op btrfs_heuristic_compress = { 749 - .workspace_manager = &heuristic_wsm, 750 - }; 731 + const struct btrfs_compress_levels btrfs_heuristic_compress = { 0 }; 751 732 752 - static const struct btrfs_compress_op * const btrfs_compress_op[] = { 733 + static const struct btrfs_compress_levels * const btrfs_compress_levels[] = { 753 734 /* The heuristic is represented as compression type 0 */ 754 735 &btrfs_heuristic_compress, 755 
736 &btrfs_zlib_compress, ··· 755 740 &btrfs_zstd_compress, 756 741 }; 757 742 758 - static struct list_head *alloc_workspace(int type, int level) 743 + static struct list_head *alloc_workspace(struct btrfs_fs_info *fs_info, int type, int level) 759 744 { 760 745 switch (type) { 761 - case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(); 762 - case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level); 763 - case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(); 764 - case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level); 746 + case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(fs_info); 747 + case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(fs_info, level); 748 + case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(fs_info); 749 + case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(fs_info, level); 765 750 default: 766 751 /* 767 752 * This can't happen, the type is validated several times ··· 787 772 } 788 773 } 789 774 790 - static void btrfs_init_workspace_manager(int type) 775 + static int alloc_workspace_manager(struct btrfs_fs_info *fs_info, 776 + enum btrfs_compression_type type) 791 777 { 792 - struct workspace_manager *wsm; 778 + struct workspace_manager *gwsm; 793 779 struct list_head *workspace; 794 780 795 - wsm = btrfs_compress_op[type]->workspace_manager; 796 - INIT_LIST_HEAD(&wsm->idle_ws); 797 - spin_lock_init(&wsm->ws_lock); 798 - atomic_set(&wsm->total_ws, 0); 799 - init_waitqueue_head(&wsm->ws_wait); 781 + ASSERT(fs_info->compr_wsm[type] == NULL); 782 + gwsm = kzalloc(sizeof(*gwsm), GFP_KERNEL); 783 + if (!gwsm) 784 + return -ENOMEM; 785 + 786 + INIT_LIST_HEAD(&gwsm->idle_ws); 787 + spin_lock_init(&gwsm->ws_lock); 788 + atomic_set(&gwsm->total_ws, 0); 789 + init_waitqueue_head(&gwsm->ws_wait); 790 + fs_info->compr_wsm[type] = gwsm; 800 791 801 792 /* 802 793 * Preallocate one workspace for each compression type so we can 803 794 * guarantee forward progress in the worst case 804 795 */ 805 - workspace = alloc_workspace(type, 0); 
796 + workspace = alloc_workspace(fs_info, type, 0); 806 797 if (IS_ERR(workspace)) { 807 - btrfs_warn(NULL, 808 - "cannot preallocate compression workspace, will try later"); 798 + btrfs_warn(fs_info, 799 + "cannot preallocate compression workspace for %s, will try later", 800 + btrfs_compress_type2str(type)); 809 801 } else { 810 - atomic_set(&wsm->total_ws, 1); 811 - wsm->free_ws = 1; 812 - list_add(workspace, &wsm->idle_ws); 802 + atomic_set(&gwsm->total_ws, 1); 803 + gwsm->free_ws = 1; 804 + list_add(workspace, &gwsm->idle_ws); 813 805 } 806 + return 0; 814 807 } 815 808 816 - static void btrfs_cleanup_workspace_manager(int type) 809 + static void free_workspace_manager(struct btrfs_fs_info *fs_info, 810 + enum btrfs_compression_type type) 817 811 { 818 - struct workspace_manager *wsman; 819 812 struct list_head *ws; 813 + struct workspace_manager *gwsm = fs_info->compr_wsm[type]; 820 814 821 - wsman = btrfs_compress_op[type]->workspace_manager; 822 - while (!list_empty(&wsman->idle_ws)) { 823 - ws = wsman->idle_ws.next; 815 + /* ZSTD uses its own workspace manager, should enter here. */ 816 + ASSERT(type != BTRFS_COMPRESS_ZSTD && type < BTRFS_NR_COMPRESS_TYPES); 817 + if (!gwsm) 818 + return; 819 + fs_info->compr_wsm[type] = NULL; 820 + while (!list_empty(&gwsm->idle_ws)) { 821 + ws = gwsm->idle_ws.next; 824 822 list_del(ws); 825 823 free_workspace(type, ws); 826 - atomic_dec(&wsman->total_ws); 824 + atomic_dec(&gwsm->total_ws); 827 825 } 826 + kfree(gwsm); 828 827 } 829 828 830 829 /* ··· 847 818 * Preallocation makes a forward progress guarantees and we do not return 848 819 * errors. 
849 820 */ 850 - struct list_head *btrfs_get_workspace(int type, int level) 821 + struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info, int type, int level) 851 822 { 852 - struct workspace_manager *wsm; 823 + struct workspace_manager *wsm = fs_info->compr_wsm[type]; 853 824 struct list_head *workspace; 854 825 int cpus = num_online_cpus(); 855 826 unsigned nofs_flag; ··· 859 830 wait_queue_head_t *ws_wait; 860 831 int *free_ws; 861 832 862 - wsm = btrfs_compress_op[type]->workspace_manager; 833 + ASSERT(wsm); 863 834 idle_ws = &wsm->idle_ws; 864 835 ws_lock = &wsm->ws_lock; 865 836 total_ws = &wsm->total_ws; ··· 895 866 * context of btrfs_compress_bio/btrfs_compress_pages 896 867 */ 897 868 nofs_flag = memalloc_nofs_save(); 898 - workspace = alloc_workspace(type, level); 869 + workspace = alloc_workspace(fs_info, type, level); 899 870 memalloc_nofs_restore(nofs_flag); 900 871 901 872 if (IS_ERR(workspace)) { ··· 918 889 /* no burst */ 1); 919 890 920 891 if (__ratelimit(&_rs)) 921 - btrfs_warn(NULL, 892 + btrfs_warn(fs_info, 922 893 "no compression workspaces, low memory, retrying"); 923 894 } 924 895 goto again; ··· 926 897 return workspace; 927 898 } 928 899 929 - static struct list_head *get_workspace(int type, int level) 900 + static struct list_head *get_workspace(struct btrfs_fs_info *fs_info, int type, int level) 930 901 { 931 902 switch (type) { 932 - case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level); 933 - case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level); 934 - case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(type, level); 935 - case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level); 903 + case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(fs_info, type, level); 904 + case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(fs_info, level); 905 + case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(fs_info, type, level); 906 + case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(fs_info, level); 936 907 default: 
937 908 /* 938 909 * This can't happen, the type is validated several times ··· 946 917 * put a workspace struct back on the list or free it if we have enough 947 918 * idle ones sitting around 948 919 */ 949 - void btrfs_put_workspace(int type, struct list_head *ws) 920 + void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws) 950 921 { 951 - struct workspace_manager *wsm; 922 + struct workspace_manager *gwsm = fs_info->compr_wsm[type]; 952 923 struct list_head *idle_ws; 953 924 spinlock_t *ws_lock; 954 925 atomic_t *total_ws; 955 926 wait_queue_head_t *ws_wait; 956 927 int *free_ws; 957 928 958 - wsm = btrfs_compress_op[type]->workspace_manager; 959 - idle_ws = &wsm->idle_ws; 960 - ws_lock = &wsm->ws_lock; 961 - total_ws = &wsm->total_ws; 962 - ws_wait = &wsm->ws_wait; 963 - free_ws = &wsm->free_ws; 929 + ASSERT(gwsm); 930 + idle_ws = &gwsm->idle_ws; 931 + ws_lock = &gwsm->ws_lock; 932 + total_ws = &gwsm->total_ws; 933 + ws_wait = &gwsm->ws_wait; 934 + free_ws = &gwsm->free_ws; 964 935 965 936 spin_lock(ws_lock); 966 937 if (*free_ws <= num_online_cpus()) { ··· 977 948 cond_wake_up(ws_wait); 978 949 } 979 950 980 - static void put_workspace(int type, struct list_head *ws) 951 + static void put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws) 981 952 { 982 953 switch (type) { 983 - case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws); 984 - case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws); 985 - case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(type, ws); 986 - case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws); 954 + case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(fs_info, type, ws); 955 + case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(fs_info, type, ws); 956 + case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(fs_info, type, ws); 957 + case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(fs_info, ws); 987 958 default: 988 959 /* 989 960 * This can't happen, the type 
is validated several times ··· 999 970 */ 1000 971 static int btrfs_compress_set_level(unsigned int type, int level) 1001 972 { 1002 - const struct btrfs_compress_op *ops = btrfs_compress_op[type]; 973 + const struct btrfs_compress_levels *levels = btrfs_compress_levels[type]; 1003 974 1004 975 if (level == 0) 1005 - level = ops->default_level; 976 + level = levels->default_level; 1006 977 else 1007 - level = clamp(level, ops->min_level, ops->max_level); 978 + level = clamp(level, levels->min_level, levels->max_level); 1008 979 1009 980 return level; 1010 981 } ··· 1014 985 */ 1015 986 bool btrfs_compress_level_valid(unsigned int type, int level) 1016 987 { 1017 - const struct btrfs_compress_op *ops = btrfs_compress_op[type]; 988 + const struct btrfs_compress_levels *levels = btrfs_compress_levels[type]; 1018 989 1019 - return ops->min_level <= level && level <= ops->max_level; 990 + return levels->min_level <= level && level <= levels->max_level; 1020 991 } 1021 992 1022 993 /* Wrapper around find_get_page(), with extra error message. */ ··· 1051 1022 * - compression algo are 0-3 1052 1023 * - the level are bits 4-7 1053 1024 * 1054 - * @out_pages is an in/out parameter, holds maximum number of pages to allocate 1055 - * and returns number of actually allocated pages 1025 + * @out_folios is an in/out parameter, holds maximum number of folios to allocate 1026 + * and returns number of actually allocated folios 1056 1027 * 1057 1028 * @total_in is used to return the number of bytes actually read. It 1058 1029 * may be smaller than the input length if we had to exit early because we 1059 - * ran out of room in the pages array or because we cross the 1030 + * ran out of room in the folios array or because we cross the 1060 1031 * max_out threshold. 
1061 1032 * 1062 1033 * @total_out is an in/out parameter, must be set to the input length and will 1063 1034 * be also used to return the total number of compressed bytes 1064 1035 */ 1065 - int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping, 1036 + int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inode, 1066 1037 u64 start, struct folio **folios, unsigned long *out_folios, 1067 1038 unsigned long *total_in, unsigned long *total_out) 1068 1039 { 1040 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 1069 1041 const unsigned long orig_len = *total_out; 1070 1042 struct list_head *workspace; 1071 1043 int ret; 1072 1044 1073 1045 level = btrfs_compress_set_level(type, level); 1074 - workspace = get_workspace(type, level); 1075 - ret = compression_compress_pages(type, workspace, mapping, start, folios, 1046 + workspace = get_workspace(fs_info, type, level); 1047 + ret = compression_compress_pages(type, workspace, inode, start, folios, 1076 1048 out_folios, total_in, total_out); 1077 1049 /* The total read-in bytes should be no larger than the input. */ 1078 1050 ASSERT(*total_in <= orig_len); 1079 - put_workspace(type, workspace); 1051 + put_workspace(fs_info, type, workspace); 1080 1052 return ret; 1081 1053 } 1082 1054 1083 1055 static int btrfs_decompress_bio(struct compressed_bio *cb) 1084 1056 { 1057 + struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); 1085 1058 struct list_head *workspace; 1086 1059 int ret; 1087 1060 int type = cb->compress_type; 1088 1061 1089 - workspace = get_workspace(type, 0); 1062 + workspace = get_workspace(fs_info, type, 0); 1090 1063 ret = compression_decompress_bio(workspace, cb); 1091 - put_workspace(type, workspace); 1064 + put_workspace(fs_info, type, workspace); 1092 1065 1093 1066 if (!ret) 1094 1067 zero_fill_bio(&cb->orig_bbio->bio); ··· 1111 1080 int ret; 1112 1081 1113 1082 /* 1114 - * The full destination page range should not exceed the page size. 
1083 + * The full destination folio range should not exceed the folio size. 1115 1084 * And the @destlen should not exceed sectorsize, as this is only called for 1116 1085 * inline file extents, which should not exceed sectorsize. 1117 1086 */ 1118 - ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize); 1087 + ASSERT(dest_pgoff + destlen <= folio_size(dest_folio) && destlen <= sectorsize); 1119 1088 1120 - workspace = get_workspace(type, 0); 1089 + workspace = get_workspace(fs_info, type, 0); 1121 1090 ret = compression_decompress(type, workspace, data_in, dest_folio, 1122 1091 dest_pgoff, srclen, destlen); 1123 - put_workspace(type, workspace); 1092 + put_workspace(fs_info, type, workspace); 1124 1093 1125 1094 return ret; 1095 + } 1096 + 1097 + int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info) 1098 + { 1099 + int ret; 1100 + 1101 + ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_NONE); 1102 + if (ret < 0) 1103 + goto error; 1104 + ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB); 1105 + if (ret < 0) 1106 + goto error; 1107 + ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_LZO); 1108 + if (ret < 0) 1109 + goto error; 1110 + ret = zstd_alloc_workspace_manager(fs_info); 1111 + if (ret < 0) 1112 + goto error; 1113 + return 0; 1114 + error: 1115 + btrfs_free_compress_wsm(fs_info); 1116 + return ret; 1117 + } 1118 + 1119 + void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info) 1120 + { 1121 + free_workspace_manager(fs_info, BTRFS_COMPRESS_NONE); 1122 + free_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB); 1123 + free_workspace_manager(fs_info, BTRFS_COMPRESS_LZO); 1124 + zstd_free_workspace_manager(fs_info); 1126 1125 } 1127 1126 1128 1127 int __init btrfs_init_compress(void) ··· 1165 1104 compr_pool.shrinker = shrinker_alloc(SHRINKER_NONSLAB, "btrfs-compr-pages"); 1166 1105 if (!compr_pool.shrinker) 1167 1106 return -ENOMEM; 1168 - 1169 - btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE); 1170 - 
btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB); 1171 - btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO); 1172 - zstd_init_workspace_manager(); 1173 1107 1174 1108 spin_lock_init(&compr_pool.lock); 1175 1109 INIT_LIST_HEAD(&compr_pool.list); ··· 1186 1130 btrfs_compr_pool_scan(NULL, NULL); 1187 1131 shrinker_free(compr_pool.shrinker); 1188 1132 1189 - btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE); 1190 - btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB); 1191 - btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO); 1192 - zstd_cleanup_workspace_manager(); 1193 1133 bioset_exit(&btrfs_compressed_bioset); 1194 1134 } 1195 1135 ··· 1308 1256 #define ENTROPY_LVL_HIGH (80) 1309 1257 1310 1258 /* 1311 - * For increasead precision in shannon_entropy calculation, 1259 + * For increased precision in shannon_entropy calculation, 1312 1260 * let's do pow(n, M) to save more digits after comma: 1313 1261 * 1314 1262 * - maximum int bit length is 64 ··· 1594 1542 */ 1595 1543 int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end) 1596 1544 { 1597 - struct list_head *ws_list = get_workspace(0, 0); 1545 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 1546 + struct list_head *ws_list = get_workspace(fs_info, 0, 0); 1598 1547 struct heuristic_ws *ws; 1599 1548 u32 i; 1600 1549 u8 byte; ··· 1664 1611 } 1665 1612 1666 1613 out: 1667 - put_workspace(0, ws_list); 1614 + put_workspace(fs_info, 0, ws_list); 1668 1615 return ret; 1669 1616 } 1670 1617

+28 -31

fs/btrfs/compression.h

··· 75 75 struct btrfs_bio bbio; 76 76 }; 77 77 78 + static inline struct btrfs_fs_info *cb_to_fs_info(const struct compressed_bio *cb) 79 + { 80 + return cb->bbio.fs_info; 81 + } 82 + 78 83 /* @range_end must be exclusive. */ 79 84 static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u64 cur) 80 85 { ··· 89 84 return min(range_end, folio_end(folio)) - cur; 90 85 } 91 86 87 + int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info); 88 + void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info); 89 + 92 90 int __init btrfs_init_compress(void); 93 91 void __cold btrfs_exit_compress(void); 94 92 95 93 bool btrfs_compress_level_valid(unsigned int type, int level); 96 - int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping, 94 + int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inode, 97 95 u64 start, struct folio **folios, unsigned long *out_folios, 98 96 unsigned long *total_in, unsigned long *total_out); 99 97 int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio, ··· 112 104 113 105 int btrfs_compress_str2level(unsigned int type, const char *str, int *level_ret); 114 106 115 - struct folio *btrfs_alloc_compr_folio(void); 107 + struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info); 116 108 void btrfs_free_compr_folio(struct folio *folio); 117 - 118 - enum btrfs_compression_type { 119 - BTRFS_COMPRESS_NONE = 0, 120 - BTRFS_COMPRESS_ZLIB = 1, 121 - BTRFS_COMPRESS_LZO = 2, 122 - BTRFS_COMPRESS_ZSTD = 3, 123 - BTRFS_NR_COMPRESS_TYPES = 4, 124 - 125 - BTRFS_DEFRAG_DONT_COMPRESS, 126 - }; 127 109 128 110 struct workspace_manager { 129 111 struct list_head idle_ws; ··· 126 128 wait_queue_head_t ws_wait; 127 129 }; 128 130 129 - struct list_head *btrfs_get_workspace(int type, int level); 130 - void btrfs_put_workspace(int type, struct list_head *ws); 131 + struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info, int type, int level); 132 + 
void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws); 131 133 132 - struct btrfs_compress_op { 133 - struct workspace_manager *workspace_manager; 134 + struct btrfs_compress_levels { 134 135 /* Maximum level supported by the compression algorithm */ 135 136 int min_level; 136 137 int max_level; ··· 139 142 /* The heuristic workspaces are managed via the 0th workspace manager */ 140 143 #define BTRFS_NR_WORKSPACE_MANAGERS BTRFS_NR_COMPRESS_TYPES 141 144 142 - extern const struct btrfs_compress_op btrfs_heuristic_compress; 143 - extern const struct btrfs_compress_op btrfs_zlib_compress; 144 - extern const struct btrfs_compress_op btrfs_lzo_compress; 145 - extern const struct btrfs_compress_op btrfs_zstd_compress; 145 + extern const struct btrfs_compress_levels btrfs_heuristic_compress; 146 + extern const struct btrfs_compress_levels btrfs_zlib_compress; 147 + extern const struct btrfs_compress_levels btrfs_lzo_compress; 148 + extern const struct btrfs_compress_levels btrfs_zstd_compress; 146 149 147 150 const char* btrfs_compress_type2str(enum btrfs_compression_type type); 148 151 bool btrfs_compress_is_valid_type(const char *str, size_t len); ··· 152 155 int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start, 153 156 struct folio **in_folio_ret); 154 157 155 - int zlib_compress_folios(struct list_head *ws, struct address_space *mapping, 158 + int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode, 156 159 u64 start, struct folio **folios, unsigned long *out_folios, 157 160 unsigned long *total_in, unsigned long *total_out); 158 161 int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb); 159 162 int zlib_decompress(struct list_head *ws, const u8 *data_in, 160 163 struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, 161 164 size_t destlen); 162 - struct list_head *zlib_alloc_workspace(unsigned int level); 165 + struct list_head *zlib_alloc_workspace(struct 
btrfs_fs_info *fs_info, unsigned int level); 163 166 void zlib_free_workspace(struct list_head *ws); 164 - struct list_head *zlib_get_workspace(unsigned int level); 167 + struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level); 165 168 166 - int lzo_compress_folios(struct list_head *ws, struct address_space *mapping, 169 + int lzo_compress_folios(struct list_head *ws, struct btrfs_inode *inode, 167 170 u64 start, struct folio **folios, unsigned long *out_folios, 168 171 unsigned long *total_in, unsigned long *total_out); 169 172 int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb); 170 173 int lzo_decompress(struct list_head *ws, const u8 *data_in, 171 174 struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, 172 175 size_t destlen); 173 - struct list_head *lzo_alloc_workspace(void); 176 + struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info); 174 177 void lzo_free_workspace(struct list_head *ws); 175 178 176 - int zstd_compress_folios(struct list_head *ws, struct address_space *mapping, 179 + int zstd_compress_folios(struct list_head *ws, struct btrfs_inode *inode, 177 180 u64 start, struct folio **folios, unsigned long *out_folios, 178 181 unsigned long *total_in, unsigned long *total_out); 179 182 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb); 180 183 int zstd_decompress(struct list_head *ws, const u8 *data_in, 181 184 struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, 182 185 size_t destlen); 183 - void zstd_init_workspace_manager(void); 184 - void zstd_cleanup_workspace_manager(void); 185 - struct list_head *zstd_alloc_workspace(int level); 186 + int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info); 187 + void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info); 188 + struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level); 186 189 void zstd_free_workspace(struct list_head *ws); 187 - struct 
list_head *zstd_get_workspace(int level); 188 - void zstd_put_workspace(struct list_head *ws); 190 + struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level); 191 + void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws); 189 192 190 193 #endif

+72 -63

fs/btrfs/ctree.c

··· 30 30 *root, struct btrfs_path *path, int level); 31 31 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, 32 32 const struct btrfs_key *ins_key, struct btrfs_path *path, 33 - int data_size, int extend); 33 + int data_size, bool extend); 34 34 static int push_node_left(struct btrfs_trans_handle *trans, 35 35 struct extent_buffer *dst, 36 - struct extent_buffer *src, int empty); 36 + struct extent_buffer *src, bool empty); 37 37 static int balance_node_right(struct btrfs_trans_handle *trans, 38 38 struct extent_buffer *dst_buf, 39 39 struct extent_buffer *src_buf); ··· 293 293 294 294 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 295 295 ret = btrfs_inc_ref(trans, root, cow, 1); 296 - if (ret) 296 + if (unlikely(ret)) 297 297 btrfs_abort_transaction(trans, ret); 298 298 } else { 299 299 ret = btrfs_inc_ref(trans, root, cow, 0); 300 - if (ret) 300 + if (unlikely(ret)) 301 301 btrfs_abort_transaction(trans, ret); 302 302 } 303 303 if (ret) { ··· 536 536 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); 537 537 538 538 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); 539 - if (ret) { 539 + if (unlikely(ret)) { 540 540 btrfs_abort_transaction(trans, ret); 541 541 goto error_unlock_cow; 542 542 } 543 543 544 544 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) { 545 545 ret = btrfs_reloc_cow_block(trans, root, buf, cow); 546 - if (ret) { 546 + if (unlikely(ret)) { 547 547 btrfs_abort_transaction(trans, ret); 548 548 goto error_unlock_cow; 549 549 } ··· 556 556 parent_start = buf->start; 557 557 558 558 ret = btrfs_tree_mod_log_insert_root(root->node, cow, true); 559 - if (ret < 0) { 559 + if (unlikely(ret < 0)) { 560 560 btrfs_abort_transaction(trans, ret); 561 561 goto error_unlock_cow; 562 562 } ··· 567 567 parent_start, last_ref); 568 568 free_extent_buffer(buf); 569 569 add_root_to_dirty_list(root); 570 - if (ret < 0) { 570 + if (unlikely(ret < 0)) { 571 571 btrfs_abort_transaction(trans, ret); 
572 572 goto error_unlock_cow; 573 573 } ··· 575 575 WARN_ON(trans->transid != btrfs_header_generation(parent)); 576 576 ret = btrfs_tree_mod_log_insert_key(parent, parent_slot, 577 577 BTRFS_MOD_LOG_KEY_REPLACE); 578 - if (ret) { 578 + if (unlikely(ret)) { 579 579 btrfs_abort_transaction(trans, ret); 580 580 goto error_unlock_cow; 581 581 } ··· 586 586 btrfs_mark_buffer_dirty(trans, parent); 587 587 if (last_ref) { 588 588 ret = btrfs_tree_mod_log_free_eb(buf); 589 - if (ret) { 589 + if (unlikely(ret)) { 590 590 btrfs_abort_transaction(trans, ret); 591 591 goto error_unlock_cow; 592 592 } 593 593 } 594 594 ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf, 595 595 parent_start, last_ref); 596 - if (ret < 0) { 596 + if (unlikely(ret < 0)) { 597 597 btrfs_abort_transaction(trans, ret); 598 598 goto error_unlock_cow; 599 599 } ··· 613 613 return ret; 614 614 } 615 615 616 - static inline int should_cow_block(const struct btrfs_trans_handle *trans, 617 - const struct btrfs_root *root, 618 - const struct extent_buffer *buf) 616 + static inline bool should_cow_block(const struct btrfs_trans_handle *trans, 617 + const struct btrfs_root *root, 618 + const struct extent_buffer *buf) 619 619 { 620 620 if (btrfs_is_testing(root->fs_info)) 621 - return 0; 622 - 623 - /* Ensure we can see the FORCE_COW bit */ 624 - smp_mb__before_atomic(); 621 + return false; 625 622 626 623 /* 627 624 * We do not need to cow a block if ··· 631 634 * after we've finished copying src root, we must COW the shared 632 635 * block to ensure the metadata consistency. 
633 636 */ 634 - if (btrfs_header_generation(buf) == trans->transid && 635 - !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && 636 - !(btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID && 637 - btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && 638 - !test_bit(BTRFS_ROOT_FORCE_COW, &root->state)) 639 - return 0; 640 - return 1; 637 + 638 + if (btrfs_header_generation(buf) != trans->transid) 639 + return true; 640 + 641 + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) 642 + return true; 643 + 644 + /* Ensure we can see the FORCE_COW bit. */ 645 + smp_mb__before_atomic(); 646 + if (test_bit(BTRFS_ROOT_FORCE_COW, &root->state)) 647 + return true; 648 + 649 + if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) 650 + return false; 651 + 652 + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) 653 + return true; 654 + 655 + return false; 641 656 } 642 657 643 658 /* ··· 853 844 &check); 854 845 if (IS_ERR(eb)) 855 846 return eb; 856 - if (!extent_buffer_uptodate(eb)) { 847 + if (unlikely(!extent_buffer_uptodate(eb))) { 857 848 free_extent_buffer(eb); 858 849 return ERR_PTR(-EIO); 859 850 } ··· 922 913 } 923 914 924 915 ret = btrfs_tree_mod_log_insert_root(root->node, child, true); 925 - if (ret < 0) { 916 + if (unlikely(ret < 0)) { 926 917 btrfs_tree_unlock(child); 927 918 free_extent_buffer(child); 928 919 btrfs_abort_transaction(trans, ret); ··· 944 935 ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); 945 936 /* once for the root ptr */ 946 937 free_extent_buffer_stale(mid); 947 - if (ret < 0) { 938 + if (unlikely(ret < 0)) { 948 939 btrfs_abort_transaction(trans, ret); 949 940 goto out; 950 941 } ··· 1019 1010 right, 0, 1); 1020 1011 free_extent_buffer_stale(right); 1021 1012 right = NULL; 1022 - if (ret < 0) { 1013 + if (unlikely(ret < 0)) { 1023 1014 btrfs_abort_transaction(trans, ret); 1024 1015 goto out; 1025 1016 } ··· 1028 1019 btrfs_node_key(right, &right_key, 0); 1029 1020 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 
1, 1030 1021 BTRFS_MOD_LOG_KEY_REPLACE); 1031 - if (ret < 0) { 1022 + if (unlikely(ret < 0)) { 1032 1023 btrfs_abort_transaction(trans, ret); 1033 1024 goto out; 1034 1025 } ··· 1080 1071 ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); 1081 1072 free_extent_buffer_stale(mid); 1082 1073 mid = NULL; 1083 - if (ret < 0) { 1074 + if (unlikely(ret < 0)) { 1084 1075 btrfs_abort_transaction(trans, ret); 1085 1076 goto out; 1086 1077 } ··· 1090 1081 btrfs_node_key(mid, &mid_key, 0); 1091 1082 ret = btrfs_tree_mod_log_insert_key(parent, pslot, 1092 1083 BTRFS_MOD_LOG_KEY_REPLACE); 1093 - if (ret < 0) { 1084 + if (unlikely(ret < 0)) { 1094 1085 btrfs_abort_transaction(trans, ret); 1095 1086 goto out; 1096 1087 } ··· 1195 1186 btrfs_node_key(mid, &disk_key, 0); 1196 1187 ret = btrfs_tree_mod_log_insert_key(parent, pslot, 1197 1188 BTRFS_MOD_LOG_KEY_REPLACE); 1198 - if (ret < 0) { 1189 + if (unlikely(ret < 0)) { 1199 1190 btrfs_tree_unlock(left); 1200 1191 free_extent_buffer(left); 1201 1192 btrfs_abort_transaction(trans, ret); ··· 1255 1246 btrfs_node_key(right, &disk_key, 0); 1256 1247 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1, 1257 1248 BTRFS_MOD_LOG_KEY_REPLACE); 1258 - if (ret < 0) { 1249 + if (unlikely(ret < 0)) { 1259 1250 btrfs_tree_unlock(right); 1260 1251 free_extent_buffer(right); 1261 1252 btrfs_abort_transaction(trans, ret); ··· 1493 1484 reada_for_search(fs_info, p, parent_level, slot, key->objectid); 1494 1485 1495 1486 /* first we do an atomic uptodate check */ 1496 - if (btrfs_buffer_uptodate(tmp, check.transid, 1) > 0) { 1487 + if (btrfs_buffer_uptodate(tmp, check.transid, true) > 0) { 1497 1488 /* 1498 1489 * Do extra check for first_key, eb can be stale due to 1499 1490 * being cached, read from scrub, or have multiple 1500 1491 * parents (shared tree blocks). 
1501 1492 */ 1502 - if (btrfs_verify_level_key(tmp, &check)) { 1493 + if (unlikely(btrfs_verify_level_key(tmp, &check))) { 1503 1494 ret = -EUCLEAN; 1504 1495 goto out; 1505 1496 } ··· 1580 1571 * and give up so that our caller doesn't loop forever 1581 1572 * on our EAGAINs. 1582 1573 */ 1583 - if (!extent_buffer_uptodate(tmp)) { 1574 + if (unlikely(!extent_buffer_uptodate(tmp))) { 1584 1575 ret = -EIO; 1585 1576 goto out; 1586 1577 } ··· 1761 1752 * The root may have failed to write out at some point, and thus is no 1762 1753 * longer valid, return an error in this case. 1763 1754 */ 1764 - if (!extent_buffer_uptodate(b)) { 1755 + if (unlikely(!extent_buffer_uptodate(b))) { 1765 1756 if (root_lock) 1766 1757 btrfs_tree_unlock_rw(b, root_lock); 1767 1758 free_extent_buffer(b); ··· 2269 2260 2270 2261 again: 2271 2262 b = btrfs_get_old_root(root, time_seq); 2272 - if (!b) { 2263 + if (unlikely(!b)) { 2273 2264 ret = -EIO; 2274 2265 goto done; 2275 2266 } ··· 2695 2686 */ 2696 2687 static int push_node_left(struct btrfs_trans_handle *trans, 2697 2688 struct extent_buffer *dst, 2698 - struct extent_buffer *src, int empty) 2689 + struct extent_buffer *src, bool empty) 2699 2690 { 2700 2691 struct btrfs_fs_info *fs_info = trans->fs_info; 2701 2692 int push_items = 0; ··· 2731 2722 push_items = min(src_nritems - 8, push_items); 2732 2723 2733 2724 /* dst is the left eb, src is the middle eb */ 2734 - if (check_sibling_keys(dst, src)) { 2725 + if (unlikely(check_sibling_keys(dst, src))) { 2735 2726 ret = -EUCLEAN; 2736 2727 btrfs_abort_transaction(trans, ret); 2737 2728 return ret; 2738 2729 } 2739 2730 ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items); 2740 - if (ret) { 2731 + if (unlikely(ret)) { 2741 2732 btrfs_abort_transaction(trans, ret); 2742 2733 return ret; 2743 2734 } ··· 2805 2796 push_items = max_push; 2806 2797 2807 2798 /* dst is the right eb, src is the middle eb */ 2808 - if (check_sibling_keys(src, dst)) { 2799 + if 
(unlikely(check_sibling_keys(src, dst))) { 2809 2800 ret = -EUCLEAN; 2810 2801 btrfs_abort_transaction(trans, ret); 2811 2802 return ret; ··· 2822 2813 2823 2814 ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items, 2824 2815 push_items); 2825 - if (ret) { 2816 + if (unlikely(ret)) { 2826 2817 btrfs_abort_transaction(trans, ret); 2827 2818 return ret; 2828 2819 } ··· 2892 2883 2893 2884 btrfs_clear_buffer_dirty(trans, c); 2894 2885 ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1); 2895 - if (ret2 < 0) 2886 + if (unlikely(ret2 < 0)) 2896 2887 btrfs_abort_transaction(trans, ret2); 2897 2888 btrfs_tree_unlock(c); 2898 2889 free_extent_buffer(c); ··· 2937 2928 if (level) { 2938 2929 ret = btrfs_tree_mod_log_insert_move(lower, slot + 1, 2939 2930 slot, nritems - slot); 2940 - if (ret < 0) { 2931 + if (unlikely(ret < 0)) { 2941 2932 btrfs_abort_transaction(trans, ret); 2942 2933 return ret; 2943 2934 } ··· 2950 2941 if (level) { 2951 2942 ret = btrfs_tree_mod_log_insert_key(lower, slot, 2952 2943 BTRFS_MOD_LOG_KEY_ADD); 2953 - if (ret < 0) { 2944 + if (unlikely(ret < 0)) { 2954 2945 btrfs_abort_transaction(trans, ret); 2955 2946 return ret; 2956 2947 } ··· 3026 3017 ASSERT(btrfs_header_level(c) == level); 3027 3018 3028 3019 ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); 3029 - if (ret) { 3020 + if (unlikely(ret)) { 3030 3021 btrfs_tree_unlock(split); 3031 3022 free_extent_buffer(split); 3032 3023 btrfs_abort_transaction(trans, ret); ··· 3095 3086 int ret; 3096 3087 3097 3088 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems); 3098 - if (ret < 0) { 3089 + if (unlikely(ret < 0)) { 3099 3090 btrfs_crit(fs_info, 3100 3091 "leaf free space ret %d, leaf data size %lu, used %d nritems %d", 3101 3092 ret, ··· 3111 3102 */ 3112 3103 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, 3113 3104 struct btrfs_path *path, 3114 - int data_size, int empty, 3105 + int data_size, bool 
empty, 3115 3106 struct extent_buffer *right, 3116 3107 int free_space, u32 left_nritems, 3117 3108 u32 min_slot) ··· 3248 3239 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 3249 3240 *root, struct btrfs_path *path, 3250 3241 int min_data_size, int data_size, 3251 - int empty, u32 min_slot) 3242 + bool empty, u32 min_slot) 3252 3243 { 3253 3244 struct extent_buffer *left = path->nodes[0]; 3254 3245 struct extent_buffer *right; ··· 3287 3278 if (left_nritems == 0) 3288 3279 goto out_unlock; 3289 3280 3290 - if (check_sibling_keys(left, right)) { 3281 + if (unlikely(check_sibling_keys(left, right))) { 3291 3282 ret = -EUCLEAN; 3292 3283 btrfs_abort_transaction(trans, ret); 3293 3284 btrfs_tree_unlock(right); ··· 3325 3316 */ 3326 3317 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, 3327 3318 struct btrfs_path *path, int data_size, 3328 - int empty, struct extent_buffer *left, 3319 + bool empty, struct extent_buffer *left, 3329 3320 int free_space, u32 right_nritems, 3330 3321 u32 max_slot) 3331 3322 { ··· 3503 3494 goto out; 3504 3495 } 3505 3496 3506 - if (check_sibling_keys(left, right)) { 3497 + if (unlikely(check_sibling_keys(left, right))) { 3507 3498 ret = -EUCLEAN; 3508 3499 btrfs_abort_transaction(trans, ret); 3509 3500 goto out; ··· 3651 3642 struct btrfs_root *root, 3652 3643 const struct btrfs_key *ins_key, 3653 3644 struct btrfs_path *path, int data_size, 3654 - int extend) 3645 + bool extend) 3655 3646 { 3656 3647 struct btrfs_disk_key disk_key; 3657 3648 struct extent_buffer *l; ··· 4084 4075 btrfs_set_item_size(leaf, slot, new_size); 4085 4076 btrfs_mark_buffer_dirty(trans, leaf); 4086 4077 4087 - if (btrfs_leaf_free_space(leaf) < 0) { 4078 + if (unlikely(btrfs_leaf_free_space(leaf) < 0)) { 4088 4079 btrfs_print_leaf(leaf); 4089 4080 BUG(); 4090 4081 } ··· 4117 4108 old_data = btrfs_item_data_end(leaf, slot); 4118 4109 4119 4110 BUG_ON(slot < 0); 4120 - if (slot >= nritems) { 4111 + if 
(unlikely(slot >= nritems)) { 4121 4112 btrfs_print_leaf(leaf); 4122 4113 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d", 4123 4114 slot, nritems); ··· 4144 4135 btrfs_set_item_size(leaf, slot, old_size + data_size); 4145 4136 btrfs_mark_buffer_dirty(trans, leaf); 4146 4137 4147 - if (btrfs_leaf_free_space(leaf) < 0) { 4138 + if (unlikely(btrfs_leaf_free_space(leaf) < 0)) { 4148 4139 btrfs_print_leaf(leaf); 4149 4140 BUG(); 4150 4141 } ··· 4192 4183 data_end = leaf_data_end(leaf); 4193 4184 total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item)); 4194 4185 4195 - if (btrfs_leaf_free_space(leaf) < total_size) { 4186 + if (unlikely(btrfs_leaf_free_space(leaf) < total_size)) { 4196 4187 btrfs_print_leaf(leaf); 4197 4188 btrfs_crit(fs_info, "not enough freespace need %u have %d", 4198 4189 total_size, btrfs_leaf_free_space(leaf)); ··· 4202 4193 if (slot != nritems) { 4203 4194 unsigned int old_data = btrfs_item_data_end(leaf, slot); 4204 4195 4205 - if (old_data < data_end) { 4196 + if (unlikely(old_data < data_end)) { 4206 4197 btrfs_print_leaf(leaf); 4207 4198 btrfs_crit(fs_info, 4208 4199 "item at slot %d with data offset %u beyond data end of leaf %u", ··· 4241 4232 btrfs_set_header_nritems(leaf, nritems + batch->nr); 4242 4233 btrfs_mark_buffer_dirty(trans, leaf); 4243 4234 4244 - if (btrfs_leaf_free_space(leaf) < 0) { 4235 + if (unlikely(btrfs_leaf_free_space(leaf) < 0)) { 4245 4236 btrfs_print_leaf(leaf); 4246 4237 BUG(); 4247 4238 } ··· 4383 4374 if (level) { 4384 4375 ret = btrfs_tree_mod_log_insert_move(parent, slot, 4385 4376 slot + 1, nritems - slot - 1); 4386 - if (ret < 0) { 4377 + if (unlikely(ret < 0)) { 4387 4378 btrfs_abort_transaction(trans, ret); 4388 4379 return ret; 4389 4380 } ··· 4396 4387 } else if (level) { 4397 4388 ret = btrfs_tree_mod_log_insert_key(parent, slot, 4398 4389 BTRFS_MOD_LOG_KEY_REMOVE); 4399 - if (ret < 0) { 4390 + if (unlikely(ret < 0)) { 4400 4391 btrfs_abort_transaction(trans, ret); 4401 4392 
return ret; 4402 4393 }

+2 -2

fs/btrfs/defrag.c

··· 153 153 } 154 154 155 155 /* 156 - * Pick the defragable inode that we want, if it doesn't exist, we will get the 156 + * Pick the defraggable inode that we want, if it doesn't exist, we will get the 157 157 * next one. 158 158 */ 159 159 static struct inode_defrag *btrfs_pick_defrag_inode( ··· 924 924 folio_put(folio); 925 925 goto again; 926 926 } 927 - if (!folio_test_uptodate(folio)) { 927 + if (unlikely(!folio_test_uptodate(folio))) { 928 928 folio_unlock(folio); 929 929 folio_put(folio); 930 930 return ERR_PTR(-EIO);

+126 -60

fs/btrfs/delayed-inode.c

··· 57 57 delayed_node->root = root; 58 58 delayed_node->inode_id = inode_id; 59 59 refcount_set(&delayed_node->refs, 0); 60 + btrfs_delayed_node_ref_tracker_dir_init(delayed_node); 60 61 delayed_node->ins_root = RB_ROOT_CACHED; 61 62 delayed_node->del_root = RB_ROOT_CACHED; 62 63 mutex_init(&delayed_node->mutex); ··· 66 65 } 67 66 68 67 static struct btrfs_delayed_node *btrfs_get_delayed_node( 69 - struct btrfs_inode *btrfs_inode) 68 + struct btrfs_inode *btrfs_inode, 69 + struct btrfs_ref_tracker *tracker) 70 70 { 71 71 struct btrfs_root *root = btrfs_inode->root; 72 72 u64 ino = btrfs_ino(btrfs_inode); ··· 76 74 node = READ_ONCE(btrfs_inode->delayed_node); 77 75 if (node) { 78 76 refcount_inc(&node->refs); 77 + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_NOFS); 79 78 return node; 80 79 } 81 80 ··· 86 83 if (node) { 87 84 if (btrfs_inode->delayed_node) { 88 85 refcount_inc(&node->refs); /* can be accessed */ 86 + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); 89 87 BUG_ON(btrfs_inode->delayed_node != node); 90 88 xa_unlock(&root->delayed_nodes); 91 89 return node; ··· 110 106 */ 111 107 if (refcount_inc_not_zero(&node->refs)) { 112 108 refcount_inc(&node->refs); 109 + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); 110 + btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker, 111 + GFP_ATOMIC); 113 112 btrfs_inode->delayed_node = node; 114 113 } else { 115 114 node = NULL; ··· 133 126 * Return the delayed node, or error pointer on failure. 
134 127 */ 135 128 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( 136 - struct btrfs_inode *btrfs_inode) 129 + struct btrfs_inode *btrfs_inode, 130 + struct btrfs_ref_tracker *tracker) 137 131 { 138 132 struct btrfs_delayed_node *node; 139 133 struct btrfs_root *root = btrfs_inode->root; ··· 143 135 void *ptr; 144 136 145 137 again: 146 - node = btrfs_get_delayed_node(btrfs_inode); 138 + node = btrfs_get_delayed_node(btrfs_inode, tracker); 147 139 if (node) 148 140 return node; 149 141 ··· 152 144 return ERR_PTR(-ENOMEM); 153 145 btrfs_init_delayed_node(node, root, ino); 154 146 155 - /* Cached in the inode and can be accessed. */ 156 - refcount_set(&node->refs, 2); 157 - 158 147 /* Allocate and reserve the slot, from now it can return a NULL from xa_load(). */ 159 148 ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS); 160 149 if (ret == -ENOMEM) { 150 + btrfs_delayed_node_ref_tracker_dir_exit(node); 161 151 kmem_cache_free(delayed_node_cache, node); 162 152 return ERR_PTR(-ENOMEM); 163 153 } ··· 164 158 if (ptr) { 165 159 /* Somebody inserted it, go back and read it. */ 166 160 xa_unlock(&root->delayed_nodes); 161 + btrfs_delayed_node_ref_tracker_dir_exit(node); 167 162 kmem_cache_free(delayed_node_cache, node); 168 163 node = NULL; 169 164 goto again; ··· 173 166 ASSERT(xa_err(ptr) != -EINVAL); 174 167 ASSERT(xa_err(ptr) != -ENOMEM); 175 168 ASSERT(ptr == NULL); 169 + 170 + /* Cached in the inode and can be accessed. 
*/ 171 + refcount_set(&node->refs, 2); 172 + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); 173 + btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker, GFP_ATOMIC); 174 + 176 175 btrfs_inode->delayed_node = node; 177 176 xa_unlock(&root->delayed_nodes); 178 177 ··· 204 191 list_add_tail(&node->n_list, &root->node_list); 205 192 list_add_tail(&node->p_list, &root->prepare_list); 206 193 refcount_inc(&node->refs); /* inserted into list */ 194 + btrfs_delayed_node_ref_tracker_alloc(node, &node->node_list_tracker, 195 + GFP_ATOMIC); 207 196 root->nodes++; 208 197 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags); 209 198 } ··· 219 204 spin_lock(&root->lock); 220 205 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) { 221 206 root->nodes--; 207 + btrfs_delayed_node_ref_tracker_free(node, &node->node_list_tracker); 222 208 refcount_dec(&node->refs); /* not in the list */ 223 209 list_del_init(&node->n_list); 224 210 if (!list_empty(&node->p_list)) ··· 230 214 } 231 215 232 216 static struct btrfs_delayed_node *btrfs_first_delayed_node( 233 - struct btrfs_delayed_root *delayed_root) 217 + struct btrfs_delayed_root *delayed_root, 218 + struct btrfs_ref_tracker *tracker) 234 219 { 235 220 struct btrfs_delayed_node *node; 236 221 237 222 spin_lock(&delayed_root->lock); 238 223 node = list_first_entry_or_null(&delayed_root->node_list, 239 224 struct btrfs_delayed_node, n_list); 240 - if (node) 225 + if (node) { 241 226 refcount_inc(&node->refs); 227 + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); 228 + } 242 229 spin_unlock(&delayed_root->lock); 243 230 244 231 return node; 245 232 } 246 233 247 234 static struct btrfs_delayed_node *btrfs_next_delayed_node( 248 - struct btrfs_delayed_node *node) 235 + struct btrfs_delayed_node *node, 236 + struct btrfs_ref_tracker *tracker) 249 237 { 250 238 struct btrfs_delayed_root *delayed_root; 251 239 struct list_head *p; ··· 269 249 270 250 next = list_entry(p, struct 
btrfs_delayed_node, n_list); 271 251 refcount_inc(&next->refs); 252 + btrfs_delayed_node_ref_tracker_alloc(next, tracker, GFP_ATOMIC); 272 253 out: 273 254 spin_unlock(&delayed_root->lock); 274 255 ··· 278 257 279 258 static void __btrfs_release_delayed_node( 280 259 struct btrfs_delayed_node *delayed_node, 281 - int mod) 260 + int mod, struct btrfs_ref_tracker *tracker) 282 261 { 283 262 struct btrfs_delayed_root *delayed_root; 284 263 ··· 294 273 btrfs_dequeue_delayed_node(delayed_root, delayed_node); 295 274 mutex_unlock(&delayed_node->mutex); 296 275 276 + btrfs_delayed_node_ref_tracker_free(delayed_node, tracker); 297 277 if (refcount_dec_and_test(&delayed_node->refs)) { 298 278 struct btrfs_root *root = delayed_node->root; 299 279 ··· 304 282 * back up. We can delete it now. 305 283 */ 306 284 ASSERT(refcount_read(&delayed_node->refs) == 0); 285 + btrfs_delayed_node_ref_tracker_dir_exit(delayed_node); 307 286 kmem_cache_free(delayed_node_cache, delayed_node); 308 287 } 309 288 } 310 289 311 - static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node) 290 + static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node, 291 + struct btrfs_ref_tracker *tracker) 312 292 { 313 - __btrfs_release_delayed_node(node, 0); 293 + __btrfs_release_delayed_node(node, 0, tracker); 314 294 } 315 295 316 296 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( 317 - struct btrfs_delayed_root *delayed_root) 297 + struct btrfs_delayed_root *delayed_root, 298 + struct btrfs_ref_tracker *tracker) 318 299 { 319 300 struct btrfs_delayed_node *node; 320 301 ··· 327 302 if (node) { 328 303 list_del_init(&node->p_list); 329 304 refcount_inc(&node->refs); 305 + btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC); 330 306 } 331 307 spin_unlock(&delayed_root->lock); 332 308 ··· 335 309 } 336 310 337 311 static inline void btrfs_release_prepared_delayed_node( 338 - struct btrfs_delayed_node *node) 312 + struct 
btrfs_delayed_node *node, 313 + struct btrfs_ref_tracker *tracker) 339 314 { 340 - __btrfs_release_delayed_node(node, 1); 315 + __btrfs_release_delayed_node(node, 1, tracker); 341 316 } 342 317 343 318 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len, ··· 738 711 u32 *ins_sizes; 739 712 int i = 0; 740 713 741 - ins_data = kmalloc(batch.nr * sizeof(u32) + 742 - batch.nr * sizeof(struct btrfs_key), GFP_NOFS); 714 + ins_data = kmalloc_array(batch.nr, 715 + sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS); 743 716 if (!ins_data) { 744 717 ret = -ENOMEM; 745 718 goto out; ··· 1038 1011 * transaction, because we could leave the inode with the 1039 1012 * improper counts behind. 1040 1013 */ 1041 - if (ret != -ENOENT) 1014 + if (unlikely(ret != -ENOENT)) 1042 1015 btrfs_abort_transaction(trans, ret); 1043 1016 goto out; 1044 1017 } ··· 1066 1039 1067 1040 btrfs_release_path(path); 1068 1041 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1069 - if (ret < 0) { 1042 + if (unlikely(ret < 0)) { 1070 1043 btrfs_abort_transaction(trans, ret); 1071 1044 goto err_out; 1072 1045 } ··· 1153 1126 struct btrfs_fs_info *fs_info = trans->fs_info; 1154 1127 struct btrfs_delayed_root *delayed_root; 1155 1128 struct btrfs_delayed_node *curr_node, *prev_node; 1129 + struct btrfs_ref_tracker curr_delayed_node_tracker, prev_delayed_node_tracker; 1156 1130 struct btrfs_path *path; 1157 1131 struct btrfs_block_rsv *block_rsv; 1158 1132 int ret = 0; ··· 1171 1143 1172 1144 delayed_root = fs_info->delayed_root; 1173 1145 1174 - curr_node = btrfs_first_delayed_node(delayed_root); 1146 + curr_node = btrfs_first_delayed_node(delayed_root, &curr_delayed_node_tracker); 1175 1147 while (curr_node && (!count || nr--)) { 1176 1148 ret = __btrfs_commit_inode_delayed_items(trans, path, 1177 1149 curr_node); 1178 - if (ret) { 1150 + if (unlikely(ret)) { 1179 1151 btrfs_abort_transaction(trans, ret); 1180 1152 break; 1181 1153 } 1182 1154 1183 1155 prev_node = curr_node; 
1184 - curr_node = btrfs_next_delayed_node(curr_node); 1156 + prev_delayed_node_tracker = curr_delayed_node_tracker; 1157 + curr_node = btrfs_next_delayed_node(curr_node, &curr_delayed_node_tracker); 1185 1158 /* 1186 1159 * See the comment below about releasing path before releasing 1187 1160 * node. If the commit of delayed items was successful the path ··· 1190 1161 * point to locked extent buffers (a leaf at the very least). 1191 1162 */ 1192 1163 ASSERT(path->nodes[0] == NULL); 1193 - btrfs_release_delayed_node(prev_node); 1164 + btrfs_release_delayed_node(prev_node, &prev_delayed_node_tracker); 1194 1165 } 1195 1166 1196 1167 /* ··· 1203 1174 btrfs_free_path(path); 1204 1175 1205 1176 if (curr_node) 1206 - btrfs_release_delayed_node(curr_node); 1177 + btrfs_release_delayed_node(curr_node, &curr_delayed_node_tracker); 1207 1178 trans->block_rsv = block_rsv; 1208 1179 1209 1180 return ret; ··· 1222 1193 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, 1223 1194 struct btrfs_inode *inode) 1224 1195 { 1225 - struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); 1196 + struct btrfs_ref_tracker delayed_node_tracker; 1197 + struct btrfs_delayed_node *delayed_node = 1198 + btrfs_get_delayed_node(inode, &delayed_node_tracker); 1226 1199 BTRFS_PATH_AUTO_FREE(path); 1227 1200 struct btrfs_block_rsv *block_rsv; 1228 1201 int ret; ··· 1235 1204 mutex_lock(&delayed_node->mutex); 1236 1205 if (!delayed_node->count) { 1237 1206 mutex_unlock(&delayed_node->mutex); 1238 - btrfs_release_delayed_node(delayed_node); 1207 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1239 1208 return 0; 1240 1209 } 1241 1210 mutex_unlock(&delayed_node->mutex); 1242 1211 1243 1212 path = btrfs_alloc_path(); 1244 1213 if (!path) { 1245 - btrfs_release_delayed_node(delayed_node); 1214 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1246 1215 return -ENOMEM; 1247 1216 } 1248 1217 ··· 1251 1220 1252 1221 ret = 
__btrfs_commit_inode_delayed_items(trans, path, delayed_node); 1253 1222 1254 - btrfs_release_delayed_node(delayed_node); 1223 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1255 1224 trans->block_rsv = block_rsv; 1256 1225 1257 1226 return ret; ··· 1261 1230 { 1262 1231 struct btrfs_fs_info *fs_info = inode->root->fs_info; 1263 1232 struct btrfs_trans_handle *trans; 1264 - struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); 1233 + struct btrfs_ref_tracker delayed_node_tracker; 1234 + struct btrfs_delayed_node *delayed_node; 1265 1235 struct btrfs_path *path; 1266 1236 struct btrfs_block_rsv *block_rsv; 1267 1237 int ret; 1268 1238 1239 + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); 1269 1240 if (!delayed_node) 1270 1241 return 0; 1271 1242 1272 1243 mutex_lock(&delayed_node->mutex); 1273 1244 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) { 1274 1245 mutex_unlock(&delayed_node->mutex); 1275 - btrfs_release_delayed_node(delayed_node); 1246 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1276 1247 return 0; 1277 1248 } 1278 1249 mutex_unlock(&delayed_node->mutex); ··· 1308 1275 btrfs_end_transaction(trans); 1309 1276 btrfs_btree_balance_dirty(fs_info); 1310 1277 out: 1311 - btrfs_release_delayed_node(delayed_node); 1278 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1312 1279 1313 1280 return ret; 1314 1281 } ··· 1322 1289 return; 1323 1290 1324 1291 inode->delayed_node = NULL; 1325 - btrfs_release_delayed_node(delayed_node); 1292 + 1293 + btrfs_release_delayed_node(delayed_node, &delayed_node->inode_cache_tracker); 1326 1294 } 1327 1295 1328 1296 struct btrfs_async_delayed_work { ··· 1339 1305 struct btrfs_trans_handle *trans; 1340 1306 struct btrfs_path *path; 1341 1307 struct btrfs_delayed_node *delayed_node = NULL; 1308 + struct btrfs_ref_tracker delayed_node_tracker; 1342 1309 struct btrfs_root *root; 1343 1310 struct btrfs_block_rsv 
*block_rsv; 1344 1311 int total_done = 0; ··· 1356 1321 BTRFS_DELAYED_BACKGROUND / 2) 1357 1322 break; 1358 1323 1359 - delayed_node = btrfs_first_prepared_delayed_node(delayed_root); 1324 + delayed_node = btrfs_first_prepared_delayed_node(delayed_root, 1325 + &delayed_node_tracker); 1360 1326 if (!delayed_node) 1361 1327 break; 1362 1328 ··· 1366 1330 trans = btrfs_join_transaction(root); 1367 1331 if (IS_ERR(trans)) { 1368 1332 btrfs_release_path(path); 1369 - btrfs_release_prepared_delayed_node(delayed_node); 1333 + btrfs_release_prepared_delayed_node(delayed_node, 1334 + &delayed_node_tracker); 1370 1335 total_done++; 1371 1336 continue; 1372 1337 } ··· 1382 1345 btrfs_btree_balance_dirty_nodelay(root->fs_info); 1383 1346 1384 1347 btrfs_release_path(path); 1385 - btrfs_release_prepared_delayed_node(delayed_node); 1348 + btrfs_release_prepared_delayed_node(delayed_node, 1349 + &delayed_node_tracker); 1386 1350 total_done++; 1387 1351 1388 1352 } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ··· 1415 1377 1416 1378 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info) 1417 1379 { 1418 - struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root); 1380 + struct btrfs_ref_tracker delayed_node_tracker; 1381 + struct btrfs_delayed_node *node; 1419 1382 1420 - if (WARN_ON(node)) 1383 + node = btrfs_first_delayed_node( fs_info->delayed_root, &delayed_node_tracker); 1384 + if (WARN_ON(node)) { 1385 + btrfs_delayed_node_ref_tracker_free(node, 1386 + &delayed_node_tracker); 1421 1387 refcount_dec(&node->refs); 1388 + } 1422 1389 } 1423 1390 1424 1391 static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq) ··· 1497 1454 struct btrfs_fs_info *fs_info = trans->fs_info; 1498 1455 const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info); 1499 1456 struct btrfs_delayed_node *delayed_node; 1457 + struct btrfs_ref_tracker delayed_node_tracker; 1500 1458 struct btrfs_delayed_item 
*delayed_item; 1501 1459 struct btrfs_dir_item *dir_item; 1502 1460 bool reserve_leaf_space; 1503 1461 u32 data_len; 1504 1462 int ret; 1505 1463 1506 - delayed_node = btrfs_get_or_create_delayed_node(dir); 1464 + delayed_node = btrfs_get_or_create_delayed_node(dir, &delayed_node_tracker); 1507 1465 if (IS_ERR(delayed_node)) 1508 1466 return PTR_ERR(delayed_node); 1509 1467 ··· 1580 1536 mutex_unlock(&delayed_node->mutex); 1581 1537 1582 1538 release_node: 1583 - btrfs_release_delayed_node(delayed_node); 1539 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1584 1540 return ret; 1585 1541 } 1586 1542 ··· 1635 1591 struct btrfs_inode *dir, u64 index) 1636 1592 { 1637 1593 struct btrfs_delayed_node *node; 1594 + struct btrfs_ref_tracker delayed_node_tracker; 1638 1595 struct btrfs_delayed_item *item; 1639 1596 int ret; 1640 1597 1641 - node = btrfs_get_or_create_delayed_node(dir); 1598 + node = btrfs_get_or_create_delayed_node(dir, &delayed_node_tracker); 1642 1599 if (IS_ERR(node)) 1643 1600 return PTR_ERR(node); 1644 1601 ··· 1680 1635 } 1681 1636 mutex_unlock(&node->mutex); 1682 1637 end: 1683 - btrfs_release_delayed_node(node); 1638 + btrfs_release_delayed_node(node, &delayed_node_tracker); 1684 1639 return ret; 1685 1640 } 1686 1641 1687 1642 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode) 1688 1643 { 1689 - struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); 1644 + struct btrfs_ref_tracker delayed_node_tracker; 1645 + struct btrfs_delayed_node *delayed_node; 1690 1646 1647 + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); 1691 1648 if (!delayed_node) 1692 1649 return -ENOENT; 1693 1650 ··· 1699 1652 * is updated now. So we needn't lock the delayed node. 
1700 1653 */ 1701 1654 if (!delayed_node->index_cnt) { 1702 - btrfs_release_delayed_node(delayed_node); 1655 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1703 1656 return -EINVAL; 1704 1657 } 1705 1658 1706 1659 inode->index_cnt = delayed_node->index_cnt; 1707 - btrfs_release_delayed_node(delayed_node); 1660 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1708 1661 return 0; 1709 1662 } 1710 1663 ··· 1715 1668 { 1716 1669 struct btrfs_delayed_node *delayed_node; 1717 1670 struct btrfs_delayed_item *item; 1671 + struct btrfs_ref_tracker delayed_node_tracker; 1718 1672 1719 - delayed_node = btrfs_get_delayed_node(inode); 1673 + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); 1720 1674 if (!delayed_node) 1721 1675 return false; 1722 1676 ··· 1752 1704 * insert/delete delayed items in this period. So we also needn't 1753 1705 * requeue or dequeue this delayed node. 1754 1706 */ 1707 + btrfs_delayed_node_ref_tracker_free(delayed_node, &delayed_node_tracker); 1755 1708 refcount_dec(&delayed_node->refs); 1756 1709 1757 1710 return true; ··· 1893 1844 int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev) 1894 1845 { 1895 1846 struct btrfs_delayed_node *delayed_node; 1847 + struct btrfs_ref_tracker delayed_node_tracker; 1896 1848 struct btrfs_inode_item *inode_item; 1897 1849 struct inode *vfs_inode = &inode->vfs_inode; 1898 1850 1899 - delayed_node = btrfs_get_delayed_node(inode); 1851 + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); 1900 1852 if (!delayed_node) 1901 1853 return -ENOENT; 1902 1854 1903 1855 mutex_lock(&delayed_node->mutex); 1904 1856 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) { 1905 1857 mutex_unlock(&delayed_node->mutex); 1906 - btrfs_release_delayed_node(delayed_node); 1858 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1907 1859 return -ENOENT; 1908 1860 } 1909 1861 ··· 1942 1892 inode->index_cnt = (u64)-1; 1943 1893 
1944 1894 mutex_unlock(&delayed_node->mutex); 1945 - btrfs_release_delayed_node(delayed_node); 1895 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1946 1896 return 0; 1947 1897 } 1948 1898 ··· 1951 1901 { 1952 1902 struct btrfs_root *root = inode->root; 1953 1903 struct btrfs_delayed_node *delayed_node; 1904 + struct btrfs_ref_tracker delayed_node_tracker; 1954 1905 int ret = 0; 1955 1906 1956 - delayed_node = btrfs_get_or_create_delayed_node(inode); 1907 + delayed_node = btrfs_get_or_create_delayed_node(inode, &delayed_node_tracker); 1957 1908 if (IS_ERR(delayed_node)) 1958 1909 return PTR_ERR(delayed_node); 1959 1910 ··· 1974 1923 atomic_inc(&root->fs_info->delayed_root->items); 1975 1924 release_node: 1976 1925 mutex_unlock(&delayed_node->mutex); 1977 - btrfs_release_delayed_node(delayed_node); 1926 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 1978 1927 return ret; 1979 1928 } 1980 1929 ··· 1982 1931 { 1983 1932 struct btrfs_fs_info *fs_info = inode->root->fs_info; 1984 1933 struct btrfs_delayed_node *delayed_node; 1934 + struct btrfs_ref_tracker delayed_node_tracker; 1985 1935 1986 1936 /* 1987 1937 * we don't do delayed inode updates during log recovery because it ··· 1992 1940 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) 1993 1941 return -EAGAIN; 1994 1942 1995 - delayed_node = btrfs_get_or_create_delayed_node(inode); 1943 + delayed_node = btrfs_get_or_create_delayed_node(inode, &delayed_node_tracker); 1996 1944 if (IS_ERR(delayed_node)) 1997 1945 return PTR_ERR(delayed_node); 1998 1946 ··· 2019 1967 atomic_inc(&fs_info->delayed_root->items); 2020 1968 release_node: 2021 1969 mutex_unlock(&delayed_node->mutex); 2022 - btrfs_release_delayed_node(delayed_node); 1970 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 2023 1971 return 0; 2024 1972 } 2025 1973 ··· 2063 2011 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode) 2064 2012 { 2065 2013 struct btrfs_delayed_node 
*delayed_node; 2014 + struct btrfs_ref_tracker delayed_node_tracker; 2066 2015 2067 - delayed_node = btrfs_get_delayed_node(inode); 2016 + delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker); 2068 2017 if (!delayed_node) 2069 2018 return; 2070 2019 2071 2020 __btrfs_kill_delayed_node(delayed_node); 2072 - btrfs_release_delayed_node(delayed_node); 2021 + btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 2073 2022 } 2074 2023 2075 2024 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) 2076 2025 { 2077 2026 unsigned long index = 0; 2078 2027 struct btrfs_delayed_node *delayed_nodes[8]; 2028 + struct btrfs_ref_tracker delayed_node_trackers[8]; 2079 2029 2080 2030 while (1) { 2081 2031 struct btrfs_delayed_node *node; ··· 2096 2042 * about to be removed from the tree in the loop below 2097 2043 */ 2098 2044 if (refcount_inc_not_zero(&node->refs)) { 2045 + btrfs_delayed_node_ref_tracker_alloc(node, 2046 + &delayed_node_trackers[count], 2047 + GFP_ATOMIC); 2099 2048 delayed_nodes[count] = node; 2100 2049 count++; 2101 2050 } ··· 2110 2053 2111 2054 for (int i = 0; i < count; i++) { 2112 2055 __btrfs_kill_delayed_node(delayed_nodes[i]); 2113 - btrfs_release_delayed_node(delayed_nodes[i]); 2056 + btrfs_release_delayed_node(delayed_nodes[i], 2057 + &delayed_node_trackers[i]); 2058 + btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]); 2114 2059 } 2115 2060 } 2116 2061 } ··· 2120 2061 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info) 2121 2062 { 2122 2063 struct btrfs_delayed_node *curr_node, *prev_node; 2064 + struct btrfs_ref_tracker curr_delayed_node_tracker, prev_delayed_node_tracker; 2123 2065 2124 - curr_node = btrfs_first_delayed_node(fs_info->delayed_root); 2066 + curr_node = btrfs_first_delayed_node(fs_info->delayed_root, 2067 + &curr_delayed_node_tracker); 2125 2068 while (curr_node) { 2126 2069 __btrfs_kill_delayed_node(curr_node); 2127 2070 2128 2071 prev_node = curr_node; 2129 - curr_node = 
btrfs_next_delayed_node(curr_node); 2130 - btrfs_release_delayed_node(prev_node); 2072 + prev_delayed_node_tracker = curr_delayed_node_tracker; 2073 + curr_node = btrfs_next_delayed_node(curr_node, &curr_delayed_node_tracker); 2074 + btrfs_release_delayed_node(prev_node, &prev_delayed_node_tracker); 2131 2075 } 2132 2076 } 2133 2077 ··· 2140 2078 { 2141 2079 struct btrfs_delayed_node *node; 2142 2080 struct btrfs_delayed_item *item; 2081 + struct btrfs_ref_tracker delayed_node_tracker; 2143 2082 2144 - node = btrfs_get_delayed_node(inode); 2083 + node = btrfs_get_delayed_node(inode, &delayed_node_tracker); 2145 2084 if (!node) 2146 2085 return; 2147 2086 ··· 2200 2137 * delete delayed items. 2201 2138 */ 2202 2139 ASSERT(refcount_read(&node->refs) > 1); 2140 + btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker); 2203 2141 refcount_dec(&node->refs); 2204 2142 } 2205 2143 ··· 2211 2147 struct btrfs_delayed_node *node; 2212 2148 struct btrfs_delayed_item *item; 2213 2149 struct btrfs_delayed_item *next; 2150 + struct btrfs_ref_tracker delayed_node_tracker; 2214 2151 2215 - node = btrfs_get_delayed_node(inode); 2152 + node = btrfs_get_delayed_node(inode, &delayed_node_tracker); 2216 2153 if (!node) 2217 2154 return; 2218 2155 ··· 2245 2180 * delete delayed items. 2246 2181 */ 2247 2182 ASSERT(refcount_read(&node->refs) > 1); 2183 + btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker); 2248 2184 refcount_dec(&node->refs); 2249 2185 }

+93

fs/btrfs/delayed-inode.h

··· 16 16 #include <linux/fs.h> 17 17 #include <linux/atomic.h> 18 18 #include <linux/refcount.h> 19 + #include <linux/ref_tracker.h> 19 20 #include "ctree.h" 20 21 21 22 struct btrfs_disk_key; ··· 43 42 atomic_t items_seq; /* for delayed items */ 44 43 int nodes; /* for delayed nodes */ 45 44 wait_queue_head_t wait; 45 + }; 46 + 47 + struct btrfs_ref_tracker_dir { 48 + #ifdef CONFIG_BTRFS_DEBUG 49 + struct ref_tracker_dir dir; 50 + #else 51 + struct {} tracker; 52 + #endif 53 + }; 54 + 55 + struct btrfs_ref_tracker { 56 + #ifdef CONFIG_BTRFS_DEBUG 57 + struct ref_tracker *tracker; 58 + #else 59 + struct {} tracker; 60 + #endif 46 61 }; 47 62 48 63 #define BTRFS_DELAYED_NODE_IN_LIST 0 ··· 95 78 * actual number of leaves we end up using. Protected by @mutex. 96 79 */ 97 80 u32 index_item_leaves; 81 + /* Track all references to this delayed node. */ 82 + struct btrfs_ref_tracker_dir ref_dir; 83 + /* Track delayed node reference stored in node list. */ 84 + struct btrfs_ref_tracker node_list_tracker; 85 + /* Track delayed node reference stored in inode cache. 
*/ 86 + struct btrfs_ref_tracker inode_cache_tracker; 98 87 }; 99 88 100 89 struct btrfs_delayed_item { ··· 191 168 192 169 /* for debugging */ 193 170 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info); 171 + 172 + #define BTRFS_DELAYED_NODE_REF_TRACKER_QUARANTINE_COUNT 16 173 + #define BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT 16 174 + 175 + #ifdef CONFIG_BTRFS_DEBUG 176 + static inline void btrfs_delayed_node_ref_tracker_dir_init(struct btrfs_delayed_node *node) 177 + { 178 + if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER)) 179 + return; 180 + 181 + ref_tracker_dir_init(&node->ref_dir.dir, 182 + BTRFS_DELAYED_NODE_REF_TRACKER_QUARANTINE_COUNT, 183 + "delayed_node"); 184 + } 185 + 186 + static inline void btrfs_delayed_node_ref_tracker_dir_exit(struct btrfs_delayed_node *node) 187 + { 188 + if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER)) 189 + return; 190 + 191 + ref_tracker_dir_exit(&node->ref_dir.dir); 192 + } 193 + 194 + static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed_node *node) 195 + { 196 + if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER)) 197 + return; 198 + 199 + ref_tracker_dir_print(&node->ref_dir.dir, 200 + BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT); 201 + } 202 + 203 + static inline int btrfs_delayed_node_ref_tracker_alloc(struct btrfs_delayed_node *node, 204 + struct btrfs_ref_tracker *tracker, 205 + gfp_t gfp) 206 + { 207 + if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER)) 208 + return 0; 209 + 210 + return ref_tracker_alloc(&node->ref_dir.dir, &tracker->tracker, gfp); 211 + } 212 + 213 + static inline int btrfs_delayed_node_ref_tracker_free(struct btrfs_delayed_node *node, 214 + struct btrfs_ref_tracker *tracker) 215 + { 216 + if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER)) 217 + return 0; 218 + 219 + return ref_tracker_free(&node->ref_dir.dir, &tracker->tracker); 220 + } 221 + #else 222 + static inline void btrfs_delayed_node_ref_tracker_dir_init(struct 
btrfs_delayed_node *node) { } 223 + 224 + static inline void btrfs_delayed_node_ref_tracker_dir_exit(struct btrfs_delayed_node *node) { } 225 + 226 + static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed_node *node) { } 227 + 228 + static inline int btrfs_delayed_node_ref_tracker_alloc(struct btrfs_delayed_node *node, 229 + struct btrfs_ref_tracker *tracker, 230 + gfp_t gfp) 231 + { 232 + return 0; 233 + } 234 + 235 + static inline int btrfs_delayed_node_ref_tracker_free(struct btrfs_delayed_node *node, 236 + struct btrfs_ref_tracker *tracker) 237 + { 238 + return 0; 239 + } 240 + #endif 194 241 195 242 #endif

+6 -7

fs/btrfs/delayed-ref.c

··· 895 895 } 896 896 897 897 /* 898 - * Initialize the structure which represents a modification to a an extent. 898 + * Initialize the structure which represents a modification to an extent. 899 899 * 900 900 * @fs_info: Internal to the mounted filesystem mount structure. 901 901 * ··· 952 952 void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root, 953 953 bool skip_qgroup) 954 954 { 955 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 955 + #ifdef CONFIG_BTRFS_DEBUG 956 956 /* If @real_root not set, use @root as fallback */ 957 957 generic_ref->real_root = mod_root ?: generic_ref->ref_root; 958 958 #endif ··· 969 969 void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset, 970 970 u64 mod_root, bool skip_qgroup) 971 971 { 972 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 972 + #ifdef CONFIG_BTRFS_DEBUG 973 973 /* If @real_root not set, use @root as fallback */ 974 974 generic_ref->real_root = mod_root ?: generic_ref->ref_root; 975 975 #endif ··· 1251 1251 { 1252 1252 struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs; 1253 1253 struct btrfs_fs_info *fs_info = trans->fs_info; 1254 - bool testing = btrfs_is_testing(fs_info); 1255 1254 1256 1255 spin_lock(&delayed_refs->lock); 1257 1256 while (true) { ··· 1280 1281 spin_unlock(&delayed_refs->lock); 1281 1282 mutex_unlock(&head->mutex); 1282 1283 1283 - if (!testing && pin_bytes) { 1284 + if (!btrfs_is_testing(fs_info) && pin_bytes) { 1284 1285 struct btrfs_block_group *bg; 1285 1286 1286 1287 bg = btrfs_lookup_block_group(fs_info, head->bytenr); ··· 1311 1312 btrfs_error_unpin_extent_range(fs_info, head->bytenr, 1312 1313 head->bytenr + head->num_bytes - 1); 1313 1314 } 1314 - if (!testing) 1315 + if (!btrfs_is_testing(fs_info)) 1315 1316 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); 1316 1317 btrfs_put_delayed_ref_head(head); 1317 1318 cond_resched(); 1318 1319 spin_lock(&delayed_refs->lock); 1319 1320 } 1320 1321 1321 - if (!testing) 1322 + if 
(!btrfs_is_testing(fs_info)) 1322 1323 btrfs_qgroup_destroy_extent_records(trans); 1323 1324 1324 1325 spin_unlock(&delayed_refs->lock);

+5 -4

fs/btrfs/delayed-ref.h

··· 276 276 */ 277 277 bool skip_qgroup; 278 278 279 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 280 - /* Through which root is this modification. */ 281 - u64 real_root; 282 - #endif 283 279 u64 bytenr; 284 280 u64 num_bytes; 285 281 u64 owning_root; ··· 292 296 struct btrfs_data_ref data_ref; 293 297 struct btrfs_tree_ref tree_ref; 294 298 }; 299 + 300 + #ifdef CONFIG_BTRFS_DEBUG 301 + /* Through which root is this modification. */ 302 + u64 real_root; 303 + #endif 295 304 }; 296 305 297 306 extern struct kmem_cache *btrfs_delayed_ref_head_cachep;

+5 -7

fs/btrfs/dev-replace.c

··· 98 98 * We don't have a replace item or it's corrupted. If there is 99 99 * a replace target, fail the mount. 100 100 */ 101 - if (btrfs_find_device(fs_info->fs_devices, &args)) { 101 + if (unlikely(btrfs_find_device(fs_info->fs_devices, &args))) { 102 102 btrfs_err(fs_info, 103 103 "found replace target device without a valid replace item"); 104 104 return -EUCLEAN; ··· 158 158 * We don't have an active replace item but if there is a 159 159 * replace target, fail the mount. 160 160 */ 161 - if (btrfs_find_device(fs_info->fs_devices, &args)) { 161 + if (unlikely(btrfs_find_device(fs_info->fs_devices, &args))) { 162 162 btrfs_err(fs_info, 163 163 "replace without active item, run 'device scan --forget' on the target device"); 164 164 ret = -EUCLEAN; ··· 177 177 * allow 'btrfs dev replace_cancel' if src/tgt device is 178 178 * missing 179 179 */ 180 - if (!dev_replace->srcdev && 181 - !btrfs_test_opt(fs_info, DEGRADED)) { 180 + if (unlikely(!dev_replace->srcdev && !btrfs_test_opt(fs_info, DEGRADED))) { 182 181 ret = -EIO; 183 182 btrfs_warn(fs_info, 184 183 "cannot mount because device replace operation is ongoing and"); ··· 185 186 "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?", 186 187 src_devid); 187 188 } 188 - if (!dev_replace->tgtdev && 189 - !btrfs_test_opt(fs_info, DEGRADED)) { 189 + if (unlikely(!dev_replace->tgtdev && !btrfs_test_opt(fs_info, DEGRADED))) { 190 190 ret = -EIO; 191 191 btrfs_warn(fs_info, 192 192 "cannot mount because device replace operation is ongoing and"); ··· 635 637 break; 636 638 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 637 639 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 638 - DEBUG_WARN("unexpected STARTED ot SUSPENDED dev-replace state"); 640 + DEBUG_WARN("unexpected STARTED or SUSPENDED dev-replace state"); 639 641 ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; 640 642 up_write(&dev_replace->rwsem); 641 643 goto leave;

+12

fs/btrfs/direct-io.c

··· 786 786 if (iov_iter_alignment(iter) & blocksize_mask) 787 787 return -EINVAL; 788 788 789 + /* 790 + * For bs > ps support, we heavily rely on large folios to make sure no 791 + * block will cross large folio boundaries. 792 + * 793 + * But memory provided by direct IO is only virtually contiguous, not 794 + * physically contiguous, and will break the btrfs' large folio requirement. 795 + * 796 + * So for bs > ps support, all direct IOs should fallback to buffered ones. 797 + */ 798 + if (fs_info->sectorsize > PAGE_SIZE) 799 + return -EINVAL; 800 + 789 801 return 0; 790 802 } 791 803

+49 -48

fs/btrfs/disk-io.c

··· 116 116 * detect blocks that either didn't get written at all or got written 117 117 * in the wrong place. 118 118 */ 119 - int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic) 119 + int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic) 120 120 { 121 121 if (!extent_buffer_uptodate(eb)) 122 122 return 0; ··· 370 370 ASSERT(check); 371 371 372 372 found_start = btrfs_header_bytenr(eb); 373 - if (found_start != eb->start) { 373 + if (unlikely(found_start != eb->start)) { 374 374 btrfs_err_rl(fs_info, 375 375 "bad tree block start, mirror %u want %llu have %llu", 376 376 eb->read_mirror, eb->start, found_start); 377 377 ret = -EIO; 378 378 goto out; 379 379 } 380 - if (check_tree_block_fsid(eb)) { 380 + if (unlikely(check_tree_block_fsid(eb))) { 381 381 btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u", 382 382 eb->start, eb->read_mirror); 383 383 ret = -EIO; 384 384 goto out; 385 385 } 386 386 found_level = btrfs_header_level(eb); 387 - if (found_level >= BTRFS_MAX_LEVEL) { 387 + if (unlikely(found_level >= BTRFS_MAX_LEVEL)) { 388 388 btrfs_err(fs_info, 389 389 "bad tree block level, mirror %u level %d on logical %llu", 390 390 eb->read_mirror, btrfs_header_level(eb), eb->start); ··· 404 404 CSUM_FMT_VALUE(csum_size, result), 405 405 btrfs_header_level(eb), 406 406 ignore_csum ? 
", ignored" : ""); 407 - if (!ignore_csum) { 407 + if (unlikely(!ignore_csum)) { 408 408 ret = -EUCLEAN; 409 409 goto out; 410 410 } 411 411 } 412 412 413 - if (found_level != check->level) { 413 + if (unlikely(found_level != check->level)) { 414 414 btrfs_err(fs_info, 415 415 "level verify failed on logical %llu mirror %u wanted %u found %u", 416 416 eb->start, eb->read_mirror, check->level, found_level); ··· 639 639 u64 objectid, gfp_t flags) 640 640 { 641 641 struct btrfs_root *root; 642 - bool dummy = btrfs_is_testing(fs_info); 643 642 644 643 root = kzalloc(sizeof(*root), flags); 645 644 if (!root) ··· 695 696 root->log_transid_committed = -1; 696 697 btrfs_set_root_last_log_commit(root, 0); 697 698 root->anon_dev = 0; 698 - if (!dummy) { 699 + if (!btrfs_is_testing(fs_info)) { 699 700 btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages, 700 701 IO_TREE_ROOT_DIRTY_LOG_PAGES); 701 702 btrfs_extent_io_tree_init(fs_info, &root->log_csum_range, ··· 1046 1047 root->node = NULL; 1047 1048 goto fail; 1048 1049 } 1049 - if (!btrfs_buffer_uptodate(root->node, generation, 0)) { 1050 + if (unlikely(!btrfs_buffer_uptodate(root->node, generation, false))) { 1050 1051 ret = -EIO; 1051 1052 goto fail; 1052 1053 } ··· 1055 1056 * For real fs, and not log/reloc trees, root owner must 1056 1057 * match its root node owner 1057 1058 */ 1058 - if (!btrfs_is_testing(fs_info) && 1059 - btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID && 1060 - btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID && 1061 - btrfs_root_id(root) != btrfs_header_owner(root->node)) { 1059 + if (unlikely(!btrfs_is_testing(fs_info) && 1060 + btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID && 1061 + btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID && 1062 + btrfs_root_id(root) != btrfs_header_owner(root->node))) { 1062 1063 btrfs_crit(fs_info, 1063 1064 "root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu", 1064 1065 btrfs_root_id(root), root->node->start, ··· 1247 1248 1248 1249 if 
(fs_info->fs_devices) 1249 1250 btrfs_close_devices(fs_info->fs_devices); 1251 + btrfs_free_compress_wsm(fs_info); 1250 1252 percpu_counter_destroy(&fs_info->stats_read_blocks); 1251 1253 percpu_counter_destroy(&fs_info->dirty_metadata_bytes); 1252 1254 percpu_counter_destroy(&fs_info->delalloc_bytes); ··· 2058 2058 u64 bytenr = btrfs_super_log_root(disk_super); 2059 2059 int level = btrfs_super_log_root_level(disk_super); 2060 2060 2061 - if (fs_devices->rw_devices == 0) { 2061 + if (unlikely(fs_devices->rw_devices == 0)) { 2062 2062 btrfs_warn(fs_info, "log replay required on RO media"); 2063 2063 return -EIO; 2064 2064 } ··· 2079 2079 btrfs_put_root(log_tree_root); 2080 2080 return ret; 2081 2081 } 2082 - if (!extent_buffer_uptodate(log_tree_root->node)) { 2082 + if (unlikely(!extent_buffer_uptodate(log_tree_root->node))) { 2083 2083 btrfs_err(fs_info, "failed to read log tree"); 2084 2084 btrfs_put_root(log_tree_root); 2085 2085 return -EIO; ··· 2087 2087 2088 2088 /* returns with log_tree_root freed on success */ 2089 2089 ret = btrfs_recover_log_trees(log_tree_root); 2090 + btrfs_put_root(log_tree_root); 2090 2091 if (ret) { 2091 2092 btrfs_handle_fs_error(fs_info, ret, 2092 2093 "Failed to recover log tree"); 2093 - btrfs_put_root(log_tree_root); 2094 2094 return ret; 2095 2095 } 2096 2096 ··· 2324 2324 const u32 sectorsize = btrfs_super_sectorsize(sb); 2325 2325 u32 sys_array_size = btrfs_super_sys_array_size(sb); 2326 2326 2327 - if (sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { 2327 + if (unlikely(sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)) { 2328 2328 btrfs_err(fs_info, "system chunk array too big %u > %u", 2329 2329 sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); 2330 2330 return -EUCLEAN; ··· 2342 2342 disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur); 2343 2343 len = sizeof(*disk_key); 2344 2344 2345 - if (cur + len > sys_array_size) 2345 + if (unlikely(cur + len > sys_array_size)) 2346 2346 goto short_read; 2347 2347 cur += 
len; 2348 2348 2349 2349 btrfs_disk_key_to_cpu(&key, disk_key); 2350 - if (key.type != BTRFS_CHUNK_ITEM_KEY) { 2350 + if (unlikely(key.type != BTRFS_CHUNK_ITEM_KEY)) { 2351 2351 btrfs_err(fs_info, 2352 2352 "unexpected item type %u in sys_array at offset %u", 2353 2353 key.type, cur); ··· 2355 2355 } 2356 2356 chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur); 2357 2357 num_stripes = btrfs_stack_chunk_num_stripes(chunk); 2358 - if (cur + btrfs_chunk_item_size(num_stripes) > sys_array_size) 2358 + if (unlikely(cur + btrfs_chunk_item_size(num_stripes) > sys_array_size)) 2359 2359 goto short_read; 2360 2360 type = btrfs_stack_chunk_type(chunk); 2361 - if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) { 2361 + if (unlikely(!(type & BTRFS_BLOCK_GROUP_SYSTEM))) { 2362 2362 btrfs_err(fs_info, 2363 2363 "invalid chunk type %llu in sys_array at offset %u", 2364 2364 type, cur); ··· 2438 2438 ret = -EINVAL; 2439 2439 } 2440 2440 2441 - /* 2442 - * We only support at most 3 sectorsizes: 4K, PAGE_SIZE, MIN_BLOCKSIZE. 2443 - * 2444 - * For 4K page sized systems with non-debug builds, all 3 matches (4K). 2445 - * For 4K page sized systems with debug builds, there are two block sizes 2446 - * supported. (4K and 2K) 2447 - * 2448 - * We can support 16K sectorsize with 64K page size without problem, 2449 - * but such sectorsize/pagesize combination doesn't make much sense. 2450 - * 4K will be our future standard, PAGE_SIZE is supported from the very 2451 - * beginning. 
2452 - */ 2453 - if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K && 2454 - sectorsize != PAGE_SIZE && 2455 - sectorsize != BTRFS_MIN_BLOCKSIZE)) { 2441 + if (!btrfs_supported_blocksize(sectorsize)) { 2456 2442 btrfs_err(fs_info, 2457 2443 "sectorsize %llu not yet supported for page size %lu", 2458 2444 sectorsize, PAGE_SIZE); ··· 2605 2619 ret = btrfs_validate_super(fs_info, sb, -1); 2606 2620 if (ret < 0) 2607 2621 goto out; 2608 - if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) { 2622 + if (unlikely(!btrfs_supported_super_csum(btrfs_super_csum_type(sb)))) { 2609 2623 ret = -EUCLEAN; 2610 2624 btrfs_err(fs_info, "invalid csum type, has %u want %u", 2611 2625 btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32); 2612 2626 goto out; 2613 2627 } 2614 - if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) { 2628 + if (unlikely(btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP)) { 2615 2629 ret = -EUCLEAN; 2616 2630 btrfs_err(fs_info, 2617 2631 "invalid incompat flags, has 0x%llx valid mask 0x%llx", ··· 2641 2655 root->node = NULL; 2642 2656 return ret; 2643 2657 } 2644 - if (!extent_buffer_uptodate(root->node)) { 2658 + if (unlikely(!extent_buffer_uptodate(root->node))) { 2645 2659 free_extent_buffer(root->node); 2646 2660 root->node = NULL; 2647 2661 return -EIO; ··· 3242 3256 } 3243 3257 3244 3258 /* 3245 - * Subpage runtime limitation on v1 cache. 3259 + * Subpage/bs > ps runtime limitation on v1 cache. 3246 3260 * 3247 - * V1 space cache still has some hard codeed PAGE_SIZE usage, while 3261 + * V1 space cache still has some hard coded PAGE_SIZE usage, while 3248 3262 * we're already defaulting to v2 cache, no need to bother v1 as it's 3249 3263 * going to be deprecated anyway. 
3250 3264 */ 3251 - if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) { 3265 + if (fs_info->sectorsize != PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) { 3252 3266 btrfs_warn(fs_info, 3253 3267 "v1 space cache is not supported for page size %lu with sectorsize %u", 3254 3268 PAGE_SIZE, fs_info->sectorsize); 3269 + return -EINVAL; 3270 + } 3271 + if (fs_info->sectorsize > PAGE_SIZE && btrfs_fs_incompat(fs_info, RAID56)) { 3272 + btrfs_err(fs_info, 3273 + "RAID56 is not supported for page size %lu with sectorsize %u", 3274 + PAGE_SIZE, fs_info->sectorsize); 3255 3275 return -EINVAL; 3256 3276 } 3257 3277 ··· 3388 3396 fs_info->nodesize_bits = ilog2(nodesize); 3389 3397 fs_info->sectorsize = sectorsize; 3390 3398 fs_info->sectorsize_bits = ilog2(sectorsize); 3399 + fs_info->block_min_order = ilog2(round_up(sectorsize, PAGE_SIZE) >> PAGE_SHIFT); 3400 + fs_info->block_max_order = ilog2((BITS_PER_LONG << fs_info->sectorsize_bits) >> PAGE_SHIFT); 3391 3401 fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size; 3392 3402 fs_info->stripesize = stripesize; 3393 3403 fs_info->fs_devices->fs_info = fs_info; 3394 3404 3405 + if (fs_info->sectorsize > PAGE_SIZE) 3406 + btrfs_warn(fs_info, 3407 + "support for block size %u with page size %zu is experimental, some features may be missing", 3408 + fs_info->sectorsize, PAGE_SIZE); 3395 3409 /* 3396 3410 * Handle the space caching options appropriately now that we have the 3397 3411 * super block loaded and validated. ··· 3419 3421 */ 3420 3422 fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize); 3421 3423 3424 + ret = btrfs_alloc_compress_wsm(fs_info); 3425 + if (ret) 3426 + goto fail_sb_buffer; 3422 3427 ret = btrfs_init_workqueues(fs_info); 3423 3428 if (ret) 3424 3429 goto fail_sb_buffer; ··· 3469 3468 * below in btrfs_init_dev_replace(). 
3470 3469 */ 3471 3470 btrfs_free_extra_devids(fs_devices); 3472 - if (!fs_devices->latest_dev->bdev) { 3471 + if (unlikely(!fs_devices->latest_dev->bdev)) { 3473 3472 btrfs_err(fs_info, "failed to read devices"); 3474 3473 ret = -EIO; 3475 3474 goto fail_tree_roots; ··· 3963 3962 * Checks last_flush_error of disks in order to determine the device 3964 3963 * state. 3965 3964 */ 3966 - if (errors_wait && !btrfs_check_rw_degradable(info, NULL)) 3965 + if (unlikely(errors_wait && !btrfs_check_rw_degradable(info, NULL))) 3967 3966 return -EIO; 3968 3967 3969 3968 return 0; ··· 4065 4064 btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); 4066 4065 4067 4066 ret = btrfs_validate_write_super(fs_info, sb); 4068 - if (ret < 0) { 4067 + if (unlikely(ret < 0)) { 4069 4068 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 4070 4069 btrfs_handle_fs_error(fs_info, -EUCLEAN, 4071 4070 "unexpected superblock corruption detected"); ··· 4076 4075 if (ret) 4077 4076 total_errors++; 4078 4077 } 4079 - if (total_errors > max_errors) { 4078 + if (unlikely(total_errors > max_errors)) { 4080 4079 btrfs_err(fs_info, "%d errors while writing supers", 4081 4080 total_errors); 4082 4081 mutex_unlock(&fs_info->fs_devices->device_list_mutex); ··· 4101 4100 total_errors++; 4102 4101 } 4103 4102 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 4104 - if (total_errors > max_errors) { 4103 + if (unlikely(total_errors > max_errors)) { 4105 4104 btrfs_handle_fs_error(fs_info, -EIO, 4106 4105 "%d errors while writing supers", 4107 4106 total_errors); ··· 4881 4880 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 4882 4881 if (ret < 0) 4883 4882 return ret; 4884 - if (ret == 0) { 4883 + if (unlikely(ret == 0)) { 4885 4884 /* 4886 4885 * Key with offset -1 found, there would have to exist a root 4887 4886 * with such id, but this is out of valid range.

+1 -2

fs/btrfs/disk-io.h

··· 106 106 void btrfs_put_root(struct btrfs_root *root); 107 107 void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans, 108 108 struct extent_buffer *buf); 109 - int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, 110 - int atomic); 109 + int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic); 111 110 int btrfs_read_extent_buffer(struct extent_buffer *buf, 112 111 const struct btrfs_tree_parent_check *check); 113 112

+1 -1

fs/btrfs/export.c

··· 174 174 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 175 175 if (ret < 0) 176 176 goto fail; 177 - if (ret == 0) { 177 + if (unlikely(ret == 0)) { 178 178 /* 179 179 * Key with offset of -1 found, there would have to exist an 180 180 * inode with such number or a root with such id.

+2 -2

fs/btrfs/extent-io-tree.c

··· 1237 1237 state = next_search_state(inserted_state, end); 1238 1238 /* 1239 1239 * If there's a next state, whether contiguous or not, we don't 1240 - * need to unlock and start search agian. If it's not contiguous 1240 + * need to unlock and start search again. If it's not contiguous 1241 1241 * we will end up here and try to allocate a prealloc state and insert. 1242 1242 */ 1243 1243 if (state) ··· 1664 1664 */ 1665 1665 u64 btrfs_count_range_bits(struct extent_io_tree *tree, 1666 1666 u64 *start, u64 search_end, u64 max_bytes, 1667 - u32 bits, int contig, 1667 + u32 bits, bool contig, 1668 1668 struct extent_state **cached_state) 1669 1669 { 1670 1670 struct extent_state *state = NULL;

+1 -1

fs/btrfs/extent-io-tree.h

··· 163 163 164 164 u64 btrfs_count_range_bits(struct extent_io_tree *tree, 165 165 u64 *start, u64 search_end, 166 - u64 max_bytes, u32 bits, int contig, 166 + u64 max_bytes, u32 bits, bool contig, 167 167 struct extent_state **cached_state); 168 168 169 169 void btrfs_free_extent_state(struct extent_state *state);

+53 -51

fs/btrfs/extent-tree.c

··· 325 325 326 326 /* 327 327 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required, 328 - * is_data == BTRFS_REF_TYPE_DATA, data type is requiried, 328 + * is_data == BTRFS_REF_TYPE_DATA, data type is required, 329 329 * is_data == BTRFS_REF_TYPE_ANY, either type is OK. 330 330 */ 331 331 int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, ··· 879 879 ptr += btrfs_extent_inline_ref_size(type); 880 880 continue; 881 881 } 882 - if (type == BTRFS_REF_TYPE_INVALID) { 882 + if (unlikely(type == BTRFS_REF_TYPE_INVALID)) { 883 883 ret = -EUCLEAN; 884 884 goto out; 885 885 } ··· 1210 1210 * We're adding refs to a tree block we already own, this 1211 1211 * should not happen at all. 1212 1212 */ 1213 - if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1213 + if (unlikely(owner < BTRFS_FIRST_FREE_OBJECTID)) { 1214 1214 btrfs_print_leaf(path->nodes[0]); 1215 1215 btrfs_crit(trans->fs_info, 1216 1216 "adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u", ··· 2157 2157 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); 2158 2158 #endif 2159 2159 ret = __btrfs_run_delayed_refs(trans, min_bytes); 2160 - if (ret < 0) { 2160 + if (unlikely(ret < 0)) { 2161 2161 btrfs_abort_transaction(trans, ret); 2162 2162 return ret; 2163 2163 } ··· 2355 2355 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2356 2356 if (ret < 0) 2357 2357 return ret; 2358 - if (ret == 0) { 2358 + if (unlikely(ret == 0)) { 2359 2359 /* 2360 2360 * Key with offset -1 found, there would have to exist an extent 2361 2361 * item with such offset, but this is out of the valid range. 
··· 2457 2457 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2458 2458 struct btrfs_root *root, 2459 2459 struct extent_buffer *buf, 2460 - int full_backref, int inc) 2460 + bool full_backref, bool inc) 2461 2461 { 2462 2462 struct btrfs_fs_info *fs_info = root->fs_info; 2463 2463 u64 parent; ··· 2543 2543 } 2544 2544 2545 2545 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2546 - struct extent_buffer *buf, int full_backref) 2546 + struct extent_buffer *buf, bool full_backref) 2547 2547 { 2548 - return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2548 + return __btrfs_mod_ref(trans, root, buf, full_backref, true); 2549 2549 } 2550 2550 2551 2551 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2552 - struct extent_buffer *buf, int full_backref) 2552 + struct extent_buffer *buf, bool full_backref) 2553 2553 { 2554 - return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2554 + return __btrfs_mod_ref(trans, root, buf, full_backref, false); 2555 2555 } 2556 2556 2557 2557 static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data) ··· 2760 2760 btrfs_put_block_group(cache); 2761 2761 total_unpinned = 0; 2762 2762 cache = btrfs_lookup_block_group(fs_info, start); 2763 - if (cache == NULL) { 2763 + if (unlikely(cache == NULL)) { 2764 2764 /* Logic error, something removed the block group. 
*/ 2765 2765 ret = -EUCLEAN; 2766 2766 goto out; ··· 2982 2982 2983 2983 csum_root = btrfs_csum_root(trans->fs_info, bytenr); 2984 2984 ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes); 2985 - if (ret) { 2985 + if (unlikely(ret)) { 2986 2986 btrfs_abort_transaction(trans, ret); 2987 2987 return ret; 2988 2988 } 2989 2989 2990 2990 ret = btrfs_delete_raid_extent(trans, bytenr, num_bytes); 2991 - if (ret) { 2991 + if (unlikely(ret)) { 2992 2992 btrfs_abort_transaction(trans, ret); 2993 2993 return ret; 2994 2994 } 2995 2995 } 2996 2996 2997 2997 ret = btrfs_record_squota_delta(trans->fs_info, delta); 2998 - if (ret) { 2998 + if (unlikely(ret)) { 2999 2999 btrfs_abort_transaction(trans, ret); 3000 3000 return ret; 3001 3001 } 3002 3002 3003 3003 ret = btrfs_add_to_free_space_tree(trans, bytenr, num_bytes); 3004 - if (ret) { 3004 + if (unlikely(ret)) { 3005 3005 btrfs_abort_transaction(trans, ret); 3006 3006 return ret; 3007 3007 } ··· 3115 3115 3116 3116 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 3117 3117 3118 - if (!is_data && refs_to_drop != 1) { 3118 + if (unlikely(!is_data && refs_to_drop != 1)) { 3119 3119 btrfs_crit(info, 3120 3120 "invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u", 3121 3121 node->bytenr, refs_to_drop); ··· 3162 3162 } 3163 3163 3164 3164 if (!found_extent) { 3165 - if (iref) { 3165 + if (unlikely(iref)) { 3166 3166 abort_and_dump(trans, path, 3167 3167 "invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref", 3168 3168 path->slots[0]); ··· 3172 3172 /* Must be SHARED_* item, remove the backref first */ 3173 3173 ret = remove_extent_backref(trans, extent_root, path, 3174 3174 NULL, refs_to_drop, is_data); 3175 - if (ret) { 3175 + if (unlikely(ret)) { 3176 3176 btrfs_abort_transaction(trans, ret); 3177 3177 goto out; 3178 3178 } ··· 3221 3221 "umm, got %d back from search, was looking for %llu, slot %d", 3222 3222 ret, bytenr, path->slots[0]); 3223 3223 } 3224 - if 
(ret < 0) { 3224 + if (unlikely(ret < 0)) { 3225 3225 btrfs_abort_transaction(trans, ret); 3226 3226 goto out; 3227 3227 } ··· 3254 3254 key.type == BTRFS_EXTENT_ITEM_KEY) { 3255 3255 struct btrfs_tree_block_info *bi; 3256 3256 3257 - if (item_size < sizeof(*ei) + sizeof(*bi)) { 3257 + if (unlikely(item_size < sizeof(*ei) + sizeof(*bi))) { 3258 3258 abort_and_dump(trans, path, 3259 3259 "invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu", 3260 3260 key.objectid, key.type, key.offset, ··· 3268 3268 } 3269 3269 3270 3270 refs = btrfs_extent_refs(leaf, ei); 3271 - if (refs < refs_to_drop) { 3271 + if (unlikely(refs < refs_to_drop)) { 3272 3272 abort_and_dump(trans, path, 3273 3273 "trying to drop %d refs but we only have %llu for bytenr %llu slot %u", 3274 3274 refs_to_drop, refs, bytenr, path->slots[0]); ··· 3285 3285 * be updated by remove_extent_backref 3286 3286 */ 3287 3287 if (iref) { 3288 - if (!found_extent) { 3288 + if (unlikely(!found_extent)) { 3289 3289 abort_and_dump(trans, path, 3290 3290 "invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u", 3291 3291 path->slots[0]); ··· 3298 3298 if (found_extent) { 3299 3299 ret = remove_extent_backref(trans, extent_root, path, 3300 3300 iref, refs_to_drop, is_data); 3301 - if (ret) { 3301 + if (unlikely(ret)) { 3302 3302 btrfs_abort_transaction(trans, ret); 3303 3303 goto out; 3304 3304 } ··· 3314 3314 3315 3315 /* In this branch refs == 1 */ 3316 3316 if (found_extent) { 3317 - if (is_data && refs_to_drop != 3318 - extent_data_ref_count(path, iref)) { 3317 + if (unlikely(is_data && refs_to_drop != 3318 + extent_data_ref_count(path, iref))) { 3319 3319 abort_and_dump(trans, path, 3320 3320 "invalid refs_to_drop, current refs %u refs_to_drop %u slot %u", 3321 3321 extent_data_ref_count(path, iref), ··· 3324 3324 goto out; 3325 3325 } 3326 3326 if (iref) { 3327 - if (path->slots[0] != extent_slot) { 3327 + if (unlikely(path->slots[0] != extent_slot)) 
{ 3328 3328 abort_and_dump(trans, path, 3329 3329 "invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref", 3330 3330 key.objectid, key.type, ··· 3339 3339 * | extent_slot ||extent_slot + 1| 3340 3340 * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ] 3341 3341 */ 3342 - if (path->slots[0] != extent_slot + 1) { 3342 + if (unlikely(path->slots[0] != extent_slot + 1)) { 3343 3343 abort_and_dump(trans, path, 3344 3344 "invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM", 3345 3345 path->slots[0]); ··· 3363 3363 3364 3364 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3365 3365 num_to_del); 3366 - if (ret) { 3366 + if (unlikely(ret)) { 3367 3367 btrfs_abort_transaction(trans, ret); 3368 3368 goto out; 3369 3369 } ··· 4297 4297 } 4298 4298 4299 4299 static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info, 4300 - struct find_free_extent_ctl *ffe_ctl) 4300 + struct find_free_extent_ctl *ffe_ctl, 4301 + struct btrfs_space_info *space_info) 4301 4302 { 4302 4303 if (ffe_ctl->for_treelog) { 4303 4304 spin_lock(&fs_info->treelog_bg_lock); ··· 4316 4315 spin_lock(&fs_info->zone_active_bgs_lock); 4317 4316 list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { 4318 4317 /* 4319 - * No lock is OK here because avail is monotinically 4318 + * No lock is OK here because avail is monotonically 4320 4319 * decreasing, and this is just a hint. 
4321 4320 */ 4322 4321 u64 avail = block_group->zone_capacity - block_group->alloc_offset; 4323 4322 4324 4323 if (block_group_bits(block_group, ffe_ctl->flags) && 4324 + block_group->space_info == space_info && 4325 4325 avail >= ffe_ctl->num_bytes) { 4326 4326 ffe_ctl->hint_byte = block_group->start; 4327 4327 break; ··· 4344 4342 return prepare_allocation_clustered(fs_info, ffe_ctl, 4345 4343 space_info, ins); 4346 4344 case BTRFS_EXTENT_ALLOC_ZONED: 4347 - return prepare_allocation_zoned(fs_info, ffe_ctl); 4345 + return prepare_allocation_zoned(fs_info, ffe_ctl, space_info); 4348 4346 default: 4349 4347 BUG(); 4350 4348 } ··· 5063 5061 if (IS_ERR(buf)) 5064 5062 return buf; 5065 5063 5066 - if (check_eb_lock_owner(buf)) { 5064 + if (unlikely(check_eb_lock_owner(buf))) { 5067 5065 free_extent_buffer(buf); 5068 5066 return ERR_PTR(-EUCLEAN); 5069 5067 } ··· 5472 5470 if (!(wc->flags[level] & flag)) { 5473 5471 ASSERT(path->locks[level]); 5474 5472 ret = btrfs_inc_ref(trans, root, eb, 1); 5475 - if (ret) { 5473 + if (unlikely(ret)) { 5476 5474 btrfs_abort_transaction(trans, ret); 5477 5475 return ret; 5478 5476 } 5479 5477 ret = btrfs_dec_ref(trans, root, eb, 0); 5480 - if (ret) { 5478 + if (unlikely(ret)) { 5481 5479 btrfs_abort_transaction(trans, ret); 5482 5480 return ret; 5483 5481 } 5484 5482 ret = btrfs_set_disk_extent_flags(trans, eb, flag); 5485 - if (ret) { 5483 + if (unlikely(ret)) { 5486 5484 btrfs_abort_transaction(trans, ret); 5487 5485 return ret; 5488 5486 } ··· 5584 5582 5585 5583 generation = btrfs_node_ptr_generation(path->nodes[level], path->slots[level]); 5586 5584 5587 - if (btrfs_buffer_uptodate(next, generation, 0)) 5585 + if (btrfs_buffer_uptodate(next, generation, false)) 5588 5586 return 0; 5589 5587 5590 5588 check.level = level - 1; ··· 5613 5611 * If we are UPDATE_BACKREF then we will not, we need to update our backrefs. 
5614 5612 * 5615 5613 * If we are DROP_REFERENCE this will figure out if we need to drop our current 5616 - * reference, skipping it if we dropped it from a previous incompleted drop, or 5614 + * reference, skipping it if we dropped it from a previous uncompleted drop, or 5617 5615 * dropping it if we still have a reference to it. 5618 5616 */ 5619 5617 static int maybe_drop_reference(struct btrfs_trans_handle *trans, struct btrfs_root *root, ··· 5638 5636 ref.parent = path->nodes[level]->start; 5639 5637 } else { 5640 5638 ASSERT(btrfs_root_id(root) == btrfs_header_owner(path->nodes[level])); 5641 - if (btrfs_root_id(root) != btrfs_header_owner(path->nodes[level])) { 5639 + if (unlikely(btrfs_root_id(root) != btrfs_header_owner(path->nodes[level]))) { 5642 5640 btrfs_err(root->fs_info, "mismatched block owner"); 5643 5641 return -EIO; 5644 5642 } ··· 5760 5758 5761 5759 /* 5762 5760 * We have to walk down into this node, and if we're currently at the 5763 - * DROP_REFERNCE stage and this block is shared then we need to switch 5761 + * DROP_REFERENCE stage and this block is shared then we need to switch 5764 5762 * to the UPDATE_BACKREF stage in order to convert to FULL_BACKREF. 
5765 5763 */ 5766 5764 if (wc->stage == DROP_REFERENCE && wc->refs[level - 1] > 1) { ··· 5774 5772 5775 5773 level--; 5776 5774 ASSERT(level == btrfs_header_level(next)); 5777 - if (level != btrfs_header_level(next)) { 5775 + if (unlikely(level != btrfs_header_level(next))) { 5778 5776 btrfs_err(root->fs_info, "mismatched level"); 5779 5777 ret = -EIO; 5780 5778 goto out_unlock; ··· 5885 5883 } 5886 5884 } else { 5887 5885 ret = btrfs_dec_ref(trans, root, eb, 0); 5888 - if (ret) { 5886 + if (unlikely(ret)) { 5889 5887 btrfs_abort_transaction(trans, ret); 5890 5888 return ret; 5891 5889 } ··· 5910 5908 if (eb == root->node) { 5911 5909 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5912 5910 parent = eb->start; 5913 - else if (btrfs_root_id(root) != btrfs_header_owner(eb)) 5911 + else if (unlikely(btrfs_root_id(root) != btrfs_header_owner(eb))) 5914 5912 goto owner_mismatch; 5915 5913 } else { 5916 5914 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5917 5915 parent = path->nodes[level + 1]->start; 5918 - else if (btrfs_root_id(root) != 5919 - btrfs_header_owner(path->nodes[level + 1])) 5916 + else if (unlikely(btrfs_root_id(root) != 5917 + btrfs_header_owner(path->nodes[level + 1]))) 5920 5918 goto owner_mismatch; 5921 5919 } 5922 5920 ··· 6051 6049 * also make sure backrefs for the shared block and all lower level 6052 6050 * blocks are properly updated. 
6053 6051 * 6054 - * If called with for_reloc == 0, may exit early with -EAGAIN 6052 + * If called with for_reloc set, may exit early with -EAGAIN 6055 6053 */ 6056 - int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) 6054 + int btrfs_drop_snapshot(struct btrfs_root *root, bool update_ref, bool for_reloc) 6057 6055 { 6058 6056 const bool is_reloc_root = (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID); 6059 6057 struct btrfs_fs_info *fs_info = root->fs_info; ··· 6180 6178 while (1) { 6181 6179 6182 6180 ret = walk_down_tree(trans, root, path, wc); 6183 - if (ret < 0) { 6181 + if (unlikely(ret < 0)) { 6184 6182 btrfs_abort_transaction(trans, ret); 6185 6183 break; 6186 6184 } 6187 6185 6188 6186 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 6189 - if (ret < 0) { 6187 + if (unlikely(ret < 0)) { 6190 6188 btrfs_abort_transaction(trans, ret); 6191 6189 break; 6192 6190 } ··· 6213 6211 ret = btrfs_update_root(trans, tree_root, 6214 6212 &root->root_key, 6215 6213 root_item); 6216 - if (ret) { 6214 + if (unlikely(ret)) { 6217 6215 btrfs_abort_transaction(trans, ret); 6218 6216 goto out_end_trans; 6219 6217 } ··· 6249 6247 goto out_end_trans; 6250 6248 6251 6249 ret = btrfs_del_root(trans, &root->root_key); 6252 - if (ret) { 6250 + if (unlikely(ret)) { 6253 6251 btrfs_abort_transaction(trans, ret); 6254 6252 goto out_end_trans; 6255 6253 } ··· 6257 6255 if (!is_reloc_root) { 6258 6256 ret = btrfs_find_root(tree_root, &root->root_key, path, 6259 6257 NULL, NULL); 6260 - if (ret < 0) { 6258 + if (unlikely(ret < 0)) { 6261 6259 btrfs_abort_transaction(trans, ret); 6262 6260 goto out_end_trans; 6263 6261 } else if (ret > 0) {

+3 -4

fs/btrfs/extent-tree.h

··· 140 140 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 141 141 struct btrfs_key *ins, int is_data, int delalloc); 142 142 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 143 - struct extent_buffer *buf, int full_backref); 143 + struct extent_buffer *buf, bool full_backref); 144 144 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 145 - struct extent_buffer *buf, int full_backref); 145 + struct extent_buffer *buf, bool full_backref); 146 146 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 147 147 struct extent_buffer *eb, u64 flags); 148 148 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); ··· 155 155 const struct extent_buffer *eb); 156 156 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); 157 157 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref); 158 - int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, 159 - int for_reloc); 158 + int btrfs_drop_snapshot(struct btrfs_root *root, bool update_ref, bool for_reloc); 160 159 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 161 160 struct btrfs_root *root, 162 161 struct extent_buffer *node,

+99 -28

fs/btrfs/extent_io.c

··· 101 101 enum btrfs_compression_type compress_type; 102 102 u32 len_to_oe_boundary; 103 103 blk_opf_t opf; 104 + /* 105 + * For data read bios, we attempt to optimize csum lookups if the extent 106 + * generation is older than the current one. To make this possible, we 107 + * need to track the maximum generation of an extent in a bio_ctrl to 108 + * make the decision when submitting the bio. 109 + * 110 + * The pattern between do_readpage(), submit_one_bio() and 111 + * submit_extent_folio() is quite subtle, so tracking this is tricky. 112 + * 113 + * As we process extent E, we might submit a bio with existing built up 114 + * extents before adding E to a new bio, or we might just add E to the 115 + * bio. As a result, E's generation could apply to the current bio or 116 + * to the next one, so we need to be careful to update the bio_ctrl's 117 + * generation with E's only when we are sure E is added to bio_ctrl->bbio 118 + * in submit_extent_folio(). 119 + * 120 + * See the comment in btrfs_lookup_bio_sums() for more detail on the 121 + * need for this optimization. 122 + */ 123 + u64 generation; 104 124 btrfs_bio_end_io_t end_io_func; 105 125 struct writeback_control *wbc; 106 126 ··· 151 131 u64 last_em_start; 152 132 }; 153 133 134 + /* 135 + * Helper to set the csum search commit root option for a bio_ctrl's bbio 136 + * before submitting the bio. 137 + * 138 + * Only for use by submit_one_bio(). 
139 + */ 140 + static void bio_set_csum_search_commit_root(struct btrfs_bio_ctrl *bio_ctrl) 141 + { 142 + struct btrfs_bio *bbio = bio_ctrl->bbio; 143 + 144 + ASSERT(bbio); 145 + 146 + if (!(btrfs_op(&bbio->bio) == BTRFS_MAP_READ && is_data_inode(bbio->inode))) 147 + return; 148 + 149 + bio_ctrl->bbio->csum_search_commit_root = 150 + (bio_ctrl->generation && 151 + bio_ctrl->generation < btrfs_get_fs_generation(bbio->inode->root->fs_info)); 152 + } 153 + 154 154 static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) 155 155 { 156 156 struct btrfs_bio *bbio = bio_ctrl->bbio; ··· 181 141 /* Caller should ensure the bio has at least some range added */ 182 142 ASSERT(bbio->bio.bi_iter.bi_size); 183 143 144 + bio_set_csum_search_commit_root(bio_ctrl); 145 + 184 146 if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ && 185 147 bio_ctrl->compress_type != BTRFS_COMPRESS_NONE) 186 148 btrfs_submit_compressed_read(bbio); ··· 191 149 192 150 /* The bbio is owned by the end_io handler now */ 193 151 bio_ctrl->bbio = NULL; 152 + /* 153 + * We used the generation to decide whether to lookup csums in the 154 + * commit_root or not when we called bio_set_csum_search_commit_root() 155 + * above. Now, reset the generation for the next bio. 156 + */ 157 + bio_ctrl->generation = 0; 194 158 } 195 159 196 160 /* ··· 393 345 /* step one, find a bunch of delalloc bytes starting at start */ 394 346 delalloc_start = *start; 395 347 delalloc_end = 0; 348 + 349 + /* 350 + * If @max_bytes is smaller than a block, btrfs_find_delalloc_range() can 351 + * return early without handling any dirty ranges. 
352 + */ 353 + ASSERT(max_bytes >= fs_info->sectorsize); 354 + 396 355 found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end, 397 356 max_bytes, &cached_state); 398 357 if (!found || delalloc_end <= *start || delalloc_start > orig_end) { ··· 425 370 if (delalloc_end + 1 - delalloc_start > max_bytes) 426 371 delalloc_end = delalloc_start + max_bytes - 1; 427 372 428 - /* step two, lock all the folioss after the folios that has start */ 373 + /* step two, lock all the folios after the folios that has start */ 429 374 ret = lock_delalloc_folios(inode, locked_folio, delalloc_start, 430 375 delalloc_end); 431 376 ASSERT(!ret || ret == -EAGAIN); 432 377 if (ret == -EAGAIN) { 433 - /* some of the folios are gone, lets avoid looping by 434 - * shortening the size of the delalloc range we're searching 378 + /* 379 + * Some of the folios are gone, lets avoid looping by 380 + * shortening the size of the delalloc range we're searching. 435 381 */ 436 382 btrfs_free_extent_state(cached_state); 437 383 cached_state = NULL; 438 384 if (!loops) { 439 - max_bytes = PAGE_SIZE; 385 + max_bytes = fs_info->sectorsize; 440 386 loops = 1; 441 387 goto again; 442 388 } else { ··· 626 570 * Populate every free slot in a provided array with folios using GFP_NOFS. 
627 571 * 628 572 * @nr_folios: number of folios to allocate 573 + * @order: the order of the folios to be allocated 629 574 * @folio_array: the array to fill with folios; any existing non-NULL entries in 630 575 * the array will be skipped 631 576 * ··· 634 577 * -ENOMEM otherwise, the partially allocated folios would be freed and 635 578 * the array slots zeroed 636 579 */ 637 - int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array) 580 + int btrfs_alloc_folio_array(unsigned int nr_folios, unsigned int order, 581 + struct folio **folio_array) 638 582 { 639 583 for (int i = 0; i < nr_folios; i++) { 640 584 if (folio_array[i]) 641 585 continue; 642 - folio_array[i] = folio_alloc(GFP_NOFS, 0); 586 + folio_array[i] = folio_alloc(GFP_NOFS, order); 643 587 if (!folio_array[i]) 644 588 goto error; 645 589 } ··· 649 591 for (int i = 0; i < nr_folios; i++) { 650 592 if (folio_array[i]) 651 593 folio_put(folio_array[i]); 594 + folio_array[i] = NULL; 652 595 } 653 596 return -ENOMEM; 654 597 } ··· 778 719 * @size: portion of page that we want to write to 779 720 * @pg_offset: offset of the new bio or to check whether we are adding 780 721 * a contiguous page to the previous one 722 + * @read_em_generation: generation of the extent_map we are submitting 723 + * (only used for read) 781 724 * 782 725 * The will either add the page into the existing @bio_ctrl->bbio, or allocate a 783 726 * new one in @bio_ctrl->bbio. 784 - * The mirror number for this IO should already be initizlied in 727 + * The mirror number for this IO should already be initialized in 785 728 * @bio_ctrl->mirror_num. 
786 729 */ 787 730 static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl, 788 731 u64 disk_bytenr, struct folio *folio, 789 - size_t size, unsigned long pg_offset) 732 + size_t size, unsigned long pg_offset, 733 + u64 read_em_generation) 790 734 { 791 735 struct btrfs_inode *inode = folio_to_inode(folio); 792 736 loff_t file_offset = folio_pos(folio) + pg_offset; ··· 820 758 submit_one_bio(bio_ctrl); 821 759 continue; 822 760 } 761 + /* 762 + * Now that the folio is definitely added to the bio, include its 763 + * generation in the max generation calculation. 764 + */ 765 + bio_ctrl->generation = max(bio_ctrl->generation, read_em_generation); 823 766 bio_ctrl->next_file_offset += len; 824 767 825 768 if (bio_ctrl->wbc) ··· 1027 960 bool force_bio_submit = false; 1028 961 u64 disk_bytenr; 1029 962 u64 block_start; 963 + u64 em_gen; 1030 964 1031 965 ASSERT(IS_ALIGNED(cur, fs_info->sectorsize)); 1032 966 if (cur >= last_byte) { ··· 1111 1043 1112 1044 bio_ctrl->last_em_start = em->start; 1113 1045 1046 + em_gen = em->generation; 1114 1047 btrfs_free_extent_map(em); 1115 1048 em = NULL; 1116 1049 ··· 1135 1066 if (force_bio_submit) 1136 1067 submit_one_bio(bio_ctrl); 1137 1068 submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize, 1138 - pg_offset); 1069 + pg_offset, em_gen); 1139 1070 } 1140 1071 return 0; 1141 1072 } ··· 1669 1600 ASSERT(folio_test_writeback(folio)); 1670 1601 1671 1602 submit_extent_folio(bio_ctrl, disk_bytenr, folio, 1672 - sectorsize, filepos - folio_pos(folio)); 1603 + sectorsize, filepos - folio_pos(folio), 0); 1673 1604 return 0; 1674 1605 } 1675 1606 ··· 1690 1621 struct btrfs_fs_info *fs_info = inode->root->fs_info; 1691 1622 unsigned long range_bitmap = 0; 1692 1623 bool submitted_io = false; 1693 - bool error = false; 1624 + int found_error = 0; 1694 1625 const u64 folio_start = folio_pos(folio); 1695 1626 const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 1696 1627 u64 cur; ··· 1754 1685 */ 1755 
1686 btrfs_mark_ordered_io_finished(inode, folio, cur, 1756 1687 fs_info->sectorsize, false); 1757 - error = true; 1688 + if (!found_error) 1689 + found_error = ret; 1758 1690 continue; 1759 1691 } 1760 1692 submitted_io = true; ··· 1772 1702 * If we hit any error, the corresponding sector will have its dirty 1773 1703 * flag cleared and writeback finished, thus no need to handle the error case. 1774 1704 */ 1775 - if (!submitted_io && !error) { 1705 + if (!submitted_io && !found_error) { 1776 1706 btrfs_folio_set_writeback(fs_info, folio, start, len); 1777 1707 btrfs_folio_clear_writeback(fs_info, folio, start, len); 1778 1708 } 1779 - return ret; 1709 + return found_error; 1780 1710 } 1781 1711 1782 1712 /* ··· 2237 2167 * @fs_info: The fs_info for this file system. 2238 2168 * @start: The offset of the range to start waiting on writeback. 2239 2169 * @end: The end of the range, inclusive. This is meant to be used in 2240 - * conjuction with wait_marked_extents, so this will usually be 2170 + * conjunction with wait_marked_extents, so this will usually be 2241 2171 * the_next_eb->start - 1. 2242 2172 */ 2243 2173 void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start, ··· 2507 2437 * In above case, [32K, 96K) is asynchronously submitted 2508 2438 * for compression, and [124K, 128K) needs to be written back. 2509 2439 * 2510 - * If we didn't wait wrtiteback for page 64K, [128K, 128K) 2440 + * If we didn't wait writeback for page 64K, [128K, 128K) 2511 2441 * won't be submitted as the page still has writeback flag 2512 2442 * and will be skipped in the next check. 2513 2443 * ··· 2991 2921 { 2992 2922 const int num_folios = num_extent_folios(eb); 2993 2923 2994 - /* We canont use num_extent_folios() as loop bound as eb->folios changes. */ 2924 + /* We cannot use num_extent_folios() as loop bound as eb->folios changes. 
*/ 2995 2925 for (int i = 0; i < num_folios; i++) { 2996 2926 ASSERT(eb->folios[i]); 2997 2927 detach_extent_buffer_folio(eb, eb->folios[i]); ··· 3238 3168 */ 3239 3169 static bool check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start) 3240 3170 { 3241 - if (!IS_ALIGNED(start, fs_info->sectorsize)) { 3171 + const u32 nodesize = fs_info->nodesize; 3172 + 3173 + if (unlikely(!IS_ALIGNED(start, fs_info->sectorsize))) { 3242 3174 btrfs_err(fs_info, "bad tree block start %llu", start); 3243 3175 return true; 3244 3176 } 3245 3177 3246 - if (fs_info->nodesize < PAGE_SIZE && !IS_ALIGNED(start, fs_info->nodesize)) { 3178 + if (unlikely(nodesize < PAGE_SIZE && !IS_ALIGNED(start, nodesize))) { 3247 3179 btrfs_err(fs_info, 3248 3180 "tree block is not nodesize aligned, start %llu nodesize %u", 3249 - start, fs_info->nodesize); 3181 + start, nodesize); 3250 3182 return true; 3251 3183 } 3252 - if (fs_info->nodesize >= PAGE_SIZE && 3253 - !PAGE_ALIGNED(start)) { 3184 + if (unlikely(nodesize >= PAGE_SIZE && !PAGE_ALIGNED(start))) { 3254 3185 btrfs_err(fs_info, 3255 3186 "tree block is not page aligned, start %llu nodesize %u", 3256 - start, fs_info->nodesize); 3187 + start, nodesize); 3257 3188 return true; 3258 3189 } 3259 - if (!IS_ALIGNED(start, fs_info->nodesize) && 3260 - !test_and_set_bit(BTRFS_FS_UNALIGNED_TREE_BLOCK, &fs_info->flags)) { 3190 + if (unlikely(!IS_ALIGNED(start, nodesize) && 3191 + !test_and_set_bit(BTRFS_FS_UNALIGNED_TREE_BLOCK, &fs_info->flags))) { 3261 3192 btrfs_warn(fs_info, 3262 3193 "tree block not nodesize aligned, start %llu nodesize %u, can be resolved by a full metadata balance", 3263 - start, fs_info->nodesize); 3194 + start, nodesize); 3264 3195 } 3265 3196 return false; 3266 3197 } ··· 3880 3809 return ret; 3881 3810 3882 3811 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING, TASK_UNINTERRUPTIBLE); 3883 - if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3812 + if (unlikely(!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) 3884 3813 
return -EIO; 3885 3814 return 0; 3886 3815 } ··· 4556 4485 if (IS_ERR(eb)) 4557 4486 return; 4558 4487 4559 - if (btrfs_buffer_uptodate(eb, gen, 1)) { 4488 + if (btrfs_buffer_uptodate(eb, gen, true)) { 4560 4489 free_extent_buffer(eb); 4561 4490 return; 4562 4491 }

+2 -1

fs/btrfs/extent_io.h

··· 366 366 367 367 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array, 368 368 bool nofail); 369 - int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array); 369 + int btrfs_alloc_folio_array(unsigned int nr_folios, unsigned int order, 370 + struct folio **folio_array); 370 371 371 372 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 372 373 bool find_lock_delalloc_range(struct inode *inode,

+11 -11

fs/btrfs/extent_map.c

··· 460 460 461 461 static inline void setup_extent_mapping(struct btrfs_inode *inode, 462 462 struct extent_map *em, 463 - int modified) 463 + bool modified) 464 464 { 465 465 refcount_inc(&em->refs); 466 466 ··· 486 486 * taken, or a reference dropped if the merge attempt was successful. 487 487 */ 488 488 static int add_extent_mapping(struct btrfs_inode *inode, 489 - struct extent_map *em, int modified) 489 + struct extent_map *em, bool modified) 490 490 { 491 491 struct extent_map_tree *tree = &inode->extent_tree; 492 492 struct btrfs_root *root = inode->root; ··· 509 509 } 510 510 511 511 static struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, 512 - u64 start, u64 len, int strict) 512 + u64 start, u64 len, bool strict) 513 513 { 514 514 struct extent_map *em; 515 515 struct rb_node *rb_node; ··· 548 548 struct extent_map *btrfs_lookup_extent_mapping(struct extent_map_tree *tree, 549 549 u64 start, u64 len) 550 550 { 551 - return lookup_extent_mapping(tree, start, len, 1); 551 + return lookup_extent_mapping(tree, start, len, true); 552 552 } 553 553 554 554 /* ··· 566 566 struct extent_map *btrfs_search_extent_mapping(struct extent_map_tree *tree, 567 567 u64 start, u64 len) 568 568 { 569 - return lookup_extent_mapping(tree, start, len, 0); 569 + return lookup_extent_mapping(tree, start, len, false); 570 570 } 571 571 572 572 /* ··· 594 594 static void replace_extent_mapping(struct btrfs_inode *inode, 595 595 struct extent_map *cur, 596 596 struct extent_map *new, 597 - int modified) 597 + bool modified) 598 598 { 599 599 struct btrfs_fs_info *fs_info = inode->root->fs_info; 600 600 struct extent_map_tree *tree = &inode->extent_tree; ··· 670 670 em->len = end - start; 671 671 if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) 672 672 em->offset += start_diff; 673 - return add_extent_mapping(inode, em, 0); 673 + return add_extent_mapping(inode, em, false); 674 674 } 675 675 676 676 /* ··· 707 707 if (em->disk_bytenr == EXTENT_MAP_INLINE) 708 708 
ASSERT(em->start == 0); 709 709 710 - ret = add_extent_mapping(inode, em, 0); 710 + ret = add_extent_mapping(inode, em, false); 711 711 /* it is possible that someone inserted the extent into the tree 712 712 * while we had the lock dropped. It is also possible that 713 713 * an overlapping map exists in the tree ··· 1057 1057 btrfs_lock_extent(&inode->io_tree, start, start + len - 1, NULL); 1058 1058 write_lock(&em_tree->lock); 1059 1059 em = btrfs_lookup_extent_mapping(em_tree, start, len); 1060 - if (!em) { 1060 + if (unlikely(!em)) { 1061 1061 ret = -EIO; 1062 1062 goto out_unlock; 1063 1063 } ··· 1082 1082 split_pre->flags = flags; 1083 1083 split_pre->generation = em->generation; 1084 1084 1085 - replace_extent_mapping(inode, em, split_pre, 1); 1085 + replace_extent_mapping(inode, em, split_pre, true); 1086 1086 1087 1087 /* 1088 1088 * Now we only have an extent_map at: ··· 1098 1098 split_mid->ram_bytes = split_mid->len; 1099 1099 split_mid->flags = flags; 1100 1100 split_mid->generation = em->generation; 1101 - add_extent_mapping(inode, split_mid, 1); 1101 + add_extent_mapping(inode, split_mid, true); 1102 1102 1103 1103 /* Once for us */ 1104 1104 btrfs_free_extent_map(em);

+1 -1

fs/btrfs/fiemap.c

··· 153 153 if (cache_end > offset) { 154 154 if (offset == cache->offset) { 155 155 /* 156 - * We cached a dealloc range (found in the io tree) for 156 + * We cached a delalloc range (found in the io tree) for 157 157 * a hole or prealloc extent and we have now found a 158 158 * file extent item for the same offset. What we have 159 159 * now is more recent and up to date, so discard what

+39 -21

fs/btrfs/file-item.c

··· 397 397 path->skip_locking = 1; 398 398 } 399 399 400 + /* 401 + * If we are searching for a csum of an extent from a past 402 + * transaction, we can search in the commit root and reduce 403 + * lock contention on the csum tree extent buffers. 404 + * 405 + * This is important because that lock is an rwsem which gets 406 + * pretty heavy write load under memory pressure and sustained 407 + * csum overwrites, unlike the commit_root_sem. (Memory pressure 408 + * makes us writeback the nodes multiple times per transaction, 409 + * which makes us cow them each time, taking the write lock.) 410 + * 411 + * Due to how rwsem is implemented, there is a possible 412 + * priority inversion where the readers holding the lock don't 413 + * get scheduled (say they're in a cgroup stuck in heavy reclaim) 414 + * which then blocks writers, including transaction commit. By 415 + * using a semaphore with fewer writers (only a commit switching 416 + * the roots), we make this issue less likely. 417 + * 418 + * Note that we don't rely on btrfs_search_slot to lock the 419 + * commit root csum. We call search_slot multiple times, which would 420 + * create a potential race where a commit comes in between searches 421 + * while we are not holding the commit_root_sem, and we get csums 422 + * from across transactions. 
423 + */ 424 + if (bbio->csum_search_commit_root) { 425 + path->search_commit_root = 1; 426 + path->skip_locking = 1; 427 + down_read(&fs_info->commit_root_sem); 428 + } 429 + 400 430 while (bio_offset < orig_len) { 401 431 int count; 402 432 u64 cur_disk_bytenr = orig_disk_bytenr + bio_offset; ··· 472 442 bio_offset += count * sectorsize; 473 443 } 474 444 445 + if (bbio->csum_search_commit_root) 446 + up_read(&fs_info->commit_root_sem); 475 447 return ret; 476 448 } 477 449 ··· 775 743 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 776 744 struct bio *bio = &bbio->bio; 777 745 struct btrfs_ordered_sum *sums; 778 - char *data; 779 - struct bvec_iter iter; 780 - struct bio_vec bvec; 746 + struct bvec_iter iter = bio->bi_iter; 747 + phys_addr_t paddr; 748 + const u32 blocksize = fs_info->sectorsize; 781 749 int index; 782 - unsigned int blockcount; 783 - int i; 784 750 unsigned nofs_flag; 785 751 786 752 nofs_flag = memalloc_nofs_save(); ··· 797 767 798 768 shash->tfm = fs_info->csum_shash; 799 769 800 - bio_for_each_segment(bvec, bio, iter) { 801 - blockcount = BTRFS_BYTES_TO_BLKS(fs_info, 802 - bvec.bv_len + fs_info->sectorsize 803 - - 1); 804 - 805 - for (i = 0; i < blockcount; i++) { 806 - data = bvec_kmap_local(&bvec); 807 - crypto_shash_digest(shash, 808 - data + (i * fs_info->sectorsize), 809 - fs_info->sectorsize, 810 - sums->sums + index); 811 - kunmap_local(data); 812 - index += fs_info->csum_size; 813 - } 814 - 770 + btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) { 771 + btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index); 772 + index += fs_info->csum_size; 815 773 } 816 774 817 775 bbio->sums = sums; ··· 1011 993 * item changed size or key 1012 994 */ 1013 995 ret = btrfs_split_item(trans, root, path, &key, offset); 1014 - if (ret && ret != -EAGAIN) { 996 + if (unlikely(ret && ret != -EAGAIN)) { 1015 997 btrfs_abort_transaction(trans, ret); 1016 998 break; 1017 999 }

+24 -25

fs/btrfs/file.c

··· 327 327 args->start - extent_offset, 328 328 0, false); 329 329 ret = btrfs_inc_extent_ref(trans, &ref); 330 - if (ret) { 330 + if (unlikely(ret)) { 331 331 btrfs_abort_transaction(trans, ret); 332 332 break; 333 333 } ··· 426 426 key.offset - extent_offset, 427 427 0, false); 428 428 ret = btrfs_free_extent(trans, &ref); 429 - if (ret) { 429 + if (unlikely(ret)) { 430 430 btrfs_abort_transaction(trans, ret); 431 431 break; 432 432 } ··· 443 443 444 444 ret = btrfs_del_items(trans, root, path, del_slot, 445 445 del_nr); 446 - if (ret) { 446 + if (unlikely(ret)) { 447 447 btrfs_abort_transaction(trans, ret); 448 448 break; 449 449 } ··· 587 587 588 588 leaf = path->nodes[0]; 589 589 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 590 - if (key.objectid != ino || 591 - key.type != BTRFS_EXTENT_DATA_KEY) { 590 + if (unlikely(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)) { 592 591 ret = -EINVAL; 593 592 btrfs_abort_transaction(trans, ret); 594 593 goto out; 595 594 } 596 595 fi = btrfs_item_ptr(leaf, path->slots[0], 597 596 struct btrfs_file_extent_item); 598 - if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) { 597 + if (unlikely(btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC)) { 599 598 ret = -EINVAL; 600 599 btrfs_abort_transaction(trans, ret); 601 600 goto out; 602 601 } 603 602 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 604 - if (key.offset > start || extent_end < end) { 603 + if (unlikely(key.offset > start || extent_end < end)) { 605 604 ret = -EINVAL; 606 605 btrfs_abort_transaction(trans, ret); 607 606 goto out; ··· 675 676 btrfs_release_path(path); 676 677 goto again; 677 678 } 678 - if (ret < 0) { 679 + if (unlikely(ret < 0)) { 679 680 btrfs_abort_transaction(trans, ret); 680 681 goto out; 681 682 } ··· 703 704 ref.ref_root = btrfs_root_id(root); 704 705 btrfs_init_data_ref(&ref, ino, orig_offset, 0, false); 705 706 ret = btrfs_inc_extent_ref(trans, &ref); 706 - if (ret) { 707 + if 
(unlikely(ret)) { 707 708 btrfs_abort_transaction(trans, ret); 708 709 goto out; 709 710 } ··· 711 712 if (split == start) { 712 713 key.offset = start; 713 714 } else { 714 - if (start != key.offset) { 715 + if (unlikely(start != key.offset)) { 715 716 ret = -EINVAL; 716 717 btrfs_abort_transaction(trans, ret); 717 718 goto out; ··· 743 744 del_slot = path->slots[0] + 1; 744 745 del_nr++; 745 746 ret = btrfs_free_extent(trans, &ref); 746 - if (ret) { 747 + if (unlikely(ret)) { 747 748 btrfs_abort_transaction(trans, ret); 748 749 goto out; 749 750 } ··· 761 762 del_slot = path->slots[0]; 762 763 del_nr++; 763 764 ret = btrfs_free_extent(trans, &ref); 764 - if (ret) { 765 + if (unlikely(ret)) { 765 766 btrfs_abort_transaction(trans, ret); 766 767 goto out; 767 768 } ··· 782 783 extent_end - key.offset); 783 784 784 785 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 785 - if (ret < 0) { 786 + if (unlikely(ret < 0)) { 786 787 btrfs_abort_transaction(trans, ret); 787 788 goto out; 788 789 } ··· 814 815 if (ret) 815 816 return ret; 816 817 folio_lock(folio); 817 - if (!folio_test_uptodate(folio)) { 818 + if (unlikely(!folio_test_uptodate(folio))) { 818 819 folio_unlock(folio); 819 820 return -EIO; 820 821 } ··· 969 970 * Return: 970 971 * > 0 If we can nocow, and updates @write_bytes. 971 972 * 0 If we can't do a nocow write. 972 - * -EAGAIN If we can't do a nocow write because snapshoting of the inode's 973 + * -EAGAIN If we can't do a nocow write because snapshotting of the inode's 973 974 * root is in progress or because we are in a non-blocking IO 974 975 * context and need to block (@nowait is true). 975 976 * < 0 If an error happened. ··· 2459 2460 * got EOPNOTSUPP via prealloc then we messed up and 2460 2461 * need to abort. 
2461 2462 */ 2462 - if (ret && 2463 - (ret != -EOPNOTSUPP || 2464 - (extent_info && extent_info->is_new_extent))) 2463 + if (unlikely(ret && 2464 + (ret != -EOPNOTSUPP || 2465 + (extent_info && extent_info->is_new_extent)))) 2465 2466 btrfs_abort_transaction(trans, ret); 2466 2467 break; 2467 2468 } ··· 2472 2473 cur_offset < ino_size) { 2473 2474 ret = fill_holes(trans, inode, path, cur_offset, 2474 2475 drop_args.drop_end); 2475 - if (ret) { 2476 + if (unlikely(ret)) { 2476 2477 /* 2477 2478 * If we failed then we didn't insert our hole 2478 2479 * entries for the area we dropped, so now the ··· 2492 2493 ret = btrfs_inode_clear_file_extent_range(inode, 2493 2494 cur_offset, 2494 2495 drop_args.drop_end - cur_offset); 2495 - if (ret) { 2496 + if (unlikely(ret)) { 2496 2497 /* 2497 2498 * We couldn't clear our area, so we could 2498 2499 * presumably adjust up and corrupt the fs, so ··· 2511 2512 ret = btrfs_insert_replace_extent(trans, inode, path, 2512 2513 extent_info, replace_len, 2513 2514 drop_args.bytes_found); 2514 - if (ret) { 2515 + if (unlikely(ret)) { 2515 2516 btrfs_abort_transaction(trans, ret); 2516 2517 break; 2517 2518 } ··· 2606 2607 cur_offset < drop_args.drop_end) { 2607 2608 ret = fill_holes(trans, inode, path, cur_offset, 2608 2609 drop_args.drop_end); 2609 - if (ret) { 2610 + if (unlikely(ret)) { 2610 2611 /* Same comment as above. */ 2611 2612 btrfs_abort_transaction(trans, ret); 2612 2613 goto out_trans; ··· 2615 2616 /* See the comment in the loop above for the reasoning here. 
*/ 2616 2617 ret = btrfs_inode_clear_file_extent_range(inode, cur_offset, 2617 2618 drop_args.drop_end - cur_offset); 2618 - if (ret) { 2619 + if (unlikely(ret)) { 2619 2620 btrfs_abort_transaction(trans, ret); 2620 2621 goto out_trans; 2621 2622 } ··· 2625 2626 ret = btrfs_insert_replace_extent(trans, inode, path, 2626 2627 extent_info, extent_info->data_len, 2627 2628 drop_args.bytes_found); 2628 - if (ret) { 2629 + if (unlikely(ret)) { 2629 2630 btrfs_abort_transaction(trans, ret); 2630 2631 goto out_trans; 2631 2632 } ··· 3344 3345 * We could also use the extent map tree to find such delalloc that is 3345 3346 * being flushed, but using the ordered extents tree is more efficient 3346 3347 * because it's usually much smaller as ordered extents are removed from 3347 - * the tree once they complete. With the extent maps, we mau have them 3348 + * the tree once they complete. With the extent maps, we may have them 3348 3349 * in the extent map tree for a very long time, and they were either 3349 3350 * created by previous writes or loaded by read operations. 3350 3351 */

+3 -3

fs/btrfs/free-space-cache.c

··· 2282 2282 * If this block group has some small extents we don't want to 2283 2283 * use up all of our free slots in the cache with them, we want 2284 2284 * to reserve them to larger extents, however if we have plenty 2285 - * of cache left then go ahead an dadd them, no sense in adding 2285 + * of cache left then go ahead and add them, no sense in adding 2286 2286 * the overhead of a bitmap if we don't have to. 2287 2287 */ 2288 2288 if (info->bytes <= fs_info->sectorsize * 8) { ··· 3829 3829 3830 3830 /* 3831 3831 * If we break out of trimming a bitmap prematurely, we should reset the 3832 - * trimming bit. In a rather contrieved case, it's possible to race here so 3832 + * trimming bit. In a rather contrived case, it's possible to race here so 3833 3833 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED. 3834 3834 * 3835 3835 * start = start of bitmap ··· 4142 4142 if (!active) { 4143 4143 set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags); 4144 4144 ret = cleanup_free_space_cache_v1(fs_info, trans); 4145 - if (ret) { 4145 + if (unlikely(ret)) { 4146 4146 btrfs_abort_transaction(trans, ret); 4147 4147 btrfs_end_transaction(trans); 4148 4148 goto out;

+30 -30

fs/btrfs/free-space-tree.c

··· 137 137 if (ret < 0) 138 138 return ret; 139 139 140 - if (ret == 0) { 140 + if (unlikely(ret == 0)) { 141 141 DEBUG_WARN(); 142 142 return -EIO; 143 143 } 144 144 145 - if (p->slots[0] == 0) { 145 + if (unlikely(p->slots[0] == 0)) { 146 146 DEBUG_WARN("no previous slot found"); 147 147 return -EIO; 148 148 } ··· 218 218 219 219 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 220 220 bitmap = alloc_bitmap(bitmap_size); 221 - if (!bitmap) { 221 + if (unlikely(!bitmap)) { 222 222 ret = -ENOMEM; 223 223 btrfs_abort_transaction(trans, ret); 224 224 goto out; ··· 233 233 234 234 while (!done) { 235 235 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 236 - if (ret) { 236 + if (unlikely(ret)) { 237 237 btrfs_abort_transaction(trans, ret); 238 238 goto out; 239 239 } ··· 271 271 } 272 272 273 273 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 274 - if (ret) { 274 + if (unlikely(ret)) { 275 275 btrfs_abort_transaction(trans, ret); 276 276 goto out; 277 277 } ··· 293 293 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 294 294 btrfs_release_path(path); 295 295 296 - if (extent_count != expected_extent_count) { 296 + if (unlikely(extent_count != expected_extent_count)) { 297 297 btrfs_err(fs_info, 298 298 "incorrect extent count for %llu; counted %u, expected %u", 299 299 block_group->start, extent_count, ··· 320 320 321 321 ret = btrfs_insert_empty_item(trans, root, path, &key, 322 322 data_size); 323 - if (ret) { 323 + if (unlikely(ret)) { 324 324 btrfs_abort_transaction(trans, ret); 325 325 goto out; 326 326 } ··· 361 361 362 362 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 363 363 bitmap = alloc_bitmap(bitmap_size); 364 - if (!bitmap) { 364 + if (unlikely(!bitmap)) { 365 365 ret = -ENOMEM; 366 366 btrfs_abort_transaction(trans, ret); 367 367 goto out; ··· 376 376 377 377 while (!done) { 378 378 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 379 - if (ret) { 379 + 
if (unlikely(ret)) { 380 380 btrfs_abort_transaction(trans, ret); 381 381 goto out; 382 382 } ··· 420 420 } 421 421 422 422 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 423 - if (ret) { 423 + if (unlikely(ret)) { 424 424 btrfs_abort_transaction(trans, ret); 425 425 goto out; 426 426 } ··· 454 454 key.offset = (end_bit - start_bit) * fs_info->sectorsize; 455 455 456 456 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 457 - if (ret) { 457 + if (unlikely(ret)) { 458 458 btrfs_abort_transaction(trans, ret); 459 459 goto out; 460 460 } ··· 465 465 start_bit = find_next_bit_le(bitmap, nrbits, end_bit); 466 466 } 467 467 468 - if (extent_count != expected_extent_count) { 468 + if (unlikely(extent_count != expected_extent_count)) { 469 469 btrfs_err(fs_info, 470 470 "incorrect extent count for %llu; counted %u, expected %u", 471 471 block_group->start, extent_count, ··· 848 848 return 0; 849 849 850 850 path = btrfs_alloc_path(); 851 - if (!path) { 851 + if (unlikely(!path)) { 852 852 ret = -ENOMEM; 853 853 btrfs_abort_transaction(trans, ret); 854 854 goto out; 855 855 } 856 856 857 857 block_group = btrfs_lookup_block_group(trans->fs_info, start); 858 - if (!block_group) { 858 + if (unlikely(!block_group)) { 859 859 DEBUG_WARN("no block group found for start=%llu", start); 860 860 ret = -ENOENT; 861 861 btrfs_abort_transaction(trans, ret); ··· 1030 1030 return 0; 1031 1031 1032 1032 path = btrfs_alloc_path(); 1033 - if (!path) { 1033 + if (unlikely(!path)) { 1034 1034 ret = -ENOMEM; 1035 1035 btrfs_abort_transaction(trans, ret); 1036 1036 goto out; 1037 1037 } 1038 1038 1039 1039 block_group = btrfs_lookup_block_group(trans->fs_info, start); 1040 - if (!block_group) { 1040 + if (unlikely(!block_group)) { 1041 1041 DEBUG_WARN("no block group found for start=%llu", start); 1042 1042 ret = -ENOENT; 1043 1043 btrfs_abort_transaction(trans, ret); ··· 1185 1185 goto out_clear; 1186 1186 } 1187 1187 ret = btrfs_global_root_insert(free_space_root); 
1188 - if (ret) { 1188 + if (unlikely(ret)) { 1189 1189 btrfs_put_root(free_space_root); 1190 1190 btrfs_abort_transaction(trans, ret); 1191 1191 btrfs_end_transaction(trans); ··· 1197 1197 block_group = rb_entry(node, struct btrfs_block_group, 1198 1198 cache_node); 1199 1199 ret = populate_free_space_tree(trans, block_group); 1200 - if (ret) { 1200 + if (unlikely(ret)) { 1201 1201 btrfs_abort_transaction(trans, ret); 1202 1202 btrfs_end_transaction(trans); 1203 1203 goto out_clear; ··· 1290 1290 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1291 1291 1292 1292 ret = clear_free_space_tree(trans, free_space_root); 1293 - if (ret) { 1293 + if (unlikely(ret)) { 1294 1294 btrfs_abort_transaction(trans, ret); 1295 1295 btrfs_end_transaction(trans); 1296 1296 return ret; 1297 1297 } 1298 1298 1299 1299 ret = btrfs_del_root(trans, &free_space_root->root_key); 1300 - if (ret) { 1300 + if (unlikely(ret)) { 1301 1301 btrfs_abort_transaction(trans, ret); 1302 1302 btrfs_end_transaction(trans); 1303 1303 return ret; ··· 1315 1315 ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), 1316 1316 free_space_root->node, 0, 1); 1317 1317 btrfs_put_root(free_space_root); 1318 - if (ret < 0) { 1318 + if (unlikely(ret < 0)) { 1319 1319 btrfs_abort_transaction(trans, ret); 1320 1320 btrfs_end_transaction(trans); 1321 1321 return ret; ··· 1344 1344 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1345 1345 1346 1346 ret = clear_free_space_tree(trans, free_space_root); 1347 - if (ret) { 1347 + if (unlikely(ret)) { 1348 1348 btrfs_abort_transaction(trans, ret); 1349 1349 btrfs_end_transaction(trans); 1350 1350 return ret; ··· 1362 1362 goto next; 1363 1363 1364 1364 ret = populate_free_space_tree(trans, block_group); 1365 - if (ret) { 1365 + if (unlikely(ret)) { 1366 1366 btrfs_abort_transaction(trans, ret); 1367 1367 btrfs_end_transaction(trans); 1368 1368 return ret; ··· 1422 1422 1423 1423 if (!path) { 1424 1424 path = btrfs_alloc_path(); 1425 - if 
(!path) { 1425 + if (unlikely(!path)) { 1426 1426 btrfs_abort_transaction(trans, -ENOMEM); 1427 1427 return -ENOMEM; 1428 1428 } ··· 1430 1430 } 1431 1431 1432 1432 ret = add_new_free_space_info(trans, block_group, path); 1433 - if (ret) { 1433 + if (unlikely(ret)) { 1434 1434 btrfs_abort_transaction(trans, ret); 1435 1435 goto out; 1436 1436 } ··· 1481 1481 } 1482 1482 1483 1483 path = btrfs_alloc_path(); 1484 - if (!path) { 1484 + if (unlikely(!path)) { 1485 1485 ret = -ENOMEM; 1486 1486 btrfs_abort_transaction(trans, ret); 1487 1487 goto out; ··· 1496 1496 1497 1497 while (!done) { 1498 1498 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 1499 - if (ret) { 1499 + if (unlikely(ret)) { 1500 1500 btrfs_abort_transaction(trans, ret); 1501 1501 goto out; 1502 1502 } ··· 1527 1527 } 1528 1528 1529 1529 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 1530 - if (ret) { 1530 + if (unlikely(ret)) { 1531 1531 btrfs_abort_transaction(trans, ret); 1532 1532 goto out; 1533 1533 } ··· 1611 1611 extent_count++; 1612 1612 } 1613 1613 1614 - if (extent_count != expected_extent_count) { 1614 + if (unlikely(extent_count != expected_extent_count)) { 1615 1615 btrfs_err(fs_info, 1616 1616 "incorrect extent count for %llu; counted %u, expected %u", 1617 1617 block_group->start, extent_count, ··· 1672 1672 extent_count++; 1673 1673 } 1674 1674 1675 - if (extent_count != expected_extent_count) { 1675 + if (unlikely(extent_count != expected_extent_count)) { 1676 1676 btrfs_err(fs_info, 1677 1677 "incorrect extent count for %llu; counted %u, expected %u", 1678 1678 block_group->start, extent_count,

+48

fs/btrfs/fs.c

··· 55 55 } 56 56 57 57 /* 58 + * We support the following block sizes for all systems: 59 + * 60 + * - 4K 61 + * This is the most common block size. For PAGE SIZE > 4K cases the subpage 62 + * mode is used. 63 + * 64 + * - PAGE_SIZE 65 + * The straightforward block size to support. 66 + * 67 + * And extra support for the following block sizes based on the kernel config: 68 + * 69 + * - MIN_BLOCKSIZE 70 + * This is either 4K (regular builds) or 2K (debug builds) 71 + * This allows testing subpage routines on x86_64. 72 + */ 73 + bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize) 74 + { 75 + /* @blocksize should be validated first. */ 76 + ASSERT(is_power_of_2(blocksize) && blocksize >= BTRFS_MIN_BLOCKSIZE && 77 + blocksize <= BTRFS_MAX_BLOCKSIZE); 78 + 79 + if (blocksize == PAGE_SIZE || blocksize == SZ_4K || blocksize == BTRFS_MIN_BLOCKSIZE) 80 + return true; 81 + #ifdef CONFIG_BTRFS_EXPERIMENTAL 82 + /* 83 + * For bs > ps support it's done by specifying a minimal folio order 84 + * for filemap, thus implying large data folios. 85 + * For HIGHMEM systems, we can not always access the content of a (large) 86 + * folio in one go, but go through them page by page. 87 + * 88 + * A lot of features don't implement a proper PAGE sized loop for large 89 + * folios, this includes: 90 + * 91 + * - compression 92 + * - verity 93 + * - encoded write 94 + * 95 + * Considering HIGHMEM is such a pain to deal with and it's going 96 + * to be deprecated eventually, just reject HIGHMEM && bs > ps cases. 97 + */ 98 + if (IS_ENABLED(CONFIG_HIGHMEM) && blocksize > PAGE_SIZE) 99 + return false; 100 + return true; 101 + #endif 102 + return false; 103 + } 104 + 105 + /* 58 106 * Start exclusive operation @type, return true on success. 59 107 */ 60 108 bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,

+33 -8

fs/btrfs/fs.h

··· 59 59 #define BTRFS_MIN_BLOCKSIZE (SZ_4K) 60 60 #endif 61 61 62 + #define BTRFS_MAX_BLOCKSIZE (SZ_64K) 63 + 62 64 #define BTRFS_MAX_EXTENT_SIZE SZ_128M 63 65 64 66 #define BTRFS_OLDEST_GENERATION 0ULL ··· 104 102 BTRFS_FS_STATE_RO, 105 103 /* Track if a transaction abort has been reported on this filesystem */ 106 104 BTRFS_FS_STATE_TRANS_ABORTED, 105 + /* Track if log replay has failed. */ 106 + BTRFS_FS_STATE_LOG_REPLAY_ABORTED, 107 107 /* 108 108 * Bio operations should be blocked on this filesystem because a source 109 109 * or target device is being destroyed as part of a device replace ··· 247 243 BTRFS_MOUNT_NOSPACECACHE = (1ULL << 30), 248 244 BTRFS_MOUNT_IGNOREMETACSUMS = (1ULL << 31), 249 245 BTRFS_MOUNT_IGNORESUPERFLAGS = (1ULL << 32), 246 + BTRFS_MOUNT_REF_TRACKER = (1ULL << 33), 250 247 }; 251 248 252 249 /* ··· 285 280 286 281 #ifdef CONFIG_BTRFS_EXPERIMENTAL 287 282 /* 288 - * Features under developmen like Extent tree v2 support is enabled 283 + * Features under development like Extent tree v2 support is enabled 289 284 * only under CONFIG_BTRFS_EXPERIMENTAL 290 285 */ 291 286 #define BTRFS_FEATURE_INCOMPAT_SUPP \ ··· 307 302 #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) 308 303 #define BTRFS_WARNING_COMMIT_INTERVAL (300) 309 304 #define BTRFS_DEFAULT_MAX_INLINE (2048) 305 + 306 + enum btrfs_compression_type { 307 + BTRFS_COMPRESS_NONE = 0, 308 + BTRFS_COMPRESS_ZLIB = 1, 309 + BTRFS_COMPRESS_LZO = 2, 310 + BTRFS_COMPRESS_ZSTD = 3, 311 + BTRFS_NR_COMPRESS_TYPES = 4, 312 + 313 + BTRFS_DEFRAG_DONT_COMPRESS, 314 + }; 310 315 311 316 struct btrfs_dev_replace { 312 317 /* See #define above */ ··· 519 504 */ 520 505 u64 last_trans_log_full_commit; 521 506 unsigned long long mount_opt; 507 + 508 + /* Compress related structures. 
*/ 509 + void *compr_wsm[BTRFS_NR_COMPRESS_TYPES]; 522 510 523 511 int compress_type; 524 512 int compress_level; ··· 827 809 u32 sectorsize; 828 810 /* ilog2 of sectorsize, use to avoid 64bit division */ 829 811 u32 sectorsize_bits; 812 + u32 block_min_order; 813 + u32 block_max_order; 830 814 u32 csum_size; 831 815 u32 csums_per_leaf; 832 816 u32 stripesize; ··· 898 878 struct lockdep_map btrfs_trans_pending_ordered_map; 899 879 struct lockdep_map btrfs_ordered_extent_map; 900 880 901 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 881 + #ifdef CONFIG_BTRFS_DEBUG 902 882 spinlock_t ref_verify_lock; 903 883 struct rb_root block_tree; 904 - #endif 905 884 906 - #ifdef CONFIG_BTRFS_DEBUG 907 885 struct kobject *debug_kobj; 908 886 struct list_head allocated_roots; 909 887 ··· 921 903 static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) 922 904 { 923 905 return mapping_gfp_constraint(mapping, ~__GFP_FS); 906 + } 907 + 908 + /* Return the minimal folio size of the fs. */ 909 + static inline unsigned int btrfs_min_folio_size(struct btrfs_fs_info *fs_info) 910 + { 911 + return 1U << (PAGE_SHIFT + fs_info->block_min_order); 924 912 } 925 913 926 914 static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info) ··· 1021 997 return folio_size(folio) >> fs_info->sectorsize_bits; 1022 998 } 1023 999 1000 + bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize); 1024 1001 bool btrfs_exclop_start(struct btrfs_fs_info *fs_info, 1025 1002 enum btrfs_exclusive_operation type); 1026 1003 bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info, ··· 1132 1107 1133 1108 #define EXPORT_FOR_TESTS 1134 1109 1135 - static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info) 1110 + static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info) 1136 1111 { 1137 - return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); 1112 + return unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state)); 1138 1113 } 
1139 1114 1140 1115 void btrfs_test_destroy_inode(struct inode *inode); ··· 1143 1118 1144 1119 #define EXPORT_FOR_TESTS static 1145 1120 1146 - static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info) 1121 + static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info) 1147 1122 { 1148 - return 0; 1123 + return false; 1149 1124 } 1150 1125 #endif 1151 1126

+5 -5

fs/btrfs/inode-item.c

··· 137 137 */ 138 138 extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], 139 139 ref_objectid, name); 140 - if (!extref) { 140 + if (unlikely(!extref)) { 141 141 btrfs_abort_transaction(trans, -ENOENT); 142 142 return -ENOENT; 143 143 } ··· 627 627 if (control->clear_extent_range) { 628 628 ret = btrfs_inode_clear_file_extent_range(control->inode, 629 629 clear_start, clear_len); 630 - if (ret) { 630 + if (unlikely(ret)) { 631 631 btrfs_abort_transaction(trans, ret); 632 632 break; 633 633 } ··· 666 666 btrfs_init_data_ref(&ref, control->ino, extent_offset, 667 667 btrfs_root_id(root), false); 668 668 ret = btrfs_free_extent(trans, &ref); 669 - if (ret) { 669 + if (unlikely(ret)) { 670 670 btrfs_abort_transaction(trans, ret); 671 671 break; 672 672 } ··· 684 684 ret = btrfs_del_items(trans, root, path, 685 685 pending_del_slot, 686 686 pending_del_nr); 687 - if (ret) { 687 + if (unlikely(ret)) { 688 688 btrfs_abort_transaction(trans, ret); 689 689 break; 690 690 } ··· 720 720 int ret2; 721 721 722 722 ret2 = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); 723 - if (ret2) { 723 + if (unlikely(ret2)) { 724 724 btrfs_abort_transaction(trans, ret2); 725 725 ret = ret2; 726 726 }

+266 -240

fs/btrfs/inode.c

··· 72 72 #include "raid-stripe-tree.h" 73 73 #include "fiemap.h" 74 74 75 + #define COW_FILE_RANGE_KEEP_LOCKED (1UL << 0) 76 + #define COW_FILE_RANGE_NO_INLINE (1UL << 1) 77 + 75 78 struct btrfs_iget_args { 76 79 u64 ino; 77 80 struct btrfs_root *root; ··· 370 367 } 371 368 372 369 /* 373 - * Unock inode i_rwsem. 370 + * Unlock inode i_rwsem. 374 371 * 375 372 * ilock_flags should contain the same bits set as passed to btrfs_inode_lock() 376 373 * to decide whether the lock acquired is shared or exclusive. ··· 634 631 drop_args.replace_extent = true; 635 632 drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(data_len); 636 633 ret = btrfs_drop_extents(trans, root, inode, &drop_args); 637 - if (ret) { 634 + if (unlikely(ret)) { 638 635 btrfs_abort_transaction(trans, ret); 639 636 goto out; 640 637 } ··· 642 639 ret = insert_inline_extent(trans, path, inode, drop_args.extent_inserted, 643 640 size, compressed_size, compress_type, 644 641 compressed_folio, update_i_size); 645 - if (ret && ret != -ENOSPC) { 642 + if (unlikely(ret && ret != -ENOSPC)) { 646 643 btrfs_abort_transaction(trans, ret); 647 644 goto out; 648 645 } else if (ret == -ENOSPC) { ··· 652 649 653 650 btrfs_update_inode_bytes(inode, size, drop_args.bytes_found); 654 651 ret = btrfs_update_inode(trans, inode); 655 - if (ret && ret != -ENOSPC) { 652 + if (unlikely(ret && ret != -ENOSPC)) { 656 653 btrfs_abort_transaction(trans, ret); 657 654 goto out; 658 655 } else if (ret == -ENOSPC) { ··· 854 851 struct btrfs_inode *inode = async_chunk->inode; 855 852 struct btrfs_fs_info *fs_info = inode->root->fs_info; 856 853 struct address_space *mapping = inode->vfs_inode.i_mapping; 854 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 855 + const u32 min_folio_size = btrfs_min_folio_size(fs_info); 857 856 u64 blocksize = fs_info->sectorsize; 858 857 u64 start = async_chunk->start; 859 858 u64 end = async_chunk->end; ··· 866 861 unsigned long nr_folios; 867 862 unsigned long 
total_compressed = 0; 868 863 unsigned long total_in = 0; 869 - unsigned int poff; 864 + unsigned int loff; 870 865 int i; 871 866 int compress_type = fs_info->compress_type; 872 867 int compress_level = fs_info->compress_level; ··· 904 899 actual_end = min_t(u64, i_size, end + 1); 905 900 again: 906 901 folios = NULL; 907 - nr_folios = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; 908 - nr_folios = min_t(unsigned long, nr_folios, BTRFS_MAX_COMPRESSED_PAGES); 902 + nr_folios = (end >> min_folio_shift) - (start >> min_folio_shift) + 1; 903 + nr_folios = min_t(unsigned long, nr_folios, BTRFS_MAX_COMPRESSED >> min_folio_shift); 909 904 910 905 /* 911 906 * we don't want to send crud past the end of i_size through ··· 961 956 962 957 /* Compression level is applied here. */ 963 958 ret = btrfs_compress_folios(compress_type, compress_level, 964 - mapping, start, folios, &nr_folios, &total_in, 959 + inode, start, folios, &nr_folios, &total_in, 965 960 &total_compressed); 966 961 if (ret) 967 962 goto mark_incompressible; 968 963 969 964 /* 970 - * Zero the tail end of the last page, as we might be sending it down 965 + * Zero the tail end of the last folio, as we might be sending it down 971 966 * to disk. 972 967 */ 973 - poff = offset_in_page(total_compressed); 974 - if (poff) 975 - folio_zero_range(folios[nr_folios - 1], poff, PAGE_SIZE - poff); 968 + loff = (total_compressed & (min_folio_size - 1)); 969 + if (loff) 970 + folio_zero_range(folios[nr_folios - 1], loff, min_folio_size - loff); 976 971 977 972 /* 978 973 * Try to create an inline extent. ··· 1250 1245 * locked_folio is the folio that writepage had locked already. We use 1251 1246 * it to make sure we don't do extra locks or unlocks. 1252 1247 * 1253 - * When this function fails, it unlocks all pages except @locked_folio. 1248 + * When this function fails, it unlocks all folios except @locked_folio. 
1254 1249 * 1255 1250 * When this function successfully creates an inline extent, it returns 1 and 1256 - * unlocks all pages including locked_folio and starts I/O on them. 1257 - * (In reality inline extents are limited to a single page, so locked_folio is 1258 - * the only page handled anyway). 1251 + * unlocks all folios including locked_folio and starts I/O on them. 1252 + * (In reality inline extents are limited to a single block, so locked_folio is 1253 + * the only folio handled anyway). 1259 1254 * 1260 - * When this function succeed and creates a normal extent, the page locking 1255 + * When this function succeed and creates a normal extent, the folio locking 1261 1256 * status depends on the passed in flags: 1262 1257 * 1263 - * - If @keep_locked is set, all pages are kept locked. 1264 - * - Else all pages except for @locked_folio are unlocked. 1258 + * - If COW_FILE_RANGE_KEEP_LOCKED flag is set, all folios are kept locked. 1259 + * - Else all folios except for @locked_folio are unlocked. 1265 1260 * 1266 1261 * When a failure happens in the second or later iteration of the 1267 1262 * while-loop, the ordered extents created in previous iterations are cleaned up. ··· 1269 1264 static noinline int cow_file_range(struct btrfs_inode *inode, 1270 1265 struct folio *locked_folio, u64 start, 1271 1266 u64 end, u64 *done_offset, 1272 - bool keep_locked, bool no_inline) 1267 + unsigned long flags) 1273 1268 { 1274 1269 struct btrfs_root *root = inode->root; 1275 1270 struct btrfs_fs_info *fs_info = root->fs_info; ··· 1297 1292 1298 1293 inode_should_defrag(inode, start, end, num_bytes, SZ_64K); 1299 1294 1300 - if (!no_inline) { 1295 + if (!(flags & COW_FILE_RANGE_NO_INLINE)) { 1301 1296 /* lets try to make an inline extent */ 1302 1297 ret = cow_file_range_inline(inode, locked_folio, start, end, 0, 1303 1298 BTRFS_COMPRESS_NONE, NULL, false); ··· 1325 1320 * Do set the Ordered (Private2) bit so we know this page was properly 1326 1321 * setup for writepage. 
1327 1322 */ 1328 - page_ops = (keep_locked ? 0 : PAGE_UNLOCK); 1323 + page_ops = ((flags & COW_FILE_RANGE_KEEP_LOCKED) ? 0 : PAGE_UNLOCK); 1329 1324 page_ops |= PAGE_SET_ORDERED; 1330 1325 1331 1326 /* ··· 1536 1531 btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size, 1537 1532 end - start - cur_alloc_size + 1, NULL); 1538 1533 } 1539 - btrfs_err_rl(fs_info, 1540 - "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d", 1541 - __func__, btrfs_root_id(inode->root), 1542 - btrfs_ino(inode), orig_start, end + 1 - orig_start, ret); 1534 + btrfs_err(fs_info, 1535 + "%s failed, root=%llu inode=%llu start=%llu len=%llu cur_offset=%llu cur_alloc_size=%llu: %d", 1536 + __func__, btrfs_root_id(inode->root), 1537 + btrfs_ino(inode), orig_start, end + 1 - orig_start, 1538 + start, cur_alloc_size, ret); 1543 1539 return ret; 1544 1540 } 1545 1541 ··· 1693 1687 1694 1688 while (start <= end) { 1695 1689 ret = cow_file_range(inode, locked_folio, start, end, 1696 - &done_offset, true, false); 1690 + &done_offset, COW_FILE_RANGE_KEEP_LOCKED); 1697 1691 if (ret) 1698 1692 return ret; 1699 1693 extent_write_locked_range(&inode->vfs_inode, locked_folio, ··· 1774 1768 * Don't try to create inline extents, as a mix of inline extent that 1775 1769 * is written out and unlocked directly and a normal NOCOW extent 1776 1770 * doesn't work. 1771 + * 1772 + * And here we do not unlock the folio after a successful run. 1773 + * The folios will be unlocked after everything is finished, or by error handling. 1774 + * 1775 + * This is to ensure error handling won't need to clear dirty/ordered flags without 1776 + * a locked folio, which can race with writeback. 1777 1777 */ 1778 - ret = cow_file_range(inode, locked_folio, start, end, NULL, false, 1779 - true); 1778 + ret = cow_file_range(inode, locked_folio, start, end, NULL, 1779 + COW_FILE_RANGE_NO_INLINE | COW_FILE_RANGE_KEEP_LOCKED); 1780 1780 ASSERT(ret != 1); 1781 1781 return ret; 1782 1782 } ··· 1925 1913 return ret < 0 ? 
ret : can_nocow; 1926 1914 } 1927 1915 1928 - /* 1929 - * Cleanup the dirty folios which will never be submitted due to error. 1930 - * 1931 - * When running a delalloc range, we may need to split the ranges (due to 1932 - * fragmentation or NOCOW). If we hit an error in the later part, we will error 1933 - * out and previously successfully executed range will never be submitted, thus 1934 - * we have to cleanup those folios by clearing their dirty flag, starting and 1935 - * finishing the writeback. 1936 - */ 1937 - static void cleanup_dirty_folios(struct btrfs_inode *inode, 1938 - struct folio *locked_folio, 1939 - u64 start, u64 end, int error) 1940 - { 1941 - struct btrfs_fs_info *fs_info = inode->root->fs_info; 1942 - struct address_space *mapping = inode->vfs_inode.i_mapping; 1943 - pgoff_t start_index = start >> PAGE_SHIFT; 1944 - pgoff_t end_index = end >> PAGE_SHIFT; 1945 - u32 len; 1946 - 1947 - ASSERT(end + 1 - start < U32_MAX); 1948 - ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && 1949 - IS_ALIGNED(end + 1, fs_info->sectorsize)); 1950 - len = end + 1 - start; 1951 - 1952 - /* 1953 - * Handle the locked folio first. 1954 - * The btrfs_folio_clamp_*() helpers can handle range out of the folio case. 1955 - */ 1956 - btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len); 1957 - 1958 - for (pgoff_t index = start_index; index <= end_index; index++) { 1959 - struct folio *folio; 1960 - 1961 - /* Already handled at the beginning. */ 1962 - if (index == locked_folio->index) 1963 - continue; 1964 - folio = __filemap_get_folio(mapping, index, FGP_LOCK, GFP_NOFS); 1965 - /* Cache already dropped, no need to do any cleanup. 
*/ 1966 - if (IS_ERR(folio)) 1967 - continue; 1968 - btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len); 1969 - folio_unlock(folio); 1970 - folio_put(folio); 1971 - } 1972 - mapping_set_error(mapping, error); 1973 - } 1974 - 1975 1916 static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio, 1976 1917 struct extent_state **cached, 1977 1918 struct can_nocow_file_extent_args *nocow_args, 1978 1919 u64 file_pos, bool is_prealloc) 1979 1920 { 1980 1921 struct btrfs_ordered_extent *ordered; 1981 - u64 len = nocow_args->file_extent.num_bytes; 1982 - u64 end = file_pos + len - 1; 1922 + const u64 len = nocow_args->file_extent.num_bytes; 1923 + const u64 end = file_pos + len - 1; 1983 1924 int ret = 0; 1984 1925 1985 1926 btrfs_lock_extent(&inode->io_tree, file_pos, end, cached); ··· 1943 1978 em = btrfs_create_io_em(inode, file_pos, &nocow_args->file_extent, 1944 1979 BTRFS_ORDERED_PREALLOC); 1945 1980 if (IS_ERR(em)) { 1946 - btrfs_unlock_extent(&inode->io_tree, file_pos, end, cached); 1947 - return PTR_ERR(em); 1981 + ret = PTR_ERR(em); 1982 + goto error; 1948 1983 } 1949 1984 btrfs_free_extent_map(em); 1950 1985 } ··· 1956 1991 if (IS_ERR(ordered)) { 1957 1992 if (is_prealloc) 1958 1993 btrfs_drop_extent_map_range(inode, file_pos, end, false); 1959 - btrfs_unlock_extent(&inode->io_tree, file_pos, end, cached); 1960 - return PTR_ERR(ordered); 1994 + ret = PTR_ERR(ordered); 1995 + goto error; 1961 1996 } 1962 1997 1963 1998 if (btrfs_is_data_reloc_root(inode->root)) ··· 1969 2004 ret = btrfs_reloc_clone_csums(ordered); 1970 2005 btrfs_put_ordered_extent(ordered); 1971 2006 2007 + if (ret < 0) 2008 + goto error; 1972 2009 extent_clear_unlock_delalloc(inode, file_pos, end, locked_folio, cached, 1973 2010 EXTENT_LOCKED | EXTENT_DELALLOC | 1974 2011 EXTENT_CLEAR_DATA_RESV, 1975 - PAGE_UNLOCK | PAGE_SET_ORDERED); 1976 - /* 1977 - * On error, we need to cleanup the ordered extents we created. 
1978 - * 1979 - * We do not clear the folio Dirty flags because they are set and 1980 - * cleaered by the caller. 1981 - */ 1982 - if (ret < 0) 1983 - btrfs_cleanup_ordered_extents(inode, file_pos, len); 2012 + PAGE_SET_ORDERED); 2013 + return ret; 2014 + 2015 + error: 2016 + btrfs_cleanup_ordered_extents(inode, file_pos, len); 2017 + extent_clear_unlock_delalloc(inode, file_pos, end, locked_folio, cached, 2018 + EXTENT_LOCKED | EXTENT_DELALLOC | 2019 + EXTENT_CLEAR_DATA_RESV, 2020 + PAGE_UNLOCK | PAGE_START_WRITEBACK | 2021 + PAGE_END_WRITEBACK); 2022 + btrfs_err(inode->root->fs_info, 2023 + "%s failed, root=%lld inode=%llu start=%llu len=%llu: %d", 2024 + __func__, btrfs_root_id(inode->root), btrfs_ino(inode), 2025 + file_pos, len, ret); 1984 2026 return ret; 1985 2027 } 1986 2028 1987 2029 /* 1988 - * when nowcow writeback call back. This checks for snapshots or COW copies 2030 + * When nocow writeback calls back. This checks for snapshots or COW copies 1989 2031 * of the extents that exist in the file, and COWs the file as required. 1990 2032 * 1991 2033 * If no cow copies or snapshots exist, we write directly to the existing ··· 2009 2037 /* 2010 2038 * If not 0, represents the inclusive end of the last fallback_to_cow() 2011 2039 * range. Only for error handling. 2040 + * 2041 + * The same for nocow_end, it's to avoid double cleaning up the range 2042 + * already cleaned by nocow_one_range(). 2012 2043 */ 2013 2044 u64 cow_end = 0; 2045 + u64 nocow_end = 0; 2014 2046 u64 cur_offset = start; 2015 2047 int ret; 2016 2048 bool check_prev = true; 2017 2049 u64 ino = btrfs_ino(inode); 2018 2050 struct can_nocow_file_extent_args nocow_args = { 0 }; 2051 + /* The range that has ordered extent(s). */ 2052 + u64 oe_cleanup_start; 2053 + u64 oe_cleanup_len = 0; 2054 + /* The range that is untouched. 
*/ 2055 + u64 untouched_start; 2056 + u64 untouched_len = 0; 2019 2057 2020 2058 /* 2021 2059 * Normally on a zoned device we're only doing COW writes, but in case ··· 2189 2207 &nocow_args, cur_offset, 2190 2208 extent_type == BTRFS_FILE_EXTENT_PREALLOC); 2191 2209 btrfs_dec_nocow_writers(nocow_bg); 2192 - if (ret < 0) 2210 + if (ret < 0) { 2211 + nocow_end = cur_offset + nocow_args.file_extent.num_bytes - 1; 2193 2212 goto error; 2213 + } 2194 2214 cur_offset = extent_end; 2195 2215 } 2196 2216 btrfs_release_path(path); ··· 2209 2225 cow_start = (u64)-1; 2210 2226 } 2211 2227 2228 + /* 2229 + * Everything is finished without an error, can unlock the folios now. 2230 + * 2231 + * No need to touch the io tree range nor set folio ordered flag, as 2232 + * fallback_to_cow() and nocow_one_range() have already handled them. 2233 + */ 2234 + extent_clear_unlock_delalloc(inode, start, end, locked_folio, NULL, 0, PAGE_UNLOCK); 2235 + 2212 2236 btrfs_free_path(path); 2213 2237 return 0; 2214 2238 2215 2239 error: 2216 - /* 2217 - * There are several error cases: 2218 - * 2219 - * 1) Failed without falling back to COW 2220 - * start cur_offset end 2221 - * |/////////////| | 2222 - * 2223 - * In this case, cow_start should be (u64)-1. 2224 - * 2225 - * For range [start, cur_offset) the folios are already unlocked (except 2226 - * @locked_folio), EXTENT_DELALLOC already removed. 2227 - * Need to clear the dirty flags and finish the ordered extents. 2228 - * 2229 - * 2) Failed with error before calling fallback_to_cow() 2230 - * 2231 - * start cow_start end 2232 - * |/////////////| | 2233 - * 2234 - * In this case, only @cow_start is set, @cur_offset is between 2235 - * [cow_start, end) 2236 - * 2237 - * It's mostly the same as case 1), just replace @cur_offset with 2238 - * @cow_start. 
2239 - * 2240 - * 3) Failed with error from fallback_to_cow() 2241 - * 2242 - * start cow_start cow_end end 2243 - * |/////////////|-----------| | 2244 - * 2245 - * In this case, both @cow_start and @cow_end is set. 2246 - * 2247 - * For range [start, cow_start) it's the same as case 1). 2248 - * But for range [cow_start, cow_end), all the cleanup is handled by 2249 - * cow_file_range(), we should not touch anything in that range. 2250 - * 2251 - * So for all above cases, if @cow_start is set, cleanup ordered extents 2252 - * for range [start, @cow_start), other wise cleanup range [start, @cur_offset). 2253 - */ 2254 - if (cow_start != (u64)-1) 2255 - cur_offset = cow_start; 2256 - 2257 - if (cur_offset > start) { 2258 - btrfs_cleanup_ordered_extents(inode, start, cur_offset - start); 2259 - cleanup_dirty_folios(inode, locked_folio, start, cur_offset - 1, ret); 2240 + if (cow_start == (u64)-1) { 2241 + /* 2242 + * case a) 2243 + * start cur_offset end 2244 + * | OE cleanup | Untouched | 2245 + * 2246 + * We finished a fallback_to_cow() or nocow_one_range() call, 2247 + * but failed to check the next range. 2248 + * 2249 + * or 2250 + * start cur_offset nocow_end end 2251 + * | OE cleanup | Skip | Untouched | 2252 + * 2253 + * nocow_one_range() failed, the range [cur_offset, nocow_end] is 2254 + * already cleaned up. 2255 + */ 2256 + oe_cleanup_start = start; 2257 + oe_cleanup_len = cur_offset - start; 2258 + if (nocow_end) 2259 + untouched_start = nocow_end + 1; 2260 + else 2261 + untouched_start = cur_offset; 2262 + untouched_len = end + 1 - untouched_start; 2263 + } else if (cow_start != (u64)-1 && cow_end == 0) { 2264 + /* 2265 + * case b) 2266 + * start cow_start cur_offset end 2267 + * | OE cleanup | Untouched | 2268 + * 2269 + * We got a range that needs COW, but before we hit the next NOCOW range, 2270 + * thus [cow_start, cur_offset) doesn't yet have any OE. 
2271 + */ 2272 + oe_cleanup_start = start; 2273 + oe_cleanup_len = cow_start - start; 2274 + untouched_start = cow_start; 2275 + untouched_len = end + 1 - untouched_start; 2276 + } else { 2277 + /* 2278 + * case c) 2279 + * start cow_start cow_end end 2280 + * | OE cleanup | Skip | Untouched | 2281 + * 2282 + * fallback_to_cow() failed, and fallback_to_cow() will do the 2283 + * cleanup for its range, we shouldn't touch the range 2284 + * [cow_start, cow_end]. 2285 + */ 2286 + ASSERT(cow_start != (u64)-1 && cow_end != 0); 2287 + oe_cleanup_start = start; 2288 + oe_cleanup_len = cow_start - start; 2289 + untouched_start = cow_end + 1; 2290 + untouched_len = end + 1 - untouched_start; 2260 2291 } 2261 2292 2262 - /* 2263 - * If an error happened while a COW region is outstanding, cur_offset 2264 - * needs to be reset to @cow_end + 1 to skip the COW range, as 2265 - * cow_file_range() will do the proper cleanup at error. 2266 - */ 2267 - if (cow_end) 2268 - cur_offset = cow_end + 1; 2293 + if (oe_cleanup_len) { 2294 + const u64 oe_cleanup_end = oe_cleanup_start + oe_cleanup_len - 1; 2295 + btrfs_cleanup_ordered_extents(inode, oe_cleanup_start, oe_cleanup_len); 2296 + extent_clear_unlock_delalloc(inode, oe_cleanup_start, oe_cleanup_end, 2297 + locked_folio, NULL, 2298 + EXTENT_LOCKED | EXTENT_DELALLOC, 2299 + PAGE_UNLOCK | PAGE_START_WRITEBACK | 2300 + PAGE_END_WRITEBACK); 2301 + } 2269 2302 2270 - /* 2271 - * We need to lock the extent here because we're clearing DELALLOC and 2272 - * we're not locked at this point. 2273 - */ 2274 - if (cur_offset < end) { 2303 + if (untouched_len) { 2275 2304 struct extent_state *cached = NULL; 2305 + const u64 untouched_end = untouched_start + untouched_len - 1; 2276 2306 2277 - btrfs_lock_extent(&inode->io_tree, cur_offset, end, &cached); 2278 - extent_clear_unlock_delalloc(inode, cur_offset, end, 2307 + /* 2308 + * We need to lock the extent here because we're clearing DELALLOC and 2309 + * we're not locked at this point. 
2310 + */ 2311 + btrfs_lock_extent(&inode->io_tree, untouched_start, untouched_end, &cached); 2312 + extent_clear_unlock_delalloc(inode, untouched_start, untouched_end, 2279 2313 locked_folio, &cached, 2280 2314 EXTENT_LOCKED | EXTENT_DELALLOC | 2281 2315 EXTENT_DEFRAG | 2282 2316 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | 2283 2317 PAGE_START_WRITEBACK | 2284 2318 PAGE_END_WRITEBACK); 2285 - btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); 2319 + btrfs_qgroup_free_data(inode, NULL, untouched_start, untouched_len, NULL); 2286 2320 } 2287 2321 btrfs_free_path(path); 2288 - btrfs_err_rl(fs_info, 2289 - "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d", 2290 - __func__, btrfs_root_id(inode->root), 2291 - btrfs_ino(inode), start, end + 1 - start, ret); 2322 + btrfs_err(fs_info, 2323 + "%s failed, root=%llu inode=%llu start=%llu len=%llu cur_offset=%llu oe_cleanup=%llu oe_cleanup_len=%llu untouched_start=%llu untouched_len=%llu: %d", 2324 + __func__, btrfs_root_id(inode->root), btrfs_ino(inode), 2325 + start, end + 1 - start, cur_offset, oe_cleanup_start, oe_cleanup_len, 2326 + untouched_start, untouched_len, ret); 2292 2327 return ret; 2293 2328 } 2294 2329 ··· 2352 2349 ret = run_delalloc_cow(inode, locked_folio, start, end, wbc, 2353 2350 true); 2354 2351 else 2355 - ret = cow_file_range(inode, locked_folio, start, end, NULL, 2356 - false, false); 2352 + ret = cow_file_range(inode, locked_folio, start, end, NULL, 0); 2357 2353 return ret; 2358 2354 } 2359 2355 ··· 2988 2986 * If we dropped an inline extent here, we know the range where it is 2989 2987 * was not marked with the EXTENT_DELALLOC_NEW bit, so we update the 2990 2988 * number of bytes only for that range containing the inline extent. 2991 - * The remaining of the range will be processed when clearning the 2989 + * The remaining of the range will be processed when clearing the 2992 2990 * EXTENT_DELALLOC_BIT bit through the ordered extent completion. 
2993 2991 */ 2994 2992 if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) { ··· 3104 3102 if (!freespace_inode) 3105 3103 btrfs_lockdep_acquire(fs_info, btrfs_ordered_extent); 3106 3104 3107 - if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { 3105 + if (unlikely(test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags))) { 3108 3106 ret = -EIO; 3109 3107 goto out; 3110 3108 } 3111 3109 3112 - if (btrfs_is_zoned(fs_info)) 3113 - btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr, 3114 - ordered_extent->disk_num_bytes); 3110 + ret = btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr, 3111 + ordered_extent->disk_num_bytes); 3112 + if (ret) 3113 + goto out; 3115 3114 3116 3115 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { 3117 3116 truncated = true; ··· 3150 3147 trans->block_rsv = &inode->block_rsv; 3151 3148 3152 3149 ret = btrfs_insert_raid_extent(trans, ordered_extent); 3153 - if (ret) { 3150 + if (unlikely(ret)) { 3154 3151 btrfs_abort_transaction(trans, ret); 3155 3152 goto out; 3156 3153 } ··· 3158 3155 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 3159 3156 /* Logic error */ 3160 3157 ASSERT(list_empty(&ordered_extent->list)); 3161 - if (!list_empty(&ordered_extent->list)) { 3158 + if (unlikely(!list_empty(&ordered_extent->list))) { 3162 3159 ret = -EINVAL; 3163 3160 btrfs_abort_transaction(trans, ret); 3164 3161 goto out; ··· 3166 3163 3167 3164 btrfs_inode_safe_disk_i_size_write(inode, 0); 3168 3165 ret = btrfs_update_inode_fallback(trans, inode); 3169 - if (ret) { 3166 + if (unlikely(ret)) { 3170 3167 /* -ENOMEM or corruption */ 3171 3168 btrfs_abort_transaction(trans, ret); 3172 3169 } ··· 3193 3190 ordered_extent->disk_num_bytes); 3194 3191 } 3195 3192 } 3196 - if (ret < 0) { 3193 + if (unlikely(ret < 0)) { 3197 3194 btrfs_abort_transaction(trans, ret); 3198 3195 goto out; 3199 3196 } 3200 3197 3201 3198 ret = btrfs_unpin_extent_cache(inode, ordered_extent->file_offset, 3202 
3199 ordered_extent->num_bytes, trans->transid); 3203 - if (ret < 0) { 3200 + if (unlikely(ret < 0)) { 3204 3201 btrfs_abort_transaction(trans, ret); 3205 3202 goto out; 3206 3203 } 3207 3204 3208 3205 ret = add_pending_csums(trans, &ordered_extent->list); 3209 - if (ret) { 3206 + if (unlikely(ret)) { 3210 3207 btrfs_abort_transaction(trans, ret); 3211 3208 goto out; 3212 3209 } ··· 3224 3221 3225 3222 btrfs_inode_safe_disk_i_size_write(inode, 0); 3226 3223 ret = btrfs_update_inode_fallback(trans, inode); 3227 - if (ret) { /* -ENOMEM or corruption */ 3224 + if (unlikely(ret)) { /* -ENOMEM or corruption */ 3228 3225 btrfs_abort_transaction(trans, ret); 3229 3226 goto out; 3230 3227 } ··· 3330 3327 return btrfs_finish_one_ordered(ordered); 3331 3328 } 3332 3329 3330 + void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, 3331 + u8 *dest) 3332 + { 3333 + struct folio *folio = page_folio(phys_to_page(paddr)); 3334 + const u32 blocksize = fs_info->sectorsize; 3335 + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 3336 + 3337 + shash->tfm = fs_info->csum_shash; 3338 + /* The full block must be inside the folio. */ 3339 + ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio)); 3340 + 3341 + if (folio_test_partial_kmap(folio)) { 3342 + size_t cur = paddr; 3343 + 3344 + crypto_shash_init(shash); 3345 + while (cur < paddr + blocksize) { 3346 + void *kaddr; 3347 + size_t len = min(paddr + blocksize - cur, 3348 + PAGE_SIZE - offset_in_page(cur)); 3349 + 3350 + kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur)); 3351 + crypto_shash_update(shash, kaddr, len); 3352 + kunmap_local(kaddr); 3353 + cur += len; 3354 + } 3355 + crypto_shash_final(shash, dest); 3356 + } else { 3357 + crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest); 3358 + } 3359 + } 3333 3360 /* 3334 3361 * Verify the checksum for a single sector without any extra action that depend 3335 3362 * on the type of I/O. 
3336 3363 * 3337 3364 * @kaddr must be a properly kmapped address. 3338 3365 */ 3339 - int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum, 3340 - const u8 * const csum_expected) 3366 + int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, 3367 + const u8 * const csum_expected) 3341 3368 { 3342 - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 3343 - 3344 - shash->tfm = fs_info->csum_shash; 3345 - crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum); 3346 - 3347 - if (memcmp(csum, csum_expected, fs_info->csum_size)) 3369 + btrfs_calculate_block_csum(fs_info, paddr, csum); 3370 + if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0)) 3348 3371 return -EIO; 3349 3372 return 0; 3350 3373 } ··· 3389 3360 * Return %true if the sector is ok or had no checksum to start with, else %false. 3390 3361 */ 3391 3362 bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, 3392 - u32 bio_offset, struct bio_vec *bv) 3363 + u32 bio_offset, phys_addr_t paddr) 3393 3364 { 3394 3365 struct btrfs_inode *inode = bbio->inode; 3395 3366 struct btrfs_fs_info *fs_info = inode->root->fs_info; 3367 + const u32 blocksize = fs_info->sectorsize; 3368 + struct folio *folio; 3396 3369 u64 file_offset = bbio->file_offset + bio_offset; 3397 - u64 end = file_offset + bv->bv_len - 1; 3370 + u64 end = file_offset + blocksize - 1; 3398 3371 u8 *csum_expected; 3399 3372 u8 csum[BTRFS_CSUM_SIZE]; 3400 - void *kaddr; 3401 - 3402 - ASSERT(bv->bv_len == fs_info->sectorsize); 3403 3373 3404 3374 if (!bbio->csum) 3405 3375 return true; ··· 3414 3386 3415 3387 csum_expected = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) * 3416 3388 fs_info->csum_size; 3417 - kaddr = bvec_kmap_local(bv); 3418 - if (btrfs_check_sector_csum(fs_info, kaddr, csum, csum_expected)) { 3419 - kunmap_local(kaddr); 3389 + if (btrfs_check_block_csum(fs_info, paddr, csum, csum_expected)) 3420 3390 goto zeroit; 3421 - } 3422 - 
kunmap_local(kaddr); 3423 3391 return true; 3424 3392 3425 3393 zeroit: ··· 3423 3399 bbio->mirror_num); 3424 3400 if (dev) 3425 3401 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS); 3426 - memzero_bvec(bv); 3402 + folio = page_folio(phys_to_page(paddr)); 3403 + ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio)); 3404 + folio_zero_range(folio, offset_in_folio(folio, paddr), blocksize); 3427 3405 return false; 3428 3406 } 3429 3407 ··· 3539 3513 int ret; 3540 3514 3541 3515 ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode)); 3542 - if (ret && ret != -EEXIST) { 3516 + if (unlikely(ret && ret != -EEXIST)) { 3543 3517 btrfs_abort_transaction(trans, ret); 3544 3518 return ret; 3545 3519 } ··· 4288 4262 } 4289 4263 4290 4264 ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index); 4291 - if (ret) { 4265 + if (unlikely(ret)) { 4292 4266 btrfs_crit(fs_info, 4293 4267 "failed to delete reference to %.*s, root %llu inode %llu parent %llu", 4294 4268 name->len, name->name, btrfs_root_id(root), ino, dir_ino); ··· 4300 4274 rename_ctx->index = index; 4301 4275 4302 4276 ret = btrfs_delete_delayed_dir_index(trans, dir, index); 4303 - if (ret) { 4277 + if (unlikely(ret)) { 4304 4278 btrfs_abort_transaction(trans, ret); 4305 4279 return ret; 4306 4280 } ··· 4455 4429 btrfs_dir_item_key_to_cpu(leaf, di, &key); 4456 4430 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); 4457 4431 ret = btrfs_delete_one_dir_name(trans, root, path, di); 4458 - if (ret) { 4432 + if (unlikely(ret)) { 4459 4433 btrfs_abort_transaction(trans, ret); 4460 4434 goto out; 4461 4435 } ··· 4486 4460 ret = btrfs_del_root_ref(trans, objectid, 4487 4461 btrfs_root_id(root), dir_ino, 4488 4462 &index, &fname.disk_name); 4489 - if (ret) { 4463 + if (unlikely(ret)) { 4490 4464 btrfs_abort_transaction(trans, ret); 4491 4465 goto out; 4492 4466 } 4493 4467 } 4494 4468 4495 4469 ret = btrfs_delete_delayed_dir_index(trans, dir, index); 
4496 - if (ret) { 4470 + if (unlikely(ret)) { 4497 4471 btrfs_abort_transaction(trans, ret); 4498 4472 goto out; 4499 4473 } ··· 4551 4525 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); 4552 4526 if (ret < 0) 4553 4527 return ret; 4554 - if (ret == 0) { 4528 + if (unlikely(ret == 0)) { 4555 4529 /* 4556 4530 * Key with offset -1 found, there would have to exist a root 4557 4531 * with such id, but this is out of valid range. ··· 4665 4639 btrfs_record_snapshot_destroy(trans, dir); 4666 4640 4667 4641 ret = btrfs_unlink_subvol(trans, dir, dentry); 4668 - if (ret) { 4642 + if (unlikely(ret)) { 4669 4643 btrfs_abort_transaction(trans, ret); 4670 4644 goto out_end_trans; 4671 4645 } 4672 4646 4673 4647 ret = btrfs_record_root_in_trans(trans, dest); 4674 - if (ret) { 4648 + if (unlikely(ret)) { 4675 4649 btrfs_abort_transaction(trans, ret); 4676 4650 goto out_end_trans; 4677 4651 } ··· 4685 4659 ret = btrfs_insert_orphan_item(trans, 4686 4660 fs_info->tree_root, 4687 4661 btrfs_root_id(dest)); 4688 - if (ret) { 4662 + if (unlikely(ret)) { 4689 4663 btrfs_abort_transaction(trans, ret); 4690 4664 goto out_end_trans; 4691 4665 } ··· 4693 4667 4694 4668 ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid, 4695 4669 BTRFS_UUID_KEY_SUBVOL, btrfs_root_id(dest)); 4696 - if (ret && ret != -ENOENT) { 4670 + if (unlikely(ret && ret != -ENOENT)) { 4697 4671 btrfs_abort_transaction(trans, ret); 4698 4672 goto out_end_trans; 4699 4673 } ··· 4702 4676 dest->root_item.received_uuid, 4703 4677 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 4704 4678 btrfs_root_id(dest)); 4705 - if (ret && ret != -ENOENT) { 4679 + if (unlikely(ret && ret != -ENOENT)) { 4706 4680 btrfs_abort_transaction(trans, ret); 4707 4681 goto out_end_trans; 4708 4682 } ··· 4842 4816 folio_put(folio); 4843 4817 goto again; 4844 4818 } 4845 - if (!folio_test_uptodate(folio)) { 4819 + if (unlikely(!folio_test_uptodate(folio))) { 4846 4820 ret = -EIO; 4847 4821 goto out_unlock; 4848 4822 } ··· 4930 4904 goto 
out; 4931 4905 4932 4906 /* 4933 - * Skip the truncatioin if the range in the target block is already aligned. 4907 + * Skip the truncation if the range in the target block is already aligned. 4934 4908 * The seemingly complex check will also handle the same block case. 4935 4909 */ 4936 4910 if (in_head_block && !IS_ALIGNED(start, blocksize)) ··· 4986 4960 folio_put(folio); 4987 4961 goto again; 4988 4962 } 4989 - if (!folio_test_uptodate(folio)) { 4963 + if (unlikely(!folio_test_uptodate(folio))) { 4990 4964 ret = -EIO; 4991 4965 goto out_unlock; 4992 4966 } ··· 5106 5080 drop_args.drop_cache = true; 5107 5081 5108 5082 ret = btrfs_drop_extents(trans, root, inode, &drop_args); 5109 - if (ret) { 5083 + if (unlikely(ret)) { 5110 5084 btrfs_abort_transaction(trans, ret); 5111 5085 btrfs_end_transaction(trans); 5112 5086 return ret; ··· 5626 5600 } 5627 5601 5628 5602 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); 5629 - if (location->type != BTRFS_INODE_ITEM_KEY && 5630 - location->type != BTRFS_ROOT_ITEM_KEY) { 5603 + if (unlikely(location->type != BTRFS_INODE_ITEM_KEY && 5604 + location->type != BTRFS_ROOT_ITEM_KEY)) { 5631 5605 ret = -EUCLEAN; 5632 5606 btrfs_warn(root->fs_info, 5633 5607 "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", ··· 5918 5892 return ERR_CAST(inode); 5919 5893 5920 5894 /* Do extra check against inode mode with di_type */ 5921 - if (btrfs_inode_type(inode) != di_type) { 5895 + if (unlikely(btrfs_inode_type(inode) != di_type)) { 5922 5896 btrfs_crit(fs_info, 5923 5897 "inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u", 5924 5898 inode->vfs_inode.i_mode, btrfs_inode_type(inode), ··· 6505 6479 if (!args->subvol) 6506 6480 btrfs_inherit_iflags(BTRFS_I(inode), BTRFS_I(dir)); 6507 6481 6482 + btrfs_set_inode_mapping_order(BTRFS_I(inode)); 6508 6483 if (S_ISREG(inode->i_mode)) { 6509 6484 if (btrfs_test_opt(fs_info, NODATASUM)) 6510 6485 BTRFS_I(inode)->flags |= 
BTRFS_INODE_NODATASUM; ··· 6513 6486 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW | 6514 6487 BTRFS_INODE_NODATASUM; 6515 6488 btrfs_update_inode_mapping_flags(BTRFS_I(inode)); 6516 - btrfs_set_inode_mapping_order(BTRFS_I(inode)); 6517 6489 } 6518 6490 6519 6491 ret = btrfs_insert_inode_locked(inode); ··· 6559 6533 batch.total_data_size = sizes[0] + (args->orphan ? 0 : sizes[1]); 6560 6534 batch.nr = args->orphan ? 1 : 2; 6561 6535 ret = btrfs_insert_empty_items(trans, root, path, &batch); 6562 - if (ret != 0) { 6536 + if (unlikely(ret != 0)) { 6563 6537 btrfs_abort_transaction(trans, ret); 6564 6538 goto discard; 6565 6539 } ··· 6636 6610 */ 6637 6611 if (!args->subvol) { 6638 6612 ret = btrfs_init_inode_security(trans, args); 6639 - if (ret) { 6613 + if (unlikely(ret)) { 6640 6614 btrfs_abort_transaction(trans, ret); 6641 6615 goto discard; 6642 6616 } ··· 6656 6630 6657 6631 if (args->orphan) { 6658 6632 ret = btrfs_orphan_add(trans, BTRFS_I(inode)); 6659 - if (ret) { 6633 + if (unlikely(ret)) { 6660 6634 btrfs_abort_transaction(trans, ret); 6661 6635 goto discard; 6662 6636 } 6663 6637 } else { 6664 6638 ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, 6665 6639 0, BTRFS_I(inode)->dir_index); 6666 - if (ret) { 6640 + if (unlikely(ret)) { 6667 6641 btrfs_abort_transaction(trans, ret); 6668 6642 goto discard; 6669 6643 } ··· 6694 6668 */ 6695 6669 int btrfs_add_link(struct btrfs_trans_handle *trans, 6696 6670 struct btrfs_inode *parent_inode, struct btrfs_inode *inode, 6697 - const struct fscrypt_str *name, int add_backref, u64 index) 6671 + const struct fscrypt_str *name, bool add_backref, u64 index) 6698 6672 { 6699 6673 int ret = 0; 6700 6674 struct btrfs_key key; ··· 6727 6701 btrfs_inode_type(inode), index); 6728 6702 if (ret == -EEXIST || ret == -EOVERFLOW) 6729 6703 goto fail_dir_item; 6730 - else if (ret) { 6704 + else if (unlikely(ret)) { 6731 6705 btrfs_abort_transaction(trans, ret); 6732 6706 return ret; 6733 6707 } ··· 6883 6857 /* 
Link added now we update the inode item with the new link count. */ 6884 6858 inc_nlink(inode); 6885 6859 ret = btrfs_update_inode(trans, BTRFS_I(inode)); 6886 - if (ret) { 6860 + if (unlikely(ret)) { 6887 6861 btrfs_abort_transaction(trans, ret); 6888 6862 goto fail; 6889 6863 } ··· 6894 6868 * open(2) O_TMPFILE flag. 6895 6869 */ 6896 6870 ret = btrfs_orphan_del(trans, BTRFS_I(inode)); 6897 - if (ret) { 6871 + if (unlikely(ret)) { 6898 6872 btrfs_abort_transaction(trans, ret); 6899 6873 goto fail; 6900 6874 } ··· 7102 7076 if (extent_type == BTRFS_FILE_EXTENT_REG || 7103 7077 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 7104 7078 /* Only regular file could have regular/prealloc extent */ 7105 - if (!S_ISREG(inode->vfs_inode.i_mode)) { 7079 + if (unlikely(!S_ISREG(inode->vfs_inode.i_mode))) { 7106 7080 ret = -EUCLEAN; 7107 7081 btrfs_crit(fs_info, 7108 7082 "regular/prealloc extent found for non-regular inode %llu", ··· 7179 7153 insert: 7180 7154 ret = 0; 7181 7155 btrfs_release_path(path); 7182 - if (em->start > start || btrfs_extent_map_end(em) <= start) { 7156 + if (unlikely(em->start > start || btrfs_extent_map_end(em) <= start)) { 7183 7157 btrfs_err(fs_info, 7184 7158 "bad extent! 
em: [%llu %llu] passed [%llu %llu]", 7185 7159 em->start, em->len, start, len); ··· 8211 8185 btrfs_ino(BTRFS_I(old_dir)), 8212 8186 new_idx); 8213 8187 if (ret) { 8214 - if (need_abort) 8188 + if (unlikely(need_abort)) 8215 8189 btrfs_abort_transaction(trans, ret); 8216 8190 goto out_fail; 8217 8191 } ··· 8259 8233 /* src is a subvolume */ 8260 8234 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { 8261 8235 ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry); 8262 - if (ret) { 8236 + if (unlikely(ret)) { 8263 8237 btrfs_abort_transaction(trans, ret); 8264 8238 goto out_fail; 8265 8239 } ··· 8267 8241 ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), 8268 8242 BTRFS_I(old_dentry->d_inode), 8269 8243 old_name, &old_rename_ctx); 8270 - if (ret) { 8244 + if (unlikely(ret)) { 8271 8245 btrfs_abort_transaction(trans, ret); 8272 8246 goto out_fail; 8273 8247 } 8274 8248 ret = btrfs_update_inode(trans, BTRFS_I(old_inode)); 8275 - if (ret) { 8249 + if (unlikely(ret)) { 8276 8250 btrfs_abort_transaction(trans, ret); 8277 8251 goto out_fail; 8278 8252 } ··· 8281 8255 /* dest is a subvolume */ 8282 8256 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { 8283 8257 ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry); 8284 - if (ret) { 8258 + if (unlikely(ret)) { 8285 8259 btrfs_abort_transaction(trans, ret); 8286 8260 goto out_fail; 8287 8261 } ··· 8289 8263 ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir), 8290 8264 BTRFS_I(new_dentry->d_inode), 8291 8265 new_name, &new_rename_ctx); 8292 - if (ret) { 8266 + if (unlikely(ret)) { 8293 8267 btrfs_abort_transaction(trans, ret); 8294 8268 goto out_fail; 8295 8269 } 8296 8270 ret = btrfs_update_inode(trans, BTRFS_I(new_inode)); 8297 - if (ret) { 8271 + if (unlikely(ret)) { 8298 8272 btrfs_abort_transaction(trans, ret); 8299 8273 goto out_fail; 8300 8274 } ··· 8302 8276 8303 8277 ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), 8304 8278 new_name, 0, old_idx); 8305 - if (ret) { 8279 + if (unlikely(ret)) 
{ 8306 8280 btrfs_abort_transaction(trans, ret); 8307 8281 goto out_fail; 8308 8282 } 8309 8283 8310 8284 ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode), 8311 8285 old_name, 0, new_idx); 8312 - if (ret) { 8286 + if (unlikely(ret)) { 8313 8287 btrfs_abort_transaction(trans, ret); 8314 8288 goto out_fail; 8315 8289 } ··· 8550 8524 8551 8525 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { 8552 8526 ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry); 8553 - if (ret) { 8527 + if (unlikely(ret)) { 8554 8528 btrfs_abort_transaction(trans, ret); 8555 8529 goto out_fail; 8556 8530 } ··· 8558 8532 ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), 8559 8533 BTRFS_I(d_inode(old_dentry)), 8560 8534 &old_fname.disk_name, &rename_ctx); 8561 - if (ret) { 8535 + if (unlikely(ret)) { 8562 8536 btrfs_abort_transaction(trans, ret); 8563 8537 goto out_fail; 8564 8538 } 8565 8539 ret = btrfs_update_inode(trans, BTRFS_I(old_inode)); 8566 - if (ret) { 8540 + if (unlikely(ret)) { 8567 8541 btrfs_abort_transaction(trans, ret); 8568 8542 goto out_fail; 8569 8543 } ··· 8574 8548 if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == 8575 8549 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 8576 8550 ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry); 8577 - if (ret) { 8551 + if (unlikely(ret)) { 8578 8552 btrfs_abort_transaction(trans, ret); 8579 8553 goto out_fail; 8580 8554 } ··· 8583 8557 ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir), 8584 8558 BTRFS_I(d_inode(new_dentry)), 8585 8559 &new_fname.disk_name); 8586 - if (ret) { 8560 + if (unlikely(ret)) { 8587 8561 btrfs_abort_transaction(trans, ret); 8588 8562 goto out_fail; 8589 8563 } ··· 8591 8565 if (new_inode->i_nlink == 0) { 8592 8566 ret = btrfs_orphan_add(trans, 8593 8567 BTRFS_I(d_inode(new_dentry))); 8594 - if (ret) { 8568 + if (unlikely(ret)) { 8595 8569 btrfs_abort_transaction(trans, ret); 8596 8570 goto out_fail; 8597 8571 } ··· 8600 8574 8601 8575 ret = btrfs_add_link(trans, BTRFS_I(new_dir), 
BTRFS_I(old_inode), 8602 8576 &new_fname.disk_name, 0, index); 8603 - if (ret) { 8577 + if (unlikely(ret)) { 8604 8578 btrfs_abort_transaction(trans, ret); 8605 8579 goto out_fail; 8606 8580 } ··· 8614 8588 8615 8589 if (flags & RENAME_WHITEOUT) { 8616 8590 ret = btrfs_create_new_inode(trans, &whiteout_args); 8617 - if (ret) { 8591 + if (unlikely(ret)) { 8618 8592 btrfs_abort_transaction(trans, ret); 8619 8593 goto out_fail; 8620 8594 } else { ··· 8908 8882 goto out; 8909 8883 8910 8884 path = btrfs_alloc_path(); 8911 - if (!path) { 8885 + if (unlikely(!path)) { 8912 8886 ret = -ENOMEM; 8913 8887 btrfs_abort_transaction(trans, ret); 8914 8888 discard_new_inode(inode); ··· 8920 8894 key.offset = 0; 8921 8895 datasize = btrfs_file_extent_calc_inline_size(name_len); 8922 8896 ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); 8923 - if (ret) { 8897 + if (unlikely(ret)) { 8924 8898 btrfs_abort_transaction(trans, ret); 8925 8899 btrfs_free_path(path); 8926 8900 discard_new_inode(inode); ··· 9133 9107 9134 9108 ret = btrfs_update_inode(trans, BTRFS_I(inode)); 9135 9109 9136 - if (ret) { 9110 + if (unlikely(ret)) { 9137 9111 btrfs_abort_transaction(trans, ret); 9138 9112 if (own_trans) 9139 9113 btrfs_end_transaction(trans); ··· 9301 9275 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), 9302 9276 extent_start, 0); 9303 9277 if (ret) { 9304 - if (ret > 0) { 9278 + if (unlikely(ret > 0)) { 9305 9279 /* The extent item disappeared? */ 9306 9280 return -EIO; 9307 9281 }

+42 -27

fs/btrfs/ioctl.c

··· 376 376 if (comp) { 377 377 ret = btrfs_set_prop(trans, inode, "btrfs.compression", 378 378 comp, strlen(comp), 0); 379 - if (ret) { 379 + if (unlikely(ret)) { 380 380 btrfs_abort_transaction(trans, ret); 381 381 goto out_end_trans; 382 382 } 383 383 } else { 384 384 ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0); 385 - if (ret && ret != -ENODATA) { 385 + if (unlikely(ret && ret != -ENODATA)) { 386 386 btrfs_abort_transaction(trans, ret); 387 387 goto out_end_trans; 388 388 } ··· 633 633 btrfs_clear_buffer_dirty(trans, leaf); 634 634 btrfs_tree_unlock(leaf); 635 635 ret2 = btrfs_free_tree_block(trans, objectid, leaf, 0, 1); 636 - if (ret2 < 0) 636 + if (unlikely(ret2 < 0)) 637 637 btrfs_abort_transaction(trans, ret2); 638 638 free_extent_buffer(leaf); 639 639 goto out; ··· 654 654 /* ... and new_root is owned by new_inode_args.inode now. */ 655 655 656 656 ret = btrfs_record_root_in_trans(trans, new_root); 657 - if (ret) { 657 + if (unlikely(ret)) { 658 658 btrfs_abort_transaction(trans, ret); 659 659 goto out; 660 660 } 661 661 662 662 ret = btrfs_uuid_tree_add(trans, root_item->uuid, 663 663 BTRFS_UUID_KEY_SUBVOL, objectid); 664 - if (ret) { 664 + if (unlikely(ret)) { 665 665 btrfs_abort_transaction(trans, ret); 666 666 goto out; 667 667 } ··· 669 669 btrfs_record_new_subvolume(trans, BTRFS_I(dir)); 670 670 671 671 ret = btrfs_create_new_inode(trans, &new_inode_args); 672 - if (ret) { 672 + if (unlikely(ret)) { 673 673 btrfs_abort_transaction(trans, ret); 674 674 goto out; 675 675 } ··· 957 957 958 958 /* 959 959 * Force new buffered writes to reserve space even when NOCOW is 960 - * possible. This is to avoid later writeback (running dealloc) to 960 + * possible. This is to avoid later writeback (running delalloc) to 961 961 * fallback to COW mode and unexpectedly fail with ENOSPC. 
962 962 */ 963 963 btrfs_drew_read_lock(&root->snapshot_lock); ··· 1251 1251 } 1252 1252 1253 1253 static noinline int btrfs_ioctl_snap_create(struct file *file, 1254 - void __user *arg, int subvol) 1254 + void __user *arg, bool subvol) 1255 1255 { 1256 1256 struct btrfs_ioctl_vol_args *vol_args; 1257 1257 int ret; ··· 2133 2133 ret = btrfs_next_leaf(fs_info->tree_root, path); 2134 2134 if (ret < 0) { 2135 2135 goto out; 2136 - } else if (ret > 0) { 2136 + } else if (unlikely(ret > 0)) { 2137 2137 ret = -EUCLEAN; 2138 2138 goto out; 2139 2139 } ··· 2216 2216 ret = btrfs_next_leaf(root, path); 2217 2217 if (ret < 0) { 2218 2218 goto out; 2219 - } else if (ret > 0) { 2219 + } else if (unlikely(ret > 0)) { 2220 2220 ret = -EUCLEAN; 2221 2221 goto out; 2222 2222 } ··· 2245 2245 ret = btrfs_next_item(root, path); 2246 2246 if (ret < 0) { 2247 2247 goto out; 2248 - } else if (ret > 0) { 2248 + } else if (unlikely(ret > 0)) { 2249 2249 ret = -EUCLEAN; 2250 2250 goto out; 2251 2251 } ··· 4008 4008 ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid, 4009 4009 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 4010 4010 btrfs_root_id(root)); 4011 - if (ret && ret != -ENOENT) { 4011 + if (unlikely(ret && ret != -ENOENT)) { 4012 4012 btrfs_abort_transaction(trans, ret); 4013 4013 btrfs_end_transaction(trans); 4014 4014 goto out; ··· 4032 4032 ret = btrfs_uuid_tree_add(trans, sa->uuid, 4033 4033 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 4034 4034 btrfs_root_id(root)); 4035 - if (ret < 0 && ret != -EEXIST) { 4035 + if (unlikely(ret < 0 && ret != -EEXIST)) { 4036 4036 btrfs_abort_transaction(trans, ret); 4037 4037 btrfs_end_transaction(trans); 4038 4038 goto out; ··· 4418 4418 goto out_acct; 4419 4419 } 4420 4420 4421 + if (fs_info->sectorsize > PAGE_SIZE) { 4422 + ret = -ENOTTY; 4423 + goto out_acct; 4424 + } 4421 4425 if (compat) { 4422 4426 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 4423 4427 struct btrfs_ioctl_encoded_io_args_32 args32; ··· 4513 4509 4514 4510 static int 
btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat) 4515 4511 { 4512 + struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode); 4516 4513 struct btrfs_ioctl_encoded_io_args args; 4517 4514 struct iovec iovstack[UIO_FASTIOV]; 4518 4515 struct iovec *iov = iovstack; ··· 4524 4519 4525 4520 if (!capable(CAP_SYS_ADMIN)) { 4526 4521 ret = -EPERM; 4522 + goto out_acct; 4523 + } 4524 + 4525 + if (fs_info->sectorsize > PAGE_SIZE) { 4526 + ret = -ENOTTY; 4527 4527 goto out_acct; 4528 4528 } 4529 4529 ··· 4790 4780 4791 4781 static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue_flags) 4792 4782 { 4783 + struct file *file = cmd->file; 4784 + struct btrfs_inode *inode = BTRFS_I(file->f_inode); 4785 + struct extent_io_tree *io_tree = &inode->io_tree; 4786 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 4793 4787 size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags); 4794 4788 size_t copy_end; 4795 4789 int ret; 4796 4790 u64 disk_bytenr, disk_io_size; 4797 - struct file *file; 4798 - struct btrfs_inode *inode; 4799 - struct btrfs_fs_info *fs_info; 4800 - struct extent_io_tree *io_tree; 4801 4791 loff_t pos; 4802 4792 struct kiocb kiocb; 4803 4793 struct extent_state *cached_state = NULL; ··· 4813 4803 ret = -EPERM; 4814 4804 goto out_acct; 4815 4805 } 4816 - file = cmd->file; 4817 - inode = BTRFS_I(file->f_inode); 4818 - fs_info = inode->root->fs_info; 4819 - io_tree = &inode->io_tree; 4806 + if (fs_info->sectorsize > PAGE_SIZE) { 4807 + ret = -ENOTTY; 4808 + goto out_acct; 4809 + } 4810 + 4820 4811 sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); 4821 4812 4822 4813 if (issue_flags & IO_URING_F_COMPAT) { ··· 4944 4933 4945 4934 static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags) 4946 4935 { 4936 + struct file *file = cmd->file; 4937 + struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode); 4947 4938 loff_t pos; 4948 4939 struct kiocb 
kiocb; 4949 - struct file *file; 4950 4940 ssize_t ret; 4951 4941 void __user *sqe_addr; 4952 4942 struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd); ··· 4960 4948 ret = -EPERM; 4961 4949 goto out_acct; 4962 4950 } 4951 + if (fs_info->sectorsize > PAGE_SIZE) { 4952 + ret = -ENOTTY; 4953 + goto out_acct; 4954 + } 4963 4955 4964 - file = cmd->file; 4965 4956 sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); 4966 4957 4967 4958 if (!(file->f_mode & FMODE_WRITE)) { ··· 5238 5223 case FITRIM: 5239 5224 return btrfs_ioctl_fitrim(fs_info, argp); 5240 5225 case BTRFS_IOC_SNAP_CREATE: 5241 - return btrfs_ioctl_snap_create(file, argp, 0); 5226 + return btrfs_ioctl_snap_create(file, argp, false); 5242 5227 case BTRFS_IOC_SNAP_CREATE_V2: 5243 - return btrfs_ioctl_snap_create_v2(file, argp, 0); 5228 + return btrfs_ioctl_snap_create_v2(file, argp, false); 5244 5229 case BTRFS_IOC_SUBVOL_CREATE: 5245 - return btrfs_ioctl_snap_create(file, argp, 1); 5230 + return btrfs_ioctl_snap_create(file, argp, true); 5246 5231 case BTRFS_IOC_SUBVOL_CREATE_V2: 5247 - return btrfs_ioctl_snap_create_v2(file, argp, 1); 5232 + return btrfs_ioctl_snap_create_v2(file, argp, true); 5248 5233 case BTRFS_IOC_SNAP_DESTROY: 5249 5234 return btrfs_ioctl_snap_destroy(file, argp, false); 5250 5235 case BTRFS_IOC_SNAP_DESTROY_V2:

+1 -1

fs/btrfs/locking.c

··· 361 361 atomic_inc(&lock->readers); 362 362 363 363 /* 364 - * Ensure the pending reader count is perceieved BEFORE this reader 364 + * Ensure the pending reader count is perceived BEFORE this reader 365 365 * goes to sleep in case of active writers. This guarantees new writers 366 366 * won't be allowed and that the current reader will be woken up when 367 367 * the last active writer finishes its jobs.

+1 -1

fs/btrfs/locking.h

··· 74 74 BTRFS_NESTING_NEW_ROOT, 75 75 76 76 /* 77 - * We are limited to MAX_LOCKDEP_SUBLCLASSES number of subclasses, so 77 + * We are limited to MAX_LOCKDEP_SUBCLASSES number of subclasses, so 78 78 * add this in here and add a static_assert to keep us from going over 79 79 * the limit. As of this writing we're limited to 8, and we're 80 80 * definitely using 8, hence this check to keep us from messing up in

+51 -42

fs/btrfs/lzo.c

··· 58 58 * 0x1000 | SegHdr N+1| Data payload N+1 ... | 59 59 */ 60 60 61 - #define WORKSPACE_BUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE)) 62 - #define WORKSPACE_CBUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE)) 63 - 64 61 struct workspace { 65 62 void *mem; 66 63 void *buf; /* where decompressed data goes */ ··· 65 68 struct list_head list; 66 69 }; 67 70 68 - static struct workspace_manager wsm; 71 + static u32 workspace_buf_length(const struct btrfs_fs_info *fs_info) 72 + { 73 + return lzo1x_worst_compress(fs_info->sectorsize); 74 + } 75 + static u32 workspace_cbuf_length(const struct btrfs_fs_info *fs_info) 76 + { 77 + return lzo1x_worst_compress(fs_info->sectorsize); 78 + } 69 79 70 80 void lzo_free_workspace(struct list_head *ws) 71 81 { ··· 84 80 kfree(workspace); 85 81 } 86 82 87 - struct list_head *lzo_alloc_workspace(void) 83 + struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info) 88 84 { 89 85 struct workspace *workspace; 90 86 ··· 93 89 return ERR_PTR(-ENOMEM); 94 90 95 91 workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN); 96 - workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL | __GFP_NOWARN); 97 - workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL | __GFP_NOWARN); 92 + workspace->buf = kvmalloc(workspace_buf_length(fs_info), GFP_KERNEL | __GFP_NOWARN); 93 + workspace->cbuf = kvmalloc(workspace_cbuf_length(fs_info), GFP_KERNEL | __GFP_NOWARN); 98 94 if (!workspace->mem || !workspace->buf || !workspace->cbuf) 99 95 goto fail; 100 96 ··· 132 128 * 133 129 * Will allocate new pages when needed. 
134 130 */ 135 - static int copy_compressed_data_to_page(char *compressed_data, 131 + static int copy_compressed_data_to_page(struct btrfs_fs_info *fs_info, 132 + char *compressed_data, 136 133 size_t compressed_size, 137 134 struct folio **out_folios, 138 135 unsigned long max_nr_folio, 139 - u32 *cur_out, 140 - const u32 sectorsize) 136 + u32 *cur_out) 141 137 { 138 + const u32 sectorsize = fs_info->sectorsize; 139 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 142 140 u32 sector_bytes_left; 143 141 u32 orig_out; 144 142 struct folio *cur_folio; 145 143 char *kaddr; 146 144 147 - if ((*cur_out / PAGE_SIZE) >= max_nr_folio) 145 + if ((*cur_out >> min_folio_shift) >= max_nr_folio) 148 146 return -E2BIG; 149 147 150 148 /* ··· 155 149 */ 156 150 ASSERT((*cur_out / sectorsize) == (*cur_out + LZO_LEN - 1) / sectorsize); 157 151 158 - cur_folio = out_folios[*cur_out / PAGE_SIZE]; 152 + cur_folio = out_folios[*cur_out >> min_folio_shift]; 159 153 /* Allocate a new page */ 160 154 if (!cur_folio) { 161 - cur_folio = btrfs_alloc_compr_folio(); 155 + cur_folio = btrfs_alloc_compr_folio(fs_info); 162 156 if (!cur_folio) 163 157 return -ENOMEM; 164 - out_folios[*cur_out / PAGE_SIZE] = cur_folio; 158 + out_folios[*cur_out >> min_folio_shift] = cur_folio; 165 159 } 166 160 167 - kaddr = kmap_local_folio(cur_folio, 0); 168 - write_compress_length(kaddr + offset_in_page(*cur_out), 169 - compressed_size); 161 + kaddr = kmap_local_folio(cur_folio, offset_in_folio(cur_folio, *cur_out)); 162 + write_compress_length(kaddr, compressed_size); 170 163 *cur_out += LZO_LEN; 171 164 172 165 orig_out = *cur_out; ··· 177 172 178 173 kunmap_local(kaddr); 179 174 180 - if ((*cur_out / PAGE_SIZE) >= max_nr_folio) 175 + if ((*cur_out >> min_folio_shift) >= max_nr_folio) 181 176 return -E2BIG; 182 177 183 - cur_folio = out_folios[*cur_out / PAGE_SIZE]; 178 + cur_folio = out_folios[*cur_out >> min_folio_shift]; 184 179 /* Allocate a new page */ 185 180 if (!cur_folio) { 186 - 
cur_folio = btrfs_alloc_compr_folio(); 181 + cur_folio = btrfs_alloc_compr_folio(fs_info); 187 182 if (!cur_folio) 188 183 return -ENOMEM; 189 - out_folios[*cur_out / PAGE_SIZE] = cur_folio; 184 + out_folios[*cur_out >> min_folio_shift] = cur_folio; 190 185 } 191 186 kaddr = kmap_local_folio(cur_folio, 0); 192 187 193 - memcpy(kaddr + offset_in_page(*cur_out), 188 + memcpy(kaddr + offset_in_folio(cur_folio, *cur_out), 194 189 compressed_data + *cur_out - orig_out, copy_len); 195 190 196 191 *cur_out += copy_len; ··· 214 209 return 0; 215 210 } 216 211 217 - int lzo_compress_folios(struct list_head *ws, struct address_space *mapping, 212 + int lzo_compress_folios(struct list_head *ws, struct btrfs_inode *inode, 218 213 u64 start, struct folio **folios, unsigned long *out_folios, 219 214 unsigned long *total_in, unsigned long *total_out) 220 215 { 216 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 221 217 struct workspace *workspace = list_entry(ws, struct workspace, list); 222 - const u32 sectorsize = inode_to_fs_info(mapping->host)->sectorsize; 218 + const u32 sectorsize = fs_info->sectorsize; 219 + const u32 min_folio_size = btrfs_min_folio_size(fs_info); 220 + struct address_space *mapping = inode->vfs_inode.i_mapping; 223 221 struct folio *folio_in = NULL; 224 222 char *sizes_ptr; 225 223 const unsigned long max_nr_folio = *out_folios; ··· 271 263 goto out; 272 264 } 273 265 274 - ret = copy_compressed_data_to_page(workspace->cbuf, out_len, 266 + ret = copy_compressed_data_to_page(fs_info, workspace->cbuf, out_len, 275 267 folios, max_nr_folio, 276 - &cur_out, sectorsize); 268 + &cur_out); 277 269 if (ret < 0) 278 270 goto out; 279 271 ··· 288 280 goto out; 289 281 } 290 282 291 - /* Check if we have reached page boundary */ 292 - if (PAGE_ALIGNED(cur_in)) { 283 + /* Check if we have reached folio boundary. 
*/ 284 + if (IS_ALIGNED(cur_in, min_folio_size)) { 293 285 folio_put(folio_in); 294 286 folio_in = NULL; 295 287 } ··· 306 298 out: 307 299 if (folio_in) 308 300 folio_put(folio_in); 309 - *out_folios = DIV_ROUND_UP(cur_out, PAGE_SIZE); 301 + *out_folios = DIV_ROUND_UP(cur_out, min_folio_size); 310 302 return ret; 311 303 } 312 304 ··· 318 310 static void copy_compressed_segment(struct compressed_bio *cb, 319 311 char *dest, u32 len, u32 *cur_in) 320 312 { 313 + struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); 314 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 321 315 u32 orig_in = *cur_in; 322 316 323 317 while (*cur_in < orig_in + len) { 324 - struct folio *cur_folio; 325 - u32 copy_len = min_t(u32, PAGE_SIZE - offset_in_page(*cur_in), 326 - orig_in + len - *cur_in); 318 + struct folio *cur_folio = cb->compressed_folios[*cur_in >> min_folio_shift]; 319 + u32 copy_len = min_t(u32, orig_in + len - *cur_in, 320 + folio_size(cur_folio) - offset_in_folio(cur_folio, *cur_in)); 327 321 328 322 ASSERT(copy_len); 329 - cur_folio = cb->compressed_folios[*cur_in / PAGE_SIZE]; 330 323 331 324 memcpy_from_folio(dest + *cur_in - orig_in, cur_folio, 332 325 offset_in_folio(cur_folio, *cur_in), copy_len); ··· 341 332 struct workspace *workspace = list_entry(ws, struct workspace, list); 342 333 const struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info; 343 334 const u32 sectorsize = fs_info->sectorsize; 335 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 344 336 char *kaddr; 345 337 int ret; 346 338 /* Compressed data length, can be unaligned */ ··· 388 378 */ 389 379 ASSERT(cur_in / sectorsize == 390 380 (cur_in + LZO_LEN - 1) / sectorsize); 391 - cur_folio = cb->compressed_folios[cur_in / PAGE_SIZE]; 381 + cur_folio = cb->compressed_folios[cur_in >> min_folio_shift]; 392 382 ASSERT(cur_folio); 393 383 kaddr = kmap_local_folio(cur_folio, 0); 394 - seg_len = read_compress_length(kaddr + offset_in_page(cur_in)); 384 + seg_len 
= read_compress_length(kaddr + offset_in_folio(cur_folio, cur_in)); 395 385 kunmap_local(kaddr); 396 386 cur_in += LZO_LEN; 397 387 398 - if (unlikely(seg_len > WORKSPACE_CBUF_LENGTH)) { 388 + if (unlikely(seg_len > workspace_cbuf_length(fs_info))) { 399 389 struct btrfs_inode *inode = cb->bbio.inode; 400 390 401 391 /* ··· 455 445 const u32 sectorsize = fs_info->sectorsize; 456 446 size_t in_len; 457 447 size_t out_len; 458 - size_t max_segment_len = WORKSPACE_BUF_LENGTH; 448 + size_t max_segment_len = workspace_buf_length(fs_info); 459 449 int ret = 0; 460 450 461 - if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2) 451 + if (unlikely(srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)) 462 452 return -EUCLEAN; 463 453 464 454 in_len = read_compress_length(data_in); 465 - if (in_len != srclen) 455 + if (unlikely(in_len != srclen)) 466 456 return -EUCLEAN; 467 457 data_in += LZO_LEN; 468 458 469 459 in_len = read_compress_length(data_in); 470 - if (in_len != srclen - LZO_LEN * 2) { 460 + if (unlikely(in_len != srclen - LZO_LEN * 2)) { 471 461 ret = -EUCLEAN; 472 462 goto out; 473 463 } ··· 497 487 return ret; 498 488 } 499 489 500 - const struct btrfs_compress_op btrfs_lzo_compress = { 501 - .workspace_manager = &wsm, 490 + const struct btrfs_compress_levels btrfs_lzo_compress = { 502 491 .max_level = 1, 503 492 .default_level = 1, 504 493 };

+1

fs/btrfs/messages.c

··· 18 18 [BTRFS_FS_STATE_REMOUNTING] = 'M', 19 19 [BTRFS_FS_STATE_RO] = 0, 20 20 [BTRFS_FS_STATE_TRANS_ABORTED] = 'A', 21 + [BTRFS_FS_STATE_LOG_REPLAY_ABORTED] = 'O', 21 22 [BTRFS_FS_STATE_DEV_REPLACING] = 'R', 22 23 [BTRFS_FS_STATE_DUMMY_FS_INFO] = 0, 23 24 [BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C',

-1

fs/btrfs/messages.h

··· 4 4 #define BTRFS_MESSAGES_H 5 5 6 6 #include <linux/types.h> 7 - #include <linux/types.h> 8 7 #include <linux/printk.h> 9 8 #include <linux/bug.h> 10 9

+49

fs/btrfs/misc.h

··· 11 11 #include <linux/pagemap.h> 12 12 #include <linux/math64.h> 13 13 #include <linux/rbtree.h> 14 + #include <linux/bio.h> 14 15 15 16 /* 16 17 * Enumerate bits using enum autoincrement. Define the @name as the n-th bit. ··· 20 19 __ ## name ## _BIT, \ 21 20 name = (1U << __ ## name ## _BIT), \ 22 21 __ ## name ## _SEQ = __ ## name ## _BIT 22 + 23 + static inline phys_addr_t bio_iter_phys(struct bio *bio, struct bvec_iter *iter) 24 + { 25 + struct bio_vec bv = bio_iter_iovec(bio, *iter); 26 + 27 + return bvec_phys(&bv); 28 + } 29 + 30 + /* 31 + * Iterate bio using btrfs block size. 32 + * 33 + * This will handle large folio and highmem. 34 + * 35 + * @paddr: Physical memory address of each iteration 36 + * @bio: The bio to iterate 37 + * @iter: The bvec_iter (pointer) to use. 38 + * @blocksize: The blocksize to iterate. 39 + * 40 + * This requires all folios in the bio to cover at least one block. 41 + */ 42 + #define btrfs_bio_for_each_block(paddr, bio, iter, blocksize) \ 43 + for (; (iter)->bi_size && \ 44 + (paddr = bio_iter_phys((bio), (iter)), 1); \ 45 + bio_advance_iter_single((bio), (iter), (blocksize))) 46 + 47 + /* Initialize a bvec_iter to the size of the specified bio. */ 48 + static inline struct bvec_iter init_bvec_iter_for_bio(struct bio *bio) 49 + { 50 + struct bio_vec *bvec; 51 + u32 bio_size = 0; 52 + int i; 53 + 54 + bio_for_each_bvec_all(bvec, bio, i) 55 + bio_size += bvec->bv_len; 56 + 57 + return (struct bvec_iter) { 58 + .bi_sector = 0, 59 + .bi_size = bio_size, 60 + .bi_idx = 0, 61 + .bi_bvec_done = 0, 62 + }; 63 + } 64 + 65 + #define btrfs_bio_for_each_block_all(paddr, bio, blocksize) \ 66 + for (struct bvec_iter iter = init_bvec_iter_for_bio(bio); \ 67 + (iter).bi_size && \ 68 + (paddr = bio_iter_phys((bio), &(iter)), 1); \ 69 + bio_advance_iter_single((bio), &(iter), (blocksize))) 23 70 24 71 static inline void cond_wake_up(struct wait_queue_head *wq) 25 72 {

+222 -34

fs/btrfs/print-tree.c

··· 6 6 #include "messages.h" 7 7 #include "ctree.h" 8 8 #include "disk-io.h" 9 + #include "file-item.h" 9 10 #include "print-tree.h" 10 11 #include "accessors.h" 11 12 #include "tree-checker.h" 12 13 #include "volumes.h" 13 14 #include "raid-stripe-tree.h" 15 + 16 + /* 17 + * Large enough buffer size for the stringification of any key type yet short 18 + * enough to use the stack and avoid allocations. 19 + */ 20 + #define KEY_TYPE_BUF_SIZE 32 14 21 15 22 struct root_name_map { 16 23 u64 id; ··· 234 227 #endif 235 228 } 236 229 230 + static void print_timespec(const struct extent_buffer *eb, 231 + struct btrfs_timespec *timespec, 232 + const char *prefix, const char *suffix) 233 + { 234 + const u64 secs = btrfs_timespec_sec(eb, timespec); 235 + const u32 nsecs = btrfs_timespec_nsec(eb, timespec); 236 + 237 + pr_info("%s%llu.%u%s", prefix, secs, nsecs, suffix); 238 + } 239 + 240 + static void print_inode_item(const struct extent_buffer *eb, int i) 241 + { 242 + struct btrfs_inode_item *ii = btrfs_item_ptr(eb, i, struct btrfs_inode_item); 243 + 244 + pr_info("\t\tinode generation %llu transid %llu size %llu nbytes %llu\n", 245 + btrfs_inode_generation(eb, ii), btrfs_inode_transid(eb, ii), 246 + btrfs_inode_size(eb, ii), btrfs_inode_nbytes(eb, ii)); 247 + pr_info("\t\tblock group %llu mode %o links %u uid %u gid %u\n", 248 + btrfs_inode_block_group(eb, ii), btrfs_inode_mode(eb, ii), 249 + btrfs_inode_nlink(eb, ii), btrfs_inode_uid(eb, ii), 250 + btrfs_inode_gid(eb, ii)); 251 + pr_info("\t\trdev %llu sequence %llu flags 0x%llx\n", 252 + btrfs_inode_rdev(eb, ii), btrfs_inode_sequence(eb, ii), 253 + btrfs_inode_flags(eb, ii)); 254 + print_timespec(eb, &ii->atime, "\t\tatime ", "\n"); 255 + print_timespec(eb, &ii->ctime, "\t\tctime ", "\n"); 256 + print_timespec(eb, &ii->mtime, "\t\tmtime ", "\n"); 257 + print_timespec(eb, &ii->otime, "\t\totime ", "\n"); 258 + } 259 + 260 + static void print_dir_item(const struct extent_buffer *eb, int i) 261 + { 262 + const u32 size = 
btrfs_item_size(eb, i); 263 + struct btrfs_dir_item *di = btrfs_item_ptr(eb, i, struct btrfs_dir_item); 264 + u32 cur = 0; 265 + 266 + while (cur < size) { 267 + const u32 name_len = btrfs_dir_name_len(eb, di); 268 + const u32 data_len = btrfs_dir_data_len(eb, di); 269 + const u32 len = sizeof(*di) + name_len + data_len; 270 + struct btrfs_key location; 271 + 272 + btrfs_dir_item_key_to_cpu(eb, di, &location); 273 + pr_info("\t\tlocation key (%llu %u %llu) type %d\n", 274 + location.objectid, location.type, location.offset, 275 + btrfs_dir_ftype(eb, di)); 276 + pr_info("\t\ttransid %llu data_len %u name_len %u\n", 277 + btrfs_dir_transid(eb, di), data_len, name_len); 278 + di = (struct btrfs_dir_item *)((char *)di + len); 279 + cur += len; 280 + } 281 + } 282 + 283 + static void print_inode_ref_item(const struct extent_buffer *eb, int i) 284 + { 285 + const u32 size = btrfs_item_size(eb, i); 286 + struct btrfs_inode_ref *ref = btrfs_item_ptr(eb, i, struct btrfs_inode_ref); 287 + u32 cur = 0; 288 + 289 + while (cur < size) { 290 + const u64 index = btrfs_inode_ref_index(eb, ref); 291 + const u32 name_len = btrfs_inode_ref_name_len(eb, ref); 292 + const u32 len = sizeof(*ref) + name_len; 293 + 294 + pr_info("\t\tindex %llu name_len %u\n", index, name_len); 295 + ref = (struct btrfs_inode_ref *)((char *)ref + len); 296 + cur += len; 297 + } 298 + } 299 + 300 + static void print_inode_extref_item(const struct extent_buffer *eb, int i) 301 + { 302 + const u32 size = btrfs_item_size(eb, i); 303 + struct btrfs_inode_extref *extref; 304 + u32 cur = 0; 305 + 306 + extref = btrfs_item_ptr(eb, i, struct btrfs_inode_extref); 307 + while (cur < size) { 308 + const u64 index = btrfs_inode_extref_index(eb, extref); 309 + const u32 name_len = btrfs_inode_extref_name_len(eb, extref); 310 + const u64 parent = btrfs_inode_extref_parent(eb, extref); 311 + const u32 len = sizeof(*extref) + name_len; 312 + 313 + pr_info("\t\tindex %llu parent %llu name_len %u\n", 314 + index, parent, 
name_len); 315 + extref = (struct btrfs_inode_extref *)((char *)extref + len); 316 + cur += len; 317 + } 318 + } 319 + 320 + static void print_dir_log_index_item(const struct extent_buffer *eb, int i) 321 + { 322 + struct btrfs_dir_log_item *dlog; 323 + 324 + dlog = btrfs_item_ptr(eb, i, struct btrfs_dir_log_item); 325 + pr_info("\t\tdir log end %llu\n", btrfs_dir_log_end(eb, dlog)); 326 + } 327 + 328 + static void print_extent_csum(const struct extent_buffer *eb, int i) 329 + { 330 + const struct btrfs_fs_info *fs_info = eb->fs_info; 331 + const u32 size = btrfs_item_size(eb, i); 332 + const u32 csum_bytes = (size / fs_info->csum_size) * fs_info->sectorsize; 333 + struct btrfs_key key; 334 + 335 + btrfs_item_key_to_cpu(eb, &key, i); 336 + pr_info("\t\trange start %llu end %llu length %u\n", 337 + key.offset, key.offset + csum_bytes, csum_bytes); 338 + } 339 + 340 + static void print_file_extent_item(const struct extent_buffer *eb, int i) 341 + { 342 + struct btrfs_file_extent_item *fi; 343 + 344 + fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 345 + pr_info("\t\tgeneration %llu type %hhu\n", 346 + btrfs_file_extent_generation(eb, fi), 347 + btrfs_file_extent_type(eb, fi)); 348 + 349 + if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE) { 350 + pr_info("\t\tinline extent data size %u ram_bytes %llu compression %hhu\n", 351 + btrfs_file_extent_inline_item_len(eb, i), 352 + btrfs_file_extent_ram_bytes(eb, fi), 353 + btrfs_file_extent_compression(eb, fi)); 354 + return; 355 + } 356 + 357 + pr_info("\t\textent data disk bytenr %llu nr %llu\n", 358 + btrfs_file_extent_disk_bytenr(eb, fi), 359 + btrfs_file_extent_disk_num_bytes(eb, fi)); 360 + pr_info("\t\textent data offset %llu nr %llu ram %llu\n", 361 + btrfs_file_extent_offset(eb, fi), 362 + btrfs_file_extent_num_bytes(eb, fi), 363 + btrfs_file_extent_ram_bytes(eb, fi)); 364 + pr_info("\t\textent compression %hhu\n", 365 + btrfs_file_extent_compression(eb, fi)); 366 + } 367 + 368 + static void 
key_type_string(const struct btrfs_key *key, char *buf, int buf_size) 369 + { 370 + static const char *key_to_str[256] = { 371 + [BTRFS_INODE_ITEM_KEY] = "INODE_ITEM", 372 + [BTRFS_INODE_REF_KEY] = "INODE_REF", 373 + [BTRFS_INODE_EXTREF_KEY] = "INODE_EXTREF", 374 + [BTRFS_DIR_ITEM_KEY] = "DIR_ITEM", 375 + [BTRFS_DIR_INDEX_KEY] = "DIR_INDEX", 376 + [BTRFS_DIR_LOG_ITEM_KEY] = "DIR_LOG_ITEM", 377 + [BTRFS_DIR_LOG_INDEX_KEY] = "DIR_LOG_INDEX", 378 + [BTRFS_XATTR_ITEM_KEY] = "XATTR_ITEM", 379 + [BTRFS_VERITY_DESC_ITEM_KEY] = "VERITY_DESC_ITEM", 380 + [BTRFS_VERITY_MERKLE_ITEM_KEY] = "VERITY_MERKLE_ITEM", 381 + [BTRFS_ORPHAN_ITEM_KEY] = "ORPHAN_ITEM", 382 + [BTRFS_ROOT_ITEM_KEY] = "ROOT_ITEM", 383 + [BTRFS_ROOT_REF_KEY] = "ROOT_REF", 384 + [BTRFS_ROOT_BACKREF_KEY] = "ROOT_BACKREF", 385 + [BTRFS_EXTENT_ITEM_KEY] = "EXTENT_ITEM", 386 + [BTRFS_METADATA_ITEM_KEY] = "METADATA_ITEM", 387 + [BTRFS_TREE_BLOCK_REF_KEY] = "TREE_BLOCK_REF", 388 + [BTRFS_SHARED_BLOCK_REF_KEY] = "SHARED_BLOCK_REF", 389 + [BTRFS_EXTENT_DATA_REF_KEY] = "EXTENT_DATA_REF", 390 + [BTRFS_SHARED_DATA_REF_KEY] = "SHARED_DATA_REF", 391 + [BTRFS_EXTENT_OWNER_REF_KEY] = "EXTENT_OWNER_REF", 392 + [BTRFS_EXTENT_CSUM_KEY] = "EXTENT_CSUM", 393 + [BTRFS_EXTENT_DATA_KEY] = "EXTENT_DATA", 394 + [BTRFS_BLOCK_GROUP_ITEM_KEY] = "BLOCK_GROUP_ITEM", 395 + [BTRFS_FREE_SPACE_INFO_KEY] = "FREE_SPACE_INFO", 396 + [BTRFS_FREE_SPACE_EXTENT_KEY] = "FREE_SPACE_EXTENT", 397 + [BTRFS_FREE_SPACE_BITMAP_KEY] = "FREE_SPACE_BITMAP", 398 + [BTRFS_CHUNK_ITEM_KEY] = "CHUNK_ITEM", 399 + [BTRFS_DEV_ITEM_KEY] = "DEV_ITEM", 400 + [BTRFS_DEV_EXTENT_KEY] = "DEV_EXTENT", 401 + [BTRFS_TEMPORARY_ITEM_KEY] = "TEMPORARY_ITEM", 402 + [BTRFS_DEV_REPLACE_KEY] = "DEV_REPLACE", 403 + [BTRFS_STRING_ITEM_KEY] = "STRING_ITEM", 404 + [BTRFS_QGROUP_STATUS_KEY] = "QGROUP_STATUS", 405 + [BTRFS_QGROUP_RELATION_KEY] = "QGROUP_RELATION", 406 + [BTRFS_QGROUP_INFO_KEY] = "QGROUP_INFO", 407 + [BTRFS_QGROUP_LIMIT_KEY] = "QGROUP_LIMIT", 408 + [BTRFS_PERSISTENT_ITEM_KEY] 
= "PERSISTENT_ITEM", 409 + [BTRFS_UUID_KEY_SUBVOL] = "UUID_KEY_SUBVOL", 410 + [BTRFS_UUID_KEY_RECEIVED_SUBVOL] = "UUID_KEY_RECEIVED_SUBVOL", 411 + [BTRFS_RAID_STRIPE_KEY] = "RAID_STRIPE", 412 + }; 413 + 414 + if (key->type == 0 && key->objectid == BTRFS_FREE_SPACE_OBJECTID) 415 + scnprintf(buf, buf_size, "UNTYPED"); 416 + else if (key_to_str[key->type]) 417 + scnprintf(buf, buf_size, key_to_str[key->type]); 418 + else 419 + scnprintf(buf, buf_size, "UNKNOWN.%d", key->type); 420 + } 421 + 237 422 void btrfs_print_leaf(const struct extent_buffer *l) 238 423 { 239 424 struct btrfs_fs_info *fs_info; 240 425 int i; 241 426 u32 type, nr; 242 427 struct btrfs_root_item *ri; 243 - struct btrfs_dir_item *di; 244 - struct btrfs_inode_item *ii; 245 428 struct btrfs_block_group_item *bi; 246 - struct btrfs_file_extent_item *fi; 247 429 struct btrfs_extent_data_ref *dref; 248 430 struct btrfs_shared_data_ref *sref; 249 431 struct btrfs_dev_extent *dev_extent; 250 432 struct btrfs_key key; 251 - struct btrfs_key found_key; 252 433 253 434 if (!l) 254 435 return; ··· 450 255 btrfs_leaf_free_space(l), btrfs_header_owner(l)); 451 256 print_eb_refs_lock(l); 452 257 for (i = 0 ; i < nr ; i++) { 258 + char key_buf[KEY_TYPE_BUF_SIZE]; 259 + 453 260 btrfs_item_key_to_cpu(l, &key, i); 454 261 type = key.type; 455 - pr_info("\titem %d key (%llu %u %llu) itemoff %d itemsize %d\n", 456 - i, key.objectid, type, key.offset, 262 + key_type_string(&key, key_buf, KEY_TYPE_BUF_SIZE); 263 + 264 + pr_info("\titem %d key (%llu %s %llu) itemoff %d itemsize %d\n", 265 + i, key.objectid, key_buf, key.offset, 457 266 btrfs_item_offset(l, i), btrfs_item_size(l, i)); 458 267 switch (type) { 459 268 case BTRFS_INODE_ITEM_KEY: 460 - ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); 461 - pr_info("\t\tinode generation %llu size %llu mode %o\n", 462 - btrfs_inode_generation(l, ii), 463 - btrfs_inode_size(l, ii), 464 - btrfs_inode_mode(l, ii)); 269 + print_inode_item(l, i); 270 + break; 271 + case 
BTRFS_INODE_REF_KEY: 272 + print_inode_ref_item(l, i); 273 + break; 274 + case BTRFS_INODE_EXTREF_KEY: 275 + print_inode_extref_item(l, i); 465 276 break; 466 277 case BTRFS_DIR_ITEM_KEY: 467 - di = btrfs_item_ptr(l, i, struct btrfs_dir_item); 468 - btrfs_dir_item_key_to_cpu(l, di, &found_key); 469 - pr_info("\t\tdir oid %llu flags %u\n", 470 - found_key.objectid, 471 - btrfs_dir_flags(l, di)); 278 + case BTRFS_DIR_INDEX_KEY: 279 + case BTRFS_XATTR_ITEM_KEY: 280 + print_dir_item(l, i); 281 + break; 282 + case BTRFS_DIR_LOG_INDEX_KEY: 283 + print_dir_log_index_item(l, i); 284 + break; 285 + case BTRFS_EXTENT_CSUM_KEY: 286 + print_extent_csum(l, i); 472 287 break; 473 288 case BTRFS_ROOT_ITEM_KEY: 474 289 ri = btrfs_item_ptr(l, i, struct btrfs_root_item); ··· 508 303 btrfs_shared_data_ref_count(l, sref)); 509 304 break; 510 305 case BTRFS_EXTENT_DATA_KEY: 511 - fi = btrfs_item_ptr(l, i, 512 - struct btrfs_file_extent_item); 513 - pr_info("\t\tgeneration %llu type %hhu\n", 514 - btrfs_file_extent_generation(l, fi), 515 - btrfs_file_extent_type(l, fi)); 516 - if (btrfs_file_extent_type(l, fi) == 517 - BTRFS_FILE_EXTENT_INLINE) { 518 - pr_info("\t\tinline extent data size %llu\n", 519 - btrfs_file_extent_ram_bytes(l, fi)); 520 - break; 521 - } 522 - pr_info("\t\textent data disk bytenr %llu nr %llu\n", 523 - btrfs_file_extent_disk_bytenr(l, fi), 524 - btrfs_file_extent_disk_num_bytes(l, fi)); 525 - pr_info("\t\textent data offset %llu nr %llu ram %llu\n", 526 - btrfs_file_extent_offset(l, fi), 527 - btrfs_file_extent_num_bytes(l, fi), 528 - btrfs_file_extent_ram_bytes(l, fi)); 306 + print_file_extent_item(l, i); 529 307 break; 530 308 case BTRFS_BLOCK_GROUP_ITEM_KEY: 531 309 bi = btrfs_item_ptr(l, i,

+22 -22

fs/btrfs/qgroup.c

··· 1069 1069 } 1070 1070 1071 1071 path = btrfs_alloc_path(); 1072 - if (!path) { 1072 + if (unlikely(!path)) { 1073 1073 ret = -ENOMEM; 1074 1074 btrfs_abort_transaction(trans, ret); 1075 1075 goto out_free_root; ··· 1081 1081 1082 1082 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 1083 1083 sizeof(*ptr)); 1084 - if (ret) { 1084 + if (unlikely(ret)) { 1085 1085 btrfs_abort_transaction(trans, ret); 1086 1086 goto out_free_path; 1087 1087 } ··· 1111 1111 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 1112 1112 if (ret > 0) 1113 1113 goto out_add_root; 1114 - if (ret < 0) { 1114 + if (unlikely(ret < 0)) { 1115 1115 btrfs_abort_transaction(trans, ret); 1116 1116 goto out_free_path; 1117 1117 } ··· 1129 1129 /* We should not have a stray @prealloc pointer. */ 1130 1130 ASSERT(prealloc == NULL); 1131 1131 prealloc = kzalloc(sizeof(*prealloc), GFP_NOFS); 1132 - if (!prealloc) { 1132 + if (unlikely(!prealloc)) { 1133 1133 ret = -ENOMEM; 1134 1134 btrfs_abort_transaction(trans, ret); 1135 1135 goto out_free_path; ··· 1137 1137 1138 1138 ret = add_qgroup_item(trans, quota_root, 1139 1139 found_key.offset); 1140 - if (ret) { 1140 + if (unlikely(ret)) { 1141 1141 btrfs_abort_transaction(trans, ret); 1142 1142 goto out_free_path; 1143 1143 } ··· 1145 1145 qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset); 1146 1146 prealloc = NULL; 1147 1147 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1148 - if (ret < 0) { 1148 + if (unlikely(ret < 0)) { 1149 1149 btrfs_abort_transaction(trans, ret); 1150 1150 goto out_free_path; 1151 1151 } 1152 1152 ret = btrfs_search_slot_for_read(tree_root, &found_key, 1153 1153 path, 1, 0); 1154 - if (ret < 0) { 1154 + if (unlikely(ret < 0)) { 1155 1155 btrfs_abort_transaction(trans, ret); 1156 1156 goto out_free_path; 1157 1157 } ··· 1165 1165 } 1166 1166 } 1167 1167 ret = btrfs_next_item(tree_root, path); 1168 - if (ret < 0) { 1168 + if (unlikely(ret < 0)) { 1169 1169 btrfs_abort_transaction(trans, ret); 
1170 1170 goto out_free_path; 1171 1171 } ··· 1176 1176 out_add_root: 1177 1177 btrfs_release_path(path); 1178 1178 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 1179 - if (ret) { 1179 + if (unlikely(ret)) { 1180 1180 btrfs_abort_transaction(trans, ret); 1181 1181 goto out_free_path; 1182 1182 } ··· 1190 1190 qgroup = add_qgroup_rb(fs_info, prealloc, BTRFS_FS_TREE_OBJECTID); 1191 1191 prealloc = NULL; 1192 1192 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1193 - if (ret < 0) { 1193 + if (unlikely(ret < 0)) { 1194 1194 btrfs_abort_transaction(trans, ret); 1195 1195 goto out_free_path; 1196 1196 } ··· 1376 1376 btrfs_free_qgroup_config(fs_info); 1377 1377 1378 1378 ret = btrfs_clean_quota_tree(trans, quota_root); 1379 - if (ret) { 1379 + if (unlikely(ret)) { 1380 1380 btrfs_abort_transaction(trans, ret); 1381 1381 goto out; 1382 1382 } 1383 1383 1384 1384 ret = btrfs_del_root(trans, &quota_root->root_key); 1385 - if (ret) { 1385 + if (unlikely(ret)) { 1386 1386 btrfs_abort_transaction(trans, ret); 1387 1387 goto out; 1388 1388 } ··· 2426 2426 int i; 2427 2427 2428 2428 /* Level sanity check */ 2429 - if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 || 2430 - root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 || 2431 - root_level < cur_level) { 2429 + if (unlikely(cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 || 2430 + root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 || 2431 + root_level < cur_level)) { 2432 2432 btrfs_err_rl(fs_info, 2433 2433 "%s: bad levels, cur_level=%d root_level=%d", 2434 2434 __func__, cur_level, root_level); ··· 2444 2444 * dst_path->nodes[root_level] must be initialized before 2445 2445 * calling this function. 
2446 2446 */ 2447 - if (cur_level == root_level) { 2447 + if (unlikely(cur_level == root_level)) { 2448 2448 btrfs_err_rl(fs_info, 2449 2449 "%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d", 2450 2450 __func__, root_level, root_level, cur_level); ··· 2530 2530 return 0; 2531 2531 2532 2532 /* Wrong parameter order */ 2533 - if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) { 2533 + if (unlikely(btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb))) { 2534 2534 btrfs_err_rl(fs_info, 2535 2535 "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__, 2536 2536 btrfs_header_generation(src_eb), ··· 2538 2538 return -EUCLEAN; 2539 2539 } 2540 2540 2541 - if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) { 2541 + if (unlikely(!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb))) { 2542 2542 ret = -EIO; 2543 2543 goto out; 2544 2544 } ··· 2729 2729 */ 2730 2730 static void qgroup_update_refcnt(struct btrfs_fs_info *fs_info, 2731 2731 struct ulist *roots, struct list_head *qgroups, 2732 - u64 seq, int update_old) 2732 + u64 seq, bool update_old) 2733 2733 { 2734 2734 struct ulist_node *unode; 2735 2735 struct ulist_iterator uiter; ··· 4710 4710 if (!btrfs_qgroup_full_accounting(fs_info)) 4711 4711 return 0; 4712 4712 4713 - if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) > 4714 - btrfs_node_ptr_generation(reloc_parent, reloc_slot)) { 4713 + if (unlikely(btrfs_node_ptr_generation(subvol_parent, subvol_slot) > 4714 + btrfs_node_ptr_generation(reloc_parent, reloc_slot))) { 4715 4715 btrfs_err_rl(fs_info, 4716 4716 "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu", 4717 4717 __func__, ··· 4843 4843 reloc_eb = NULL; 4844 4844 goto free_out; 4845 4845 } 4846 - if (!extent_buffer_uptodate(reloc_eb)) { 4846 + if (unlikely(!extent_buffer_uptodate(reloc_eb))) { 4847 4847 ret = -EIO; 4848 4848 goto free_out; 4849 4849 }

+6 -11

fs/btrfs/raid-stripe-tree.c

··· 67 67 { 68 68 struct btrfs_fs_info *fs_info = trans->fs_info; 69 69 struct btrfs_root *stripe_root = fs_info->stripe_root; 70 - struct btrfs_path *path; 70 + BTRFS_PATH_AUTO_FREE(path); 71 71 struct btrfs_key key; 72 72 struct extent_buffer *leaf; 73 73 u64 found_start; ··· 260 260 btrfs_release_path(path); 261 261 } 262 262 263 - btrfs_free_path(path); 264 263 return ret; 265 264 } 266 265 ··· 268 269 struct btrfs_stripe_extent *stripe_extent, 269 270 const size_t item_size) 270 271 { 271 - struct btrfs_path *path; 272 + BTRFS_PATH_AUTO_FREE(path); 272 273 struct extent_buffer *leaf; 273 274 int ret; 274 275 int slot; ··· 287 288 288 289 write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot), 289 290 item_size); 290 - btrfs_free_path(path); 291 291 292 292 return ret; 293 293 } ··· 304 306 int ret; 305 307 306 308 stripe_extent = kzalloc(item_size, GFP_NOFS); 307 - if (!stripe_extent) { 309 + if (!unlikely(stripe_extent)) { 308 310 btrfs_abort_transaction(trans, -ENOMEM); 309 311 btrfs_end_transaction(trans); 310 312 return -ENOMEM; ··· 374 376 struct btrfs_stripe_extent *stripe_extent; 375 377 struct btrfs_key stripe_key; 376 378 struct btrfs_key found_key; 377 - struct btrfs_path *path; 379 + BTRFS_PATH_AUTO_FREE(path); 378 380 struct extent_buffer *leaf; 379 381 const u64 end = logical + *length; 380 382 int num_stripes; ··· 400 402 401 403 ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0); 402 404 if (ret < 0) 403 - goto free_path; 405 + return ret; 404 406 if (ret) { 405 407 if (path->slots[0] != 0) 406 408 path->slots[0]--; ··· 457 459 trace_btrfs_get_raid_extent_offset(fs_info, logical, *length, 458 460 stripe->physical, devid); 459 461 460 - ret = 0; 461 - goto free_path; 462 + return 0; 462 463 } 463 464 464 465 /* If we're here, we haven't found the requested devid in the stripe. 
*/ ··· 471 474 logical, logical + *length, stripe->dev->devid, 472 475 btrfs_bg_type_to_raid_name(map_type)); 473 476 } 474 - free_path: 475 - btrfs_free_path(path); 476 477 477 478 return ret; 478 479 }

+59 -62

fs/btrfs/raid56.c

··· 1167 1167 /* Check if we have reached tolerance early. */ 1168 1168 found_errors = get_rbio_veritical_errors(rbio, sector_nr, 1169 1169 NULL, NULL); 1170 - if (found_errors > rbio->bioc->max_errors) 1170 + if (unlikely(found_errors > rbio->bioc->max_errors)) 1171 1171 return -EIO; 1172 1172 return 0; 1173 1173 } ··· 1208 1208 const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 1209 1209 const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits; 1210 1210 struct bvec_iter iter = bio->bi_iter; 1211 + phys_addr_t paddr; 1211 1212 u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - 1212 1213 rbio->bioc->full_stripe_logical; 1213 1214 1214 - while (iter.bi_size) { 1215 + btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) { 1215 1216 unsigned int index = (offset >> sectorsize_bits); 1216 1217 struct sector_ptr *sector = &rbio->bio_sectors[index]; 1217 - struct bio_vec bv = bio_iter_iovec(bio, iter); 1218 1218 1219 1219 sector->has_paddr = true; 1220 - sector->paddr = bvec_phys(&bv); 1221 - bio_advance_iter_single(bio, &iter, sectorsize); 1220 + sector->paddr = paddr; 1222 1221 offset += sectorsize; 1223 1222 } 1224 1223 } ··· 1510 1511 */ 1511 1512 static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio) 1512 1513 { 1513 - const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 1514 - struct bio_vec *bvec; 1515 - struct bvec_iter_all iter_all; 1514 + const u32 blocksize = rbio->bioc->fs_info->sectorsize; 1515 + phys_addr_t paddr; 1516 1516 1517 1517 ASSERT(!bio_flagged(bio, BIO_CLONED)); 1518 1518 1519 - bio_for_each_segment_all(bvec, bio, iter_all) { 1520 - struct sector_ptr *sector; 1521 - phys_addr_t paddr = bvec_phys(bvec); 1519 + btrfs_bio_for_each_block_all(paddr, bio, blocksize) { 1520 + struct sector_ptr *sector = find_stripe_sector(rbio, paddr); 1522 1521 1523 - for (u32 off = 0; off < bvec->bv_len; off += sectorsize) { 1524 - sector = find_stripe_sector(rbio, paddr + off); 1525 - ASSERT(sector); 1526 - if (sector) 
1527 - sector->uptodate = 1; 1528 - } 1522 + ASSERT(sector); 1523 + if (sector) 1524 + sector->uptodate = 1; 1529 1525 } 1530 1526 } 1531 1527 ··· 1567 1573 { 1568 1574 struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 1569 1575 int total_sector_nr = get_bio_sector_nr(rbio, bio); 1570 - struct bio_vec *bvec; 1571 - struct bvec_iter_all iter_all; 1576 + phys_addr_t paddr; 1572 1577 1573 1578 /* No data csum for the whole stripe, no need to verify. */ 1574 1579 if (!rbio->csum_bitmap || !rbio->csum_buf) ··· 1577 1584 if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors) 1578 1585 return; 1579 1586 1580 - bio_for_each_segment_all(bvec, bio, iter_all) { 1581 - void *kaddr; 1587 + btrfs_bio_for_each_block_all(paddr, bio, fs_info->sectorsize) { 1588 + u8 csum_buf[BTRFS_CSUM_SIZE]; 1589 + u8 *expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size; 1590 + int ret; 1582 1591 1583 - kaddr = bvec_kmap_local(bvec); 1584 - for (u32 off = 0; off < bvec->bv_len; 1585 - off += fs_info->sectorsize, total_sector_nr++) { 1586 - u8 csum_buf[BTRFS_CSUM_SIZE]; 1587 - u8 *expected_csum = rbio->csum_buf + 1588 - total_sector_nr * fs_info->csum_size; 1589 - int ret; 1592 + /* No csum for this sector, skip to the next sector. */ 1593 + if (!test_bit(total_sector_nr, rbio->csum_bitmap)) 1594 + continue; 1590 1595 1591 - /* No csum for this sector, skip to the next sector. 
*/ 1592 - if (!test_bit(total_sector_nr, rbio->csum_bitmap)) 1593 - continue; 1594 - 1595 - ret = btrfs_check_sector_csum(fs_info, kaddr + off, 1596 - csum_buf, expected_csum); 1597 - if (ret < 0) 1598 - set_bit(total_sector_nr, rbio->error_bitmap); 1599 - } 1600 - kunmap_local(kaddr); 1596 + ret = btrfs_check_block_csum(fs_info, paddr, 1597 + csum_buf, expected_csum); 1598 + if (ret < 0) 1599 + set_bit(total_sector_nr, rbio->error_bitmap); 1600 + total_sector_nr++; 1601 1601 } 1602 1602 } 1603 1603 ··· 1788 1802 struct sector_ptr *sector; 1789 1803 u8 csum_buf[BTRFS_CSUM_SIZE]; 1790 1804 u8 *csum_expected; 1791 - void *kaddr; 1792 1805 int ret; 1793 1806 1794 1807 if (!rbio->csum_bitmap || !rbio->csum_buf) ··· 1809 1824 csum_expected = rbio->csum_buf + 1810 1825 (stripe_nr * rbio->stripe_nsectors + sector_nr) * 1811 1826 fs_info->csum_size; 1812 - kaddr = kmap_local_sector(sector); 1813 - ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected); 1814 - kunmap_local(kaddr); 1827 + ret = btrfs_check_block_csum(fs_info, sector->paddr, csum_buf, csum_expected); 1815 1828 return ret; 1816 1829 } 1817 1830 ··· 1847 1864 if (!found_errors) 1848 1865 return 0; 1849 1866 1850 - if (found_errors > rbio->bioc->max_errors) 1867 + if (unlikely(found_errors > rbio->bioc->max_errors)) 1851 1868 return -EIO; 1852 1869 1853 1870 /* ··· 2399 2416 int found_errors; 2400 2417 2401 2418 found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL); 2402 - if (found_errors > rbio->bioc->max_errors) { 2419 + if (unlikely(found_errors > rbio->bioc->max_errors)) { 2403 2420 ret = -EIO; 2404 2421 break; 2405 2422 } ··· 2688 2705 2689 2706 found_errors = get_rbio_veritical_errors(rbio, sector_nr, 2690 2707 &faila, &failb); 2691 - if (found_errors > rbio->bioc->max_errors) { 2708 + if (unlikely(found_errors > rbio->bioc->max_errors)) { 2692 2709 ret = -EIO; 2693 2710 goto out; 2694 2711 } ··· 2712 2729 * data, so the capability of the repair is declined. 
(In the 2713 2730 * case of RAID5, we can not repair anything.) 2714 2731 */ 2715 - if (dfail > rbio->bioc->max_errors - 1) { 2732 + if (unlikely(dfail > rbio->bioc->max_errors - 1)) { 2716 2733 ret = -EIO; 2717 2734 goto out; 2718 2735 } ··· 2729 2746 * scrubbing parity, luckily, use the other one to repair the 2730 2747 * data, or we can not repair the data stripe. 2731 2748 */ 2732 - if (failp != rbio->scrubp) { 2749 + if (unlikely(failp != rbio->scrubp)) { 2733 2750 ret = -EIO; 2734 2751 goto out; 2735 2752 } ··· 2820 2837 int found_errors; 2821 2838 2822 2839 found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL); 2823 - if (found_errors > rbio->bioc->max_errors) { 2840 + if (unlikely(found_errors > rbio->bioc->max_errors)) { 2824 2841 ret = -EIO; 2825 2842 break; 2826 2843 } ··· 2844 2861 * This is for scrub call sites where we already have correct data contents. 2845 2862 * This allows us to avoid reading data stripes again. 2846 2863 * 2847 - * Unfortunately here we have to do page copy, other than reusing the pages. 2864 + * Unfortunately here we have to do folio copy, other than reusing the pages. 2848 2865 * This is due to the fact rbio has its own page management for its cache. 
2849 2866 */ 2850 - void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio, 2851 - struct page **data_pages, u64 data_logical) 2867 + void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio, 2868 + struct folio **data_folios, u64 data_logical) 2852 2869 { 2870 + struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 2853 2871 const u64 offset_in_full_stripe = data_logical - 2854 2872 rbio->bioc->full_stripe_logical; 2855 - const int page_index = offset_in_full_stripe >> PAGE_SHIFT; 2856 - const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 2857 - const u32 sectors_per_page = PAGE_SIZE / sectorsize; 2873 + unsigned int findex = 0; 2874 + unsigned int foffset = 0; 2858 2875 int ret; 2876 + 2877 + /* We shouldn't hit RAID56 for bs > ps cases for now. */ 2878 + ASSERT(fs_info->sectorsize <= PAGE_SIZE); 2859 2879 2860 2880 /* 2861 2881 * If we hit ENOMEM temporarily, but later at ··· 2876 2890 ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN)); 2877 2891 ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT)); 2878 2892 2879 - for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) { 2880 - struct page *dst = rbio->stripe_pages[page_nr + page_index]; 2881 - struct page *src = data_pages[page_nr]; 2893 + for (unsigned int cur_off = offset_in_full_stripe; 2894 + cur_off < offset_in_full_stripe + BTRFS_STRIPE_LEN; 2895 + cur_off += PAGE_SIZE) { 2896 + const unsigned int pindex = cur_off >> PAGE_SHIFT; 2897 + void *kaddr; 2882 2898 2883 - memcpy_page(dst, 0, src, 0, PAGE_SIZE); 2884 - for (int sector_nr = sectors_per_page * page_index; 2885 - sector_nr < sectors_per_page * (page_index + 1); 2886 - sector_nr++) 2887 - rbio->stripe_sectors[sector_nr].uptodate = true; 2899 + kaddr = kmap_local_page(rbio->stripe_pages[pindex]); 2900 + memcpy_from_folio(kaddr, data_folios[findex], foffset, PAGE_SIZE); 2901 + kunmap_local(kaddr); 2902 + 2903 + foffset += PAGE_SIZE; 2904 + ASSERT(foffset <= 
folio_size(data_folios[findex])); 2905 + if (foffset == folio_size(data_folios[findex])) { 2906 + findex++; 2907 + foffset = 0; 2908 + } 2888 2909 } 2910 + for (unsigned int sector_nr = offset_in_full_stripe >> fs_info->sectorsize_bits; 2911 + sector_nr < (offset_in_full_stripe + BTRFS_STRIPE_LEN) >> fs_info->sectorsize_bits; 2912 + sector_nr++) 2913 + rbio->stripe_sectors[sector_nr].uptodate = true; 2889 2914 }

+2 -2

fs/btrfs/raid56.h

··· 201 201 unsigned long *dbitmap, int stripe_nsectors); 202 202 void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); 203 203 204 - void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio, 205 - struct page **data_pages, u64 data_logical); 204 + void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio, 205 + struct folio **data_folios, u64 data_logical); 206 206 207 207 int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); 208 208 void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);

+1 -2

fs/btrfs/ref-verify.c

··· 971 971 int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) 972 972 { 973 973 struct btrfs_root *extent_root; 974 - struct btrfs_path *path; 974 + BTRFS_PATH_AUTO_FREE(path); 975 975 struct extent_buffer *eb; 976 976 int tree_block_level = 0; 977 977 u64 bytenr = 0, num_bytes = 0; ··· 1021 1021 btrfs_free_ref_cache(fs_info); 1022 1022 btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); 1023 1023 } 1024 - btrfs_free_path(path); 1025 1024 return ret; 1026 1025 }

+2 -2

fs/btrfs/ref-verify.h

··· 12 12 struct btrfs_fs_info; 13 13 struct btrfs_ref; 14 14 15 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 15 + #ifdef CONFIG_BTRFS_DEBUG 16 16 17 17 #include <linux/spinlock.h> 18 18 ··· 53 53 { 54 54 } 55 55 56 - #endif /* CONFIG_BTRFS_FS_REF_VERIFY */ 56 + #endif /* CONFIG_BTRFS_DEBUG */ 57 57 58 58 #endif

+7 -8

fs/btrfs/reflink.c

··· 23 23 u64 endoff, 24 24 const u64 destoff, 25 25 const u64 olen, 26 - int no_time_update) 26 + bool no_time_update) 27 27 { 28 28 int ret; 29 29 ··· 43 43 } 44 44 45 45 ret = btrfs_update_inode(trans, BTRFS_I(inode)); 46 - if (ret) { 46 + if (unlikely(ret)) { 47 47 btrfs_abort_transaction(trans, ret); 48 48 btrfs_end_transaction(trans); 49 49 return ret; ··· 268 268 drop_args.end = aligned_end; 269 269 drop_args.drop_cache = true; 270 270 ret = btrfs_drop_extents(trans, root, inode, &drop_args); 271 - if (ret) { 271 + if (unlikely(ret)) { 272 272 btrfs_abort_transaction(trans, ret); 273 273 goto out; 274 274 } 275 275 ret = btrfs_insert_empty_item(trans, root, path, new_key, size); 276 - if (ret) { 276 + if (unlikely(ret)) { 277 277 btrfs_abort_transaction(trans, ret); 278 278 goto out; 279 279 } ··· 285 285 btrfs_update_inode_bytes(inode, datal, drop_args.bytes_found); 286 286 btrfs_set_inode_full_sync(inode); 287 287 ret = btrfs_inode_set_file_extent_range(inode, 0, aligned_end); 288 - if (ret) 288 + if (unlikely(ret)) 289 289 btrfs_abort_transaction(trans, ret); 290 290 out: 291 291 if (!ret && !trans) { ··· 337 337 */ 338 338 static int btrfs_clone(struct inode *src, struct inode *inode, 339 339 const u64 off, const u64 olen, const u64 olen_aligned, 340 - const u64 destoff, int no_time_update) 340 + const u64 destoff, bool no_time_update) 341 341 { 342 342 struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 343 - struct btrfs_path *path = NULL; 343 + BTRFS_PATH_AUTO_FREE(path); 344 344 struct extent_buffer *leaf; 345 345 struct btrfs_trans_handle *trans; 346 346 char *buf = NULL; ··· 611 611 } 612 612 613 613 out: 614 - btrfs_free_path(path); 615 614 kvfree(buf); 616 615 clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags); 617 616

+33 -48

fs/btrfs/relocation.c

··· 821 821 u64 bytenr, u64 num_bytes) 822 822 { 823 823 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 824 - struct btrfs_path *path; 824 + BTRFS_PATH_AUTO_FREE(path); 825 825 struct btrfs_file_extent_item *fi; 826 826 struct extent_buffer *leaf; 827 827 int ret; ··· 834 834 ret = btrfs_lookup_file_extent(NULL, root, path, 835 835 btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0); 836 836 if (ret < 0) 837 - goto out; 838 - if (ret > 0) { 839 - ret = -ENOENT; 840 - goto out; 841 - } 837 + return ret; 838 + if (ret > 0) 839 + return -ENOENT; 842 840 843 841 leaf = path->nodes[0]; 844 842 fi = btrfs_item_ptr(leaf, path->slots[0], ··· 847 849 btrfs_file_extent_encryption(leaf, fi) || 848 850 btrfs_file_extent_other_encoding(leaf, fi)); 849 851 850 - if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 851 - ret = -EINVAL; 852 - goto out; 853 - } 852 + if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) 853 + return -EINVAL; 854 854 855 855 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 856 - ret = 0; 857 - out: 858 - btrfs_free_path(path); 859 - return ret; 856 + return 0; 860 857 } 861 858 862 859 /* ··· 967 974 btrfs_init_data_ref(&ref, key.objectid, key.offset, 968 975 btrfs_root_id(root), false); 969 976 ret = btrfs_inc_extent_ref(trans, &ref); 970 - if (ret) { 977 + if (unlikely(ret)) { 971 978 btrfs_abort_transaction(trans, ret); 972 979 break; 973 980 } ··· 981 988 btrfs_init_data_ref(&ref, key.objectid, key.offset, 982 989 btrfs_root_id(root), false); 983 990 ret = btrfs_free_extent(trans, &ref); 984 - if (ret) { 991 + if (unlikely(ret)) { 985 992 btrfs_abort_transaction(trans, ret); 986 993 break; 987 994 } ··· 1192 1199 ref.ref_root = btrfs_root_id(src); 1193 1200 btrfs_init_tree_ref(&ref, level - 1, 0, true); 1194 1201 ret = btrfs_inc_extent_ref(trans, &ref); 1195 - if (ret) { 1202 + if (unlikely(ret)) { 1196 1203 btrfs_abort_transaction(trans, ret); 1197 1204 break; 1198 1205 } ··· 1205 1212 ref.ref_root = btrfs_root_id(dest); 
1206 1213 btrfs_init_tree_ref(&ref, level - 1, 0, true); 1207 1214 ret = btrfs_inc_extent_ref(trans, &ref); 1208 - if (ret) { 1215 + if (unlikely(ret)) { 1209 1216 btrfs_abort_transaction(trans, ret); 1210 1217 break; 1211 1218 } ··· 1219 1226 ref.ref_root = btrfs_root_id(src); 1220 1227 btrfs_init_tree_ref(&ref, level - 1, 0, true); 1221 1228 ret = btrfs_free_extent(trans, &ref); 1222 - if (ret) { 1229 + if (unlikely(ret)) { 1223 1230 btrfs_abort_transaction(trans, ret); 1224 1231 break; 1225 1232 } ··· 1233 1240 ref.ref_root = btrfs_root_id(dest); 1234 1241 btrfs_init_tree_ref(&ref, level - 1, 0, true); 1235 1242 ret = btrfs_free_extent(trans, &ref); 1236 - if (ret) { 1243 + if (unlikely(ret)) { 1237 1244 btrfs_abort_transaction(trans, ret); 1238 1245 break; 1239 1246 } ··· 1483 1490 * ->reloc_root. If it fails however we must 1484 1491 * drop the ref ourselves. 1485 1492 */ 1486 - ret2 = btrfs_drop_snapshot(reloc_root, 0, 1); 1493 + ret2 = btrfs_drop_snapshot(reloc_root, false, true); 1487 1494 if (ret2 < 0) { 1488 1495 btrfs_put_root(reloc_root); 1489 1496 if (!ret) ··· 1493 1500 btrfs_put_root(root); 1494 1501 } else { 1495 1502 /* Orphan reloc tree, just clean it up */ 1496 - ret2 = btrfs_drop_snapshot(root, 0, 1); 1503 + ret2 = btrfs_drop_snapshot(root, false, true); 1497 1504 if (ret2 < 0) { 1498 1505 btrfs_put_root(root); 1499 1506 if (!ret) ··· 1784 1791 list_add(&reloc_root->root_list, &reloc_roots); 1785 1792 btrfs_put_root(root); 1786 1793 1787 - if (ret) { 1794 + if (unlikely(ret)) { 1788 1795 btrfs_abort_transaction(trans, ret); 1789 1796 if (!err) 1790 1797 err = ret; ··· 1953 1960 DEBUG_WARN("error %ld reading root for reloc root", PTR_ERR(root)); 1954 1961 return PTR_ERR(root); 1955 1962 } 1956 - if (root->reloc_root != reloc_root) { 1963 + if (unlikely(root->reloc_root != reloc_root)) { 1957 1964 DEBUG_WARN("unexpected reloc root found"); 1958 1965 btrfs_err(fs_info, 1959 1966 "root %llu has two reloc roots associated with it", ··· 2024 2031 if 
(!root) 2025 2032 return ERR_PTR(-ENOENT); 2026 2033 2027 - if (next->new_bytenr) { 2034 + if (unlikely(next->new_bytenr)) { 2028 2035 /* 2029 2036 * We just created the reloc root, so we shouldn't have 2030 2037 * ->new_bytenr set yet. If it is then we have multiple roots ··· 2083 2090 * This can occur if we have incomplete extent refs leading all 2084 2091 * the way up a particular path, in this case return -EUCLEAN. 2085 2092 */ 2086 - if (!root) 2093 + if (unlikely(!root)) 2087 2094 return ERR_PTR(-EUCLEAN); 2088 2095 2089 2096 /* No other choice for non-shareable tree */ ··· 2270 2277 2271 2278 bytenr = btrfs_node_blockptr(upper->eb, slot); 2272 2279 if (lowest) { 2273 - if (bytenr != node->bytenr) { 2280 + if (unlikely(bytenr != node->bytenr)) { 2274 2281 btrfs_err(root->fs_info, 2275 2282 "lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu", 2276 2283 bytenr, node->bytenr, slot, ··· 2325 2332 if (!ret) 2326 2333 ret = btrfs_drop_subtree(trans, root, eb, 2327 2334 upper->eb); 2328 - if (ret) 2335 + if (unlikely(ret)) 2329 2336 btrfs_abort_transaction(trans, ret); 2330 2337 } 2331 2338 next: ··· 2447 2454 eb = read_tree_block(fs_info, block->bytenr, &check); 2448 2455 if (IS_ERR(eb)) 2449 2456 return PTR_ERR(eb); 2450 - if (!extent_buffer_uptodate(eb)) { 2457 + if (unlikely(!extent_buffer_uptodate(eb))) { 2451 2458 free_extent_buffer(eb); 2452 2459 return -EIO; 2453 2460 } ··· 2512 2519 * normal user in the case of corruption. 
2513 2520 */ 2514 2521 ASSERT(node->new_bytenr == 0); 2515 - if (node->new_bytenr) { 2522 + if (unlikely(node->new_bytenr)) { 2516 2523 btrfs_err(root->fs_info, 2517 2524 "bytenr %llu has improper references to it", 2518 2525 node->bytenr); ··· 2832 2839 if (!folio_test_uptodate(folio)) { 2833 2840 btrfs_read_folio(NULL, folio); 2834 2841 folio_lock(folio); 2835 - if (!folio_test_uptodate(folio)) { 2842 + if (unlikely(!folio_test_uptodate(folio))) { 2836 2843 ret = -EIO; 2837 2844 goto release_folio; 2838 2845 } ··· 3151 3158 struct rb_root *blocks) 3152 3159 { 3153 3160 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3154 - struct btrfs_path *path; 3161 + BTRFS_PATH_AUTO_FREE(path); 3155 3162 struct btrfs_key key; 3156 3163 int ret; 3157 3164 bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA); ··· 3179 3186 path->skip_locking = 1; 3180 3187 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 3181 3188 if (ret < 0) 3182 - goto out; 3189 + return ret; 3183 3190 3184 3191 if (ret > 0 && skinny) { 3185 3192 if (path->slots[0]) { ··· 3206 3213 "tree block extent item (%llu) is not found in extent tree", 3207 3214 bytenr); 3208 3215 WARN_ON(1); 3209 - ret = -EINVAL; 3210 - goto out; 3216 + return -EINVAL; 3211 3217 } 3212 3218 3213 - ret = add_tree_block(rc, &key, path, blocks); 3214 - out: 3215 - btrfs_free_path(path); 3216 - return ret; 3219 + return add_tree_block(rc, &key, path, blocks); 3217 3220 } 3218 3221 3219 3222 static int delete_block_group_cache(struct btrfs_block_group *block_group, ··· 3499 3510 struct rb_root blocks = RB_ROOT; 3500 3511 struct btrfs_key key; 3501 3512 struct btrfs_trans_handle *trans = NULL; 3502 - struct btrfs_path *path; 3513 + BTRFS_PATH_AUTO_FREE(path); 3503 3514 struct btrfs_extent_item *ei; 3504 3515 u64 flags; 3505 3516 int ret; ··· 3668 3679 if (ret < 0 && !err) 3669 3680 err = ret; 3670 3681 btrfs_free_block_rsv(fs_info, rc->block_rsv); 3671 - btrfs_free_path(path); 3672 3682 return err; 3673 3683 
} 3674 3684 3675 3685 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3676 3686 struct btrfs_root *root, u64 objectid) 3677 3687 { 3678 - struct btrfs_path *path; 3688 + BTRFS_PATH_AUTO_FREE(path); 3679 3689 struct btrfs_inode_item *item; 3680 3690 struct extent_buffer *leaf; 3681 3691 int ret; ··· 3685 3697 3686 3698 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 3687 3699 if (ret) 3688 - goto out; 3700 + return ret; 3689 3701 3690 3702 leaf = path->nodes[0]; 3691 3703 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); ··· 3695 3707 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3696 3708 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 3697 3709 BTRFS_INODE_PREALLOC); 3698 - out: 3699 - btrfs_free_path(path); 3700 - return ret; 3710 + return 0; 3701 3711 } 3702 3712 3703 3713 static void delete_orphan_inode(struct btrfs_trans_handle *trans, 3704 3714 struct btrfs_root *root, u64 objectid) 3705 3715 { 3706 - struct btrfs_path *path; 3716 + BTRFS_PATH_AUTO_FREE(path); 3707 3717 struct btrfs_key key; 3708 3718 int ret = 0; 3709 3719 ··· 3724 3738 out: 3725 3739 if (ret) 3726 3740 btrfs_abort_transaction(trans, ret); 3727 - btrfs_free_path(path); 3728 3741 } 3729 3742 3730 3743 /*

+26 -40

fs/btrfs/root-tree.c

··· 85 85 * Key with offset -1 found, there would have to exist a root 86 86 * with such id, but this is out of the valid range. 87 87 */ 88 - if (ret == 0) { 88 + if (unlikely(ret == 0)) { 89 89 ret = -EUCLEAN; 90 90 goto out; 91 91 } ··· 130 130 *item) 131 131 { 132 132 struct btrfs_fs_info *fs_info = root->fs_info; 133 - struct btrfs_path *path; 133 + BTRFS_PATH_AUTO_FREE(path); 134 134 struct extent_buffer *l; 135 135 int ret; 136 136 int slot; ··· 143 143 144 144 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 145 145 if (ret < 0) 146 - goto out; 146 + return ret; 147 147 148 - if (ret > 0) { 148 + if (unlikely(ret > 0)) { 149 149 btrfs_crit(fs_info, 150 150 "unable to find root key (%llu %u %llu) in tree %llu", 151 151 key->objectid, key->type, key->offset, btrfs_root_id(root)); 152 152 ret = -EUCLEAN; 153 153 btrfs_abort_transaction(trans, ret); 154 - goto out; 154 + return ret; 155 155 } 156 156 157 157 l = path->nodes[0]; ··· 168 168 btrfs_release_path(path); 169 169 ret = btrfs_search_slot(trans, root, key, path, 170 170 -1, 1); 171 - if (ret < 0) { 171 + if (unlikely(ret < 0)) { 172 172 btrfs_abort_transaction(trans, ret); 173 - goto out; 173 + return ret; 174 174 } 175 175 176 176 ret = btrfs_del_item(trans, root, path); 177 - if (ret < 0) { 177 + if (unlikely(ret < 0)) { 178 178 btrfs_abort_transaction(trans, ret); 179 - goto out; 179 + return ret; 180 180 } 181 181 btrfs_release_path(path); 182 182 ret = btrfs_insert_empty_item(trans, root, path, 183 183 key, sizeof(*item)); 184 - if (ret < 0) { 184 + if (unlikely(ret < 0)) { 185 185 btrfs_abort_transaction(trans, ret); 186 - goto out; 186 + return ret; 187 187 } 188 188 l = path->nodes[0]; 189 189 slot = path->slots[0]; ··· 197 197 btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); 198 198 199 199 write_extent_buffer(l, item, ptr, sizeof(*item)); 200 - out: 201 - btrfs_free_path(path); 202 200 return ret; 203 201 } 204 202 ··· 214 216 { 215 217 struct btrfs_root *tree_root = 
fs_info->tree_root; 216 218 struct extent_buffer *leaf; 217 - struct btrfs_path *path; 219 + BTRFS_PATH_AUTO_FREE(path); 218 220 struct btrfs_key key; 219 221 struct btrfs_root *root; 220 222 int err = 0; ··· 307 309 btrfs_put_root(root); 308 310 } 309 311 310 - btrfs_free_path(path); 311 312 return err; 312 313 } 313 314 ··· 315 318 const struct btrfs_key *key) 316 319 { 317 320 struct btrfs_root *root = trans->fs_info->tree_root; 318 - struct btrfs_path *path; 321 + BTRFS_PATH_AUTO_FREE(path); 319 322 int ret; 320 323 321 324 path = btrfs_alloc_path(); ··· 323 326 return -ENOMEM; 324 327 ret = btrfs_search_slot(trans, root, key, path, -1, 1); 325 328 if (ret < 0) 326 - goto out; 327 - if (ret != 0) { 329 + return ret; 330 + if (unlikely(ret > 0)) 328 331 /* The root must exist but we did not find it by the key. */ 329 - ret = -EUCLEAN; 330 - goto out; 331 - } 332 + return -EUCLEAN; 332 333 333 - ret = btrfs_del_item(trans, root, path); 334 - out: 335 - btrfs_free_path(path); 336 - return ret; 334 + return btrfs_del_item(trans, root, path); 337 335 } 338 336 339 337 int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, ··· 336 344 const struct fscrypt_str *name) 337 345 { 338 346 struct btrfs_root *tree_root = trans->fs_info->tree_root; 339 - struct btrfs_path *path; 347 + BTRFS_PATH_AUTO_FREE(path); 340 348 struct btrfs_root_ref *ref; 341 349 struct extent_buffer *leaf; 342 350 struct btrfs_key key; ··· 353 361 again: 354 362 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); 355 363 if (ret < 0) { 356 - goto out; 364 + return ret; 357 365 } else if (ret == 0) { 358 366 leaf = path->nodes[0]; 359 367 ref = btrfs_item_ptr(leaf, path->slots[0], ··· 361 369 ptr = (unsigned long)(ref + 1); 362 370 if ((btrfs_root_ref_dirid(leaf, ref) != dirid) || 363 371 (btrfs_root_ref_name_len(leaf, ref) != name->len) || 364 - memcmp_extent_buffer(leaf, name->name, ptr, name->len)) { 365 - ret = -ENOENT; 366 - goto out; 367 - } 372 + 
memcmp_extent_buffer(leaf, name->name, ptr, name->len)) 373 + return -ENOENT; 374 + 368 375 *sequence = btrfs_root_ref_sequence(leaf, ref); 369 376 370 377 ret = btrfs_del_item(trans, tree_root, path); 371 378 if (ret) 372 - goto out; 379 + return ret; 373 380 } else { 374 - ret = -ENOENT; 375 - goto out; 381 + return -ENOENT; 376 382 } 377 383 378 384 if (key.type == BTRFS_ROOT_BACKREF_KEY) { ··· 381 391 goto again; 382 392 } 383 393 384 - out: 385 - btrfs_free_path(path); 386 394 return ret; 387 395 } 388 396 ··· 406 418 struct btrfs_root *tree_root = trans->fs_info->tree_root; 407 419 struct btrfs_key key; 408 420 int ret; 409 - struct btrfs_path *path; 421 + BTRFS_PATH_AUTO_FREE(path); 410 422 struct btrfs_root_ref *ref; 411 423 struct extent_buffer *leaf; 412 424 unsigned long ptr; ··· 421 433 again: 422 434 ret = btrfs_insert_empty_item(trans, tree_root, path, &key, 423 435 sizeof(*ref) + name->len); 424 - if (ret) { 436 + if (unlikely(ret)) { 425 437 btrfs_abort_transaction(trans, ret); 426 - btrfs_free_path(path); 427 438 return ret; 428 439 } 429 440 ··· 442 455 goto again; 443 456 } 444 457 445 - btrfs_free_path(path); 446 458 return 0; 447 459 } 448 460

+55 -40

fs/btrfs/scrub.c

··· 113 113 /* Which blocks are covered by extent items. */ 114 114 scrub_bitmap_nr_has_extent = 0, 115 115 116 - /* Which blocks are meteadata. */ 116 + /* Which blocks are metadata. */ 117 117 scrub_bitmap_nr_is_metadata, 118 118 119 119 /* ··· 130 130 scrub_bitmap_nr_last, 131 131 }; 132 132 133 - #define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE) 133 + #define SCRUB_STRIPE_MAX_FOLIOS (BTRFS_STRIPE_LEN / PAGE_SIZE) 134 134 135 135 /* 136 136 * Represent one contiguous range with a length of BTRFS_STRIPE_LEN. ··· 139 139 struct scrub_ctx *sctx; 140 140 struct btrfs_block_group *bg; 141 141 142 - struct page *pages[SCRUB_STRIPE_PAGES]; 142 + struct folio *folios[SCRUB_STRIPE_MAX_FOLIOS]; 143 143 struct scrub_sector_verification *sectors; 144 144 145 145 struct btrfs_device *dev; ··· 206 206 ktime_t throttle_deadline; 207 207 u64 throttle_sent; 208 208 209 - int is_dev_replace; 209 + bool is_dev_replace; 210 210 u64 write_pointer; 211 211 212 212 struct mutex wr_lock; ··· 339 339 if (!stripe) 340 340 return; 341 341 342 - for (int i = 0; i < SCRUB_STRIPE_PAGES; i++) { 343 - if (stripe->pages[i]) 344 - __free_page(stripe->pages[i]); 345 - stripe->pages[i] = NULL; 342 + for (int i = 0; i < SCRUB_STRIPE_MAX_FOLIOS; i++) { 343 + if (stripe->folios[i]) 344 + folio_put(stripe->folios[i]); 345 + stripe->folios[i] = NULL; 346 346 } 347 347 kfree(stripe->sectors); 348 348 kfree(stripe->csums); ··· 355 355 static int init_scrub_stripe(struct btrfs_fs_info *fs_info, 356 356 struct scrub_stripe *stripe) 357 357 { 358 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 358 359 int ret; 359 360 360 361 memset(stripe, 0, sizeof(*stripe)); ··· 368 367 atomic_set(&stripe->pending_io, 0); 369 368 spin_lock_init(&stripe->write_error_lock); 370 369 371 - ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, false); 370 + ASSERT(BTRFS_STRIPE_LEN >> min_folio_shift <= SCRUB_STRIPE_MAX_FOLIOS); 371 + ret = btrfs_alloc_folio_array(BTRFS_STRIPE_LEN >> 
min_folio_shift, 372 + fs_info->block_min_order, stripe->folios); 372 373 if (ret < 0) 373 374 goto error; 374 375 ··· 449 446 } 450 447 451 448 static noinline_for_stack struct scrub_ctx *scrub_setup_ctx( 452 - struct btrfs_fs_info *fs_info, int is_dev_replace) 449 + struct btrfs_fs_info *fs_info, bool is_dev_replace) 453 450 { 454 451 struct scrub_ctx *sctx; 455 452 int i; ··· 588 585 bool is_super, u64 logical, u64 physical) 589 586 { 590 587 struct btrfs_fs_info *fs_info = dev->fs_info; 591 - struct btrfs_path *path; 588 + BTRFS_PATH_AUTO_FREE(path); 592 589 struct btrfs_key found_key; 593 590 struct extent_buffer *eb; 594 591 struct btrfs_extent_item *ei; ··· 615 612 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, 616 613 &flags); 617 614 if (ret < 0) 618 - goto out; 615 + return; 619 616 620 617 swarn.extent_item_size = found_key.offset; 621 618 ··· 661 658 662 659 iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn); 663 660 } 664 - 665 - out: 666 - btrfs_free_path(path); 667 661 } 668 662 669 663 static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical) ··· 687 687 688 688 static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr) 689 689 { 690 - u32 offset = (sector_nr << stripe->bg->fs_info->sectorsize_bits); 691 - const struct page *page = stripe->pages[offset >> PAGE_SHIFT]; 690 + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 691 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 692 + u32 offset = (sector_nr << fs_info->sectorsize_bits); 693 + const struct folio *folio = stripe->folios[offset >> min_folio_shift]; 692 694 693 - /* stripe->pages[] is allocated by us and no highmem is allowed. */ 694 - ASSERT(page); 695 - ASSERT(!PageHighMem(page)); 696 - return page_address(page) + offset_in_page(offset); 695 + /* stripe->folios[] is allocated by us and no highmem is allowed. 
*/ 696 + ASSERT(folio); 697 + ASSERT(!folio_test_partial_kmap(folio)); 698 + return folio_address(folio) + offset_in_folio(folio, offset); 699 + } 700 + 701 + static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int sector_nr) 702 + { 703 + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 704 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 705 + u32 offset = (sector_nr << fs_info->sectorsize_bits); 706 + const struct folio *folio = stripe->folios[offset >> min_folio_shift]; 707 + 708 + /* stripe->folios[] is allocated by us and no highmem is allowed. */ 709 + ASSERT(folio); 710 + ASSERT(!folio_test_partial_kmap(folio)); 711 + /* And the range must be contained inside the folio. */ 712 + ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio)); 713 + return page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, offset); 697 714 } 698 715 699 716 static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr) ··· 805 788 struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 806 789 struct scrub_sector_verification *sector = &stripe->sectors[sector_nr]; 807 790 const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; 808 - void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr); 791 + phys_addr_t paddr = scrub_stripe_get_paddr(stripe, sector_nr); 809 792 u8 csum_buf[BTRFS_CSUM_SIZE]; 810 793 int ret; 811 794 ··· 850 833 return; 851 834 } 852 835 853 - ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, sector->csum); 836 + ret = btrfs_check_block_csum(fs_info, paddr, csum_buf, sector->csum); 854 837 if (ret < 0) { 855 838 scrub_bitmap_set_bit_csum_error(stripe, sector_nr); 856 839 scrub_bitmap_set_bit_error(stripe, sector_nr); ··· 1386 1369 * Slice is divided into intervals when the IO is submitted, adjust by 1387 1370 * bwlimit and maximum of 64 intervals. 
1388 1371 */ 1389 - div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024))); 1390 - div = min_t(u32, 64, div); 1372 + div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64); 1391 1373 1392 1374 /* Start new epoch, set deadline */ 1393 1375 now = ktime_get(); ··· 1529 1513 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 1530 1514 if (ret < 0) 1531 1515 return ret; 1532 - if (ret == 0) { 1516 + if (unlikely(ret == 0)) { 1533 1517 /* 1534 1518 * Key with offset -1 found, there would have to exist an extent 1535 1519 * item with such offset, but this is out of the valid range. ··· 1875 1859 { 1876 1860 struct btrfs_fs_info *fs_info = sctx->fs_info; 1877 1861 struct btrfs_bio *bbio; 1862 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 1878 1863 unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; 1879 1864 int mirror = stripe->mirror_num; 1880 1865 ··· 1888 1871 return; 1889 1872 } 1890 1873 1891 - bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, 1874 + bbio = btrfs_bio_alloc(BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ, fs_info, 1892 1875 scrub_read_endio, stripe); 1893 1876 1894 1877 bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT; ··· 1987 1970 * metadata, we should immediately abort. 1988 1971 */ 1989 1972 for (int i = 0; i < nr_stripes; i++) { 1990 - if (stripe_has_metadata_error(&sctx->stripes[i])) { 1973 + if (unlikely(stripe_has_metadata_error(&sctx->stripes[i]))) { 1991 1974 ret = -EIO; 1992 1975 goto out; 1993 1976 } ··· 2181 2164 * As we may hit an empty data stripe while it's missing. 
2182 2165 */ 2183 2166 bitmap_and(&error, &error, &has_extent, stripe->nr_sectors); 2184 - if (!bitmap_empty(&error, stripe->nr_sectors)) { 2167 + if (unlikely(!bitmap_empty(&error, stripe->nr_sectors))) { 2185 2168 btrfs_err(fs_info, 2186 2169 "scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", 2187 2170 full_stripe_start, i, stripe->nr_sectors, ··· 2219 2202 for (int i = 0; i < data_stripes; i++) { 2220 2203 stripe = &sctx->raid56_data_stripes[i]; 2221 2204 2222 - raid56_parity_cache_data_pages(rbio, stripe->pages, 2205 + raid56_parity_cache_data_folios(rbio, stripe->folios, 2223 2206 full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT)); 2224 2207 } 2225 2208 raid56_parity_submit_scrub_rbio(rbio); ··· 2603 2586 struct btrfs_device *scrub_dev, u64 start, u64 end) 2604 2587 { 2605 2588 struct btrfs_dev_extent *dev_extent = NULL; 2606 - struct btrfs_path *path; 2589 + BTRFS_PATH_AUTO_FREE(path); 2607 2590 struct btrfs_fs_info *fs_info = sctx->fs_info; 2608 2591 struct btrfs_root *root = fs_info->dev_root; 2609 2592 u64 chunk_offset; ··· 2875 2858 btrfs_put_block_group(cache); 2876 2859 if (ret) 2877 2860 break; 2878 - if (sctx->is_dev_replace && 2879 - atomic64_read(&dev_replace->num_write_errors) > 0) { 2861 + if (unlikely(sctx->is_dev_replace && 2862 + atomic64_read(&dev_replace->num_write_errors) > 0)) { 2880 2863 ret = -EIO; 2881 2864 break; 2882 2865 } ··· 2888 2871 key.offset = found_key.offset + dev_extent_len; 2889 2872 btrfs_release_path(path); 2890 2873 } 2891 - 2892 - btrfs_free_path(path); 2893 2874 2894 2875 return ret; 2895 2876 } ··· 2904 2889 if (ret < 0) 2905 2890 return ret; 2906 2891 ret = btrfs_check_super_csum(fs_info, sb); 2907 - if (ret != 0) { 2892 + if (unlikely(ret != 0)) { 2908 2893 btrfs_err_rl(fs_info, 2909 2894 "scrub: super block at physical %llu devid %llu has bad csum", 2910 2895 physical, dev->devid); 2911 2896 return -EIO; 2912 2897 } 2913 - if (btrfs_super_generation(sb) != generation) { 2898 + if 
(unlikely(btrfs_super_generation(sb) != generation)) { 2914 2899 btrfs_err_rl(fs_info, 2915 2900 "scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu", 2916 2901 physical, dev->devid, ··· 3028 3013 3029 3014 int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, 3030 3015 u64 end, struct btrfs_scrub_progress *progress, 3031 - int readonly, int is_dev_replace) 3016 + bool readonly, bool is_dev_replace) 3032 3017 { 3033 3018 struct btrfs_dev_lookup_args args = { .devid = devid }; 3034 3019 struct scrub_ctx *sctx; ··· 3080 3065 } 3081 3066 3082 3067 mutex_lock(&fs_info->scrub_lock); 3083 - if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || 3084 - test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) { 3068 + if (unlikely(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || 3069 + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state))) { 3085 3070 mutex_unlock(&fs_info->scrub_lock); 3086 3071 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 3087 3072 ret = -EIO;

+1 -1

fs/btrfs/scrub.h

··· 11 11 12 12 int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, 13 13 u64 end, struct btrfs_scrub_progress *progress, 14 - int readonly, int is_dev_replace); 14 + bool readonly, bool is_dev_replace); 15 15 void btrfs_scrub_pause(struct btrfs_fs_info *fs_info); 16 16 void btrfs_scrub_continue(struct btrfs_fs_info *fs_info); 17 17 int btrfs_scrub_cancel(struct btrfs_fs_info *info);

+146 -227

fs/btrfs/send.c

··· 646 646 ret = kernel_write(filp, buf + pos, len - pos, off); 647 647 if (ret < 0) 648 648 return ret; 649 - if (ret == 0) 649 + if (unlikely(ret == 0)) 650 650 return -EIO; 651 651 pos += ret; 652 652 } ··· 909 909 struct btrfs_inode_info *info) 910 910 { 911 911 int ret; 912 - struct btrfs_path *path; 912 + BTRFS_PATH_AUTO_FREE(path); 913 913 struct btrfs_inode_item *ii; 914 914 struct btrfs_key key; 915 915 ··· 924 924 if (ret) { 925 925 if (ret > 0) 926 926 ret = -ENOENT; 927 - goto out; 927 + return ret; 928 928 } 929 929 930 930 if (!info) 931 - goto out; 931 + return 0; 932 932 933 933 ii = btrfs_item_ptr(path->nodes[0], path->slots[0], 934 934 struct btrfs_inode_item); ··· 945 945 */ 946 946 info->fileattr = btrfs_inode_flags(path->nodes[0], ii); 947 947 948 - out: 949 - btrfs_free_path(path); 950 - return ret; 948 + return 0; 951 949 } 952 950 953 951 static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen) ··· 971 973 * path must point to the INODE_REF or INODE_EXTREF when called. 
972 974 */ 973 975 static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, 974 - struct btrfs_key *found_key, int resolve, 976 + struct btrfs_key *found_key, bool resolve, 975 977 iterate_inode_ref_t iterate, void *ctx) 976 978 { 977 979 struct extent_buffer *eb = path->nodes[0]; 978 980 struct btrfs_inode_ref *iref; 979 981 struct btrfs_inode_extref *extref; 980 - struct btrfs_path *tmp_path; 982 + BTRFS_PATH_AUTO_FREE(tmp_path); 981 983 struct fs_path *p; 982 984 u32 cur = 0; 983 985 u32 total; ··· 1074 1076 } 1075 1077 1076 1078 out: 1077 - btrfs_free_path(tmp_path); 1078 1079 fs_path_free(p); 1079 1080 return ret; 1080 1081 } ··· 1221 1224 { 1222 1225 int ret; 1223 1226 struct btrfs_key key, found_key; 1224 - struct btrfs_path *p; 1227 + BTRFS_PATH_AUTO_FREE(p); 1225 1228 1226 1229 p = alloc_path_for_send(); 1227 1230 if (!p) ··· 1235 1238 1236 1239 ret = btrfs_search_slot_for_read(root, &key, p, 1, 0); 1237 1240 if (ret < 0) 1238 - goto out; 1239 - if (ret) { 1240 - ret = 1; 1241 - goto out; 1242 - } 1241 + return ret; 1242 + if (ret) 1243 + return 1; 1244 + 1243 1245 btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); 1244 1246 if (found_key.objectid != ino || 1245 1247 (found_key.type != BTRFS_INODE_REF_KEY && 1246 - found_key.type != BTRFS_INODE_EXTREF_KEY)) { 1247 - ret = -ENOENT; 1248 - goto out; 1249 - } 1248 + found_key.type != BTRFS_INODE_EXTREF_KEY)) 1249 + return -ENOENT; 1250 1250 1251 - ret = iterate_inode_ref(root, p, &found_key, 1, 1252 - __copy_first_ref, path); 1251 + ret = iterate_inode_ref(root, p, &found_key, true, __copy_first_ref, path); 1253 1252 if (ret < 0) 1254 - goto out; 1255 - ret = 0; 1256 - 1257 - out: 1258 - btrfs_free_path(p); 1259 - return ret; 1253 + return ret; 1254 + return 0; 1260 1255 } 1261 1256 1262 1257 struct backref_ctx { ··· 1378 1389 struct backref_ctx *bctx = ctx; 1379 1390 struct send_ctx *sctx = bctx->sctx; 1380 1391 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 1381 - 
const u64 key = leaf_bytenr >> fs_info->sectorsize_bits; 1392 + const u64 key = leaf_bytenr >> fs_info->nodesize_bits; 1382 1393 struct btrfs_lru_cache_entry *raw_entry; 1383 1394 struct backref_cache_entry *entry; 1384 1395 ··· 1433 1444 if (!new_entry) 1434 1445 return; 1435 1446 1436 - new_entry->entry.key = leaf_bytenr >> fs_info->sectorsize_bits; 1447 + new_entry->entry.key = leaf_bytenr >> fs_info->nodesize_bits; 1437 1448 new_entry->entry.gen = 0; 1438 1449 new_entry->num_roots = 0; 1439 1450 ULIST_ITER_INIT(&uiter); ··· 1705 1716 struct fs_path *dest) 1706 1717 { 1707 1718 int ret; 1708 - struct btrfs_path *path; 1719 + BTRFS_PATH_AUTO_FREE(path); 1709 1720 struct btrfs_key key; 1710 1721 struct btrfs_file_extent_item *ei; 1711 1722 u8 type; ··· 1722 1733 key.offset = 0; 1723 1734 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1724 1735 if (ret < 0) 1725 - goto out; 1726 - if (ret) { 1736 + return ret; 1737 + if (unlikely(ret)) { 1727 1738 /* 1728 1739 * An empty symlink inode. Can happen in rare error paths when 1729 1740 * creating a symlink (transaction committed before the inode 1730 1741 * eviction handler removed the symlink inode items and a crash 1731 - * happened in between or the subvol was snapshoted in between). 1742 + * happened in between or the subvol was snapshotted in between). 1732 1743 * Print an informative message to dmesg/syslog so that the user 1733 1744 * can delete the symlink. 
1734 1745 */ 1735 1746 btrfs_err(root->fs_info, 1736 1747 "Found empty symlink inode %llu at root %llu", 1737 1748 ino, btrfs_root_id(root)); 1738 - ret = -EIO; 1739 - goto out; 1749 + return -EIO; 1740 1750 } 1741 1751 1742 1752 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], ··· 1746 1758 btrfs_crit(root->fs_info, 1747 1759 "send: found symlink extent that is not inline, ino %llu root %llu extent type %d", 1748 1760 ino, btrfs_root_id(root), type); 1749 - goto out; 1761 + return ret; 1750 1762 } 1751 1763 compression = btrfs_file_extent_compression(path->nodes[0], ei); 1752 1764 if (unlikely(compression != BTRFS_COMPRESS_NONE)) { ··· 1754 1766 btrfs_crit(root->fs_info, 1755 1767 "send: found symlink extent with compression, ino %llu root %llu compression type %d", 1756 1768 ino, btrfs_root_id(root), compression); 1757 - goto out; 1769 + return ret; 1758 1770 } 1759 1771 1760 1772 off = btrfs_file_extent_inline_start(ei); 1761 1773 len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); 1762 1774 1763 - ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); 1764 - 1765 - out: 1766 - btrfs_free_path(path); 1767 - return ret; 1775 + return fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); 1768 1776 } 1769 1777 1770 1778 /* ··· 1771 1787 u64 ino, u64 gen, 1772 1788 struct fs_path *dest) 1773 1789 { 1774 - int ret = 0; 1775 - struct btrfs_path *path; 1790 + BTRFS_PATH_AUTO_FREE(path); 1776 1791 struct btrfs_dir_item *di; 1777 1792 char tmp[64]; 1778 1793 int len; ··· 1794 1811 path, BTRFS_FIRST_FREE_OBJECTID, 1795 1812 &tmp_name, 0); 1796 1813 btrfs_release_path(path); 1797 - if (IS_ERR(di)) { 1798 - ret = PTR_ERR(di); 1799 - goto out; 1800 - } 1814 + if (IS_ERR(di)) 1815 + return PTR_ERR(di); 1816 + 1801 1817 if (di) { 1802 1818 /* not unique, try again */ 1803 1819 idx++; ··· 1805 1823 1806 1824 if (!sctx->parent_root) { 1807 1825 /* unique */ 1808 - ret = 0; 1809 1826 break; 1810 1827 } 1811 1828 ··· 1812 1831 path, 
BTRFS_FIRST_FREE_OBJECTID, 1813 1832 &tmp_name, 0); 1814 1833 btrfs_release_path(path); 1815 - if (IS_ERR(di)) { 1816 - ret = PTR_ERR(di); 1817 - goto out; 1818 - } 1834 + if (IS_ERR(di)) 1835 + return PTR_ERR(di); 1836 + 1819 1837 if (di) { 1820 1838 /* not unique, try again */ 1821 1839 idx++; ··· 1824 1844 break; 1825 1845 } 1826 1846 1827 - ret = fs_path_add(dest, tmp, len); 1828 - 1829 - out: 1830 - btrfs_free_path(path); 1831 - return ret; 1847 + return fs_path_add(dest, tmp, len); 1832 1848 } 1833 1849 1834 1850 enum inode_state { ··· 1936 1960 int ret = 0; 1937 1961 struct btrfs_dir_item *di; 1938 1962 struct btrfs_key key; 1939 - struct btrfs_path *path; 1963 + BTRFS_PATH_AUTO_FREE(path); 1940 1964 struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len); 1941 1965 1942 1966 path = alloc_path_for_send(); ··· 1944 1968 return -ENOMEM; 1945 1969 1946 1970 di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0); 1947 - if (IS_ERR_OR_NULL(di)) { 1948 - ret = di ? PTR_ERR(di) : -ENOENT; 1949 - goto out; 1950 - } 1971 + if (IS_ERR_OR_NULL(di)) 1972 + return di ? 
PTR_ERR(di) : -ENOENT; 1973 + 1951 1974 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); 1952 - if (key.type == BTRFS_ROOT_ITEM_KEY) { 1953 - ret = -ENOENT; 1954 - goto out; 1955 - } 1975 + if (key.type == BTRFS_ROOT_ITEM_KEY) 1976 + return -ENOENT; 1977 + 1956 1978 *found_inode = key.objectid; 1957 1979 1958 - out: 1959 - btrfs_free_path(path); 1960 1980 return ret; 1961 1981 } 1962 1982 ··· 1966 1994 int ret; 1967 1995 struct btrfs_key key; 1968 1996 struct btrfs_key found_key; 1969 - struct btrfs_path *path; 1997 + BTRFS_PATH_AUTO_FREE(path); 1970 1998 int len; 1971 1999 u64 parent_dir; 1972 2000 ··· 1980 2008 1981 2009 ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); 1982 2010 if (ret < 0) 1983 - goto out; 2011 + return ret; 1984 2012 if (!ret) 1985 2013 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1986 2014 path->slots[0]); 1987 2015 if (ret || found_key.objectid != ino || 1988 2016 (found_key.type != BTRFS_INODE_REF_KEY && 1989 - found_key.type != BTRFS_INODE_EXTREF_KEY)) { 1990 - ret = -ENOENT; 1991 - goto out; 1992 - } 2017 + found_key.type != BTRFS_INODE_EXTREF_KEY)) 2018 + return -ENOENT; 1993 2019 1994 2020 if (found_key.type == BTRFS_INODE_REF_KEY) { 1995 2021 struct btrfs_inode_ref *iref; ··· 2008 2038 parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref); 2009 2039 } 2010 2040 if (ret < 0) 2011 - goto out; 2041 + return ret; 2012 2042 btrfs_release_path(path); 2013 2043 2014 2044 if (dir_gen) { 2015 2045 ret = get_inode_gen(root, parent_dir, dir_gen); 2016 2046 if (ret < 0) 2017 - goto out; 2047 + return ret; 2018 2048 } 2019 2049 2020 2050 *dir = parent_dir; 2021 2051 2022 - out: 2023 - btrfs_free_path(path); 2024 2052 return ret; 2025 2053 } 2026 2054 ··· 2454 2486 int ret; 2455 2487 struct btrfs_root *send_root = sctx->send_root; 2456 2488 struct btrfs_root *parent_root = sctx->parent_root; 2457 - struct btrfs_path *path; 2489 + BTRFS_PATH_AUTO_FREE(path); 2458 2490 struct btrfs_key key; 2459 2491 struct btrfs_root_ref 
*ref; 2460 2492 struct extent_buffer *leaf; ··· 2466 2498 return -ENOMEM; 2467 2499 2468 2500 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); 2469 - if (!name) { 2470 - btrfs_free_path(path); 2501 + if (!name) 2471 2502 return -ENOMEM; 2472 - } 2473 2503 2474 2504 key.objectid = btrfs_root_id(send_root); 2475 2505 key.type = BTRFS_ROOT_BACKREF_KEY; ··· 2530 2564 2531 2565 tlv_put_failure: 2532 2566 out: 2533 - btrfs_free_path(path); 2534 2567 kfree(name); 2535 2568 return ret; 2536 2569 } ··· 2680 2715 int ret = 0; 2681 2716 struct fs_path *p = NULL; 2682 2717 struct btrfs_inode_item *ii; 2683 - struct btrfs_path *path = NULL; 2718 + BTRFS_PATH_AUTO_FREE(path); 2684 2719 struct extent_buffer *eb; 2685 2720 struct btrfs_key key; 2686 2721 int slot; ··· 2724 2759 tlv_put_failure: 2725 2760 out: 2726 2761 free_path_for_command(sctx, p); 2727 - btrfs_free_path(path); 2728 2762 return ret; 2729 2763 } 2730 2764 ··· 2733 2769 * processing an inode that is a directory and it just got renamed, and existing 2734 2770 * entries in the cache may refer to inodes that have the directory in their 2735 2771 * full path - in which case we would generate outdated paths (pre-rename) 2736 - * for the inodes that the cache entries point to. Instead of prunning the 2772 + * for the inodes that the cache entries point to. Instead of pruning the 2737 2773 * cache when inserting, do it after we finish processing each inode at 2738 2774 * finish_inode_if_needed(). 
2739 2775 */ ··· 2894 2930 { 2895 2931 int ret = 0; 2896 2932 int iter_ret = 0; 2897 - struct btrfs_path *path = NULL; 2933 + BTRFS_PATH_AUTO_FREE(path); 2898 2934 struct btrfs_key key; 2899 2935 struct btrfs_key found_key; 2900 2936 struct btrfs_key di_key; ··· 2934 2970 if (iter_ret < 0) 2935 2971 ret = iter_ret; 2936 2972 2937 - btrfs_free_path(path); 2938 2973 return ret; 2939 2974 } 2940 2975 ··· 3713 3750 struct recorded_ref *parent_ref, 3714 3751 const bool is_orphan) 3715 3752 { 3716 - struct btrfs_path *path; 3753 + BTRFS_PATH_AUTO_FREE(path); 3717 3754 struct btrfs_key key; 3718 3755 struct btrfs_key di_key; 3719 3756 struct btrfs_dir_item *di; ··· 3734 3771 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); 3735 3772 3736 3773 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); 3737 - if (ret < 0) { 3738 - goto out; 3739 - } else if (ret > 0) { 3740 - ret = 0; 3741 - goto out; 3742 - } 3774 + if (ret < 0) 3775 + return ret; 3776 + if (ret > 0) 3777 + return 0; 3743 3778 3744 3779 di = btrfs_match_dir_item_name(path, parent_ref->name, 3745 3780 parent_ref->name_len); 3746 - if (!di) { 3747 - ret = 0; 3748 - goto out; 3749 - } 3781 + if (!di) 3782 + return 0; 3750 3783 /* 3751 3784 * di_key.objectid has the number of the inode that has a dentry in the 3752 3785 * parent directory with the same name that sctx->cur_ino is being ··· 3752 3793 * that it happens after that other inode is renamed. 
3753 3794 */ 3754 3795 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); 3755 - if (di_key.type != BTRFS_INODE_ITEM_KEY) { 3756 - ret = 0; 3757 - goto out; 3758 - } 3796 + if (di_key.type != BTRFS_INODE_ITEM_KEY) 3797 + return 0; 3759 3798 3760 3799 ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen); 3761 3800 if (ret < 0) 3762 - goto out; 3801 + return ret; 3763 3802 ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen); 3764 3803 if (ret < 0) { 3765 3804 if (ret == -ENOENT) 3766 3805 ret = 0; 3767 - goto out; 3806 + return ret; 3768 3807 } 3769 3808 3770 3809 /* Different inode, no need to delay the rename of sctx->cur_ino */ 3771 - if (right_gen != left_gen) { 3772 - ret = 0; 3773 - goto out; 3774 - } 3810 + if (right_gen != left_gen) 3811 + return 0; 3775 3812 3776 3813 wdm = get_waiting_dir_move(sctx, di_key.objectid); 3777 3814 if (wdm && !wdm->orphanized) { ··· 3781 3826 if (!ret) 3782 3827 ret = 1; 3783 3828 } 3784 - out: 3785 - btrfs_free_path(path); 3786 3829 return ret; 3787 3830 } 3788 3831 ··· 3830 3877 bool free_fs_path = false; 3831 3878 int ret = 0; 3832 3879 int iter_ret = 0; 3833 - struct btrfs_path *path = NULL; 3880 + BTRFS_PATH_AUTO_FREE(path); 3834 3881 struct btrfs_key key; 3835 3882 3836 3883 if (!fs_path) { ··· 3898 3945 ret = iter_ret; 3899 3946 3900 3947 out: 3901 - btrfs_free_path(path); 3902 3948 if (free_fs_path) 3903 3949 fs_path_free(fs_path); 3904 3950 return ret; ··· 4708 4756 { 4709 4757 int ret; 4710 4758 4711 - ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 4712 - sctx->cmp_key, 0, record_new_ref_if_needed, sctx); 4759 + ret = iterate_inode_ref(sctx->send_root, sctx->left_path, sctx->cmp_key, 4760 + false, record_new_ref_if_needed, sctx); 4713 4761 if (ret < 0) 4714 4762 return ret; 4715 4763 ··· 4720 4768 { 4721 4769 int ret; 4722 4770 4723 - ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 4724 - sctx->cmp_key, 0, record_deleted_ref_if_needed, 4725 - sctx); 4771 + ret = 
iterate_inode_ref(sctx->parent_root, sctx->right_path, sctx->cmp_key, 4772 + false, record_deleted_ref_if_needed, sctx); 4726 4773 if (ret < 0) 4727 4774 return ret; 4728 4775 ··· 4732 4781 { 4733 4782 int ret; 4734 4783 4735 - ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 4736 - sctx->cmp_key, 0, record_new_ref_if_needed, sctx); 4784 + ret = iterate_inode_ref(sctx->send_root, sctx->left_path, sctx->cmp_key, 4785 + false, record_new_ref_if_needed, sctx); 4737 4786 if (ret < 0) 4738 4787 return ret; 4739 - ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 4740 - sctx->cmp_key, 0, record_deleted_ref_if_needed, sctx); 4788 + ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, sctx->cmp_key, 4789 + false, record_deleted_ref_if_needed, sctx); 4741 4790 if (ret < 0) 4742 4791 return ret; 4743 4792 ··· 4754 4803 int ret = 0; 4755 4804 int iter_ret = 0; 4756 4805 struct btrfs_root *root; 4757 - struct btrfs_path *path; 4806 + BTRFS_PATH_AUTO_FREE(path); 4758 4807 struct btrfs_key key; 4759 4808 struct btrfs_key found_key; 4760 4809 iterate_inode_ref_t cb; ··· 4773 4822 } else { 4774 4823 btrfs_err(sctx->send_root->fs_info, 4775 4824 "Wrong command %d in process_all_refs", cmd); 4776 - ret = -EINVAL; 4777 - goto out; 4825 + return -EINVAL; 4778 4826 } 4779 4827 4780 4828 key.objectid = sctx->cmp_key->objectid; ··· 4785 4835 found_key.type != BTRFS_INODE_EXTREF_KEY)) 4786 4836 break; 4787 4837 4788 - ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); 4838 + ret = iterate_inode_ref(root, path, &found_key, false, cb, sctx); 4789 4839 if (ret < 0) 4790 - goto out; 4840 + return ret; 4791 4841 } 4792 4842 /* Catch error found during iteration */ 4793 - if (iter_ret < 0) { 4794 - ret = iter_ret; 4795 - goto out; 4796 - } 4843 + if (iter_ret < 0) 4844 + return iter_ret; 4845 + 4797 4846 btrfs_release_path(path); 4798 4847 4799 4848 /* ··· 4800 4851 * re-creating this inode and will be rename'ing it into place once we 4801 4852 * rename 
the parent directory. 4802 4853 */ 4803 - ret = process_recorded_refs(sctx, &pending_move); 4804 - out: 4805 - btrfs_free_path(path); 4806 - return ret; 4854 + return process_recorded_refs(sctx, &pending_move); 4807 4855 } 4808 4856 4809 4857 static int send_set_xattr(struct send_ctx *sctx, ··· 5026 5080 int ret = 0; 5027 5081 int iter_ret = 0; 5028 5082 struct btrfs_root *root; 5029 - struct btrfs_path *path; 5083 + BTRFS_PATH_AUTO_FREE(path); 5030 5084 struct btrfs_key key; 5031 5085 struct btrfs_key found_key; 5032 5086 ··· 5054 5108 if (iter_ret < 0) 5055 5109 ret = iter_ret; 5056 5110 5057 - btrfs_free_path(path); 5058 5111 return ret; 5059 5112 } 5060 5113 ··· 5199 5254 if (!folio_test_uptodate(folio)) { 5200 5255 btrfs_read_folio(NULL, folio); 5201 5256 folio_lock(folio); 5202 - if (!folio_test_uptodate(folio)) { 5257 + if (unlikely(!folio_test_uptodate(folio))) { 5203 5258 folio_unlock(folio); 5204 5259 btrfs_err(fs_info, 5205 5260 "send: IO error at offset %llu for inode %llu root %llu", ··· 5601 5656 5602 5657 ei = btrfs_item_ptr(leaf, path->slots[0], 5603 5658 struct btrfs_file_extent_item); 5604 - if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) && 5659 + /* 5660 + * Do not go through encoded read for bs > ps cases. 5661 + * 5662 + * Encoded send is using vmallocated pages as buffer, which we can 5663 + * not ensure every folio is large enough to contain a block. 
5664 + */ 5665 + if (sctx->send_root->fs_info->sectorsize <= PAGE_SIZE && 5666 + (sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) && 5605 5667 btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) { 5606 5668 bool is_inline = (btrfs_file_extent_type(leaf, ei) == 5607 5669 BTRFS_FILE_EXTENT_INLINE); ··· 5718 5766 */ 5719 5767 static int send_capabilities(struct send_ctx *sctx) 5720 5768 { 5721 - struct btrfs_path *path; 5769 + BTRFS_PATH_AUTO_FREE(path); 5722 5770 struct btrfs_dir_item *di; 5723 5771 struct extent_buffer *leaf; 5724 5772 unsigned long data_ptr; ··· 5756 5804 strlen(XATTR_NAME_CAPS), buf, buf_len); 5757 5805 out: 5758 5806 kfree(buf); 5759 - btrfs_free_path(path); 5760 5807 return ret; 5761 5808 } 5762 5809 ··· 5763 5812 struct clone_root *clone_root, const u64 disk_byte, 5764 5813 u64 data_offset, u64 offset, u64 len) 5765 5814 { 5766 - struct btrfs_path *path; 5815 + BTRFS_PATH_AUTO_FREE(path); 5767 5816 struct btrfs_key key; 5768 5817 int ret; 5769 5818 struct btrfs_inode_info info; ··· 5799 5848 ret = get_inode_info(clone_root->root, clone_root->ino, &info); 5800 5849 btrfs_release_path(path); 5801 5850 if (ret < 0) 5802 - goto out; 5851 + return ret; 5803 5852 clone_src_i_size = info.size; 5804 5853 5805 5854 /* ··· 5829 5878 key.offset = clone_root->offset; 5830 5879 ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0); 5831 5880 if (ret < 0) 5832 - goto out; 5881 + return ret; 5833 5882 if (ret > 0 && path->slots[0] > 0) { 5834 5883 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); 5835 5884 if (key.objectid == clone_root->ino && ··· 5850 5899 if (slot >= btrfs_header_nritems(leaf)) { 5851 5900 ret = btrfs_next_leaf(clone_root->root, path); 5852 5901 if (ret < 0) 5853 - goto out; 5902 + return ret; 5854 5903 else if (ret > 0) 5855 5904 break; 5856 5905 continue; ··· 5887 5936 ret = send_extent_data(sctx, dst_path, offset, 5888 5937 hole_len); 5889 5938 if (ret < 0) 5890 - goto out; 5939 + return ret; 5891 5940 
5892 5941 len -= hole_len; 5893 5942 if (len == 0) ··· 5958 6007 ret = send_clone(sctx, offset, slen, 5959 6008 clone_root); 5960 6009 if (ret < 0) 5961 - goto out; 6010 + return ret; 5962 6011 } 5963 6012 ret = send_extent_data(sctx, dst_path, 5964 6013 offset + slen, ··· 5992 6041 } 5993 6042 5994 6043 if (ret < 0) 5995 - goto out; 6044 + return ret; 5996 6045 5997 6046 len -= clone_len; 5998 6047 if (len == 0) ··· 6023 6072 ret = send_extent_data(sctx, dst_path, offset, len); 6024 6073 else 6025 6074 ret = 0; 6026 - out: 6027 - btrfs_free_path(path); 6028 6075 return ret; 6029 6076 } 6030 6077 ··· 6111 6162 { 6112 6163 int ret = 0; 6113 6164 struct btrfs_key key; 6114 - struct btrfs_path *path = NULL; 6165 + BTRFS_PATH_AUTO_FREE(path); 6115 6166 struct extent_buffer *eb; 6116 6167 int slot; 6117 6168 struct btrfs_key found_key; ··· 6137 6188 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 6138 6189 left_type = btrfs_file_extent_type(eb, ei); 6139 6190 6140 - if (left_type != BTRFS_FILE_EXTENT_REG) { 6141 - ret = 0; 6142 - goto out; 6143 - } 6191 + if (left_type != BTRFS_FILE_EXTENT_REG) 6192 + return 0; 6193 + 6144 6194 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 6145 6195 left_len = btrfs_file_extent_num_bytes(eb, ei); 6146 6196 left_offset = btrfs_file_extent_offset(eb, ei); ··· 6171 6223 key.offset = ekey->offset; 6172 6224 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0); 6173 6225 if (ret < 0) 6174 - goto out; 6175 - if (ret) { 6176 - ret = 0; 6177 - goto out; 6178 - } 6226 + return ret; 6227 + if (ret) 6228 + return 0; 6179 6229 6180 6230 /* 6181 6231 * Handle special case where the right side has no extents at all. 
··· 6182 6236 slot = path->slots[0]; 6183 6237 btrfs_item_key_to_cpu(eb, &found_key, slot); 6184 6238 if (found_key.objectid != key.objectid || 6185 - found_key.type != key.type) { 6239 + found_key.type != key.type) 6186 6240 /* If we're a hole then just pretend nothing changed */ 6187 - ret = (left_disknr) ? 0 : 1; 6188 - goto out; 6189 - } 6241 + return (left_disknr ? 0 : 1); 6190 6242 6191 6243 /* 6192 6244 * We're now on 2a, 2b or 7. ··· 6194 6250 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 6195 6251 right_type = btrfs_file_extent_type(eb, ei); 6196 6252 if (right_type != BTRFS_FILE_EXTENT_REG && 6197 - right_type != BTRFS_FILE_EXTENT_INLINE) { 6198 - ret = 0; 6199 - goto out; 6200 - } 6253 + right_type != BTRFS_FILE_EXTENT_INLINE) 6254 + return 0; 6201 6255 6202 6256 if (right_type == BTRFS_FILE_EXTENT_INLINE) { 6203 6257 right_len = btrfs_file_extent_ram_bytes(eb, ei); ··· 6208 6266 * Are we at extent 8? If yes, we know the extent is changed. 6209 6267 * This may only happen on the first iteration. 6210 6268 */ 6211 - if (found_key.offset + right_len <= ekey->offset) { 6269 + if (found_key.offset + right_len <= ekey->offset) 6212 6270 /* If we're a hole just pretend nothing changed */ 6213 - ret = (left_disknr) ? 0 : 1; 6214 - goto out; 6215 - } 6271 + return (left_disknr ? 0 : 1); 6216 6272 6217 6273 /* 6218 6274 * We just wanted to see if when we have an inline extent, what ··· 6220 6280 * compressed extent representing data with a size matching 6221 6281 * the page size (currently the same as sector size). 
6222 6282 */ 6223 - if (right_type == BTRFS_FILE_EXTENT_INLINE) { 6224 - ret = 0; 6225 - goto out; 6226 - } 6283 + if (right_type == BTRFS_FILE_EXTENT_INLINE) 6284 + return 0; 6227 6285 6228 6286 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 6229 6287 right_offset = btrfs_file_extent_offset(eb, ei); ··· 6241 6303 */ 6242 6304 if (left_disknr != right_disknr || 6243 6305 left_offset_fixed != right_offset || 6244 - left_gen != right_gen) { 6245 - ret = 0; 6246 - goto out; 6247 - } 6306 + left_gen != right_gen) 6307 + return 0; 6248 6308 6249 6309 /* 6250 6310 * Go to the next extent. 6251 6311 */ 6252 6312 ret = btrfs_next_item(sctx->parent_root, path); 6253 6313 if (ret < 0) 6254 - goto out; 6314 + return ret; 6255 6315 if (!ret) { 6256 6316 eb = path->nodes[0]; 6257 6317 slot = path->slots[0]; ··· 6260 6324 key.offset += right_len; 6261 6325 break; 6262 6326 } 6263 - if (found_key.offset != key.offset + right_len) { 6264 - ret = 0; 6265 - goto out; 6266 - } 6327 + if (found_key.offset != key.offset + right_len) 6328 + return 0; 6329 + 6267 6330 key = found_key; 6268 6331 } 6269 6332 ··· 6275 6340 else 6276 6341 ret = 0; 6277 6342 6278 - 6279 - out: 6280 - btrfs_free_path(path); 6281 6343 return ret; 6282 6344 } 6283 6345 6284 6346 static int get_last_extent(struct send_ctx *sctx, u64 offset) 6285 6347 { 6286 - struct btrfs_path *path; 6348 + BTRFS_PATH_AUTO_FREE(path); 6287 6349 struct btrfs_root *root = sctx->send_root; 6288 6350 struct btrfs_key key; 6289 6351 int ret; ··· 6296 6364 key.offset = offset; 6297 6365 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); 6298 6366 if (ret < 0) 6299 - goto out; 6367 + return ret; 6300 6368 ret = 0; 6301 6369 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 6302 6370 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) 6303 - goto out; 6371 + return ret; 6304 6372 6305 6373 sctx->cur_inode_last_extent = btrfs_file_extent_end(path); 6306 - out: 6307 - btrfs_free_path(path); 6308 
6374 return ret; 6309 6375 } 6310 6376 ··· 6310 6380 const u64 start, 6311 6381 const u64 end) 6312 6382 { 6313 - struct btrfs_path *path; 6383 + BTRFS_PATH_AUTO_FREE(path); 6314 6384 struct btrfs_key key; 6315 6385 struct btrfs_root *root = sctx->parent_root; 6316 6386 u64 search_start = start; ··· 6325 6395 key.offset = search_start; 6326 6396 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 6327 6397 if (ret < 0) 6328 - goto out; 6398 + return ret; 6329 6399 if (ret > 0 && path->slots[0] > 0) 6330 6400 path->slots[0]--; 6331 6401 ··· 6338 6408 if (slot >= btrfs_header_nritems(leaf)) { 6339 6409 ret = btrfs_next_leaf(root, path); 6340 6410 if (ret < 0) 6341 - goto out; 6342 - else if (ret > 0) 6411 + return ret; 6412 + if (ret > 0) 6343 6413 break; 6344 6414 continue; 6345 6415 } ··· 6361 6431 search_start = extent_end; 6362 6432 goto next; 6363 6433 } 6364 - ret = 0; 6365 - goto out; 6434 + return 0; 6366 6435 next: 6367 6436 path->slots[0]++; 6368 6437 } 6369 - ret = 1; 6370 - out: 6371 - btrfs_free_path(path); 6372 - return ret; 6438 + return 1; 6373 6439 } 6374 6440 6375 6441 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, ··· 6473 6547 int ret = 0; 6474 6548 int iter_ret = 0; 6475 6549 struct btrfs_root *root; 6476 - struct btrfs_path *path; 6550 + BTRFS_PATH_AUTO_FREE(path); 6477 6551 struct btrfs_key key; 6478 6552 struct btrfs_key found_key; 6479 6553 ··· 6500 6574 if (iter_ret < 0) 6501 6575 ret = iter_ret; 6502 6576 6503 - btrfs_free_path(path); 6504 6577 return ret; 6505 6578 } 6506 6579 6507 - static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end, 6580 + static int process_recorded_refs_if_needed(struct send_ctx *sctx, bool at_end, 6508 6581 int *pending_move, 6509 6582 int *refs_processed) 6510 6583 { ··· 6526 6601 return ret; 6527 6602 } 6528 6603 6529 - static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) 6604 + static int finish_inode_if_needed(struct send_ctx *sctx, bool 
at_end) 6530 6605 { 6531 6606 int ret = 0; 6532 6607 struct btrfs_inode_info info; ··· 6961 7036 { 6962 7037 int ret = 0; 6963 7038 6964 - if (sctx->cur_ino != sctx->cmp_key->objectid) { 7039 + if (unlikely(sctx->cur_ino != sctx->cmp_key->objectid)) { 6965 7040 inconsistent_snapshot_error(sctx, result, "reference"); 6966 7041 return -EIO; 6967 7042 } ··· 6989 7064 { 6990 7065 int ret = 0; 6991 7066 6992 - if (sctx->cur_ino != sctx->cmp_key->objectid) { 7067 + if (unlikely(sctx->cur_ino != sctx->cmp_key->objectid)) { 6993 7068 inconsistent_snapshot_error(sctx, result, "xattr"); 6994 7069 return -EIO; 6995 7070 } ··· 7229 7304 */ 7230 7305 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 7231 7306 ASSERT(ret <= 0); 7232 - if (ret > 0) { 7307 + if (unlikely(ret > 0)) { 7233 7308 btrfs_print_tree(path->nodes[path->lowest_level], false); 7234 7309 btrfs_err(root->fs_info, 7235 7310 "send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d", ··· 7249 7324 struct btrfs_root *send_root = sctx->send_root; 7250 7325 struct btrfs_key key; 7251 7326 struct btrfs_fs_info *fs_info = send_root->fs_info; 7252 - struct btrfs_path *path; 7327 + BTRFS_PATH_AUTO_FREE(path); 7253 7328 7254 7329 path = alloc_path_for_send(); 7255 7330 if (!path) ··· 7266 7341 7267 7342 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 7268 7343 if (ret < 0) 7269 - goto out; 7344 + return ret; 7270 7345 if (ret) 7271 7346 goto out_finish; 7272 7347 ··· 7276 7351 ret = changed_cb(path, NULL, &key, 7277 7352 BTRFS_COMPARE_TREE_NEW, sctx); 7278 7353 if (ret < 0) 7279 - goto out; 7354 + return ret; 7280 7355 7281 7356 down_read(&fs_info->commit_root_sem); 7282 7357 if (fs_info->last_reloc_trans > sctx->last_reloc_trans) { ··· 7295 7370 btrfs_release_path(path); 7296 7371 ret = search_key_again(sctx, send_root, path, &key); 7297 7372 if (ret < 0) 7298 - goto out; 7373 + return ret; 7299 7374 } else { 7300 7375 up_read(&fs_info->commit_root_sem); 7301 7376 } 7302 7377 7303 
7378 ret = btrfs_next_item(send_root, path); 7304 7379 if (ret < 0) 7305 - goto out; 7380 + return ret; 7306 7381 if (ret) { 7307 7382 ret = 0; 7308 7383 break; ··· 7310 7385 } 7311 7386 7312 7387 out_finish: 7313 - ret = finish_inode_if_needed(sctx, 1); 7314 - 7315 - out: 7316 - btrfs_free_path(path); 7317 - return ret; 7388 + return finish_inode_if_needed(sctx, 1); 7318 7389 } 7319 7390 7320 7391 static int replace_node_with_clone(struct btrfs_path *path, int level) ··· 7565 7644 struct btrfs_fs_info *fs_info = left_root->fs_info; 7566 7645 int ret; 7567 7646 int cmp; 7568 - struct btrfs_path *left_path = NULL; 7569 - struct btrfs_path *right_path = NULL; 7647 + BTRFS_PATH_AUTO_FREE(left_path); 7648 + BTRFS_PATH_AUTO_FREE(right_path); 7570 7649 struct btrfs_key left_key; 7571 7650 struct btrfs_key right_key; 7572 7651 char *tmp_buf = NULL; ··· 7839 7918 out_unlock: 7840 7919 up_read(&fs_info->commit_root_sem); 7841 7920 out: 7842 - btrfs_free_path(left_path); 7843 - btrfs_free_path(right_path); 7844 7921 kvfree(tmp_buf); 7845 7922 return ret; 7846 7923 } ··· 7905 7986 } 7906 7987 7907 7988 /* 7908 - * Make sure any existing dellaloc is flushed for any root used by a send 7989 + * Make sure any existing delalloc is flushed for any root used by a send 7909 7990 * operation so that we do not miss any data and we do not race with writeback 7910 7991 * finishing and changing a tree while send is using the tree. This could 7911 7992 * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and

+2 -2

fs/btrfs/space-info.c

··· 479 479 480 480 /* 481 481 * On the zoned mode, we always allocate one zone as one chunk. 482 - * Returning non-zone size alingned bytes here will result in 482 + * Returning non-zone size aligned bytes here will result in 483 483 * less pressure for the async metadata reclaim process, and it 484 484 * will over-commit too much leading to ENOSPC. Align down to the 485 485 * zone size to avoid that. ··· 1528 1528 * turned into error mode due to a transaction abort when flushing space 1529 1529 * above, in that case fail with the abort error instead of returning 1530 1530 * success to the caller if we can steal from the global rsv - this is 1531 - * just to have caller fail immeditelly instead of later when trying to 1531 + * just to have caller fail immediately instead of later when trying to 1532 1532 * modify the fs, making it easier to debug -ENOSPC problems. 1533 1533 */ 1534 1534 if (BTRFS_FS_ERROR(fs_info)) {

+1 -1

fs/btrfs/subpage.c

··· 690 690 \ 691 691 GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap); \ 692 692 btrfs_warn(fs_info, \ 693 - "dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \ 693 + "dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \ 694 694 start, len, folio_pos(folio), \ 695 695 blocks_per_folio, &bitmap); \ 696 696 }

+1 -1

fs/btrfs/subpage.h

··· 13 13 struct folio; 14 14 15 15 /* 16 - * Extra info for subpapge bitmap. 16 + * Extra info for subpage bitmap. 17 17 * 18 18 * For subpage we pack all uptodate/dirty/writeback/ordered bitmaps into 19 19 * one larger bitmap.

+13 -21

fs/btrfs/super.c

··· 133 133 Opt_enospc_debug, 134 134 #ifdef CONFIG_BTRFS_DEBUG 135 135 Opt_fragment, Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, 136 - #endif 137 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 138 136 Opt_ref_verify, 137 + Opt_ref_tracker, 139 138 #endif 140 139 Opt_err, 141 140 }; ··· 256 257 fsparam_flag_no("enospc_debug", Opt_enospc_debug), 257 258 #ifdef CONFIG_BTRFS_DEBUG 258 259 fsparam_enum("fragment", Opt_fragment, btrfs_parameter_fragment), 259 - #endif 260 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 260 + fsparam_flag("ref_tracker", Opt_ref_tracker), 261 261 fsparam_flag("ref_verify", Opt_ref_verify), 262 262 #endif 263 263 {} ··· 644 646 return -EINVAL; 645 647 } 646 648 break; 647 - #endif 648 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 649 649 case Opt_ref_verify: 650 650 btrfs_set_opt(ctx->mount_opt, REF_VERIFY); 651 + break; 652 + case Opt_ref_tracker: 653 + btrfs_set_opt(ctx->mount_opt, REF_TRACKER); 651 654 break; 652 655 #endif 653 656 default: ··· 925 926 { 926 927 struct btrfs_root *root = fs_info->tree_root; 927 928 struct btrfs_dir_item *di; 928 - struct btrfs_path *path; 929 + BTRFS_PATH_AUTO_FREE(path); 929 930 struct btrfs_key location; 930 931 struct fscrypt_str name = FSTR_INIT("default", 7); 931 932 u64 dir_id; ··· 942 943 dir_id = btrfs_super_root_dir(fs_info->super_copy); 943 944 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0); 944 945 if (IS_ERR(di)) { 945 - btrfs_free_path(path); 946 946 return PTR_ERR(di); 947 947 } 948 948 if (!di) { ··· 950 952 * it's always been there, but don't freak out, just try and 951 953 * mount the top-level subvolume. 
952 954 */ 953 - btrfs_free_path(path); 954 955 *objectid = BTRFS_FS_TREE_OBJECTID; 955 956 return 0; 956 957 } 957 958 958 959 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 959 - btrfs_free_path(path); 960 960 *objectid = location.objectid; 961 961 return 0; 962 962 } ··· 1152 1156 #endif 1153 1157 if (btrfs_test_opt(info, REF_VERIFY)) 1154 1158 seq_puts(seq, ",ref_verify"); 1159 + if (btrfs_test_opt(info, REF_TRACKER)) 1160 + seq_puts(seq, ",ref_tracker"); 1155 1161 seq_printf(seq, ",subvolid=%llu", btrfs_root_id(BTRFS_I(d_inode(dentry))->root)); 1156 1162 subvol_name = btrfs_get_subvol_name_from_objectid(info, 1157 1163 btrfs_root_id(BTRFS_I(d_inode(dentry))->root)); ··· 1280 1282 const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); 1281 1283 1282 1284 /* 1283 - * We need to cleanup all defragable inodes if the autodefragment is 1285 + * We need to cleanup all defraggable inodes if the autodefragment is 1284 1286 * close or the filesystem is read only. 1285 1287 */ 1286 1288 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && ··· 2272 2274 device = btrfs_scan_one_device(vol->name, false); 2273 2275 if (IS_ERR_OR_NULL(device)) { 2274 2276 mutex_unlock(&uuid_mutex); 2275 - if (IS_ERR(device)) 2276 - ret = PTR_ERR(device); 2277 - else 2278 - ret = 0; 2277 + ret = PTR_ERR_OR_ZERO(device); 2279 2278 break; 2280 2279 } 2281 2280 ret = !(device->fs_devices->num_devices == ··· 2325 2330 2326 2331 /* Verify the checksum. 
*/ 2327 2332 csum_type = btrfs_super_csum_type(sb); 2328 - if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) { 2333 + if (unlikely(csum_type != btrfs_super_csum_type(fs_info->super_copy))) { 2329 2334 btrfs_err(fs_info, "csum type changed, has %u expect %u", 2330 2335 csum_type, btrfs_super_csum_type(fs_info->super_copy)); 2331 2336 ret = -EUCLEAN; 2332 2337 goto out; 2333 2338 } 2334 2339 2335 - if (btrfs_check_super_csum(fs_info, sb)) { 2340 + if (unlikely(btrfs_check_super_csum(fs_info, sb))) { 2336 2341 btrfs_err(fs_info, "csum for on-disk super block no longer matches"); 2337 2342 ret = -EUCLEAN; 2338 2343 goto out; ··· 2344 2349 goto out; 2345 2350 2346 2351 last_trans = btrfs_get_last_trans_committed(fs_info); 2347 - if (btrfs_super_generation(sb) != last_trans) { 2352 + if (unlikely(btrfs_super_generation(sb) != last_trans)) { 2348 2353 btrfs_err(fs_info, "transid mismatch, has %llu expect %llu", 2349 2354 btrfs_super_generation(sb), last_trans); 2350 2355 ret = -EUCLEAN; ··· 2480 2485 #endif 2481 2486 #ifdef CONFIG_BTRFS_ASSERT 2482 2487 ", assert=on" 2483 - #endif 2484 - #ifdef CONFIG_BTRFS_FS_REF_VERIFY 2485 - ", ref-verify=on" 2486 2488 #endif 2487 2489 #ifdef CONFIG_BLK_DEV_ZONED 2488 2490 ", zoned=yes"

+10 -6

fs/btrfs/sysfs.c

··· 409 409 char *buf) 410 410 { 411 411 ssize_t ret = 0; 412 + bool has_output = false; 412 413 413 - if (BTRFS_MIN_BLOCKSIZE != SZ_4K && BTRFS_MIN_BLOCKSIZE != PAGE_SIZE) 414 - ret += sysfs_emit_at(buf, ret, "%u ", BTRFS_MIN_BLOCKSIZE); 415 - if (PAGE_SIZE > SZ_4K) 416 - ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K); 417 - ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE); 418 - 414 + for (u32 cur = BTRFS_MIN_BLOCKSIZE; cur <= BTRFS_MAX_BLOCKSIZE; cur *= 2) { 415 + if (!btrfs_supported_blocksize(cur)) 416 + continue; 417 + if (has_output) 418 + ret += sysfs_emit_at(buf, ret, " "); 419 + ret += sysfs_emit_at(buf, ret, "%u", cur); 420 + has_output = true; 421 + } 422 + ret += sysfs_emit_at(buf, ret, "\n"); 419 423 return ret; 420 424 } 421 425 BTRFS_ATTR(static_feature, supported_sectorsizes,

+2 -2

fs/btrfs/tests/delayed-refs-tests.c

··· 997 997 998 998 ret = simple_tests(&trans); 999 999 if (!ret) { 1000 - test_msg("running delayed refs merg tests on metadata refs"); 1000 + test_msg("running delayed refs merge tests on metadata refs"); 1001 1001 ret = merge_tests(&trans, BTRFS_REF_METADATA); 1002 1002 } 1003 1003 1004 1004 if (!ret) { 1005 - test_msg("running delayed refs merg tests on data refs"); 1005 + test_msg("running delayed refs merge tests on data refs"); 1006 1006 ret = merge_tests(&trans, BTRFS_REF_DATA); 1007 1007 } 1008 1008

+1 -1

fs/btrfs/tests/extent-map-tests.c

··· 1095 1095 /* 1096 1096 * Test a chunk with 2 data stripes one of which 1097 1097 * intersects the physical address of the super block 1098 - * is correctly recognised. 1098 + * is correctly recognized. 1099 1099 */ 1100 1100 .raid_type = BTRFS_BLOCK_GROUP_RAID1, 1101 1101 .physical_start = SZ_64M - SZ_4M,

+23 -26

fs/btrfs/transaction.c

··· 103 103 * | attached to transid N+1. | 104 104 * | | 105 105 * | To next stage: | 106 - * | Until all tree blocks are super blocks are | 106 + * | Until all tree blocks and super blocks are | 107 107 * | written to block devices | 108 108 * V | 109 109 * Transaction N [[TRANS_STATE_COMPLETED]] V ··· 404 404 */ 405 405 static int record_root_in_trans(struct btrfs_trans_handle *trans, 406 406 struct btrfs_root *root, 407 - int force) 407 + bool force) 408 408 { 409 409 struct btrfs_fs_info *fs_info = root->fs_info; 410 410 int ret = 0; ··· 1569 1569 * qgroup counters could end up wrong. 1570 1570 */ 1571 1571 ret = btrfs_run_delayed_refs(trans, U64_MAX); 1572 - if (ret) { 1572 + if (unlikely(ret)) { 1573 1573 btrfs_abort_transaction(trans, ret); 1574 1574 return ret; 1575 1575 } ··· 1641 1641 struct btrfs_root *parent_root; 1642 1642 struct btrfs_block_rsv *rsv; 1643 1643 struct btrfs_inode *parent_inode = pending->dir; 1644 - struct btrfs_path *path; 1644 + BTRFS_PATH_AUTO_FREE(path); 1645 1645 struct btrfs_dir_item *dir_item; 1646 1646 struct extent_buffer *tmp; 1647 1647 struct extent_buffer *old; ··· 1694 1694 goto clear_skip_qgroup; 1695 1695 } 1696 1696 1697 - key.objectid = objectid; 1698 - key.type = BTRFS_ROOT_ITEM_KEY; 1699 - key.offset = (u64)-1; 1700 - 1701 1697 rsv = trans->block_rsv; 1702 1698 trans->block_rsv = &pending->block_rsv; 1703 1699 trans->bytes_reserved = trans->block_rsv->reserved; ··· 1710 1714 * insert the directory item 1711 1715 */ 1712 1716 ret = btrfs_set_inode_index(parent_inode, &index); 1713 - if (ret) { 1717 + if (unlikely(ret)) { 1714 1718 btrfs_abort_transaction(trans, ret); 1715 1719 goto fail; 1716 1720 } ··· 1731 1735 1732 1736 ret = btrfs_create_qgroup(trans, objectid); 1733 1737 if (ret && ret != -EEXIST) { 1734 - if (ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info)) { 1738 + if (unlikely(ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info))) { 1735 1739 btrfs_abort_transaction(trans, ret); 1736 1740 goto fail; 1737 1741 } 
··· 1744 1748 * snapshot 1745 1749 */ 1746 1750 ret = btrfs_run_delayed_items(trans); 1747 - if (ret) { /* Transaction aborted */ 1751 + if (unlikely(ret)) { 1748 1752 btrfs_abort_transaction(trans, ret); 1749 1753 goto fail; 1750 1754 } 1751 1755 1752 1756 ret = record_root_in_trans(trans, root, 0); 1753 - if (ret) { 1757 + if (unlikely(ret)) { 1754 1758 btrfs_abort_transaction(trans, ret); 1755 1759 goto fail; 1756 1760 } ··· 1785 1789 old = btrfs_lock_root_node(root); 1786 1790 ret = btrfs_cow_block(trans, root, old, NULL, 0, &old, 1787 1791 BTRFS_NESTING_COW); 1788 - if (ret) { 1792 + if (unlikely(ret)) { 1789 1793 btrfs_tree_unlock(old); 1790 1794 free_extent_buffer(old); 1791 1795 btrfs_abort_transaction(trans, ret); ··· 1796 1800 /* clean up in any case */ 1797 1801 btrfs_tree_unlock(old); 1798 1802 free_extent_buffer(old); 1799 - if (ret) { 1803 + if (unlikely(ret)) { 1800 1804 btrfs_abort_transaction(trans, ret); 1801 1805 goto fail; 1802 1806 } 1803 1807 /* see comments in should_cow_block() */ 1804 1808 set_bit(BTRFS_ROOT_FORCE_COW, &root->state); 1805 - smp_wmb(); 1809 + smp_mb__after_atomic(); 1806 1810 1807 1811 btrfs_set_root_node(new_root_item, tmp); 1808 1812 /* record when the snapshot was created in key.offset */ 1813 + key.objectid = objectid; 1814 + key.type = BTRFS_ROOT_ITEM_KEY; 1809 1815 key.offset = trans->transid; 1810 1816 ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); 1811 1817 btrfs_tree_unlock(tmp); 1812 1818 free_extent_buffer(tmp); 1813 - if (ret) { 1819 + if (unlikely(ret)) { 1814 1820 btrfs_abort_transaction(trans, ret); 1815 1821 goto fail; 1816 1822 } ··· 1824 1826 btrfs_root_id(parent_root), 1825 1827 btrfs_ino(parent_inode), index, 1826 1828 &fname.disk_name); 1827 - if (ret) { 1829 + if (unlikely(ret)) { 1828 1830 btrfs_abort_transaction(trans, ret); 1829 1831 goto fail; 1830 1832 } ··· 1839 1841 } 1840 1842 1841 1843 ret = btrfs_reloc_post_snapshot(trans, pending); 1842 - if (ret) { 1844 + if (unlikely(ret)) 
{ 1843 1845 btrfs_abort_transaction(trans, ret); 1844 1846 goto fail; 1845 1847 } ··· 1862 1864 ret = btrfs_insert_dir_item(trans, &fname.disk_name, 1863 1865 parent_inode, &key, BTRFS_FT_DIR, 1864 1866 index); 1865 - if (ret) { 1867 + if (unlikely(ret)) { 1866 1868 btrfs_abort_transaction(trans, ret); 1867 1869 goto fail; 1868 1870 } ··· 1872 1874 inode_set_mtime_to_ts(&parent_inode->vfs_inode, 1873 1875 inode_set_ctime_current(&parent_inode->vfs_inode)); 1874 1876 ret = btrfs_update_inode_fallback(trans, parent_inode); 1875 - if (ret) { 1877 + if (unlikely(ret)) { 1876 1878 btrfs_abort_transaction(trans, ret); 1877 1879 goto fail; 1878 1880 } 1879 1881 ret = btrfs_uuid_tree_add(trans, new_root_item->uuid, 1880 1882 BTRFS_UUID_KEY_SUBVOL, 1881 1883 objectid); 1882 - if (ret) { 1884 + if (unlikely(ret)) { 1883 1885 btrfs_abort_transaction(trans, ret); 1884 1886 goto fail; 1885 1887 } ··· 1887 1889 ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid, 1888 1890 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 1889 1891 objectid); 1890 - if (ret && ret != -EEXIST) { 1892 + if (unlikely(ret && ret != -EEXIST)) { 1891 1893 btrfs_abort_transaction(trans, ret); 1892 1894 goto fail; 1893 1895 } ··· 1905 1907 free_pending: 1906 1908 kfree(new_root_item); 1907 1909 pending->root_item = NULL; 1908 - btrfs_free_path(path); 1909 1910 pending->path = NULL; 1910 1911 1911 1912 return ret; ··· 2420 2423 * them. 2421 2424 * 2422 2425 * We needn't worry that this operation will corrupt the snapshots, 2423 - * because all the tree which are snapshoted will be forced to COW 2426 + * because all the tree which are snapshotted will be forced to COW 2424 2427 * the nodes and leaves. 
2425 2428 */ 2426 2429 ret = btrfs_run_delayed_items(trans); ··· 2654 2657 2655 2658 if (btrfs_header_backref_rev(root->node) < 2656 2659 BTRFS_MIXED_BACKREF_REV) 2657 - ret = btrfs_drop_snapshot(root, 0, 0); 2660 + ret = btrfs_drop_snapshot(root, false, false); 2658 2661 else 2659 - ret = btrfs_drop_snapshot(root, 1, 0); 2662 + ret = btrfs_drop_snapshot(root, true, false); 2660 2663 2661 2664 btrfs_put_root(root); 2662 2665 return (ret < 0) ? 0 : 1;

+38 -1

fs/btrfs/tree-checker.c

··· 183 183 /* Only these key->types needs to be checked */ 184 184 ASSERT(key->type == BTRFS_XATTR_ITEM_KEY || 185 185 key->type == BTRFS_INODE_REF_KEY || 186 + key->type == BTRFS_INODE_EXTREF_KEY || 186 187 key->type == BTRFS_DIR_INDEX_KEY || 187 188 key->type == BTRFS_DIR_ITEM_KEY || 188 189 key->type == BTRFS_EXTENT_DATA_KEY); ··· 1210 1209 /* 1211 1210 * For legacy root item, the members starting at generation_v2 will be 1212 1211 * all filled with 0. 1213 - * And since we allow geneartion_v2 as 0, it will still pass the check. 1212 + * And since we allow generation_v2 as 0, it will still pass the check. 1214 1213 */ 1215 1214 read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot), 1216 1215 btrfs_item_size(leaf, slot)); ··· 1783 1782 return 0; 1784 1783 } 1785 1784 1785 + static int check_inode_extref(struct extent_buffer *leaf, 1786 + struct btrfs_key *key, struct btrfs_key *prev_key, 1787 + int slot) 1788 + { 1789 + unsigned long ptr = btrfs_item_ptr_offset(leaf, slot); 1790 + unsigned long end = ptr + btrfs_item_size(leaf, slot); 1791 + 1792 + if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) 1793 + return -EUCLEAN; 1794 + 1795 + while (ptr < end) { 1796 + struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr; 1797 + u16 namelen; 1798 + 1799 + if (unlikely(ptr + sizeof(*extref) > end)) { 1800 + inode_ref_err(leaf, slot, 1801 + "inode extref overflow, ptr %lu end %lu inode_extref size %zu", 1802 + ptr, end, sizeof(*extref)); 1803 + return -EUCLEAN; 1804 + } 1805 + 1806 + namelen = btrfs_inode_extref_name_len(leaf, extref); 1807 + if (unlikely(ptr + sizeof(*extref) + namelen > end)) { 1808 + inode_ref_err(leaf, slot, 1809 + "inode extref overflow, ptr %lu end %lu namelen %u", 1810 + ptr, end, namelen); 1811 + return -EUCLEAN; 1812 + } 1813 + ptr += sizeof(*extref) + namelen; 1814 + } 1815 + return 0; 1816 + } 1817 + 1786 1818 static int check_raid_stripe_extent(const struct extent_buffer *leaf, 1787 1819 const struct btrfs_key 
*key, int slot) 1788 1820 { ··· 1926 1892 break; 1927 1893 case BTRFS_INODE_REF_KEY: 1928 1894 ret = check_inode_ref(leaf, key, prev_key, slot); 1895 + break; 1896 + case BTRFS_INODE_EXTREF_KEY: 1897 + ret = check_inode_extref(leaf, key, prev_key, slot); 1929 1898 break; 1930 1899 case BTRFS_BLOCK_GROUP_ITEM_KEY: 1931 1900 ret = check_block_group_item(leaf, key, slot);

+1128 -792

fs/btrfs/tree-log.c

··· 27 27 #include "file-item.h" 28 28 #include "file.h" 29 29 #include "orphan.h" 30 + #include "print-tree.h" 30 31 #include "tree-checker.h" 31 32 32 33 #define MAX_CONFLICT_INODES 10 ··· 102 101 LOG_WALK_REPLAY_ALL, 103 102 }; 104 103 104 + /* 105 + * The walk control struct is used to pass state down the chain when processing 106 + * the log tree. The stage field tells us which part of the log tree processing 107 + * we are currently doing. 108 + */ 109 + struct walk_control { 110 + /* 111 + * Signal that we are freeing the metadata extents of a log tree. 112 + * This is used at transaction commit time while freeing a log tree. 113 + */ 114 + bool free; 115 + 116 + /* 117 + * Signal that we are pinning the metadata extents of a log tree and the 118 + * data extents its leaves point to (if using mixed block groups). 119 + * This happens in the first stage of log replay to ensure that during 120 + * replay, while we are modifying subvolume trees, we don't overwrite 121 + * the metadata extents of log trees. 122 + */ 123 + bool pin; 124 + 125 + /* What stage of the replay code we're currently in. */ 126 + int stage; 127 + 128 + /* 129 + * Ignore any items from the inode currently being processed. Needs 130 + * to be set every time we find a BTRFS_INODE_ITEM_KEY. 131 + */ 132 + bool ignore_cur_inode; 133 + 134 + /* 135 + * The root we are currently replaying to. This is NULL for the replay 136 + * stage LOG_WALK_PIN_ONLY. 137 + */ 138 + struct btrfs_root *root; 139 + 140 + /* The log tree we are currently processing (not NULL for any stage). */ 141 + struct btrfs_root *log; 142 + 143 + /* The transaction handle used for replaying all log trees. */ 144 + struct btrfs_trans_handle *trans; 145 + 146 + /* 147 + * The function that gets used to process blocks we find in the tree. 148 + * Note the extent_buffer might not be up to date when it is passed in, 149 + * and it must be checked or read if you need the data inside it. 
150 + */ 151 + int (*process_func)(struct extent_buffer *eb, 152 + struct walk_control *wc, u64 gen, int level); 153 + 154 + /* 155 + * The following are used only when stage is >= LOG_WALK_REPLAY_INODES 156 + * and by the replay_one_buffer() callback. 157 + */ 158 + 159 + /* The current log leaf being processed. */ 160 + struct extent_buffer *log_leaf; 161 + /* The key being processed of the current log leaf. */ 162 + struct btrfs_key log_key; 163 + /* The slot being processed of the current log leaf. */ 164 + int log_slot; 165 + 166 + /* A path used for searches and modifications to subvolume trees. */ 167 + struct btrfs_path *subvol_path; 168 + }; 169 + 170 + static void do_abort_log_replay(struct walk_control *wc, const char *function, 171 + unsigned int line, int error, const char *fmt, ...) 172 + { 173 + struct btrfs_fs_info *fs_info = wc->trans->fs_info; 174 + struct va_format vaf; 175 + va_list args; 176 + 177 + /* 178 + * Do nothing if we already aborted, to avoid dumping leaves again which 179 + * can be verbose. Further more, only the first call is useful since it 180 + * is where we have a problem. Note that we do not use the flag 181 + * BTRFS_FS_STATE_TRANS_ABORTED because log replay calls functions that 182 + * are outside of tree-log.c that can abort transactions (such as 183 + * btrfs_add_link() for example), so if that happens we still want to 184 + * dump all log replay specific information below. 
185 + */ 186 + if (test_and_set_bit(BTRFS_FS_STATE_LOG_REPLAY_ABORTED, &fs_info->fs_state)) 187 + return; 188 + 189 + btrfs_abort_transaction(wc->trans, error); 190 + 191 + if (wc->subvol_path->nodes[0]) { 192 + btrfs_crit(fs_info, 193 + "subvolume (root %llu) leaf currently being processed:", 194 + btrfs_root_id(wc->root)); 195 + btrfs_print_leaf(wc->subvol_path->nodes[0]); 196 + } 197 + 198 + if (wc->log_leaf) { 199 + btrfs_crit(fs_info, 200 + "log tree (for root %llu) leaf currently being processed (slot %d key %llu %u %llu):", 201 + btrfs_root_id(wc->root), wc->log_slot, 202 + wc->log_key.objectid, wc->log_key.type, wc->log_key.offset); 203 + btrfs_print_leaf(wc->log_leaf); 204 + } 205 + 206 + va_start(args, fmt); 207 + vaf.fmt = fmt; 208 + vaf.va = &args; 209 + 210 + btrfs_crit(fs_info, 211 + "log replay failed in %s:%u for root %llu, stage %d, with error %d: %pV", 212 + function, line, btrfs_root_id(wc->root), wc->stage, error, &vaf); 213 + 214 + va_end(args); 215 + } 216 + 217 + /* 218 + * Use this for aborting a transaction during log replay while we are down the 219 + * call chain of replay_one_buffer(), so that we get a lot more useful 220 + * information for debugging issues when compared to a plain call to 221 + * btrfs_abort_transaction(). 222 + */ 223 + #define btrfs_abort_log_replay(wc, error, fmt, args...) 
\ 224 + do_abort_log_replay((wc), __func__, __LINE__, (error), fmt, ##args) 225 + 105 226 static int btrfs_log_inode(struct btrfs_trans_handle *trans, 106 227 struct btrfs_inode *inode, 107 228 int inode_only, 108 229 struct btrfs_log_ctx *ctx); 109 - static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 110 - struct btrfs_root *root, 111 - struct btrfs_path *path, u64 objectid); 112 - static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 113 - struct btrfs_root *root, 114 - struct btrfs_root *log, 115 - struct btrfs_path *path, 230 + static int link_to_fixup_dir(struct walk_control *wc, u64 objectid); 231 + static noinline int replay_dir_deletes(struct walk_control *wc, 116 232 u64 dirid, bool del_all); 117 233 static void wait_log_commit(struct btrfs_root *root, int transid); 118 234 ··· 418 300 } 419 301 420 302 /* 421 - * the walk control struct is used to pass state down the chain when 422 - * processing the log tree. The stage field tells us which part 423 - * of the log tree processing we are currently doing. The others 424 - * are state fields used for that specific part 425 - */ 426 - struct walk_control { 427 - /* should we free the extent on disk when done? This is used 428 - * at transaction commit time while freeing a log tree 429 - */ 430 - int free; 431 - 432 - /* pin only walk, we record which extents on disk belong to the 433 - * log trees 434 - */ 435 - int pin; 436 - 437 - /* what stage of the replay code we're currently in */ 438 - int stage; 439 - 440 - /* 441 - * Ignore any items from the inode currently being processed. Needs 442 - * to be set every time we find a BTRFS_INODE_ITEM_KEY. 443 - */ 444 - bool ignore_cur_inode; 445 - 446 - /* the root we are currently replaying */ 447 - struct btrfs_root *replay_dest; 448 - 449 - /* the trans handle for the current replay */ 450 - struct btrfs_trans_handle *trans; 451 - 452 - /* the function that gets used to process blocks we find in the 453 - * tree. 
Note the extent_buffer might not be up to date when it is 454 - * passed in, and it must be checked or read if you need the data 455 - * inside it 456 - */ 457 - int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, 458 - struct walk_control *wc, u64 gen, int level); 459 - }; 460 - 461 - /* 462 303 * process_func used to pin down extents, write them or wait on them 463 304 */ 464 - static int process_one_buffer(struct btrfs_root *log, 465 - struct extent_buffer *eb, 305 + static int process_one_buffer(struct extent_buffer *eb, 466 306 struct walk_control *wc, u64 gen, int level) 467 307 { 308 + struct btrfs_root *log = wc->log; 309 + struct btrfs_trans_handle *trans = wc->trans; 468 310 struct btrfs_fs_info *fs_info = log->fs_info; 469 311 int ret = 0; 470 312 ··· 439 361 }; 440 362 441 363 ret = btrfs_read_extent_buffer(eb, &check); 442 - if (ret) 364 + if (unlikely(ret)) { 365 + if (trans) 366 + btrfs_abort_transaction(trans, ret); 367 + else 368 + btrfs_handle_fs_error(fs_info, ret, NULL); 443 369 return ret; 370 + } 444 371 } 445 372 446 373 if (wc->pin) { 447 - ret = btrfs_pin_extent_for_log_replay(wc->trans, eb); 448 - if (ret) 374 + ASSERT(trans != NULL); 375 + ret = btrfs_pin_extent_for_log_replay(trans, eb); 376 + if (unlikely(ret)) { 377 + btrfs_abort_transaction(trans, ret); 449 378 return ret; 379 + } 450 380 451 - if (btrfs_buffer_uptodate(eb, gen, 0) && 452 - btrfs_header_level(eb) == 0) 381 + if (btrfs_buffer_uptodate(eb, gen, false) && level == 0) { 453 382 ret = btrfs_exclude_logged_extents(eb); 383 + if (ret) 384 + btrfs_abort_transaction(trans, ret); 385 + } 454 386 } 455 387 return ret; 456 388 } 457 389 458 390 /* 459 - * Item overwrite used by log replay. The given eb, slot and key all refer to 460 - * the source data we are copying out. 391 + * Item overwrite used by log replay. The given log tree leaf, slot and key 392 + * from the walk_control structure all refer to the source data we are copying 393 + * out. 
461 394 * 462 395 * The given root is for the tree we are copying into, and path is a scratch 463 396 * path for use in this function (it should be released on entry and will be ··· 480 391 * 481 392 * If the key isn't in the destination yet, a new item is inserted. 482 393 */ 483 - static int overwrite_item(struct btrfs_trans_handle *trans, 484 - struct btrfs_root *root, 485 - struct btrfs_path *path, 486 - struct extent_buffer *eb, int slot, 487 - struct btrfs_key *key) 394 + static int overwrite_item(struct walk_control *wc) 488 395 { 396 + struct btrfs_trans_handle *trans = wc->trans; 397 + struct btrfs_root *root = wc->root; 489 398 int ret; 490 399 u32 item_size; 491 400 u64 saved_i_size = 0; ··· 492 405 unsigned long dst_ptr; 493 406 struct extent_buffer *dst_eb; 494 407 int dst_slot; 495 - bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; 408 + const bool is_inode_item = (wc->log_key.type == BTRFS_INODE_ITEM_KEY); 496 409 497 410 /* 498 411 * This is only used during log replay, so the root is always from a ··· 503 416 */ 504 417 ASSERT(btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID); 505 418 506 - item_size = btrfs_item_size(eb, slot); 507 - src_ptr = btrfs_item_ptr_offset(eb, slot); 419 + item_size = btrfs_item_size(wc->log_leaf, wc->log_slot); 420 + src_ptr = btrfs_item_ptr_offset(wc->log_leaf, wc->log_slot); 508 421 509 422 /* Look for the key in the destination tree. 
*/ 510 - ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 511 - if (ret < 0) 423 + ret = btrfs_search_slot(NULL, root, &wc->log_key, wc->subvol_path, 0, 0); 424 + if (ret < 0) { 425 + btrfs_abort_log_replay(wc, ret, 426 + "failed to search subvolume tree for key (%llu %u %llu) root %llu", 427 + wc->log_key.objectid, wc->log_key.type, 428 + wc->log_key.offset, btrfs_root_id(root)); 512 429 return ret; 430 + } 513 431 514 - dst_eb = path->nodes[0]; 515 - dst_slot = path->slots[0]; 432 + dst_eb = wc->subvol_path->nodes[0]; 433 + dst_slot = wc->subvol_path->slots[0]; 516 434 517 435 if (ret == 0) { 518 436 char *src_copy; ··· 527 435 goto insert; 528 436 529 437 if (item_size == 0) { 530 - btrfs_release_path(path); 438 + btrfs_release_path(wc->subvol_path); 531 439 return 0; 532 440 } 533 441 src_copy = kmalloc(item_size, GFP_NOFS); 534 442 if (!src_copy) { 535 - btrfs_release_path(path); 443 + btrfs_abort_log_replay(wc, -ENOMEM, 444 + "failed to allocate memory for log leaf item"); 536 445 return -ENOMEM; 537 446 } 538 447 539 - read_extent_buffer(eb, src_copy, src_ptr, item_size); 448 + read_extent_buffer(wc->log_leaf, src_copy, src_ptr, item_size); 540 449 dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot); 541 450 ret = memcmp_extent_buffer(dst_eb, src_copy, dst_ptr, item_size); 542 451 ··· 549 456 * sync 550 457 */ 551 458 if (ret == 0) { 552 - btrfs_release_path(path); 459 + btrfs_release_path(wc->subvol_path); 553 460 return 0; 554 461 } 555 462 ··· 557 464 * We need to load the old nbytes into the inode so when we 558 465 * replay the extents we've logged we get the right nbytes. 
559 466 */ 560 - if (inode_item) { 467 + if (is_inode_item) { 561 468 struct btrfs_inode_item *item; 562 469 u64 nbytes; 563 470 u32 mode; ··· 565 472 item = btrfs_item_ptr(dst_eb, dst_slot, 566 473 struct btrfs_inode_item); 567 474 nbytes = btrfs_inode_nbytes(dst_eb, item); 568 - item = btrfs_item_ptr(eb, slot, 475 + item = btrfs_item_ptr(wc->log_leaf, wc->log_slot, 569 476 struct btrfs_inode_item); 570 - btrfs_set_inode_nbytes(eb, item, nbytes); 477 + btrfs_set_inode_nbytes(wc->log_leaf, item, nbytes); 571 478 572 479 /* 573 480 * If this is a directory we need to reset the i_size to 574 481 * 0 so that we can set it up properly when replaying 575 482 * the rest of the items in this log. 576 483 */ 577 - mode = btrfs_inode_mode(eb, item); 484 + mode = btrfs_inode_mode(wc->log_leaf, item); 578 485 if (S_ISDIR(mode)) 579 - btrfs_set_inode_size(eb, item, 0); 486 + btrfs_set_inode_size(wc->log_leaf, item, 0); 580 487 } 581 - } else if (inode_item) { 488 + } else if (is_inode_item) { 582 489 struct btrfs_inode_item *item; 583 490 u32 mode; 584 491 ··· 586 493 * New inode, set nbytes to 0 so that the nbytes comes out 587 494 * properly when we replay the extents. 588 495 */ 589 - item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 590 - btrfs_set_inode_nbytes(eb, item, 0); 496 + item = btrfs_item_ptr(wc->log_leaf, wc->log_slot, struct btrfs_inode_item); 497 + btrfs_set_inode_nbytes(wc->log_leaf, item, 0); 591 498 592 499 /* 593 500 * If this is a directory we need to reset the i_size to 0 so 594 501 * that we can set it up properly when replaying the rest of 595 502 * the items in this log. 
596 503 */ 597 - mode = btrfs_inode_mode(eb, item); 504 + mode = btrfs_inode_mode(wc->log_leaf, item); 598 505 if (S_ISDIR(mode)) 599 - btrfs_set_inode_size(eb, item, 0); 506 + btrfs_set_inode_size(wc->log_leaf, item, 0); 600 507 } 601 508 insert: 602 - btrfs_release_path(path); 509 + btrfs_release_path(wc->subvol_path); 603 510 /* try to insert the key into the destination tree */ 604 - path->skip_release_on_error = 1; 605 - ret = btrfs_insert_empty_item(trans, root, path, 606 - key, item_size); 607 - path->skip_release_on_error = 0; 511 + wc->subvol_path->skip_release_on_error = 1; 512 + ret = btrfs_insert_empty_item(trans, root, wc->subvol_path, &wc->log_key, item_size); 513 + wc->subvol_path->skip_release_on_error = 0; 608 514 609 - dst_eb = path->nodes[0]; 610 - dst_slot = path->slots[0]; 515 + dst_eb = wc->subvol_path->nodes[0]; 516 + dst_slot = wc->subvol_path->slots[0]; 611 517 612 518 /* make sure any existing item is the correct size */ 613 519 if (ret == -EEXIST || ret == -EOVERFLOW) { 614 520 const u32 found_size = btrfs_item_size(dst_eb, dst_slot); 615 521 616 522 if (found_size > item_size) 617 - btrfs_truncate_item(trans, path, item_size, 1); 523 + btrfs_truncate_item(trans, wc->subvol_path, item_size, 1); 618 524 else if (found_size < item_size) 619 - btrfs_extend_item(trans, path, item_size - found_size); 525 + btrfs_extend_item(trans, wc->subvol_path, item_size - found_size); 620 526 } else if (ret) { 527 + btrfs_abort_log_replay(wc, ret, 528 + "failed to insert item for key (%llu %u %llu)", 529 + wc->log_key.objectid, wc->log_key.type, 530 + wc->log_key.offset); 621 531 return ret; 622 532 } 623 533 dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot); ··· 634 538 * state of the tree found in the subvolume, and i_size is modified 635 539 * as it goes 636 540 */ 637 - if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { 541 + if (is_inode_item && ret == -EEXIST) { 638 542 struct btrfs_inode_item *src_item; 639 543 struct btrfs_inode_item 
*dst_item; 640 544 641 545 src_item = (struct btrfs_inode_item *)src_ptr; 642 546 dst_item = (struct btrfs_inode_item *)dst_ptr; 643 547 644 - if (btrfs_inode_generation(eb, src_item) == 0) { 645 - const u64 ino_size = btrfs_inode_size(eb, src_item); 548 + if (btrfs_inode_generation(wc->log_leaf, src_item) == 0) { 549 + const u64 ino_size = btrfs_inode_size(wc->log_leaf, src_item); 646 550 647 551 /* 648 552 * For regular files an ino_size == 0 is used only when ··· 651 555 * case don't set the size of the inode in the fs/subvol 652 556 * tree, otherwise we would be throwing valid data away. 653 557 */ 654 - if (S_ISREG(btrfs_inode_mode(eb, src_item)) && 558 + if (S_ISREG(btrfs_inode_mode(wc->log_leaf, src_item)) && 655 559 S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) && 656 560 ino_size != 0) 657 561 btrfs_set_inode_size(dst_eb, dst_item, ino_size); 658 562 goto no_copy; 659 563 } 660 564 661 - if (S_ISDIR(btrfs_inode_mode(eb, src_item)) && 565 + if (S_ISDIR(btrfs_inode_mode(wc->log_leaf, src_item)) && 662 566 S_ISDIR(btrfs_inode_mode(dst_eb, dst_item))) { 663 567 save_old_i_size = 1; 664 568 saved_i_size = btrfs_inode_size(dst_eb, dst_item); 665 569 } 666 570 } 667 571 668 - copy_extent_buffer(dst_eb, eb, dst_ptr, src_ptr, item_size); 572 + copy_extent_buffer(dst_eb, wc->log_leaf, dst_ptr, src_ptr, item_size); 669 573 670 574 if (save_old_i_size) { 671 575 struct btrfs_inode_item *dst_item; ··· 675 579 } 676 580 677 581 /* make sure the generation is filled in */ 678 - if (key->type == BTRFS_INODE_ITEM_KEY) { 582 + if (is_inode_item) { 679 583 struct btrfs_inode_item *dst_item; 680 584 681 585 dst_item = (struct btrfs_inode_item *)dst_ptr; ··· 683 587 btrfs_set_inode_generation(dst_eb, dst_item, trans->transid); 684 588 } 685 589 no_copy: 686 - btrfs_release_path(path); 590 + btrfs_release_path(wc->subvol_path); 687 591 return 0; 688 592 } 689 593 ··· 714 618 * The extent is inserted into the file, dropping any existing extents 715 619 * from the file that overlap 
the new one. 716 620 */ 717 - static noinline int replay_one_extent(struct btrfs_trans_handle *trans, 718 - struct btrfs_root *root, 719 - struct btrfs_path *path, 720 - struct extent_buffer *eb, int slot, 721 - struct btrfs_key *key) 621 + static noinline int replay_one_extent(struct walk_control *wc) 722 622 { 623 + struct btrfs_trans_handle *trans = wc->trans; 624 + struct btrfs_root *root = wc->root; 723 625 struct btrfs_drop_extents_args drop_args = { 0 }; 724 626 struct btrfs_fs_info *fs_info = root->fs_info; 725 627 int found_type; 726 628 u64 extent_end; 727 - u64 start = key->offset; 629 + const u64 start = wc->log_key.offset; 728 630 u64 nbytes = 0; 631 + u64 csum_start; 632 + u64 csum_end; 633 + LIST_HEAD(ordered_sums); 634 + u64 offset; 635 + unsigned long dest_offset; 636 + struct btrfs_key ins; 729 637 struct btrfs_file_extent_item *item; 730 638 struct btrfs_inode *inode = NULL; 731 - unsigned long size; 732 639 int ret = 0; 733 640 734 - item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 735 - found_type = btrfs_file_extent_type(eb, item); 641 + item = btrfs_item_ptr(wc->log_leaf, wc->log_slot, struct btrfs_file_extent_item); 642 + found_type = btrfs_file_extent_type(wc->log_leaf, item); 736 643 737 644 if (found_type == BTRFS_FILE_EXTENT_REG || 738 645 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 739 - nbytes = btrfs_file_extent_num_bytes(eb, item); 740 - extent_end = start + nbytes; 741 - 742 - /* 743 - * We don't add to the inodes nbytes if we are prealloc or a 744 - * hole. 745 - */ 746 - if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 747 - nbytes = 0; 646 + extent_end = start + btrfs_file_extent_num_bytes(wc->log_leaf, item); 647 + /* Holes don't take up space. 
*/ 648 + if (btrfs_file_extent_disk_bytenr(wc->log_leaf, item) != 0) 649 + nbytes = btrfs_file_extent_num_bytes(wc->log_leaf, item); 748 650 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 749 - size = btrfs_file_extent_ram_bytes(eb, item); 750 - nbytes = btrfs_file_extent_ram_bytes(eb, item); 751 - extent_end = ALIGN(start + size, 752 - fs_info->sectorsize); 651 + nbytes = btrfs_file_extent_ram_bytes(wc->log_leaf, item); 652 + extent_end = ALIGN(start + nbytes, fs_info->sectorsize); 753 653 } else { 754 - btrfs_err(fs_info, 755 - "unexpected extent type=%d root=%llu inode=%llu offset=%llu", 756 - found_type, btrfs_root_id(root), key->objectid, key->offset); 654 + btrfs_abort_log_replay(wc, -EUCLEAN, 655 + "unexpected extent type=%d root=%llu inode=%llu offset=%llu", 656 + found_type, btrfs_root_id(root), 657 + wc->log_key.objectid, wc->log_key.offset); 757 658 return -EUCLEAN; 758 659 } 759 660 760 - inode = btrfs_iget_logging(key->objectid, root); 761 - if (IS_ERR(inode)) 762 - return PTR_ERR(inode); 661 + inode = btrfs_iget_logging(wc->log_key.objectid, root); 662 + if (IS_ERR(inode)) { 663 + ret = PTR_ERR(inode); 664 + btrfs_abort_log_replay(wc, ret, 665 + "failed to get inode %llu for root %llu", 666 + wc->log_key.objectid, btrfs_root_id(root)); 667 + return ret; 668 + } 763 669 764 670 /* 765 671 * first check to see if we already have this extent in the 766 672 * file. This must be done before the btrfs_drop_extents run 767 673 * so we don't try to drop this extent. 
768 674 */ 769 - ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), start, 0); 675 + ret = btrfs_lookup_file_extent(trans, root, wc->subvol_path, 676 + btrfs_ino(inode), start, 0); 770 677 771 678 if (ret == 0 && 772 679 (found_type == BTRFS_FILE_EXTENT_REG || 773 680 found_type == BTRFS_FILE_EXTENT_PREALLOC)) { 681 + struct extent_buffer *leaf = wc->subvol_path->nodes[0]; 774 682 struct btrfs_file_extent_item existing; 775 683 unsigned long ptr; 776 684 777 - ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 778 - read_extent_buffer(path->nodes[0], &existing, ptr, sizeof(existing)); 685 + ptr = btrfs_item_ptr_offset(leaf, wc->subvol_path->slots[0]); 686 + read_extent_buffer(leaf, &existing, ptr, sizeof(existing)); 779 687 780 688 /* 781 689 * we already have a pointer to this exact extent, 782 690 * we don't have to do anything 783 691 */ 784 - if (memcmp_extent_buffer(eb, &existing, (unsigned long)item, 692 + if (memcmp_extent_buffer(wc->log_leaf, &existing, (unsigned long)item, 785 693 sizeof(existing)) == 0) { 786 - btrfs_release_path(path); 694 + btrfs_release_path(wc->subvol_path); 787 695 goto out; 788 696 } 789 697 } 790 - btrfs_release_path(path); 698 + btrfs_release_path(wc->subvol_path); 791 699 792 700 /* drop any overlapping extents */ 793 701 drop_args.start = start; 794 702 drop_args.end = extent_end; 795 703 drop_args.drop_cache = true; 704 + drop_args.path = wc->subvol_path; 796 705 ret = btrfs_drop_extents(trans, root, inode, &drop_args); 797 - if (ret) 706 + if (ret) { 707 + btrfs_abort_log_replay(wc, ret, 708 + "failed to drop extents for inode %llu range [%llu, %llu) root %llu", 709 + wc->log_key.objectid, start, extent_end, 710 + btrfs_root_id(root)); 798 711 goto out; 799 - 800 - if (found_type == BTRFS_FILE_EXTENT_REG || 801 - found_type == BTRFS_FILE_EXTENT_PREALLOC) { 802 - u64 offset; 803 - unsigned long dest_offset; 804 - struct btrfs_key ins; 805 - 806 - if (btrfs_file_extent_disk_bytenr(eb, item) == 0 && 807 
- btrfs_fs_incompat(fs_info, NO_HOLES)) 808 - goto update_inode; 809 - 810 - ret = btrfs_insert_empty_item(trans, root, path, key, 811 - sizeof(*item)); 812 - if (ret) 813 - goto out; 814 - dest_offset = btrfs_item_ptr_offset(path->nodes[0], 815 - path->slots[0]); 816 - copy_extent_buffer(path->nodes[0], eb, dest_offset, 817 - (unsigned long)item, sizeof(*item)); 818 - 819 - ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 820 - ins.type = BTRFS_EXTENT_ITEM_KEY; 821 - ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 822 - offset = key->offset - btrfs_file_extent_offset(eb, item); 823 - 824 - /* 825 - * Manually record dirty extent, as here we did a shallow 826 - * file extent item copy and skip normal backref update, 827 - * but modifying extent tree all by ourselves. 828 - * So need to manually record dirty extent for qgroup, 829 - * as the owner of the file extent changed from log tree 830 - * (doesn't affect qgroup) to fs/file tree(affects qgroup) 831 - */ 832 - ret = btrfs_qgroup_trace_extent(trans, 833 - btrfs_file_extent_disk_bytenr(eb, item), 834 - btrfs_file_extent_disk_num_bytes(eb, item)); 835 - if (ret < 0) 836 - goto out; 837 - 838 - if (ins.objectid > 0) { 839 - u64 csum_start; 840 - u64 csum_end; 841 - LIST_HEAD(ordered_sums); 842 - 843 - /* 844 - * is this extent already allocated in the extent 845 - * allocation tree? 
If so, just add a reference 846 - */ 847 - ret = btrfs_lookup_data_extent(fs_info, ins.objectid, 848 - ins.offset); 849 - if (ret < 0) { 850 - goto out; 851 - } else if (ret == 0) { 852 - struct btrfs_ref ref = { 853 - .action = BTRFS_ADD_DELAYED_REF, 854 - .bytenr = ins.objectid, 855 - .num_bytes = ins.offset, 856 - .owning_root = btrfs_root_id(root), 857 - .ref_root = btrfs_root_id(root), 858 - }; 859 - btrfs_init_data_ref(&ref, key->objectid, offset, 860 - 0, false); 861 - ret = btrfs_inc_extent_ref(trans, &ref); 862 - if (ret) 863 - goto out; 864 - } else { 865 - /* 866 - * insert the extent pointer in the extent 867 - * allocation tree 868 - */ 869 - ret = btrfs_alloc_logged_file_extent(trans, 870 - btrfs_root_id(root), 871 - key->objectid, offset, &ins); 872 - if (ret) 873 - goto out; 874 - } 875 - btrfs_release_path(path); 876 - 877 - if (btrfs_file_extent_compression(eb, item)) { 878 - csum_start = ins.objectid; 879 - csum_end = csum_start + ins.offset; 880 - } else { 881 - csum_start = ins.objectid + 882 - btrfs_file_extent_offset(eb, item); 883 - csum_end = csum_start + 884 - btrfs_file_extent_num_bytes(eb, item); 885 - } 886 - 887 - ret = btrfs_lookup_csums_list(root->log_root, 888 - csum_start, csum_end - 1, 889 - &ordered_sums, false); 890 - if (ret < 0) 891 - goto out; 892 - ret = 0; 893 - /* 894 - * Now delete all existing cums in the csum root that 895 - * cover our range. We do this because we can have an 896 - * extent that is completely referenced by one file 897 - * extent item and partially referenced by another 898 - * file extent item (like after using the clone or 899 - * extent_same ioctls). In this case if we end up doing 900 - * the replay of the one that partially references the 901 - * extent first, and we do not do the csum deletion 902 - * below, we can get 2 csum items in the csum tree that 903 - * overlap each other. 
For example, imagine our log has 904 - * the two following file extent items: 905 - * 906 - * key (257 EXTENT_DATA 409600) 907 - * extent data disk byte 12845056 nr 102400 908 - * extent data offset 20480 nr 20480 ram 102400 909 - * 910 - * key (257 EXTENT_DATA 819200) 911 - * extent data disk byte 12845056 nr 102400 912 - * extent data offset 0 nr 102400 ram 102400 913 - * 914 - * Where the second one fully references the 100K extent 915 - * that starts at disk byte 12845056, and the log tree 916 - * has a single csum item that covers the entire range 917 - * of the extent: 918 - * 919 - * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 920 - * 921 - * After the first file extent item is replayed, the 922 - * csum tree gets the following csum item: 923 - * 924 - * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 925 - * 926 - * Which covers the 20K sub-range starting at offset 20K 927 - * of our extent. Now when we replay the second file 928 - * extent item, if we do not delete existing csum items 929 - * that cover any of its blocks, we end up getting two 930 - * csum items in our csum tree that overlap each other: 931 - * 932 - * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 933 - * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 934 - * 935 - * Which is a problem, because after this anyone trying 936 - * to lookup up for the checksum of any block of our 937 - * extent starting at an offset of 40K or higher, will 938 - * end up looking at the second csum item only, which 939 - * does not contain the checksum for any block starting 940 - * at offset 40K or higher of our extent. 
941 - */ 942 - while (!list_empty(&ordered_sums)) { 943 - struct btrfs_ordered_sum *sums; 944 - struct btrfs_root *csum_root; 945 - 946 - sums = list_first_entry(&ordered_sums, 947 - struct btrfs_ordered_sum, 948 - list); 949 - csum_root = btrfs_csum_root(fs_info, 950 - sums->logical); 951 - if (!ret) 952 - ret = btrfs_del_csums(trans, csum_root, 953 - sums->logical, 954 - sums->len); 955 - if (!ret) 956 - ret = btrfs_csum_file_blocks(trans, 957 - csum_root, 958 - sums); 959 - list_del(&sums->list); 960 - kfree(sums); 961 - } 962 - if (ret) 963 - goto out; 964 - } else { 965 - btrfs_release_path(path); 966 - } 967 - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 968 - /* inline extents are easy, we just overwrite them */ 969 - ret = overwrite_item(trans, root, path, eb, slot, key); 970 - if (ret) 971 - goto out; 972 712 } 973 713 974 - ret = btrfs_inode_set_file_extent_range(inode, start, extent_end - start); 714 + if (found_type == BTRFS_FILE_EXTENT_INLINE) { 715 + /* inline extents are easy, we just overwrite them */ 716 + ret = overwrite_item(wc); 717 + if (ret) 718 + goto out; 719 + goto update_inode; 720 + } 721 + 722 + /* 723 + * If not an inline extent, it can only be a regular or prealloc one. 724 + * We have checked that above and returned -EUCLEAN if not. 725 + */ 726 + 727 + /* A hole and NO_HOLES feature enabled, nothing else to do. 
*/ 728 + if (btrfs_file_extent_disk_bytenr(wc->log_leaf, item) == 0 && 729 + btrfs_fs_incompat(fs_info, NO_HOLES)) 730 + goto update_inode; 731 + 732 + ret = btrfs_insert_empty_item(trans, root, wc->subvol_path, 733 + &wc->log_key, sizeof(*item)); 734 + if (ret) { 735 + btrfs_abort_log_replay(wc, ret, 736 + "failed to insert item with key (%llu %u %llu) root %llu", 737 + wc->log_key.objectid, wc->log_key.type, 738 + wc->log_key.offset, btrfs_root_id(root)); 739 + goto out; 740 + } 741 + dest_offset = btrfs_item_ptr_offset(wc->subvol_path->nodes[0], 742 + wc->subvol_path->slots[0]); 743 + copy_extent_buffer(wc->subvol_path->nodes[0], wc->log_leaf, dest_offset, 744 + (unsigned long)item, sizeof(*item)); 745 + 746 + /* 747 + * We have an explicit hole and NO_HOLES is not enabled. We have added 748 + * the hole file extent item to the subvolume tree, so we don't have 749 + * anything else to do other than update the file extent item range and 750 + * update the inode item. 751 + */ 752 + if (btrfs_file_extent_disk_bytenr(wc->log_leaf, item) == 0) { 753 + btrfs_release_path(wc->subvol_path); 754 + goto update_inode; 755 + } 756 + 757 + ins.objectid = btrfs_file_extent_disk_bytenr(wc->log_leaf, item); 758 + ins.type = BTRFS_EXTENT_ITEM_KEY; 759 + ins.offset = btrfs_file_extent_disk_num_bytes(wc->log_leaf, item); 760 + offset = wc->log_key.offset - btrfs_file_extent_offset(wc->log_leaf, item); 761 + 762 + /* 763 + * Manually record dirty extent, as here we did a shallow file extent 764 + * item copy and skip normal backref update, but modifying extent tree 765 + * all by ourselves. So need to manually record dirty extent for qgroup, 766 + * as the owner of the file extent changed from log tree (doesn't affect 767 + * qgroup) to fs/file tree (affects qgroup). 
768 + */ 769 + ret = btrfs_qgroup_trace_extent(trans, ins.objectid, ins.offset); 770 + if (ret < 0) { 771 + btrfs_abort_log_replay(wc, ret, 772 + "failed to trace extent for bytenr %llu disk_num_bytes %llu inode %llu root %llu", 773 + ins.objectid, ins.offset, 774 + wc->log_key.objectid, btrfs_root_id(root)); 775 + goto out; 776 + } 777 + 778 + /* 779 + * Is this extent already allocated in the extent tree? 780 + * If so, just add a reference. 781 + */ 782 + ret = btrfs_lookup_data_extent(fs_info, ins.objectid, ins.offset); 783 + if (ret < 0) { 784 + btrfs_abort_log_replay(wc, ret, 785 + "failed to lookup data extent for bytenr %llu disk_num_bytes %llu inode %llu root %llu", 786 + ins.objectid, ins.offset, 787 + wc->log_key.objectid, btrfs_root_id(root)); 788 + goto out; 789 + } else if (ret == 0) { 790 + struct btrfs_ref ref = { 791 + .action = BTRFS_ADD_DELAYED_REF, 792 + .bytenr = ins.objectid, 793 + .num_bytes = ins.offset, 794 + .owning_root = btrfs_root_id(root), 795 + .ref_root = btrfs_root_id(root), 796 + }; 797 + 798 + btrfs_init_data_ref(&ref, wc->log_key.objectid, offset, 0, false); 799 + ret = btrfs_inc_extent_ref(trans, &ref); 800 + if (ret) { 801 + btrfs_abort_log_replay(wc, ret, 802 + "failed to increment data extent for bytenr %llu disk_num_bytes %llu inode %llu root %llu", 803 + ins.objectid, ins.offset, 804 + wc->log_key.objectid, 805 + btrfs_root_id(root)); 806 + goto out; 807 + } 808 + } else { 809 + /* Insert the extent pointer in the extent tree. 
*/ 810 + ret = btrfs_alloc_logged_file_extent(trans, btrfs_root_id(root), 811 + wc->log_key.objectid, offset, &ins); 812 + if (ret) { 813 + btrfs_abort_log_replay(wc, ret, 814 + "failed to allocate logged data extent for bytenr %llu disk_num_bytes %llu offset %llu inode %llu root %llu", 815 + ins.objectid, ins.offset, offset, 816 + wc->log_key.objectid, btrfs_root_id(root)); 817 + goto out; 818 + } 819 + } 820 + 821 + btrfs_release_path(wc->subvol_path); 822 + 823 + if (btrfs_file_extent_compression(wc->log_leaf, item)) { 824 + csum_start = ins.objectid; 825 + csum_end = csum_start + ins.offset; 826 + } else { 827 + csum_start = ins.objectid + btrfs_file_extent_offset(wc->log_leaf, item); 828 + csum_end = csum_start + btrfs_file_extent_num_bytes(wc->log_leaf, item); 829 + } 830 + 831 + ret = btrfs_lookup_csums_list(root->log_root, csum_start, csum_end - 1, 832 + &ordered_sums, false); 833 + if (ret < 0) { 834 + btrfs_abort_log_replay(wc, ret, 835 + "failed to lookups csums for range [%llu, %llu) inode %llu root %llu", 836 + csum_start, csum_end, wc->log_key.objectid, 837 + btrfs_root_id(root)); 838 + goto out; 839 + } 840 + ret = 0; 841 + /* 842 + * Now delete all existing cums in the csum root that cover our range. 843 + * We do this because we can have an extent that is completely 844 + * referenced by one file extent item and partially referenced by 845 + * another file extent item (like after using the clone or extent_same 846 + * ioctls). In this case if we end up doing the replay of the one that 847 + * partially references the extent first, and we do not do the csum 848 + * deletion below, we can get 2 csum items in the csum tree that overlap 849 + * each other. 
For example, imagine our log has the two following file 850 + * extent items: 851 + * 852 + * key (257 EXTENT_DATA 409600) 853 + * extent data disk byte 12845056 nr 102400 854 + * extent data offset 20480 nr 20480 ram 102400 855 + * 856 + * key (257 EXTENT_DATA 819200) 857 + * extent data disk byte 12845056 nr 102400 858 + * extent data offset 0 nr 102400 ram 102400 859 + * 860 + * Where the second one fully references the 100K extent that starts at 861 + * disk byte 12845056, and the log tree has a single csum item that 862 + * covers the entire range of the extent: 863 + * 864 + * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 865 + * 866 + * After the first file extent item is replayed, the csum tree gets the 867 + * following csum item: 868 + * 869 + * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 870 + * 871 + * Which covers the 20K sub-range starting at offset 20K of our extent. 872 + * Now when we replay the second file extent item, if we do not delete 873 + * existing csum items that cover any of its blocks, we end up getting 874 + * two csum items in our csum tree that overlap each other: 875 + * 876 + * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 877 + * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 878 + * 879 + * Which is a problem, because after this anyone trying to lookup for 880 + * the checksum of any block of our extent starting at an offset of 40K 881 + * or higher, will end up looking at the second csum item only, which 882 + * does not contain the checksum for any block starting at offset 40K or 883 + * higher of our extent. 
884 + */ 885 + while (!list_empty(&ordered_sums)) { 886 + struct btrfs_ordered_sum *sums; 887 + struct btrfs_root *csum_root; 888 + 889 + sums = list_first_entry(&ordered_sums, struct btrfs_ordered_sum, list); 890 + csum_root = btrfs_csum_root(fs_info, sums->logical); 891 + if (!ret) { 892 + ret = btrfs_del_csums(trans, csum_root, sums->logical, 893 + sums->len); 894 + if (ret) 895 + btrfs_abort_log_replay(wc, ret, 896 + "failed to delete csums for range [%llu, %llu) inode %llu root %llu", 897 + sums->logical, 898 + sums->logical + sums->len, 899 + wc->log_key.objectid, 900 + btrfs_root_id(root)); 901 + } 902 + if (!ret) { 903 + ret = btrfs_csum_file_blocks(trans, csum_root, sums); 904 + if (ret) 905 + btrfs_abort_log_replay(wc, ret, 906 + "failed to add csums for range [%llu, %llu) inode %llu root %llu", 907 + sums->logical, 908 + sums->logical + sums->len, 909 + wc->log_key.objectid, 910 + btrfs_root_id(root)); 911 + } 912 + list_del(&sums->list); 913 + kfree(sums); 914 + } 975 915 if (ret) 976 916 goto out; 977 917 978 918 update_inode: 919 + ret = btrfs_inode_set_file_extent_range(inode, start, extent_end - start); 920 + if (ret) { 921 + btrfs_abort_log_replay(wc, ret, 922 + "failed to set file extent range [%llu, %llu) inode %llu root %llu", 923 + start, extent_end, wc->log_key.objectid, 924 + btrfs_root_id(root)); 925 + goto out; 926 + } 927 + 979 928 btrfs_update_inode_bytes(inode, nbytes, drop_args.bytes_found); 980 929 ret = btrfs_update_inode(trans, inode); 930 + if (ret) 931 + btrfs_abort_log_replay(wc, ret, 932 + "failed to update inode %llu root %llu", 933 + wc->log_key.objectid, btrfs_root_id(root)); 981 934 out: 982 935 iput(&inode->vfs_inode); 983 936 return ret; 984 937 } 985 938 986 - static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans, 939 + static int unlink_inode_for_log_replay(struct walk_control *wc, 987 940 struct btrfs_inode *dir, 988 941 struct btrfs_inode *inode, 989 942 const struct fscrypt_str *name) 990 943 { 944 + 
struct btrfs_trans_handle *trans = wc->trans; 991 945 int ret; 992 946 993 947 ret = btrfs_unlink_inode(trans, dir, inode, name); 994 - if (ret) 948 + if (ret) { 949 + btrfs_abort_log_replay(wc, ret, 950 + "failed to unlink inode %llu parent dir %llu name %.*s root %llu", 951 + btrfs_ino(inode), btrfs_ino(dir), name->len, 952 + name->name, btrfs_root_id(inode->root)); 995 953 return ret; 954 + } 996 955 /* 997 956 * Whenever we need to check if a name exists or not, we check the 998 957 * fs/subvolume tree. So after an unlink we must run delayed items, so 999 958 * that future checks for a name during log replay see that the name 1000 959 * does not exists anymore. 1001 960 */ 1002 - return btrfs_run_delayed_items(trans); 961 + ret = btrfs_run_delayed_items(trans); 962 + if (ret) 963 + btrfs_abort_log_replay(wc, ret, 964 + "failed to run delayed items current inode %llu parent dir %llu name %.*s root %llu", 965 + btrfs_ino(inode), btrfs_ino(dir), name->len, 966 + name->name, btrfs_root_id(inode->root)); 967 + 968 + return ret; 1003 969 } 1004 970 1005 971 /* ··· 1072 914 * This is a helper function to do the unlink of a specific directory 1073 915 * item 1074 916 */ 1075 - static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, 1076 - struct btrfs_path *path, 917 + static noinline int drop_one_dir_item(struct walk_control *wc, 1077 918 struct btrfs_inode *dir, 1078 919 struct btrfs_dir_item *di) 1079 920 { 1080 921 struct btrfs_root *root = dir->root; 1081 922 struct btrfs_inode *inode; 1082 923 struct fscrypt_str name; 1083 - struct extent_buffer *leaf; 924 + struct extent_buffer *leaf = wc->subvol_path->nodes[0]; 1084 925 struct btrfs_key location; 1085 926 int ret; 1086 927 1087 - leaf = path->nodes[0]; 1088 - 1089 928 btrfs_dir_item_key_to_cpu(leaf, di, &location); 1090 929 ret = read_alloc_one_name(leaf, di + 1, btrfs_dir_name_len(leaf, di), &name); 1091 - if (ret) 1092 - return -ENOMEM; 930 + if (ret) { 931 + btrfs_abort_log_replay(wc, ret, 
932 + "failed to allocate name for dir %llu root %llu", 933 + btrfs_ino(dir), btrfs_root_id(root)); 934 + return ret; 935 + } 1093 936 1094 - btrfs_release_path(path); 937 + btrfs_release_path(wc->subvol_path); 1095 938 1096 939 inode = btrfs_iget_logging(location.objectid, root); 1097 940 if (IS_ERR(inode)) { 1098 941 ret = PTR_ERR(inode); 942 + btrfs_abort_log_replay(wc, ret, 943 + "failed to open inode %llu parent dir %llu name %.*s root %llu", 944 + location.objectid, btrfs_ino(dir), 945 + name.len, name.name, btrfs_root_id(root)); 1099 946 inode = NULL; 1100 947 goto out; 1101 948 } 1102 949 1103 - ret = link_to_fixup_dir(trans, root, path, location.objectid); 950 + ret = link_to_fixup_dir(wc, location.objectid); 1104 951 if (ret) 1105 952 goto out; 1106 953 1107 - ret = unlink_inode_for_log_replay(trans, dir, inode, &name); 954 + ret = unlink_inode_for_log_replay(wc, dir, inode, &name); 1108 955 out: 1109 956 kfree(name.name); 1110 957 if (inode) ··· 1176 1013 u64 ref_objectid, 1177 1014 const struct fscrypt_str *name) 1178 1015 { 1179 - struct btrfs_path *path; 1016 + BTRFS_PATH_AUTO_FREE(path); 1180 1017 int ret; 1181 1018 1182 1019 path = btrfs_alloc_path(); ··· 1184 1021 return -ENOMEM; 1185 1022 1186 1023 ret = btrfs_search_slot(NULL, log, key, path, 0, 0); 1187 - if (ret < 0) { 1188 - goto out; 1189 - } else if (ret == 1) { 1190 - ret = 0; 1191 - goto out; 1192 - } 1024 + if (ret < 0) 1025 + return ret; 1026 + if (ret == 1) 1027 + return 0; 1193 1028 1194 1029 if (key->type == BTRFS_INODE_EXTREF_KEY) 1195 1030 ret = !!btrfs_find_name_in_ext_backref(path->nodes[0], ··· 1196 1035 else 1197 1036 ret = !!btrfs_find_name_in_backref(path->nodes[0], 1198 1037 path->slots[0], name); 1199 - out: 1200 - btrfs_free_path(path); 1201 1038 return ret; 1202 1039 } 1203 1040 1204 - static int unlink_refs_not_in_log(struct btrfs_trans_handle *trans, 1205 - struct btrfs_path *path, 1206 - struct btrfs_root *log_root, 1041 + static int unlink_refs_not_in_log(struct 
walk_control *wc, 1207 1042 struct btrfs_key *search_key, 1208 1043 struct btrfs_inode *dir, 1209 - struct btrfs_inode *inode, 1210 - u64 parent_objectid) 1044 + struct btrfs_inode *inode) 1211 1045 { 1212 - struct extent_buffer *leaf = path->nodes[0]; 1046 + struct extent_buffer *leaf = wc->subvol_path->nodes[0]; 1213 1047 unsigned long ptr; 1214 1048 unsigned long ptr_end; 1215 1049 ··· 1213 1057 * log. If so, we allow them to stay otherwise they must be unlinked as 1214 1058 * a conflict. 1215 1059 */ 1216 - ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 1217 - ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]); 1060 + ptr = btrfs_item_ptr_offset(leaf, wc->subvol_path->slots[0]); 1061 + ptr_end = ptr + btrfs_item_size(leaf, wc->subvol_path->slots[0]); 1218 1062 while (ptr < ptr_end) { 1219 1063 struct fscrypt_str victim_name; 1220 1064 struct btrfs_inode_ref *victim_ref; ··· 1224 1068 ret = read_alloc_one_name(leaf, (victim_ref + 1), 1225 1069 btrfs_inode_ref_name_len(leaf, victim_ref), 1226 1070 &victim_name); 1227 - if (ret) 1228 - return ret; 1229 - 1230 - ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name); 1231 1071 if (ret) { 1232 - kfree(victim_name.name); 1233 - if (ret < 0) 1072 + btrfs_abort_log_replay(wc, ret, 1073 + "failed to allocate name for inode %llu parent dir %llu root %llu", 1074 + btrfs_ino(inode), btrfs_ino(dir), 1075 + btrfs_root_id(inode->root)); 1076 + return ret; 1077 + } 1078 + 1079 + ret = backref_in_log(wc->log, search_key, btrfs_ino(dir), &victim_name); 1080 + if (ret) { 1081 + if (ret < 0) { 1082 + btrfs_abort_log_replay(wc, ret, 1083 + "failed to check if backref is in log tree for inode %llu parent dir %llu name %.*s root %llu", 1084 + btrfs_ino(inode), btrfs_ino(dir), 1085 + victim_name.len, victim_name.name, 1086 + btrfs_root_id(inode->root)); 1087 + kfree(victim_name.name); 1234 1088 return ret; 1089 + } 1090 + kfree(victim_name.name); 1235 1091 ptr = (unsigned long)(victim_ref + 1) + 
victim_name.len; 1236 1092 continue; 1237 1093 } 1238 1094 1239 1095 inc_nlink(&inode->vfs_inode); 1240 - btrfs_release_path(path); 1096 + btrfs_release_path(wc->subvol_path); 1241 1097 1242 - ret = unlink_inode_for_log_replay(trans, dir, inode, &victim_name); 1098 + ret = unlink_inode_for_log_replay(wc, dir, inode, &victim_name); 1243 1099 kfree(victim_name.name); 1244 1100 if (ret) 1245 1101 return ret; ··· 1261 1093 return 0; 1262 1094 } 1263 1095 1264 - static int unlink_extrefs_not_in_log(struct btrfs_trans_handle *trans, 1265 - struct btrfs_path *path, 1266 - struct btrfs_root *root, 1267 - struct btrfs_root *log_root, 1096 + static int unlink_extrefs_not_in_log(struct walk_control *wc, 1268 1097 struct btrfs_key *search_key, 1269 - struct btrfs_inode *inode, 1270 - u64 inode_objectid, 1271 - u64 parent_objectid) 1098 + struct btrfs_inode *dir, 1099 + struct btrfs_inode *inode) 1272 1100 { 1273 - struct extent_buffer *leaf = path->nodes[0]; 1274 - const unsigned long base = btrfs_item_ptr_offset(leaf, path->slots[0]); 1275 - const u32 item_size = btrfs_item_size(leaf, path->slots[0]); 1101 + struct extent_buffer *leaf = wc->subvol_path->nodes[0]; 1102 + const unsigned long base = btrfs_item_ptr_offset(leaf, wc->subvol_path->slots[0]); 1103 + const u32 item_size = btrfs_item_size(leaf, wc->subvol_path->slots[0]); 1276 1104 u32 cur_offset = 0; 1277 1105 1278 1106 while (cur_offset < item_size) { 1107 + struct btrfs_root *log_root = wc->log; 1279 1108 struct btrfs_inode_extref *extref; 1280 - struct btrfs_inode *victim_parent; 1281 1109 struct fscrypt_str victim_name; 1282 1110 int ret; 1283 1111 1284 1112 extref = (struct btrfs_inode_extref *)(base + cur_offset); 1285 1113 victim_name.len = btrfs_inode_extref_name_len(leaf, extref); 1286 1114 1287 - if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) 1115 + if (btrfs_inode_extref_parent(leaf, extref) != btrfs_ino(dir)) 1288 1116 goto next; 1289 1117 1290 1118 ret = read_alloc_one_name(leaf, 
&extref->name, victim_name.len, 1291 1119 &victim_name); 1292 - if (ret) 1120 + if (ret) { 1121 + btrfs_abort_log_replay(wc, ret, 1122 + "failed to allocate name for inode %llu parent dir %llu root %llu", 1123 + btrfs_ino(inode), btrfs_ino(dir), 1124 + btrfs_root_id(inode->root)); 1293 1125 return ret; 1126 + } 1294 1127 1295 - search_key->objectid = inode_objectid; 1128 + search_key->objectid = btrfs_ino(inode); 1296 1129 search_key->type = BTRFS_INODE_EXTREF_KEY; 1297 - search_key->offset = btrfs_extref_hash(parent_objectid, 1130 + search_key->offset = btrfs_extref_hash(btrfs_ino(dir), 1298 1131 victim_name.name, 1299 1132 victim_name.len); 1300 - ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name); 1133 + ret = backref_in_log(log_root, search_key, btrfs_ino(dir), &victim_name); 1301 1134 if (ret) { 1302 - kfree(victim_name.name); 1303 - if (ret < 0) 1135 + if (ret < 0) { 1136 + btrfs_abort_log_replay(wc, ret, 1137 + "failed to check if backref is in log tree for inode %llu parent dir %llu name %.*s root %llu", 1138 + btrfs_ino(inode), btrfs_ino(dir), 1139 + victim_name.len, victim_name.name, 1140 + btrfs_root_id(inode->root)); 1141 + kfree(victim_name.name); 1304 1142 return ret; 1143 + } 1144 + kfree(victim_name.name); 1305 1145 next: 1306 1146 cur_offset += victim_name.len + sizeof(*extref); 1307 1147 continue; 1308 1148 } 1309 1149 1310 - victim_parent = btrfs_iget_logging(parent_objectid, root); 1311 - if (IS_ERR(victim_parent)) { 1312 - kfree(victim_name.name); 1313 - return PTR_ERR(victim_parent); 1314 - } 1315 - 1316 1150 inc_nlink(&inode->vfs_inode); 1317 - btrfs_release_path(path); 1151 + btrfs_release_path(wc->subvol_path); 1318 1152 1319 - ret = unlink_inode_for_log_replay(trans, victim_parent, inode, 1320 - &victim_name); 1321 - iput(&victim_parent->vfs_inode); 1153 + ret = unlink_inode_for_log_replay(wc, dir, inode, &victim_name); 1322 1154 kfree(victim_name.name); 1323 1155 if (ret) 1324 1156 return ret; ··· 1328 1160 return 
0; 1329 1161 } 1330 1162 1331 - static inline int __add_inode_ref(struct btrfs_trans_handle *trans, 1332 - struct btrfs_root *root, 1333 - struct btrfs_path *path, 1334 - struct btrfs_root *log_root, 1163 + static inline int __add_inode_ref(struct walk_control *wc, 1335 1164 struct btrfs_inode *dir, 1336 1165 struct btrfs_inode *inode, 1337 - u64 inode_objectid, u64 parent_objectid, 1338 1166 u64 ref_index, struct fscrypt_str *name) 1339 1167 { 1340 1168 int ret; 1169 + struct btrfs_trans_handle *trans = wc->trans; 1170 + struct btrfs_root *root = wc->root; 1341 1171 struct btrfs_dir_item *di; 1342 1172 struct btrfs_key search_key; 1343 1173 struct btrfs_inode_extref *extref; 1344 1174 1345 1175 again: 1346 1176 /* Search old style refs */ 1347 - search_key.objectid = inode_objectid; 1177 + search_key.objectid = btrfs_ino(inode); 1348 1178 search_key.type = BTRFS_INODE_REF_KEY; 1349 - search_key.offset = parent_objectid; 1350 - ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 1179 + search_key.offset = btrfs_ino(dir); 1180 + ret = btrfs_search_slot(NULL, root, &search_key, wc->subvol_path, 0, 0); 1351 1181 if (ret < 0) { 1182 + btrfs_abort_log_replay(wc, ret, 1183 + "failed to search subvolume tree for key (%llu %u %llu) root %llu", 1184 + search_key.objectid, search_key.type, 1185 + search_key.offset, btrfs_root_id(root)); 1352 1186 return ret; 1353 1187 } else if (ret == 0) { 1354 1188 /* ··· 1360 1190 if (search_key.objectid == search_key.offset) 1361 1191 return 1; 1362 1192 1363 - ret = unlink_refs_not_in_log(trans, path, log_root, &search_key, 1364 - dir, inode, parent_objectid); 1193 + ret = unlink_refs_not_in_log(wc, &search_key, dir, inode); 1365 1194 if (ret == -EAGAIN) 1366 1195 goto again; 1367 1196 else if (ret) 1368 1197 return ret; 1369 1198 } 1370 - btrfs_release_path(path); 1199 + btrfs_release_path(wc->subvol_path); 1371 1200 1372 1201 /* Same search but for extended refs */ 1373 - extref = btrfs_lookup_inode_extref(root, path, name, 
inode_objectid, parent_objectid); 1202 + extref = btrfs_lookup_inode_extref(root, wc->subvol_path, name, 1203 + btrfs_ino(inode), btrfs_ino(dir)); 1374 1204 if (IS_ERR(extref)) { 1375 1205 return PTR_ERR(extref); 1376 1206 } else if (extref) { 1377 - ret = unlink_extrefs_not_in_log(trans, path, root, log_root, 1378 - &search_key, inode, 1379 - inode_objectid, parent_objectid); 1207 + ret = unlink_extrefs_not_in_log(wc, &search_key, dir, inode); 1380 1208 if (ret == -EAGAIN) 1381 1209 goto again; 1382 1210 else if (ret) 1383 1211 return ret; 1384 1212 } 1385 - btrfs_release_path(path); 1213 + btrfs_release_path(wc->subvol_path); 1386 1214 1387 1215 /* look for a conflicting sequence number */ 1388 - di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), 1216 + di = btrfs_lookup_dir_index_item(trans, root, wc->subvol_path, btrfs_ino(dir), 1389 1217 ref_index, name, 0); 1390 1218 if (IS_ERR(di)) { 1391 - return PTR_ERR(di); 1219 + ret = PTR_ERR(di); 1220 + btrfs_abort_log_replay(wc, ret, 1221 + "failed to lookup dir index item for dir %llu ref_index %llu name %.*s root %llu", 1222 + btrfs_ino(dir), ref_index, name->len, 1223 + name->name, btrfs_root_id(root)); 1224 + return ret; 1392 1225 } else if (di) { 1393 - ret = drop_one_dir_item(trans, path, dir, di); 1226 + ret = drop_one_dir_item(wc, dir, di); 1394 1227 if (ret) 1395 1228 return ret; 1396 1229 } 1397 - btrfs_release_path(path); 1230 + btrfs_release_path(wc->subvol_path); 1398 1231 1399 1232 /* look for a conflicting name */ 1400 - di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, 0); 1233 + di = btrfs_lookup_dir_item(trans, root, wc->subvol_path, btrfs_ino(dir), name, 0); 1401 1234 if (IS_ERR(di)) { 1402 - return PTR_ERR(di); 1235 + ret = PTR_ERR(di); 1236 + btrfs_abort_log_replay(wc, ret, 1237 + "failed to lookup dir item for dir %llu name %.*s root %llu", 1238 + btrfs_ino(dir), name->len, name->name, 1239 + btrfs_root_id(root)); 1240 + return ret; 1403 1241 } else if (di) { 
1404 - ret = drop_one_dir_item(trans, path, dir, di); 1242 + ret = drop_one_dir_item(wc, dir, di); 1405 1243 if (ret) 1406 1244 return ret; 1407 1245 } 1408 - btrfs_release_path(path); 1246 + btrfs_release_path(wc->subvol_path); 1409 1247 1410 1248 return 0; 1411 1249 } ··· 1466 1288 * proper unlink of that name (that is, remove its entry from the inode 1467 1289 * reference item and both dir index keys). 1468 1290 */ 1469 - static int unlink_old_inode_refs(struct btrfs_trans_handle *trans, 1470 - struct btrfs_root *root, 1471 - struct btrfs_path *path, 1472 - struct btrfs_inode *inode, 1473 - struct extent_buffer *log_eb, 1474 - int log_slot, 1475 - struct btrfs_key *key) 1291 + static int unlink_old_inode_refs(struct walk_control *wc, struct btrfs_inode *inode) 1476 1292 { 1293 + struct btrfs_root *root = wc->root; 1477 1294 int ret; 1478 1295 unsigned long ref_ptr; 1479 1296 unsigned long ref_end; 1480 1297 struct extent_buffer *eb; 1481 1298 1482 1299 again: 1483 - btrfs_release_path(path); 1484 - ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 1300 + btrfs_release_path(wc->subvol_path); 1301 + ret = btrfs_search_slot(NULL, root, &wc->log_key, wc->subvol_path, 0, 0); 1485 1302 if (ret > 0) { 1486 1303 ret = 0; 1487 1304 goto out; 1488 1305 } 1489 - if (ret < 0) 1306 + if (ret < 0) { 1307 + btrfs_abort_log_replay(wc, ret, 1308 + "failed to search subvolume tree for key (%llu %u %llu) root %llu", 1309 + wc->log_key.objectid, wc->log_key.type, 1310 + wc->log_key.offset, btrfs_root_id(root)); 1490 1311 goto out; 1312 + } 1491 1313 1492 - eb = path->nodes[0]; 1493 - ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 1494 - ref_end = ref_ptr + btrfs_item_size(eb, path->slots[0]); 1314 + eb = wc->subvol_path->nodes[0]; 1315 + ref_ptr = btrfs_item_ptr_offset(eb, wc->subvol_path->slots[0]); 1316 + ref_end = ref_ptr + btrfs_item_size(eb, wc->subvol_path->slots[0]); 1495 1317 while (ref_ptr < ref_end) { 1496 1318 struct fscrypt_str name; 1497 1319 u64 parent_id; 
1498 1320 1499 - if (key->type == BTRFS_INODE_EXTREF_KEY) { 1321 + if (wc->log_key.type == BTRFS_INODE_EXTREF_KEY) { 1500 1322 ret = extref_get_fields(eb, ref_ptr, &name, 1501 1323 NULL, &parent_id); 1324 + if (ret) { 1325 + btrfs_abort_log_replay(wc, ret, 1326 + "failed to get extref details for inode %llu root %llu", 1327 + btrfs_ino(inode), 1328 + btrfs_root_id(root)); 1329 + goto out; 1330 + } 1502 1331 } else { 1503 - parent_id = key->offset; 1332 + parent_id = wc->log_key.offset; 1504 1333 ret = ref_get_fields(eb, ref_ptr, &name, NULL); 1334 + if (ret) { 1335 + btrfs_abort_log_replay(wc, ret, 1336 + "failed to get ref details for inode %llu parent_id %llu root %llu", 1337 + btrfs_ino(inode), parent_id, 1338 + btrfs_root_id(root)); 1339 + goto out; 1340 + } 1505 1341 } 1506 - if (ret) 1507 - goto out; 1508 1342 1509 - if (key->type == BTRFS_INODE_EXTREF_KEY) 1510 - ret = !!btrfs_find_name_in_ext_backref(log_eb, log_slot, 1343 + if (wc->log_key.type == BTRFS_INODE_EXTREF_KEY) 1344 + ret = !!btrfs_find_name_in_ext_backref(wc->log_leaf, wc->log_slot, 1511 1345 parent_id, &name); 1512 1346 else 1513 - ret = !!btrfs_find_name_in_backref(log_eb, log_slot, &name); 1347 + ret = !!btrfs_find_name_in_backref(wc->log_leaf, wc->log_slot, 1348 + &name); 1514 1349 1515 1350 if (!ret) { 1516 1351 struct btrfs_inode *dir; 1517 1352 1518 - btrfs_release_path(path); 1353 + btrfs_release_path(wc->subvol_path); 1519 1354 dir = btrfs_iget_logging(parent_id, root); 1520 1355 if (IS_ERR(dir)) { 1521 1356 ret = PTR_ERR(dir); 1522 1357 kfree(name.name); 1358 + btrfs_abort_log_replay(wc, ret, 1359 + "failed to lookup dir inode %llu root %llu", 1360 + parent_id, btrfs_root_id(root)); 1523 1361 goto out; 1524 1362 } 1525 - ret = unlink_inode_for_log_replay(trans, dir, inode, &name); 1363 + ret = unlink_inode_for_log_replay(wc, dir, inode, &name); 1526 1364 kfree(name.name); 1527 1365 iput(&dir->vfs_inode); 1528 1366 if (ret) ··· 1548 1354 1549 1355 kfree(name.name); 1550 1356 ref_ptr += 
name.len; 1551 - if (key->type == BTRFS_INODE_EXTREF_KEY) 1357 + if (wc->log_key.type == BTRFS_INODE_EXTREF_KEY) 1552 1358 ref_ptr += sizeof(struct btrfs_inode_extref); 1553 1359 else 1554 1360 ref_ptr += sizeof(struct btrfs_inode_ref); 1555 1361 } 1556 1362 ret = 0; 1557 1363 out: 1558 - btrfs_release_path(path); 1364 + btrfs_release_path(wc->subvol_path); 1559 1365 return ret; 1560 1366 } 1561 1367 1562 1368 /* 1563 - * replay one inode back reference item found in the log tree. 1564 - * eb, slot and key refer to the buffer and key found in the log tree. 1565 - * root is the destination we are replaying into, and path is for temp 1566 - * use by this function. (it should be released on return). 1369 + * Replay one inode back reference item found in the log tree. 1370 + * Path is for temporary use by this function (it should be released on return). 1567 1371 */ 1568 - static noinline int add_inode_ref(struct btrfs_trans_handle *trans, 1569 - struct btrfs_root *root, 1570 - struct btrfs_root *log, 1571 - struct btrfs_path *path, 1572 - struct extent_buffer *eb, int slot, 1573 - struct btrfs_key *key) 1372 + static noinline int add_inode_ref(struct walk_control *wc) 1574 1373 { 1374 + struct btrfs_trans_handle *trans = wc->trans; 1375 + struct btrfs_root *root = wc->root; 1575 1376 struct btrfs_inode *dir = NULL; 1576 1377 struct btrfs_inode *inode = NULL; 1577 1378 unsigned long ref_ptr; 1578 1379 unsigned long ref_end; 1579 1380 struct fscrypt_str name = { 0 }; 1580 1381 int ret; 1581 - const bool is_extref_item = (key->type == BTRFS_INODE_EXTREF_KEY); 1382 + const bool is_extref_item = (wc->log_key.type == BTRFS_INODE_EXTREF_KEY); 1582 1383 u64 parent_objectid; 1583 1384 u64 inode_objectid; 1584 1385 u64 ref_index = 0; 1585 1386 int ref_struct_size; 1586 1387 1587 - ref_ptr = btrfs_item_ptr_offset(eb, slot); 1588 - ref_end = ref_ptr + btrfs_item_size(eb, slot); 1388 + ref_ptr = btrfs_item_ptr_offset(wc->log_leaf, wc->log_slot); 1389 + ref_end = ref_ptr + 
btrfs_item_size(wc->log_leaf, wc->log_slot); 1589 1390 1590 1391 if (is_extref_item) { 1591 1392 struct btrfs_inode_extref *r; 1592 1393 1593 1394 ref_struct_size = sizeof(struct btrfs_inode_extref); 1594 1395 r = (struct btrfs_inode_extref *)ref_ptr; 1595 - parent_objectid = btrfs_inode_extref_parent(eb, r); 1396 + parent_objectid = btrfs_inode_extref_parent(wc->log_leaf, r); 1596 1397 } else { 1597 1398 ref_struct_size = sizeof(struct btrfs_inode_ref); 1598 - parent_objectid = key->offset; 1399 + parent_objectid = wc->log_key.offset; 1599 1400 } 1600 - inode_objectid = key->objectid; 1401 + inode_objectid = wc->log_key.objectid; 1601 1402 1602 1403 /* 1603 1404 * it is possible that we didn't log all the parent directories ··· 1605 1416 ret = PTR_ERR(dir); 1606 1417 if (ret == -ENOENT) 1607 1418 ret = 0; 1419 + else 1420 + btrfs_abort_log_replay(wc, ret, 1421 + "failed to lookup dir inode %llu root %llu", 1422 + parent_objectid, btrfs_root_id(root)); 1608 1423 dir = NULL; 1609 1424 goto out; 1610 1425 } ··· 1616 1423 inode = btrfs_iget_logging(inode_objectid, root); 1617 1424 if (IS_ERR(inode)) { 1618 1425 ret = PTR_ERR(inode); 1426 + btrfs_abort_log_replay(wc, ret, 1427 + "failed to lookup inode %llu root %llu", 1428 + inode_objectid, btrfs_root_id(root)); 1619 1429 inode = NULL; 1620 1430 goto out; 1621 1431 } 1622 1432 1623 1433 while (ref_ptr < ref_end) { 1624 1434 if (is_extref_item) { 1625 - ret = extref_get_fields(eb, ref_ptr, &name, 1435 + ret = extref_get_fields(wc->log_leaf, ref_ptr, &name, 1626 1436 &ref_index, &parent_objectid); 1627 - if (ret) 1437 + if (ret) { 1438 + btrfs_abort_log_replay(wc, ret, 1439 + "failed to get extref details for inode %llu root %llu", 1440 + btrfs_ino(inode), 1441 + btrfs_root_id(root)); 1628 1442 goto out; 1443 + } 1629 1444 /* 1630 1445 * parent object can change from one array 1631 1446 * item to another. 
··· 1658 1457 */ 1659 1458 ret = 0; 1660 1459 goto next; 1460 + } else { 1461 + btrfs_abort_log_replay(wc, ret, 1462 + "failed to lookup dir inode %llu root %llu", 1463 + parent_objectid, 1464 + btrfs_root_id(root)); 1661 1465 } 1662 1466 goto out; 1663 1467 } 1664 1468 } 1665 1469 } else { 1666 - ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); 1667 - if (ret) 1470 + ret = ref_get_fields(wc->log_leaf, ref_ptr, &name, &ref_index); 1471 + if (ret) { 1472 + btrfs_abort_log_replay(wc, ret, 1473 + "failed to get ref details for inode %llu parent_objectid %llu root %llu", 1474 + btrfs_ino(inode), 1475 + parent_objectid, 1476 + btrfs_root_id(root)); 1668 1477 goto out; 1478 + } 1669 1479 } 1670 1480 1671 - ret = inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), 1672 - ref_index, &name); 1481 + ret = inode_in_dir(root, wc->subvol_path, btrfs_ino(dir), 1482 + btrfs_ino(inode), ref_index, &name); 1673 1483 if (ret < 0) { 1484 + btrfs_abort_log_replay(wc, ret, 1485 + "failed to check if inode %llu is in dir %llu ref_index %llu name %.*s root %llu", 1486 + btrfs_ino(inode), btrfs_ino(dir), 1487 + ref_index, name.len, name.name, 1488 + btrfs_root_id(root)); 1674 1489 goto out; 1675 1490 } else if (ret == 0) { 1676 1491 /* ··· 1696 1479 * overwrite any existing back reference, and we don't 1697 1480 * want to create dangling pointers in the directory. 
1698 1481 */ 1699 - ret = __add_inode_ref(trans, root, path, log, dir, inode, 1700 - inode_objectid, parent_objectid, 1701 - ref_index, &name); 1482 + ret = __add_inode_ref(wc, dir, inode, ref_index, &name); 1702 1483 if (ret) { 1703 1484 if (ret == 1) 1704 1485 ret = 0; ··· 1705 1490 1706 1491 /* insert our name */ 1707 1492 ret = btrfs_add_link(trans, dir, inode, &name, 0, ref_index); 1708 - if (ret) 1493 + if (ret) { 1494 + btrfs_abort_log_replay(wc, ret, 1495 + "failed to add link for inode %llu in dir %llu ref_index %llu name %.*s root %llu", 1496 + btrfs_ino(inode), 1497 + btrfs_ino(dir), ref_index, 1498 + name.len, name.name, 1499 + btrfs_root_id(root)); 1709 1500 goto out; 1501 + } 1710 1502 1711 1503 ret = btrfs_update_inode(trans, inode); 1712 - if (ret) 1504 + if (ret) { 1505 + btrfs_abort_log_replay(wc, ret, 1506 + "failed to update inode %llu root %llu", 1507 + btrfs_ino(inode), 1508 + btrfs_root_id(root)); 1713 1509 goto out; 1510 + } 1714 1511 } 1715 1512 /* Else, ret == 1, we already have a perfect match, we're done. */ 1716 1513 ··· 1744 1517 * dir index entries exist for a name but there is no inode reference 1745 1518 * item with the same name. 1746 1519 */ 1747 - ret = unlink_old_inode_refs(trans, root, path, inode, eb, slot, key); 1520 + ret = unlink_old_inode_refs(wc, inode); 1748 1521 if (ret) 1749 1522 goto out; 1750 1523 1751 1524 /* finally write the back reference in the inode */ 1752 - ret = overwrite_item(trans, root, path, eb, slot, key); 1525 + ret = overwrite_item(wc); 1753 1526 out: 1754 - btrfs_release_path(path); 1527 + btrfs_release_path(wc->subvol_path); 1755 1528 kfree(name.name); 1756 1529 if (dir) 1757 1530 iput(&dir->vfs_inode); ··· 1869 1642 * number of back refs found. If it goes down to zero, the iput 1870 1643 * will free the inode. 
1871 1644 */ 1872 - static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, 1645 + static noinline int fixup_inode_link_count(struct walk_control *wc, 1873 1646 struct btrfs_inode *inode) 1874 1647 { 1648 + struct btrfs_trans_handle *trans = wc->trans; 1875 1649 struct btrfs_root *root = inode->root; 1876 - struct btrfs_path *path; 1877 1650 int ret; 1878 1651 u64 nlink = 0; 1879 1652 const u64 ino = btrfs_ino(inode); 1880 1653 1881 - path = btrfs_alloc_path(); 1882 - if (!path) 1883 - return -ENOMEM; 1884 - 1885 - ret = count_inode_refs(inode, path); 1654 + ret = count_inode_refs(inode, wc->subvol_path); 1886 1655 if (ret < 0) 1887 1656 goto out; 1888 1657 1889 1658 nlink = ret; 1890 1659 1891 - ret = count_inode_extrefs(inode, path); 1660 + ret = count_inode_extrefs(inode, wc->subvol_path); 1892 1661 if (ret < 0) 1893 1662 goto out; 1894 1663 ··· 1903 1680 1904 1681 if (inode->vfs_inode.i_nlink == 0) { 1905 1682 if (S_ISDIR(inode->vfs_inode.i_mode)) { 1906 - ret = replay_dir_deletes(trans, root, NULL, path, ino, true); 1683 + ret = replay_dir_deletes(wc, ino, true); 1907 1684 if (ret) 1908 1685 goto out; 1909 1686 } ··· 1913 1690 } 1914 1691 1915 1692 out: 1916 - btrfs_free_path(path); 1693 + btrfs_release_path(wc->subvol_path); 1917 1694 return ret; 1918 1695 } 1919 1696 1920 - static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, 1921 - struct btrfs_root *root, 1922 - struct btrfs_path *path) 1697 + static noinline int fixup_inode_link_counts(struct walk_control *wc) 1923 1698 { 1924 1699 int ret; 1925 1700 struct btrfs_key key; ··· 1926 1705 key.type = BTRFS_ORPHAN_ITEM_KEY; 1927 1706 key.offset = (u64)-1; 1928 1707 while (1) { 1708 + struct btrfs_trans_handle *trans = wc->trans; 1709 + struct btrfs_root *root = wc->root; 1929 1710 struct btrfs_inode *inode; 1930 1711 1931 - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1712 + ret = btrfs_search_slot(trans, root, &key, wc->subvol_path, -1, 1); 1932 1713 
if (ret < 0) 1933 1714 break; 1934 1715 1935 1716 if (ret == 1) { 1936 1717 ret = 0; 1937 - if (path->slots[0] == 0) 1718 + if (wc->subvol_path->slots[0] == 0) 1938 1719 break; 1939 - path->slots[0]--; 1720 + wc->subvol_path->slots[0]--; 1940 1721 } 1941 1722 1942 - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1723 + btrfs_item_key_to_cpu(wc->subvol_path->nodes[0], &key, wc->subvol_path->slots[0]); 1943 1724 if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || 1944 1725 key.type != BTRFS_ORPHAN_ITEM_KEY) 1945 1726 break; 1946 1727 1947 - ret = btrfs_del_item(trans, root, path); 1728 + ret = btrfs_del_item(trans, root, wc->subvol_path); 1948 1729 if (ret) 1949 1730 break; 1950 1731 1951 - btrfs_release_path(path); 1732 + btrfs_release_path(wc->subvol_path); 1952 1733 inode = btrfs_iget_logging(key.offset, root); 1953 1734 if (IS_ERR(inode)) { 1954 1735 ret = PTR_ERR(inode); 1955 1736 break; 1956 1737 } 1957 1738 1958 - ret = fixup_inode_link_count(trans, inode); 1739 + ret = fixup_inode_link_count(wc, inode); 1959 1740 iput(&inode->vfs_inode); 1960 1741 if (ret) 1961 1742 break; 1962 1743 1963 1744 /* 1964 1745 * fixup on a directory may create new entries, 1965 - * make sure we always look for the highset possible 1746 + * make sure we always look for the highest possible 1966 1747 * offset 1967 1748 */ 1968 1749 key.offset = (u64)-1; 1969 1750 } 1970 - btrfs_release_path(path); 1751 + btrfs_release_path(wc->subvol_path); 1971 1752 return ret; 1972 1753 } 1973 1754 ··· 1979 1756 * count when replay is done. 
The link count is incremented here 1980 1757 * so the inode won't go away until we check it 1981 1758 */ 1982 - static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, 1983 - struct btrfs_root *root, 1984 - struct btrfs_path *path, 1985 - u64 objectid) 1759 + static noinline int link_to_fixup_dir(struct walk_control *wc, u64 objectid) 1986 1760 { 1761 + struct btrfs_trans_handle *trans = wc->trans; 1762 + struct btrfs_root *root = wc->root; 1987 1763 struct btrfs_key key; 1988 1764 int ret = 0; 1989 1765 struct btrfs_inode *inode; 1990 1766 struct inode *vfs_inode; 1991 1767 1992 1768 inode = btrfs_iget_logging(objectid, root); 1993 - if (IS_ERR(inode)) 1994 - return PTR_ERR(inode); 1769 + if (IS_ERR(inode)) { 1770 + ret = PTR_ERR(inode); 1771 + btrfs_abort_log_replay(wc, ret, 1772 + "failed to lookup inode %llu root %llu", 1773 + objectid, btrfs_root_id(root)); 1774 + return ret; 1775 + } 1995 1776 1996 1777 vfs_inode = &inode->vfs_inode; 1997 1778 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1998 1779 key.type = BTRFS_ORPHAN_ITEM_KEY; 1999 1780 key.offset = objectid; 2000 1781 2001 - ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1782 + ret = btrfs_insert_empty_item(trans, root, wc->subvol_path, &key, 0); 2002 1783 2003 - btrfs_release_path(path); 1784 + btrfs_release_path(wc->subvol_path); 2004 1785 if (ret == 0) { 2005 1786 if (!vfs_inode->i_nlink) 2006 1787 set_nlink(vfs_inode, 1); 2007 1788 else 2008 1789 inc_nlink(vfs_inode); 2009 1790 ret = btrfs_update_inode(trans, inode); 1791 + if (ret) 1792 + btrfs_abort_log_replay(wc, ret, 1793 + "failed to update inode %llu root %llu", 1794 + objectid, btrfs_root_id(root)); 2010 1795 } else if (ret == -EEXIST) { 2011 1796 ret = 0; 1797 + } else { 1798 + btrfs_abort_log_replay(wc, ret, 1799 + "failed to insert fixup item for inode %llu root %llu", 1800 + objectid, btrfs_root_id(root)); 2012 1801 } 2013 1802 iput(vfs_inode); 2014 1803 ··· 2061 1826 return ret; 2062 1827 } 2063 1828 2064 - 
static int delete_conflicting_dir_entry(struct btrfs_trans_handle *trans, 1829 + static int delete_conflicting_dir_entry(struct walk_control *wc, 2065 1830 struct btrfs_inode *dir, 2066 - struct btrfs_path *path, 2067 1831 struct btrfs_dir_item *dst_di, 2068 1832 const struct btrfs_key *log_key, 2069 1833 u8 log_flags, ··· 2070 1836 { 2071 1837 struct btrfs_key found_key; 2072 1838 2073 - btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); 1839 + btrfs_dir_item_key_to_cpu(wc->subvol_path->nodes[0], dst_di, &found_key); 2074 1840 /* The existing dentry points to the same inode, don't delete it. */ 2075 1841 if (found_key.objectid == log_key->objectid && 2076 1842 found_key.type == log_key->type && 2077 1843 found_key.offset == log_key->offset && 2078 - btrfs_dir_flags(path->nodes[0], dst_di) == log_flags) 1844 + btrfs_dir_flags(wc->subvol_path->nodes[0], dst_di) == log_flags) 2079 1845 return 1; 2080 1846 2081 1847 /* ··· 2085 1851 if (!exists) 2086 1852 return 0; 2087 1853 2088 - return drop_one_dir_item(trans, path, dir, dst_di); 1854 + return drop_one_dir_item(wc, dir, dst_di); 2089 1855 } 2090 1856 2091 1857 /* ··· 2104 1870 * Returns < 0 on error, 0 if the name wasn't replayed (dentry points to a 2105 1871 * non-existing inode) and 1 if the name was replayed. 
2106 1872 */ 2107 - static noinline int replay_one_name(struct btrfs_trans_handle *trans, 2108 - struct btrfs_root *root, 2109 - struct btrfs_path *path, 2110 - struct extent_buffer *eb, 2111 - struct btrfs_dir_item *di, 2112 - struct btrfs_key *key) 1873 + static noinline int replay_one_name(struct walk_control *wc, struct btrfs_dir_item *di) 2113 1874 { 1875 + struct btrfs_trans_handle *trans = wc->trans; 1876 + struct btrfs_root *root = wc->root; 2114 1877 struct fscrypt_str name = { 0 }; 2115 1878 struct btrfs_dir_item *dir_dst_di; 2116 1879 struct btrfs_dir_item *index_dst_di; ··· 2122 1891 bool update_size = true; 2123 1892 bool name_added = false; 2124 1893 2125 - dir = btrfs_iget_logging(key->objectid, root); 2126 - if (IS_ERR(dir)) 2127 - return PTR_ERR(dir); 1894 + dir = btrfs_iget_logging(wc->log_key.objectid, root); 1895 + if (IS_ERR(dir)) { 1896 + ret = PTR_ERR(dir); 1897 + btrfs_abort_log_replay(wc, ret, 1898 + "failed to lookup dir inode %llu root %llu", 1899 + wc->log_key.objectid, btrfs_root_id(root)); 1900 + return ret; 1901 + } 2128 1902 2129 - ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); 2130 - if (ret) 1903 + ret = read_alloc_one_name(wc->log_leaf, di + 1, 1904 + btrfs_dir_name_len(wc->log_leaf, di), &name); 1905 + if (ret) { 1906 + btrfs_abort_log_replay(wc, ret, 1907 + "failed to allocate name for dir %llu root %llu", 1908 + btrfs_ino(dir), btrfs_root_id(root)); 2131 1909 goto out; 1910 + } 2132 1911 2133 - log_flags = btrfs_dir_flags(eb, di); 2134 - btrfs_dir_item_key_to_cpu(eb, di, &log_key); 2135 - ret = btrfs_lookup_inode(trans, root, path, &log_key, 0); 2136 - btrfs_release_path(path); 2137 - if (ret < 0) 1912 + log_flags = btrfs_dir_flags(wc->log_leaf, di); 1913 + btrfs_dir_item_key_to_cpu(wc->log_leaf, di, &log_key); 1914 + ret = btrfs_lookup_inode(trans, root, wc->subvol_path, &log_key, 0); 1915 + btrfs_release_path(wc->subvol_path); 1916 + if (ret < 0) { 1917 + btrfs_abort_log_replay(wc, ret, 1918 + 
"failed to lookup inode %llu root %llu", 1919 + log_key.objectid, btrfs_root_id(root)); 2138 1920 goto out; 1921 + } 2139 1922 exists = (ret == 0); 2140 1923 ret = 0; 2141 1924 2142 - dir_dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, 2143 - &name, 1); 1925 + dir_dst_di = btrfs_lookup_dir_item(trans, root, wc->subvol_path, 1926 + wc->log_key.objectid, &name, 1); 2144 1927 if (IS_ERR(dir_dst_di)) { 2145 1928 ret = PTR_ERR(dir_dst_di); 1929 + btrfs_abort_log_replay(wc, ret, 1930 + "failed to lookup dir item for dir %llu name %.*s root %llu", 1931 + wc->log_key.objectid, name.len, name.name, 1932 + btrfs_root_id(root)); 2146 1933 goto out; 2147 1934 } else if (dir_dst_di) { 2148 - ret = delete_conflicting_dir_entry(trans, dir, path, dir_dst_di, 1935 + ret = delete_conflicting_dir_entry(wc, dir, dir_dst_di, 2149 1936 &log_key, log_flags, exists); 2150 - if (ret < 0) 1937 + if (ret < 0) { 1938 + btrfs_abort_log_replay(wc, ret, 1939 + "failed to delete conflicting entry for dir %llu name %.*s root %llu", 1940 + btrfs_ino(dir), name.len, name.name, 1941 + btrfs_root_id(root)); 2151 1942 goto out; 1943 + } 2152 1944 dir_dst_matches = (ret == 1); 2153 1945 } 2154 1946 2155 - btrfs_release_path(path); 1947 + btrfs_release_path(wc->subvol_path); 2156 1948 2157 - index_dst_di = btrfs_lookup_dir_index_item(trans, root, path, 2158 - key->objectid, key->offset, 2159 - &name, 1); 1949 + index_dst_di = btrfs_lookup_dir_index_item(trans, root, wc->subvol_path, 1950 + wc->log_key.objectid, 1951 + wc->log_key.offset, &name, 1); 2160 1952 if (IS_ERR(index_dst_di)) { 2161 1953 ret = PTR_ERR(index_dst_di); 1954 + btrfs_abort_log_replay(wc, ret, 1955 + "failed to lookup dir index item for dir %llu name %.*s root %llu", 1956 + wc->log_key.objectid, name.len, name.name, 1957 + btrfs_root_id(root)); 2162 1958 goto out; 2163 1959 } else if (index_dst_di) { 2164 - ret = delete_conflicting_dir_entry(trans, dir, path, index_dst_di, 1960 + ret = delete_conflicting_dir_entry(wc, 
dir, index_dst_di, 2165 1961 &log_key, log_flags, exists); 2166 - if (ret < 0) 1962 + if (ret < 0) { 1963 + btrfs_abort_log_replay(wc, ret, 1964 + "failed to delete conflicting entry for dir %llu name %.*s root %llu", 1965 + btrfs_ino(dir), name.len, name.name, 1966 + btrfs_root_id(root)); 2167 1967 goto out; 1968 + } 2168 1969 index_dst_matches = (ret == 1); 2169 1970 } 2170 1971 2171 - btrfs_release_path(path); 1972 + btrfs_release_path(wc->subvol_path); 2172 1973 2173 1974 if (dir_dst_matches && index_dst_matches) { 2174 1975 ret = 0; ··· 2214 1951 */ 2215 1952 search_key.objectid = log_key.objectid; 2216 1953 search_key.type = BTRFS_INODE_REF_KEY; 2217 - search_key.offset = key->objectid; 1954 + search_key.offset = wc->log_key.objectid; 2218 1955 ret = backref_in_log(root->log_root, &search_key, 0, &name); 2219 1956 if (ret < 0) { 1957 + btrfs_abort_log_replay(wc, ret, 1958 + "failed to check if ref item is logged for inode %llu dir %llu name %.*s root %llu", 1959 + search_key.objectid, btrfs_ino(dir), 1960 + name.len, name.name, btrfs_root_id(root)); 2220 1961 goto out; 2221 1962 } else if (ret) { 2222 1963 /* The dentry will be added later. */ ··· 2231 1964 2232 1965 search_key.objectid = log_key.objectid; 2233 1966 search_key.type = BTRFS_INODE_EXTREF_KEY; 2234 - search_key.offset = btrfs_extref_hash(key->objectid, name.name, name.len); 2235 - ret = backref_in_log(root->log_root, &search_key, key->objectid, &name); 1967 + search_key.offset = btrfs_extref_hash(wc->log_key.objectid, name.name, name.len); 1968 + ret = backref_in_log(root->log_root, &search_key, wc->log_key.objectid, &name); 2236 1969 if (ret < 0) { 1970 + btrfs_abort_log_replay(wc, ret, 1971 + "failed to check if extref item is logged for inode %llu dir %llu name %.*s root %llu", 1972 + search_key.objectid, btrfs_ino(dir), 1973 + name.len, name.name, btrfs_root_id(root)); 2237 1974 goto out; 2238 1975 } else if (ret) { 2239 1976 /* The dentry will be added later. 
*/ ··· 2245 1974 update_size = false; 2246 1975 goto out; 2247 1976 } 2248 - btrfs_release_path(path); 2249 - ret = insert_one_name(trans, root, key->objectid, key->offset, 1977 + ret = insert_one_name(trans, root, wc->log_key.objectid, wc->log_key.offset, 2250 1978 &name, &log_key); 2251 - if (ret && ret != -ENOENT && ret != -EEXIST) 1979 + if (ret && ret != -ENOENT && ret != -EEXIST) { 1980 + btrfs_abort_log_replay(wc, ret, 1981 + "failed to insert name %.*s for inode %llu dir %llu root %llu", 1982 + name.len, name.name, log_key.objectid, 1983 + btrfs_ino(dir), btrfs_root_id(root)); 2252 1984 goto out; 1985 + } 2253 1986 if (!ret) 2254 1987 name_added = true; 2255 1988 update_size = false; ··· 2263 1988 if (!ret && update_size) { 2264 1989 btrfs_i_size_write(dir, dir->vfs_inode.i_size + name.len * 2); 2265 1990 ret = btrfs_update_inode(trans, dir); 1991 + if (ret) 1992 + btrfs_abort_log_replay(wc, ret, 1993 + "failed to update dir inode %llu root %llu", 1994 + btrfs_ino(dir), btrfs_root_id(root)); 2266 1995 } 2267 1996 kfree(name.name); 2268 1997 iput(&dir->vfs_inode); ··· 2276 1997 } 2277 1998 2278 1999 /* Replay one dir item from a BTRFS_DIR_INDEX_KEY key. */ 2279 - static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, 2280 - struct btrfs_root *root, 2281 - struct btrfs_path *path, 2282 - struct extent_buffer *eb, int slot, 2283 - struct btrfs_key *key) 2000 + static noinline int replay_one_dir_item(struct walk_control *wc) 2284 2001 { 2285 2002 int ret; 2286 2003 struct btrfs_dir_item *di; 2287 2004 2288 2005 /* We only log dir index keys, which only contain a single dir item. 
*/ 2289 - ASSERT(key->type == BTRFS_DIR_INDEX_KEY); 2006 + ASSERT(wc->log_key.type == BTRFS_DIR_INDEX_KEY); 2290 2007 2291 - di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2292 - ret = replay_one_name(trans, root, path, eb, di, key); 2008 + di = btrfs_item_ptr(wc->log_leaf, wc->log_slot, struct btrfs_dir_item); 2009 + ret = replay_one_name(wc, di); 2293 2010 if (ret < 0) 2294 2011 return ret; 2295 2012 ··· 2315 2040 * to ever delete the parent directory has it would result in stale 2316 2041 * dentries that can never be deleted. 2317 2042 */ 2318 - if (ret == 1 && btrfs_dir_ftype(eb, di) != BTRFS_FT_DIR) { 2319 - struct btrfs_path *fixup_path; 2043 + if (ret == 1 && btrfs_dir_ftype(wc->log_leaf, di) != BTRFS_FT_DIR) { 2320 2044 struct btrfs_key di_key; 2321 2045 2322 - fixup_path = btrfs_alloc_path(); 2323 - if (!fixup_path) 2324 - return -ENOMEM; 2325 - 2326 - btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2327 - ret = link_to_fixup_dir(trans, root, fixup_path, di_key.objectid); 2328 - btrfs_free_path(fixup_path); 2046 + btrfs_dir_item_key_to_cpu(wc->log_leaf, di, &di_key); 2047 + ret = link_to_fixup_dir(wc, di_key.objectid); 2329 2048 } 2330 2049 2331 2050 return ret; ··· 2412 2143 * item is not in the log, the item is removed and the inode it points 2413 2144 * to is unlinked 2414 2145 */ 2415 - static noinline int check_item_in_log(struct btrfs_trans_handle *trans, 2416 - struct btrfs_root *log, 2417 - struct btrfs_path *path, 2146 + static noinline int check_item_in_log(struct walk_control *wc, 2418 2147 struct btrfs_path *log_path, 2419 2148 struct btrfs_inode *dir, 2420 - struct btrfs_key *dir_key) 2149 + struct btrfs_key *dir_key, 2150 + bool force_remove) 2421 2151 { 2152 + struct btrfs_trans_handle *trans = wc->trans; 2422 2153 struct btrfs_root *root = dir->root; 2423 2154 int ret; 2424 2155 struct extent_buffer *eb; ··· 2436 2167 */ 2437 2168 ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY); 2438 2169 2439 - eb = path->nodes[0]; 2440 - slot = 
path->slots[0]; 2170 + eb = wc->subvol_path->nodes[0]; 2171 + slot = wc->subvol_path->slots[0]; 2441 2172 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2442 2173 ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); 2443 - if (ret) 2174 + if (ret) { 2175 + btrfs_abort_log_replay(wc, ret, 2176 + "failed to allocate name for dir %llu index %llu root %llu", 2177 + btrfs_ino(dir), dir_key->offset, 2178 + btrfs_root_id(root)); 2444 2179 goto out; 2180 + } 2445 2181 2446 - if (log) { 2182 + if (!force_remove) { 2447 2183 struct btrfs_dir_item *log_di; 2448 2184 2449 - log_di = btrfs_lookup_dir_index_item(trans, log, log_path, 2185 + log_di = btrfs_lookup_dir_index_item(trans, wc->log, log_path, 2450 2186 dir_key->objectid, 2451 2187 dir_key->offset, &name, 0); 2452 2188 if (IS_ERR(log_di)) { 2453 2189 ret = PTR_ERR(log_di); 2190 + btrfs_abort_log_replay(wc, ret, 2191 + "failed to lookup dir index item for dir %llu index %llu name %.*s root %llu", 2192 + btrfs_ino(dir), dir_key->offset, 2193 + name.len, name.name, 2194 + btrfs_root_id(root)); 2454 2195 goto out; 2455 2196 } else if (log_di) { 2456 2197 /* The dentry exists in the log, we have nothing to do. 
*/ ··· 2470 2191 } 2471 2192 2472 2193 btrfs_dir_item_key_to_cpu(eb, di, &location); 2473 - btrfs_release_path(path); 2194 + btrfs_release_path(wc->subvol_path); 2474 2195 btrfs_release_path(log_path); 2475 2196 inode = btrfs_iget_logging(location.objectid, root); 2476 2197 if (IS_ERR(inode)) { 2477 2198 ret = PTR_ERR(inode); 2478 2199 inode = NULL; 2200 + btrfs_abort_log_replay(wc, ret, 2201 + "failed to lookup inode %llu root %llu", 2202 + location.objectid, btrfs_root_id(root)); 2479 2203 goto out; 2480 2204 } 2481 2205 2482 - ret = link_to_fixup_dir(trans, root, path, location.objectid); 2206 + ret = link_to_fixup_dir(wc, location.objectid); 2483 2207 if (ret) 2484 2208 goto out; 2485 2209 2486 2210 inc_nlink(&inode->vfs_inode); 2487 - ret = unlink_inode_for_log_replay(trans, dir, inode, &name); 2211 + ret = unlink_inode_for_log_replay(wc, dir, inode, &name); 2488 2212 /* 2489 2213 * Unlike dir item keys, dir index keys can only have one name (entry) in 2490 2214 * them, as there are no key collisions since each key has a unique offset 2491 2215 * (an index number), so we're done. 
2492 2216 */ 2493 2217 out: 2494 - btrfs_release_path(path); 2218 + btrfs_release_path(wc->subvol_path); 2495 2219 btrfs_release_path(log_path); 2496 2220 kfree(name.name); 2497 2221 if (inode) ··· 2502 2220 return ret; 2503 2221 } 2504 2222 2505 - static int replay_xattr_deletes(struct btrfs_trans_handle *trans, 2506 - struct btrfs_root *root, 2507 - struct btrfs_root *log, 2508 - struct btrfs_path *path, 2509 - const u64 ino) 2223 + static int replay_xattr_deletes(struct walk_control *wc) 2510 2224 { 2225 + struct btrfs_trans_handle *trans = wc->trans; 2226 + struct btrfs_root *root = wc->root; 2227 + struct btrfs_root *log = wc->log; 2511 2228 struct btrfs_key search_key; 2512 - struct btrfs_path *log_path; 2513 - int i; 2229 + BTRFS_PATH_AUTO_FREE(log_path); 2230 + const u64 ino = wc->log_key.objectid; 2514 2231 int nritems; 2515 2232 int ret; 2516 2233 2517 2234 log_path = btrfs_alloc_path(); 2518 - if (!log_path) 2235 + if (!log_path) { 2236 + btrfs_abort_log_replay(wc, -ENOMEM, "failed to allocate path"); 2519 2237 return -ENOMEM; 2238 + } 2520 2239 2521 2240 search_key.objectid = ino; 2522 2241 search_key.type = BTRFS_XATTR_ITEM_KEY; 2523 2242 search_key.offset = 0; 2524 2243 again: 2525 - ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 2526 - if (ret < 0) 2244 + ret = btrfs_search_slot(NULL, root, &search_key, wc->subvol_path, 0, 0); 2245 + if (ret < 0) { 2246 + btrfs_abort_log_replay(wc, ret, 2247 + "failed to search xattrs for inode %llu root %llu", 2248 + ino, btrfs_root_id(root)); 2527 2249 goto out; 2250 + } 2528 2251 process_leaf: 2529 - nritems = btrfs_header_nritems(path->nodes[0]); 2530 - for (i = path->slots[0]; i < nritems; i++) { 2252 + nritems = btrfs_header_nritems(wc->subvol_path->nodes[0]); 2253 + for (int i = wc->subvol_path->slots[0]; i < nritems; i++) { 2531 2254 struct btrfs_key key; 2532 2255 struct btrfs_dir_item *di; 2533 2256 struct btrfs_dir_item *log_di; 2534 2257 u32 total_size; 2535 2258 u32 cur; 2536 2259 2537 - 
btrfs_item_key_to_cpu(path->nodes[0], &key, i); 2260 + btrfs_item_key_to_cpu(wc->subvol_path->nodes[0], &key, i); 2538 2261 if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) { 2539 2262 ret = 0; 2540 2263 goto out; 2541 2264 } 2542 2265 2543 - di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item); 2544 - total_size = btrfs_item_size(path->nodes[0], i); 2266 + di = btrfs_item_ptr(wc->subvol_path->nodes[0], i, struct btrfs_dir_item); 2267 + total_size = btrfs_item_size(wc->subvol_path->nodes[0], i); 2545 2268 cur = 0; 2546 2269 while (cur < total_size) { 2547 - u16 name_len = btrfs_dir_name_len(path->nodes[0], di); 2548 - u16 data_len = btrfs_dir_data_len(path->nodes[0], di); 2270 + u16 name_len = btrfs_dir_name_len(wc->subvol_path->nodes[0], di); 2271 + u16 data_len = btrfs_dir_data_len(wc->subvol_path->nodes[0], di); 2549 2272 u32 this_len = sizeof(*di) + name_len + data_len; 2550 2273 char *name; 2551 2274 2552 2275 name = kmalloc(name_len, GFP_NOFS); 2553 2276 if (!name) { 2554 2277 ret = -ENOMEM; 2278 + btrfs_abort_log_replay(wc, ret, 2279 + "failed to allocate memory for name of length %u", 2280 + name_len); 2555 2281 goto out; 2556 2282 } 2557 - read_extent_buffer(path->nodes[0], name, 2283 + read_extent_buffer(wc->subvol_path->nodes[0], name, 2558 2284 (unsigned long)(di + 1), name_len); 2559 2285 2560 2286 log_di = btrfs_lookup_xattr(NULL, log, log_path, ino, ··· 2570 2280 btrfs_release_path(log_path); 2571 2281 if (!log_di) { 2572 2282 /* Doesn't exist in log tree, so delete it. 
*/ 2573 - btrfs_release_path(path); 2574 - di = btrfs_lookup_xattr(trans, root, path, ino, 2283 + btrfs_release_path(wc->subvol_path); 2284 + di = btrfs_lookup_xattr(trans, root, wc->subvol_path, ino, 2575 2285 name, name_len, -1); 2576 - kfree(name); 2577 2286 if (IS_ERR(di)) { 2578 2287 ret = PTR_ERR(di); 2288 + btrfs_abort_log_replay(wc, ret, 2289 + "failed to lookup xattr with name %.*s for inode %llu root %llu", 2290 + name_len, name, ino, 2291 + btrfs_root_id(root)); 2292 + kfree(name); 2579 2293 goto out; 2580 2294 } 2581 2295 ASSERT(di); 2582 2296 ret = btrfs_delete_one_dir_name(trans, root, 2583 - path, di); 2584 - if (ret) 2297 + wc->subvol_path, di); 2298 + if (ret) { 2299 + btrfs_abort_log_replay(wc, ret, 2300 + "failed to delete xattr with name %.*s for inode %llu root %llu", 2301 + name_len, name, ino, 2302 + btrfs_root_id(root)); 2303 + kfree(name); 2585 2304 goto out; 2586 - btrfs_release_path(path); 2305 + } 2306 + btrfs_release_path(wc->subvol_path); 2307 + kfree(name); 2587 2308 search_key = key; 2588 2309 goto again; 2589 2310 } 2590 - kfree(name); 2591 2311 if (IS_ERR(log_di)) { 2592 2312 ret = PTR_ERR(log_di); 2313 + btrfs_abort_log_replay(wc, ret, 2314 + "failed to lookup xattr in log tree with name %.*s for inode %llu root %llu", 2315 + name_len, name, ino, 2316 + btrfs_root_id(root)); 2317 + kfree(name); 2593 2318 goto out; 2594 2319 } 2320 + kfree(name); 2595 2321 cur += this_len; 2596 2322 di = (struct btrfs_dir_item *)((char *)di + this_len); 2597 2323 } 2598 2324 } 2599 - ret = btrfs_next_leaf(root, path); 2325 + ret = btrfs_next_leaf(root, wc->subvol_path); 2600 2326 if (ret > 0) 2601 2327 ret = 0; 2602 2328 else if (ret == 0) 2603 2329 goto process_leaf; 2330 + else 2331 + btrfs_abort_log_replay(wc, ret, 2332 + "failed to get next leaf in subvolume root %llu", 2333 + btrfs_root_id(root)); 2604 2334 out: 2605 - btrfs_free_path(log_path); 2606 - btrfs_release_path(path); 2335 + btrfs_release_path(wc->subvol_path); 2607 2336 return ret; 
2608 2337 } 2609 2338 ··· 2637 2328 * Anything we don't find in the log is unlinked and removed from the 2638 2329 * directory. 2639 2330 */ 2640 - static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, 2641 - struct btrfs_root *root, 2642 - struct btrfs_root *log, 2643 - struct btrfs_path *path, 2331 + static noinline int replay_dir_deletes(struct walk_control *wc, 2644 2332 u64 dirid, bool del_all) 2645 2333 { 2334 + struct btrfs_root *root = wc->root; 2335 + struct btrfs_root *log = (del_all ? NULL : wc->log); 2646 2336 u64 range_start; 2647 2337 u64 range_end; 2648 2338 int ret = 0; ··· 2653 2345 dir_key.objectid = dirid; 2654 2346 dir_key.type = BTRFS_DIR_INDEX_KEY; 2655 2347 log_path = btrfs_alloc_path(); 2656 - if (!log_path) 2348 + if (!log_path) { 2349 + btrfs_abort_log_replay(wc, -ENOMEM, "failed to allocate path"); 2657 2350 return -ENOMEM; 2351 + } 2658 2352 2659 2353 dir = btrfs_iget_logging(dirid, root); 2660 2354 /* ··· 2668 2358 ret = PTR_ERR(dir); 2669 2359 if (ret == -ENOENT) 2670 2360 ret = 0; 2361 + else 2362 + btrfs_abort_log_replay(wc, ret, 2363 + "failed to lookup dir inode %llu root %llu", 2364 + dirid, btrfs_root_id(root)); 2671 2365 return ret; 2672 2366 } 2673 2367 ··· 2681 2367 if (del_all) 2682 2368 range_end = (u64)-1; 2683 2369 else { 2684 - ret = find_dir_range(log, path, dirid, 2370 + ret = find_dir_range(log, wc->subvol_path, dirid, 2685 2371 &range_start, &range_end); 2686 - if (ret < 0) 2372 + if (ret < 0) { 2373 + btrfs_abort_log_replay(wc, ret, 2374 + "failed to find range for dir %llu in log tree root %llu", 2375 + dirid, btrfs_root_id(root)); 2687 2376 goto out; 2688 - else if (ret > 0) 2377 + } else if (ret > 0) { 2689 2378 break; 2379 + } 2690 2380 } 2691 2381 2692 2382 dir_key.offset = range_start; 2693 2383 while (1) { 2694 2384 int nritems; 2695 - ret = btrfs_search_slot(NULL, root, &dir_key, path, 2696 - 0, 0); 2697 - if (ret < 0) 2385 + ret = btrfs_search_slot(NULL, root, &dir_key, 2386 + 
wc->subvol_path, 0, 0); 2387 + if (ret < 0) { 2388 + btrfs_abort_log_replay(wc, ret, 2389 + "failed to search root %llu for key (%llu %u %llu)", 2390 + btrfs_root_id(root), 2391 + dir_key.objectid, dir_key.type, 2392 + dir_key.offset); 2698 2393 goto out; 2699 - 2700 - nritems = btrfs_header_nritems(path->nodes[0]); 2701 - if (path->slots[0] >= nritems) { 2702 - ret = btrfs_next_leaf(root, path); 2703 - if (ret == 1) 2704 - break; 2705 - else if (ret < 0) 2706 - goto out; 2707 2394 } 2708 - btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2709 - path->slots[0]); 2395 + 2396 + nritems = btrfs_header_nritems(wc->subvol_path->nodes[0]); 2397 + if (wc->subvol_path->slots[0] >= nritems) { 2398 + ret = btrfs_next_leaf(root, wc->subvol_path); 2399 + if (ret == 1) { 2400 + break; 2401 + } else if (ret < 0) { 2402 + btrfs_abort_log_replay(wc, ret, 2403 + "failed to get next leaf in subvolume root %llu", 2404 + btrfs_root_id(root)); 2405 + goto out; 2406 + } 2407 + } 2408 + btrfs_item_key_to_cpu(wc->subvol_path->nodes[0], &found_key, 2409 + wc->subvol_path->slots[0]); 2710 2410 if (found_key.objectid != dirid || 2711 2411 found_key.type != dir_key.type) { 2712 2412 ret = 0; ··· 2730 2402 if (found_key.offset > range_end) 2731 2403 break; 2732 2404 2733 - ret = check_item_in_log(trans, log, path, 2734 - log_path, dir, 2735 - &found_key); 2405 + ret = check_item_in_log(wc, log_path, dir, &found_key, del_all); 2736 2406 if (ret) 2737 2407 goto out; 2738 2408 if (found_key.offset == (u64)-1) 2739 2409 break; 2740 2410 dir_key.offset = found_key.offset + 1; 2741 2411 } 2742 - btrfs_release_path(path); 2412 + btrfs_release_path(wc->subvol_path); 2743 2413 if (range_end == (u64)-1) 2744 2414 break; 2745 2415 range_start = range_end + 1; 2746 2416 } 2747 2417 ret = 0; 2748 2418 out: 2749 - btrfs_release_path(path); 2419 + btrfs_release_path(wc->subvol_path); 2750 2420 btrfs_free_path(log_path); 2751 2421 iput(&dir->vfs_inode); 2752 2422 return ret; ··· 2761 2435 * only in the log 
(references come from either directory items or inode 2762 2436 * back refs). 2763 2437 */ 2764 - static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, 2438 + static int replay_one_buffer(struct extent_buffer *eb, 2765 2439 struct walk_control *wc, u64 gen, int level) 2766 2440 { 2767 2441 int nritems; ··· 2769 2443 .transid = gen, 2770 2444 .level = level 2771 2445 }; 2772 - struct btrfs_path *path; 2773 - struct btrfs_root *root = wc->replay_dest; 2774 - struct btrfs_key key; 2775 - int i; 2446 + struct btrfs_root *root = wc->root; 2447 + struct btrfs_trans_handle *trans = wc->trans; 2776 2448 int ret; 2777 - 2778 - ret = btrfs_read_extent_buffer(eb, &check); 2779 - if (ret) 2780 - return ret; 2781 - 2782 - level = btrfs_header_level(eb); 2783 2449 2784 2450 if (level != 0) 2785 2451 return 0; 2786 2452 2787 - path = btrfs_alloc_path(); 2788 - if (!path) 2453 + /* 2454 + * Set to NULL since it was not yet read and in case we abort log replay 2455 + * on error, we have no valid log tree leaf to dump. 
2456 + */ 2457 + wc->log_leaf = NULL; 2458 + ret = btrfs_read_extent_buffer(eb, &check); 2459 + if (ret) { 2460 + btrfs_abort_log_replay(wc, ret, 2461 + "failed to read log tree leaf %llu for root %llu", 2462 + eb->start, btrfs_root_id(root)); 2463 + return ret; 2464 + } 2465 + 2466 + ASSERT(wc->subvol_path == NULL); 2467 + wc->subvol_path = btrfs_alloc_path(); 2468 + if (!wc->subvol_path) { 2469 + btrfs_abort_log_replay(wc, -ENOMEM, "failed to allocate path"); 2789 2470 return -ENOMEM; 2471 + } 2472 + 2473 + wc->log_leaf = eb; 2790 2474 2791 2475 nritems = btrfs_header_nritems(eb); 2792 - for (i = 0; i < nritems; i++) { 2476 + for (wc->log_slot = 0; wc->log_slot < nritems; wc->log_slot++) { 2793 2477 struct btrfs_inode_item *inode_item; 2794 2478 2795 - btrfs_item_key_to_cpu(eb, &key, i); 2479 + btrfs_item_key_to_cpu(eb, &wc->log_key, wc->log_slot); 2796 2480 2797 - if (key.type == BTRFS_INODE_ITEM_KEY) { 2798 - inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item); 2481 + if (wc->log_key.type == BTRFS_INODE_ITEM_KEY) { 2482 + inode_item = btrfs_item_ptr(eb, wc->log_slot, 2483 + struct btrfs_inode_item); 2799 2484 /* 2800 2485 * An inode with no links is either: 2801 2486 * ··· 2835 2498 } 2836 2499 2837 2500 /* Inode keys are done during the first stage. 
*/ 2838 - if (key.type == BTRFS_INODE_ITEM_KEY && 2501 + if (wc->log_key.type == BTRFS_INODE_ITEM_KEY && 2839 2502 wc->stage == LOG_WALK_REPLAY_INODES) { 2840 2503 u32 mode; 2841 2504 2842 - ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid); 2505 + ret = replay_xattr_deletes(wc); 2843 2506 if (ret) 2844 2507 break; 2845 2508 mode = btrfs_inode_mode(eb, inode_item); 2846 2509 if (S_ISDIR(mode)) { 2847 - ret = replay_dir_deletes(wc->trans, root, log, path, 2848 - key.objectid, false); 2510 + ret = replay_dir_deletes(wc, wc->log_key.objectid, false); 2849 2511 if (ret) 2850 2512 break; 2851 2513 } 2852 - ret = overwrite_item(wc->trans, root, path, 2853 - eb, i, &key); 2514 + ret = overwrite_item(wc); 2854 2515 if (ret) 2855 2516 break; 2856 2517 ··· 2865 2530 struct btrfs_inode *inode; 2866 2531 u64 from; 2867 2532 2868 - inode = btrfs_iget_logging(key.objectid, root); 2533 + inode = btrfs_iget_logging(wc->log_key.objectid, root); 2869 2534 if (IS_ERR(inode)) { 2870 2535 ret = PTR_ERR(inode); 2536 + btrfs_abort_log_replay(wc, ret, 2537 + "failed to lookup inode %llu root %llu", 2538 + wc->log_key.objectid, 2539 + btrfs_root_id(root)); 2871 2540 break; 2872 2541 } 2873 2542 from = ALIGN(i_size_read(&inode->vfs_inode), ··· 2879 2540 drop_args.start = from; 2880 2541 drop_args.end = (u64)-1; 2881 2542 drop_args.drop_cache = true; 2882 - ret = btrfs_drop_extents(wc->trans, root, inode, 2883 - &drop_args); 2884 - if (!ret) { 2543 + drop_args.path = wc->subvol_path; 2544 + ret = btrfs_drop_extents(trans, root, inode, &drop_args); 2545 + if (ret) { 2546 + btrfs_abort_log_replay(wc, ret, 2547 + "failed to drop extents for inode %llu root %llu offset %llu", 2548 + btrfs_ino(inode), 2549 + btrfs_root_id(root), 2550 + from); 2551 + } else { 2885 2552 inode_sub_bytes(&inode->vfs_inode, 2886 2553 drop_args.bytes_found); 2887 2554 /* Update the inode's nbytes. 
*/ 2888 - ret = btrfs_update_inode(wc->trans, inode); 2555 + ret = btrfs_update_inode(trans, inode); 2556 + if (ret) 2557 + btrfs_abort_log_replay(wc, ret, 2558 + "failed to update inode %llu root %llu", 2559 + btrfs_ino(inode), 2560 + btrfs_root_id(root)); 2889 2561 } 2890 2562 iput(&inode->vfs_inode); 2891 2563 if (ret) 2892 2564 break; 2893 2565 } 2894 2566 2895 - ret = link_to_fixup_dir(wc->trans, root, 2896 - path, key.objectid); 2567 + ret = link_to_fixup_dir(wc, wc->log_key.objectid); 2897 2568 if (ret) 2898 2569 break; 2899 2570 } ··· 2911 2562 if (wc->ignore_cur_inode) 2912 2563 continue; 2913 2564 2914 - if (key.type == BTRFS_DIR_INDEX_KEY && 2565 + if (wc->log_key.type == BTRFS_DIR_INDEX_KEY && 2915 2566 wc->stage == LOG_WALK_REPLAY_DIR_INDEX) { 2916 - ret = replay_one_dir_item(wc->trans, root, path, 2917 - eb, i, &key); 2567 + ret = replay_one_dir_item(wc); 2918 2568 if (ret) 2919 2569 break; 2920 2570 } ··· 2922 2574 continue; 2923 2575 2924 2576 /* these keys are simply copied */ 2925 - if (key.type == BTRFS_XATTR_ITEM_KEY) { 2926 - ret = overwrite_item(wc->trans, root, path, 2927 - eb, i, &key); 2577 + if (wc->log_key.type == BTRFS_XATTR_ITEM_KEY) { 2578 + ret = overwrite_item(wc); 2928 2579 if (ret) 2929 2580 break; 2930 - } else if (key.type == BTRFS_INODE_REF_KEY || 2931 - key.type == BTRFS_INODE_EXTREF_KEY) { 2932 - ret = add_inode_ref(wc->trans, root, log, path, 2933 - eb, i, &key); 2581 + } else if (wc->log_key.type == BTRFS_INODE_REF_KEY || 2582 + wc->log_key.type == BTRFS_INODE_EXTREF_KEY) { 2583 + ret = add_inode_ref(wc); 2934 2584 if (ret) 2935 2585 break; 2936 - } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 2937 - ret = replay_one_extent(wc->trans, root, path, 2938 - eb, i, &key); 2586 + } else if (wc->log_key.type == BTRFS_EXTENT_DATA_KEY) { 2587 + ret = replay_one_extent(wc); 2939 2588 if (ret) 2940 2589 break; 2941 2590 } ··· 2943 2598 * older kernel with such keys, ignore them. 
2944 2599 */ 2945 2600 } 2946 - btrfs_free_path(path); 2601 + btrfs_free_path(wc->subvol_path); 2602 + wc->subvol_path = NULL; 2947 2603 return ret; 2948 - } 2949 - 2950 - /* 2951 - * Correctly adjust the reserved bytes occupied by a log tree extent buffer 2952 - */ 2953 - static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) 2954 - { 2955 - struct btrfs_block_group *cache; 2956 - 2957 - cache = btrfs_lookup_block_group(fs_info, start); 2958 - if (!cache) { 2959 - btrfs_err(fs_info, "unable to find block group for %llu", start); 2960 - return -ENOENT; 2961 - } 2962 - 2963 - spin_lock(&cache->space_info->lock); 2964 - spin_lock(&cache->lock); 2965 - cache->reserved -= fs_info->nodesize; 2966 - cache->space_info->bytes_reserved -= fs_info->nodesize; 2967 - spin_unlock(&cache->lock); 2968 - spin_unlock(&cache->space_info->lock); 2969 - 2970 - btrfs_put_block_group(cache); 2971 - 2972 - return 0; 2973 2604 } 2974 2605 2975 2606 static int clean_log_buffer(struct btrfs_trans_handle *trans, 2976 2607 struct extent_buffer *eb) 2977 2608 { 2609 + struct btrfs_fs_info *fs_info = eb->fs_info; 2610 + struct btrfs_block_group *bg; 2611 + 2978 2612 btrfs_tree_lock(eb); 2979 2613 btrfs_clear_buffer_dirty(trans, eb); 2980 2614 wait_on_extent_buffer_writeback(eb); 2981 2615 btrfs_tree_unlock(eb); 2982 2616 2983 - if (trans) 2984 - return btrfs_pin_reserved_extent(trans, eb); 2617 + if (trans) { 2618 + int ret; 2985 2619 2986 - return unaccount_log_buffer(eb->fs_info, eb->start); 2620 + ret = btrfs_pin_reserved_extent(trans, eb); 2621 + if (ret) 2622 + btrfs_abort_transaction(trans, ret); 2623 + return ret; 2624 + } 2625 + 2626 + bg = btrfs_lookup_block_group(fs_info, eb->start); 2627 + if (!bg) { 2628 + btrfs_err(fs_info, "unable to find block group for %llu", eb->start); 2629 + btrfs_handle_fs_error(fs_info, -ENOENT, NULL); 2630 + return -ENOENT; 2631 + } 2632 + 2633 + spin_lock(&bg->space_info->lock); 2634 + spin_lock(&bg->lock); 2635 + bg->reserved -= 
fs_info->nodesize; 2636 + bg->space_info->bytes_reserved -= fs_info->nodesize; 2637 + spin_unlock(&bg->lock); 2638 + spin_unlock(&bg->space_info->lock); 2639 + 2640 + btrfs_put_block_group(bg); 2641 + 2642 + return 0; 2987 2643 } 2988 2644 2989 - static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, 2990 - struct btrfs_root *root, 2991 - struct btrfs_path *path, int *level, 2992 - struct walk_control *wc) 2645 + static noinline int walk_down_log_tree(struct btrfs_path *path, int *level, 2646 + struct walk_control *wc) 2993 2647 { 2994 - struct btrfs_fs_info *fs_info = root->fs_info; 2648 + struct btrfs_trans_handle *trans = wc->trans; 2649 + struct btrfs_fs_info *fs_info = wc->log->fs_info; 2995 2650 u64 bytenr; 2996 2651 u64 ptr_gen; 2997 2652 struct extent_buffer *next; ··· 3019 2674 next = btrfs_find_create_tree_block(fs_info, bytenr, 3020 2675 btrfs_header_owner(cur), 3021 2676 *level - 1); 3022 - if (IS_ERR(next)) 3023 - return PTR_ERR(next); 2677 + if (IS_ERR(next)) { 2678 + ret = PTR_ERR(next); 2679 + if (trans) 2680 + btrfs_abort_transaction(trans, ret); 2681 + else 2682 + btrfs_handle_fs_error(fs_info, ret, NULL); 2683 + return ret; 2684 + } 3024 2685 3025 2686 if (*level == 1) { 3026 - ret = wc->process_func(root, next, wc, ptr_gen, 3027 - *level - 1); 2687 + ret = wc->process_func(next, wc, ptr_gen, *level - 1); 3028 2688 if (ret) { 3029 2689 free_extent_buffer(next); 3030 2690 return ret; ··· 3040 2690 ret = btrfs_read_extent_buffer(next, &check); 3041 2691 if (ret) { 3042 2692 free_extent_buffer(next); 2693 + if (trans) 2694 + btrfs_abort_transaction(trans, ret); 2695 + else 2696 + btrfs_handle_fs_error(fs_info, ret, NULL); 3043 2697 return ret; 3044 2698 } 3045 2699 ··· 3059 2705 ret = btrfs_read_extent_buffer(next, &check); 3060 2706 if (ret) { 3061 2707 free_extent_buffer(next); 2708 + if (trans) 2709 + btrfs_abort_transaction(trans, ret); 2710 + else 2711 + btrfs_handle_fs_error(fs_info, ret, NULL); 3062 2712 return ret; 3063 
2713 } 3064 2714 ··· 3079 2721 return 0; 3080 2722 } 3081 2723 3082 - static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, 3083 - struct btrfs_root *root, 3084 - struct btrfs_path *path, int *level, 3085 - struct walk_control *wc) 2724 + static noinline int walk_up_log_tree(struct btrfs_path *path, int *level, 2725 + struct walk_control *wc) 3086 2726 { 3087 2727 int i; 3088 2728 int slot; ··· 3094 2738 WARN_ON(*level == 0); 3095 2739 return 0; 3096 2740 } else { 3097 - ret = wc->process_func(root, path->nodes[*level], wc, 2741 + ret = wc->process_func(path->nodes[*level], wc, 3098 2742 btrfs_header_generation(path->nodes[*level]), 3099 2743 *level); 3100 2744 if (ret) 3101 2745 return ret; 3102 2746 3103 2747 if (wc->free) { 3104 - ret = clean_log_buffer(trans, path->nodes[*level]); 2748 + ret = clean_log_buffer(wc->trans, path->nodes[*level]); 3105 2749 if (ret) 3106 2750 return ret; 3107 2751 } ··· 3118 2762 * the tree freeing any blocks that have a ref count of zero after being 3119 2763 * decremented. 
3120 2764 */ 3121 - static int walk_log_tree(struct btrfs_trans_handle *trans, 3122 - struct btrfs_root *log, struct walk_control *wc) 2765 + static int walk_log_tree(struct walk_control *wc) 3123 2766 { 2767 + struct btrfs_root *log = wc->log; 3124 2768 int ret = 0; 3125 2769 int wret; 3126 2770 int level; 3127 - struct btrfs_path *path; 2771 + BTRFS_PATH_AUTO_FREE(path); 3128 2772 int orig_level; 3129 2773 3130 2774 path = btrfs_alloc_path(); ··· 3138 2782 path->slots[level] = 0; 3139 2783 3140 2784 while (1) { 3141 - wret = walk_down_log_tree(trans, log, path, &level, wc); 2785 + wret = walk_down_log_tree(path, &level, wc); 3142 2786 if (wret > 0) 3143 2787 break; 3144 - if (wret < 0) { 3145 - ret = wret; 3146 - goto out; 3147 - } 2788 + if (wret < 0) 2789 + return wret; 3148 2790 3149 - wret = walk_up_log_tree(trans, log, path, &level, wc); 2791 + wret = walk_up_log_tree(path, &level, wc); 3150 2792 if (wret > 0) 3151 2793 break; 3152 - if (wret < 0) { 3153 - ret = wret; 3154 - goto out; 3155 - } 2794 + if (wret < 0) 2795 + return wret; 3156 2796 } 3157 2797 3158 2798 /* was the root node processed? 
if not, catch it here */ 3159 2799 if (path->nodes[orig_level]) { 3160 - ret = wc->process_func(log, path->nodes[orig_level], wc, 2800 + ret = wc->process_func(path->nodes[orig_level], wc, 3161 2801 btrfs_header_generation(path->nodes[orig_level]), 3162 2802 orig_level); 3163 2803 if (ret) 3164 - goto out; 2804 + return ret; 3165 2805 if (wc->free) 3166 - ret = clean_log_buffer(trans, path->nodes[orig_level]); 2806 + ret = clean_log_buffer(wc->trans, path->nodes[orig_level]); 3167 2807 } 3168 2808 3169 - out: 3170 - btrfs_free_path(path); 3171 2809 return ret; 3172 2810 } 3173 2811 ··· 3570 3220 btrfs_set_super_log_root_level(fs_info->super_for_commit, log_root_level); 3571 3221 ret = write_all_supers(fs_info, 1); 3572 3222 mutex_unlock(&fs_info->tree_log_mutex); 3573 - if (ret) { 3223 + if (unlikely(ret)) { 3574 3224 btrfs_set_log_full_commit(trans); 3575 3225 btrfs_abort_transaction(trans, ret); 3576 3226 goto out_wake_log_root; ··· 3622 3272 { 3623 3273 int ret; 3624 3274 struct walk_control wc = { 3625 - .free = 1, 3626 - .process_func = process_one_buffer 3275 + .free = true, 3276 + .process_func = process_one_buffer, 3277 + .log = log, 3278 + .trans = trans, 3627 3279 }; 3628 3280 3629 3281 if (log->node) { 3630 - ret = walk_log_tree(trans, log, &wc); 3282 + ret = walk_log_tree(&wc); 3631 3283 if (ret) { 3632 3284 /* 3633 3285 * We weren't able to traverse the entire log tree, the ··· 3828 3476 3829 3477 /* 3830 3478 * The inode was previously logged and then evicted, set logged_trans to 3831 - * the current transacion's ID, to avoid future tree searches as long as 3479 + * the current transaction's ID, to avoid future tree searches as long as 3832 3480 * the inode is not evicted again. 
3833 3481 */ 3834 3482 spin_lock(&inode->lock); ··· 3899 3547 const struct fscrypt_str *name, 3900 3548 struct btrfs_inode *dir, u64 index) 3901 3549 { 3902 - struct btrfs_path *path; 3550 + BTRFS_PATH_AUTO_FREE(path); 3903 3551 int ret; 3904 3552 3905 3553 ret = inode_logged(trans, dir, NULL); 3906 3554 if (ret == 0) 3907 3555 return; 3908 - else if (ret < 0) { 3556 + if (ret < 0) { 3909 3557 btrfs_set_log_full_commit(trans); 3910 3558 return; 3911 3559 } ··· 3919 3567 ret = join_running_log_trans(root); 3920 3568 ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret); 3921 3569 if (WARN_ON(ret)) 3922 - goto out; 3570 + return; 3923 3571 3924 3572 mutex_lock(&dir->log_mutex); 3925 3573 ··· 3929 3577 if (ret < 0) 3930 3578 btrfs_set_log_full_commit(trans); 3931 3579 btrfs_end_log_trans(root); 3932 - out: 3933 - btrfs_free_path(path); 3934 3580 } 3935 3581 3936 3582 /* see comments for btrfs_del_dir_entries_in_log */ ··· 4041 3691 struct btrfs_key *ins_keys; 4042 3692 u32 *ins_sizes; 4043 3693 4044 - ins_data = kmalloc(count * sizeof(u32) + 4045 - count * sizeof(struct btrfs_key), GFP_NOFS); 3694 + ins_data = kmalloc_array(count, sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS); 4046 3695 if (!ins_data) 4047 3696 return -ENOMEM; 4048 3697 ··· 4604 4255 static void fill_inode_item(struct btrfs_trans_handle *trans, 4605 4256 struct extent_buffer *leaf, 4606 4257 struct btrfs_inode_item *item, 4607 - struct inode *inode, int log_inode_only, 4258 + struct inode *inode, bool log_inode_only, 4608 4259 u64 logged_isize) 4609 4260 { 4610 4261 u64 flags; ··· 4700 4351 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 4701 4352 struct btrfs_inode_item); 4702 4353 fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode, 4703 - 0, 0); 4354 + false, 0); 4704 4355 btrfs_release_path(path); 4705 4356 return 0; 4706 4357 } ··· 4804 4455 4805 4456 src = src_path->nodes[0]; 4806 4457 4807 - ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 4808 - nr * 
sizeof(u32), GFP_NOFS); 4458 + ins_data = kmalloc_array(nr, sizeof(struct btrfs_key) + sizeof(u32), GFP_NOFS); 4809 4459 if (!ins_data) 4810 4460 return -ENOMEM; 4811 4461 ··· 5205 4857 struct btrfs_key key; 5206 4858 const u64 i_size = i_size_read(&inode->vfs_inode); 5207 4859 const u64 ino = btrfs_ino(inode); 5208 - struct btrfs_path *dst_path = NULL; 4860 + BTRFS_PATH_AUTO_FREE(dst_path); 5209 4861 bool dropped_extents = false; 5210 4862 u64 truncate_offset = i_size; 5211 4863 struct extent_buffer *leaf; ··· 5323 4975 start_slot, ins_nr, 1, 0, ctx); 5324 4976 out: 5325 4977 btrfs_release_path(path); 5326 - btrfs_free_path(dst_path); 5327 4978 return ret; 5328 4979 } 5329 4980 ··· 5695 5348 u64 *other_ino, u64 *other_parent) 5696 5349 { 5697 5350 int ret; 5698 - struct btrfs_path *search_path; 5351 + BTRFS_PATH_AUTO_FREE(search_path); 5699 5352 char *name = NULL; 5700 5353 u32 name_len = 0; 5701 5354 u32 item_size = btrfs_item_size(eb, slot); ··· 5780 5433 } 5781 5434 ret = 0; 5782 5435 out: 5783 - btrfs_free_path(search_path); 5784 5436 kfree(name); 5785 5437 return ret; 5786 5438 } ··· 6507 6161 if (!first) 6508 6162 return 0; 6509 6163 6510 - ins_data = kmalloc(max_batch_size * sizeof(u32) + 6511 - max_batch_size * sizeof(struct btrfs_key), GFP_NOFS); 6164 + ins_data = kmalloc_array(max_batch_size, sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS); 6512 6165 if (!ins_data) 6513 6166 return -ENOMEM; 6514 6167 ins_sizes = (u32 *)ins_data; ··· 7161 6816 struct btrfs_log_ctx *ctx) 7162 6817 { 7163 6818 int ret; 7164 - struct btrfs_path *path; 6819 + BTRFS_PATH_AUTO_FREE(path); 7165 6820 struct btrfs_key key; 7166 6821 struct btrfs_root *root = inode->root; 7167 6822 const u64 ino = btrfs_ino(inode); ··· 7177 6832 key.offset = 0; 7178 6833 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 7179 6834 if (ret < 0) 7180 - goto out; 6835 + return ret; 7181 6836 7182 6837 while (true) { 7183 6838 struct extent_buffer *leaf = path->nodes[0]; ··· 7189 6844 if (slot >= 
btrfs_header_nritems(leaf)) { 7190 6845 ret = btrfs_next_leaf(root, path); 7191 6846 if (ret < 0) 7192 - goto out; 7193 - else if (ret > 0) 6847 + return ret; 6848 + if (ret > 0) 7194 6849 break; 7195 6850 continue; 7196 6851 } ··· 7248 6903 * at both parents and the old parent B would still 7249 6904 * exist. 7250 6905 */ 7251 - if (IS_ERR(dir_inode)) { 7252 - ret = PTR_ERR(dir_inode); 7253 - goto out; 7254 - } 6906 + if (IS_ERR(dir_inode)) 6907 + return PTR_ERR(dir_inode); 7255 6908 7256 6909 if (!need_log_inode(trans, dir_inode)) { 7257 6910 btrfs_add_delayed_iput(dir_inode); ··· 7262 6919 ret = log_new_dir_dentries(trans, dir_inode, ctx); 7263 6920 btrfs_add_delayed_iput(dir_inode); 7264 6921 if (ret) 7265 - goto out; 6922 + return ret; 7266 6923 } 7267 6924 path->slots[0]++; 7268 6925 } 7269 - ret = 0; 7270 - out: 7271 - btrfs_free_path(path); 7272 - return ret; 6926 + return 0; 7273 6927 } 7274 6928 7275 6929 static int log_new_ancestors(struct btrfs_trans_handle *trans, ··· 7377 7037 { 7378 7038 struct btrfs_root *root = inode->root; 7379 7039 const u64 ino = btrfs_ino(inode); 7380 - struct btrfs_path *path; 7040 + BTRFS_PATH_AUTO_FREE(path); 7381 7041 struct btrfs_key search_key; 7382 7042 int ret; 7383 7043 ··· 7398 7058 again: 7399 7059 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 7400 7060 if (ret < 0) 7401 - goto out; 7061 + return ret; 7402 7062 if (ret == 0) 7403 7063 path->slots[0]++; 7404 7064 ··· 7410 7070 if (slot >= btrfs_header_nritems(leaf)) { 7411 7071 ret = btrfs_next_leaf(root, path); 7412 7072 if (ret < 0) 7413 - goto out; 7414 - else if (ret > 0) 7073 + return ret; 7074 + if (ret > 0) 7415 7075 break; 7416 7076 continue; 7417 7077 } ··· 7428 7088 * this loop, etc). So just return some error to fallback to 7429 7089 * a transaction commit. 
7430 7090 */ 7431 - if (found_key.type == BTRFS_INODE_EXTREF_KEY) { 7432 - ret = -EMLINK; 7433 - goto out; 7434 - } 7091 + if (found_key.type == BTRFS_INODE_EXTREF_KEY) 7092 + return -EMLINK; 7435 7093 7436 7094 /* 7437 7095 * Logging ancestors needs to do more searches on the fs/subvol ··· 7441 7103 7442 7104 ret = log_new_ancestors(trans, root, path, ctx); 7443 7105 if (ret) 7444 - goto out; 7106 + return ret; 7445 7107 btrfs_release_path(path); 7446 7108 goto again; 7447 7109 } 7448 - ret = 0; 7449 - out: 7450 - btrfs_free_path(path); 7451 - return ret; 7110 + return 0; 7452 7111 } 7453 7112 7454 7113 /* ··· 7625 7290 } 7626 7291 7627 7292 wc.trans = trans; 7628 - wc.pin = 1; 7293 + wc.pin = true; 7294 + wc.log = log_root_tree; 7629 7295 7630 - ret = walk_log_tree(trans, log_root_tree, &wc); 7631 - if (ret) { 7296 + ret = walk_log_tree(&wc); 7297 + wc.log = NULL; 7298 + if (unlikely(ret)) { 7632 7299 btrfs_abort_transaction(trans, ret); 7633 7300 goto error; 7634 7301 } ··· 7641 7304 key.offset = (u64)-1; 7642 7305 7643 7306 while (1) { 7644 - struct btrfs_root *log; 7645 7307 struct btrfs_key found_key; 7646 7308 7647 7309 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); 7648 7310 7649 - if (ret < 0) { 7311 + if (unlikely(ret < 0)) { 7650 7312 btrfs_abort_transaction(trans, ret); 7651 7313 goto error; 7652 7314 } ··· 7660 7324 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) 7661 7325 break; 7662 7326 7663 - log = btrfs_read_tree_root(log_root_tree, &found_key); 7664 - if (IS_ERR(log)) { 7665 - ret = PTR_ERR(log); 7327 + wc.log = btrfs_read_tree_root(log_root_tree, &found_key); 7328 + if (IS_ERR(wc.log)) { 7329 + ret = PTR_ERR(wc.log); 7330 + wc.log = NULL; 7666 7331 btrfs_abort_transaction(trans, ret); 7667 7332 goto error; 7668 7333 } 7669 7334 7670 - wc.replay_dest = btrfs_get_fs_root(fs_info, found_key.offset, 7671 - true); 7672 - if (IS_ERR(wc.replay_dest)) { 7673 - ret = PTR_ERR(wc.replay_dest); 7674 - wc.replay_dest = NULL; 7675 - if 
(ret != -ENOENT) { 7676 - btrfs_put_root(log); 7335 + wc.root = btrfs_get_fs_root(fs_info, found_key.offset, true); 7336 + if (IS_ERR(wc.root)) { 7337 + ret = PTR_ERR(wc.root); 7338 + wc.root = NULL; 7339 + if (unlikely(ret != -ENOENT)) { 7677 7340 btrfs_abort_transaction(trans, ret); 7678 7341 goto error; 7679 7342 } ··· 7688 7353 * block from being modified, and we'll just bail for 7689 7354 * each subsequent pass. 7690 7355 */ 7691 - ret = btrfs_pin_extent_for_log_replay(trans, log->node); 7692 - if (ret) { 7693 - btrfs_put_root(log); 7356 + ret = btrfs_pin_extent_for_log_replay(trans, wc.log->node); 7357 + if (unlikely(ret)) { 7694 7358 btrfs_abort_transaction(trans, ret); 7695 7359 goto error; 7696 7360 } 7697 7361 goto next; 7698 7362 } 7699 7363 7700 - wc.replay_dest->log_root = log; 7701 - ret = btrfs_record_root_in_trans(trans, wc.replay_dest); 7702 - if (ret) { 7364 + wc.root->log_root = wc.log; 7365 + ret = btrfs_record_root_in_trans(trans, wc.root); 7366 + if (unlikely(ret)) { 7703 7367 btrfs_abort_transaction(trans, ret); 7704 7368 goto next; 7705 7369 } 7706 7370 7707 - ret = walk_log_tree(trans, log, &wc); 7708 - if (ret) { 7371 + ret = walk_log_tree(&wc); 7372 + if (unlikely(ret)) { 7709 7373 btrfs_abort_transaction(trans, ret); 7710 7374 goto next; 7711 7375 } 7712 7376 7713 7377 if (wc.stage == LOG_WALK_REPLAY_ALL) { 7714 - struct btrfs_root *root = wc.replay_dest; 7378 + struct btrfs_root *root = wc.root; 7715 7379 7716 - ret = fixup_inode_link_counts(trans, wc.replay_dest, path); 7717 - if (ret) { 7380 + wc.subvol_path = path; 7381 + ret = fixup_inode_link_counts(&wc); 7382 + wc.subvol_path = NULL; 7383 + if (unlikely(ret)) { 7718 7384 btrfs_abort_transaction(trans, ret); 7719 7385 goto next; 7720 7386 } ··· 7728 7392 * could only happen during mount. 
7729 7393 */ 7730 7394 ret = btrfs_init_root_free_objectid(root); 7731 - if (ret) { 7395 + if (unlikely(ret)) { 7732 7396 btrfs_abort_transaction(trans, ret); 7733 7397 goto next; 7734 7398 } 7735 7399 } 7736 7400 next: 7737 - if (wc.replay_dest) { 7738 - wc.replay_dest->log_root = NULL; 7739 - btrfs_put_root(wc.replay_dest); 7401 + if (wc.root) { 7402 + wc.root->log_root = NULL; 7403 + btrfs_put_root(wc.root); 7740 7404 } 7741 - btrfs_put_root(log); 7405 + btrfs_put_root(wc.log); 7406 + wc.log = NULL; 7742 7407 7743 7408 if (ret) 7744 7409 goto error; ··· 7751 7414 7752 7415 /* step one is to pin it all, step two is to replay just inodes */ 7753 7416 if (wc.pin) { 7754 - wc.pin = 0; 7417 + wc.pin = false; 7755 7418 wc.process_func = replay_one_buffer; 7756 7419 wc.stage = LOG_WALK_REPLAY_INODES; 7757 7420 goto again; ··· 7769 7432 if (ret) 7770 7433 return ret; 7771 7434 7772 - log_root_tree->log_root = NULL; 7773 7435 clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags); 7774 - btrfs_put_root(log_root_tree); 7775 7436 7776 7437 return 0; 7777 7438 error: 7778 7439 if (wc.trans) 7779 7440 btrfs_end_transaction(wc.trans); 7441 + btrfs_put_root(wc.log); 7780 7442 clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags); 7781 7443 btrfs_free_path(path); 7782 7444 return ret;

+4 -4

fs/btrfs/verity.c

··· 487 487 inode->ro_flags &= ~BTRFS_INODE_RO_VERITY; 488 488 btrfs_sync_inode_flags_to_i_flags(inode); 489 489 ret = btrfs_update_inode(trans, inode); 490 - if (ret) { 490 + if (unlikely(ret)) { 491 491 btrfs_abort_transaction(trans, ret); 492 492 goto out; 493 493 } 494 494 ret = del_orphan(trans, inode); 495 - if (ret) { 495 + if (unlikely(ret)) { 496 496 btrfs_abort_transaction(trans, ret); 497 497 goto out; 498 498 } ··· 676 676 if (ret < 0) 677 677 return ret; 678 678 679 - if (item.reserved[0] != 0 || item.reserved[1] != 0) 679 + if (unlikely(item.reserved[0] != 0 || item.reserved[1] != 0)) 680 680 return -EUCLEAN; 681 681 682 682 true_size = btrfs_stack_verity_descriptor_size(&item); 683 - if (true_size > INT_MAX) 683 + if (unlikely(true_size > INT_MAX)) 684 684 return -EUCLEAN; 685 685 686 686 if (buf_size == 0)

+34 -36

fs/btrfs/volumes.c

··· 1377 1377 } 1378 1378 1379 1379 /* 1380 - * Make sure the last byte of label is properly NUL termiated. We use 1381 - * '%s' to print the label, if not properly NUL termiated we can access 1380 + * Make sure the last byte of label is properly NUL terminated. We use 1381 + * '%s' to print the label, if not properly NUL terminated we can access 1382 1382 * beyond the label. 1383 1383 */ 1384 1384 if (super->label[0] && super->label[BTRFS_LABEL_SIZE - 1]) ··· 1911 1911 if (ret < 0) 1912 1912 goto error; 1913 1913 1914 - if (ret == 0) { 1914 + if (unlikely(ret == 0)) { 1915 1915 /* Corruption */ 1916 1916 btrfs_err(fs_info, "corrupted chunk tree devid -1 matched"); 1917 1917 ret = -EUCLEAN; ··· 2243 2243 } 2244 2244 2245 2245 ret = btrfs_rm_dev_item(trans, device); 2246 - if (ret) { 2246 + if (unlikely(ret)) { 2247 2247 /* Any error in dev item removal is critical */ 2248 2248 btrfs_crit(fs_info, 2249 2249 "failed to remove device item for devid %llu: %d", ··· 2843 2843 mutex_lock(&fs_info->chunk_mutex); 2844 2844 ret = init_first_rw_device(trans); 2845 2845 mutex_unlock(&fs_info->chunk_mutex); 2846 - if (ret) { 2846 + if (unlikely(ret)) { 2847 2847 btrfs_abort_transaction(trans, ret); 2848 2848 goto error_sysfs; 2849 2849 } 2850 2850 } 2851 2851 2852 2852 ret = btrfs_add_dev_item(trans, device); 2853 - if (ret) { 2853 + if (unlikely(ret)) { 2854 2854 btrfs_abort_transaction(trans, ret); 2855 2855 goto error_sysfs; 2856 2856 } 2857 2857 2858 2858 if (seeding_dev) { 2859 2859 ret = btrfs_finish_sprout(trans); 2860 - if (ret) { 2860 + if (unlikely(ret)) { 2861 2861 btrfs_abort_transaction(trans, ret); 2862 2862 goto error_sysfs; 2863 2863 } ··· 3049 3049 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 3050 3050 if (ret < 0) 3051 3051 goto out; 3052 - else if (ret > 0) { /* Logic error or corruption */ 3052 + else if (unlikely(ret > 0)) { /* Logic error or corruption */ 3053 3053 btrfs_err(fs_info, "failed to lookup chunk %llu when freeing", 3054 3054 
chunk_offset); 3055 3055 btrfs_abort_transaction(trans, -ENOENT); ··· 3058 3058 } 3059 3059 3060 3060 ret = btrfs_del_item(trans, root, path); 3061 - if (ret < 0) { 3061 + if (unlikely(ret < 0)) { 3062 3062 btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset); 3063 3063 btrfs_abort_transaction(trans, ret); 3064 3064 goto out; ··· 3283 3283 ret = btrfs_free_dev_extent(trans, device, 3284 3284 map->stripes[i].physical, 3285 3285 &dev_extent_len); 3286 - if (ret) { 3286 + if (unlikely(ret)) { 3287 3287 mutex_unlock(&fs_devices->device_list_mutex); 3288 3288 btrfs_abort_transaction(trans, ret); 3289 3289 goto out; ··· 3353 3353 struct btrfs_space_info *space_info; 3354 3354 3355 3355 space_info = btrfs_find_space_info(fs_info, sys_flags); 3356 - if (!space_info) { 3356 + if (unlikely(!space_info)) { 3357 3357 ret = -EINVAL; 3358 3358 btrfs_abort_transaction(trans, ret); 3359 3359 goto out; ··· 3367 3367 } 3368 3368 3369 3369 ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg); 3370 - if (ret) { 3370 + if (unlikely(ret)) { 3371 3371 btrfs_abort_transaction(trans, ret); 3372 3372 goto out; 3373 3373 } 3374 3374 3375 3375 ret = remove_chunk_item(trans, map, chunk_offset); 3376 - if (ret) { 3376 + if (unlikely(ret)) { 3377 3377 btrfs_abort_transaction(trans, ret); 3378 3378 goto out; 3379 3379 } 3380 - } else if (ret) { 3380 + } else if (unlikely(ret)) { 3381 3381 btrfs_abort_transaction(trans, ret); 3382 3382 goto out; 3383 3383 } ··· 3386 3386 3387 3387 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { 3388 3388 ret = btrfs_del_sys_chunk(fs_info, chunk_offset); 3389 - if (ret) { 3389 + if (unlikely(ret)) { 3390 3390 btrfs_abort_transaction(trans, ret); 3391 3391 goto out; 3392 3392 } ··· 3402 3402 btrfs_trans_release_chunk_metadata(trans); 3403 3403 3404 3404 ret = btrfs_remove_block_group(trans, map); 3405 - if (ret) { 3405 + if (unlikely(ret)) { 3406 3406 btrfs_abort_transaction(trans, ret); 3407 3407 goto out; 3408 3408 } ··· 3527 3527 
mutex_unlock(&fs_info->reclaim_bgs_lock); 3528 3528 goto error; 3529 3529 } 3530 - if (ret == 0) { 3530 + if (unlikely(ret == 0)) { 3531 3531 /* 3532 3532 * On the first search we would find chunk tree with 3533 3533 * offset -1, which is not possible. On subsequent ··· 4269 4269 * @flags: profile to validate 4270 4270 * @extended: if true @flags is treated as an extended profile 4271 4271 */ 4272 - static int alloc_profile_is_valid(u64 flags, int extended) 4272 + static int alloc_profile_is_valid(u64 flags, bool extended) 4273 4273 { 4274 4274 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK : 4275 4275 BTRFS_BLOCK_GROUP_PROFILE_MASK); ··· 4463 4463 } 4464 4464 4465 4465 /* 4466 - * Should be called with balance mutexe held 4466 + * Should be called with balance mutex held 4467 4467 */ 4468 4468 int btrfs_balance(struct btrfs_fs_info *fs_info, 4469 4469 struct btrfs_balance_control *bctl, ··· 5041 5041 /* Now btrfs_update_device() will change the on-disk size. */ 5042 5042 ret = btrfs_update_device(trans, device); 5043 5043 btrfs_trans_release_chunk_metadata(trans); 5044 - if (ret < 0) { 5044 + if (unlikely(ret < 0)) { 5045 5045 btrfs_abort_transaction(trans, ret); 5046 5046 btrfs_end_transaction(trans); 5047 5047 } else { ··· 5701 5701 item_size = btrfs_chunk_item_size(map->num_stripes); 5702 5702 5703 5703 chunk = kzalloc(item_size, GFP_NOFS); 5704 - if (!chunk) { 5704 + if (unlikely(!chunk)) { 5705 5705 ret = -ENOMEM; 5706 5706 btrfs_abort_transaction(trans, ret); 5707 5707 goto out; ··· 7486 7486 /* 7487 7487 * Lockdep complains about possible circular locking dependency between 7488 7488 * a disk's open_mutex (struct gendisk.open_mutex), the rw semaphores 7489 - * used for freeze procection of a fs (struct super_block.s_writers), 7489 + * used for freeze protection of a fs (struct super_block.s_writers), 7490 7490 * which we take when starting a transaction, and extent buffers of the 7491 7491 * chunk tree if we call read_one_dev() while holding a lock on 
an 7492 7492 * extent buffer of the chunk tree. Since we are mounting the filesystem ··· 7919 7919 return btrfs_raid_array[index].ncopies; 7920 7920 } 7921 7921 7922 - 7923 - 7924 7922 static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, 7925 7923 u64 chunk_offset, u64 devid, 7926 7924 u64 physical_offset, u64 physical_len) ··· 7932 7934 int i; 7933 7935 7934 7936 map = btrfs_find_chunk_map(fs_info, chunk_offset, 1); 7935 - if (!map) { 7937 + if (unlikely(!map)) { 7936 7938 btrfs_err(fs_info, 7937 7939 "dev extent physical offset %llu on devid %llu doesn't have corresponding chunk", 7938 7940 physical_offset, devid); ··· 7941 7943 } 7942 7944 7943 7945 stripe_len = btrfs_calc_stripe_length(map); 7944 - if (physical_len != stripe_len) { 7946 + if (unlikely(physical_len != stripe_len)) { 7945 7947 btrfs_err(fs_info, 7946 7948 "dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu", 7947 7949 physical_offset, devid, map->start, physical_len, ··· 7961 7963 devid, physical_offset, physical_len); 7962 7964 7963 7965 for (i = 0; i < map->num_stripes; i++) { 7964 - if (map->stripes[i].dev->devid == devid && 7965 - map->stripes[i].physical == physical_offset) { 7966 + if (unlikely(map->stripes[i].dev->devid == devid && 7967 + map->stripes[i].physical == physical_offset)) { 7966 7968 found = true; 7967 7969 if (map->verified_stripes >= map->num_stripes) { 7968 7970 btrfs_err(fs_info, ··· 7975 7977 break; 7976 7978 } 7977 7979 } 7978 - if (!found) { 7980 + if (unlikely(!found)) { 7979 7981 btrfs_err(fs_info, 7980 7982 "dev extent physical offset %llu devid %llu has no corresponding chunk", 7981 7983 physical_offset, devid); ··· 7984 7986 7985 7987 /* Make sure no dev extent is beyond device boundary */ 7986 7988 dev = btrfs_find_device(fs_info->fs_devices, &args); 7987 - if (!dev) { 7989 + if (unlikely(!dev)) { 7988 7990 btrfs_err(fs_info, "failed to find devid %llu", devid); 7989 7991 ret = -EUCLEAN; 7990 7992 goto 
out; 7991 7993 } 7992 7994 7993 - if (physical_offset + physical_len > dev->disk_total_bytes) { 7995 + if (unlikely(physical_offset + physical_len > dev->disk_total_bytes)) { 7994 7996 btrfs_err(fs_info, 7995 7997 "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", 7996 7998 devid, physical_offset, physical_len, ··· 8002 8004 if (dev->zone_info) { 8003 8005 u64 zone_size = dev->zone_info->zone_size; 8004 8006 8005 - if (!IS_ALIGNED(physical_offset, zone_size) || 8006 - !IS_ALIGNED(physical_len, zone_size)) { 8007 + if (unlikely(!IS_ALIGNED(physical_offset, zone_size) || 8008 + !IS_ALIGNED(physical_len, zone_size))) { 8007 8009 btrfs_err(fs_info, 8008 8010 "zoned: dev extent devid %llu physical offset %llu len %llu is not aligned to device zone", 8009 8011 devid, physical_offset, physical_len); ··· 8027 8029 struct btrfs_chunk_map *map; 8028 8030 8029 8031 map = rb_entry(node, struct btrfs_chunk_map, rb_node); 8030 - if (map->num_stripes != map->verified_stripes) { 8032 + if (unlikely(map->num_stripes != map->verified_stripes)) { 8031 8033 btrfs_err(fs_info, 8032 8034 "chunk %llu has missing dev extent, have %d expect %d", 8033 8035 map->start, map->verified_stripes, map->num_stripes); ··· 8087 8089 if (ret < 0) 8088 8090 goto out; 8089 8091 /* No dev extents at all? Not good */ 8090 - if (ret > 0) { 8092 + if (unlikely(ret > 0)) { 8091 8093 ret = -EUCLEAN; 8092 8094 goto out; 8093 8095 } ··· 8112 8114 physical_len = btrfs_dev_extent_length(leaf, dext); 8113 8115 8114 8116 /* Check if this dev extent overlaps with the previous one */ 8115 - if (devid == prev_devid && physical_offset < prev_dev_ext_end) { 8117 + if (unlikely(devid == prev_devid && physical_offset < prev_dev_ext_end)) { 8116 8118 btrfs_err(fs_info, 8117 8119 "dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu", 8118 8120 devid, physical_offset, prev_dev_ext_end);

+2 -2

fs/btrfs/volumes.h

··· 34 34 #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) 35 35 36 36 /* 37 - * Arbitratry maximum size of one discard request to limit potentially long time 37 + * Arbitrary maximum size of one discard request to limit potentially long time 38 38 * spent in blkdev_issue_discard(). 39 39 */ 40 40 #define BTRFS_MAX_DISCARD_CHUNK_SIZE (SZ_1G) ··· 495 495 }; 496 496 497 497 /* 498 - * Context for IO subsmission for device stripe. 498 + * Context for IO submission for device stripe. 499 499 * 500 500 * - Track the unfinished mirrors for mirror based profiles 501 501 * Mirror based profiles are SINGLE/DUP/RAID1/RAID10.

+50 -36

fs/btrfs/zlib.c

··· 34 34 int level; 35 35 }; 36 36 37 - static struct workspace_manager wsm; 38 - 39 - struct list_head *zlib_get_workspace(unsigned int level) 37 + struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level) 40 38 { 41 - struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level); 39 + struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level); 42 40 struct workspace *workspace = list_entry(ws, struct workspace, list); 43 41 44 42 workspace->level = level; ··· 53 55 kfree(workspace); 54 56 } 55 57 56 - struct list_head *zlib_alloc_workspace(unsigned int level) 58 + /* 59 + * For s390 hardware acceleration, the buffer size should be at least 60 + * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance. 61 + * 62 + * But if bs > ps we can have large enough folios that meet the s390 hardware 63 + * handling. 64 + */ 65 + static bool need_special_buffer(struct btrfs_fs_info *fs_info) 57 66 { 67 + if (!zlib_deflate_dfltcc_enabled()) 68 + return false; 69 + if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE) 70 + return false; 71 + return true; 72 + } 73 + 74 + struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level) 75 + { 76 + const u32 blocksize = fs_info->sectorsize; 58 77 struct workspace *workspace; 59 78 int workspacesize; 60 79 ··· 84 69 workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN); 85 70 workspace->level = level; 86 71 workspace->buf = NULL; 87 - /* 88 - * In case of s390 zlib hardware support, allocate lager workspace 89 - * buffer. If allocator fails, fall back to a single page buffer. 
90 - */ 91 - if (zlib_deflate_dfltcc_enabled()) { 72 + if (need_special_buffer(fs_info)) { 92 73 workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE, 93 74 __GFP_NOMEMALLOC | __GFP_NORETRY | 94 75 __GFP_NOWARN | GFP_NOIO); 95 76 workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE; 96 77 } 97 78 if (!workspace->buf) { 98 - workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 99 - workspace->buf_size = PAGE_SIZE; 79 + workspace->buf = kmalloc(blocksize, GFP_KERNEL); 80 + workspace->buf_size = blocksize; 100 81 } 101 82 if (!workspace->strm.workspace || !workspace->buf) 102 83 goto fail; ··· 144 133 return 0; 145 134 } 146 135 147 - int zlib_compress_folios(struct list_head *ws, struct address_space *mapping, 136 + int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode, 148 137 u64 start, struct folio **folios, unsigned long *out_folios, 149 138 unsigned long *total_in, unsigned long *total_out) 150 139 { 140 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 151 141 struct workspace *workspace = list_entry(ws, struct workspace, list); 142 + struct address_space *mapping = inode->vfs_inode.i_mapping; 143 + const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order; 144 + const u32 min_folio_size = btrfs_min_folio_size(fs_info); 152 145 int ret; 153 146 char *data_in = NULL; 154 147 char *cfolio_out; ··· 161 146 struct folio *out_folio = NULL; 162 147 unsigned long len = *total_out; 163 148 unsigned long nr_dest_folios = *out_folios; 164 - const unsigned long max_out = nr_dest_folios * PAGE_SIZE; 149 + const unsigned long max_out = nr_dest_folios << min_folio_shift; 150 + const u32 blocksize = fs_info->sectorsize; 165 151 const u64 orig_end = start + len; 166 152 167 153 *out_folios = 0; ··· 171 155 172 156 ret = zlib_deflateInit(&workspace->strm, workspace->level); 173 157 if (unlikely(ret != Z_OK)) { 174 - struct btrfs_inode *inode = BTRFS_I(mapping->host); 175 - 176 - btrfs_err(inode->root->fs_info, 158 + btrfs_err(fs_info, 177 159 "zlib compression init 
failed, error %d root %llu inode %llu offset %llu", 178 160 ret, btrfs_root_id(inode->root), btrfs_ino(inode), start); 179 161 ret = -EIO; ··· 181 167 workspace->strm.total_in = 0; 182 168 workspace->strm.total_out = 0; 183 169 184 - out_folio = btrfs_alloc_compr_folio(); 170 + out_folio = btrfs_alloc_compr_folio(fs_info); 185 171 if (out_folio == NULL) { 186 172 ret = -ENOMEM; 187 173 goto out; ··· 193 179 workspace->strm.next_in = workspace->buf; 194 180 workspace->strm.avail_in = 0; 195 181 workspace->strm.next_out = cfolio_out; 196 - workspace->strm.avail_out = PAGE_SIZE; 182 + workspace->strm.avail_out = min_folio_size; 197 183 198 184 while (workspace->strm.total_in < len) { 199 185 /* ··· 205 191 unsigned int copy_length = min(bytes_left, workspace->buf_size); 206 192 207 193 /* 208 - * This can only happen when hardware zlib compression is 209 - * enabled. 194 + * For s390 hardware accelerated zlib, and our folio is smaller 195 + * than the copy_length, we need to fill the buffer so that 196 + * we can take full advantage of hardware acceleration. 
210 197 */ 211 - if (copy_length > PAGE_SIZE) { 198 + if (need_special_buffer(fs_info)) { 212 199 ret = copy_data_into_buffer(mapping, workspace, 213 200 start, copy_length); 214 201 if (ret < 0) ··· 240 225 241 226 ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); 242 227 if (unlikely(ret != Z_OK)) { 243 - struct btrfs_inode *inode = BTRFS_I(mapping->host); 244 - 245 - btrfs_warn(inode->root->fs_info, 228 + btrfs_warn(fs_info, 246 229 "zlib compression failed, error %d root %llu inode %llu offset %llu", 247 230 ret, btrfs_root_id(inode->root), btrfs_ino(inode), 248 231 start); ··· 250 237 } 251 238 252 239 /* we're making it bigger, give up */ 253 - if (workspace->strm.total_in > 8192 && 240 + if (workspace->strm.total_in > blocksize * 2 && 254 241 workspace->strm.total_in < 255 242 workspace->strm.total_out) { 256 243 ret = -E2BIG; ··· 265 252 ret = -E2BIG; 266 253 goto out; 267 254 } 268 - out_folio = btrfs_alloc_compr_folio(); 255 + out_folio = btrfs_alloc_compr_folio(fs_info); 269 256 if (out_folio == NULL) { 270 257 ret = -ENOMEM; 271 258 goto out; ··· 273 260 cfolio_out = folio_address(out_folio); 274 261 folios[nr_folios] = out_folio; 275 262 nr_folios++; 276 - workspace->strm.avail_out = PAGE_SIZE; 263 + workspace->strm.avail_out = min_folio_size; 277 264 workspace->strm.next_out = cfolio_out; 278 265 } 279 266 /* we're all done */ ··· 291 278 ret = zlib_deflate(&workspace->strm, Z_FINISH); 292 279 if (ret == Z_STREAM_END) 293 280 break; 294 - if (ret != Z_OK && ret != Z_BUF_ERROR) { 281 + if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) { 295 282 zlib_deflateEnd(&workspace->strm); 296 283 ret = -EIO; 297 284 goto out; ··· 301 288 ret = -E2BIG; 302 289 goto out; 303 290 } 304 - out_folio = btrfs_alloc_compr_folio(); 291 + out_folio = btrfs_alloc_compr_folio(fs_info); 305 292 if (out_folio == NULL) { 306 293 ret = -ENOMEM; 307 294 goto out; ··· 309 296 cfolio_out = folio_address(out_folio); 310 297 folios[nr_folios] = out_folio; 311 298 nr_folios++; 312 - 
workspace->strm.avail_out = PAGE_SIZE; 299 + workspace->strm.avail_out = min_folio_size; 313 300 workspace->strm.next_out = cfolio_out; 314 301 } 315 302 } ··· 335 322 336 323 int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) 337 324 { 325 + struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); 338 326 struct workspace *workspace = list_entry(ws, struct workspace, list); 327 + const u32 min_folio_size = btrfs_min_folio_size(fs_info); 339 328 int ret = 0, ret2; 340 329 int wbits = MAX_WBITS; 341 330 char *data_in; 342 331 size_t total_out = 0; 343 332 unsigned long folio_in_index = 0; 344 333 size_t srclen = cb->compressed_len; 345 - unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE); 334 + unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size); 346 335 unsigned long buf_start; 347 336 struct folio **folios_in = cb->compressed_folios; 348 337 349 338 data_in = kmap_local_folio(folios_in[folio_in_index], 0); 350 339 workspace->strm.next_in = data_in; 351 - workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE); 340 + workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size); 352 341 workspace->strm.total_in = 0; 353 342 354 343 workspace->strm.total_out = 0; ··· 411 396 data_in = kmap_local_folio(folios_in[folio_in_index], 0); 412 397 workspace->strm.next_in = data_in; 413 398 tmp = srclen - workspace->strm.total_in; 414 - workspace->strm.avail_in = min(tmp, PAGE_SIZE); 399 + workspace->strm.avail_in = min(tmp, min_folio_size); 415 400 } 416 401 } 417 402 if (unlikely(ret != Z_STREAM_END)) { ··· 499 484 return ret; 500 485 } 501 486 502 - const struct btrfs_compress_op btrfs_zlib_compress = { 503 - .workspace_manager = &wsm, 487 + const struct btrfs_compress_levels btrfs_zlib_compress = { 504 488 .min_level = 1, 505 489 .max_level = 9, 506 490 .default_level = BTRFS_ZLIB_DEFAULT_LEVEL,

+36 -30

fs/btrfs/zoned.c

··· 274 274 return ret; 275 275 } 276 276 *nr_zones = ret; 277 - if (!ret) 277 + if (unlikely(!ret)) 278 278 return -EIO; 279 279 280 280 /* Populate cache */ ··· 315 315 if (ret < 0) 316 316 return ret; 317 317 /* No dev extents at all? Not good */ 318 - if (ret > 0) 318 + if (unlikely(ret > 0)) 319 319 return -EUCLEAN; 320 320 } 321 321 ··· 503 503 sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; 504 504 } 505 505 506 - if (nreported != zone_info->nr_zones) { 506 + if (unlikely(nreported != zone_info->nr_zones)) { 507 507 btrfs_err(device->fs_info, 508 508 "inconsistent number of zones on %s (%u/%u)", 509 509 rcu_dereference(device->name), nreported, ··· 513 513 } 514 514 515 515 if (max_active_zones) { 516 - if (nactive > max_active_zones) { 516 + if (unlikely(nactive > max_active_zones)) { 517 517 if (bdev_max_active_zones(bdev) == 0) { 518 518 max_active_zones = 0; 519 519 zone_info->max_active_zones = 0; ··· 550 550 if (ret) 551 551 goto out; 552 552 553 - if (nr_zones != BTRFS_NR_SB_LOG_ZONES) { 553 + if (unlikely(nr_zones != BTRFS_NR_SB_LOG_ZONES)) { 554 554 btrfs_err(device->fs_info, 555 555 "zoned: failed to read super block log zone info at devid %llu zone %u", 556 556 device->devid, sb_zone); ··· 568 568 569 569 ret = sb_write_pointer(device->bdev, 570 570 &zone_info->sb_zones[sb_pos], &sb_wp); 571 - if (ret != -ENOENT && ret) { 571 + if (unlikely(ret != -ENOENT && ret)) { 572 572 btrfs_err(device->fs_info, 573 573 "zoned: super block log zone corrupted devid %llu zone %u", 574 574 device->devid, sb_zone); ··· 901 901 zones); 902 902 if (ret < 0) 903 903 return ret; 904 - if (ret != BTRFS_NR_SB_LOG_ZONES) 904 + if (unlikely(ret != BTRFS_NR_SB_LOG_ZONES)) 905 905 return -EIO; 906 906 907 907 return sb_log_location(bdev, zones, rw, bytenr_ret); ··· 1253 1253 root = btrfs_extent_root(fs_info, key.objectid); 1254 1254 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1255 1255 /* We should not find the exact match */ 1256 - if (!ret) 1256 
+ if (unlikely(!ret)) 1257 1257 ret = -EUCLEAN; 1258 1258 if (ret < 0) 1259 1259 return ret; ··· 1274 1274 else 1275 1275 length = fs_info->nodesize; 1276 1276 1277 - if (!(found_key.objectid >= cache->start && 1278 - found_key.objectid + length <= cache->start + cache->length)) { 1277 + if (unlikely(!(found_key.objectid >= cache->start && 1278 + found_key.objectid + length <= cache->start + cache->length))) { 1279 1279 return -EUCLEAN; 1280 1280 } 1281 1281 *offset_ret = found_key.objectid + length - cache->start; ··· 1357 1357 return 0; 1358 1358 } 1359 1359 1360 - if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { 1360 + if (unlikely(zone.type == BLK_ZONE_TYPE_CONVENTIONAL)) { 1361 1361 btrfs_err(fs_info, 1362 1362 "zoned: unexpected conventional zone %llu on device %s (devid %llu)", 1363 1363 zone.start << SECTOR_SHIFT, rcu_dereference(device->name), ··· 1399 1399 struct zone_info *info, 1400 1400 unsigned long *active) 1401 1401 { 1402 - if (info->alloc_offset == WP_MISSING_DEV) { 1402 + if (unlikely(info->alloc_offset == WP_MISSING_DEV)) { 1403 1403 btrfs_err(bg->fs_info, 1404 1404 "zoned: cannot recover write pointer for zone %llu", 1405 1405 info->physical); ··· 1428 1428 1429 1429 bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); 1430 1430 1431 - if (zone_info[0].alloc_offset == WP_MISSING_DEV) { 1431 + if (unlikely(zone_info[0].alloc_offset == WP_MISSING_DEV)) { 1432 1432 btrfs_err(bg->fs_info, 1433 1433 "zoned: cannot recover write pointer for zone %llu", 1434 1434 zone_info[0].physical); 1435 1435 return -EIO; 1436 1436 } 1437 - if (zone_info[1].alloc_offset == WP_MISSING_DEV) { 1437 + if (unlikely(zone_info[1].alloc_offset == WP_MISSING_DEV)) { 1438 1438 btrfs_err(bg->fs_info, 1439 1439 "zoned: cannot recover write pointer for zone %llu", 1440 1440 zone_info[1].physical); ··· 1447 1447 if (zone_info[1].alloc_offset == WP_CONVENTIONAL) 1448 1448 zone_info[1].alloc_offset = last_alloc; 1449 1449 1450 - if 
(zone_info[0].alloc_offset != zone_info[1].alloc_offset) { 1450 + if (unlikely(zone_info[0].alloc_offset != zone_info[1].alloc_offset)) { 1451 1451 btrfs_err(bg->fs_info, 1452 1452 "zoned: write pointer offset mismatch of zones in DUP profile"); 1453 1453 return -EIO; 1454 1454 } 1455 1455 1456 1456 if (test_bit(0, active) != test_bit(1, active)) { 1457 - if (!btrfs_zone_activate(bg)) 1457 + if (unlikely(!btrfs_zone_activate(bg))) 1458 1458 return -EIO; 1459 1459 } else if (test_bit(0, active)) { 1460 1460 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); ··· 1489 1489 if (zone_info[i].alloc_offset == WP_CONVENTIONAL) 1490 1490 zone_info[i].alloc_offset = last_alloc; 1491 1491 1492 - if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) && 1493 - !btrfs_test_opt(fs_info, DEGRADED)) { 1492 + if (unlikely((zone_info[0].alloc_offset != zone_info[i].alloc_offset) && 1493 + !btrfs_test_opt(fs_info, DEGRADED))) { 1494 1494 btrfs_err(fs_info, 1495 1495 "zoned: write pointer offset mismatch of zones in %s profile", 1496 1496 btrfs_bg_type_to_raid_name(map->type)); 1497 1497 return -EIO; 1498 1498 } 1499 1499 if (test_bit(0, active) != test_bit(i, active)) { 1500 - if (!btrfs_test_opt(fs_info, DEGRADED) && 1501 - !btrfs_zone_activate(bg)) { 1500 + if (unlikely(!btrfs_test_opt(fs_info, DEGRADED) && 1501 + !btrfs_zone_activate(bg))) { 1502 1502 return -EIO; 1503 1503 } 1504 1504 } else { ··· 1554 1554 } 1555 1555 1556 1556 if (test_bit(0, active) != test_bit(i, active)) { 1557 - if (!btrfs_zone_activate(bg)) 1557 + if (unlikely(!btrfs_zone_activate(bg))) 1558 1558 return -EIO; 1559 1559 } else { 1560 1560 if (test_bit(0, active)) ··· 1586 1586 continue; 1587 1587 1588 1588 if (test_bit(0, active) != test_bit(i, active)) { 1589 - if (!btrfs_zone_activate(bg)) 1589 + if (unlikely(!btrfs_zone_activate(bg))) 1590 1590 return -EIO; 1591 1591 } else { 1592 1592 if (test_bit(0, active)) ··· 1643 1643 return 0; 1644 1644 1645 1645 /* Sanity check */ 1646 - if 
(!IS_ALIGNED(length, fs_info->zone_size)) { 1646 + if (unlikely(!IS_ALIGNED(length, fs_info->zone_size))) { 1647 1647 btrfs_err(fs_info, 1648 1648 "zoned: block group %llu len %llu unaligned to zone size %llu", 1649 1649 logical, length, fs_info->zone_size); ··· 1756 1756 return -EINVAL; 1757 1757 } 1758 1758 1759 - if (cache->alloc_offset > cache->zone_capacity) { 1759 + if (unlikely(cache->alloc_offset > cache->zone_capacity)) { 1760 1760 btrfs_err(fs_info, 1761 1761 "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", 1762 1762 cache->alloc_offset, cache->zone_capacity, ··· 2087 2087 2088 2088 ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, 2089 2089 &mapped_length, &bioc, NULL, NULL); 2090 - if (ret || !bioc || mapped_length < PAGE_SIZE) { 2090 + if (unlikely(ret || !bioc || mapped_length < PAGE_SIZE)) { 2091 2091 ret = -EIO; 2092 2092 goto out_put_bioc; 2093 2093 } ··· 2145 2145 if (physical_pos == wp) 2146 2146 return 0; 2147 2147 2148 - if (physical_pos > wp) 2148 + if (unlikely(physical_pos > wp)) 2149 2149 return -EUCLEAN; 2150 2150 2151 2151 length = wp - physical_pos; ··· 2464 2464 return ret; 2465 2465 } 2466 2466 2467 - void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length) 2467 + int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length) 2468 2468 { 2469 2469 struct btrfs_block_group *block_group; 2470 2470 u64 min_alloc_bytes; 2471 2471 2472 2472 if (!btrfs_is_zoned(fs_info)) 2473 - return; 2473 + return 0; 2474 2474 2475 2475 block_group = btrfs_lookup_block_group(fs_info, logical); 2476 - ASSERT(block_group); 2476 + if (WARN_ON_ONCE(!block_group)) 2477 + return -ENOENT; 2477 2478 2478 2479 /* No MIXED_BG on zoned btrfs. 
*/ 2479 2480 if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) ··· 2491 2490 2492 2491 out: 2493 2492 btrfs_put_block_group(block_group); 2493 + return 0; 2494 2494 } 2495 2495 2496 2496 static void btrfs_zone_finish_endio_workfn(struct work_struct *work) 2497 2497 { 2498 + int ret; 2498 2499 struct btrfs_block_group *bg = 2499 2500 container_of(work, struct btrfs_block_group, zone_finish_work); 2500 2501 2501 2502 wait_on_extent_buffer_writeback(bg->last_eb); 2502 2503 free_extent_buffer(bg->last_eb); 2503 - btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length); 2504 + ret = do_zone_finish(bg, true); 2505 + if (ret) 2506 + btrfs_handle_fs_error(bg->fs_info, ret, 2507 + "Failed to finish block-group's zone"); 2504 2508 btrfs_put_block_group(bg); 2505 2509 } 2506 2510

+6 -3

fs/btrfs/zoned.h

··· 83 83 bool btrfs_zone_activate(struct btrfs_block_group *block_group); 84 84 int btrfs_zone_finish(struct btrfs_block_group *block_group); 85 85 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags); 86 - void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, 86 + int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, 87 87 u64 length); 88 88 void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg, 89 89 struct extent_buffer *eb); ··· 234 234 return true; 235 235 } 236 236 237 - static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, 238 - u64 logical, u64 length) { } 237 + static inline int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, 238 + u64 logical, u64 length) 239 + { 240 + return 0; 241 + } 239 242 240 243 static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg, 241 244 struct extent_buffer *eb) { }

+104 -94

fs/btrfs/zstd.c

··· 77 77 */ 78 78 79 79 struct zstd_workspace_manager { 80 - const struct btrfs_compress_op *ops; 81 80 spinlock_t lock; 82 81 struct list_head lru_list; 83 82 struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL]; ··· 84 85 wait_queue_head_t wait; 85 86 struct timer_list timer; 86 87 }; 87 - 88 - static struct zstd_workspace_manager wsm; 89 88 90 89 static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL]; 91 90 ··· 109 112 */ 110 113 static void zstd_reclaim_timer_fn(struct timer_list *timer) 111 114 { 115 + struct zstd_workspace_manager *zwsm = 116 + container_of(timer, struct zstd_workspace_manager, timer); 112 117 unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES; 113 118 struct list_head *pos, *next; 114 119 115 - ASSERT(timer == &wsm.timer); 120 + spin_lock(&zwsm->lock); 116 121 117 - spin_lock(&wsm.lock); 118 - 119 - if (list_empty(&wsm.lru_list)) { 120 - spin_unlock(&wsm.lock); 122 + if (list_empty(&zwsm->lru_list)) { 123 + spin_unlock(&zwsm->lock); 121 124 return; 122 125 } 123 126 124 - list_for_each_prev_safe(pos, next, &wsm.lru_list) { 127 + list_for_each_prev_safe(pos, next, &zwsm->lru_list) { 125 128 struct workspace *victim = container_of(pos, struct workspace, 126 129 lru_list); 127 130 int level; ··· 138 141 list_del(&victim->list); 139 142 zstd_free_workspace(&victim->list); 140 143 141 - if (list_empty(&wsm.idle_ws[level])) 142 - clear_bit(level, &wsm.active_map); 144 + if (list_empty(&zwsm->idle_ws[level])) 145 + clear_bit(level, &zwsm->active_map); 143 146 144 147 } 145 148 146 - if (!list_empty(&wsm.lru_list)) 147 - mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES); 149 + if (!list_empty(&zwsm->lru_list)) 150 + mod_timer(&zwsm->timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES); 148 151 149 - spin_unlock(&wsm.lock); 152 + spin_unlock(&zwsm->lock); 150 153 } 151 154 152 155 /* ··· 179 182 } 180 183 } 181 184 182 - void zstd_init_workspace_manager(void) 185 + int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info) 183 
186 { 187 + struct zstd_workspace_manager *zwsm; 184 188 struct list_head *ws; 185 - int i; 186 189 190 + ASSERT(fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] == NULL); 191 + zwsm = kzalloc(sizeof(*zwsm), GFP_KERNEL); 192 + if (!zwsm) 193 + return -ENOMEM; 187 194 zstd_calc_ws_mem_sizes(); 195 + spin_lock_init(&zwsm->lock); 196 + init_waitqueue_head(&zwsm->wait); 197 + timer_setup(&zwsm->timer, zstd_reclaim_timer_fn, 0); 188 198 189 - wsm.ops = &btrfs_zstd_compress; 190 - spin_lock_init(&wsm.lock); 191 - init_waitqueue_head(&wsm.wait); 192 - timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0); 199 + INIT_LIST_HEAD(&zwsm->lru_list); 200 + for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) 201 + INIT_LIST_HEAD(&zwsm->idle_ws[i]); 202 + fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = zwsm; 193 203 194 - INIT_LIST_HEAD(&wsm.lru_list); 195 - for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) 196 - INIT_LIST_HEAD(&wsm.idle_ws[i]); 197 - 198 - ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL); 204 + ws = zstd_alloc_workspace(fs_info, ZSTD_BTRFS_MAX_LEVEL); 199 205 if (IS_ERR(ws)) { 200 206 btrfs_warn(NULL, "cannot preallocate zstd compression workspace"); 201 207 } else { 202 - set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map); 203 - list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]); 208 + set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &zwsm->active_map); 209 + list_add(ws, &zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]); 204 210 } 211 + return 0; 205 212 } 206 213 207 - void zstd_cleanup_workspace_manager(void) 214 + void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info) 208 215 { 216 + struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD]; 209 217 struct workspace *workspace; 210 - int i; 211 218 212 - spin_lock_bh(&wsm.lock); 213 - for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) { 214 - while (!list_empty(&wsm.idle_ws[i])) { 215 - workspace = container_of(wsm.idle_ws[i].next, 219 + if (!zwsm) 220 + return; 221 + fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = NULL; 222 + 
spin_lock_bh(&zwsm->lock); 223 + for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) { 224 + while (!list_empty(&zwsm->idle_ws[i])) { 225 + workspace = container_of(zwsm->idle_ws[i].next, 216 226 struct workspace, list); 217 227 list_del(&workspace->list); 218 228 list_del(&workspace->lru_list); 219 229 zstd_free_workspace(&workspace->list); 220 230 } 221 231 } 222 - spin_unlock_bh(&wsm.lock); 223 - 224 - timer_delete_sync(&wsm.timer); 232 + spin_unlock_bh(&zwsm->lock); 233 + timer_delete_sync(&zwsm->timer); 234 + kfree(zwsm); 225 235 } 226 236 227 237 /* ··· 243 239 * offer the opportunity to reclaim the workspace in favor of allocating an 244 240 * appropriately sized one in the future. 245 241 */ 246 - static struct list_head *zstd_find_workspace(int level) 242 + static struct list_head *zstd_find_workspace(struct btrfs_fs_info *fs_info, int level) 247 243 { 244 + struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD]; 248 245 struct list_head *ws; 249 246 struct workspace *workspace; 250 247 int i = clip_level(level); 251 248 252 - spin_lock_bh(&wsm.lock); 253 - for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) { 254 - if (!list_empty(&wsm.idle_ws[i])) { 255 - ws = wsm.idle_ws[i].next; 249 + ASSERT(zwsm); 250 + spin_lock_bh(&zwsm->lock); 251 + for_each_set_bit_from(i, &zwsm->active_map, ZSTD_BTRFS_MAX_LEVEL) { 252 + if (!list_empty(&zwsm->idle_ws[i])) { 253 + ws = zwsm->idle_ws[i].next; 256 254 workspace = list_to_workspace(ws); 257 255 list_del_init(ws); 258 256 /* keep its place if it's a lower level using this */ 259 257 workspace->req_level = level; 260 258 if (clip_level(level) == workspace->level) 261 259 list_del(&workspace->lru_list); 262 - if (list_empty(&wsm.idle_ws[i])) 263 - clear_bit(i, &wsm.active_map); 264 - spin_unlock_bh(&wsm.lock); 260 + if (list_empty(&zwsm->idle_ws[i])) 261 + clear_bit(i, &zwsm->active_map); 262 + spin_unlock_bh(&zwsm->lock); 265 263 return ws; 266 264 } 267 265 } 268 - 
spin_unlock_bh(&wsm.lock); 266 + spin_unlock_bh(&zwsm->lock); 269 267 270 268 return NULL; 271 269 } ··· 282 276 * attempt to allocate a new workspace. If we fail to allocate one due to 283 277 * memory pressure, go to sleep waiting for the max level workspace to free up. 284 278 */ 285 - struct list_head *zstd_get_workspace(int level) 279 + struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level) 286 280 { 281 + struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD]; 287 282 struct list_head *ws; 288 283 unsigned int nofs_flag; 284 + 285 + ASSERT(zwsm); 289 286 290 287 /* level == 0 means we can use any workspace */ 291 288 if (!level) 292 289 level = 1; 293 290 294 291 again: 295 - ws = zstd_find_workspace(level); 292 + ws = zstd_find_workspace(fs_info, level); 296 293 if (ws) 297 294 return ws; 298 295 299 296 nofs_flag = memalloc_nofs_save(); 300 - ws = zstd_alloc_workspace(level); 297 + ws = zstd_alloc_workspace(fs_info, level); 301 298 memalloc_nofs_restore(nofs_flag); 302 299 303 300 if (IS_ERR(ws)) { 304 301 DEFINE_WAIT(wait); 305 302 306 - prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE); 303 + prepare_to_wait(&zwsm->wait, &wait, TASK_UNINTERRUPTIBLE); 307 304 schedule(); 308 - finish_wait(&wsm.wait, &wait); 305 + finish_wait(&zwsm->wait, &wait); 309 306 310 307 goto again; 311 308 } ··· 327 318 * isn't set, it is also set here. Only the max level workspace tries and wakes 328 319 * up waiting workspaces. 
329 320 */ 330 - void zstd_put_workspace(struct list_head *ws) 321 + void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws) 331 322 { 323 + struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD]; 332 324 struct workspace *workspace = list_to_workspace(ws); 333 325 334 - spin_lock_bh(&wsm.lock); 326 + ASSERT(zwsm); 327 + spin_lock_bh(&zwsm->lock); 335 328 336 329 /* A node is only taken off the lru if we are the corresponding level */ 337 330 if (clip_level(workspace->req_level) == workspace->level) { 338 331 /* Hide a max level workspace from reclaim */ 339 - if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) { 332 + if (list_empty(&zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) { 340 333 INIT_LIST_HEAD(&workspace->lru_list); 341 334 } else { 342 335 workspace->last_used = jiffies; 343 - list_add(&workspace->lru_list, &wsm.lru_list); 344 - if (!timer_pending(&wsm.timer)) 345 - mod_timer(&wsm.timer, 336 + list_add(&workspace->lru_list, &zwsm->lru_list); 337 + if (!timer_pending(&zwsm->timer)) 338 + mod_timer(&zwsm->timer, 346 339 jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES); 347 340 } 348 341 } 349 342 350 - set_bit(workspace->level, &wsm.active_map); 351 - list_add(&workspace->list, &wsm.idle_ws[workspace->level]); 343 + set_bit(workspace->level, &zwsm->active_map); 344 + list_add(&workspace->list, &zwsm->idle_ws[workspace->level]); 352 345 workspace->req_level = 0; 353 346 354 - spin_unlock_bh(&wsm.lock); 347 + spin_unlock_bh(&zwsm->lock); 355 348 356 349 if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL)) 357 - cond_wake_up(&wsm.wait); 350 + cond_wake_up(&zwsm->wait); 358 351 } 359 352 360 353 void zstd_free_workspace(struct list_head *ws) ··· 368 357 kfree(workspace); 369 358 } 370 359 371 - struct list_head *zstd_alloc_workspace(int level) 360 + struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level) 372 361 { 362 + const u32 blocksize = fs_info->sectorsize; 373 363 struct workspace 
*workspace; 374 364 375 365 workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); ··· 383 371 workspace->req_level = level; 384 372 workspace->last_used = jiffies; 385 373 workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN); 386 - workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 374 + workspace->buf = kmalloc(blocksize, GFP_KERNEL); 387 375 if (!workspace->mem || !workspace->buf) 388 376 goto fail; 389 377 ··· 396 384 return ERR_PTR(-ENOMEM); 397 385 } 398 386 399 - int zstd_compress_folios(struct list_head *ws, struct address_space *mapping, 387 + int zstd_compress_folios(struct list_head *ws, struct btrfs_inode *inode, 400 388 u64 start, struct folio **folios, unsigned long *out_folios, 401 389 unsigned long *total_in, unsigned long *total_out) 402 390 { 391 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 403 392 struct workspace *workspace = list_entry(ws, struct workspace, list); 393 + struct address_space *mapping = inode->vfs_inode.i_mapping; 404 394 zstd_cstream *stream; 405 395 int ret = 0; 406 396 int nr_folios = 0; ··· 413 399 unsigned long len = *total_out; 414 400 const unsigned long nr_dest_folios = *out_folios; 415 401 const u64 orig_end = start + len; 416 - unsigned long max_out = nr_dest_folios * PAGE_SIZE; 402 + const u32 blocksize = fs_info->sectorsize; 403 + const u32 min_folio_size = btrfs_min_folio_size(fs_info); 404 + unsigned long max_out = nr_dest_folios * min_folio_size; 417 405 unsigned int cur_len; 418 406 419 407 workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len); ··· 427 411 stream = zstd_init_cstream(&workspace->params, len, workspace->mem, 428 412 workspace->size); 429 413 if (unlikely(!stream)) { 430 - struct btrfs_inode *inode = BTRFS_I(mapping->host); 431 - 432 - btrfs_err(inode->root->fs_info, 414 + btrfs_err(fs_info, 433 415 "zstd compression init level %d failed, root %llu inode %llu offset %llu", 434 416 workspace->req_level, btrfs_root_id(inode->root), 435 417 btrfs_ino(inode), 
start); ··· 445 431 workspace->in_buf.size = cur_len; 446 432 447 433 /* Allocate and map in the output buffer */ 448 - out_folio = btrfs_alloc_compr_folio(); 434 + out_folio = btrfs_alloc_compr_folio(fs_info); 449 435 if (out_folio == NULL) { 450 436 ret = -ENOMEM; 451 437 goto out; ··· 453 439 folios[nr_folios++] = out_folio; 454 440 workspace->out_buf.dst = folio_address(out_folio); 455 441 workspace->out_buf.pos = 0; 456 - workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); 442 + workspace->out_buf.size = min_t(size_t, max_out, min_folio_size); 457 443 458 444 while (1) { 459 445 size_t ret2; ··· 461 447 ret2 = zstd_compress_stream(stream, &workspace->out_buf, 462 448 &workspace->in_buf); 463 449 if (unlikely(zstd_is_error(ret2))) { 464 - struct btrfs_inode *inode = BTRFS_I(mapping->host); 465 - 466 - btrfs_warn(inode->root->fs_info, 450 + btrfs_warn(fs_info, 467 451 "zstd compression level %d failed, error %d root %llu inode %llu offset %llu", 468 452 workspace->req_level, zstd_get_error_code(ret2), 469 453 btrfs_root_id(inode->root), btrfs_ino(inode), ··· 471 459 } 472 460 473 461 /* Check to see if we are making it bigger */ 474 - if (tot_in + workspace->in_buf.pos > 8192 && 462 + if (tot_in + workspace->in_buf.pos > blocksize * 2 && 475 463 tot_in + workspace->in_buf.pos < 476 464 tot_out + workspace->out_buf.pos) { 477 465 ret = -E2BIG; ··· 487 475 488 476 /* Check if we need more output space */ 489 477 if (workspace->out_buf.pos == workspace->out_buf.size) { 490 - tot_out += PAGE_SIZE; 491 - max_out -= PAGE_SIZE; 478 + tot_out += min_folio_size; 479 + max_out -= min_folio_size; 492 480 if (nr_folios == nr_dest_folios) { 493 481 ret = -E2BIG; 494 482 goto out; 495 483 } 496 - out_folio = btrfs_alloc_compr_folio(); 484 + out_folio = btrfs_alloc_compr_folio(fs_info); 497 485 if (out_folio == NULL) { 498 486 ret = -ENOMEM; 499 487 goto out; ··· 501 489 folios[nr_folios++] = out_folio; 502 490 workspace->out_buf.dst = folio_address(out_folio); 503 
491 workspace->out_buf.pos = 0; 504 - workspace->out_buf.size = min_t(size_t, max_out, 505 - PAGE_SIZE); 492 + workspace->out_buf.size = min_t(size_t, max_out, min_folio_size); 506 493 } 507 494 508 495 /* We've reached the end of the input */ ··· 533 522 534 523 ret2 = zstd_end_stream(stream, &workspace->out_buf); 535 524 if (unlikely(zstd_is_error(ret2))) { 536 - struct btrfs_inode *inode = BTRFS_I(mapping->host); 537 - 538 - btrfs_err(inode->root->fs_info, 525 + btrfs_err(fs_info, 539 526 "zstd compression end level %d failed, error %d root %llu inode %llu offset %llu", 540 527 workspace->req_level, zstd_get_error_code(ret2), 541 528 btrfs_root_id(inode->root), btrfs_ino(inode), ··· 551 542 goto out; 552 543 } 553 544 554 - tot_out += PAGE_SIZE; 555 - max_out -= PAGE_SIZE; 545 + tot_out += min_folio_size; 546 + max_out -= min_folio_size; 556 547 if (nr_folios == nr_dest_folios) { 557 548 ret = -E2BIG; 558 549 goto out; 559 550 } 560 - out_folio = btrfs_alloc_compr_folio(); 551 + out_folio = btrfs_alloc_compr_folio(fs_info); 561 552 if (out_folio == NULL) { 562 553 ret = -ENOMEM; 563 554 goto out; ··· 565 556 folios[nr_folios++] = out_folio; 566 557 workspace->out_buf.dst = folio_address(out_folio); 567 558 workspace->out_buf.pos = 0; 568 - workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); 559 + workspace->out_buf.size = min_t(size_t, max_out, min_folio_size); 569 560 } 570 561 571 562 if (tot_out >= tot_in) { ··· 587 578 588 579 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) 589 580 { 581 + struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); 590 582 struct workspace *workspace = list_entry(ws, struct workspace, list); 591 583 struct folio **folios_in = cb->compressed_folios; 592 584 size_t srclen = cb->compressed_len; 593 585 zstd_dstream *stream; 594 586 int ret = 0; 587 + const u32 blocksize = fs_info->sectorsize; 588 + const unsigned int min_folio_size = btrfs_min_folio_size(fs_info); 595 589 unsigned long folio_in_index = 
0; 596 - unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE); 590 + unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size); 597 591 unsigned long buf_start; 598 592 unsigned long total_out = 0; 599 593 ··· 614 602 615 603 workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0); 616 604 workspace->in_buf.pos = 0; 617 - workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE); 605 + workspace->in_buf.size = min_t(size_t, srclen, min_folio_size); 618 606 619 607 workspace->out_buf.dst = workspace->buf; 620 608 workspace->out_buf.pos = 0; 621 - workspace->out_buf.size = PAGE_SIZE; 609 + workspace->out_buf.size = blocksize; 622 610 623 611 while (1) { 624 612 size_t ret2; ··· 654 642 if (workspace->in_buf.pos == workspace->in_buf.size) { 655 643 kunmap_local(workspace->in_buf.src); 656 644 folio_in_index++; 657 - if (folio_in_index >= total_folios_in) { 645 + if (unlikely(folio_in_index >= total_folios_in)) { 658 646 workspace->in_buf.src = NULL; 659 647 ret = -EIO; 660 648 goto done; 661 649 } 662 - srclen -= PAGE_SIZE; 650 + srclen -= min_folio_size; 663 651 workspace->in_buf.src = 664 652 kmap_local_folio(folios_in[folio_in_index], 0); 665 653 workspace->in_buf.pos = 0; 666 - workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE); 654 + workspace->in_buf.size = min_t(size_t, srclen, min_folio_size); 667 655 } 668 656 } 669 657 ret = 0; ··· 730 718 return ret; 731 719 } 732 720 733 - const struct btrfs_compress_op btrfs_zstd_compress = { 734 - /* ZSTD uses own workspace manager */ 735 - .workspace_manager = NULL, 721 + const struct btrfs_compress_levels btrfs_zstd_compress = { 736 722 .min_level = ZSTD_BTRFS_MIN_LEVEL, 737 723 .max_level = ZSTD_BTRFS_MAX_LEVEL, 738 724 .default_level = ZSTD_BTRFS_DEFAULT_LEVEL,

Configure Feed

Configure Feed