Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-6.8-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

- fix freeing allocated id for anon dev when snapshot creation fails

- fiemap fixes:
  - follow-up for a recent deadlock fix: ranges that fiemap can access
    can still race with ordered extent completion
  - make sure fiemap with the SYNC flag does not race with writes

* tag 'for-6.8-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix double free of anonymous device after snapshot creation failure
  btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given
  btrfs: fix race between ordered extent completion and fiemap

+139 -35
+11 -11
fs/btrfs/disk-io.c
··· 1307 1307 * 1308 1308 * @objectid: root id 1309 1309 * @anon_dev: preallocated anonymous block device number for new roots, 1310 - * pass 0 for new allocation. 1310 + * pass NULL for a new allocation. 1311 1311 * @check_ref: whether to check root item references, If true, return -ENOENT 1312 1312 * for orphan roots 1313 1313 */ 1314 1314 static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, 1315 - u64 objectid, dev_t anon_dev, 1315 + u64 objectid, dev_t *anon_dev, 1316 1316 bool check_ref) 1317 1317 { 1318 1318 struct btrfs_root *root; ··· 1342 1342 * that common but still possible. In that case, we just need 1343 1343 * to free the anon_dev. 1344 1344 */ 1345 - if (unlikely(anon_dev)) { 1346 - free_anon_bdev(anon_dev); 1347 - anon_dev = 0; 1345 + if (unlikely(anon_dev && *anon_dev)) { 1346 + free_anon_bdev(*anon_dev); 1347 + *anon_dev = 0; 1348 1348 } 1349 1349 1350 1350 if (check_ref && btrfs_root_refs(&root->root_item) == 0) { ··· 1366 1366 goto fail; 1367 1367 } 1368 1368 1369 - ret = btrfs_init_fs_root(root, anon_dev); 1369 + ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); 1370 1370 if (ret) 1371 1371 goto fail; 1372 1372 ··· 1402 1402 * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() 1403 1403 * and once again by our caller. 
1404 1404 */ 1405 - if (anon_dev) 1405 + if (anon_dev && *anon_dev) 1406 1406 root->anon_dev = 0; 1407 1407 btrfs_put_root(root); 1408 1408 return ERR_PTR(ret); ··· 1418 1418 struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, 1419 1419 u64 objectid, bool check_ref) 1420 1420 { 1421 - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); 1421 + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); 1422 1422 } 1423 1423 1424 1424 /* ··· 1426 1426 * the anonymous block device id 1427 1427 * 1428 1428 * @objectid: tree objectid 1429 - * @anon_dev: if zero, allocate a new anonymous block device or use the 1430 - * parameter value 1429 + * @anon_dev: if NULL, allocate a new anonymous block device or use the 1430 + * parameter value if not NULL 1431 1431 */ 1432 1432 struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, 1433 - u64 objectid, dev_t anon_dev) 1433 + u64 objectid, dev_t *anon_dev) 1434 1434 { 1435 1435 return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); 1436 1436 }
+1 -1
fs/btrfs/disk-io.h
··· 61 61 struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, 62 62 u64 objectid, bool check_ref); 63 63 struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, 64 - u64 objectid, dev_t anon_dev); 64 + u64 objectid, dev_t *anon_dev); 65 65 struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, 66 66 struct btrfs_path *path, 67 67 u64 objectid);
+104 -20
fs/btrfs/extent_io.c
··· 2480 2480 struct fiemap_cache *cache, 2481 2481 u64 offset, u64 phys, u64 len, u32 flags) 2482 2482 { 2483 + u64 cache_end; 2483 2484 int ret = 0; 2484 2485 2485 2486 /* Set at the end of extent_fiemap(). */ ··· 2490 2489 goto assign; 2491 2490 2492 2491 /* 2493 - * Sanity check, extent_fiemap() should have ensured that new 2494 - * fiemap extent won't overlap with cached one. 2495 - * Not recoverable. 2492 + * When iterating the extents of the inode, at extent_fiemap(), we may 2493 + * find an extent that starts at an offset behind the end offset of the 2494 + * previous extent we processed. This happens if fiemap is called 2495 + * without FIEMAP_FLAG_SYNC and there are ordered extents completing 2496 + * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()). 2496 2497 * 2497 - * NOTE: Physical address can overlap, due to compression 2498 + * For example we are in leaf X processing its last item, which is the 2499 + * file extent item for file range [512K, 1M[, and after 2500 + * btrfs_next_leaf() releases the path, there's an ordered extent that 2501 + * completes for the file range [768K, 2M[, and that results in trimming 2502 + * the file extent item so that it now corresponds to the file range 2503 + * [512K, 768K[ and a new file extent item is inserted for the file 2504 + * range [768K, 2M[, which may end up as the last item of leaf X or as 2505 + * the first item of the next leaf - in either case btrfs_next_leaf() 2506 + * will leave us with a path pointing to the new extent item, for the 2507 + * file range [768K, 2M[, since that's the first key that follows the 2508 + * last one we processed. So in order not to report overlapping extents 2509 + * to user space, we trim the length of the previously cached extent and 2510 + * emit it. 
2511 + * 2512 + * Upon calling btrfs_next_leaf() we may also find an extent with an 2513 + * offset smaller than or equals to cache->offset, and this happens 2514 + * when we had a hole or prealloc extent with several delalloc ranges in 2515 + * it, but after btrfs_next_leaf() released the path, delalloc was 2516 + * flushed and the resulting ordered extents were completed, so we can 2517 + * now have found a file extent item for an offset that is smaller than 2518 + * or equals to what we have in cache->offset. We deal with this as 2519 + * described below. 2498 2520 */ 2499 - if (cache->offset + cache->len > offset) { 2500 - WARN_ON(1); 2501 - return -EINVAL; 2521 + cache_end = cache->offset + cache->len; 2522 + if (cache_end > offset) { 2523 + if (offset == cache->offset) { 2524 + /* 2525 + * We cached a dealloc range (found in the io tree) for 2526 + * a hole or prealloc extent and we have now found a 2527 + * file extent item for the same offset. What we have 2528 + * now is more recent and up to date, so discard what 2529 + * we had in the cache and use what we have just found. 2530 + */ 2531 + goto assign; 2532 + } else if (offset > cache->offset) { 2533 + /* 2534 + * The extent range we previously found ends after the 2535 + * offset of the file extent item we found and that 2536 + * offset falls somewhere in the middle of that previous 2537 + * extent range. So adjust the range we previously found 2538 + * to end at the offset of the file extent item we have 2539 + * just found, since this extent is more up to date. 2540 + * Emit that adjusted range and cache the file extent 2541 + * item we have just found. This corresponds to the case 2542 + * where a previously found file extent item was split 2543 + * due to an ordered extent completing. 
2544 + */ 2545 + cache->len = offset - cache->offset; 2546 + goto emit; 2547 + } else { 2548 + const u64 range_end = offset + len; 2549 + 2550 + /* 2551 + * The offset of the file extent item we have just found 2552 + * is behind the cached offset. This means we were 2553 + * processing a hole or prealloc extent for which we 2554 + * have found delalloc ranges (in the io tree), so what 2555 + * we have in the cache is the last delalloc range we 2556 + * found while the file extent item we found can be 2557 + * either for a whole delalloc range we previously 2558 + * emmitted or only a part of that range. 2559 + * 2560 + * We have two cases here: 2561 + * 2562 + * 1) The file extent item's range ends at or behind the 2563 + * cached extent's end. In this case just ignore the 2564 + * current file extent item because we don't want to 2565 + * overlap with previous ranges that may have been 2566 + * emmitted already; 2567 + * 2568 + * 2) The file extent item starts behind the currently 2569 + * cached extent but its end offset goes beyond the 2570 + * end offset of the cached extent. We don't want to 2571 + * overlap with a previous range that may have been 2572 + * emmitted already, so we emit the currently cached 2573 + * extent and then partially store the current file 2574 + * extent item's range in the cache, for the subrange 2575 + * going the cached extent's end to the end of the 2576 + * file extent item. 
2577 + */ 2578 + if (range_end <= cache_end) 2579 + return 0; 2580 + 2581 + if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC))) 2582 + phys += cache_end - offset; 2583 + 2584 + offset = cache_end; 2585 + len = range_end - cache_end; 2586 + goto emit; 2587 + } 2502 2588 } 2503 2589 2504 2590 /* ··· 2605 2517 return 0; 2606 2518 } 2607 2519 2520 + emit: 2608 2521 /* Not mergeable, need to submit cached one */ 2609 2522 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, 2610 2523 cache->len, cache->flags); ··· 2996 2907 range_end = round_up(start + len, sectorsize); 2997 2908 prev_extent_end = range_start; 2998 2909 2999 - btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); 3000 - 3001 2910 ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); 3002 2911 if (ret < 0) 3003 - goto out_unlock; 2912 + goto out; 3004 2913 btrfs_release_path(path); 3005 2914 3006 2915 path->reada = READA_FORWARD; 3007 2916 ret = fiemap_search_slot(inode, path, range_start); 3008 2917 if (ret < 0) { 3009 - goto out_unlock; 2918 + goto out; 3010 2919 } else if (ret > 0) { 3011 2920 /* 3012 2921 * No file extent item found, but we may have delalloc between ··· 3051 2964 backref_ctx, 0, 0, 0, 3052 2965 prev_extent_end, hole_end); 3053 2966 if (ret < 0) { 3054 - goto out_unlock; 2967 + goto out; 3055 2968 } else if (ret > 0) { 3056 2969 /* fiemap_fill_next_extent() told us to stop. */ 3057 2970 stopped = true; ··· 3107 3020 extent_gen, 3108 3021 backref_ctx); 3109 3022 if (ret < 0) 3110 - goto out_unlock; 3023 + goto out; 3111 3024 else if (ret > 0) 3112 3025 flags |= FIEMAP_EXTENT_SHARED; 3113 3026 } ··· 3118 3031 } 3119 3032 3120 3033 if (ret < 0) { 3121 - goto out_unlock; 3034 + goto out; 3122 3035 } else if (ret > 0) { 3123 3036 /* fiemap_fill_next_extent() told us to stop. 
*/ 3124 3037 stopped = true; ··· 3129 3042 next_item: 3130 3043 if (fatal_signal_pending(current)) { 3131 3044 ret = -EINTR; 3132 - goto out_unlock; 3045 + goto out; 3133 3046 } 3134 3047 3135 3048 ret = fiemap_next_leaf_item(inode, path); 3136 3049 if (ret < 0) { 3137 - goto out_unlock; 3050 + goto out; 3138 3051 } else if (ret > 0) { 3139 3052 /* No more file extent items for this inode. */ 3140 3053 break; ··· 3158 3071 &delalloc_cached_state, backref_ctx, 3159 3072 0, 0, 0, prev_extent_end, range_end - 1); 3160 3073 if (ret < 0) 3161 - goto out_unlock; 3074 + goto out; 3162 3075 prev_extent_end = range_end; 3163 3076 } 3164 3077 ··· 3196 3109 } 3197 3110 3198 3111 ret = emit_last_fiemap_cache(fieinfo, &cache); 3199 - 3200 - out_unlock: 3201 - btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); 3202 3112 out: 3203 3113 free_extent_state(delalloc_cached_state); 3204 3114 btrfs_free_backref_share_ctx(backref_ctx);
+21 -1
fs/btrfs/inode.c
··· 7835 7835 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 7836 7836 u64 start, u64 len) 7837 7837 { 7838 + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); 7838 7839 int ret; 7839 7840 7840 7841 ret = fiemap_prep(inode, fieinfo, start, &len, 0); ··· 7861 7860 return ret; 7862 7861 } 7863 7862 7864 - return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); 7863 + btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); 7864 + 7865 + /* 7866 + * We did an initial flush to avoid holding the inode's lock while 7867 + * triggering writeback and waiting for the completion of IO and ordered 7868 + * extents. Now after we locked the inode we do it again, because it's 7869 + * possible a new write may have happened in between those two steps. 7870 + */ 7871 + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { 7872 + ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX); 7873 + if (ret) { 7874 + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); 7875 + return ret; 7876 + } 7877 + } 7878 + 7879 + ret = extent_fiemap(btrfs_inode, fieinfo, start, len); 7880 + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); 7881 + 7882 + return ret; 7865 7883 } 7866 7884 7867 7885 static int btrfs_writepages(struct address_space *mapping,
+1 -1
fs/btrfs/ioctl.c
··· 721 721 free_extent_buffer(leaf); 722 722 leaf = NULL; 723 723 724 - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); 724 + new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); 725 725 if (IS_ERR(new_root)) { 726 726 ret = PTR_ERR(new_root); 727 727 btrfs_abort_transaction(trans, ret);
+1 -1
fs/btrfs/transaction.c
··· 1834 1834 } 1835 1835 1836 1836 key.offset = (u64)-1; 1837 - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); 1837 + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); 1838 1838 if (IS_ERR(pending->snap)) { 1839 1839 ret = PTR_ERR(pending->snap); 1840 1840 pending->snap = NULL;