Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

- two regression fixes in clone/dedupe ioctls, the generic check
callback needs to lock extents properly and wait for io to avoid
problems with writeback and relocation

- fix deadlock when using free space tree due to block group creation

- a recently added check refuses a valid filesystem with a seeding device,
make that work again with a quickfix, proper solution needs more
intrusive changes

* tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: Use real device structure to verify dev extent
Btrfs: fix deadlock when using free space tree due to block group creation
Btrfs: fix race between reflink/dedupe and relocation
Btrfs: fix race between cloning range ending at eof and writeback

+64 -13
+9 -7
fs/btrfs/ctree.c
··· 1016 1016 parent_start = parent->start; 1017 1017 1018 1018 /* 1019 - * If we are COWing a node/leaf from the extent, chunk or device trees, 1020 - * make sure that we do not finish block group creation of pending block 1021 - * groups. We do this to avoid a deadlock. 1019 + * If we are COWing a node/leaf from the extent, chunk, device or free 1020 + * space trees, make sure that we do not finish block group creation of 1021 + * pending block groups. We do this to avoid a deadlock. 1022 1022 * COWing can result in allocation of a new chunk, and flushing pending 1023 1023 * block groups (btrfs_create_pending_block_groups()) can be triggered 1024 1024 * when finishing allocation of a new chunk. Creation of a pending block 1025 - * group modifies the extent, chunk and device trees, therefore we could 1026 - * deadlock with ourselves since we are holding a lock on an extent 1027 - * buffer that btrfs_create_pending_block_groups() may try to COW later. 1025 + * group modifies the extent, chunk, device and free space trees, 1026 + * therefore we could deadlock with ourselves since we are holding a 1027 + * lock on an extent buffer that btrfs_create_pending_block_groups() may 1028 + * try to COW later. 1028 1029 */ 1029 1030 if (root == fs_info->extent_root || 1030 1031 root == fs_info->chunk_root || 1031 - root == fs_info->dev_root) 1032 + root == fs_info->dev_root || 1033 + root == fs_info->free_space_root) 1032 1034 trans->can_flush_pending_bgs = false; 1033 1035 1034 1036 cow = btrfs_alloc_tree_block(trans, root, parent_start,
+43 -6
fs/btrfs/ioctl.c
··· 3221 3221 inode_lock_nested(inode2, I_MUTEX_CHILD); 3222 3222 } 3223 3223 3224 + static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, 3225 + struct inode *inode2, u64 loff2, u64 len) 3226 + { 3227 + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); 3228 + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); 3229 + } 3230 + 3231 + static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, 3232 + struct inode *inode2, u64 loff2, u64 len) 3233 + { 3234 + if (inode1 < inode2) { 3235 + swap(inode1, inode2); 3236 + swap(loff1, loff2); 3237 + } else if (inode1 == inode2 && loff2 < loff1) { 3238 + swap(loff1, loff2); 3239 + } 3240 + lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); 3241 + lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); 3242 + } 3243 + 3224 3244 static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, 3225 3245 struct inode *dst, u64 dst_loff) 3226 3246 { ··· 3262 3242 return -EINVAL; 3263 3243 3264 3244 /* 3265 - * Lock destination range to serialize with concurrent readpages(). 3245 + * Lock destination range to serialize with concurrent readpages() and 3246 + * source range to serialize with relocation. 
3266 3247 */ 3267 - lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); 3248 + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); 3268 3249 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); 3269 - unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); 3250 + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); 3270 3251 3271 3252 return ret; 3272 3253 } ··· 3926 3905 len = ALIGN(src->i_size, bs) - off; 3927 3906 3928 3907 if (destoff > inode->i_size) { 3908 + const u64 wb_start = ALIGN_DOWN(inode->i_size, bs); 3909 + 3929 3910 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 3911 + if (ret) 3912 + return ret; 3913 + /* 3914 + * We may have truncated the last block if the inode's size is 3915 + * not sector size aligned, so we need to wait for writeback to 3916 + * complete before proceeding further, otherwise we can race 3917 + * with cloning and attempt to increment a reference to an 3918 + * extent that no longer exists (writeback completed right after 3919 + * we found the previous extent covering eof and before we 3920 + * attempted to increment its reference count). 3921 + */ 3922 + ret = btrfs_wait_ordered_range(inode, wb_start, 3923 + destoff - wb_start); 3930 3924 if (ret) 3931 3925 return ret; 3932 3926 } 3933 3927 3934 3928 /* 3935 - * Lock destination range to serialize with concurrent readpages(). 3929 + * Lock destination range to serialize with concurrent readpages() and 3930 + * source range to serialize with relocation. 
3936 3931 */ 3937 - lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); 3932 + btrfs_double_extent_lock(src, off, inode, destoff, len); 3938 3933 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); 3939 - unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); 3934 + btrfs_double_extent_unlock(src, off, inode, destoff, len); 3940 3935 /* 3941 3936 * Truncate page cache pages so that future reads will see the cloned 3942 3937 * data immediately and not the previous data.
+12
fs/btrfs/volumes.c
··· 7825 7825 ret = -EUCLEAN; 7826 7826 goto out; 7827 7827 } 7828 + 7829 + /* It's possible this device is a dummy for seed device */ 7830 + if (dev->disk_total_bytes == 0) { 7831 + dev = find_device(fs_info->fs_devices->seed, devid, NULL); 7832 + if (!dev) { 7833 + btrfs_err(fs_info, "failed to find seed devid %llu", 7834 + devid); 7835 + ret = -EUCLEAN; 7836 + goto out; 7837 + } 7838 + } 7839 + 7828 7840 if (physical_offset + physical_len > dev->disk_total_bytes) { 7829 7841 btrfs_err(fs_info, 7830 7842 "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",