Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"This is an assortment of fixes. Most of the commits are from Filipe
(fsync, the inode allocation cache and a few others). Mark kicked in
a series fixing corners in the extent sharing ioctls, and everyone
else fixed up assorted other problems."

* 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: fix wrong check for btrfs_force_chunk_alloc()
Btrfs: fix warning of bytes_may_use
Btrfs: fix hang when failing to submit bio of directIO
Btrfs: fix a comment in inode.c:evict_inode_truncate_pages()
Btrfs: fix memory corruption on failure to submit bio for direct IO
btrfs: don't update mtime/ctime on deduped inodes
btrfs: allow dedupe of same inode
btrfs: fix deadlock with extent-same and readpage
btrfs: pass unaligned length to btrfs_cmp_data()
Btrfs: fix fsync after truncate when no_holes feature is enabled
Btrfs: fix fsync xattr loss in the fast fsync path
Btrfs: fix fsync data loss after append write
Btrfs: fix crash on close_ctree() if cleaner starts new transaction
Btrfs: fix race between caching kthread and returning inode to inode cache
Btrfs: use kmem_cache_free when freeing entry in inode cache
Btrfs: fix race between balance and unused block group deletion
btrfs: add error handling for scrub_workers_get()
btrfs: cleanup noused initialization of dev in btrfs_end_bio()
btrfs: qgroup: allow user to clear the limitation on qgroup

+642 -125
+2
fs/btrfs/btrfs_inode.h
··· 44 44 #define BTRFS_INODE_IN_DELALLOC_LIST 9 45 45 #define BTRFS_INODE_READDIO_NEED_LOCK 10 46 46 #define BTRFS_INODE_HAS_PROPS 11 47 + /* DIO is ready to submit */ 48 + #define BTRFS_INODE_DIO_READY 12 47 49 /* 48 50 * The following 3 bits are meant only for the btree inode. 49 51 * When any of them is set, it means an error happened while writing an
+1
fs/btrfs/ctree.h
··· 1778 1778 spinlock_t unused_bgs_lock; 1779 1779 struct list_head unused_bgs; 1780 1780 struct mutex unused_bg_unpin_mutex; 1781 + struct mutex delete_unused_bgs_mutex; 1781 1782 1782 1783 /* For btrfs to record security options */ 1783 1784 struct security_mnt_opts security_opts;
+40 -1
fs/btrfs/disk-io.c
··· 1751 1751 { 1752 1752 struct btrfs_root *root = arg; 1753 1753 int again; 1754 + struct btrfs_trans_handle *trans; 1754 1755 1755 1756 do { 1756 1757 again = 0; ··· 1773 1772 } 1774 1773 1775 1774 btrfs_run_delayed_iputs(root); 1776 - btrfs_delete_unused_bgs(root->fs_info); 1777 1775 again = btrfs_clean_one_deleted_snapshot(root); 1778 1776 mutex_unlock(&root->fs_info->cleaner_mutex); 1779 1777 ··· 1781 1781 * needn't do anything special here. 1782 1782 */ 1783 1783 btrfs_run_defrag_inodes(root->fs_info); 1784 + 1785 + /* 1786 + * Acquires fs_info->delete_unused_bgs_mutex to avoid racing 1787 + * with relocation (btrfs_relocate_chunk) and relocation 1788 + * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) 1789 + * after acquiring fs_info->delete_unused_bgs_mutex. So we 1790 + * can't hold, nor need to, fs_info->cleaner_mutex when deleting 1791 + * unused block groups. 1792 + */ 1793 + btrfs_delete_unused_bgs(root->fs_info); 1784 1794 sleep: 1785 1795 if (!try_to_freeze() && !again) { 1786 1796 set_current_state(TASK_INTERRUPTIBLE); ··· 1799 1789 __set_current_state(TASK_RUNNING); 1800 1790 } 1801 1791 } while (!kthread_should_stop()); 1792 + 1793 + /* 1794 + * Transaction kthread is stopped before us and wakes us up. 1795 + * However we might have started a new transaction and COWed some 1796 + * tree blocks when deleting unused block groups for example. So 1797 + * make sure we commit the transaction we started to have a clean 1798 + * shutdown when evicting the btree inode - if it has dirty pages 1799 + * when we do the final iput() on it, eviction will trigger a 1800 + * writeback for it which will fail with null pointer dereferences 1801 + * since work queues and other resources were already released and 1802 + * destroyed by the time the iput/eviction/writeback is made. 
1803 + */ 1804 + trans = btrfs_attach_transaction(root); 1805 + if (IS_ERR(trans)) { 1806 + if (PTR_ERR(trans) != -ENOENT) 1807 + btrfs_err(root->fs_info, 1808 + "cleaner transaction attach returned %ld", 1809 + PTR_ERR(trans)); 1810 + } else { 1811 + int ret; 1812 + 1813 + ret = btrfs_commit_transaction(trans, root); 1814 + if (ret) 1815 + btrfs_err(root->fs_info, 1816 + "cleaner open transaction commit returned %d", 1817 + ret); 1818 + } 1819 + 1802 1820 return 0; 1803 1821 } 1804 1822 ··· 2530 2492 spin_lock_init(&fs_info->unused_bgs_lock); 2531 2493 rwlock_init(&fs_info->tree_mod_log_lock); 2532 2494 mutex_init(&fs_info->unused_bg_unpin_mutex); 2495 + mutex_init(&fs_info->delete_unused_bgs_mutex); 2533 2496 mutex_init(&fs_info->reloc_mutex); 2534 2497 mutex_init(&fs_info->delalloc_root_mutex); 2535 2498 seqlock_init(&fs_info->profiles_lock);
+3
fs/btrfs/extent-tree.c
··· 9889 9889 } 9890 9890 spin_unlock(&fs_info->unused_bgs_lock); 9891 9891 9892 + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); 9893 + 9892 9894 /* Don't want to race with allocators so take the groups_sem */ 9893 9895 down_write(&space_info->groups_sem); 9894 9896 spin_lock(&block_group->lock); ··· 9985 9983 end_trans: 9986 9984 btrfs_end_transaction(trans, root); 9987 9985 next: 9986 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 9988 9987 btrfs_put_block_group(block_group); 9989 9988 spin_lock(&fs_info->unused_bgs_lock); 9990 9989 }
+12 -5
fs/btrfs/inode-map.c
··· 246 246 { 247 247 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 248 248 struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; 249 + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; 249 250 struct btrfs_free_space *info; 250 251 struct rb_node *n; 251 252 u64 count; ··· 255 254 return; 256 255 257 256 while (1) { 257 + bool add_to_ctl = true; 258 + 259 + spin_lock(rbroot_lock); 258 260 n = rb_first(rbroot); 259 - if (!n) 261 + if (!n) { 262 + spin_unlock(rbroot_lock); 260 263 break; 264 + } 261 265 262 266 info = rb_entry(n, struct btrfs_free_space, offset_index); 263 267 BUG_ON(info->bitmap); /* Logic error */ 264 268 265 269 if (info->offset > root->ino_cache_progress) 266 - goto free; 270 + add_to_ctl = false; 267 271 else if (info->offset + info->bytes > root->ino_cache_progress) 268 272 count = root->ino_cache_progress - info->offset + 1; 269 273 else 270 274 count = info->bytes; 271 275 272 - __btrfs_add_free_space(ctl, info->offset, count); 273 - free: 274 276 rb_erase(&info->offset_index, rbroot); 275 - kfree(info); 277 + spin_unlock(rbroot_lock); 278 + if (add_to_ctl) 279 + __btrfs_add_free_space(ctl, info->offset, count); 280 + kmem_cache_free(btrfs_free_space_cachep, info); 276 281 } 277 282 } 278 283
+63 -26
fs/btrfs/inode.c
··· 4989 4989 /* 4990 4990 * Keep looping until we have no more ranges in the io tree. 4991 4991 * We can have ongoing bios started by readpages (called from readahead) 4992 - * that didn't get their end io callbacks called yet or they are still 4993 - * in progress ((extent_io.c:end_bio_extent_readpage()). This means some 4992 + * that have their endio callback (extent_io.c:end_bio_extent_readpage) 4993 + * still in progress (unlocked the pages in the bio but did not yet 4994 + * unlocked the ranges in the io tree). Therefore this means some 4994 4995 * ranges can still be locked and eviction started because before 4995 4996 * submitting those bios, which are executed by a separate task (work 4996 4997 * queue kthread), inode references (inode->i_count) were not taken ··· 7547 7546 7548 7547 current->journal_info = outstanding_extents; 7549 7548 btrfs_free_reserved_data_space(inode, len); 7549 + set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); 7550 7550 } 7551 7551 7552 7552 /* ··· 7873 7871 struct bio *dio_bio; 7874 7872 int ret; 7875 7873 7876 - if (err) 7877 - goto out_done; 7878 7874 again: 7879 7875 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, 7880 7876 &ordered_offset, ··· 7895 7895 ordered = NULL; 7896 7896 goto again; 7897 7897 } 7898 - out_done: 7899 7898 dio_bio = dip->dio_bio; 7900 7899 7901 7900 kfree(dip); ··· 8162 8163 static void btrfs_submit_direct(int rw, struct bio *dio_bio, 8163 8164 struct inode *inode, loff_t file_offset) 8164 8165 { 8165 - struct btrfs_root *root = BTRFS_I(inode)->root; 8166 - struct btrfs_dio_private *dip; 8167 - struct bio *io_bio; 8166 + struct btrfs_dio_private *dip = NULL; 8167 + struct bio *io_bio = NULL; 8168 8168 struct btrfs_io_bio *btrfs_bio; 8169 8169 int skip_sum; 8170 8170 int write = rw & REQ_WRITE; ··· 8180 8182 dip = kzalloc(sizeof(*dip), GFP_NOFS); 8181 8183 if (!dip) { 8182 8184 ret = -ENOMEM; 8183 - goto free_io_bio; 8185 + goto free_ordered; 8184 8186 } 8185 8187 8186 8188 
dip->private = dio_bio->bi_private; ··· 8208 8210 8209 8211 if (btrfs_bio->end_io) 8210 8212 btrfs_bio->end_io(btrfs_bio, ret); 8211 - free_io_bio: 8212 - bio_put(io_bio); 8213 8213 8214 8214 free_ordered: 8215 8215 /* 8216 - * If this is a write, we need to clean up the reserved space and kill 8217 - * the ordered extent. 8216 + * If we arrived here it means either we failed to submit the dip 8217 + * or we either failed to clone the dio_bio or failed to allocate the 8218 + * dip. If we cloned the dio_bio and allocated the dip, we can just 8219 + * call bio_endio against our io_bio so that we get proper resource 8220 + * cleanup if we fail to submit the dip, otherwise, we must do the 8221 + * same as btrfs_endio_direct_[write|read] because we can't call these 8222 + * callbacks - they require an allocated dip and a clone of dio_bio. 8218 8223 */ 8219 - if (write) { 8220 - struct btrfs_ordered_extent *ordered; 8221 - ordered = btrfs_lookup_ordered_extent(inode, file_offset); 8222 - if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && 8223 - !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) 8224 - btrfs_free_reserved_extent(root, ordered->start, 8225 - ordered->disk_len, 1); 8226 - btrfs_put_ordered_extent(ordered); 8227 - btrfs_put_ordered_extent(ordered); 8224 + if (io_bio && dip) { 8225 + bio_endio(io_bio, ret); 8226 + /* 8227 + * The end io callbacks free our dip, do the final put on io_bio 8228 + * and all the cleanup and final put for dio_bio (through 8229 + * dio_end_io()). 
8230 + */ 8231 + dip = NULL; 8232 + io_bio = NULL; 8233 + } else { 8234 + if (write) { 8235 + struct btrfs_ordered_extent *ordered; 8236 + 8237 + ordered = btrfs_lookup_ordered_extent(inode, 8238 + file_offset); 8239 + set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); 8240 + /* 8241 + * Decrements our ref on the ordered extent and removes 8242 + * the ordered extent from the inode's ordered tree, 8243 + * doing all the proper resource cleanup such as for the 8244 + * reserved space and waking up any waiters for this 8245 + * ordered extent (through btrfs_remove_ordered_extent). 8246 + */ 8247 + btrfs_finish_ordered_io(ordered); 8248 + } else { 8249 + unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, 8250 + file_offset + dio_bio->bi_iter.bi_size - 1); 8251 + } 8252 + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); 8253 + /* 8254 + * Releases and cleans up our dio_bio, no need to bio_put() 8255 + * nor bio_endio()/bio_io_error() against dio_bio. 8256 + */ 8257 + dio_end_io(dio_bio, ret); 8228 8258 } 8229 - bio_endio(dio_bio, ret); 8259 + if (io_bio) 8260 + bio_put(io_bio); 8261 + kfree(dip); 8230 8262 } 8231 8263 8232 8264 static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, ··· 8358 8330 btrfs_submit_direct, flags); 8359 8331 if (iov_iter_rw(iter) == WRITE) { 8360 8332 current->journal_info = NULL; 8361 - if (ret < 0 && ret != -EIOCBQUEUED) 8362 - btrfs_delalloc_release_space(inode, count); 8363 - else if (ret >= 0 && (size_t)ret < count) 8333 + if (ret < 0 && ret != -EIOCBQUEUED) { 8334 + /* 8335 + * If the error comes from submitting stage, 8336 + * btrfs_get_blocsk_direct() has free'd data space, 8337 + * and metadata space will be handled by 8338 + * finish_ordered_fn, don't do that again to make 8339 + * sure bytes_may_use is correct. 
8340 + */ 8341 + if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, 8342 + &BTRFS_I(inode)->runtime_flags)) 8343 + btrfs_delalloc_release_space(inode, count); 8344 + } else if (ret >= 0 && (size_t)ret < count) 8364 8345 btrfs_delalloc_release_space(inode, 8365 8346 count - (size_t)ret); 8366 8347 }
+189 -54
fs/btrfs/ioctl.c
··· 87 87 88 88 89 89 static int btrfs_clone(struct inode *src, struct inode *inode, 90 - u64 off, u64 olen, u64 olen_aligned, u64 destoff); 90 + u64 off, u64 olen, u64 olen_aligned, u64 destoff, 91 + int no_time_update); 91 92 92 93 /* Mask out flags that are inappropriate for the given type of inode. */ 93 94 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) ··· 2766 2765 return ret; 2767 2766 } 2768 2767 2769 - static struct page *extent_same_get_page(struct inode *inode, u64 off) 2768 + static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) 2770 2769 { 2771 2770 struct page *page; 2772 - pgoff_t index; 2773 2771 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2774 - 2775 - index = off >> PAGE_CACHE_SHIFT; 2776 2772 2777 2773 page = grab_cache_page(inode->i_mapping, index); 2778 2774 if (!page) ··· 2789 2791 unlock_page(page); 2790 2792 2791 2793 return page; 2794 + } 2795 + 2796 + static int gather_extent_pages(struct inode *inode, struct page **pages, 2797 + int num_pages, u64 off) 2798 + { 2799 + int i; 2800 + pgoff_t index = off >> PAGE_CACHE_SHIFT; 2801 + 2802 + for (i = 0; i < num_pages; i++) { 2803 + pages[i] = extent_same_get_page(inode, index + i); 2804 + if (!pages[i]) 2805 + return -ENOMEM; 2806 + } 2807 + return 0; 2792 2808 } 2793 2809 2794 2810 static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) ··· 2830 2818 } 2831 2819 } 2832 2820 2833 - static void btrfs_double_unlock(struct inode *inode1, u64 loff1, 2834 - struct inode *inode2, u64 loff2, u64 len) 2821 + static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) 2835 2822 { 2836 - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); 2837 - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); 2838 - 2839 2823 mutex_unlock(&inode1->i_mutex); 2840 2824 mutex_unlock(&inode2->i_mutex); 2841 2825 } 2842 2826 2843 - static void btrfs_double_lock(struct inode *inode1, u64 loff1, 2844 - 
struct inode *inode2, u64 loff2, u64 len) 2827 + static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) 2828 + { 2829 + if (inode1 < inode2) 2830 + swap(inode1, inode2); 2831 + 2832 + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); 2833 + if (inode1 != inode2) 2834 + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); 2835 + } 2836 + 2837 + static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, 2838 + struct inode *inode2, u64 loff2, u64 len) 2839 + { 2840 + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); 2841 + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); 2842 + } 2843 + 2844 + static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, 2845 + struct inode *inode2, u64 loff2, u64 len) 2845 2846 { 2846 2847 if (inode1 < inode2) { 2847 2848 swap(inode1, inode2); 2848 2849 swap(loff1, loff2); 2849 2850 } 2850 - 2851 - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); 2852 2851 lock_extent_range(inode1, loff1, len); 2853 - if (inode1 != inode2) { 2854 - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); 2852 + if (inode1 != inode2) 2855 2853 lock_extent_range(inode2, loff2, len); 2854 + } 2855 + 2856 + struct cmp_pages { 2857 + int num_pages; 2858 + struct page **src_pages; 2859 + struct page **dst_pages; 2860 + }; 2861 + 2862 + static void btrfs_cmp_data_free(struct cmp_pages *cmp) 2863 + { 2864 + int i; 2865 + struct page *pg; 2866 + 2867 + for (i = 0; i < cmp->num_pages; i++) { 2868 + pg = cmp->src_pages[i]; 2869 + if (pg) 2870 + page_cache_release(pg); 2871 + pg = cmp->dst_pages[i]; 2872 + if (pg) 2873 + page_cache_release(pg); 2856 2874 } 2875 + kfree(cmp->src_pages); 2876 + kfree(cmp->dst_pages); 2877 + } 2878 + 2879 + static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, 2880 + struct inode *dst, u64 dst_loff, 2881 + u64 len, struct cmp_pages *cmp) 2882 + { 2883 + int ret; 2884 + int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; 2885 + struct 
page **src_pgarr, **dst_pgarr; 2886 + 2887 + /* 2888 + * We must gather up all the pages before we initiate our 2889 + * extent locking. We use an array for the page pointers. Size 2890 + * of the array is bounded by len, which is in turn bounded by 2891 + * BTRFS_MAX_DEDUPE_LEN. 2892 + */ 2893 + src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); 2894 + dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); 2895 + if (!src_pgarr || !dst_pgarr) { 2896 + kfree(src_pgarr); 2897 + kfree(dst_pgarr); 2898 + return -ENOMEM; 2899 + } 2900 + cmp->num_pages = num_pages; 2901 + cmp->src_pages = src_pgarr; 2902 + cmp->dst_pages = dst_pgarr; 2903 + 2904 + ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); 2905 + if (ret) 2906 + goto out; 2907 + 2908 + ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); 2909 + 2910 + out: 2911 + if (ret) 2912 + btrfs_cmp_data_free(cmp); 2913 + return 0; 2857 2914 } 2858 2915 2859 2916 static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, 2860 - u64 dst_loff, u64 len) 2917 + u64 dst_loff, u64 len, struct cmp_pages *cmp) 2861 2918 { 2862 2919 int ret = 0; 2920 + int i; 2863 2921 struct page *src_page, *dst_page; 2864 2922 unsigned int cmp_len = PAGE_CACHE_SIZE; 2865 2923 void *addr, *dst_addr; 2866 2924 2925 + i = 0; 2867 2926 while (len) { 2868 2927 if (len < PAGE_CACHE_SIZE) 2869 2928 cmp_len = len; 2870 2929 2871 - src_page = extent_same_get_page(src, loff); 2872 - if (!src_page) 2873 - return -EINVAL; 2874 - dst_page = extent_same_get_page(dst, dst_loff); 2875 - if (!dst_page) { 2876 - page_cache_release(src_page); 2877 - return -EINVAL; 2878 - } 2930 + BUG_ON(i >= cmp->num_pages); 2931 + 2932 + src_page = cmp->src_pages[i]; 2933 + dst_page = cmp->dst_pages[i]; 2934 + 2879 2935 addr = kmap_atomic(src_page); 2880 2936 dst_addr = kmap_atomic(dst_page); 2881 2937 ··· 2955 2875 2956 2876 kunmap_atomic(addr); 2957 2877 kunmap_atomic(dst_addr); 2958 - 
page_cache_release(src_page); 2959 - page_cache_release(dst_page); 2960 2878 2961 2879 if (ret) 2962 2880 break; 2963 2881 2964 - loff += cmp_len; 2965 - dst_loff += cmp_len; 2966 2882 len -= cmp_len; 2883 + i++; 2967 2884 } 2968 2885 2969 2886 return ret; ··· 2991 2914 { 2992 2915 int ret; 2993 2916 u64 len = olen; 2917 + struct cmp_pages cmp; 2918 + int same_inode = 0; 2919 + u64 same_lock_start = 0; 2920 + u64 same_lock_len = 0; 2994 2921 2995 - /* 2996 - * btrfs_clone() can't handle extents in the same file 2997 - * yet. Once that works, we can drop this check and replace it 2998 - * with a check for the same inode, but overlapping extents. 2999 - */ 3000 2922 if (src == dst) 3001 - return -EINVAL; 2923 + same_inode = 1; 3002 2924 3003 2925 if (len == 0) 3004 2926 return 0; 3005 2927 3006 - btrfs_double_lock(src, loff, dst, dst_loff, len); 2928 + if (same_inode) { 2929 + mutex_lock(&src->i_mutex); 3007 2930 3008 - ret = extent_same_check_offsets(src, loff, &len, olen); 3009 - if (ret) 3010 - goto out_unlock; 2931 + ret = extent_same_check_offsets(src, loff, &len, olen); 2932 + if (ret) 2933 + goto out_unlock; 3011 2934 3012 - ret = extent_same_check_offsets(dst, dst_loff, &len, olen); 3013 - if (ret) 3014 - goto out_unlock; 2935 + /* 2936 + * Single inode case wants the same checks, except we 2937 + * don't want our length pushed out past i_size as 2938 + * comparing that data range makes no sense. 2939 + * 2940 + * extent_same_check_offsets() will do this for an 2941 + * unaligned length at i_size, so catch it here and 2942 + * reject the request. 2943 + * 2944 + * This effectively means we require aligned extents 2945 + * for the single-inode case, whereas the other cases 2946 + * allow an unaligned length so long as it ends at 2947 + * i_size. 
2948 + */ 2949 + if (len != olen) { 2950 + ret = -EINVAL; 2951 + goto out_unlock; 2952 + } 2953 + 2954 + /* Check for overlapping ranges */ 2955 + if (dst_loff + len > loff && dst_loff < loff + len) { 2956 + ret = -EINVAL; 2957 + goto out_unlock; 2958 + } 2959 + 2960 + same_lock_start = min_t(u64, loff, dst_loff); 2961 + same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; 2962 + } else { 2963 + btrfs_double_inode_lock(src, dst); 2964 + 2965 + ret = extent_same_check_offsets(src, loff, &len, olen); 2966 + if (ret) 2967 + goto out_unlock; 2968 + 2969 + ret = extent_same_check_offsets(dst, dst_loff, &len, olen); 2970 + if (ret) 2971 + goto out_unlock; 2972 + } 3015 2973 3016 2974 /* don't make the dst file partly checksummed */ 3017 2975 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != ··· 3055 2943 goto out_unlock; 3056 2944 } 3057 2945 3058 - ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); 3059 - if (ret == 0) 3060 - ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); 2946 + ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); 2947 + if (ret) 2948 + goto out_unlock; 3061 2949 2950 + if (same_inode) 2951 + lock_extent_range(src, same_lock_start, same_lock_len); 2952 + else 2953 + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); 2954 + 2955 + /* pass original length for comparison so we stay within i_size */ 2956 + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); 2957 + if (ret == 0) 2958 + ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); 2959 + 2960 + if (same_inode) 2961 + unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, 2962 + same_lock_start + same_lock_len - 1); 2963 + else 2964 + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); 2965 + 2966 + btrfs_cmp_data_free(&cmp); 3062 2967 out_unlock: 3063 - btrfs_double_unlock(src, loff, dst, dst_loff, len); 2968 + if (same_inode) 2969 + mutex_unlock(&src->i_mutex); 2970 + else 2971 + btrfs_double_inode_unlock(src, dst); 3064 2972 3065 
2973 return ret; 3066 2974 } ··· 3232 3100 struct inode *inode, 3233 3101 u64 endoff, 3234 3102 const u64 destoff, 3235 - const u64 olen) 3103 + const u64 olen, 3104 + int no_time_update) 3236 3105 { 3237 3106 struct btrfs_root *root = BTRFS_I(inode)->root; 3238 3107 int ret; 3239 3108 3240 3109 inode_inc_iversion(inode); 3241 - inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3110 + if (!no_time_update) 3111 + inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3242 3112 /* 3243 3113 * We round up to the block size at eof when determining which 3244 3114 * extents to clone above, but shouldn't round up the file size. ··· 3325 3191 * @inode: Inode to clone to 3326 3192 * @off: Offset within source to start clone from 3327 3193 * @olen: Original length, passed by user, of range to clone 3328 - * @olen_aligned: Block-aligned value of olen, extent_same uses 3329 - * identical values here 3194 + * @olen_aligned: Block-aligned value of olen 3330 3195 * @destoff: Offset within @inode to start clone 3196 + * @no_time_update: Whether to update mtime/ctime on the target inode 3331 3197 */ 3332 3198 static int btrfs_clone(struct inode *src, struct inode *inode, 3333 3199 const u64 off, const u64 olen, const u64 olen_aligned, 3334 - const u64 destoff) 3200 + const u64 destoff, int no_time_update) 3335 3201 { 3336 3202 struct btrfs_root *root = BTRFS_I(inode)->root; 3337 3203 struct btrfs_path *path = NULL; ··· 3655 3521 root->sectorsize); 3656 3522 ret = clone_finish_inode_update(trans, inode, 3657 3523 last_dest_end, 3658 - destoff, olen); 3524 + destoff, olen, 3525 + no_time_update); 3659 3526 if (ret) 3660 3527 goto out; 3661 3528 if (new_key.offset + datal >= destoff + len) ··· 3694 3559 clone_update_extent_map(inode, trans, NULL, last_dest_end, 3695 3560 destoff + len - last_dest_end); 3696 3561 ret = clone_finish_inode_update(trans, inode, destoff + len, 3697 - destoff, olen); 3562 + destoff, olen, no_time_update); 3698 3563 } 3699 3564 3700 3565 out: ··· 3831 3696 
lock_extent_range(inode, destoff, len); 3832 3697 } 3833 3698 3834 - ret = btrfs_clone(src, inode, off, olen, len, destoff); 3699 + ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); 3835 3700 3836 3701 if (same_inode) { 3837 3702 u64 lock_start = min_t(u64, off, destoff);
+5
fs/btrfs/ordered-data.c
··· 552 552 trace_btrfs_ordered_extent_put(entry->inode, entry); 553 553 554 554 if (atomic_dec_and_test(&entry->refs)) { 555 + ASSERT(list_empty(&entry->log_list)); 556 + ASSERT(list_empty(&entry->trans_list)); 557 + ASSERT(list_empty(&entry->root_extent_list)); 558 + ASSERT(RB_EMPTY_NODE(&entry->rb_node)); 555 559 if (entry->inode) 556 560 btrfs_add_delayed_iput(entry->inode); 557 561 while (!list_empty(&entry->list)) { ··· 583 579 spin_lock_irq(&tree->lock); 584 580 node = &entry->rb_node; 585 581 rb_erase(node, &tree->tree); 582 + RB_CLEAR_NODE(node); 586 583 if (tree->last == node) 587 584 tree->last = NULL; 588 585 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
+41 -8
fs/btrfs/qgroup.c
··· 1349 1349 struct btrfs_root *quota_root; 1350 1350 struct btrfs_qgroup *qgroup; 1351 1351 int ret = 0; 1352 + /* Sometimes we would want to clear the limit on this qgroup. 1353 + * To meet this requirement, we treat the -1 as a special value 1354 + * which tell kernel to clear the limit on this qgroup. 1355 + */ 1356 + const u64 CLEAR_VALUE = -1; 1352 1357 1353 1358 mutex_lock(&fs_info->qgroup_ioctl_lock); 1354 1359 quota_root = fs_info->quota_root; ··· 1369 1364 } 1370 1365 1371 1366 spin_lock(&fs_info->qgroup_lock); 1372 - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) 1373 - qgroup->max_rfer = limit->max_rfer; 1374 - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) 1375 - qgroup->max_excl = limit->max_excl; 1376 - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) 1377 - qgroup->rsv_rfer = limit->rsv_rfer; 1378 - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) 1379 - qgroup->rsv_excl = limit->rsv_excl; 1367 + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1368 + if (limit->max_rfer == CLEAR_VALUE) { 1369 + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1370 + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1371 + qgroup->max_rfer = 0; 1372 + } else { 1373 + qgroup->max_rfer = limit->max_rfer; 1374 + } 1375 + } 1376 + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1377 + if (limit->max_excl == CLEAR_VALUE) { 1378 + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1379 + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1380 + qgroup->max_excl = 0; 1381 + } else { 1382 + qgroup->max_excl = limit->max_excl; 1383 + } 1384 + } 1385 + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1386 + if (limit->rsv_rfer == CLEAR_VALUE) { 1387 + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1388 + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1389 + qgroup->rsv_rfer = 0; 1390 + } else { 1391 + qgroup->rsv_rfer = limit->rsv_rfer; 1392 + } 1393 + } 1394 + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1395 + if (limit->rsv_excl == CLEAR_VALUE) { 1396 + 
qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1397 + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1398 + qgroup->rsv_excl = 0; 1399 + } else { 1400 + qgroup->rsv_excl = limit->rsv_excl; 1401 + } 1402 + } 1380 1403 qgroup->lim_flags |= limit->flags; 1381 1404 1382 1405 spin_unlock(&fs_info->qgroup_lock);
+1 -1
fs/btrfs/relocation.c
··· 4049 4049 if (trans && progress && err == -ENOSPC) { 4050 4050 ret = btrfs_force_chunk_alloc(trans, rc->extent_root, 4051 4051 rc->block_group->flags); 4052 - if (ret == 0) { 4052 + if (ret == 1) { 4053 4053 err = 0; 4054 4054 progress = 0; 4055 4055 goto restart;
+20 -19
fs/btrfs/scrub.c
··· 3571 3571 static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, 3572 3572 int is_dev_replace) 3573 3573 { 3574 - int ret = 0; 3575 3574 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; 3576 3575 int max_active = fs_info->thread_pool_size; 3577 3576 ··· 3583 3584 fs_info->scrub_workers = 3584 3585 btrfs_alloc_workqueue("btrfs-scrub", flags, 3585 3586 max_active, 4); 3586 - if (!fs_info->scrub_workers) { 3587 - ret = -ENOMEM; 3588 - goto out; 3589 - } 3587 + if (!fs_info->scrub_workers) 3588 + goto fail_scrub_workers; 3589 + 3590 3590 fs_info->scrub_wr_completion_workers = 3591 3591 btrfs_alloc_workqueue("btrfs-scrubwrc", flags, 3592 3592 max_active, 2); 3593 - if (!fs_info->scrub_wr_completion_workers) { 3594 - ret = -ENOMEM; 3595 - goto out; 3596 - } 3593 + if (!fs_info->scrub_wr_completion_workers) 3594 + goto fail_scrub_wr_completion_workers; 3595 + 3597 3596 fs_info->scrub_nocow_workers = 3598 3597 btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); 3599 - if (!fs_info->scrub_nocow_workers) { 3600 - ret = -ENOMEM; 3601 - goto out; 3602 - } 3598 + if (!fs_info->scrub_nocow_workers) 3599 + goto fail_scrub_nocow_workers; 3603 3600 fs_info->scrub_parity_workers = 3604 3601 btrfs_alloc_workqueue("btrfs-scrubparity", flags, 3605 3602 max_active, 2); 3606 - if (!fs_info->scrub_parity_workers) { 3607 - ret = -ENOMEM; 3608 - goto out; 3609 - } 3603 + if (!fs_info->scrub_parity_workers) 3604 + goto fail_scrub_parity_workers; 3610 3605 } 3611 3606 ++fs_info->scrub_workers_refcnt; 3612 - out: 3613 - return ret; 3607 + return 0; 3608 + 3609 + fail_scrub_parity_workers: 3610 + btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); 3611 + fail_scrub_nocow_workers: 3612 + btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); 3613 + fail_scrub_wr_completion_workers: 3614 + btrfs_destroy_workqueue(fs_info->scrub_workers); 3615 + fail_scrub_workers: 3616 + return -ENOMEM; 3614 3617 } 3615 3618 3616 3619 static noinline_for_stack void 
scrub_workers_put(struct btrfs_fs_info *fs_info)
+221 -5
fs/btrfs/tree-log.c
··· 4117 4117 return 0; 4118 4118 } 4119 4119 4120 + /* 4121 + * At the moment we always log all xattrs. This is to figure out at log replay 4122 + * time which xattrs must have their deletion replayed. If a xattr is missing 4123 + * in the log tree and exists in the fs/subvol tree, we delete it. This is 4124 + * because if a xattr is deleted, the inode is fsynced and a power failure 4125 + * happens, causing the log to be replayed the next time the fs is mounted, 4126 + * we want the xattr to not exist anymore (same behaviour as other filesystems 4127 + * with a journal, ext3/4, xfs, f2fs, etc). 4128 + */ 4129 + static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, 4130 + struct btrfs_root *root, 4131 + struct inode *inode, 4132 + struct btrfs_path *path, 4133 + struct btrfs_path *dst_path) 4134 + { 4135 + int ret; 4136 + struct btrfs_key key; 4137 + const u64 ino = btrfs_ino(inode); 4138 + int ins_nr = 0; 4139 + int start_slot = 0; 4140 + 4141 + key.objectid = ino; 4142 + key.type = BTRFS_XATTR_ITEM_KEY; 4143 + key.offset = 0; 4144 + 4145 + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4146 + if (ret < 0) 4147 + return ret; 4148 + 4149 + while (true) { 4150 + int slot = path->slots[0]; 4151 + struct extent_buffer *leaf = path->nodes[0]; 4152 + int nritems = btrfs_header_nritems(leaf); 4153 + 4154 + if (slot >= nritems) { 4155 + if (ins_nr > 0) { 4156 + u64 last_extent = 0; 4157 + 4158 + ret = copy_items(trans, inode, dst_path, path, 4159 + &last_extent, start_slot, 4160 + ins_nr, 1, 0); 4161 + /* can't be 1, extent items aren't processed */ 4162 + ASSERT(ret <= 0); 4163 + if (ret < 0) 4164 + return ret; 4165 + ins_nr = 0; 4166 + } 4167 + ret = btrfs_next_leaf(root, path); 4168 + if (ret < 0) 4169 + return ret; 4170 + else if (ret > 0) 4171 + break; 4172 + continue; 4173 + } 4174 + 4175 + btrfs_item_key_to_cpu(leaf, &key, slot); 4176 + if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) 4177 + break; 4178 + 4179 + if (ins_nr == 0) 
4180 + start_slot = slot; 4181 + ins_nr++; 4182 + path->slots[0]++; 4183 + cond_resched(); 4184 + } 4185 + if (ins_nr > 0) { 4186 + u64 last_extent = 0; 4187 + 4188 + ret = copy_items(trans, inode, dst_path, path, 4189 + &last_extent, start_slot, 4190 + ins_nr, 1, 0); 4191 + /* can't be 1, extent items aren't processed */ 4192 + ASSERT(ret <= 0); 4193 + if (ret < 0) 4194 + return ret; 4195 + } 4196 + 4197 + return 0; 4198 + } 4199 + 4200 + /* 4201 + * If the no holes feature is enabled we need to make sure any hole between the 4202 + * last extent and the i_size of our inode is explicitly marked in the log. This 4203 + * is to make sure that doing something like: 4204 + * 4205 + * 1) create file with 128Kb of data 4206 + * 2) truncate file to 64Kb 4207 + * 3) truncate file to 256Kb 4208 + * 4) fsync file 4209 + * 5) <crash/power failure> 4210 + * 6) mount fs and trigger log replay 4211 + * 4212 + * Will give us a file with a size of 256Kb, the first 64Kb of data match what 4213 + * the file had in its first 64Kb of data at step 1 and the last 192Kb of the 4214 + * file correspond to a hole. The presence of explicit holes in a log tree is 4215 + * what guarantees that log replay will remove/adjust file extent items in the 4216 + * fs/subvol tree. 4217 + * 4218 + * Here we do not need to care about holes between extents, that is already done 4219 + * by copy_items(). We also only need to do this in the full sync path, where we 4220 + * lookup for extents from the fs/subvol tree only. In the fast path case, we 4221 + * lookup the list of modified extent maps and if any represents a hole, we 4222 + * insert a corresponding extent representing a hole in the log tree. 
4223 + */ 4224 + static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, 4225 + struct btrfs_root *root, 4226 + struct inode *inode, 4227 + struct btrfs_path *path) 4228 + { 4229 + int ret; 4230 + struct btrfs_key key; 4231 + u64 hole_start; 4232 + u64 hole_size; 4233 + struct extent_buffer *leaf; 4234 + struct btrfs_root *log = root->log_root; 4235 + const u64 ino = btrfs_ino(inode); 4236 + const u64 i_size = i_size_read(inode); 4237 + 4238 + if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) 4239 + return 0; 4240 + 4241 + key.objectid = ino; 4242 + key.type = BTRFS_EXTENT_DATA_KEY; 4243 + key.offset = (u64)-1; 4244 + 4245 + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4246 + ASSERT(ret != 0); 4247 + if (ret < 0) 4248 + return ret; 4249 + 4250 + ASSERT(path->slots[0] > 0); 4251 + path->slots[0]--; 4252 + leaf = path->nodes[0]; 4253 + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4254 + 4255 + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { 4256 + /* inode does not have any extents */ 4257 + hole_start = 0; 4258 + hole_size = i_size; 4259 + } else { 4260 + struct btrfs_file_extent_item *extent; 4261 + u64 len; 4262 + 4263 + /* 4264 + * If there's an extent beyond i_size, an explicit hole was 4265 + * already inserted by copy_items(). 4266 + */ 4267 + if (key.offset >= i_size) 4268 + return 0; 4269 + 4270 + extent = btrfs_item_ptr(leaf, path->slots[0], 4271 + struct btrfs_file_extent_item); 4272 + 4273 + if (btrfs_file_extent_type(leaf, extent) == 4274 + BTRFS_FILE_EXTENT_INLINE) { 4275 + len = btrfs_file_extent_inline_len(leaf, 4276 + path->slots[0], 4277 + extent); 4278 + ASSERT(len == i_size); 4279 + return 0; 4280 + } 4281 + 4282 + len = btrfs_file_extent_num_bytes(leaf, extent); 4283 + /* Last extent goes beyond i_size, no need to log a hole. 
*/ 4284 + if (key.offset + len > i_size) 4285 + return 0; 4286 + hole_start = key.offset + len; 4287 + hole_size = i_size - hole_start; 4288 + } 4289 + btrfs_release_path(path); 4290 + 4291 + /* Last extent ends at i_size. */ 4292 + if (hole_size == 0) 4293 + return 0; 4294 + 4295 + hole_size = ALIGN(hole_size, root->sectorsize); 4296 + ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, 4297 + hole_size, 0, hole_size, 0, 0, 0); 4298 + return ret; 4299 + } 4300 + 4120 4301 /* log a single inode in the tree log. 4121 4302 * At least one parent directory for this inode must exist in the tree 4122 4303 * or be logged already. ··· 4336 4155 u64 ino = btrfs_ino(inode); 4337 4156 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 4338 4157 u64 logged_isize = 0; 4158 + bool need_log_inode_item = true; 4339 4159 4340 4160 path = btrfs_alloc_path(); 4341 4161 if (!path) ··· 4445 4263 } else { 4446 4264 if (inode_only == LOG_INODE_ALL) 4447 4265 fast_search = true; 4448 - ret = log_inode_item(trans, log, dst_path, inode); 4449 - if (ret) { 4450 - err = ret; 4451 - goto out_unlock; 4452 - } 4453 4266 goto log_extents; 4454 4267 } 4455 4268 ··· 4466 4289 break; 4467 4290 if (min_key.type > max_key.type) 4468 4291 break; 4292 + 4293 + if (min_key.type == BTRFS_INODE_ITEM_KEY) 4294 + need_log_inode_item = false; 4295 + 4296 + /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ 4297 + if (min_key.type == BTRFS_XATTR_ITEM_KEY) { 4298 + if (ins_nr == 0) 4299 + goto next_slot; 4300 + ret = copy_items(trans, inode, dst_path, path, 4301 + &last_extent, ins_start_slot, 4302 + ins_nr, inode_only, logged_isize); 4303 + if (ret < 0) { 4304 + err = ret; 4305 + goto out_unlock; 4306 + } 4307 + ins_nr = 0; 4308 + if (ret) { 4309 + btrfs_release_path(path); 4310 + continue; 4311 + } 4312 + goto next_slot; 4313 + } 4469 4314 4470 4315 src = path->nodes[0]; 4471 4316 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ··· 4556 4357 ins_nr = 0; 4557 
4358 } 4558 4359 4360 + btrfs_release_path(path); 4361 + btrfs_release_path(dst_path); 4362 + err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); 4363 + if (err) 4364 + goto out_unlock; 4365 + if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { 4366 + btrfs_release_path(path); 4367 + btrfs_release_path(dst_path); 4368 + err = btrfs_log_trailing_hole(trans, root, inode, path); 4369 + if (err) 4370 + goto out_unlock; 4371 + } 4559 4372 log_extents: 4560 4373 btrfs_release_path(path); 4561 4374 btrfs_release_path(dst_path); 4375 + if (need_log_inode_item) { 4376 + err = log_inode_item(trans, log, dst_path, inode); 4377 + if (err) 4378 + goto out_unlock; 4379 + } 4562 4380 if (fast_search) { 4563 4381 /* 4564 4382 * Some ordered extents started by fsync might have completed
+44 -6
fs/btrfs/volumes.c
··· 2766 2766 root = root->fs_info->chunk_root; 2767 2767 extent_root = root->fs_info->extent_root; 2768 2768 2769 + /* 2770 + * Prevent races with automatic removal of unused block groups. 2771 + * After we relocate and before we remove the chunk with offset 2772 + * chunk_offset, automatic removal of the block group can kick in, 2773 + * resulting in a failure when calling btrfs_remove_chunk() below. 2774 + * 2775 + * Make sure to acquire this mutex before doing a tree search (dev 2776 + * or chunk trees) to find chunks. Otherwise the cleaner kthread might 2777 + * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after 2778 + * we release the path used to search the chunk/dev tree and before 2779 + * the current task acquires this mutex and calls us. 2780 + */ 2781 + ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex)); 2782 + 2769 2783 ret = btrfs_can_relocate(extent_root, chunk_offset); 2770 2784 if (ret) 2771 2785 return -ENOSPC; ··· 2828 2814 key.type = BTRFS_CHUNK_ITEM_KEY; 2829 2815 2830 2816 while (1) { 2817 + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); 2831 2818 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); 2832 - if (ret < 0) 2819 + if (ret < 0) { 2820 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 2833 2821 goto error; 2822 + } 2834 2823 BUG_ON(ret == 0); /* Corruption */ 2835 2824 2836 2825 ret = btrfs_previous_item(chunk_root, path, key.objectid, 2837 2826 key.type); 2827 + if (ret) 2828 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 2838 2829 if (ret < 0) 2839 2830 goto error; 2840 2831 if (ret > 0) ··· 2862 2843 else 2863 2844 BUG_ON(ret); 2864 2845 } 2846 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 2865 2847 2866 2848 if (found_key.offset == 0) 2867 2849 break; ··· 3319 3299 goto error; 3320 3300 } 3321 3301 3302 + mutex_lock(&fs_info->delete_unused_bgs_mutex); 3322 3303 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); 3323 - if (ret < 0) 3304 + if (ret < 
0) { 3305 + mutex_unlock(&fs_info->delete_unused_bgs_mutex); 3324 3306 goto error; 3307 + } 3325 3308 3326 3309 /* 3327 3310 * this shouldn't happen, it means the last relocate ··· 3336 3313 ret = btrfs_previous_item(chunk_root, path, 0, 3337 3314 BTRFS_CHUNK_ITEM_KEY); 3338 3315 if (ret) { 3316 + mutex_unlock(&fs_info->delete_unused_bgs_mutex); 3339 3317 ret = 0; 3340 3318 break; 3341 3319 } ··· 3345 3321 slot = path->slots[0]; 3346 3322 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3347 3323 3348 - if (found_key.objectid != key.objectid) 3324 + if (found_key.objectid != key.objectid) { 3325 + mutex_unlock(&fs_info->delete_unused_bgs_mutex); 3349 3326 break; 3327 + } 3350 3328 3351 3329 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 3352 3330 ··· 3361 3335 ret = should_balance_chunk(chunk_root, leaf, chunk, 3362 3336 found_key.offset); 3363 3337 btrfs_release_path(path); 3364 - if (!ret) 3338 + if (!ret) { 3339 + mutex_unlock(&fs_info->delete_unused_bgs_mutex); 3365 3340 goto loop; 3341 + } 3366 3342 3367 3343 if (counting) { 3344 + mutex_unlock(&fs_info->delete_unused_bgs_mutex); 3368 3345 spin_lock(&fs_info->balance_lock); 3369 3346 bctl->stat.expected++; 3370 3347 spin_unlock(&fs_info->balance_lock); ··· 3377 3348 ret = btrfs_relocate_chunk(chunk_root, 3378 3349 found_key.objectid, 3379 3350 found_key.offset); 3351 + mutex_unlock(&fs_info->delete_unused_bgs_mutex); 3380 3352 if (ret && ret != -ENOSPC) 3381 3353 goto error; 3382 3354 if (ret == -ENOSPC) { ··· 4117 4087 key.type = BTRFS_DEV_EXTENT_KEY; 4118 4088 4119 4089 do { 4090 + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); 4120 4091 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4121 - if (ret < 0) 4092 + if (ret < 0) { 4093 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 4122 4094 goto done; 4095 + } 4123 4096 4124 4097 ret = btrfs_previous_item(root, path, 0, key.type); 4098 + if (ret) 4099 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 4125 4100 if (ret < 0) 
4126 4101 goto done; 4127 4102 if (ret) { ··· 4140 4105 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 4141 4106 4142 4107 if (key.objectid != device->devid) { 4108 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 4143 4109 btrfs_release_path(path); 4144 4110 break; 4145 4111 } ··· 4149 4113 length = btrfs_dev_extent_length(l, dev_extent); 4150 4114 4151 4115 if (key.offset + length <= new_size) { 4116 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 4152 4117 btrfs_release_path(path); 4153 4118 break; 4154 4119 } ··· 4159 4122 btrfs_release_path(path); 4160 4123 4161 4124 ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); 4125 + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); 4162 4126 if (ret && ret != -ENOSPC) 4163 4127 goto done; 4164 4128 if (ret == -ENOSPC) ··· 5753 5715 static void btrfs_end_bio(struct bio *bio, int err) 5754 5716 { 5755 5717 struct btrfs_bio *bbio = bio->bi_private; 5756 - struct btrfs_device *dev = bbio->stripes[0].dev; 5757 5718 int is_orig_bio = 0; 5758 5719 5759 5720 if (err) { ··· 5760 5723 if (err == -EIO || err == -EREMOTEIO) { 5761 5724 unsigned int stripe_index = 5762 5725 btrfs_io_bio(bio)->stripe_index; 5726 + struct btrfs_device *dev; 5763 5727 5764 5728 BUG_ON(stripe_index >= bbio->num_stripes); 5765 5729 dev = bbio->stripes[stripe_index].dev;