Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

- Fix a deadlock in fiemap.

There was a big lock around the whole operation that could interfere
with a page fault and mkwrite.

Reducing the lock scope can also speed up fiemap.

- Fix range condition for extent defragmentation which could lead to
a worse layout in some cases.

* tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: fix deadlock with fiemap and extent locking
btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size

+46 -18
+1 -1
fs/btrfs/defrag.c
··· 1046 1046 goto add; 1047 1047 1048 1048 /* Skip too large extent */ 1049 - if (range_len >= extent_thresh) 1049 + if (em->len >= extent_thresh) 1050 1050 goto next; 1051 1051 1052 1052 /*
+45 -17
fs/btrfs/extent_io.c
··· 2689 2689 * it beyond i_size. 2690 2690 */ 2691 2691 while (cur_offset < end && cur_offset < i_size) { 2692 + struct extent_state *cached_state = NULL; 2692 2693 u64 delalloc_start; 2693 2694 u64 delalloc_end; 2694 2695 u64 prealloc_start; 2696 + u64 lockstart; 2697 + u64 lockend; 2695 2698 u64 prealloc_len = 0; 2696 2699 bool delalloc; 2697 2700 2701 + lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); 2702 + lockend = round_up(end, inode->root->fs_info->sectorsize); 2703 + 2704 + /* 2705 + * We are only locking for the delalloc range because that's the 2706 + * only thing that can change here. With fiemap we have a lock 2707 + * on the inode, so no buffered or direct writes can happen. 2708 + * 2709 + * However mmaps and normal page writeback will cause this to 2710 + * change arbitrarily. We have to lock the extent lock here to 2711 + * make sure that nobody messes with the tree while we're doing 2712 + * btrfs_find_delalloc_in_range. 2713 + */ 2714 + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); 2698 2715 delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, 2699 2716 delalloc_cached_state, 2700 2717 &delalloc_start, 2701 2718 &delalloc_end); 2719 + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); 2702 2720 if (!delalloc) 2703 2721 break; 2704 2722 ··· 2884 2866 u64 start, u64 len) 2885 2867 { 2886 2868 const u64 ino = btrfs_ino(inode); 2887 - struct extent_state *cached_state = NULL; 2888 2869 struct extent_state *delalloc_cached_state = NULL; 2889 2870 struct btrfs_path *path; 2890 2871 struct fiemap_cache cache = { 0 }; 2891 2872 struct btrfs_backref_share_check_ctx *backref_ctx; 2892 2873 u64 last_extent_end; 2893 2874 u64 prev_extent_end; 2894 - u64 lockstart; 2895 - u64 lockend; 2875 + u64 range_start; 2876 + u64 range_end; 2877 + const u64 sectorsize = inode->root->fs_info->sectorsize; 2896 2878 bool stopped = false; 2897 2879 int ret; 2898 2880 ··· 2903 2885 goto out; 2904 2886 } 2905 
2887 2906 - lockstart = round_down(start, inode->root->fs_info->sectorsize); 2907 - lockend = round_up(start + len, inode->root->fs_info->sectorsize); 2908 - prev_extent_end = lockstart; 2888 + range_start = round_down(start, sectorsize); 2889 + range_end = round_up(start + len, sectorsize); 2890 + prev_extent_end = range_start; 2909 2891 2910 2892 btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); 2911 - lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); 2912 2893 2913 2894 ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); 2914 2895 if (ret < 0) ··· 2915 2898 btrfs_release_path(path); 2916 2899 2917 2900 path->reada = READA_FORWARD; 2918 - ret = fiemap_search_slot(inode, path, lockstart); 2901 + ret = fiemap_search_slot(inode, path, range_start); 2919 2902 if (ret < 0) { 2920 2903 goto out_unlock; 2921 2904 } else if (ret > 0) { ··· 2927 2910 goto check_eof_delalloc; 2928 2911 } 2929 2912 2930 - while (prev_extent_end < lockend) { 2913 + while (prev_extent_end < range_end) { 2931 2914 struct extent_buffer *leaf = path->nodes[0]; 2932 2915 struct btrfs_file_extent_item *ei; 2933 2916 struct btrfs_key key; ··· 2950 2933 * The first iteration can leave us at an extent item that ends 2951 2934 * before our range's start. Move to the next item. 2952 2935 */ 2953 - if (extent_end <= lockstart) 2936 + if (extent_end <= range_start) 2954 2937 goto next_item; 2955 2938 2956 2939 backref_ctx->curr_leaf_bytenr = leaf->start; 2957 2940 2958 2941 /* We have in implicit hole (NO_HOLES feature enabled). 
*/ 2959 2942 if (prev_extent_end < key.offset) { 2960 - const u64 range_end = min(key.offset, lockend) - 1; 2943 + const u64 hole_end = min(key.offset, range_end) - 1; 2961 2944 2962 2945 ret = fiemap_process_hole(inode, fieinfo, &cache, 2963 2946 &delalloc_cached_state, 2964 2947 backref_ctx, 0, 0, 0, 2965 - prev_extent_end, range_end); 2948 + prev_extent_end, hole_end); 2966 2949 if (ret < 0) { 2967 2950 goto out_unlock; 2968 2951 } else if (ret > 0) { ··· 2972 2955 } 2973 2956 2974 2957 /* We've reached the end of the fiemap range, stop. */ 2975 - if (key.offset >= lockend) { 2958 + if (key.offset >= range_end) { 2976 2959 stopped = true; 2977 2960 break; 2978 2961 } ··· 3066 3049 btrfs_free_path(path); 3067 3050 path = NULL; 3068 3051 3069 - if (!stopped && prev_extent_end < lockend) { 3052 + if (!stopped && prev_extent_end < range_end) { 3070 3053 ret = fiemap_process_hole(inode, fieinfo, &cache, 3071 3054 &delalloc_cached_state, backref_ctx, 3072 - 0, 0, 0, prev_extent_end, lockend - 1); 3055 + 0, 0, 0, prev_extent_end, range_end - 1); 3073 3056 if (ret < 0) 3074 3057 goto out_unlock; 3075 - prev_extent_end = lockend; 3058 + prev_extent_end = range_end; 3076 3059 } 3077 3060 3078 3061 if (cache.cached && cache.offset + cache.len >= last_extent_end) { 3079 3062 const u64 i_size = i_size_read(&inode->vfs_inode); 3080 3063 3081 3064 if (prev_extent_end < i_size) { 3065 + struct extent_state *cached_state = NULL; 3082 3066 u64 delalloc_start; 3083 3067 u64 delalloc_end; 3068 + u64 lockstart; 3069 + u64 lockend; 3084 3070 bool delalloc; 3085 3071 3072 + lockstart = round_down(prev_extent_end, sectorsize); 3073 + lockend = round_up(i_size, sectorsize); 3074 + 3075 + /* 3076 + * See the comment in fiemap_process_hole as to why 3077 + * we're doing the locking here. 
3078 + */ 3079 + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); 3086 3080 delalloc = btrfs_find_delalloc_in_range(inode, 3087 3081 prev_extent_end, 3088 3082 i_size - 1, 3089 3083 &delalloc_cached_state, 3090 3084 &delalloc_start, 3091 3085 &delalloc_end); 3086 + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); 3092 3087 if (!delalloc) 3093 3088 cache.flags |= FIEMAP_EXTENT_LAST; 3094 3089 } else { ··· 3111 3082 ret = emit_last_fiemap_cache(fieinfo, &cache); 3112 3083 3113 3084 out_unlock: 3114 - unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); 3115 3085 btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); 3116 3086 out: 3117 3087 free_extent_state(delalloc_cached_state);