btrfs: move reclaiming of a single block group into its own function

+133 -123

1 changed file

expand all

btrfs

block-group.c

+133 -123

fs/btrfs/block-group.c

··· 1909 1909 return true; 1910 1910 } 1911 1911 1912 + static int btrfs_reclaim_block_group(struct btrfs_block_group *bg) 1913 + { 1914 + struct btrfs_fs_info *fs_info = bg->fs_info; 1915 + struct btrfs_space_info *space_info = bg->space_info; 1916 + u64 used; 1917 + u64 reserved; 1918 + u64 old_total; 1919 + int ret = 0; 1920 + 1921 + /* Don't race with allocators so take the groups_sem */ 1922 + down_write(&space_info->groups_sem); 1923 + 1924 + spin_lock(&space_info->lock); 1925 + spin_lock(&bg->lock); 1926 + if (bg->reserved || bg->pinned || bg->ro) { 1927 + /* 1928 + * We want to bail if we made new allocations or have 1929 + * outstanding allocations in this block group. We do 1930 + * the ro check in case balance is currently acting on 1931 + * this block group. 1932 + */ 1933 + spin_unlock(&bg->lock); 1934 + spin_unlock(&space_info->lock); 1935 + up_write(&space_info->groups_sem); 1936 + return 0; 1937 + } 1938 + 1939 + if (bg->used == 0) { 1940 + /* 1941 + * It is possible that we trigger relocation on a block 1942 + * group as its extents are deleted and it first goes 1943 + * below the threshold, then shortly after goes empty. 1944 + * 1945 + * In this case, relocating it does delete it, but has 1946 + * some overhead in relocation specific metadata, looking 1947 + * for the non-existent extents and running some extra 1948 + * transactions, which we can avoid by using one of the 1949 + * other mechanisms for dealing with empty block groups. 1950 + */ 1951 + if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) 1952 + btrfs_mark_bg_unused(bg); 1953 + spin_unlock(&bg->lock); 1954 + spin_unlock(&space_info->lock); 1955 + up_write(&space_info->groups_sem); 1956 + return 0; 1957 + } 1958 + 1959 + /* 1960 + * The block group might no longer meet the reclaim condition by 1961 + * the time we get around to reclaiming it, so to avoid 1962 + * reclaiming overly full block_groups, skip reclaiming them. 1963 + * 1964 + * Since the decision making process also depends on the amount 1965 + * being freed, pass in a fake giant value to skip that extra 1966 + * check, which is more meaningful when adding to the list in 1967 + * the first place. 1968 + */ 1969 + if (!should_reclaim_block_group(bg, bg->length)) { 1970 + spin_unlock(&bg->lock); 1971 + spin_unlock(&space_info->lock); 1972 + up_write(&space_info->groups_sem); 1973 + return 0; 1974 + } 1975 + 1976 + spin_unlock(&bg->lock); 1977 + old_total = space_info->total_bytes; 1978 + spin_unlock(&space_info->lock); 1979 + 1980 + /* 1981 + * Get out fast, in case we're read-only or unmounting the 1982 + * filesystem. It is OK to drop block groups from the list even 1983 + * for the read-only case. As we did take the super write lock, 1984 + * "mount -o remount,ro" won't happen and read-only filesystem 1985 + * means it is forced read-only due to a fatal error. So, it 1986 + * never gets back to read-write to let us reclaim again. 1987 + */ 1988 + if (btrfs_need_cleaner_sleep(fs_info)) { 1989 + up_write(&space_info->groups_sem); 1990 + return 0; 1991 + } 1992 + 1993 + ret = inc_block_group_ro(bg, false); 1994 + up_write(&space_info->groups_sem); 1995 + if (ret < 0) 1996 + return ret; 1997 + 1998 + /* 1999 + * The amount of bytes reclaimed corresponds to the sum of the 2000 + * "used" and "reserved" counters. We have set the block group 2001 + * to RO above, which prevents reservations from happening but 2002 + * we may have existing reservations for which allocation has 2003 + * not yet been done - btrfs_update_block_group() was not yet 2004 + * called, which is where we will transfer a reserved extent's 2005 + * size from the "reserved" counter to the "used" counter - this 2006 + * happens when running delayed references. When we relocate the 2007 + * chunk below, relocation first flushes delalloc, waits for 2008 + * ordered extent completion (which is where we create delayed 2009 + * references for data extents) and commits the current 2010 + * transaction (which runs delayed references), and only after 2011 + * it does the actual work to move extents out of the block 2012 + * group. So the reported amount of reclaimed bytes is 2013 + * effectively the sum of the 'used' and 'reserved' counters. 2014 + */ 2015 + spin_lock(&bg->lock); 2016 + used = bg->used; 2017 + reserved = bg->reserved; 2018 + spin_unlock(&bg->lock); 2019 + 2020 + trace_btrfs_reclaim_block_group(bg); 2021 + ret = btrfs_relocate_chunk(fs_info, bg->start, false); 2022 + if (ret) { 2023 + btrfs_dec_block_group_ro(bg); 2024 + btrfs_err(fs_info, "error relocating chunk %llu", 2025 + bg->start); 2026 + used = 0; 2027 + reserved = 0; 2028 + spin_lock(&space_info->lock); 2029 + space_info->reclaim_errors++; 2030 + spin_unlock(&space_info->lock); 2031 + } 2032 + spin_lock(&space_info->lock); 2033 + space_info->reclaim_count++; 2034 + space_info->reclaim_bytes += used; 2035 + space_info->reclaim_bytes += reserved; 2036 + if (space_info->total_bytes < old_total) 2037 + btrfs_set_periodic_reclaim_ready(space_info, true); 2038 + spin_unlock(&space_info->lock); 2039 + 2040 + return ret; 2041 + } 2042 + 1912 2043 void btrfs_reclaim_bgs_work(struct work_struct *work) 1913 2044 { 1914 2045 struct btrfs_fs_info *fs_info = ··· 2073 1942 */ 2074 1943 list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp); 2075 1944 while (!list_empty(&fs_info->reclaim_bgs)) { 2076 - u64 used; 2077 - u64 reserved; 2078 - u64 old_total; 2079 - int ret = 0; 1945 + int ret; 2080 1946 2081 1947 bg = list_first_entry(&fs_info->reclaim_bgs, 2082 1948 struct btrfs_block_group, ··· 2082 1954 2083 1955 space_info = bg->space_info; 2084 1956 spin_unlock(&fs_info->unused_bgs_lock); 1957 + ret = btrfs_reclaim_block_group(bg); 2085 1958 2086 - /* Don't race with allocators so take the groups_sem */ 2087 - down_write(&space_info->groups_sem); 2088 - 2089 - spin_lock(&space_info->lock); 2090 - spin_lock(&bg->lock); 2091 - if (bg->reserved || bg->pinned || bg->ro) { 2092 - /* 2093 - * We want to bail if we made new allocations or have 2094 - * outstanding allocations in this block group. We do 2095 - * the ro check in case balance is currently acting on 2096 - * this block group. 2097 - */ 2098 - spin_unlock(&bg->lock); 2099 - spin_unlock(&space_info->lock); 2100 - up_write(&space_info->groups_sem); 2101 - goto next; 2102 - } 2103 - if (bg->used == 0) { 2104 - /* 2105 - * It is possible that we trigger relocation on a block 2106 - * group as its extents are deleted and it first goes 2107 - * below the threshold, then shortly after goes empty. 2108 - * 2109 - * In this case, relocating it does delete it, but has 2110 - * some overhead in relocation specific metadata, looking 2111 - * for the non-existent extents and running some extra 2112 - * transactions, which we can avoid by using one of the 2113 - * other mechanisms for dealing with empty block groups. 2114 - */ 2115 - if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) 2116 - btrfs_mark_bg_unused(bg); 2117 - spin_unlock(&bg->lock); 2118 - spin_unlock(&space_info->lock); 2119 - up_write(&space_info->groups_sem); 2120 - goto next; 2121 - 2122 - } 2123 - /* 2124 - * The block group might no longer meet the reclaim condition by 2125 - * the time we get around to reclaiming it, so to avoid 2126 - * reclaiming overly full block_groups, skip reclaiming them. 2127 - * 2128 - * Since the decision making process also depends on the amount 2129 - * being freed, pass in a fake giant value to skip that extra 2130 - * check, which is more meaningful when adding to the list in 2131 - * the first place. 2132 - */ 2133 - if (!should_reclaim_block_group(bg, bg->length)) { 2134 - spin_unlock(&bg->lock); 2135 - spin_unlock(&space_info->lock); 2136 - up_write(&space_info->groups_sem); 2137 - goto next; 2138 - } 2139 - 2140 - spin_unlock(&bg->lock); 2141 - old_total = space_info->total_bytes; 2142 - spin_unlock(&space_info->lock); 2143 - 2144 - /* 2145 - * Get out fast, in case we're read-only or unmounting the 2146 - * filesystem. It is OK to drop block groups from the list even 2147 - * for the read-only case. As we did take the super write lock, 2148 - * "mount -o remount,ro" won't happen and read-only filesystem 2149 - * means it is forced read-only due to a fatal error. So, it 2150 - * never gets back to read-write to let us reclaim again. 2151 - */ 2152 - if (btrfs_need_cleaner_sleep(fs_info)) { 2153 - up_write(&space_info->groups_sem); 2154 - goto next; 2155 - } 2156 - 2157 - ret = inc_block_group_ro(bg, false); 2158 - up_write(&space_info->groups_sem); 2159 - if (ret < 0) 2160 - goto next; 2161 - 2162 - /* 2163 - * The amount of bytes reclaimed corresponds to the sum of the 2164 - * "used" and "reserved" counters. We have set the block group 2165 - * to RO above, which prevents reservations from happening but 2166 - * we may have existing reservations for which allocation has 2167 - * not yet been done - btrfs_update_block_group() was not yet 2168 - * called, which is where we will transfer a reserved extent's 2169 - * size from the "reserved" counter to the "used" counter - this 2170 - * happens when running delayed references. When we relocate the 2171 - * chunk below, relocation first flushes delalloc, waits for 2172 - * ordered extent completion (which is where we create delayed 2173 - * references for data extents) and commits the current 2174 - * transaction (which runs delayed references), and only after 2175 - * it does the actual work to move extents out of the block 2176 - * group. So the reported amount of reclaimed bytes is 2177 - * effectively the sum of the 'used' and 'reserved' counters. 2178 - */ 2179 - spin_lock(&bg->lock); 2180 - used = bg->used; 2181 - reserved = bg->reserved; 2182 - spin_unlock(&bg->lock); 2183 - 2184 - trace_btrfs_reclaim_block_group(bg); 2185 - ret = btrfs_relocate_chunk(fs_info, bg->start, false); 2186 - if (ret) { 2187 - btrfs_dec_block_group_ro(bg); 2188 - btrfs_err(fs_info, "error relocating chunk %llu", 2189 - bg->start); 2190 - used = 0; 2191 - reserved = 0; 2192 - spin_lock(&space_info->lock); 2193 - space_info->reclaim_errors++; 2194 - spin_unlock(&space_info->lock); 2195 - } 2196 - spin_lock(&space_info->lock); 2197 - space_info->reclaim_count++; 2198 - space_info->reclaim_bytes += used; 2199 - space_info->reclaim_bytes += reserved; 2200 - if (space_info->total_bytes < old_total) 2201 - btrfs_set_periodic_reclaim_ready(space_info, true); 2202 - spin_unlock(&space_info->lock); 2203 - 2204 - next: 2205 1959 if (ret && !READ_ONCE(space_info->periodic_reclaim)) 2206 1960 btrfs_link_bg_list(bg, &retry_list); 2207 1961 btrfs_put_block_group(bg);

Configure Feed

Configure Feed