Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus-6.8-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
"Miscellaneous bug fixes and cleanups in ext4's multi-block allocator
and extent handling code"

* tag 'for-linus-6.8-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (23 commits)
ext4: make ext4_set_iomap() recognize IOMAP_DELALLOC map type
ext4: make ext4_map_blocks() distinguish delalloc only extent
ext4: add a hole extent entry in cache after punch
ext4: correct the hole length returned by ext4_map_blocks()
ext4: convert to exclusive lock while inserting delalloc extents
ext4: refactor ext4_da_map_blocks()
ext4: remove 'needed' in trace_ext4_discard_preallocations
ext4: remove unnecessary parameter "needed" in ext4_discard_preallocations
ext4: remove unused return value of ext4_mb_release_group_pa
ext4: remove unused return value of ext4_mb_release_inode_pa
ext4: remove unused return value of ext4_mb_release
ext4: remove unused ext4_allocation_context::ac_groups_considered
ext4: remove unneeded return value of ext4_mb_release_context
ext4: remove unused parameter ngroup in ext4_mb_choose_next_group_*()
ext4: remove unused return value of __mb_check_buddy
ext4: mark the group block bitmap as corrupted before reporting an error
ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal()
ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found()
ext4: avoid dividing by 0 in mb_update_avg_fragment_size() when block bitmap corrupt
ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks()
...

+203 -189
+5 -3
fs/ext4/ext4.h
···
 #define EXT4_MAP_MAPPED		BIT(BH_Mapped)
 #define EXT4_MAP_UNWRITTEN	BIT(BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	BIT(BH_Boundary)
+#define EXT4_MAP_DELAYED	BIT(BH_Delay)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
-				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
+				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
+				 EXT4_MAP_DELAYED)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
···
 extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
 extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
 extern int ext4_mb_init(struct super_block *);
-extern int ext4_mb_release(struct super_block *);
+extern void ext4_mb_release(struct super_block *);
 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
 				struct ext4_allocation_request *, int *);
-extern void ext4_discard_preallocations(struct inode *, unsigned int);
+extern void ext4_discard_preallocations(struct inode *);
 extern int __init ext4_init_mballoc(void);
 extern void ext4_exit_mballoc(void);
 extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
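The headline change here is the new EXT4_MAP_DELAYED bit: ext4_map_blocks() can now report a delalloc-only range directly in map->m_flags instead of leaving callers to re-query the extent status tree. A minimal userspace sketch of the resulting classification; the flag values and helper names below are invented, and only the branch order mirrors the ext4_set_iomap() hunk in fs/ext4/inode.c further down:

/* Sketch: classify a mapping the way ext4_set_iomap() does after this
 * series. Userspace model, not kernel code; flag values are made up. */
#include <stdio.h>

#define MAP_MAPPED	(1u << 0)	/* blocks allocated on disk */
#define MAP_DELAYED	(1u << 1)	/* delalloc reservation only */

static const char *classify(unsigned int m_flags)
{
	if (m_flags & MAP_MAPPED)
		return "IOMAP_MAPPED";
	if (m_flags & MAP_DELAYED)	/* new: reported before falling
					 * through to a plain hole */
		return "IOMAP_DELALLOC";
	return "IOMAP_HOLE";
}

int main(void)
{
	printf("%s\n", classify(MAP_DELAYED));	/* -> IOMAP_DELALLOC */
	return 0;
}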
+78 -46
fs/ext4/extents.c
···
 	 * i_rwsem. So we can safely drop the i_data_sem here.
 	 */
 	BUG_ON(EXT4_JOURNAL(inode) == NULL);
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 	up_write(&EXT4_I(inode)->i_data_sem);
 	*dropped = 1;
 	return 0;
···
 
 
 /*
- * ext4_ext_determine_hole - determine hole around given block
+ * ext4_ext_find_hole - find hole around given block according to the given path
  * @inode: inode we lookup in
  * @path: path in extent tree to @lblk
  * @lblk: pointer to logical block around which we want to determine hole
···
 * The function returns the length of a hole starting at @lblk. We update @lblk
 * to the beginning of the hole if we managed to find it.
 */
-static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
-					   struct ext4_ext_path *path,
-					   ext4_lblk_t *lblk)
+static ext4_lblk_t ext4_ext_find_hole(struct inode *inode,
+				      struct ext4_ext_path *path,
+				      ext4_lblk_t *lblk)
 {
 	int depth = ext_depth(inode);
 	struct ext4_extent *ex;
···
 		BUG();
 	}
 	return len;
-}
-
-/*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
- */
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
-			  ext4_lblk_t hole_len)
-{
-	struct extent_status es;
-
-	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
-				  hole_start + hole_len - 1, &es);
-	if (es.es_len) {
-		/* There's delayed extent containing lblock? */
-		if (es.es_lblk <= hole_start)
-			return;
-		hole_len = min(es.es_lblk - hole_start, hole_len);
-	}
-	ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
-	ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
-			      EXTENT_STATUS_HOLE);
 }
 
 /*
···
 	return 0;
 }
 
+/*
+ * Determine hole length around the given logical block, first try to
+ * locate and expand the hole from the given @path, and then adjust it
+ * if it's partially or completely converted to delayed extents, insert
+ * it into the extent cache tree if it's indeed a hole, finally return
+ * the length of the determined extent.
+ */
+static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode,
+						  struct ext4_ext_path *path,
+						  ext4_lblk_t lblk)
+{
+	ext4_lblk_t hole_start, len;
+	struct extent_status es;
+
+	hole_start = lblk;
+	len = ext4_ext_find_hole(inode, path, &hole_start);
+again:
+	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
+				  hole_start + len - 1, &es);
+	if (!es.es_len)
+		goto insert_hole;
+
+	/*
+	 * There's a delalloc extent in the hole, handle it if the delalloc
+	 * extent is in front of, behind and straddle the queried range.
+	 */
+	if (lblk >= es.es_lblk + es.es_len) {
+		/*
+		 * The delalloc extent is in front of the queried range,
+		 * find again from the queried start block.
+		 */
+		len -= lblk - hole_start;
+		hole_start = lblk;
+		goto again;
+	} else if (in_range(lblk, es.es_lblk, es.es_len)) {
+		/*
+		 * The delalloc extent containing lblk, it must have been
+		 * added after ext4_map_blocks() checked the extent status
+		 * tree so we are not holding i_rwsem and delalloc info is
+		 * only stabilized by i_data_sem we are going to release
+		 * soon. Don't modify the extent status tree and report
+		 * extent as a hole, just adjust the length to the delalloc
+		 * extent's after lblk.
+		 */
+		len = es.es_lblk + es.es_len - lblk;
+		return len;
+	} else {
+		/*
+		 * The delalloc extent is partially or completely behind
+		 * the queried range, update hole length until the
+		 * beginning of the delalloc extent.
+		 */
+		len = min(es.es_lblk - hole_start, len);
+	}
+
+insert_hole:
+	/* Put just found gap into cache to speed up subsequent requests */
+	ext_debug(inode, " -> %u:%u\n", hole_start, len);
+	ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE);
+
+	/* Update hole_len to reflect hole size after lblk */
+	if (hole_start != lblk)
+		len -= lblk - hole_start;
+
+	return len;
+}
 
 /*
 * Block allocation/map/preallocation routine for extents based files
···
 	 * we couldn't try to create block if create flag is zero
 	 */
 	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
-		ext4_lblk_t hole_start, hole_len;
+		ext4_lblk_t len;
 
-		hole_start = map->m_lblk;
-		hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
-		/*
-		 * put just found gap into cache to speed up
-		 * subsequent requests
-		 */
-		ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+		len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk);
 
-		/* Update hole_len to reflect hole size after map->m_lblk */
-		if (hole_start != map->m_lblk)
-			hole_len -= map->m_lblk - hole_start;
 		map->m_pblk = 0;
-		map->m_len = min_t(unsigned int, map->m_len, hole_len);
-
+		map->m_len = min_t(unsigned int, map->m_len, len);
 		goto out;
 	}
···
 	 * not a good idea to call discard here directly,
 	 * but otherwise we'd need to call it every free().
 	 */
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
 	ext4_free_blocks(handle, inode, NULL, newblock,
···
 	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
 
 	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 	ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
 
 	ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
···
 		up_write(&EXT4_I(inode)->i_data_sem);
 		goto out_stop;
 	}
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 
 	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
 				     punch_stop - punch_start, SHIFT_LEFT);
···
 		goto out_stop;
 
 	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 
 	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
 	if (IS_ERR(path)) {
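The core of the extents.c rework is ext4_ext_determine_insert_hole(), which adjusts the hole it found for a delalloc extent that may sit in front of, inside, or behind the queried block. The standalone model below replays that interval arithmetic with invented block numbers; it is a sketch of the logic only, with no extent cache and the restarted lookup assumed to find nothing:

/* Userspace model of the overlap handling in
 * ext4_ext_determine_insert_hole(): given a candidate hole
 * [hole_start, hole_start+len) and a delalloc extent
 * [es_lblk, es_lblk+es_len), adjust the length reported for the
 * queried block lblk. Illustrative only, not kernel code. */
#include <stdio.h>

typedef unsigned int lblk_t;

static lblk_t adjusted_len(lblk_t lblk, lblk_t hole_start, lblk_t len,
			   lblk_t es_lblk, lblk_t es_len)
{
	if (lblk >= es_lblk + es_len) {
		/* delalloc extent entirely in front of lblk: restart the
		 * search from lblk (the kernel loops back to "again";
		 * here the second lookup is assumed to find nothing) */
		len -= lblk - hole_start;
		hole_start = lblk;
	} else if (lblk >= es_lblk && lblk < es_lblk + es_len) {
		/* lblk falls inside the delalloc extent: report only that
		 * extent's remainder after lblk, and cache nothing */
		return es_lblk + es_len - lblk;
	} else {
		/* delalloc extent behind lblk: the hole ends where the
		 * delalloc extent begins */
		if (es_lblk - hole_start < len)
			len = es_lblk - hole_start;
	}
	/* the kernel caches [hole_start, len) here, then trims to lblk */
	if (hole_start != lblk)
		len -= lblk - hole_start;
	return len;
}

int main(void)
{
	/* hole candidate [100, 200), query block 120, delalloc [150, 180) */
	printf("%u\n", adjusted_len(120, 100, 100, 150, 30));	/* -> 30 */
	return 0;
}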
+1 -1
fs/ext4/file.c
···
 	    (atomic_read(&inode->i_writecount) == 1) &&
 	    !EXT4_I(inode)->i_reserved_data_blocks) {
 		down_write(&EXT4_I(inode)->i_data_sem);
-		ext4_discard_preallocations(inode, 0);
+		ext4_discard_preallocations(inode);
 		up_write(&EXT4_I(inode)->i_data_sem);
 	}
 	if (is_dx(inode) && filp->private_data)
+1 -1
fs/ext4/indirect.c
···
 	 * i_rwsem. So we can safely drop the i_data_sem here.
 	 */
 	BUG_ON(EXT4_JOURNAL(inode) == NULL);
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 	up_write(&EXT4_I(inode)->i_data_sem);
 	*dropped = 1;
 	return 0;
+30 -60
fs/ext4/inode.c
···
 	 */
 	if ((ei->i_reserved_data_blocks == 0) &&
 	    !inode_is_open_for_write(inode))
-		ext4_discard_preallocations(inode, 0);
+		ext4_discard_preallocations(inode);
 }
 
 static int __check_block_validity(struct inode *inode, const char *func,
···
 		map->m_len = retval;
 	} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
 		map->m_pblk = 0;
+		map->m_flags |= ext4_es_is_delayed(&es) ?
+				EXT4_MAP_DELAYED : 0;
 		retval = es.es_len - (map->m_lblk - es.es_lblk);
 		if (retval > map->m_len)
 			retval = map->m_len;
···
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
-		if (ext4_es_is_hole(&es)) {
-			retval = 0;
-			down_read(&EXT4_I(inode)->i_data_sem);
+		if (ext4_es_is_hole(&es))
 			goto add_delayed;
-		}
 
 		/*
 		 * Delayed extent could be allocated by fallocate.
···
 		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
 	else
 		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
-
-add_delayed:
-	if (retval == 0) {
-		int ret;
-
-		/*
-		 * XXX: __block_prepare_write() unmaps passed block,
-		 * is it OK?
-		 */
-
-		ret = ext4_insert_delayed_block(inode, map->m_lblk);
-		if (ret != 0) {
-			retval = ret;
-			goto out_unlock;
-		}
-
-		map_bh(bh, inode->i_sb, invalid_block);
-		set_buffer_new(bh);
-		set_buffer_delay(bh);
-	} else if (retval > 0) {
+	if (retval < 0) {
+		up_read(&EXT4_I(inode)->i_data_sem);
+		return retval;
+	}
+	if (retval > 0) {
 		unsigned int status;
 
 		if (unlikely(retval != map->m_len)) {
···
 			EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
 		ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
 				      map->m_pblk, status);
+		up_read(&EXT4_I(inode)->i_data_sem);
+		return retval;
 	}
+	up_read(&EXT4_I(inode)->i_data_sem);
 
-out_unlock:
-	up_read((&EXT4_I(inode)->i_data_sem));
+add_delayed:
+	down_write(&EXT4_I(inode)->i_data_sem);
+	retval = ext4_insert_delayed_block(inode, map->m_lblk);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	if (retval)
+		return retval;
 
+	map_bh(bh, inode->i_sb, invalid_block);
+	set_buffer_new(bh);
+	set_buffer_delay(bh);
 	return retval;
 }
 
···
 		iomap->addr = (u64) map->m_pblk << blkbits;
 		if (flags & IOMAP_DAX)
 			iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
+	} else if (map->m_flags & EXT4_MAP_DELAYED) {
+		iomap->type = IOMAP_DELALLOC;
+		iomap->addr = IOMAP_NULL_ADDR;
 	} else {
 		iomap->type = IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;
···
 	.iomap_end = ext4_iomap_end,
 };
 
-static bool ext4_iomap_is_delalloc(struct inode *inode,
-				   struct ext4_map_blocks *map)
-{
-	struct extent_status es;
-	ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1;
-
-	ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
-				  map->m_lblk, end, &es);
-
-	if (!es.es_len || es.es_lblk > end)
-		return false;
-
-	if (es.es_lblk > map->m_lblk) {
-		map->m_len = es.es_lblk - map->m_lblk;
-		return false;
-	}
-
-	offset = map->m_lblk - es.es_lblk;
-	map->m_len = es.es_len - offset;
-
-	return true;
-}
-
 static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
 				   loff_t length, unsigned int flags,
 				   struct iomap *iomap, struct iomap *srcmap)
 {
 	int ret;
-	bool delalloc = false;
 	struct ext4_map_blocks map;
 	u8 blkbits = inode->i_blkbits;
···
 	ret = ext4_map_blocks(NULL, inode, &map, 0);
 	if (ret < 0)
 		return ret;
-	if (ret == 0)
-		delalloc = ext4_iomap_is_delalloc(inode, &map);
-
 set_iomap:
 	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
-	if (delalloc && iomap->type == IOMAP_HOLE)
-		iomap->type = IOMAP_DELALLOC;
 
 	return 0;
 }
···
 
 	/* If there are blocks to remove, do it */
 	if (stop_block > first_block) {
+		ext4_lblk_t hole_len = stop_block - first_block;
 
 		down_write(&EXT4_I(inode)->i_data_sem);
-		ext4_discard_preallocations(inode, 0);
+		ext4_discard_preallocations(inode);
 
-		ext4_es_remove_extent(inode, first_block,
-				      stop_block - first_block);
+		ext4_es_remove_extent(inode, first_block, hole_len);
 
 		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 			ret = ext4_ext_remove_space(inode, first_block,
···
 			ret = ext4_ind_remove_space(handle, inode, first_block,
 						    stop_block);
 
+		ext4_es_insert_extent(inode, first_block, hole_len, ~0,
+				      EXTENT_STATUS_HOLE);
 		up_write(&EXT4_I(inode)->i_data_sem);
 	}
 	ext4_fc_track_range(handle, inode, first_block, stop_block);
···
 
 	down_write(&EXT4_I(inode)->i_data_sem);
 
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		err = ext4_ext_truncate(handle, inode);
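Note the locking shift in ext4_da_map_blocks() above: the lookup still runs with i_data_sem held shared, but inserting a delalloc extent now happens under the exclusive lock (the "convert to exclusive lock while inserting delalloc extents" commit). Since an rwsem cannot be upgraded in place, the read lock is dropped and the write lock taken fresh. A minimal pthreads sketch of that release-and-reacquire pattern, with invented names and none of ext4's actual state:

/* Sketch of the lock pattern only, not kernel code. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static int cached;	/* stands in for the extent status tree */

static void map_block(void)
{
	pthread_rwlock_rdlock(&lock);	/* cheap shared lookup first */
	int found = cached;
	pthread_rwlock_unlock(&lock);	/* cannot upgrade in place */

	if (!found) {
		pthread_rwlock_wrlock(&lock);	/* exclusive for insert */
		/* a real implementation must re-check here: another
		 * writer may have inserted while the lock was dropped */
		cached = 1;
		pthread_rwlock_unlock(&lock);
	}
}

int main(void)
{
	map_block();
	printf("cached=%d\n", cached);	/* -> cached=1 */
	return 0;
}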
+1 -1
fs/ext4/ioctl.c
···
 	ext4_reset_inode_seed(inode);
 	ext4_reset_inode_seed(inode_bl);
 
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (err < 0) {
+78 -62
fs/ext4/mballoc.c
···
 
 		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
 		blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
+		ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		ext4_grp_locked_error(sb, e4b->bd_group,
 				      inode ? inode->i_ino : 0,
 				      blocknr,
 				      "freeing block already freed "
 				      "(bit %u)",
 				      first + i);
-		ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 	}
 	mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
···
 	} \
 } while (0)
 
-static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
+static void __mb_check_buddy(struct ext4_buddy *e4b, char *file,
 				const char *function, int line)
 {
 	struct super_block *sb = e4b->bd_sb;
···
 	void *buddy2;
 
 	if (e4b->bd_info->bb_check_counter++ % 10)
-		return 0;
+		return;
 
 	while (order > 1) {
 		buddy = mb_find_buddy(e4b, order, &max);
···
 
 	grp = ext4_get_group_info(sb, e4b->bd_group);
 	if (!grp)
-		return NULL;
+		return;
 	list_for_each(cur, &grp->bb_prealloc_list) {
 		ext4_group_t groupnr;
 		struct ext4_prealloc_space *pa;
···
 		for (i = 0; i < pa->pa_len; i++)
 			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
 	}
-	return 0;
 }
 #undef MB_CHECK_ASSERT
 #define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
···
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	int new_order;
 
-	if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
+	if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
 		return;
 
 	new_order = mb_avg_fragment_size_order(sb,
···
 * cr level needs an update.
 */
 static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac,
-			enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+			enum criteria *new_cr, ext4_group_t *group)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_group_info *iter;
···
 * order. Updates *new_cr if cr level needs an update.
 */
 static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac,
-			enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+			enum criteria *new_cr, ext4_group_t *group)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_group_info *grp = NULL;
···
 * much and fall to CR_GOAL_LEN_SLOW in that case.
 */
 static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac,
-			enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+			enum criteria *new_cr, ext4_group_t *group)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_group_info *grp = NULL;
···
 	}
 
 	if (*new_cr == CR_POWER2_ALIGNED) {
-		ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups);
+		ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
 	} else if (*new_cr == CR_GOAL_LEN_FAST) {
-		ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups);
+		ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
 	} else if (*new_cr == CR_BEST_AVAIL_LEN) {
-		ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups);
+		ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
 	} else {
 		/*
 		 * TODO: For CR=2, we can arrange groups in an rb tree sorted by
···
 	period = get_cycles() - period;
 	atomic_inc(&sbi->s_mb_buddies_generated);
 	atomic64_add(period, &sbi->s_mb_generation_time);
+}
+
+static void mb_regenerate_buddy(struct ext4_buddy *e4b)
+{
+	int count;
+	int order = 1;
+	void *buddy;
+
+	while ((buddy = mb_find_buddy(e4b, order++, &count)))
+		mb_set_bits(buddy, 0, count);
+
+	e4b->bd_info->bb_fragments = 0;
+	memset(e4b->bd_info->bb_counters, 0,
+	       sizeof(*e4b->bd_info->bb_counters) *
+	       (e4b->bd_sb->s_blocksize_bits + 2));
+
+	ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
+			       e4b->bd_bitmap, e4b->bd_group, e4b->bd_info);
 }
 
 /* The buddy information is attached the buddy cache inode
···
 	mb_check_buddy(e4b);
 	mb_free_blocks_double(inode, e4b, first, count);
 
-	this_cpu_inc(discard_pa_seq);
-	e4b->bd_info->bb_free += count;
-	if (first < e4b->bd_info->bb_first_free)
-		e4b->bd_info->bb_first_free = first;
-
 	/* access memory sequentially: check left neighbour,
 	 * clear range and then check right neighbour
 	 */
···
 		struct ext4_sb_info *sbi = EXT4_SB(sb);
 		ext4_fsblk_t blocknr;
 
+		/*
+		 * Fastcommit replay can free already freed blocks which
+		 * corrupts allocation info. Regenerate it.
+		 */
+		if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+			mb_regenerate_buddy(e4b);
+			goto check;
+		}
+
 		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
 		blocknr += EXT4_C2B(sbi, block);
-		if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
-			ext4_grp_locked_error(sb, e4b->bd_group,
-					      inode ? inode->i_ino : 0,
-					      blocknr,
-					      "freeing already freed block (bit %u); block bitmap corrupt.",
-					      block);
-			ext4_mark_group_bitmap_corrupted(
-				sb, e4b->bd_group,
+		ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
 				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
-		}
-		goto done;
+		ext4_grp_locked_error(sb, e4b->bd_group,
+				      inode ? inode->i_ino : 0, blocknr,
+				      "freeing already freed block (bit %u); block bitmap corrupt.",
+				      block);
+		return;
 	}
+
+	this_cpu_inc(discard_pa_seq);
+	e4b->bd_info->bb_free += count;
+	if (first < e4b->bd_info->bb_first_free)
+		e4b->bd_info->bb_first_free = first;
 
 	/* let's maintain fragments counter */
 	if (left_is_free && right_is_free)
···
 	if (first <= last)
 		mb_buddy_mark_free(e4b, first >> 1, last >> 1);
 
-done:
 	mb_set_largest_free_order(sb, e4b->bd_info);
 	mb_update_avg_fragment_size(sb, e4b->bd_info);
+check:
 	mb_check_buddy(e4b);
 }
···
 		return;
 
 	ext4_lock_group(ac->ac_sb, group);
+	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+		goto out;
+
 	max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
 
 	if (max > 0) {
···
 		ext4_mb_use_best_found(ac, e4b);
 	}
 
+out:
 	ext4_unlock_group(ac->ac_sb, group);
 	ext4_mb_unload_buddy(e4b);
 }
···
 	if (err)
 		return err;
 
-	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
-		ext4_mb_unload_buddy(e4b);
-		return 0;
-	}
-
 	ext4_lock_group(ac->ac_sb, group);
+	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+		goto out;
+
 	max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
 			     ac->ac_g_ex.fe_len, &ex);
 	ex.fe_logical = 0xDEADFA11; /* debug value */
···
 		ac->ac_b_ex = ex;
 		ext4_mb_use_best_found(ac, e4b);
 	}
+out:
 	ext4_unlock_group(ac->ac_sb, group);
 	ext4_mb_unload_buddy(e4b);
 
···
 
 		k = mb_find_next_zero_bit(buddy, max, 0);
 		if (k >= max) {
+			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
+					e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
 				"%d free clusters of order %d. But found 0",
 				grp->bb_counters[i], i);
-			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
-					e4b->bd_group,
-					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			break;
 		}
 		ac->ac_found++;
···
 			 * free blocks even though group info says we
 			 * have free blocks
 			 */
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
 					"%d free clusters as per "
 					"group info. But bitmap says 0",
 					free);
-			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			break;
 		}
 
···
 		if (WARN_ON(ex.fe_len <= 0))
 			break;
 		if (free < ex.fe_len) {
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
 					"%d free clusters as per "
 					"group info. But got %d blocks",
 					free, ex.fe_len);
-			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			/*
 			 * The number of free blocks differs. This mostly
 			 * indicate that the bitmap is corrupt. So exit
···
 	return count;
 }
 
-int ext4_mb_release(struct super_block *sb)
+void ext4_mb_release(struct super_block *sb)
 {
 	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
···
 	}
 
 	free_percpu(sbi->s_locality_groups);
-
-	return 0;
 }
 
 static inline int ext4_issue_discard(struct super_block *sb,
···
 * the caller MUST hold group/inode locks.
 * TODO: optimize the case when there are no in-core structures yet
 */
-static noinline_for_stack int
+static noinline_for_stack void
 ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 			struct ext4_prealloc_space *pa)
 {
···
 	 */
 	}
 	atomic_add(free, &sbi->s_mb_discarded);
-
-	return 0;
 }
 
-static noinline_for_stack int
+static noinline_for_stack void
 ext4_mb_release_group_pa(struct ext4_buddy *e4b,
 			struct ext4_prealloc_space *pa)
 {
···
 	if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
 		ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
 			     e4b->bd_group, group, pa->pa_pstart);
-		return 0;
+		return;
 	}
 	mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
 	atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
 	trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
-
-	return 0;
 }
 
 /*
···
 *
 * FIXME!! Make sure it is valid at all the call sites
 */
-void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
+void ext4_discard_preallocations(struct inode *inode)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct super_block *sb = inode->i_sb;
···
 	struct rb_node *iter;
 	int err;
 
-	if (!S_ISREG(inode->i_mode)) {
+	if (!S_ISREG(inode->i_mode))
 		return;
-	}
 
 	if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
 		return;
···
 	mb_debug(sb, "discard preallocation for inode %lu\n",
 		 inode->i_ino);
 	trace_ext4_discard_preallocations(inode,
-			atomic_read(&ei->i_prealloc_active), needed);
-
-	if (needed == 0)
-		needed = UINT_MAX;
+			atomic_read(&ei->i_prealloc_active));
 
 repeat:
 	/* first, collect all pa's in the inode */
 	write_lock(&ei->i_prealloc_lock);
-	for (iter = rb_first(&ei->i_prealloc_node); iter && needed;
+	for (iter = rb_first(&ei->i_prealloc_node); iter;
 	     iter = rb_next(iter)) {
 		pa = rb_entry(iter, struct ext4_prealloc_space,
 			      pa_node.inode_node);
···
 		spin_unlock(&pa->pa_lock);
 		rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node);
 		list_add(&pa->u.pa_tmp_list, &list);
-		needed--;
 		continue;
 	}
 
···
 /*
 * release all resource we used in allocation
 */
-static int ext4_mb_release_context(struct ext4_allocation_context *ac)
+static void ext4_mb_release_context(struct ext4_allocation_context *ac)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_prealloc_space *pa = ac->ac_pa;
···
 	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
 		mutex_unlock(&ac->ac_lg->lg_mutex);
 	ext4_mb_collect_stats(ac);
-	return 0;
 }
 
 static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
···
 	ext4_grpblk_t next, count, free_count, last, origin_start;
 	bool set_trimmed = false;
 	void *bitmap;
+
+	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+		return 0;
 
 	last = ext4_last_grp_cluster(sb, e4b->bd_group);
 	bitmap = e4b->bd_bitmap;
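Two patterns recur in the mballoc.c hunks: the group's corrupt flag is now set before the error is reported, and ext4_mb_find_by_goal() and ext4_mb_try_best_found() check EXT4_MB_GRP_BBITMAP_CORRUPT only after taking ext4_lock_group(), so the check cannot race with another CPU marking the group corrupted in between. A toy mutex sketch of that check-under-lock pattern, with invented names and nothing from the kernel:

/* Sketch of the pattern only, not kernel code. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;
static bool bitmap_corrupt;

static void try_alloc_from_group(void)
{
	pthread_mutex_lock(&group_lock);
	if (bitmap_corrupt) {		/* checked under the lock, so the
					 * answer cannot go stale here */
		pthread_mutex_unlock(&group_lock);
		return;			/* skip the corrupted group */
	}
	puts("allocating from group");	/* bitmap known good at this point */
	pthread_mutex_unlock(&group_lock);
}

int main(void)
{
	try_alloc_from_group();		/* -> allocating from group */
	bitmap_corrupt = true;
	try_alloc_from_group();		/* silently skipped */
	return 0;
}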
-1
fs/ext4/mballoc.h
···
 	 */
 	ext4_grpblk_t ac_orig_goal_len;
 
-	__u32 ac_groups_considered;
 	__u32 ac_flags;		/* allocation hints */
 	__u16 ac_groups_scanned;
 	__u16 ac_groups_linear_remaining;
+4 -6
fs/ext4/move_extent.c
···
 		goto out;
 	o_end = o_start + len;
 
+	*moved_len = 0;
 	while (o_start < o_end) {
 		struct ext4_extent *ex;
 		ext4_lblk_t cur_blk, next_blk;
···
 		 */
 		ext4_double_up_write_data_sem(orig_inode, donor_inode);
 		/* Swap original branches with new branches */
-		move_extent_per_page(o_filp, donor_inode,
+		*moved_len += move_extent_per_page(o_filp, donor_inode,
 					orig_page_index, donor_page_index,
 					offset_in_page, cur_len,
 					unwritten, &ret);
···
 		o_start += cur_len;
 		d_start += cur_len;
 	}
-	*moved_len = o_start - orig_blk;
-	if (*moved_len > len)
-		*moved_len = len;
 
 out:
 	if (*moved_len) {
-		ext4_discard_preallocations(orig_inode, 0);
-		ext4_discard_preallocations(donor_inode, 0);
+		ext4_discard_preallocations(orig_inode);
+		ext4_discard_preallocations(donor_inode);
 	}
 
 	ext4_free_ext_path(path);
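The move_extent.c change makes *moved_len an accumulation of what move_extent_per_page() actually moved, instead of deriving it from the loop cursor afterwards, which could overstate progress when an iteration fails partway. A toy model of the difference, with an invented per-chunk helper:

/* Sketch only: the chunk size and failure point are made up. */
#include <stdio.h>

/* pretend each chunk of 4 blocks moves, but the third chunk fails */
static int move_chunk(int idx, int len)
{
	return (idx < 2) ? len : 0;	/* 0 blocks moved on failure */
}

int main(void)
{
	int moved = 0, start = 0;

	for (int i = 0; i < 3; i++, start += 4)
		moved += move_chunk(i, 4);	/* accumulate real progress */

	/* a cursor-based answer would claim 12 blocks; only 8 moved */
	printf("moved=%d cursor=%d\n", moved, start);
	return 0;
}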
+1 -1
fs/ext4/super.c
···
 	ext4_fc_del(inode);
 	invalidate_inode_buffers(inode);
 	clear_inode(inode);
-	ext4_discard_preallocations(inode, 0);
+	ext4_discard_preallocations(inode);
 	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
 	dquot_drop(inode);
 	if (EXT4_I(inode)->jinode) {
+4 -7
include/trace/events/ext4.h
···
 );
 
 TRACE_EVENT(ext4_discard_preallocations,
-	TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed),
+	TP_PROTO(struct inode *inode, unsigned int len),
 
-	TP_ARGS(inode, len, needed),
+	TP_ARGS(inode, len),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,		dev	)
 		__field(	ino_t,		ino	)
 		__field(	unsigned int,	len	)
-		__field(	unsigned int,	needed	)
 
 	),
 
···
 		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->len	= len;
-		__entry->needed	= needed;
 	),
 
-	TP_printk("dev %d,%d ino %lu len: %u needed %u",
+	TP_printk("dev %d,%d ino %lu len: %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino, __entry->len,
-		  __entry->needed)
+		  (unsigned long) __entry->ino, __entry->len)
 );
 
 TRACE_EVENT(ext4_mb_discard_preallocations,