Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ext4_for_linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"Lots of cleanups and bug fixes this cycle, primarily in the block
allocation, extent management, fast commit, and journalling"

* tag 'ext4_for_linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (93 commits)
ext4: convert EXT4_B2C(sbi->s_stripe) users to EXT4_NUM_B2C
ext4: check stripe size compatibility on remount as well
ext4: fix i_data_sem unlock order in ext4_ind_migrate()
ext4: remove the special buffer dirty handling in do_journal_get_write_access
ext4: fix a potential assertion failure due to improperly dirtied buffer
ext4: hoist ext4_block_write_begin and replace the __block_write_begin
ext4: persist the new uptodate buffers in ext4_journalled_zero_new_buffers
ext4: dax: keep orphan list before truncate overflow allocated blocks
ext4: fix error message when rejecting the default hash
ext4: save unnecessary indentation in ext4_ext_create_new_leaf()
ext4: make some fast commit functions reuse extents path
ext4: refactor ext4_swap_extents() to reuse extents path
ext4: get rid of ppath in convert_initialized_extent()
ext4: get rid of ppath in ext4_ext_handle_unwritten_extents()
ext4: get rid of ppath in ext4_ext_convert_to_initialized()
ext4: get rid of ppath in ext4_convert_unwritten_extents_endio()
ext4: get rid of ppath in ext4_split_convert_extents()
ext4: get rid of ppath in ext4_split_extent()
ext4: get rid of ppath in ext4_force_split_extent_at()
ext4: get rid of ppath in ext4_split_extent_at()
...

+991 -996
-10
Documentation/admin-guide/ext4.rst
··· 212 212 that ext4's inode table readahead algorithm will pre-read into the 213 213 buffer cache. The default value is 32 blocks. 214 214 215 - nouser_xattr 216 - Disables Extended User Attributes. See the attr(5) manual page for 217 - more information about extended attributes. 218 - 219 - noacl 220 - This option disables POSIX Access Control List support. If ACL support 221 - is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL 222 - is enabled by default on mount. See the acl(5) manual page for more 223 - information about acl. 224 - 225 215 bsddf (*) 226 216 Make 'df' act like BSD. 227 217
+6 -2
fs/ext4/bitmap.c
··· 18 18 19 19 int ext4_inode_bitmap_csum_verify(struct super_block *sb, 20 20 struct ext4_group_desc *gdp, 21 - struct buffer_head *bh, int sz) 21 + struct buffer_head *bh) 22 22 { 23 23 __u32 hi; 24 24 __u32 provided, calculated; 25 25 struct ext4_sb_info *sbi = EXT4_SB(sb); 26 + int sz; 26 27 27 28 if (!ext4_has_metadata_csum(sb)) 28 29 return 1; 29 30 31 + sz = EXT4_INODES_PER_GROUP(sb) >> 3; 30 32 provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); 31 33 calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 32 34 if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) { ··· 42 40 43 41 void ext4_inode_bitmap_csum_set(struct super_block *sb, 44 42 struct ext4_group_desc *gdp, 45 - struct buffer_head *bh, int sz) 43 + struct buffer_head *bh) 46 44 { 47 45 __u32 csum; 48 46 struct ext4_sb_info *sbi = EXT4_SB(sb); 47 + int sz; 49 48 50 49 if (!ext4_has_metadata_csum(sb)) 51 50 return; 52 51 52 + sz = EXT4_INODES_PER_GROUP(sb) >> 3; 53 53 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 54 54 gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); 55 55 if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END)
+11 -3
fs/ext4/dir.c
··· 280 280 struct fscrypt_str de_name = 281 281 FSTR_INIT(de->name, 282 282 de->name_len); 283 + u32 hash; 284 + u32 minor_hash; 285 + 286 + if (IS_CASEFOLDED(inode)) { 287 + hash = EXT4_DIRENT_HASH(de); 288 + minor_hash = EXT4_DIRENT_MINOR_HASH(de); 289 + } else { 290 + hash = 0; 291 + minor_hash = 0; 292 + } 283 293 284 294 /* Directory is encrypted */ 285 295 err = fscrypt_fname_disk_to_usr(inode, 286 - EXT4_DIRENT_HASH(de), 287 - EXT4_DIRENT_MINOR_HASH(de), 288 - &de_name, &fstr); 296 + hash, minor_hash, &de_name, &fstr); 289 297 de_name = fstr; 290 298 fstr.len = save_len; 291 299 if (err)
+18 -13
fs/ext4/ext4.h
··· 1058 1058 1059 1059 /* Number of ongoing updates on this inode */ 1060 1060 atomic_t i_fc_updates; 1061 + atomic_t i_unwritten; /* Nr. of inflight conversions pending */ 1061 1062 1062 1063 /* Fast commit wait queue for this inode */ 1063 1064 wait_queue_head_t i_fc_wait; ··· 1107 1106 1108 1107 /* mballoc */ 1109 1108 atomic_t i_prealloc_active; 1109 + 1110 + /* allocation reservation info for delalloc */ 1111 + /* In case of bigalloc, this refer to clusters rather than blocks */ 1112 + unsigned int i_reserved_data_blocks; 1110 1113 struct rb_root i_prealloc_node; 1111 1114 rwlock_t i_prealloc_lock; 1112 1115 ··· 1126 1121 1127 1122 /* ialloc */ 1128 1123 ext4_group_t i_last_alloc_group; 1129 - 1130 - /* allocation reservation info for delalloc */ 1131 - /* In case of bigalloc, this refer to clusters rather than blocks */ 1132 - unsigned int i_reserved_data_blocks; 1133 1124 1134 1125 /* pending cluster reservations for bigalloc file systems */ 1135 1126 struct ext4_pending_tree i_pending_tree; ··· 1150 1149 */ 1151 1150 struct list_head i_rsv_conversion_list; 1152 1151 struct work_struct i_rsv_conversion_work; 1153 - atomic_t i_unwritten; /* Nr. of inflight conversions pending */ 1154 1152 1155 1153 spinlock_t i_block_reservation_lock; 1156 1154 ··· 2338 2338 ((struct ext4_dir_entry_hash *) \ 2339 2339 (((void *)(entry)) + \ 2340 2340 ((8 + (entry)->name_len + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND))) 2341 - #define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(de)->hash) 2341 + #define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(entry)->hash) 2342 2342 #define EXT4_DIRENT_MINOR_HASH(entry) \ 2343 - le32_to_cpu(EXT4_DIRENT_HASHES(de)->minor_hash) 2343 + le32_to_cpu(EXT4_DIRENT_HASHES(entry)->minor_hash) 2344 2344 2345 2345 static inline bool ext4_hash_in_dirent(const struct inode *inode) 2346 2346 { ··· 2462 2462 #define DX_HASH_HALF_MD4_UNSIGNED 4 2463 2463 #define DX_HASH_TEA_UNSIGNED 5 2464 2464 #define DX_HASH_SIPHASH 6 2465 + #define DX_HASH_LAST DX_HASH_SIPHASH 2465 2466 2466 2467 static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, 2467 2468 const void *address, unsigned int length) ··· 2696 2695 extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); 2697 2696 void ext4_inode_bitmap_csum_set(struct super_block *sb, 2698 2697 struct ext4_group_desc *gdp, 2699 - struct buffer_head *bh, int sz); 2698 + struct buffer_head *bh); 2700 2699 int ext4_inode_bitmap_csum_verify(struct super_block *sb, 2701 2700 struct ext4_group_desc *gdp, 2702 - struct buffer_head *bh, int sz); 2701 + struct buffer_head *bh); 2703 2702 void ext4_block_bitmap_csum_set(struct super_block *sb, 2704 2703 struct ext4_group_desc *gdp, 2705 2704 struct buffer_head *bh); ··· 3713 3712 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 3714 3713 int num, 3715 3714 struct ext4_ext_path *path); 3716 - extern int ext4_ext_insert_extent(handle_t *, struct inode *, 3717 - struct ext4_ext_path **, 3718 - struct ext4_extent *, int); 3715 + extern struct ext4_ext_path *ext4_ext_insert_extent( 3716 + handle_t *handle, struct inode *inode, 3717 + struct ext4_ext_path *path, 3718 + struct ext4_extent *newext, int gb_flags); 3719 3719 extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, 3720 - struct ext4_ext_path **, 3720 + struct ext4_ext_path *, 3721 3721 int flags); 3722 3722 extern void ext4_free_ext_path(struct ext4_ext_path *); 3723 3723 extern int ext4_ext_check_inode(struct inode *inode); ··· 3855 3853 return buffer_uptodate(bh); 3856 3854 } 3857 3855 3856 + extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, 3857 + loff_t pos, unsigned len, 3858 + get_block_t *get_block); 3858 3859 #endif /* __KERNEL__ */ 3859 3860 3860 3861 #define EFSBADCRC EBADMSG /* Bad CRC detected */
+425 -402
fs/ext4/extents.c
··· 84 84 et->et_checksum = ext4_extent_block_csum(inode, eh); 85 85 } 86 86 87 - static int ext4_split_extent_at(handle_t *handle, 88 - struct inode *inode, 89 - struct ext4_ext_path **ppath, 90 - ext4_lblk_t split, 91 - int split_flag, 92 - int flags); 87 + static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, 88 + struct inode *inode, 89 + struct ext4_ext_path *path, 90 + ext4_lblk_t split, 91 + int split_flag, int flags); 93 92 94 93 static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) 95 94 { ··· 105 106 return 0; 106 107 } 107 108 109 + static inline void ext4_ext_path_brelse(struct ext4_ext_path *path) 110 + { 111 + brelse(path->p_bh); 112 + path->p_bh = NULL; 113 + } 114 + 108 115 static void ext4_ext_drop_refs(struct ext4_ext_path *path) 109 116 { 110 117 int depth, i; 111 118 112 - if (!path) 119 + if (IS_ERR_OR_NULL(path)) 113 120 return; 114 121 depth = path->p_depth; 115 - for (i = 0; i <= depth; i++, path++) { 116 - brelse(path->p_bh); 117 - path->p_bh = NULL; 118 - } 122 + for (i = 0; i <= depth; i++, path++) 123 + ext4_ext_path_brelse(path); 119 124 } 120 125 121 126 void ext4_free_ext_path(struct ext4_ext_path *path) 122 127 { 128 + if (IS_ERR_OR_NULL(path)) 129 + return; 123 130 ext4_ext_drop_refs(path); 124 131 kfree(path); 125 132 } ··· 328 323 return size; 329 324 } 330 325 331 - static inline int 326 + static inline struct ext4_ext_path * 332 327 ext4_force_split_extent_at(handle_t *handle, struct inode *inode, 333 - struct ext4_ext_path **ppath, ext4_lblk_t lblk, 328 + struct ext4_ext_path *path, ext4_lblk_t lblk, 334 329 int nofail) 335 330 { 336 - struct ext4_ext_path *path = *ppath; 337 331 int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); 338 332 int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO; 339 333 340 334 if (nofail) 341 335 flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL; 342 336 343 - return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? 337 + return ext4_split_extent_at(handle, inode, path, lblk, unwritten ? 344 338 EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, 345 339 flags); 346 340 } ··· 639 635 */ 640 636 if ((i == depth) || 641 637 path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) { 642 - brelse(path[i].p_bh); 643 - path[i].p_bh = NULL; 638 + ext4_ext_path_brelse(path + i); 644 639 i--; 645 640 continue; 646 641 } ··· 692 689 struct ext4_extent *ex; 693 690 int i; 694 691 695 - if (!path) 692 + if (IS_ERR_OR_NULL(path)) 696 693 return; 697 694 698 695 eh = path[depth].p_hdr; ··· 884 881 885 882 struct ext4_ext_path * 886 883 ext4_find_extent(struct inode *inode, ext4_lblk_t block, 887 - struct ext4_ext_path **orig_path, int flags) 884 + struct ext4_ext_path *path, int flags) 888 885 { 889 886 struct ext4_extent_header *eh; 890 887 struct buffer_head *bh; 891 - struct ext4_ext_path *path = orig_path ? *orig_path : NULL; 892 888 short int depth, i, ppos = 0; 893 889 int ret; 894 890 gfp_t gfp_flags = GFP_NOFS; ··· 908 906 ext4_ext_drop_refs(path); 909 907 if (depth > path[0].p_maxdepth) { 910 908 kfree(path); 911 - *orig_path = path = NULL; 909 + path = NULL; 912 910 } 913 911 } 914 912 if (!path) { ··· 963 961 964 962 err: 965 963 ext4_free_ext_path(path); 966 - if (orig_path) 967 - *orig_path = NULL; 968 964 return ERR_PTR(ret); 969 965 } 970 966 ··· 1395 1395 * finds empty index and adds new leaf. 1396 1396 * if no free index is found, then it requests in-depth growing. 1397 1397 */ 1398 - static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1399 - unsigned int mb_flags, 1400 - unsigned int gb_flags, 1401 - struct ext4_ext_path **ppath, 1402 - struct ext4_extent *newext) 1398 + static struct ext4_ext_path * 1399 + ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1400 + unsigned int mb_flags, unsigned int gb_flags, 1401 + struct ext4_ext_path *path, 1402 + struct ext4_extent *newext) 1403 1403 { 1404 - struct ext4_ext_path *path = *ppath; 1405 1404 struct ext4_ext_path *curp; 1406 1405 int depth, i, err = 0; 1406 + ext4_lblk_t ee_block = le32_to_cpu(newext->ee_block); 1407 1407 1408 1408 repeat: 1409 1409 i = depth = ext_depth(inode); ··· 1422 1422 * entry: create all needed subtree and add new leaf */ 1423 1423 err = ext4_ext_split(handle, inode, mb_flags, path, newext, i); 1424 1424 if (err) 1425 - goto out; 1425 + goto errout; 1426 1426 1427 1427 /* refill path */ 1428 - path = ext4_find_extent(inode, 1429 - (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1430 - ppath, gb_flags); 1431 - if (IS_ERR(path)) 1432 - err = PTR_ERR(path); 1433 - } else { 1434 - /* tree is full, time to grow in depth */ 1435 - err = ext4_ext_grow_indepth(handle, inode, mb_flags); 1436 - if (err) 1437 - goto out; 1438 - 1439 - /* refill path */ 1440 - path = ext4_find_extent(inode, 1441 - (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1442 - ppath, gb_flags); 1443 - if (IS_ERR(path)) { 1444 - err = PTR_ERR(path); 1445 - goto out; 1446 - } 1447 - 1448 - /* 1449 - * only first (depth 0 -> 1) produces free space; 1450 - * in all other cases we have to split the grown tree 1451 - */ 1452 - depth = ext_depth(inode); 1453 - if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { 1454 - /* now we need to split */ 1455 - goto repeat; 1456 - } 1428 + path = ext4_find_extent(inode, ee_block, path, gb_flags); 1429 + return path; 1457 1430 } 1458 1431 1459 - out: 1460 - return err; 1432 + /* tree is full, time to grow in depth */ 1433 + err = ext4_ext_grow_indepth(handle, inode, mb_flags); 1434 + if (err) 1435 + goto errout; 1436 + 1437 + /* refill path */ 1438 + path = ext4_find_extent(inode, ee_block, path, gb_flags); 1439 + if (IS_ERR(path)) 1440 + return path; 1441 + 1442 + /* 1443 + * only first (depth 0 -> 1) produces free space; 1444 + * in all other cases we have to split the grown tree 1445 + */ 1446 + depth = ext_depth(inode); 1447 + if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { 1448 + /* now we need to split */ 1449 + goto repeat; 1450 + } 1451 + 1452 + return path; 1453 + 1454 + errout: 1455 + ext4_free_ext_path(path); 1456 + return ERR_PTR(err); 1461 1457 } 1462 1458 1463 1459 /* ··· 1745 1749 break; 1746 1750 err = ext4_ext_get_access(handle, inode, path + k); 1747 1751 if (err) 1748 - break; 1752 + goto clean; 1749 1753 path[k].p_idx->ei_block = border; 1750 1754 err = ext4_ext_dirty(handle, inode, path + k); 1751 1755 if (err) 1752 - break; 1756 + goto clean; 1753 1757 } 1758 + return 0; 1759 + 1760 + clean: 1761 + /* 1762 + * The path[k].p_bh is either unmodified or with no verified bit 1763 + * set (see ext4_ext_get_access()). So just clear the verified bit 1764 + * of the successfully modified extents buffers, which will force 1765 + * these extents to be checked to avoid using inconsistent data. 1766 + */ 1767 + while (++k < depth) 1768 + clear_buffer_verified(path[k].p_bh); 1754 1769 1755 1770 return err; 1756 1771 } ··· 1883 1876 (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); 1884 1877 path[0].p_hdr->eh_max = cpu_to_le16(max_root); 1885 1878 1886 - brelse(path[1].p_bh); 1879 + ext4_ext_path_brelse(path + 1); 1887 1880 ext4_free_blocks(handle, inode, NULL, blk, 1, 1888 1881 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 1889 1882 } ··· 1971 1964 * inserts requested extent as new one into the tree, 1972 1965 * creating new leaf in the no-space case. 1973 1966 */ 1974 - int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, 1975 - struct ext4_ext_path **ppath, 1976 - struct ext4_extent *newext, int gb_flags) 1967 + struct ext4_ext_path * 1968 + ext4_ext_insert_extent(handle_t *handle, struct inode *inode, 1969 + struct ext4_ext_path *path, 1970 + struct ext4_extent *newext, int gb_flags) 1977 1971 { 1978 - struct ext4_ext_path *path = *ppath; 1979 1972 struct ext4_extent_header *eh; 1980 1973 struct ext4_extent *ex, *fex; 1981 1974 struct ext4_extent *nearex; /* nearest extent */ 1982 - struct ext4_ext_path *npath = NULL; 1983 - int depth, len, err; 1975 + int depth, len, err = 0; 1984 1976 ext4_lblk_t next; 1985 1977 int mb_flags = 0, unwritten; 1986 1978 ··· 1987 1981 mb_flags |= EXT4_MB_DELALLOC_RESERVED; 1988 1982 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1989 1983 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1990 - return -EFSCORRUPTED; 1984 + err = -EFSCORRUPTED; 1985 + goto errout; 1991 1986 } 1992 1987 depth = ext_depth(inode); 1993 1988 ex = path[depth].p_ext; 1994 1989 eh = path[depth].p_hdr; 1995 1990 if (unlikely(path[depth].p_hdr == NULL)) { 1996 1991 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); 1997 - return -EFSCORRUPTED; 1992 + err = -EFSCORRUPTED; 1993 + goto errout; 1998 1994 } 1999 1995 2000 1996 /* try to insert block into found extent and return */ ··· 2034 2026 err = ext4_ext_get_access(handle, inode, 2035 2027 path + depth); 2036 2028 if (err) 2037 - return err; 2029 + goto errout; 2038 2030 unwritten = ext4_ext_is_unwritten(ex); 2039 2031 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 2040 2032 + ext4_ext_get_actual_len(newext)); ··· 2059 2051 err = ext4_ext_get_access(handle, inode, 2060 2052 path + depth); 2061 2053 if (err) 2062 - return err; 2054 + goto errout; 2063 2055 2064 2056 unwritten = ext4_ext_is_unwritten(ex); 2065 2057 ex->ee_block = newext->ee_block; ··· 2084 2076 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) 2085 2077 next = ext4_ext_next_leaf_block(path); 2086 2078 if (next != EXT_MAX_BLOCKS) { 2079 + struct ext4_ext_path *npath; 2080 + 2087 2081 ext_debug(inode, "next leaf block - %u\n", next); 2088 - BUG_ON(npath != NULL); 2089 2082 npath = ext4_find_extent(inode, next, NULL, gb_flags); 2090 - if (IS_ERR(npath)) 2091 - return PTR_ERR(npath); 2083 + if (IS_ERR(npath)) { 2084 + err = PTR_ERR(npath); 2085 + goto errout; 2086 + } 2092 2087 BUG_ON(npath->p_depth != path->p_depth); 2093 2088 eh = npath[depth].p_hdr; 2094 2089 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { 2095 2090 ext_debug(inode, "next leaf isn't full(%d)\n", 2096 2091 le16_to_cpu(eh->eh_entries)); 2092 + ext4_free_ext_path(path); 2097 2093 path = npath; 2098 2094 goto has_space; 2099 2095 } 2100 2096 ext_debug(inode, "next leaf has no free space(%d,%d)\n", 2101 2097 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 2098 + ext4_free_ext_path(npath); 2102 2099 } 2103 2100 2104 2101 /* ··· 2112 2099 */ 2113 2100 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) 2114 2101 mb_flags |= EXT4_MB_USE_RESERVED; 2115 - err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, 2116 - ppath, newext); 2117 - if (err) 2118 - goto cleanup; 2102 + path = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, 2103 + path, newext); 2104 + if (IS_ERR(path)) 2105 + return path; 2119 2106 depth = ext_depth(inode); 2120 2107 eh = path[depth].p_hdr; 2121 2108 ··· 2124 2111 2125 2112 err = ext4_ext_get_access(handle, inode, path + depth); 2126 2113 if (err) 2127 - goto cleanup; 2114 + goto errout; 2128 2115 2129 2116 if (!nearex) { 2130 2117 /* there is no extent in this leaf, create first one */ ··· 2182 2169 if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) 2183 2170 ext4_ext_try_to_merge(handle, inode, path, nearex); 2184 2171 2185 - 2186 2172 /* time to correct all indexes above */ 2187 2173 err = ext4_ext_correct_indexes(handle, inode, path); 2188 2174 if (err) 2189 - goto cleanup; 2175 + goto errout; 2190 2176 2191 2177 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 2178 + if (err) 2179 + goto errout; 2192 2180 2193 - cleanup: 2194 - ext4_free_ext_path(npath); 2195 - return err; 2181 + return path; 2182 + 2183 + errout: 2184 + ext4_free_ext_path(path); 2185 + return ERR_PTR(err); 2196 2186 } 2197 2187 2198 2188 static int ext4_fill_es_cache_info(struct inode *inode, ··· 2295 2279 { 2296 2280 int err; 2297 2281 ext4_fsblk_t leaf; 2282 + int k = depth - 1; 2298 2283 2299 2284 /* free index block */ 2300 - depth--; 2301 - path = path + depth; 2302 - leaf = ext4_idx_pblock(path->p_idx); 2303 - if (unlikely(path->p_hdr->eh_entries == 0)) { 2304 - EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); 2285 + leaf = ext4_idx_pblock(path[k].p_idx); 2286 + if (unlikely(path[k].p_hdr->eh_entries == 0)) { 2287 + EXT4_ERROR_INODE(inode, "path[%d].p_hdr->eh_entries == 0", k); 2305 2288 return -EFSCORRUPTED; 2306 2289 } 2307 - err = ext4_ext_get_access(handle, inode, path); 2290 + err = ext4_ext_get_access(handle, inode, path + k); 2308 2291 if (err) 2309 2292 return err; 2310 2293 2311 - if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) { 2312 - int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx; 2294 + if (path[k].p_idx != EXT_LAST_INDEX(path[k].p_hdr)) { 2295 + int len = EXT_LAST_INDEX(path[k].p_hdr) - path[k].p_idx; 2313 2296 len *= sizeof(struct ext4_extent_idx); 2314 - memmove(path->p_idx, path->p_idx + 1, len); 2297 + memmove(path[k].p_idx, path[k].p_idx + 1, len); 2315 2298 } 2316 2299 2317 - le16_add_cpu(&path->p_hdr->eh_entries, -1); 2318 - err = ext4_ext_dirty(handle, inode, path); 2300 + le16_add_cpu(&path[k].p_hdr->eh_entries, -1); 2301 + err = ext4_ext_dirty(handle, inode, path + k); 2319 2302 if (err) 2320 2303 return err; 2321 2304 ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf); ··· 2323 2308 ext4_free_blocks(handle, inode, NULL, leaf, 1, 2324 2309 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 2325 2310 2326 - while (--depth >= 0) { 2327 - if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) 2311 + while (--k >= 0) { 2312 + if (path[k + 1].p_idx != EXT_FIRST_INDEX(path[k + 1].p_hdr)) 2328 2313 break; 2329 - path--; 2330 - err = ext4_ext_get_access(handle, inode, path); 2314 + err = ext4_ext_get_access(handle, inode, path + k); 2331 2315 if (err) 2332 - break; 2333 - path->p_idx->ei_block = (path+1)->p_idx->ei_block; 2334 - err = ext4_ext_dirty(handle, inode, path); 2316 + goto clean; 2317 + path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block; 2318 + err = ext4_ext_dirty(handle, inode, path + k); 2335 2319 if (err) 2336 - break; 2320 + goto clean; 2337 2321 } 2322 + return 0; 2323 + 2324 + clean: 2325 + /* 2326 + * The path[k].p_bh is either unmodified or with no verified bit 2327 + * set (see ext4_ext_get_access()). So just clear the verified bit 2328 + * of the successfully modified extents buffers, which will force 2329 + * these extents to be checked to avoid using inconsistent data. 2330 + */ 2331 + while (++k < depth) 2332 + clear_buffer_verified(path[k].p_bh); 2333 + 2338 2334 return err; 2339 2335 } 2340 2336 ··· 2898 2872 * fail removing space due to ENOSPC so try to use 2899 2873 * reserved block if that happens. 2900 2874 */ 2901 - err = ext4_force_split_extent_at(handle, inode, &path, 2902 - end + 1, 1); 2903 - if (err < 0) 2875 + path = ext4_force_split_extent_at(handle, inode, path, 2876 + end + 1, 1); 2877 + if (IS_ERR(path)) { 2878 + err = PTR_ERR(path); 2904 2879 goto out; 2905 - 2880 + } 2906 2881 } else if (sbi->s_cluster_ratio > 1 && end >= ex_end && 2907 2882 partial.state == initial) { 2908 2883 /* ··· 2961 2934 err = ext4_ext_rm_leaf(handle, inode, path, 2962 2935 &partial, start, end); 2963 2936 /* root level has p_bh == NULL, brelse() eats this */ 2964 - brelse(path[i].p_bh); 2965 - path[i].p_bh = NULL; 2937 + ext4_ext_path_brelse(path + i); 2966 2938 i--; 2967 2939 continue; 2968 2940 } ··· 3023 2997 err = ext4_ext_rm_idx(handle, inode, path, i); 3024 2998 } 3025 2999 /* root level has p_bh == NULL, brelse() eats this */ 3026 - brelse(path[i].p_bh); 3027 - path[i].p_bh = NULL; 3000 + ext4_ext_path_brelse(path + i); 3028 3001 i--; 3029 3002 ext_debug(inode, "return to level %d\n", i); 3030 3003 } ··· 3138 3113 return; 3139 3114 3140 3115 ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, 3141 - EXTENT_STATUS_WRITTEN); 3116 + EXTENT_STATUS_WRITTEN, 0); 3142 3117 } 3143 3118 3144 3119 /* FIXME!! we need to try to merge to left or right after zero-out */ ··· 3172 3147 * a> the extent are splitted into two extent. 3173 3148 * b> split is not needed, and just mark the extent. 3174 3149 * 3175 - * return 0 on success. 3150 + * Return an extent path pointer on success, or an error pointer on failure. 3176 3151 */ 3177 - static int ext4_split_extent_at(handle_t *handle, 3178 - struct inode *inode, 3179 - struct ext4_ext_path **ppath, 3180 - ext4_lblk_t split, 3181 - int split_flag, 3182 - int flags) 3152 + static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, 3153 + struct inode *inode, 3154 + struct ext4_ext_path *path, 3155 + ext4_lblk_t split, 3156 + int split_flag, int flags) 3183 3157 { 3184 - struct ext4_ext_path *path = *ppath; 3185 3158 ext4_fsblk_t newblock; 3186 3159 ext4_lblk_t ee_block; 3187 3160 struct ext4_extent *ex, newex, orig_ex, zero_ex; ··· 3249 3226 if (split_flag & EXT4_EXT_MARK_UNWRIT2) 3250 3227 ext4_ext_mark_unwritten(ex2); 3251 3228 3252 - err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); 3253 - if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM) 3229 + path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3230 + if (!IS_ERR(path)) 3254 3231 goto out; 3232 + 3233 + err = PTR_ERR(path); 3234 + if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM) 3235 + return path; 3236 + 3237 + /* 3238 + * Get a new path to try to zeroout or fix the extent length. 3239 + * Using EXT4_EX_NOFAIL guarantees that ext4_find_extent() 3240 + * will not return -ENOMEM, otherwise -ENOMEM will cause a 3241 + * retry in do_writepages(), and a WARN_ON may be triggered 3242 + * in ext4_da_update_reserve_space() due to an incorrect 3243 + * ee_len causing the i_reserved_data_blocks exception. 3244 + */ 3245 + path = ext4_find_extent(inode, ee_block, NULL, flags | EXT4_EX_NOFAIL); 3246 + if (IS_ERR(path)) { 3247 + EXT4_ERROR_INODE(inode, "Failed split extent on %u, err %ld", 3248 + split, PTR_ERR(path)); 3249 + return path; 3250 + } 3251 + depth = ext_depth(inode); 3252 + ex = path[depth].p_ext; 3255 3253 3256 3254 if (EXT4_EXT_MAY_ZEROOUT & split_flag) { 3257 3255 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { ··· 3324 3280 * and err is a non-zero error code. 3325 3281 */ 3326 3282 ext4_ext_dirty(handle, inode, path + path->p_depth); 3327 - return err; 3328 3283 out: 3284 + if (err) { 3285 + ext4_free_ext_path(path); 3286 + path = ERR_PTR(err); 3287 + } 3329 3288 ext4_ext_show_leaf(inode, path); 3330 - return err; 3289 + return path; 3331 3290 } 3332 3291 3333 3292 /* 3334 - * ext4_split_extents() splits an extent and mark extent which is covered 3293 + * ext4_split_extent() splits an extent and mark extent which is covered 3335 3294 * by @map as split_flags indicates 3336 3295 * 3337 3296 * It may result in splitting the extent into multiple extents (up to three) ··· 3344 3297 * c> Splits in three extents: Somone is splitting in middle of the extent 3345 3298 * 3346 3299 */ 3347 - static int ext4_split_extent(handle_t *handle, 3348 - struct inode *inode, 3349 - struct ext4_ext_path **ppath, 3350 - struct ext4_map_blocks *map, 3351 - int split_flag, 3352 - int flags) 3300 + static struct ext4_ext_path *ext4_split_extent(handle_t *handle, 3301 + struct inode *inode, 3302 + struct ext4_ext_path *path, 3303 + struct ext4_map_blocks *map, 3304 + int split_flag, int flags, 3305 + unsigned int *allocated) 3353 3306 { 3354 - struct ext4_ext_path *path = *ppath; 3355 3307 ext4_lblk_t ee_block; 3356 3308 struct ext4_extent *ex; 3357 3309 unsigned int ee_len, depth; 3358 - int err = 0; 3359 3310 int unwritten; 3360 3311 int split_flag1, flags1; 3361 - int allocated = map->m_len; 3362 3312 3363 3313 depth = ext_depth(inode); 3364 3314 ex = path[depth].p_ext; ··· 3371 3327 EXT4_EXT_MARK_UNWRIT2; 3372 3328 if (split_flag & EXT4_EXT_DATA_VALID2) 3373 3329 split_flag1 |= EXT4_EXT_DATA_VALID1; 3374 - err = ext4_split_extent_at(handle, inode, ppath, 3330 + path = ext4_split_extent_at(handle, inode, path, 3375 3331 map->m_lblk + map->m_len, split_flag1, flags1); 3376 - if (err) 3377 - goto out; 3378 - } else { 3379 - allocated = ee_len - (map->m_lblk - ee_block); 3332 + if (IS_ERR(path)) 3333 + return path; 3334 + /* 3335 + * Update path is required because previous ext4_split_extent_at 3336 + * may result in split of original leaf or extent zeroout. 3337 + */ 3338 + path = ext4_find_extent(inode, map->m_lblk, path, flags); 3339 + if (IS_ERR(path)) 3340 + return path; 3341 + depth = ext_depth(inode); 3342 + ex = path[depth].p_ext; 3343 + if (!ex) { 3344 + EXT4_ERROR_INODE(inode, "unexpected hole at %lu", 3345 + (unsigned long) map->m_lblk); 3346 + ext4_free_ext_path(path); 3347 + return ERR_PTR(-EFSCORRUPTED); 3348 + } 3349 + unwritten = ext4_ext_is_unwritten(ex); 3380 3350 } 3381 - /* 3382 - * Update path is required because previous ext4_split_extent_at() may 3383 - * result in split of original leaf or extent zeroout. 3384 - */ 3385 - path = ext4_find_extent(inode, map->m_lblk, ppath, flags); 3386 - if (IS_ERR(path)) 3387 - return PTR_ERR(path); 3388 - depth = ext_depth(inode); 3389 - ex = path[depth].p_ext; 3390 - if (!ex) { 3391 - EXT4_ERROR_INODE(inode, "unexpected hole at %lu", 3392 - (unsigned long) map->m_lblk); 3393 - return -EFSCORRUPTED; 3394 - } 3395 - unwritten = ext4_ext_is_unwritten(ex); 3396 3351 3397 3352 if (map->m_lblk >= ee_block) { 3398 3353 split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; ··· 3400 3357 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | 3401 3358 EXT4_EXT_MARK_UNWRIT2); 3402 3359 } 3403 - err = ext4_split_extent_at(handle, inode, ppath, 3360 + path = ext4_split_extent_at(handle, inode, path, 3404 3361 map->m_lblk, split_flag1, flags); 3405 - if (err) 3406 - goto out; 3362 + if (IS_ERR(path)) 3363 + return path; 3407 3364 } 3408 3365 3366 + if (allocated) { 3367 + if (map->m_lblk + map->m_len > ee_block + ee_len) 3368 + *allocated = ee_len - (map->m_lblk - ee_block); 3369 + else 3370 + *allocated = map->m_len; 3371 + } 3409 3372 ext4_ext_show_leaf(inode, path); 3410 - out: 3411 - return err ? err : allocated; 3373 + return path; 3412 3374 } 3413 3375 3414 3376 /* ··· 3436 3388 * that are allocated and initialized. 3437 3389 * It is guaranteed to be >= map->m_len. 3438 3390 */ 3439 - static int ext4_ext_convert_to_initialized(handle_t *handle, 3440 - struct inode *inode, 3441 - struct ext4_map_blocks *map, 3442 - struct ext4_ext_path **ppath, 3443 - int flags) 3391 + static struct ext4_ext_path * 3392 + ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, 3393 + struct ext4_map_blocks *map, struct ext4_ext_path *path, 3394 + int flags, unsigned int *allocated) 3444 3395 { 3445 - struct ext4_ext_path *path = *ppath; 3446 3396 struct ext4_sb_info *sbi; 3447 3397 struct ext4_extent_header *eh; 3448 3398 struct ext4_map_blocks split_map; ··· 3450 3404 unsigned int ee_len, depth, map_len = map->m_len; 3451 3405 int err = 0; 3452 3406 int split_flag = EXT4_EXT_DATA_VALID2; 3453 - int allocated = 0; 3454 3407 unsigned int max_zeroout = 0; 3455 3408 3456 3409 ext_debug(inode, "logical block %llu, max_blocks %u\n", ··· 3490 3445 * - L2: we only attempt to merge with an extent stored in the 3491 3446 * same extent tree node. 3492 3447 */ 3448 + *allocated = 0; 3493 3449 if ((map->m_lblk == ee_block) && 3494 3450 /* See if we can merge left */ 3495 3451 (map_len < ee_len) && /*L1*/ ··· 3520 3474 (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ 3521 3475 err = ext4_ext_get_access(handle, inode, path + depth); 3522 3476 if (err) 3523 - goto out; 3477 + goto errout; 3524 3478 3525 3479 trace_ext4_ext_convert_to_initialized_fastpath(inode, 3526 3480 map, ex, abut_ex); ··· 3535 3489 abut_ex->ee_len = cpu_to_le16(prev_len + map_len); 3536 3490 3537 3491 /* Result: number of initialized blocks past m_lblk */ 3538 - allocated = map_len; 3492 + *allocated = map_len; 3539 3493 } 3540 3494 } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) && 3541 3495 (map_len < ee_len) && /*L1*/ ··· 3566 3520 (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ 3567 3521 err = ext4_ext_get_access(handle, inode, path + depth); 3568 3522 if (err) 3569 - goto out; 3523 + goto errout; 3570 3524 3571 3525 trace_ext4_ext_convert_to_initialized_fastpath(inode, 3572 3526 map, ex, abut_ex); ··· 3581 3535 abut_ex->ee_len = cpu_to_le16(next_len + map_len); 3582 3536 3583 3537 /* Result: number of initialized blocks past m_lblk */ 3584 - allocated = map_len; 3538 + *allocated = map_len; 3585 3539 } 3586 3540 } 3587 - if (allocated) { 3541 + if (*allocated) { 3588 3542 /* Mark the block containing both extents as dirty */ 3589 3543 err = ext4_ext_dirty(handle, inode, path + depth); 3590 3544 3591 3545 /* Update path to point to the right extent */ 3592 3546 path[depth].p_ext = abut_ex; 3547 + if (err) 3548 + goto errout; 3593 3549 goto out; 3594 3550 } else 3595 - allocated = ee_len - (map->m_lblk - ee_block); 3551 + *allocated = ee_len - (map->m_lblk - ee_block); 3596 3552 3597 3553 WARN_ON(map->m_lblk < ee_block); 3598 3554 /* ··· 3621 3573 split_map.m_lblk = map->m_lblk; 3622 3574 split_map.m_len = map->m_len; 3623 3575 3624 - if (max_zeroout && (allocated > split_map.m_len)) { 3625 - if (allocated <= max_zeroout) { 3576 + if (max_zeroout && (*allocated > split_map.m_len)) { 3577 + if (*allocated <= max_zeroout) { 3626 3578 /* case 3 or 5 */ 3627 3579 zero_ex1.ee_block = 3628 3580 cpu_to_le32(split_map.m_lblk + 3629 3581 split_map.m_len); 3630 3582 zero_ex1.ee_len = 3631 - cpu_to_le16(allocated - split_map.m_len); 3583 + cpu_to_le16(*allocated - split_map.m_len); 3632 3584 ext4_ext_store_pblock(&zero_ex1, 3633 3585 ext4_ext_pblock(ex) + split_map.m_lblk + 3634 3586 split_map.m_len - ee_block); 3635 3587 err = ext4_ext_zeroout(inode, &zero_ex1); 3636 3588 if (err) 3637 3589 goto fallback; 3638 - split_map.m_len = allocated; 3590 + split_map.m_len = *allocated; 3639 3591 } 3640 3592 if (split_map.m_lblk - ee_block + split_map.m_len < 3641 3593 max_zeroout) { ··· 3653 3605 3654 3606 split_map.m_len += split_map.m_lblk - ee_block; 3655 3607 split_map.m_lblk = ee_block; 3656 - allocated = map->m_len; 3608 + *allocated = map->m_len; 3657 3609 } 3658 3610 } 3659 3611 3660 3612 fallback: 3661 - err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, 3662 - flags); 3663 - if (err > 0) 3664 - err = 0; 3613 + path = ext4_split_extent(handle, inode, path, &split_map, split_flag, 3614 + flags, NULL); 3615 + if (IS_ERR(path)) 3616 + return path; 3665 3617 out: 3666 3618 /* If we have gotten a failure, don't zero out status tree */ 3667 - if (!err) { 3668 - ext4_zeroout_es(inode, &zero_ex1); 3669 - ext4_zeroout_es(inode, &zero_ex2); 3670 - } 3671 - return err ? err : allocated; 3619 + ext4_zeroout_es(inode, &zero_ex1); 3620 + ext4_zeroout_es(inode, &zero_ex2); 3621 + return path; 3622 + 3623 + errout: 3624 + ext4_free_ext_path(path); 3625 + return ERR_PTR(err); 3672 3626 } 3673 3627 3674 3628 /* ··· 3695 3645 * being filled will be convert to initialized by the end_io callback function 3696 3646 * via ext4_convert_unwritten_extents(). 3697 3647 * 3698 - * Returns the size of unwritten extent to be written on success. 3648 + * The size of unwritten extent to be written is passed to the caller via the 3649 + * allocated pointer. Return an extent path pointer on success, or an error 3650 + * pointer on failure. 3699 3651 */ 3700 - static int ext4_split_convert_extents(handle_t *handle, 3652 + static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, 3701 3653 struct inode *inode, 3702 3654 struct ext4_map_blocks *map, 3703 - struct ext4_ext_path **ppath, 3704 - int flags) 3655 + struct ext4_ext_path *path, 3656 + int flags, unsigned int *allocated) 3705 3657 { 3706 - struct ext4_ext_path *path = *ppath; 3707 3658 ext4_lblk_t eof_block; 3708 3659 ext4_lblk_t ee_block; 3709 3660 struct ext4_extent *ex; ··· 3737 3686 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); 3738 3687 } 3739 3688 flags |= EXT4_GET_BLOCKS_PRE_IO; 3740 - return ext4_split_extent(handle, inode, ppath, map, split_flag, flags); 3689 + return ext4_split_extent(handle, inode, path, map, split_flag, flags, 3690 + allocated); 3741 3691 } 3742 3692 3743 - static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3744 - struct inode *inode, 3745 - struct ext4_map_blocks *map, 3746 - struct ext4_ext_path **ppath) 3693 + static struct ext4_ext_path * 3694 + ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, 3695 + struct ext4_map_blocks *map, 3696 + struct ext4_ext_path *path) 3747 3697 { 3748 - struct ext4_ext_path *path = *ppath; 3749 3698 struct ext4_extent *ex; 3750 3699 ext4_lblk_t ee_block; 3751 3700 unsigned int ee_len; ··· 3773 3722 inode->i_ino, (unsigned long long)ee_block, ee_len, 3774 3723 (unsigned long long)map->m_lblk, map->m_len); 3775 3724 #endif 3776 - err = ext4_split_convert_extents(handle, inode, map, ppath, 3777 - EXT4_GET_BLOCKS_CONVERT); 3778 - if (err < 0) 3779 - return err; 3780 - path = ext4_find_extent(inode, map->m_lblk, ppath, 0); 3725 + path = ext4_split_convert_extents(handle, inode, map, path, 3726 + EXT4_GET_BLOCKS_CONVERT, NULL); 3781 3727 if (IS_ERR(path)) 3782 - return PTR_ERR(path); 3728 + return path; 3729 + 3730 + path = ext4_find_extent(inode, map->m_lblk, path, 0); 3731 + if (IS_ERR(path)) 3732 + return path; 3783 3733 depth = ext_depth(inode); 3784 3734 ex = path[depth].p_ext; 3785 3735 } 3786 3736 3787 3737 err = ext4_ext_get_access(handle, inode, path + depth); 3788 3738 if (err) 3789 - goto out; 3739 + goto errout; 3790 3740 /* first mark the extent as initialized */ 3791 3741 ext4_ext_mark_initialized(ex); 3792 3742 ··· 3798 3746 3799 3747 /* Mark modified extent as dirty */ 3800 3748 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 3801 - out: 3749 + if (err) 3750 + goto errout; 3751 + 3802 3752 ext4_ext_show_leaf(inode, path); 3803 - return err; 3753 + return path; 3754 + 3755 + errout: 3756 + ext4_free_ext_path(path); 3757 + return ERR_PTR(err); 3804 3758 } 3805 3759 3806 - static int 3760 + static struct ext4_ext_path * 3807 3761 convert_initialized_extent(handle_t *handle, struct inode *inode, 3808 3762 struct ext4_map_blocks *map, 3809 - struct ext4_ext_path **ppath, 3763 + struct ext4_ext_path *path, 3810 3764 unsigned int *allocated) 3811 3765 { 3812 - struct ext4_ext_path *path = *ppath; 3813 3766 struct ext4_extent *ex; 3814 3767 ext4_lblk_t ee_block; 3815 3768 unsigned int ee_len; ··· 3837 3780 (unsigned long long)ee_block, ee_len); 3838 3781 3839 3782 if (ee_block != map->m_lblk || ee_len > map->m_len) { 3840 - err = ext4_split_convert_extents(handle, inode, map, ppath, 3841 - EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); 3842 - if (err < 0) 3843 - return err; 3844 - path = ext4_find_extent(inode, map->m_lblk, ppath, 0); 3783 + path = ext4_split_convert_extents(handle, inode, map, path, 3784 + EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, NULL); 3845 3785 if (IS_ERR(path)) 3846 - return PTR_ERR(path); 3786 + return path; 3787 + 3788 + path = ext4_find_extent(inode, map->m_lblk, path, 0); 3789 + if (IS_ERR(path)) 3790 + return path; 3847 3791 depth = ext_depth(inode); 3848 3792 ex = path[depth].p_ext; 3849 3793 if (!ex) { 3850 3794 EXT4_ERROR_INODE(inode, "unexpected hole at %lu", 3851 3795 (unsigned long) map->m_lblk); 3852 - return -EFSCORRUPTED; 3796 + err = -EFSCORRUPTED; 3797 + goto errout; 3853 3798 } 3854 3799 } 3855 3800 3856 3801 err = ext4_ext_get_access(handle, inode, path + depth); 3857 3802 if (err) 3858 - return err; 3803 + goto errout; 3859 3804 /* first mark the extent as unwritten */ 3860 3805 ext4_ext_mark_unwritten(ex); 3861 3806 ··· 3869 3810 /* Mark modified extent as dirty */ 3870 3811 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 3871 3812 if (err) 3872 - return err; 3813 + goto errout; 3873 3814 ext4_ext_show_leaf(inode, path); 3874 3815 3875 3816 ext4_update_inode_fsync_trans(handle, inode, 1); ··· 3878 3819 if (*allocated > map->m_len) 3879 3820 *allocated = map->m_len; 3880 3821 map->m_len = *allocated; 3881 - return 0; 3822 + return path; 3823 + 3824 + errout: 3825 + ext4_free_ext_path(path); 3826 + return ERR_PTR(err); 3882 3827 } 3883 3828 3884 - static int 3829 + static struct ext4_ext_path * 3885 3830 ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, 3886 3831 struct ext4_map_blocks *map, 3887 - struct ext4_ext_path **ppath, int flags, 3888 - unsigned int allocated, ext4_fsblk_t newblock) 3832 + struct ext4_ext_path *path, int flags, 3833 + unsigned int *allocated, ext4_fsblk_t newblock) 3889 3834 { 3890 - struct ext4_ext_path __maybe_unused *path = *ppath; 3891 - int ret = 0; 3892 3835 int err = 0; 3893 3836 3894 3837 ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n", 3895 3838 (unsigned long long)map->m_lblk, map->m_len, flags, 3896 - allocated); 3839 + *allocated); 3897 3840 ext4_ext_show_leaf(inode, path); 3898 3841 3899 3842 /* ··· 3905 3844 flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL; 3906 3845 3907 3846 trace_ext4_ext_handle_unwritten_extents(inode, map, flags, 3908 - allocated, newblock); 3847 + *allocated, newblock); 3909 3848 3910 3849 /* get_block() before submitting IO, split the extent */ 3911 3850 if (flags & EXT4_GET_BLOCKS_PRE_IO) { 3912 - ret = ext4_split_convert_extents(handle, inode, map, ppath, 3913 - flags | EXT4_GET_BLOCKS_CONVERT); 3914 - if (ret < 0) { 3915 - err = ret; 3916 - goto out2; 3917 - } 3851 + path = ext4_split_convert_extents(handle, inode, map, path, 3852 + flags | EXT4_GET_BLOCKS_CONVERT, allocated); 3853 + if (IS_ERR(path)) 3854 + return path; 3918 3855 /* 3919 - * shouldn't get a 0 return when splitting an extent unless 3856 + * shouldn't get a 0 allocated when splitting an extent unless 3920 3857 * m_len is 0 (bug) or extent has been corrupted 3921 3858 */ 3922 - if (unlikely(ret == 0)) { 3859 + if (unlikely(*allocated == 0)) { 3923 3860 EXT4_ERROR_INODE(inode, 3924 - "unexpected ret == 0, m_len = %u", 3861 + "unexpected allocated == 0, m_len = %u", 3925 3862 map->m_len); 3926 3863 err = -EFSCORRUPTED; 3927 - goto out2; 3864 + goto errout; 3928 3865 } 3929 3866 map->m_flags |= EXT4_MAP_UNWRITTEN; 3930 3867 goto out; 3931 3868 } 3932 3869 /* IO end_io complete, convert the filled extent to written */ 3933 3870 if (flags & EXT4_GET_BLOCKS_CONVERT) { 3934 - err = ext4_convert_unwritten_extents_endio(handle, inode, map, 3935 - ppath); 3936 - if (err < 0) 3937 - goto out2; 3871 + path = ext4_convert_unwritten_extents_endio(handle, inode, 3872 + map, path); 3873 + if (IS_ERR(path)) 3874 + return path; 3938 3875 ext4_update_inode_fsync_trans(handle, inode, 1); 3939 3876 goto map_out; 3940 3877 } ··· 3964 3905 * For buffered writes, at writepage time, etc. Convert a 3965 3906 * discovered unwritten extent to written. 3966 3907 */ 3967 - ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags); 3968 - if (ret < 0) { 3969 - err = ret; 3970 - goto out2; 3971 - } 3908 + path = ext4_ext_convert_to_initialized(handle, inode, map, path, 3909 + flags, allocated); 3910 + if (IS_ERR(path)) 3911 + return path; 3972 3912 ext4_update_inode_fsync_trans(handle, inode, 1); 3973 3913 /* 3974 - * shouldn't get a 0 return when converting an unwritten extent 3914 + * shouldn't get a 0 allocated when converting an unwritten extent 3975 3915 * unless m_len is 0 (bug) or extent has been corrupted 3976 3916 */ 3977 - if (unlikely(ret == 0)) { 3978 - EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u", 3917 + if (unlikely(*allocated == 0)) { 3918 + EXT4_ERROR_INODE(inode, "unexpected allocated == 0, m_len = %u", 3979 3919 map->m_len); 3980 3920 err = -EFSCORRUPTED; 3981 - goto out2; 3921 + goto errout; 3982 3922 } 3983 3923 3984 3924 out: 3985 - allocated = ret; 3986 3925 map->m_flags |= EXT4_MAP_NEW; 3987 3926 map_out: 3988 3927 map->m_flags |= EXT4_MAP_MAPPED; 3989 3928 out1: 3990 3929 map->m_pblk = newblock; 3991 - if (allocated > map->m_len) 3992 - allocated = map->m_len; 3993 - map->m_len = allocated; 3930 + if (*allocated > map->m_len) 3931 + *allocated = map->m_len; 3932 + map->m_len = *allocated; 3994 3933 ext4_ext_show_leaf(inode, path); 3995 - out2: 3996 - return err ? err : allocated; 3934 + return path; 3935 + 3936 + errout: 3937 + ext4_free_ext_path(path); 3938 + return ERR_PTR(err); 3997 3939 } 3998 3940 3999 3941 /* ··· 4157 4097 insert_hole: 4158 4098 /* Put just found gap into cache to speed up subsequent requests */ 4159 4099 ext_debug(inode, " -> %u:%u\n", hole_start, len); 4160 - ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); 4100 + ext4_es_insert_extent(inode, hole_start, len, ~0, 4101 + EXTENT_STATUS_HOLE, 0); 4161 4102 4162 4103 /* Update hole_len to reflect hole size after lblk */ 4163 4104 if (hole_start != lblk) ··· 4192 4131 struct ext4_extent newex, *ex, ex2; 4193 4132 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4194 4133 ext4_fsblk_t newblock = 0, pblk; 4195 - int err = 0, depth, ret; 4134 + int err = 0, depth; 4196 4135 unsigned int allocated = 0, offset = 0; 4197 4136 unsigned int allocated_clusters = 0; 4198 4137 struct ext4_allocation_request ar; ··· 4205 4144 path = ext4_find_extent(inode, map->m_lblk, NULL, 0); 4206 4145 if (IS_ERR(path)) { 4207 4146 err = PTR_ERR(path); 4208 - path = NULL; 4209 4147 goto out; 4210 4148 } 4211 4149 ··· 4253 4193 */ 4254 4194 if ((!ext4_ext_is_unwritten(ex)) && 4255 4195 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { 4256 - err = convert_initialized_extent(handle, 4257 - inode, map, &path, &allocated); 4196 + path = convert_initialized_extent(handle, 4197 + inode, map, path, &allocated); 4198 + if (IS_ERR(path)) 4199 + err = PTR_ERR(path); 4258 4200 goto out; 4259 4201 } else if (!ext4_ext_is_unwritten(ex)) { 4260 4202 map->m_flags |= EXT4_MAP_MAPPED; ··· 4268 4206 goto out; 4269 4207 } 4270 4208 4271 - ret = ext4_ext_handle_unwritten_extents( 4272 - handle, inode, map, &path, flags, 4273 - allocated, newblock); 4274 - if (ret < 0) 4275 - err = ret; 4276 - else 4277 - allocated = ret; 4209 + path = ext4_ext_handle_unwritten_extents( 4210 + handle, inode, map, path, flags, 4211 + &allocated, newblock); 4212 + if (IS_ERR(path)) 4213 + err = PTR_ERR(path); 4278 4214 goto out; 4279 4215 } 4280 4216 } ··· 4324 4264 get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) { 4325 4265 ar.len = allocated = map->m_len; 4326 4266 newblock = map->m_pblk; 4267 + err = 0; 4327 4268 goto got_allocated_blocks; 4328 4269 } 4329 4270 ··· 4397 4336 map->m_flags |= EXT4_MAP_UNWRITTEN; 4398 4337 } 4399 4338 4400 - err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags); 4401 - if (err) { 4339 + path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 4340 + if (IS_ERR(path)) { 4341 + err = PTR_ERR(path); 4402 4342 if (allocated_clusters) { 4403 4343 int fb_flags = 0; 4404 4344 ··· 4416 4354 fb_flags); 4417 4355 } 4418 4356 goto out; 4419 - } 4420 - 4421 - /* 4422 - * Reduce the reserved cluster count to reflect successful deferred 4423 - * allocation of delayed allocated clusters or direct allocation of 4424 - * clusters discovered to be delayed allocated. Once allocated, a 4425 - * cluster is not included in the reserved count. 4426 - */ 4427 - if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) { 4428 - if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { 4429 - /* 4430 - * When allocating delayed allocated clusters, simply 4431 - * reduce the reserved cluster count and claim quota 4432 - */ 4433 - ext4_da_update_reserve_space(inode, allocated_clusters, 4434 - 1); 4435 - } else { 4436 - ext4_lblk_t lblk, len; 4437 - unsigned int n; 4438 - 4439 - /* 4440 - * When allocating non-delayed allocated clusters 4441 - * (from fallocate, filemap, DIO, or clusters 4442 - * allocated when delalloc has been disabled by 4443 - * ext4_nonda_switch), reduce the reserved cluster 4444 - * count by the number of allocated clusters that 4445 - * have previously been delayed allocated. Quota 4446 - * has been claimed by ext4_mb_new_blocks() above, 4447 - * so release the quota reservations made for any 4448 - * previously delayed allocated clusters. 4449 - */ 4450 - lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk); 4451 - len = allocated_clusters << sbi->s_cluster_bits; 4452 - n = ext4_es_delayed_clu(inode, lblk, len); 4453 - if (n > 0) 4454 - ext4_da_update_reserve_space(inode, (int) n, 0); 4455 - } 4456 4357 } 4457 4358 4458 4359 /* ··· 5209 5184 * won't be shifted beyond EXT_MAX_BLOCKS. 5210 5185 */ 5211 5186 if (SHIFT == SHIFT_LEFT) { 5212 - path = ext4_find_extent(inode, start - 1, &path, 5187 + path = ext4_find_extent(inode, start - 1, path, 5213 5188 EXT4_EX_NOCACHE); 5214 5189 if (IS_ERR(path)) 5215 5190 return PTR_ERR(path); ··· 5258 5233 * becomes NULL to indicate the end of the loop. 5259 5234 */ 5260 5235 while (iterator && start <= stop) { 5261 - path = ext4_find_extent(inode, *iterator, &path, 5236 + path = ext4_find_extent(inode, *iterator, path, 5262 5237 EXT4_EX_NOCACHE); 5263 5238 if (IS_ERR(path)) 5264 5239 return PTR_ERR(path); ··· 5560 5535 path = ext4_find_extent(inode, offset_lblk, NULL, 0); 5561 5536 if (IS_ERR(path)) { 5562 5537 up_write(&EXT4_I(inode)->i_data_sem); 5538 + ret = PTR_ERR(path); 5563 5539 goto out_stop; 5564 5540 } 5565 5541 ··· 5579 5553 if (ext4_ext_is_unwritten(extent)) 5580 5554 split_flag = EXT4_EXT_MARK_UNWRIT1 | 5581 5555 EXT4_EXT_MARK_UNWRIT2; 5582 - ret = ext4_split_extent_at(handle, inode, &path, 5556 + path = ext4_split_extent_at(handle, inode, path, 5583 5557 offset_lblk, split_flag, 5584 5558 EXT4_EX_NOCACHE | 5585 5559 EXT4_GET_BLOCKS_PRE_IO | 5586 5560 EXT4_GET_BLOCKS_METADATA_NOFAIL); 5587 5561 } 5588 5562 5589 - ext4_free_ext_path(path); 5590 - if (ret < 0) { 5563 + if (IS_ERR(path)) { 5591 5564 up_write(&EXT4_I(inode)->i_data_sem); 5565 + ret = PTR_ERR(path); 5592 5566 goto out_stop; 5593 5567 } 5594 - } else { 5595 - ext4_free_ext_path(path); 5596 5568 } 5597 5569 5570 + ext4_free_ext_path(path); 5598 5571 ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk); 5599 5572 5600 5573 /* ··· 5661 5636 int e1_len, e2_len, len; 5662 5637 int split = 0; 5663 5638 5664 - path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE); 5639 + path1 = ext4_find_extent(inode1, lblk1, path1, EXT4_EX_NOCACHE); 5665 5640 if (IS_ERR(path1)) { 5666 5641 *erp = PTR_ERR(path1); 5667 - path1 = NULL; 5668 - finish: 5669 - count = 0; 5670 - goto repeat; 5642 + goto errout; 5671 5643 } 5672 - path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE); 5644 + path2 = ext4_find_extent(inode2, lblk2, path2, EXT4_EX_NOCACHE); 5673 5645 if (IS_ERR(path2)) { 5674 5646 *erp = PTR_ERR(path2); 5675 - path2 = NULL; 5676 - goto finish; 5647 + goto errout; 5677 5648 } 5678 5649 ex1 = path1[path1->p_depth].p_ext; 5679 5650 ex2 = path2[path2->p_depth].p_ext; 5680 5651 /* Do we have something to swap ? */ 5681 5652 if (unlikely(!ex2 || !ex1)) 5682 - goto finish; 5653 + goto errout; 5683 5654 5684 5655 e1_blk = le32_to_cpu(ex1->ee_block); 5685 5656 e2_blk = le32_to_cpu(ex2->ee_block); ··· 5697 5676 next2 = e2_blk; 5698 5677 /* Do we have something to swap */ 5699 5678 if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) 5700 - goto finish; 5679 + goto errout; 5701 5680 /* Move to the rightest boundary */ 5702 5681 len = next1 - lblk1; 5703 5682 if (len < next2 - lblk2) ··· 5707 5686 lblk1 += len; 5708 5687 lblk2 += len; 5709 5688 count -= len; 5710 - goto repeat; 5689 + continue; 5711 5690 } 5712 5691 5713 5692 /* Prepare left boundary */ 5714 5693 if (e1_blk < lblk1) { 5715 5694 split = 1; 5716 - *erp = ext4_force_split_extent_at(handle, inode1, 5717 - &path1, lblk1, 0); 5718 - if (unlikely(*erp)) 5719 - goto finish; 5695 + path1 = ext4_force_split_extent_at(handle, inode1, 5696 + path1, lblk1, 0); 5697 + if (IS_ERR(path1)) { 5698 + *erp = PTR_ERR(path1); 5699 + goto errout; 5700 + } 5720 5701 } 5721 5702 if (e2_blk < lblk2) { 5722 5703 split = 1; 5723 - *erp = ext4_force_split_extent_at(handle, inode2, 5724 - &path2, lblk2, 0); 5725 - if (unlikely(*erp)) 5726 - goto finish; 5704 + path2 = ext4_force_split_extent_at(handle, inode2, 5705 + path2, lblk2, 0); 5706 + if (IS_ERR(path2)) { 5707 + *erp = PTR_ERR(path2); 5708 + goto errout; 5709 + } 5727 5710 } 5728 5711 /* ext4_split_extent_at() may result in leaf extent split, 5729 5712 * path must to be revalidated. */ 5730 5713 if (split) 5731 - goto repeat; 5714 + continue; 5732 5715 5733 5716 /* Prepare right boundary */ 5734 5717 len = count; ··· 5743 5718 5744 5719 if (len != e1_len) { 5745 5720 split = 1; 5746 - *erp = ext4_force_split_extent_at(handle, inode1, 5747 - &path1, lblk1 + len, 0); 5748 - if (unlikely(*erp)) 5749 - goto finish; 5721 + path1 = ext4_force_split_extent_at(handle, inode1, 5722 + path1, lblk1 + len, 0); 5723 + if (IS_ERR(path1)) { 5724 + *erp = PTR_ERR(path1); 5725 + goto errout; 5726 + } 5750 5727 } 5751 5728 if (len != e2_len) { 5752 5729 split = 1; 5753 - *erp = ext4_force_split_extent_at(handle, inode2, 5754 - &path2, lblk2 + len, 0); 5755 - if (*erp) 5756 - goto finish; 5730 + path2 = ext4_force_split_extent_at(handle, inode2, 5731 + path2, lblk2 + len, 0); 5732 + if (IS_ERR(path2)) { 5733 + *erp = PTR_ERR(path2); 5734 + goto errout; 5735 + } 5757 5736 } 5758 5737 /* ext4_split_extent_at() may result in leaf extent split, 5759 5738 * path must to be revalidated. */ 5760 5739 if (split) 5761 - goto repeat; 5740 + continue; 5762 5741 5763 5742 BUG_ON(e2_len != e1_len); 5764 5743 *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth); 5765 5744 if (unlikely(*erp)) 5766 - goto finish; 5745 + goto errout; 5767 5746 *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth); 5768 5747 if (unlikely(*erp)) 5769 - goto finish; 5748 + goto errout; 5770 5749 5771 5750 /* Both extents are fully inside boundaries. Swap it now */ 5772 5751 tmp_ex = *ex1; ··· 5788 5759 *erp = ext4_ext_dirty(handle, inode2, path2 + 5789 5760 path2->p_depth); 5790 5761 if (unlikely(*erp)) 5791 - goto finish; 5762 + goto errout; 5792 5763 *erp = ext4_ext_dirty(handle, inode1, path1 + 5793 5764 path1->p_depth); 5794 5765 /* ··· 5798 5769 * aborted anyway. 5799 5770 */ 5800 5771 if (unlikely(*erp)) 5801 - goto finish; 5772 + goto errout; 5773 + 5802 5774 lblk1 += len; 5803 5775 lblk2 += len; 5804 5776 replaced_count += len; 5805 5777 count -= len; 5806 - 5807 - repeat: 5808 - ext4_free_ext_path(path1); 5809 - ext4_free_ext_path(path2); 5810 - path1 = path2 = NULL; 5811 5778 } 5779 + 5780 + errout: 5781 + ext4_free_ext_path(path1); 5782 + ext4_free_ext_path(path2); 5812 5783 return replaced_count; 5813 5784 } 5814 5785 ··· 5843 5814 5844 5815 /* search for the extent closest to the first block in the cluster */ 5845 5816 path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0); 5846 - if (IS_ERR(path)) { 5847 - err = PTR_ERR(path); 5848 - path = NULL; 5849 - goto out; 5850 - } 5817 + if (IS_ERR(path)) 5818 + return PTR_ERR(path); 5851 5819 5852 5820 depth = ext_depth(inode); 5853 5821 ··· 5906 5880 int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, 5907 5881 int len, int unwritten, ext4_fsblk_t pblk) 5908 5882 { 5909 - struct ext4_ext_path *path = NULL, *ppath; 5883 + struct ext4_ext_path *path; 5910 5884 struct ext4_extent *ex; 5911 5885 int ret; 5912 5886 ··· 5922 5896 if (le32_to_cpu(ex->ee_block) != start || 5923 5897 ext4_ext_get_actual_len(ex) != len) { 5924 5898 /* We need to split this extent to match our extent first */ 5925 - ppath = path; 5926 5899 down_write(&EXT4_I(inode)->i_data_sem); 5927 - ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1); 5900 + path = ext4_force_split_extent_at(NULL, inode, path, start, 1); 5928 5901 up_write(&EXT4_I(inode)->i_data_sem); 5929 - if (ret) 5902 + if (IS_ERR(path)) { 5903 + ret = PTR_ERR(path); 5930 5904 goto out; 5931 - kfree(path); 5932 - path = ext4_find_extent(inode, start, NULL, 0); 5905 + } 5906 + 5907 + path = ext4_find_extent(inode, start, path, 0); 5933 5908 if (IS_ERR(path)) 5934 - return -1; 5935 - ppath = path; 5909 + return PTR_ERR(path); 5910 + 5936 5911 ex = path[path->p_depth].p_ext; 5937 5912 WARN_ON(le32_to_cpu(ex->ee_block) != start); 5913 + 5938 5914 if (ext4_ext_get_actual_len(ex) != len) { 5939 5915 down_write(&EXT4_I(inode)->i_data_sem); 5940 - ret = ext4_force_split_extent_at(NULL, inode, &ppath, 5941 - start + len, 1); 5916 + path = ext4_force_split_extent_at(NULL, inode, path, 5917 + start + len, 1); 5942 5918 up_write(&EXT4_I(inode)->i_data_sem); 5943 - if (ret) 5919 + if (IS_ERR(path)) { 5920 + ret = PTR_ERR(path); 5944 5921 goto out; 5945 - kfree(path); 5946 - path = ext4_find_extent(inode, start, NULL, 0); 5922 + } 5923 + 5924 + path = ext4_find_extent(inode, start, path, 0); 5947 5925 if (IS_ERR(path)) 5948 - return -EINVAL; 5926 + return PTR_ERR(path); 5949 5927 ex = path[path->p_depth].p_ext; 5950 5928 } 5951 5929 } ··· 6031 6001 if (IS_ERR(path)) 6032 6002 return PTR_ERR(path); 6033 6003 ex = path[path->p_depth].p_ext; 6034 - if (!ex) { 6035 - ext4_free_ext_path(path); 6004 + if (!ex) 6036 6005 goto out; 6037 - } 6038 6006 end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); 6039 - ext4_free_ext_path(path); 6040 6007 6041 6008 /* Count the number of data blocks */ 6042 6009 cur = 0; ··· 6059 6032 ret = skip_hole(inode, &cur); 6060 6033 if (ret < 0) 6061 6034 goto out; 6062 - path = ext4_find_extent(inode, cur, NULL, 0); 6035 + path = ext4_find_extent(inode, cur, path, 0); 6063 6036 if (IS_ERR(path)) 6064 6037 goto out; 6065 6038 numblks += path->p_depth; 6066 - ext4_free_ext_path(path); 6067 6039 while (cur < end) { 6068 - path = ext4_find_extent(inode, cur, NULL, 0); 6040 + path = ext4_find_extent(inode, cur, path, 0); 6069 6041 if (IS_ERR(path)) 6070 6042 break; 6071 6043 ex = path[path->p_depth].p_ext; 6072 - if (!ex) { 6073 - ext4_free_ext_path(path); 6074 - return 0; 6075 - } 6044 + if (!ex) 6045 + goto cleanup; 6046 + 6076 6047 cur = max(cur + 1, le32_to_cpu(ex->ee_block) + 6077 6048 ext4_ext_get_actual_len(ex)); 6078 6049 ret = skip_hole(inode, &cur); 6079 - if (ret < 0) { 6080 - ext4_free_ext_path(path); 6050 + if (ret < 0) 6081 6051 break; 6082 - } 6083 - path2 = ext4_find_extent(inode, cur, NULL, 0); 6084 - if (IS_ERR(path2)) { 6085 - ext4_free_ext_path(path); 6052 + 6053 + path2 = ext4_find_extent(inode, cur, path2, 0); 6054 + if (IS_ERR(path2)) 6086 6055 break; 6087 - } 6056 + 6088 6057 for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { 6089 6058 cmp1 = cmp2 = 0; 6090 6059 if (i <= path->p_depth) ··· 6092 6069 if (cmp1 != cmp2 && cmp2 != 0) 6093 6070 numblks++; 6094 6071 } 6095 - ext4_free_ext_path(path); 6096 - ext4_free_ext_path(path2); 6097 6072 } 6098 6073 6099 6074 out: 6100 6075 inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9); 6101 6076 ext4_mark_inode_dirty(NULL, inode); 6077 + cleanup: 6078 + ext4_free_ext_path(path); 6079 + ext4_free_ext_path(path2); 6102 6080 return 0; 6103 6081 } 6104 6082 ··· 6120 6096 if (IS_ERR(path)) 6121 6097 return PTR_ERR(path); 6122 6098 ex = path[path->p_depth].p_ext; 6123 - if (!ex) { 6124 - ext4_free_ext_path(path); 6125 - return 0; 6126 - } 6099 + if (!ex) 6100 + goto out; 6127 6101 end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); 6128 - ext4_free_ext_path(path); 6129 6102 6130 6103 cur = 0; 6131 6104 while (cur < end) { ··· 6132 6111 if (ret < 0) 6133 6112 break; 6134 6113 if (ret > 0) { 6135 - path = ext4_find_extent(inode, map.m_lblk, NULL, 0); 6136 - if (!IS_ERR_OR_NULL(path)) { 6114 + path = ext4_find_extent(inode, map.m_lblk, path, 0); 6115 + if (!IS_ERR(path)) { 6137 6116 for (j = 0; j < path->p_depth; j++) { 6138 - 6139 6117 ext4_mb_mark_bb(inode->i_sb, 6140 6118 path[j].p_block, 1, false); 6141 6119 ext4_fc_record_regions(inode->i_sb, inode->i_ino, 6142 6120 0, path[j].p_block, 1, 1); 6143 6121 } 6144 - ext4_free_ext_path(path); 6122 + } else { 6123 + path = NULL; 6145 6124 } 6146 6125 ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false); 6147 6126 ext4_fc_record_regions(inode->i_sb, inode->i_ino, ··· 6150 6129 cur = cur + map.m_len; 6151 6130 } 6152 6131 6132 + out: 6133 + ext4_free_ext_path(path); 6153 6134 return 0; 6154 6135 }
+87 -153
fs/ext4/extents_status.c
··· 558 558 if (ext4_es_is_hole(es1)) 559 559 return 1; 560 560 561 - /* we need to check delayed extent is without unwritten status */ 562 - if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1)) 561 + /* we need to check delayed extent */ 562 + if (ext4_es_is_delayed(es1)) 563 563 return 1; 564 564 565 565 return 0; ··· 848 848 */ 849 849 void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 850 850 ext4_lblk_t len, ext4_fsblk_t pblk, 851 - unsigned int status) 851 + unsigned int status, int flags) 852 852 { 853 853 struct extent_status newes; 854 854 ext4_lblk_t end = lblk + len - 1; 855 855 int err1 = 0, err2 = 0, err3 = 0; 856 + int resv_used = 0, pending = 0; 856 857 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 857 858 struct extent_status *es1 = NULL; 858 859 struct extent_status *es2 = NULL; ··· 863 862 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 864 863 return; 865 864 866 - es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", 867 - lblk, len, pblk, status, inode->i_ino); 865 + es_debug("add [%u/%u) %llu %x %x to extent status tree of inode %lu\n", 866 + lblk, len, pblk, status, flags, inode->i_ino); 868 867 869 868 if (!len) 870 869 return; 871 870 872 871 BUG_ON(end < lblk); 873 - 874 - if ((status & EXTENT_STATUS_DELAYED) && 875 - (status & EXTENT_STATUS_WRITTEN)) { 876 - ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as " 877 - " delayed and written which can potentially " 878 - " cause data loss.", lblk, len); 879 - WARN_ON(1); 880 - } 872 + WARN_ON_ONCE(status & EXTENT_STATUS_DELAYED); 881 873 882 874 newes.es_lblk = lblk; 883 875 newes.es_len = len; ··· 888 894 es1 = __es_alloc_extent(true); 889 895 if ((err1 || err2) && !es2) 890 896 es2 = __es_alloc_extent(true); 891 - if ((err1 || err2 || err3) && revise_pending && !pr) 897 + if ((err1 || err2 || err3 < 0) && revise_pending && !pr) 892 898 pr = __alloc_pending(true); 893 899 write_lock(&EXT4_I(inode)->i_es_lock); 894 900 895 - err1 = __es_remove_extent(inode, lblk, end, NULL, es1); 901 + err1 = __es_remove_extent(inode, lblk, end, &resv_used, es1); 896 902 if (err1 != 0) 897 903 goto error; 898 904 /* Free preallocated extent if it didn't get used. */ ··· 916 922 917 923 if (revise_pending) { 918 924 err3 = __revise_pending(inode, lblk, len, &pr); 919 - if (err3 != 0) 925 + if (err3 < 0) 920 926 goto error; 921 927 if (pr) { 922 928 __free_pending(pr); 923 929 pr = NULL; 924 930 } 931 + pending = err3; 925 932 } 926 933 error: 927 934 write_unlock(&EXT4_I(inode)->i_es_lock); 928 - if (err1 || err2 || err3) 935 + /* 936 + * Reduce the reserved cluster count to reflect successful deferred 937 + * allocation of delayed allocated clusters or direct allocation of 938 + * clusters discovered to be delayed allocated. Once allocated, a 939 + * cluster is not included in the reserved count. 940 + * 941 + * When direct allocating (from fallocate, filemap, DIO, or clusters 942 + * allocated when delalloc has been disabled by ext4_nonda_switch()) 943 + * an extent either 1) contains delayed blocks but start with 944 + * non-delayed allocated blocks (e.g. hole) or 2) contains non-delayed 945 + * allocated blocks which belong to delayed allocated clusters when 946 + * bigalloc feature is enabled, quota has already been claimed by 947 + * ext4_mb_new_blocks(), so release the quota reservations made for 948 + * any previously delayed allocated clusters instead of claim them 949 + * again. 950 + */ 951 + resv_used += pending; 952 + if (resv_used) 953 + ext4_da_update_reserve_space(inode, resv_used, 954 + flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE); 955 + 956 + if (err1 || err2 || err3 < 0) 929 957 goto retry; 930 958 931 959 ext4_es_print_tree(inode); ··· 1067 1051 } 1068 1052 1069 1053 struct rsvd_count { 1070 - int ndelonly; 1054 + int ndelayed; 1071 1055 bool first_do_lblk_found; 1072 1056 ext4_lblk_t first_do_lblk; 1073 1057 ext4_lblk_t last_do_lblk; ··· 1093 1077 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1094 1078 struct rb_node *node; 1095 1079 1096 - rc->ndelonly = 0; 1080 + rc->ndelayed = 0; 1097 1081 1098 1082 /* 1099 - * for bigalloc, note the first delonly block in the range has not 1083 + * for bigalloc, note the first delayed block in the range has not 1100 1084 * been found, record the extent containing the block to the left of 1101 1085 * the region to be removed, if any, and note that there's no partial 1102 1086 * cluster to track ··· 1116 1100 } 1117 1101 1118 1102 /* 1119 - * count_rsvd - count the clusters containing delayed and not unwritten 1120 - * (delonly) blocks in a range within an extent and add to 1121 - * the running tally in rsvd_count 1103 + * count_rsvd - count the clusters containing delayed blocks in a range 1104 + * within an extent and add to the running tally in rsvd_count 1122 1105 * 1123 1106 * @inode - file containing extent 1124 1107 * @lblk - first block in range ··· 1134 1119 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1135 1120 ext4_lblk_t i, end, nclu; 1136 1121 1137 - if (!ext4_es_is_delonly(es)) 1122 + if (!ext4_es_is_delayed(es)) 1138 1123 return; 1139 1124 1140 1125 WARN_ON(len <= 0); 1141 1126 1142 1127 if (sbi->s_cluster_ratio == 1) { 1143 - rc->ndelonly += (int) len; 1128 + rc->ndelayed += (int) len; 1144 1129 return; 1145 1130 } 1146 1131 ··· 1150 1135 end = lblk + (ext4_lblk_t) len - 1; 1151 1136 end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end; 1152 1137 1153 - /* record the first block of the first delonly extent seen */ 1138 + /* record the first block of the first delayed extent seen */ 1154 1139 if (!rc->first_do_lblk_found) { 1155 1140 rc->first_do_lblk = i; 1156 1141 rc->first_do_lblk_found = true; ··· 1164 1149 * doesn't start with it, count it and stop tracking 1165 1150 */ 1166 1151 if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) { 1167 - rc->ndelonly++; 1152 + rc->ndelayed++; 1168 1153 rc->partial = false; 1169 1154 } 1170 1155 ··· 1174 1159 */ 1175 1160 if (EXT4_LBLK_COFF(sbi, i) != 0) { 1176 1161 if (end >= EXT4_LBLK_CFILL(sbi, i)) { 1177 - rc->ndelonly++; 1162 + rc->ndelayed++; 1178 1163 rc->partial = false; 1179 1164 i = EXT4_LBLK_CFILL(sbi, i) + 1; 1180 1165 } ··· 1182 1167 1183 1168 /* 1184 1169 * if the current cluster starts on a cluster boundary, count the 1185 - * number of whole delonly clusters in the extent 1170 + * number of whole delayed clusters in the extent 1186 1171 */ 1187 1172 if ((i + sbi->s_cluster_ratio - 1) <= end) { 1188 1173 nclu = (end - i + 1) >> sbi->s_cluster_bits; 1189 - rc->ndelonly += nclu; 1174 + rc->ndelayed += nclu; 1190 1175 i += nclu << sbi->s_cluster_bits; 1191 1176 } 1192 1177 ··· 1246 1231 * @rc - pointer to reserved count data 1247 1232 * 1248 1233 * The number of reservations to be released is equal to the number of 1249 - * clusters containing delayed and not unwritten (delonly) blocks within 1250 - * the range, minus the number of clusters still containing delonly blocks 1251 - * at the ends of the range, and minus the number of pending reservations 1252 - * within the range. 1234 + * clusters containing delayed blocks within the range, minus the number of 1235 + * clusters still containing delayed blocks at the ends of the range, and 1236 + * minus the number of pending reservations within the range. 1253 1237 */ 1254 1238 static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, 1255 1239 struct extent_status *right_es, ··· 1259 1245 struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree; 1260 1246 struct rb_node *node; 1261 1247 ext4_lblk_t first_lclu, last_lclu; 1262 - bool left_delonly, right_delonly, count_pending; 1248 + bool left_delayed, right_delayed, count_pending; 1263 1249 struct extent_status *es; 1264 1250 1265 1251 if (sbi->s_cluster_ratio > 1) { 1266 1252 /* count any remaining partial cluster */ 1267 1253 if (rc->partial) 1268 - rc->ndelonly++; 1254 + rc->ndelayed++; 1269 1255 1270 - if (rc->ndelonly == 0) 1256 + if (rc->ndelayed == 0) 1271 1257 return 0; 1272 1258 1273 1259 first_lclu = EXT4_B2C(sbi, rc->first_do_lblk); 1274 1260 last_lclu = EXT4_B2C(sbi, rc->last_do_lblk); 1275 1261 1276 1262 /* 1277 - * decrease the delonly count by the number of clusters at the 1278 - * ends of the range that still contain delonly blocks - 1263 + * decrease the delayed count by the number of clusters at the 1264 + * ends of the range that still contain delayed blocks - 1279 1265 * these clusters still need to be reserved 1280 1266 */ 1281 - left_delonly = right_delonly = false; 1267 + left_delayed = right_delayed = false; 1282 1268 1283 1269 es = rc->left_es; 1284 1270 while (es && ext4_es_end(es) >= 1285 1271 EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) { 1286 - if (ext4_es_is_delonly(es)) { 1287 - rc->ndelonly--; 1288 - left_delonly = true; 1272 + if (ext4_es_is_delayed(es)) { 1273 + rc->ndelayed--; 1274 + left_delayed = true; 1289 1275 break; 1290 1276 } 1291 1277 node = rb_prev(&es->rb_node); ··· 1293 1279 break; 1294 1280 es = rb_entry(node, struct extent_status, rb_node); 1295 1281 } 1296 - if (right_es && (!left_delonly || first_lclu != last_lclu)) { 1282 + if (right_es && (!left_delayed || first_lclu != last_lclu)) { 1297 1283 if (end < ext4_es_end(right_es)) { 1298 1284 es = right_es; 1299 1285 } else { ··· 1303 1289 } 1304 1290 while (es && es->es_lblk <= 1305 1291 EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) { 1306 - if (ext4_es_is_delonly(es)) { 1307 - rc->ndelonly--; 1308 - right_delonly = true; 1292 + if (ext4_es_is_delayed(es)) { 1293 + rc->ndelayed--; 1294 + right_delayed = true; 1309 1295 break; 1310 1296 } 1311 1297 node = rb_next(&es->rb_node); ··· 1319 1305 /* 1320 1306 * Determine the block range that should be searched for 1321 1307 * pending reservations, if any. Clusters on the ends of the 1322 - * original removed range containing delonly blocks are 1308 + * original removed range containing delayed blocks are 1323 1309 * excluded. They've already been accounted for and it's not 1324 1310 * possible to determine if an associated pending reservation 1325 1311 * should be released with the information available in the 1326 1312 * extents status tree. 1327 1313 */ 1328 1314 if (first_lclu == last_lclu) { 1329 - if (left_delonly | right_delonly) 1315 + if (left_delayed | right_delayed) 1330 1316 count_pending = false; 1331 1317 else 1332 1318 count_pending = true; 1333 1319 } else { 1334 - if (left_delonly) 1320 + if (left_delayed) 1335 1321 first_lclu++; 1336 - if (right_delonly) 1322 + if (right_delayed) 1337 1323 last_lclu--; 1338 1324 if (first_lclu <= last_lclu) 1339 1325 count_pending = true; ··· 1344 1330 /* 1345 1331 * a pending reservation found between first_lclu and last_lclu 1346 1332 * represents an allocated cluster that contained at least one 1347 - * delonly block, so the delonly total must be reduced by one 1333 + * delayed block, so the delayed total must be reduced by one 1348 1334 * for each pending reservation found and released 1349 1335 */ 1350 1336 if (count_pending) { 1351 1337 pr = __pr_tree_search(&tree->root, first_lclu); 1352 1338 while (pr && pr->lclu <= last_lclu) { 1353 - rc->ndelonly--; 1339 + rc->ndelayed--; 1354 1340 node = rb_next(&pr->rb_node); 1355 1341 rb_erase(&pr->rb_node, &tree->root); 1356 1342 __free_pending(pr); ··· 1361 1347 } 1362 1348 } 1363 1349 } 1364 - return rc->ndelonly; 1350 + return rc->ndelayed; 1365 1351 } 1366 1352 1367 1353 ··· 1954 1940 * @lblk - logical block in the cluster to be added 1955 1941 * @prealloc - preallocated pending entry 1956 1942 * 1957 - * Returns 0 on successful insertion and -ENOMEM on failure. If the 1943 + * Returns 1 on successful insertion and -ENOMEM on failure. If the 1958 1944 * pending reservation is already in the set, returns successfully. 1959 1945 */ 1960 1946 static int __insert_pending(struct inode *inode, ext4_lblk_t lblk, ··· 1998 1984 1999 1985 rb_link_node(&pr->rb_node, parent, p); 2000 1986 rb_insert_color(&pr->rb_node, &tree->root); 1987 + ret = 1; 2001 1988 2002 1989 out: 2003 1990 return ret; ··· 2120 2105 es1 = __es_alloc_extent(true); 2121 2106 if ((err1 || err2) && !es2) 2122 2107 es2 = __es_alloc_extent(true); 2123 - if (err1 || err2 || err3) { 2108 + if (err1 || err2 || err3 < 0) { 2124 2109 if (lclu_allocated && !pr1) 2125 2110 pr1 = __alloc_pending(true); 2126 2111 if (end_allocated && !pr2) ··· 2150 2135 2151 2136 if (lclu_allocated) { 2152 2137 err3 = __insert_pending(inode, lblk, &pr1); 2153 - if (err3 != 0) 2138 + if (err3 < 0) 2154 2139 goto error; 2155 2140 if (pr1) { 2156 2141 __free_pending(pr1); ··· 2159 2144 } 2160 2145 if (end_allocated) { 2161 2146 err3 = __insert_pending(inode, end, &pr2); 2162 - if (err3 != 0) 2147 + if (err3 < 0) 2163 2148 goto error; 2164 2149 if (pr2) { 2165 2150 __free_pending(pr2); ··· 2168 2153 } 2169 2154 error: 2170 2155 write_unlock(&EXT4_I(inode)->i_es_lock); 2171 - if (err1 || err2 || err3) 2156 + if (err1 || err2 || err3 < 0) 2172 2157 goto retry; 2173 2158 2174 2159 ext4_es_print_tree(inode); 2175 2160 ext4_print_pending_tree(inode); 2176 2161 return; 2177 - } 2178 - 2179 - /* 2180 - * __es_delayed_clu - count number of clusters containing blocks that 2181 - * are delayed only 2182 - * 2183 - * @inode - file containing block range 2184 - * @start - logical block defining start of range 2185 - * @end - logical block defining end of range 2186 - * 2187 - * Returns the number of clusters containing only delayed (not delayed 2188 - * and unwritten) blocks in the range specified by @start and @end. Any 2189 - * cluster or part of a cluster within the range and containing a delayed 2190 - * and not unwritten block within the range is counted as a whole cluster. 2191 - */ 2192 - static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start, 2193 - ext4_lblk_t end) 2194 - { 2195 - struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; 2196 - struct extent_status *es; 2197 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2198 - struct rb_node *node; 2199 - ext4_lblk_t first_lclu, last_lclu; 2200 - unsigned long long last_counted_lclu; 2201 - unsigned int n = 0; 2202 - 2203 - /* guaranteed to be unequal to any ext4_lblk_t value */ 2204 - last_counted_lclu = ~0ULL; 2205 - 2206 - es = __es_tree_search(&tree->root, start); 2207 - 2208 - while (es && (es->es_lblk <= end)) { 2209 - if (ext4_es_is_delonly(es)) { 2210 - if (es->es_lblk <= start) 2211 - first_lclu = EXT4_B2C(sbi, start); 2212 - else 2213 - first_lclu = EXT4_B2C(sbi, es->es_lblk); 2214 - 2215 - if (ext4_es_end(es) >= end) 2216 - last_lclu = EXT4_B2C(sbi, end); 2217 - else 2218 - last_lclu = EXT4_B2C(sbi, ext4_es_end(es)); 2219 - 2220 - if (first_lclu == last_counted_lclu) 2221 - n += last_lclu - first_lclu; 2222 - else 2223 - n += last_lclu - first_lclu + 1; 2224 - last_counted_lclu = last_lclu; 2225 - } 2226 - node = rb_next(&es->rb_node); 2227 - if (!node) 2228 - break; 2229 - es = rb_entry(node, struct extent_status, rb_node); 2230 - } 2231 - 2232 - return n; 2233 - } 2234 - 2235 - /* 2236 - * ext4_es_delayed_clu - count number of clusters containing blocks that 2237 - * are both delayed and unwritten 2238 - * 2239 - * @inode - file containing block range 2240 - * @lblk - logical block defining start of range 2241 - * @len - number of blocks in range 2242 - * 2243 - * Locking for external use of __es_delayed_clu(). 2244 - */ 2245 - unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk, 2246 - ext4_lblk_t len) 2247 - { 2248 - struct ext4_inode_info *ei = EXT4_I(inode); 2249 - ext4_lblk_t end; 2250 - unsigned int n; 2251 - 2252 - if (len == 0) 2253 - return 0; 2254 - 2255 - end = lblk + len - 1; 2256 - WARN_ON(end < lblk); 2257 - 2258 - read_lock(&ei->i_es_lock); 2259 - 2260 - n = __es_delayed_clu(inode, lblk, end); 2261 - 2262 - read_unlock(&ei->i_es_lock); 2263 - 2264 - return n; 2265 2162 } 2266 2163 2267 2164 /* ··· 2190 2263 * 2191 2264 * Used after a newly allocated extent is added to the extents status tree. 2192 2265 * Requires that the extents in the range have either written or unwritten 2193 - * status. Must be called while holding i_es_lock. 2266 + * status. Must be called while holding i_es_lock. Returns number of new 2267 + * inserts pending cluster on insert pendings, returns 0 on remove pendings, 2268 + * return -ENOMEM on failure. 2194 2269 */ 2195 2270 static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, 2196 2271 ext4_lblk_t len, ··· 2202 2273 ext4_lblk_t end = lblk + len - 1; 2203 2274 ext4_lblk_t first, last; 2204 2275 bool f_del = false, l_del = false; 2276 + int pendings = 0; 2205 2277 int ret = 0; 2206 2278 2207 2279 if (len == 0) ··· 2224 2294 if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) { 2225 2295 first = EXT4_LBLK_CMASK(sbi, lblk); 2226 2296 if (first != lblk) 2227 - f_del = __es_scan_range(inode, &ext4_es_is_delonly, 2297 + f_del = __es_scan_range(inode, &ext4_es_is_delayed, 2228 2298 first, lblk - 1); 2229 2299 if (f_del) { 2230 2300 ret = __insert_pending(inode, first, prealloc); 2231 2301 if (ret < 0) 2232 2302 goto out; 2303 + pendings += ret; 2233 2304 } else { 2234 2305 last = EXT4_LBLK_CMASK(sbi, end) + 2235 2306 sbi->s_cluster_ratio - 1; 2236 2307 if (last != end) 2237 2308 l_del = __es_scan_range(inode, 2238 - &ext4_es_is_delonly, 2309 + &ext4_es_is_delayed, 2239 2310 end + 1, last); 2240 2311 if (l_del) { 2241 2312 ret = __insert_pending(inode, last, prealloc); 2242 2313 if (ret < 0) 2243 2314 goto out; 2315 + pendings += ret; 2244 2316 } else 2245 2317 __remove_pending(inode, last); 2246 2318 } 2247 2319 } else { 2248 2320 first = EXT4_LBLK_CMASK(sbi, lblk); 2249 2321 if (first != lblk) 2250 - f_del = __es_scan_range(inode, &ext4_es_is_delonly, 2322 + f_del = __es_scan_range(inode, &ext4_es_is_delayed, 2251 2323 first, lblk - 1); 2252 2324 if (f_del) { 2253 2325 ret = __insert_pending(inode, first, prealloc); 2254 2326 if (ret < 0) 2255 2327 goto out; 2328 + pendings += ret; 2256 2329 } else 2257 2330 __remove_pending(inode, first); 2258 2331 2259 2332 last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1; 2260 2333 if (last != end) 2261 - l_del = __es_scan_range(inode, &ext4_es_is_delonly, 2334 + l_del = __es_scan_range(inode, &ext4_es_is_delayed, 2262 2335 end + 1, last); 2263 2336 if (l_del) { 2264 2337 ret = __insert_pending(inode, last, prealloc); 2265 2338 if (ret < 0) 2266 2339 goto out; 2340 + pendings += ret; 2267 2341 } else 2268 2342 __remove_pending(inode, last); 2269 2343 } 2270 2344 out: 2271 - return ret; 2345 + return (ret < 0) ? ret : pendings; 2272 2346 }
+11 -17
fs/ext4/extents_status.h
··· 42 42 #define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS) 43 43 #define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT) 44 44 45 + /* 46 + * Besides EXTENT_STATUS_REFERENCED, all these extent type masks 47 + * are exclusive, only one type can be set at a time. 48 + */ 45 49 #define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B) 46 50 #define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B) 47 51 #define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B) ··· 55 51 #define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \ 56 52 EXTENT_STATUS_UNWRITTEN | \ 57 53 EXTENT_STATUS_DELAYED | \ 58 - EXTENT_STATUS_HOLE) << ES_SHIFT) 54 + EXTENT_STATUS_HOLE)) 55 + 56 + #define ES_TYPE_VALID(type) ((type) && !((type) & ((type) - 1))) 59 57 60 58 struct ext4_sb_info; 61 59 struct ext4_extent; ··· 135 129 136 130 extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 137 131 ext4_lblk_t len, ext4_fsblk_t pblk, 138 - unsigned int status); 132 + unsigned int status, int flags); 139 133 extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, 140 134 ext4_lblk_t len, ext4_fsblk_t pblk, 141 135 unsigned int status); ··· 162 156 163 157 static inline unsigned int ext4_es_type(struct extent_status *es) 164 158 { 165 - return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT; 159 + return (es->es_pblk >> ES_SHIFT) & ES_TYPE_MASK; 166 160 } 167 161 168 162 static inline int ext4_es_is_written(struct extent_status *es) ··· 188 182 static inline int ext4_es_is_mapped(struct extent_status *es) 189 183 { 190 184 return (ext4_es_is_written(es) || ext4_es_is_unwritten(es)); 191 - } 192 - 193 - static inline int ext4_es_is_delonly(struct extent_status *es) 194 - { 195 - return (ext4_es_is_delayed(es) && !ext4_es_is_unwritten(es)); 196 185 } 197 186 198 187 static inline void ext4_es_set_referenced(struct extent_status *es) ··· 225 224 es->es_pblk = block; 226 225 } 227 226 228 - static inline void ext4_es_store_status(struct extent_status *es, 229 - unsigned int status) 230 - { 231 - es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) | 232 - (es->es_pblk & ~ES_MASK); 233 - } 234 - 235 227 static inline void ext4_es_store_pblock_status(struct extent_status *es, 236 228 ext4_fsblk_t pb, 237 229 unsigned int status) 238 230 { 231 + WARN_ON_ONCE(!ES_TYPE_VALID(status & ES_TYPE_MASK)); 232 + 239 233 es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) | 240 234 (pb & ~ES_MASK); 241 235 } ··· 248 252 extern void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk, 249 253 ext4_lblk_t len, bool lclu_allocated, 250 254 bool end_allocated); 251 - extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk, 252 - ext4_lblk_t len); 253 255 extern void ext4_clear_inode_es(struct inode *inode); 254 256 255 257 #endif /* _EXT4_EXTENTS_STATUS_H */
+35 -12
fs/ext4/fast_commit.c
··· 339 339 { 340 340 struct ext4_sb_info *sbi = EXT4_SB(sb); 341 341 tid_t tid; 342 + bool has_transaction = true; 343 + bool is_ineligible; 342 344 343 345 if (ext4_fc_disabled(sb)) 344 346 return; 345 347 346 - ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 347 348 if (handle && !IS_ERR(handle)) 348 349 tid = handle->h_transaction->t_tid; 349 350 else { 350 351 read_lock(&sbi->s_journal->j_state_lock); 351 - tid = sbi->s_journal->j_running_transaction ? 352 - sbi->s_journal->j_running_transaction->t_tid : 0; 352 + if (sbi->s_journal->j_running_transaction) 353 + tid = sbi->s_journal->j_running_transaction->t_tid; 354 + else 355 + has_transaction = false; 353 356 read_unlock(&sbi->s_journal->j_state_lock); 354 357 } 355 358 spin_lock(&sbi->s_fc_lock); 356 - if (tid_gt(tid, sbi->s_fc_ineligible_tid)) 359 + is_ineligible = ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 360 + if (has_transaction && 361 + (!is_ineligible || 362 + (is_ineligible && tid_gt(tid, sbi->s_fc_ineligible_tid)))) 357 363 sbi->s_fc_ineligible_tid = tid; 364 + ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 358 365 spin_unlock(&sbi->s_fc_lock); 359 366 WARN_ON(reason >= EXT4_FC_REASON_MAX); 360 367 sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; ··· 1295 1288 list_del_init(&iter->i_fc_list); 1296 1289 ext4_clear_inode_state(&iter->vfs_inode, 1297 1290 EXT4_STATE_FC_COMMITTING); 1298 - if (tid_geq(tid, iter->i_sync_tid)) 1291 + if (tid_geq(tid, iter->i_sync_tid)) { 1299 1292 ext4_fc_reset_inode(&iter->vfs_inode); 1293 + } else if (full) { 1294 + /* 1295 + * We are called after a full commit, inode has been 1296 + * modified while the commit was running. Re-enqueue 1297 + * the inode into STAGING, which will then be splice 1298 + * back into MAIN. This cannot happen during 1299 + * fastcommit because the journal is locked all the 1300 + * time in that case (and tid doesn't increase so 1301 + * tid check above isn't reliable). 1302 + */ 1303 + list_add_tail(&EXT4_I(&iter->vfs_inode)->i_fc_list, 1304 + &sbi->s_fc_q[FC_Q_STAGING]); 1305 + } 1300 1306 /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ 1301 1307 smp_mb(); 1302 1308 #if (BITS_PER_LONG < 64) ··· 1792 1772 1793 1773 if (ret == 0) { 1794 1774 /* Range is not mapped */ 1795 - path = ext4_find_extent(inode, cur, NULL, 0); 1775 + path = ext4_find_extent(inode, cur, path, 0); 1796 1776 if (IS_ERR(path)) 1797 1777 goto out; 1798 1778 memset(&newex, 0, sizeof(newex)); ··· 1803 1783 if (ext4_ext_is_unwritten(ex)) 1804 1784 ext4_ext_mark_unwritten(&newex); 1805 1785 down_write(&EXT4_I(inode)->i_data_sem); 1806 - ret = ext4_ext_insert_extent( 1807 - NULL, inode, &path, &newex, 0); 1786 + path = ext4_ext_insert_extent(NULL, inode, 1787 + path, &newex, 0); 1808 1788 up_write((&EXT4_I(inode)->i_data_sem)); 1809 - ext4_free_ext_path(path); 1810 - if (ret) 1789 + if (IS_ERR(path)) 1811 1790 goto out; 1812 1791 goto next; 1813 1792 } ··· 1855 1836 ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> 1856 1837 sb->s_blocksize_bits); 1857 1838 out: 1839 + ext4_free_ext_path(path); 1858 1840 iput(inode); 1859 1841 return 0; 1860 1842 } ··· 1956 1936 break; 1957 1937 1958 1938 if (ret > 0) { 1959 - path = ext4_find_extent(inode, map.m_lblk, NULL, 0); 1939 + path = ext4_find_extent(inode, map.m_lblk, path, 0); 1960 1940 if (!IS_ERR(path)) { 1961 1941 for (j = 0; j < path->p_depth; j++) 1962 1942 ext4_mb_mark_bb(inode->i_sb, 1963 1943 path[j].p_block, 1, true); 1964 - ext4_free_ext_path(path); 1944 + } else { 1945 + path = NULL; 1965 1946 } 1966 1947 cur += ret; 1967 1948 ext4_mb_mark_bb(inode->i_sb, map.m_pblk, ··· 1973 1952 } 1974 1953 iput(inode); 1975 1954 } 1955 + 1956 + ext4_free_ext_path(path); 1976 1957 } 1977 1958 1978 1959 /*
+10 -10
fs/ext4/file.c
··· 306 306 } 307 307 308 308 static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, 309 - ssize_t count) 309 + ssize_t written, ssize_t count) 310 310 { 311 311 handle_t *handle; 312 312 ··· 315 315 if (IS_ERR(handle)) 316 316 return PTR_ERR(handle); 317 317 318 - if (ext4_update_inode_size(inode, offset + count)) { 318 + if (ext4_update_inode_size(inode, offset + written)) { 319 319 int ret = ext4_mark_inode_dirty(handle, inode); 320 320 if (unlikely(ret)) { 321 321 ext4_journal_stop(handle); ··· 323 323 } 324 324 } 325 325 326 - if (inode->i_nlink) 326 + if ((written == count) && inode->i_nlink) 327 327 ext4_orphan_del(handle, inode); 328 328 ext4_journal_stop(handle); 329 329 330 - return count; 330 + return written; 331 331 } 332 332 333 333 /* 334 334 * Clean up the inode after DIO or DAX extending write has completed and the 335 335 * inode size has been updated using ext4_handle_inode_extension(). 336 336 */ 337 - static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count) 337 + static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc) 338 338 { 339 339 lockdep_assert_held_write(&inode->i_rwsem); 340 - if (count < 0) { 340 + if (need_trunc) { 341 341 ext4_truncate_failed_write(inode); 342 342 /* 343 343 * If the truncate operation failed early, then the inode may ··· 393 393 if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) && 394 394 pos + size <= i_size_read(inode)) 395 395 return size; 396 - return ext4_handle_inode_extension(inode, pos, size); 396 + return ext4_handle_inode_extension(inode, pos, size, size); 397 397 } 398 398 399 399 static const struct iomap_dio_ops ext4_dio_write_ops = { ··· 586 586 * writeback of delalloc blocks. 587 587 */ 588 588 WARN_ON_ONCE(ret == -EIOCBQUEUED); 589 - ext4_inode_extension_cleanup(inode, ret); 589 + ext4_inode_extension_cleanup(inode, ret < 0); 590 590 } 591 591 592 592 out: ··· 669 669 ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); 670 670 671 671 if (extend) { 672 - ret = ext4_handle_inode_extension(inode, offset, ret); 673 - ext4_inode_extension_cleanup(inode, ret); 672 + ret = ext4_handle_inode_extension(inode, offset, ret, count); 673 + ext4_inode_extension_cleanup(inode, ret < (ssize_t)count); 674 674 } 675 675 out: 676 676 inode_unlock(inode);
+16 -19
fs/ext4/ialloc.c
··· 87 87 if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) 88 88 return 0; 89 89 90 - grp = ext4_get_group_info(sb, block_group); 91 - 92 90 if (buffer_verified(bh)) 93 91 return 0; 92 + 93 + grp = ext4_get_group_info(sb, block_group); 94 94 if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) 95 95 return -EFSCORRUPTED; 96 96 ··· 98 98 if (buffer_verified(bh)) 99 99 goto verified; 100 100 blk = ext4_inode_bitmap(sb, desc); 101 - if (!ext4_inode_bitmap_csum_verify(sb, desc, bh, 102 - EXT4_INODES_PER_GROUP(sb) / 8) || 101 + if (!ext4_inode_bitmap_csum_verify(sb, desc, bh) || 103 102 ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) { 104 103 ext4_unlock_group(sb, block_group); 105 104 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " ··· 326 327 if (percpu_counter_initialized(&sbi->s_dirs_counter)) 327 328 percpu_counter_dec(&sbi->s_dirs_counter); 328 329 } 329 - ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh, 330 - EXT4_INODES_PER_GROUP(sb) / 8); 330 + ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh); 331 331 ext4_group_desc_csum_set(sb, block_group, gdp); 332 332 ext4_unlock_group(sb, block_group); 333 333 ··· 512 514 if (min_inodes < 1) 513 515 min_inodes = 1; 514 516 min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4; 517 + if (min_clusters < 0) 518 + min_clusters = 0; 515 519 516 520 /* 517 521 * Start looking in the flex group where we last allocated an ··· 755 755 struct ext4_group_desc *gdp; 756 756 ext4_group_t group; 757 757 int bit; 758 - int err = -EFSCORRUPTED; 758 + int err; 759 759 760 760 if (ino < EXT4_FIRST_INO(sb) || ino > max_ino) 761 - goto out; 761 + return -EFSCORRUPTED; 762 762 763 763 group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 764 764 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); ··· 772 772 } 773 773 774 774 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 775 - if (!gdp || !group_desc_bh) { 775 + if (!gdp) { 776 776 err = -EINVAL; 777 777 goto out; 778 778 } ··· 851 851 852 852 ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); 853 853 if (ext4_has_group_desc_csum(sb)) { 854 - ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh, 855 - EXT4_INODES_PER_GROUP(sb) / 8); 854 + ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh); 856 855 ext4_group_desc_csum_set(sb, group, gdp); 857 856 } 858 857 ··· 859 860 err = ext4_handle_dirty_metadata(NULL, NULL, group_desc_bh); 860 861 sync_dirty_buffer(group_desc_bh); 861 862 out: 863 + brelse(inode_bitmap_bh); 862 864 return err; 863 865 } 864 866 ··· 1053 1053 brelse(inode_bitmap_bh); 1054 1054 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); 1055 1055 /* Skip groups with suspicious inode tables */ 1056 - if (((!(sbi->s_mount_state & EXT4_FC_REPLAY)) 1057 - && EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || 1058 - IS_ERR(inode_bitmap_bh)) { 1056 + if (IS_ERR(inode_bitmap_bh)) { 1059 1057 inode_bitmap_bh = NULL; 1060 1058 goto next_group; 1061 1059 } 1060 + if (!(sbi->s_mount_state & EXT4_FC_REPLAY) && 1061 + EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) 1062 + goto next_group; 1062 1063 1063 - repeat_in_this_group: 1064 1064 ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino); 1065 1065 if (!ret2) 1066 1066 goto next_group; ··· 1110 1110 if (!ret2) 1111 1111 goto got; /* we grabbed the inode! */ 1112 1112 1113 - if (ino < EXT4_INODES_PER_GROUP(sb)) 1114 - goto repeat_in_this_group; 1115 1113 next_group: 1116 1114 if (++group == ngroups) 1117 1115 group = 0; ··· 1222 1224 } 1223 1225 } 1224 1226 if (ext4_has_group_desc_csum(sb)) { 1225 - ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh, 1226 - EXT4_INODES_PER_GROUP(sb) / 8); 1227 + ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh); 1227 1228 ext4_group_desc_csum_set(sb, group, gdp); 1228 1229 } 1229 1230 ext4_unlock_group(sb, group);
-7
fs/ext4/indirect.c
··· 652 652 ext4_update_inode_fsync_trans(handle, inode, 1); 653 653 count = ar.len; 654 654 655 - /* 656 - * Update reserved blocks/metadata blocks after successful block 657 - * allocation which had been deferred till now. 658 - */ 659 - if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 660 - ext4_da_update_reserve_space(inode, count, 1); 661 - 662 655 got_it: 663 656 map->m_flags |= EXT4_MAP_MAPPED; 664 657 map->m_pblk = le32_to_cpu(chain[depth-1].key);
+31 -15
fs/ext4/inline.c
··· 601 601 goto out; 602 602 603 603 if (ext4_should_dioread_nolock(inode)) { 604 - ret = __block_write_begin(folio, from, to, 605 - ext4_get_block_unwritten); 604 + ret = ext4_block_write_begin(handle, folio, from, to, 605 + ext4_get_block_unwritten); 606 606 } else 607 - ret = __block_write_begin(folio, from, to, ext4_get_block); 607 + ret = ext4_block_write_begin(handle, folio, from, to, 608 + ext4_get_block); 608 609 609 610 if (!ret && ext4_should_journal_data(inode)) { 610 611 ret = ext4_walk_page_buffers(handle, inode, ··· 857 856 goto out; 858 857 } 859 858 860 - ret = __block_write_begin(folio, 0, inline_size, 861 - ext4_da_get_block_prep); 859 + ret = ext4_block_write_begin(NULL, folio, 0, inline_size, 860 + ext4_da_get_block_prep); 862 861 if (ret) { 863 862 up_read(&EXT4_I(inode)->xattr_sem); 864 863 folio_unlock(folio); ··· 1666 1665 struct ext4_dir_entry_2 **res_dir, 1667 1666 int *has_inline_data) 1668 1667 { 1668 + struct ext4_xattr_ibody_find is = { 1669 + .s = { .not_found = -ENODATA, }, 1670 + }; 1671 + struct ext4_xattr_info i = { 1672 + .name_index = EXT4_XATTR_INDEX_SYSTEM, 1673 + .name = EXT4_XATTR_SYSTEM_DATA, 1674 + }; 1669 1675 int ret; 1670 - struct ext4_iloc iloc; 1671 1676 void *inline_start; 1672 1677 int inline_size; 1673 1678 1674 - if (ext4_get_inode_loc(dir, &iloc)) 1675 - return NULL; 1679 + ret = ext4_get_inode_loc(dir, &is.iloc); 1680 + if (ret) 1681 + return ERR_PTR(ret); 1676 1682 1677 1683 down_read(&EXT4_I(dir)->xattr_sem); 1684 + 1685 + ret = ext4_xattr_ibody_find(dir, &i, &is); 1686 + if (ret) 1687 + goto out; 1688 + 1678 1689 if (!ext4_has_inline_data(dir)) { 1679 1690 *has_inline_data = 0; 1680 1691 goto out; 1681 1692 } 1682 1693 1683 - inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1694 + inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + 1684 1695 EXT4_INLINE_DOTDOT_SIZE; 1685 1696 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1686 - ret = ext4_search_dir(iloc.bh, inline_start, inline_size, 1697 + ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1687 1698 dir, fname, 0, res_dir); 1688 1699 if (ret == 1) 1689 1700 goto out_find; ··· 1705 1692 if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) 1706 1693 goto out; 1707 1694 1708 - inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1695 + inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); 1709 1696 inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; 1710 1697 1711 - ret = ext4_search_dir(iloc.bh, inline_start, inline_size, 1698 + ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1712 1699 dir, fname, 0, res_dir); 1713 1700 if (ret == 1) 1714 1701 goto out_find; 1715 1702 1716 1703 out: 1717 - brelse(iloc.bh); 1718 - iloc.bh = NULL; 1704 + brelse(is.iloc.bh); 1705 + if (ret < 0) 1706 + is.iloc.bh = ERR_PTR(ret); 1707 + else 1708 + is.iloc.bh = NULL; 1719 1709 out_find: 1720 1710 up_read(&EXT4_I(dir)->xattr_sem); 1721 - return iloc.bh; 1711 + return is.iloc.bh; 1722 1712 } 1723 1713 1724 1714 int ext4_delete_inline_entry(handle_t *handle,
+133 -159
fs/ext4/inode.c
··· 49 49 50 50 #include <trace/events/ext4.h> 51 51 52 + static void ext4_journalled_zero_new_buffers(handle_t *handle, 53 + struct inode *inode, 54 + struct folio *folio, 55 + unsigned from, unsigned to); 56 + 52 57 static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, 53 58 struct ext4_inode_info *ei) 54 59 { ··· 483 478 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 484 479 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 485 480 ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 486 - map->m_pblk, status); 481 + map->m_pblk, status, 0); 482 + return retval; 483 + } 484 + 485 + static int ext4_map_create_blocks(handle_t *handle, struct inode *inode, 486 + struct ext4_map_blocks *map, int flags) 487 + { 488 + struct extent_status es; 489 + unsigned int status; 490 + int err, retval = 0; 491 + 492 + /* 493 + * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE 494 + * indicates that the blocks and quotas has already been 495 + * checked when the data was copied into the page cache. 496 + */ 497 + if (map->m_flags & EXT4_MAP_DELAYED) 498 + flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 499 + 500 + /* 501 + * Here we clear m_flags because after allocating an new extent, 502 + * it will be set again. 503 + */ 504 + map->m_flags &= ~EXT4_MAP_FLAGS; 505 + 506 + /* 507 + * We need to check for EXT4 here because migrate could have 508 + * changed the inode type in between. 509 + */ 510 + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 511 + retval = ext4_ext_map_blocks(handle, inode, map, flags); 512 + } else { 513 + retval = ext4_ind_map_blocks(handle, inode, map, flags); 514 + 515 + /* 516 + * We allocated new blocks which will result in i_data's 517 + * format changing. Force the migrate to fail by clearing 518 + * migrate flags. 519 + */ 520 + if (retval > 0 && map->m_flags & EXT4_MAP_NEW) 521 + ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 522 + } 523 + if (retval <= 0) 524 + return retval; 525 + 526 + if (unlikely(retval != map->m_len)) { 527 + ext4_warning(inode->i_sb, 528 + "ES len assertion failed for inode %lu: " 529 + "retval %d != map->m_len %d", 530 + inode->i_ino, retval, map->m_len); 531 + WARN_ON(1); 532 + } 533 + 534 + /* 535 + * We have to zeroout blocks before inserting them into extent 536 + * status tree. Otherwise someone could look them up there and 537 + * use them before they are really zeroed. We also have to 538 + * unmap metadata before zeroing as otherwise writeback can 539 + * overwrite zeros with stale data from block device. 540 + */ 541 + if (flags & EXT4_GET_BLOCKS_ZERO && 542 + map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_NEW) { 543 + err = ext4_issue_zeroout(inode, map->m_lblk, map->m_pblk, 544 + map->m_len); 545 + if (err) 546 + return err; 547 + } 548 + 549 + /* 550 + * If the extent has been zeroed out, we don't need to update 551 + * extent status tree. 552 + */ 553 + if (flags & EXT4_GET_BLOCKS_PRE_IO && 554 + ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 555 + if (ext4_es_is_written(&es)) 556 + return retval; 557 + } 558 + 559 + status = map->m_flags & EXT4_MAP_UNWRITTEN ? 560 + EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 561 + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 562 + map->m_pblk, status, flags); 563 + 487 564 return retval; 488 565 } 489 566 ··· 663 576 * file system block. 664 577 */ 665 578 down_read(&EXT4_I(inode)->i_data_sem); 666 - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 667 - retval = ext4_ext_map_blocks(handle, inode, map, 0); 668 - } else { 669 - retval = ext4_ind_map_blocks(handle, inode, map, 0); 670 - } 671 - if (retval > 0) { 672 - unsigned int status; 673 - 674 - if (unlikely(retval != map->m_len)) { 675 - ext4_warning(inode->i_sb, 676 - "ES len assertion failed for inode " 677 - "%lu: retval %d != map->m_len %d", 678 - inode->i_ino, retval, map->m_len); 679 - WARN_ON(1); 680 - } 681 - 682 - status = map->m_flags & EXT4_MAP_UNWRITTEN ? 683 - EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 684 - if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 685 - !(status & EXTENT_STATUS_WRITTEN) && 686 - ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, 687 - map->m_lblk + map->m_len - 1)) 688 - status |= EXTENT_STATUS_DELAYED; 689 - ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 690 - map->m_pblk, status); 691 - } 579 + retval = ext4_map_query_blocks(handle, inode, map); 692 580 up_read((&EXT4_I(inode)->i_data_sem)); 693 581 694 582 found: ··· 693 631 return retval; 694 632 695 633 /* 696 - * Here we clear m_flags because after allocating an new extent, 697 - * it will be set again. 698 - */ 699 - map->m_flags &= ~EXT4_MAP_FLAGS; 700 - 701 - /* 702 634 * New blocks allocate and/or writing to unwritten extent 703 635 * will possibly result in updating i_data, so we take 704 636 * the write lock of i_data_sem, and call get_block() 705 637 * with create == 1 flag. 706 638 */ 707 639 down_write(&EXT4_I(inode)->i_data_sem); 708 - 709 - /* 710 - * We need to check for EXT4 here because migrate 711 - * could have changed the inode type in between 712 - */ 713 - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 714 - retval = ext4_ext_map_blocks(handle, inode, map, flags); 715 - } else { 716 - retval = ext4_ind_map_blocks(handle, inode, map, flags); 717 - 718 - if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { 719 - /* 720 - * We allocated new blocks which will result in 721 - * i_data's format changing. Force the migrate 722 - * to fail by clearing migrate flags 723 - */ 724 - ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 725 - } 726 - } 727 - 728 - if (retval > 0) { 729 - unsigned int status; 730 - 731 - if (unlikely(retval != map->m_len)) { 732 - ext4_warning(inode->i_sb, 733 - "ES len assertion failed for inode " 734 - "%lu: retval %d != map->m_len %d", 735 - inode->i_ino, retval, map->m_len); 736 - WARN_ON(1); 737 - } 738 - 739 - /* 740 - * We have to zeroout blocks before inserting them into extent 741 - * status tree. Otherwise someone could look them up there and 742 - * use them before they are really zeroed. We also have to 743 - * unmap metadata before zeroing as otherwise writeback can 744 - * overwrite zeros with stale data from block device. 745 - */ 746 - if (flags & EXT4_GET_BLOCKS_ZERO && 747 - map->m_flags & EXT4_MAP_MAPPED && 748 - map->m_flags & EXT4_MAP_NEW) { 749 - ret = ext4_issue_zeroout(inode, map->m_lblk, 750 - map->m_pblk, map->m_len); 751 - if (ret) { 752 - retval = ret; 753 - goto out_sem; 754 - } 755 - } 756 - 757 - /* 758 - * If the extent has been zeroed out, we don't need to update 759 - * extent status tree. 760 - */ 761 - if ((flags & EXT4_GET_BLOCKS_PRE_IO) && 762 - ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 763 - if (ext4_es_is_written(&es)) 764 - goto out_sem; 765 - } 766 - status = map->m_flags & EXT4_MAP_UNWRITTEN ? 767 - EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 768 - if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 769 - !(status & EXTENT_STATUS_WRITTEN) && 770 - ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, 771 - map->m_lblk + map->m_len - 1)) 772 - status |= EXTENT_STATUS_DELAYED; 773 - ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 774 - map->m_pblk, status); 775 - } 776 - 777 - out_sem: 640 + retval = ext4_map_create_blocks(handle, inode, map, flags); 778 641 up_write((&EXT4_I(inode)->i_data_sem)); 779 642 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 780 643 ret = check_block_validity(inode, map); ··· 1005 1018 int do_journal_get_write_access(handle_t *handle, struct inode *inode, 1006 1019 struct buffer_head *bh) 1007 1020 { 1008 - int dirty = buffer_dirty(bh); 1009 - int ret; 1010 - 1011 1021 if (!buffer_mapped(bh) || buffer_freed(bh)) 1012 1022 return 0; 1013 - /* 1014 - * __block_write_begin() could have dirtied some buffers. Clean 1015 - * the dirty bit as jbd2_journal_get_write_access() could complain 1016 - * otherwise about fs integrity issues. Setting of the dirty bit 1017 - * by __block_write_begin() isn't a real problem here as we clear 1018 - * the bit before releasing a page lock and thus writeback cannot 1019 - * ever write the buffer. 1020 - */ 1021 - if (dirty) 1022 - clear_buffer_dirty(bh); 1023 1023 BUFFER_TRACE(bh, "get write access"); 1024 - ret = ext4_journal_get_write_access(handle, inode->i_sb, bh, 1024 + return ext4_journal_get_write_access(handle, inode->i_sb, bh, 1025 1025 EXT4_JTR_NONE); 1026 - if (!ret && dirty) 1027 - ret = ext4_dirty_journalled_data(handle, bh); 1028 - return ret; 1029 1026 } 1030 1027 1031 - #ifdef CONFIG_FS_ENCRYPTION 1032 - static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, 1033 - get_block_t *get_block) 1028 + int ext4_block_write_begin(handle_t *handle, struct folio *folio, 1029 + loff_t pos, unsigned len, 1030 + get_block_t *get_block) 1034 1031 { 1035 1032 unsigned from = pos & (PAGE_SIZE - 1); 1036 1033 unsigned to = from + len; ··· 1027 1056 struct buffer_head *bh, *head, *wait[2]; 1028 1057 int nr_wait = 0; 1029 1058 int i; 1059 + bool should_journal_data = ext4_should_journal_data(inode); 1030 1060 1031 1061 BUG_ON(!folio_test_locked(folio)); 1032 1062 BUG_ON(from > PAGE_SIZE); ··· 1057 1085 if (err) 1058 1086 break; 1059 1087 if (buffer_new(bh)) { 1088 + /* 1089 + * We may be zeroing partial buffers or all new 1090 + * buffers in case of failure. Prepare JBD2 for 1091 + * that. 1092 + */ 1093 + if (should_journal_data) 1094 + do_journal_get_write_access(handle, 1095 + inode, bh); 1060 1096 if (folio_test_uptodate(folio)) { 1061 - clear_buffer_new(bh); 1097 + /* 1098 + * Unlike __block_write_begin() we leave 1099 + * dirtying of new uptodate buffers to 1100 + * ->write_end() time or 1101 + * folio_zero_new_buffers(). 1102 + */ 1062 1103 set_buffer_uptodate(bh); 1063 - mark_buffer_dirty(bh); 1064 1104 continue; 1065 1105 } 1066 1106 if (block_end > to || block_start < from) ··· 1102 1118 err = -EIO; 1103 1119 } 1104 1120 if (unlikely(err)) { 1105 - folio_zero_new_buffers(folio, from, to); 1121 + if (should_journal_data) 1122 + ext4_journalled_zero_new_buffers(handle, inode, folio, 1123 + from, to); 1124 + else 1125 + folio_zero_new_buffers(folio, from, to); 1106 1126 } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) { 1107 1127 for (i = 0; i < nr_wait; i++) { 1108 1128 int err2; ··· 1122 1134 1123 1135 return err; 1124 1136 } 1125 - #endif 1126 1137 1127 1138 /* 1128 1139 * To preserve ordering, it is essential that the hole instantiation and ··· 1203 1216 /* In case writeback began while the folio was unlocked */ 1204 1217 folio_wait_stable(folio); 1205 1218 1206 - #ifdef CONFIG_FS_ENCRYPTION 1207 1219 if (ext4_should_dioread_nolock(inode)) 1208 - ret = ext4_block_write_begin(folio, pos, len, 1220 + ret = ext4_block_write_begin(handle, folio, pos, len, 1209 1221 ext4_get_block_unwritten); 1210 1222 else 1211 - ret = ext4_block_write_begin(folio, pos, len, ext4_get_block); 1212 - #else 1213 - if (ext4_should_dioread_nolock(inode)) 1214 - ret = __block_write_begin(folio, pos, len, 1215 - ext4_get_block_unwritten); 1216 - else 1217 - ret = __block_write_begin(folio, pos, len, ext4_get_block); 1218 - #endif 1223 + ret = ext4_block_write_begin(handle, folio, pos, len, 1224 + ext4_get_block); 1219 1225 if (!ret && ext4_should_journal_data(inode)) { 1220 1226 ret = ext4_walk_page_buffers(handle, inode, 1221 1227 folio_buffers(folio), from, to, ··· 1221 1241 1222 1242 folio_unlock(folio); 1223 1243 /* 1224 - * __block_write_begin may have instantiated a few blocks 1244 + * ext4_block_write_begin may have instantiated a few blocks 1225 1245 * outside i_size. Trim these off again. Don't need 1226 1246 * i_size_read because we hold i_rwsem. 1227 1247 * ··· 1368 1388 size = min(to, block_end) - start; 1369 1389 1370 1390 folio_zero_range(folio, start, size); 1371 - write_end_fn(handle, inode, bh); 1372 1391 } 1373 1392 clear_buffer_new(bh); 1393 + write_end_fn(handle, inode, bh); 1374 1394 } 1375 1395 } 1376 1396 block_start = block_end; ··· 1641 1661 int ret; 1642 1662 1643 1663 /* Has delalloc reservation? */ 1644 - if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) 1664 + if (ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk)) 1645 1665 return 1; 1646 1666 1647 1667 /* Already been allocated? */ ··· 1762 1782 * Delayed extent could be allocated by fallocate. 1763 1783 * So we need to check it. 1764 1784 */ 1765 - if (ext4_es_is_delonly(&es)) { 1785 + if (ext4_es_is_delayed(&es)) { 1766 1786 map->m_flags |= EXT4_MAP_DELAYED; 1767 1787 return 0; 1768 1788 } ··· 2197 2217 * writeback and there is nothing we can do about it so it might result 2198 2218 * in data loss. So use reserved blocks to allocate metadata if 2199 2219 * possible. 2200 - * 2201 - * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if 2202 - * the blocks in question are delalloc blocks. This indicates 2203 - * that the blocks and quotas has already been checked when 2204 - * the data was copied into the page cache. 2205 2220 */ 2206 2221 get_blocks_flags = EXT4_GET_BLOCKS_CREATE | 2207 2222 EXT4_GET_BLOCKS_METADATA_NOFAIL | ··· 2204 2229 dioread_nolock = ext4_should_dioread_nolock(inode); 2205 2230 if (dioread_nolock) 2206 2231 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; 2207 - if (map->m_flags & BIT(BH_Delay)) 2208 - get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 2209 2232 2210 2233 err = ext4_map_blocks(handle, inode, map, get_blocks_flags); 2211 2234 if (err < 0) ··· 2932 2959 if (IS_ERR(folio)) 2933 2960 return PTR_ERR(folio); 2934 2961 2935 - #ifdef CONFIG_FS_ENCRYPTION 2936 - ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep); 2937 - #else 2938 - ret = __block_write_begin(folio, pos, len, ext4_da_get_block_prep); 2939 - #endif 2962 + ret = ext4_block_write_begin(NULL, folio, pos, len, 2963 + ext4_da_get_block_prep); 2940 2964 if (ret < 0) { 2941 2965 folio_unlock(folio); 2942 2966 folio_put(folio); ··· 4037 4067 stop_block); 4038 4068 4039 4069 ext4_es_insert_extent(inode, first_block, hole_len, ~0, 4040 - EXTENT_STATUS_HOLE); 4070 + EXTENT_STATUS_HOLE, 0); 4041 4071 up_write(&EXT4_I(inode)->i_data_sem); 4042 4072 } 4043 4073 ext4_fc_track_range(handle, inode, first_block, stop_block); ··· 5246 5276 { 5247 5277 unsigned offset; 5248 5278 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 5249 - tid_t commit_tid = 0; 5279 + tid_t commit_tid; 5250 5280 int ret; 5281 + bool has_transaction; 5251 5282 5252 5283 offset = inode->i_size & (PAGE_SIZE - 1); 5253 5284 /* ··· 5273 5302 folio_put(folio); 5274 5303 if (ret != -EBUSY) 5275 5304 return; 5276 - commit_tid = 0; 5305 + has_transaction = false; 5277 5306 read_lock(&journal->j_state_lock); 5278 - if (journal->j_committing_transaction) 5307 + if (journal->j_committing_transaction) { 5279 5308 commit_tid = journal->j_committing_transaction->t_tid; 5309 + has_transaction = true; 5310 + } 5280 5311 read_unlock(&journal->j_state_lock); 5281 - if (commit_tid) 5312 + if (has_transaction) 5282 5313 jbd2_log_wait_commit(journal, commit_tid); 5283 5314 } 5284 5315 } ··· 6189 6216 if (folio_pos(folio) + len > size) 6190 6217 len = size - folio_pos(folio); 6191 6218 6192 - err = __block_write_begin(folio, 0, len, ext4_get_block); 6219 + err = ext4_block_write_begin(handle, folio, 0, len, 6220 + ext4_get_block); 6193 6221 if (!err) { 6194 6222 ret = VM_FAULT_SIGBUS; 6195 6223 if (ext4_journal_folio_buffers(handle, folio, len))
+12 -13
fs/ext4/mballoc.c
··· 2356 2356 ex.fe_logical = 0xDEADFA11; /* debug value */ 2357 2357 2358 2358 if (max >= ac->ac_g_ex.fe_len && 2359 - ac->ac_g_ex.fe_len == EXT4_B2C(sbi, sbi->s_stripe)) { 2359 + ac->ac_g_ex.fe_len == EXT4_NUM_B2C(sbi, sbi->s_stripe)) { 2360 2360 ext4_fsblk_t start; 2361 2361 2362 2362 start = ext4_grp_offs_to_block(ac->ac_sb, &ex); ··· 2553 2553 do_div(a, sbi->s_stripe); 2554 2554 i = (a * sbi->s_stripe) - first_group_block; 2555 2555 2556 - stripe = EXT4_B2C(sbi, sbi->s_stripe); 2556 + stripe = EXT4_NUM_B2C(sbi, sbi->s_stripe); 2557 2557 i = EXT4_B2C(sbi, i); 2558 2558 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { 2559 2559 if (!mb_test_bit(i, bitmap)) { ··· 2928 2928 if (cr == CR_POWER2_ALIGNED) 2929 2929 ext4_mb_simple_scan_group(ac, &e4b); 2930 2930 else { 2931 - bool is_stripe_aligned = sbi->s_stripe && 2931 + bool is_stripe_aligned = 2932 + (sbi->s_stripe >= 2933 + sbi->s_cluster_ratio) && 2932 2934 !(ac->ac_g_ex.fe_len % 2933 - EXT4_B2C(sbi, sbi->s_stripe)); 2935 + EXT4_NUM_B2C(sbi, sbi->s_stripe)); 2934 2936 2935 2937 if ((cr == CR_GOAL_LEN_FAST || 2936 2938 cr == CR_BEST_AVAIL_LEN) && ··· 3077 3075 seq_puts(seq, " ]"); 3078 3076 if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info)) 3079 3077 seq_puts(seq, " Block bitmap corrupted!"); 3080 - seq_puts(seq, "\n"); 3081 - 3078 + seq_putc(seq, '\n'); 3082 3079 return 0; 3083 3080 } 3084 3081 ··· 3708 3707 */ 3709 3708 if (sbi->s_stripe > 1) { 3710 3709 sbi->s_mb_group_prealloc = roundup( 3711 - sbi->s_mb_group_prealloc, EXT4_B2C(sbi, sbi->s_stripe)); 3710 + sbi->s_mb_group_prealloc, EXT4_NUM_B2C(sbi, sbi->s_stripe)); 3712 3711 } 3713 3712 3714 3713 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); ··· 3888 3887 /* 3889 3888 * Clear the trimmed flag for the group so that the next 3890 3889 * ext4_trim_fs can trim it. 3891 - * If the volume is mounted with -o discard, online discard 3892 - * is supported and the free blocks will be trimmed online. 3893 3890 */ 3894 - if (!test_opt(sb, DISCARD)) 3895 - EXT4_MB_GRP_CLEAR_TRIMMED(db); 3891 + EXT4_MB_GRP_CLEAR_TRIMMED(db); 3896 3892 3897 3893 if (!db->bb_free_root.rb_node) { 3898 3894 /* No more items in the per group rb tree ··· 6513 6515 " group:%u block:%d count:%lu failed" 6514 6516 " with %d", block_group, bit, count, 6515 6517 err); 6516 - } else 6517 - EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); 6518 + } 6519 + 6520 + EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); 6518 6521 6519 6522 ext4_lock_group(sb, block_group); 6520 6523 mb_free_blocks(inode, &e4b, bit, count_clusters);
+4 -3
fs/ext4/migrate.c
··· 37 37 path = ext4_find_extent(inode, lb->first_block, NULL, 0); 38 38 if (IS_ERR(path)) { 39 39 retval = PTR_ERR(path); 40 - path = NULL; 41 40 goto err_out; 42 41 } 43 42 ··· 52 53 retval = ext4_datasem_ensure_credits(handle, inode, needed, needed, 0); 53 54 if (retval < 0) 54 55 goto err_out; 55 - retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0); 56 + path = ext4_ext_insert_extent(handle, inode, path, &newext, 0); 57 + if (IS_ERR(path)) 58 + retval = PTR_ERR(path); 56 59 err_out: 57 60 up_write((&EXT4_I(inode)->i_data_sem)); 58 61 ext4_free_ext_path(path); ··· 664 663 if (unlikely(ret2 && !ret)) 665 664 ret = ret2; 666 665 errout: 667 - ext4_journal_stop(handle); 668 666 up_write(&EXT4_I(inode)->i_data_sem); 667 + ext4_journal_stop(handle); 669 668 out_unlock: 670 669 ext4_writepages_up_write(inode->i_sb, alloc_ctx); 671 670 return ret;
+51 -39
fs/ext4/move_extent.c
··· 17 17 * get_ext_path() - Find an extent path for designated logical block number. 18 18 * @inode: inode to be searched 19 19 * @lblock: logical block number to find an extent path 20 - * @ppath: pointer to an extent path pointer (for output) 20 + * @path: pointer to an extent path 21 21 * 22 - * ext4_find_extent wrapper. Return 0 on success, or a negative error value 23 - * on failure. 22 + * ext4_find_extent wrapper. Return an extent path pointer on success, 23 + * or an error pointer on failure. 24 24 */ 25 - static inline int 25 + static inline struct ext4_ext_path * 26 26 get_ext_path(struct inode *inode, ext4_lblk_t lblock, 27 - struct ext4_ext_path **ppath) 27 + struct ext4_ext_path *path) 28 28 { 29 - struct ext4_ext_path *path; 30 - 31 - path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE); 29 + path = ext4_find_extent(inode, lblock, path, EXT4_EX_NOCACHE); 32 30 if (IS_ERR(path)) 33 - return PTR_ERR(path); 31 + return path; 34 32 if (path[ext_depth(inode)].p_ext == NULL) { 35 33 ext4_free_ext_path(path); 36 - *ppath = NULL; 37 - return -ENODATA; 34 + return ERR_PTR(-ENODATA); 38 35 } 39 - *ppath = path; 40 - return 0; 36 + return path; 41 37 } 42 38 43 39 /** ··· 91 95 int ret = 0; 92 96 ext4_lblk_t last = from + count; 93 97 while (from < last) { 94 - *err = get_ext_path(inode, from, &path); 95 - if (*err) 96 - goto out; 98 + path = get_ext_path(inode, from, path); 99 + if (IS_ERR(path)) { 100 + *err = PTR_ERR(path); 101 + return ret; 102 + } 97 103 ext = path[ext_depth(inode)].p_ext; 98 104 if (unwritten != ext4_ext_is_unwritten(ext)) 99 105 goto out; ··· 164 166 return 0; 165 167 } 166 168 167 - /* Force page buffers uptodate w/o dropping page's lock */ 168 - static int 169 - mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to) 169 + /* Force folio buffers uptodate w/o dropping folio's lock */ 170 + static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to) 170 171 { 171 172 struct inode *inode = folio->mapping->host; 172 173 sector_t block; 173 - struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 174 + struct buffer_head *bh, *head; 174 175 unsigned int blocksize, block_start, block_end; 175 - int i, err, nr = 0, partial = 0; 176 + int nr = 0; 177 + bool partial = false; 178 + 176 179 BUG_ON(!folio_test_locked(folio)); 177 180 BUG_ON(folio_test_writeback(folio)); 178 181 ··· 185 186 if (!head) 186 187 head = create_empty_buffers(folio, blocksize, 0); 187 188 188 - block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits); 189 - for (bh = head, block_start = 0; bh != head || !block_start; 190 - block++, block_start = block_end, bh = bh->b_this_page) { 189 + block = folio_pos(folio) >> inode->i_blkbits; 190 + block_end = 0; 191 + bh = head; 192 + do { 193 + block_start = block_end; 191 194 block_end = block_start + blocksize; 192 195 if (block_end <= from || block_start >= to) { 193 196 if (!buffer_uptodate(bh)) 194 - partial = 1; 197 + partial = true; 195 198 continue; 196 199 } 197 200 if (buffer_uptodate(bh)) 198 201 continue; 199 202 if (!buffer_mapped(bh)) { 200 - err = ext4_get_block(inode, block, bh, 0); 203 + int err = ext4_get_block(inode, block, bh, 0); 201 204 if (err) 202 205 return err; 203 206 if (!buffer_mapped(bh)) { ··· 208 207 continue; 209 208 } 210 209 } 211 - BUG_ON(nr >= MAX_BUF_PER_PAGE); 212 - arr[nr++] = bh; 213 - } 210 + lock_buffer(bh); 211 + if (buffer_uptodate(bh)) { 212 + unlock_buffer(bh); 213 + continue; 214 + } 215 + ext4_read_bh_nowait(bh, 0, NULL); 216 + nr++; 217 + } while (block++, (bh = bh->b_this_page) != head); 218 + 214 219 /* No io required */ 215 220 if (!nr) 216 221 goto out; 217 222 218 - for (i = 0; i < nr; i++) { 219 - bh = arr[i]; 220 - if (!bh_uptodate_or_lock(bh)) { 221 - err = ext4_read_bh(bh, 0, NULL); 222 - if (err) 223 - return err; 224 - } 225 - } 223 + bh = head; 224 + do { 225 + if (bh_offset(bh) + blocksize <= from) 226 + continue; 227 + if (bh_offset(bh) > to) 228 + break; 229 + wait_on_buffer(bh); 230 + if (buffer_uptodate(bh)) 231 + continue; 232 + return -EIO; 233 + } while ((bh = bh->b_this_page) != head); 226 234 out: 227 235 if (!partial) 228 236 folio_mark_uptodate(folio); ··· 634 624 int offset_in_page; 635 625 int unwritten, cur_len; 636 626 637 - ret = get_ext_path(orig_inode, o_start, &path); 638 - if (ret) 627 + path = get_ext_path(orig_inode, o_start, path); 628 + if (IS_ERR(path)) { 629 + ret = PTR_ERR(path); 639 630 goto out; 631 + } 640 632 ex = path[path->p_depth].p_ext; 641 633 cur_blk = le32_to_cpu(ex->ee_block); 642 634 cur_len = ext4_ext_get_actual_len(ex);
+9 -7
fs/ext4/namei.c
··· 1482 1482 } 1483 1483 1484 1484 /* 1485 - * Returns 0 if not found, -1 on failure, and 1 on success 1485 + * Returns 0 if not found, -EFSCORRUPTED on failure, and 1 on success 1486 1486 */ 1487 1487 int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, 1488 1488 struct inode *dir, struct ext4_filename *fname, ··· 1503 1503 * a full check */ 1504 1504 if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf, 1505 1505 buf_size, offset)) 1506 - return -1; 1506 + return -EFSCORRUPTED; 1507 1507 *res_dir = de; 1508 1508 return 1; 1509 1509 } ··· 1511 1511 de_len = ext4_rec_len_from_disk(de->rec_len, 1512 1512 dir->i_sb->s_blocksize); 1513 1513 if (de_len <= 0) 1514 - return -1; 1514 + return -EFSCORRUPTED; 1515 1515 offset += de_len; 1516 1516 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); 1517 1517 } ··· 1574 1574 &has_inline_data); 1575 1575 if (inlined) 1576 1576 *inlined = has_inline_data; 1577 - if (has_inline_data) 1577 + if (has_inline_data || IS_ERR(ret)) 1578 1578 goto cleanup_and_exit; 1579 1579 } 1580 1580 ··· 1663 1663 goto cleanup_and_exit; 1664 1664 } else { 1665 1665 brelse(bh); 1666 - if (i < 0) 1666 + if (i < 0) { 1667 + ret = ERR_PTR(i); 1667 1668 goto cleanup_and_exit; 1669 + } 1668 1670 } 1669 1671 next: 1670 1672 if (++block >= nblocks) ··· 1760 1758 if (retval == 1) 1761 1759 goto success; 1762 1760 brelse(bh); 1763 - if (retval == -1) { 1761 + if (retval < 0) { 1764 1762 bh = ERR_PTR(ERR_BAD_DX_DIR); 1765 1763 goto errout; 1766 1764 } ··· 2001 1999 split = count/2; 2002 2000 2003 2001 hash2 = map[split].hash; 2004 - continued = hash2 == map[split - 1].hash; 2002 + continued = split > 0 ? hash2 == map[split - 1].hash : 0; 2005 2003 dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n", 2006 2004 (unsigned long)dx_get_block(frame->at), 2007 2005 hash2, split, count-split));
+8 -8
fs/ext4/readpage.c
··· 221 221 sector_t block_in_file; 222 222 sector_t last_block; 223 223 sector_t last_block_in_file; 224 - sector_t blocks[MAX_BUF_PER_PAGE]; 224 + sector_t first_block; 225 225 unsigned page_block; 226 226 struct block_device *bdev = inode->i_sb->s_bdev; 227 227 int length; ··· 263 263 unsigned map_offset = block_in_file - map.m_lblk; 264 264 unsigned last = map.m_len - map_offset; 265 265 266 + first_block = map.m_pblk + map_offset; 266 267 for (relative_block = 0; ; relative_block++) { 267 268 if (relative_block == last) { 268 269 /* needed? */ ··· 272 271 } 273 272 if (page_block == blocks_per_page) 274 273 break; 275 - blocks[page_block] = map.m_pblk + map_offset + 276 - relative_block; 277 274 page_block++; 278 275 block_in_file++; 279 276 } ··· 306 307 goto confused; /* hole -> non-hole */ 307 308 308 309 /* Contiguous blocks? */ 309 - if (page_block && blocks[page_block-1] != map.m_pblk-1) 310 + if (!page_block) 311 + first_block = map.m_pblk; 312 + else if (first_block + page_block != map.m_pblk) 310 313 goto confused; 311 314 for (relative_block = 0; ; relative_block++) { 312 315 if (relative_block == map.m_len) { ··· 317 316 break; 318 317 } else if (page_block == blocks_per_page) 319 318 break; 320 - blocks[page_block] = map.m_pblk+relative_block; 321 319 page_block++; 322 320 block_in_file++; 323 321 } ··· 339 339 * This folio will go to BIO. Do we need to send this 340 340 * BIO off first? 341 341 */ 342 - if (bio && (last_block_in_bio != blocks[0] - 1 || 342 + if (bio && (last_block_in_bio != first_block - 1 || 343 343 !fscrypt_mergeable_bio(bio, inode, next_block))) { 344 344 submit_and_realloc: 345 345 submit_bio(bio); ··· 355 355 fscrypt_set_bio_crypt_ctx(bio, inode, next_block, 356 356 GFP_KERNEL); 357 357 ext4_set_bio_post_read_ctx(bio, inode, folio->index); 358 - bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); 358 + bio->bi_iter.bi_sector = first_block << (blkbits - 9); 359 359 bio->bi_end_io = mpage_end_io; 360 360 if (rac) 361 361 bio->bi_opf |= REQ_RAHEAD; ··· 371 371 submit_bio(bio); 372 372 bio = NULL; 373 373 } else 374 - last_block_in_bio = blocks[blocks_per_page - 1]; 374 + last_block_in_bio = first_block + blocks_per_page - 1; 375 375 continue; 376 376 confused: 377 377 if (bio) {
+1 -2
fs/ext4/resize.c
··· 1319 1319 bh = ext4_get_bitmap(sb, group_data->inode_bitmap); 1320 1320 if (!bh) 1321 1321 return -EIO; 1322 - ext4_inode_bitmap_csum_set(sb, gdp, bh, 1323 - EXT4_INODES_PER_GROUP(sb) / 8); 1322 + ext4_inode_bitmap_csum_set(sb, gdp, bh); 1324 1323 brelse(bh); 1325 1324 1326 1325 bh = ext4_get_bitmap(sb, group_data->block_bitmap);
+48 -17
fs/ext4/super.c
··· 735 735 736 736 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 737 737 /* 738 - * Make sure updated value of ->s_mount_flags will be visible before 739 - * ->s_flags update 738 + * EXT4_FLAGS_SHUTDOWN was set which stops all filesystem 739 + * modifications. We don't set SB_RDONLY because that requires 740 + * sb->s_umount semaphore and setting it without proper remount 741 + * procedure is confusing code such as freeze_super() leading to 742 + * deadlocks and other problems. 740 743 */ 741 - smp_wmb(); 742 - sb->s_flags |= SB_RDONLY; 743 744 } 744 745 745 746 static void update_super_work(struct work_struct *work) ··· 3046 3045 3047 3046 seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw"); 3048 3047 rc = _ext4_show_options(seq, sb, 1); 3049 - seq_puts(seq, "\n"); 3048 + seq_putc(seq, '\n'); 3050 3049 return rc; 3051 3050 } 3052 3051 ··· 5088 5087 return ret; 5089 5088 } 5090 5089 5091 - static void ext4_hash_info_init(struct super_block *sb) 5090 + static int ext4_hash_info_init(struct super_block *sb) 5092 5091 { 5093 5092 struct ext4_sb_info *sbi = EXT4_SB(sb); 5094 5093 struct ext4_super_block *es = sbi->s_es; 5095 5094 unsigned int i; 5096 5095 5096 + sbi->s_def_hash_version = es->s_def_hash_version; 5097 + 5098 + if (sbi->s_def_hash_version > DX_HASH_LAST) { 5099 + ext4_msg(sb, KERN_ERR, 5100 + "Invalid default hash set in the superblock"); 5101 + return -EINVAL; 5102 + } else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) { 5103 + ext4_msg(sb, KERN_ERR, 5104 + "SIPHASH is not a valid default hash value"); 5105 + return -EINVAL; 5106 + } 5107 + 5097 5108 for (i = 0; i < 4; i++) 5098 5109 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 5099 5110 5100 - sbi->s_def_hash_version = es->s_def_hash_version; 5101 5111 if (ext4_has_feature_dir_index(sb)) { 5102 5112 i = le32_to_cpu(es->s_flags); 5103 5113 if (i & EXT2_FLAGS_UNSIGNED_HASH) ··· 5126 5114 #endif 5127 5115 } 5128 5116 } 5117 + return 0; 5129 5118 } 5130 5119 5131 5120 static int ext4_block_group_meta_init(struct super_block *sb, int silent) ··· 5176 5163 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 5177 5164 5178 5165 return 0; 5166 + } 5167 + 5168 + /* 5169 + * It's hard to get stripe aligned blocks if stripe is not aligned with 5170 + * cluster, just disable stripe and alert user to simplify code and avoid 5171 + * stripe aligned allocation which will rarely succeed. 5172 + */ 5173 + static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe) 5174 + { 5175 + struct ext4_sb_info *sbi = EXT4_SB(sb); 5176 + return (stripe > 0 && sbi->s_cluster_ratio > 1 && 5177 + stripe % sbi->s_cluster_ratio != 0); 5179 5178 } 5180 5179 5181 5180 static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ··· 5274 5249 if (err) 5275 5250 goto failed_mount; 5276 5251 5277 - ext4_hash_info_init(sb); 5252 + err = ext4_hash_info_init(sb); 5253 + if (err) 5254 + goto failed_mount; 5278 5255 5279 5256 err = ext4_handle_clustersize(sb); 5280 5257 if (err) ··· 5299 5272 goto failed_mount3; 5300 5273 5301 5274 sbi->s_stripe = ext4_get_stripe_size(sbi); 5302 - /* 5303 - * It's hard to get stripe aligned blocks if stripe is not aligned with 5304 - * cluster, just disable stripe and alert user to simpfy code and avoid 5305 - * stripe aligned allocation which will rarely successes. 5306 - */ 5307 - if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 && 5308 - sbi->s_stripe % sbi->s_cluster_ratio != 0) { 5275 + if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) { 5309 5276 ext4_msg(sb, KERN_WARNING, 5310 5277 "stripe (%lu) is not aligned with cluster size (%u), " 5311 5278 "stripe is disabled", ··· 5333 5312 5334 5313 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 5335 5314 mutex_init(&sbi->s_orphan_lock); 5315 + 5316 + spin_lock_init(&sbi->s_bdev_wb_lock); 5336 5317 5337 5318 ext4_fast_commit_init(sb); 5338 5319 ··· 5557 5534 * Save the original bdev mapping's wb_err value which could be 5558 5535 * used to detect the metadata async write error. 5559 5536 */ 5560 - spin_lock_init(&sbi->s_bdev_wb_lock); 5561 5537 errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err, 5562 5538 &sbi->s_bdev_wb_err); 5563 5539 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ··· 5636 5614 failed_mount3: 5637 5615 /* flush s_sb_upd_work before sbi destroy */ 5638 5616 flush_work(&sbi->s_sb_upd_work); 5639 - del_timer_sync(&sbi->s_err_report); 5640 5617 ext4_stop_mmpd(sbi); 5618 + del_timer_sync(&sbi->s_err_report); 5641 5619 ext4_group_desc_free(sbi); 5642 5620 failed_mount: 5643 5621 if (sbi->s_chksum_driver) ··· 6461 6439 else 6462 6440 ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 6463 6441 6442 + } 6443 + 6444 + if ((ctx->spec & EXT4_SPEC_s_stripe) && 6445 + ext4_is_stripe_incompatible(sb, ctx->s_stripe)) { 6446 + ext4_msg(sb, KERN_WARNING, 6447 + "stripe (%lu) is not aligned with cluster size (%u), " 6448 + "stripe is disabled", 6449 + ctx->s_stripe, sbi->s_cluster_ratio); 6450 + ctx->s_stripe = 0; 6464 6451 } 6465 6452 6466 6453 /*
+13 -18
fs/ext4/xattr.c
··· 458 458 ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE); 459 459 ext4_xattr_inode_set_ref(inode, 1); 460 460 } else { 461 - inode_lock(inode); 461 + inode_lock_nested(inode, I_MUTEX_XATTR); 462 462 inode->i_flags |= S_NOQUOTA; 463 463 inode_unlock(inode); 464 464 } ··· 1039 1039 s64 ref_count; 1040 1040 int ret; 1041 1041 1042 - inode_lock(ea_inode); 1042 + inode_lock_nested(ea_inode, I_MUTEX_XATTR); 1043 1043 1044 1044 ret = ext4_reserve_inode_write(handle, ea_inode, &iloc); 1045 1045 if (ret) ··· 2879 2879 if (*ea_inode_array == NULL) { 2880 2880 /* 2881 2881 * Start with 15 inodes, so it fits into a power-of-two size. 2882 - * If *ea_inode_array is NULL, this is essentially offsetof() 2883 2882 */ 2884 - (*ea_inode_array) = 2885 - kmalloc(offsetof(struct ext4_xattr_inode_array, 2886 - inodes[EIA_MASK]), 2887 - GFP_NOFS); 2883 + (*ea_inode_array) = kmalloc( 2884 + struct_size(*ea_inode_array, inodes, EIA_MASK), 2885 + GFP_NOFS); 2888 2886 if (*ea_inode_array == NULL) 2889 2887 return -ENOMEM; 2890 2888 (*ea_inode_array)->count = 0; 2891 2889 } else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) { 2892 2890 /* expand the array once all 15 + n * 16 slots are full */ 2893 2891 struct ext4_xattr_inode_array *new_array = NULL; 2894 - int count = (*ea_inode_array)->count; 2895 2892 2896 - /* if new_array is NULL, this is essentially offsetof() */ 2897 2893 new_array = kmalloc( 2898 - offsetof(struct ext4_xattr_inode_array, 2899 - inodes[count + EIA_INCR]), 2900 - GFP_NOFS); 2894 + struct_size(*ea_inode_array, inodes, 2895 + (*ea_inode_array)->count + EIA_INCR), 2896 + GFP_NOFS); 2901 2897 if (new_array == NULL) 2902 2898 return -ENOMEM; 2903 2899 memcpy(new_array, *ea_inode_array, 2904 - offsetof(struct ext4_xattr_inode_array, inodes[count])); 2900 + struct_size(*ea_inode_array, inodes, 2901 + (*ea_inode_array)->count)); 2905 2902 kfree(*ea_inode_array); 2906 2903 *ea_inode_array = new_array; 2907 2904 } 2908 - (*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode; 2905 + (*ea_inode_array)->count++; 2906 + (*ea_inode_array)->inodes[(*ea_inode_array)->count - 1] = inode; 2909 2907 return 0; 2910 2908 } 2911 2909 ··· 3034 3036 * 3035 3037 * Create a new entry in the extended attribute block cache, and insert 3036 3038 * it unless such an entry is already in the cache. 3037 - * 3038 - * Returns 0, or a negative error number on failure. 3039 3039 */ 3040 3040 static void 3041 3041 ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache, ··· 3061 3065 * 3062 3066 * Compare two extended attribute blocks for equality. 3063 3067 * 3064 - * Returns 0 if the blocks are equal, 1 if they differ, and 3065 - * a negative error number on errors. 3068 + * Returns 0 if the blocks are equal, 1 if they differ. 3066 3069 */ 3067 3070 static int 3068 3071 ext4_xattr_cmp(struct ext4_xattr_header *header1,
+3 -4
fs/ext4/xattr.h
··· 32 32 __le32 h_refcount; /* reference count */ 33 33 __le32 h_blocks; /* number of disk blocks used */ 34 34 __le32 h_hash; /* hash value of all attributes */ 35 - __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ 36 - /* id = inum if refcount=1, blknum otherwise */ 35 + __le32 h_checksum; /* crc32c(uuid+blknum+xattrblock) */ 37 36 __u32 h_reserved[3]; /* zero right now */ 38 37 }; 39 38 ··· 129 130 }; 130 131 131 132 struct ext4_xattr_inode_array { 132 - unsigned int count; /* # of used items in the array */ 133 - struct inode *inodes[]; 133 + unsigned int count; 134 + struct inode *inodes[] __counted_by(count); 134 135 }; 135 136 136 137 extern const struct xattr_handler ext4_xattr_user_handler;
+15 -6
fs/jbd2/checkpoint.c
··· 79 79 if (space_left < nblocks) { 80 80 int chkpt = journal->j_checkpoint_transactions != NULL; 81 81 tid_t tid = 0; 82 + bool has_transaction = false; 82 83 83 - if (journal->j_committing_transaction) 84 + if (journal->j_committing_transaction) { 84 85 tid = journal->j_committing_transaction->t_tid; 86 + has_transaction = true; 87 + } 85 88 spin_unlock(&journal->j_list_lock); 86 89 write_unlock(&journal->j_state_lock); 87 90 if (chkpt) { 88 91 jbd2_log_do_checkpoint(journal); 89 - } else if (jbd2_cleanup_journal_tail(journal) == 0) { 90 - /* We were able to recover space; yay! */ 92 + } else if (jbd2_cleanup_journal_tail(journal) <= 0) { 93 + /* 94 + * We were able to recover space or the 95 + * journal was aborted due to an error. 96 + */ 91 97 ; 92 - } else if (tid) { 98 + } else if (has_transaction) { 93 99 /* 94 100 * jbd2_journal_commit_transaction() may want 95 101 * to take the checkpoint_mutex if JBD2_FLUSHED ··· 413 407 tid_t tid = 0; 414 408 unsigned long nr_freed = 0; 415 409 unsigned long freed; 410 + bool first_set = false; 416 411 417 412 again: 418 413 spin_lock(&journal->j_list_lock); ··· 433 426 else 434 427 transaction = journal->j_checkpoint_transactions; 435 428 436 - if (!first_tid) 429 + if (!first_set) { 437 430 first_tid = transaction->t_tid; 431 + first_set = true; 432 + } 438 433 last_transaction = journal->j_checkpoint_transactions->t_cpprev; 439 434 next_transaction = transaction; 440 435 last_tid = last_transaction->t_tid; ··· 466 457 spin_unlock(&journal->j_list_lock); 467 458 cond_resched(); 468 459 469 - if (*nr_to_scan && next_tid) 460 + if (*nr_to_scan && journal->j_shrink_transaction) 470 461 goto again; 471 462 out: 472 463 trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
+42 -55
fs/jbd2/journal.c
··· 281 281 write_unlock(&journal->j_state_lock); 282 282 } 283 283 284 + static inline bool jbd2_data_needs_escaping(char *data) 285 + { 286 + return *((__be32 *)data) == cpu_to_be32(JBD2_MAGIC_NUMBER); 287 + } 288 + 289 + static inline void jbd2_data_do_escape(char *data) 290 + { 291 + *((unsigned int *)data) = 0; 292 + } 293 + 284 294 /* 285 295 * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. 286 296 * ··· 328 318 struct buffer_head **bh_out, 329 319 sector_t blocknr) 330 320 { 331 - int done_copy_out = 0; 332 321 int do_escape = 0; 333 - char *mapped_data; 334 322 struct buffer_head *new_bh; 335 323 struct folio *new_folio; 336 324 unsigned int new_offset; ··· 357 349 * we use that version of the data for the commit. 358 350 */ 359 351 if (jh_in->b_frozen_data) { 360 - done_copy_out = 1; 361 352 new_folio = virt_to_folio(jh_in->b_frozen_data); 362 353 new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data); 354 + do_escape = jbd2_data_needs_escaping(jh_in->b_frozen_data); 355 + if (do_escape) 356 + jbd2_data_do_escape(jh_in->b_frozen_data); 363 357 } else { 358 + char *tmp; 359 + char *mapped_data; 360 + 364 361 new_folio = bh_in->b_folio; 365 362 new_offset = offset_in_folio(new_folio, bh_in->b_data); 366 - } 367 - 368 - mapped_data = kmap_local_folio(new_folio, new_offset); 369 - /* 370 - * Fire data frozen trigger if data already wasn't frozen. Do this 371 - * before checking for escaping, as the trigger may modify the magic 372 - * offset. If a copy-out happens afterwards, it will have the correct 373 - * data in the buffer. 374 - */ 375 - if (!done_copy_out) 363 + mapped_data = kmap_local_folio(new_folio, new_offset); 364 + /* 365 + * Fire data frozen trigger if data already wasn't frozen. Do 366 + * this before checking for escaping, as the trigger may modify 367 + * the magic offset. If a copy-out happens afterwards, it will 368 + * have the correct data in the buffer. 369 + */ 376 370 jbd2_buffer_frozen_trigger(jh_in, mapped_data, 377 371 jh_in->b_triggers); 378 - 379 - /* 380 - * Check for escaping 381 - */ 382 - if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) 383 - do_escape = 1; 384 - kunmap_local(mapped_data); 385 - 386 - /* 387 - * Do we need to do a data copy? 388 - */ 389 - if (do_escape && !done_copy_out) { 390 - char *tmp; 372 + do_escape = jbd2_data_needs_escaping(mapped_data); 373 + kunmap_local(mapped_data); 374 + /* 375 + * Do we need to do a data copy? 376 + */ 377 + if (!do_escape) 378 + goto escape_done; 391 379 392 380 spin_unlock(&jh_in->b_state_lock); 393 381 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); ··· 410 406 copy_done: 411 407 new_folio = virt_to_folio(jh_in->b_frozen_data); 412 408 new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data); 413 - done_copy_out = 1; 409 + jbd2_data_do_escape(jh_in->b_frozen_data); 414 410 } 415 411 416 - /* 417 - * Did we need to do an escaping? Now we've done all the 418 - * copying, we can finally do so. 419 - * b_frozen_data is from jbd2_alloc() which always provides an 420 - * address from the direct kernels mapping. 421 - */ 422 - if (do_escape) 423 - *((unsigned int *)jh_in->b_frozen_data) = 0; 424 - 412 + escape_done: 425 413 folio_set_bh(new_bh, new_folio, new_offset); 426 414 new_bh->b_size = bh_in->b_size; 427 415 new_bh->b_bdev = journal->j_dev; ··· 706 710 return -EINVAL; 707 711 708 712 write_lock(&journal->j_state_lock); 709 - if (tid <= journal->j_commit_sequence) { 713 + if (tid_geq(journal->j_commit_sequence, tid)) { 710 714 write_unlock(&journal->j_state_lock); 711 715 return -EALREADY; 712 716 } ··· 736 740 */ 737 741 static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) 738 742 { 739 - jbd2_journal_unlock_updates(journal); 740 743 if (journal->j_fc_cleanup_callback) 741 744 journal->j_fc_cleanup_callback(journal, 0, tid); 745 + jbd2_journal_unlock_updates(journal); 742 746 write_lock(&journal->j_state_lock); 743 747 journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; 744 748 if (fallback) ··· 837 841 838 842 *bh_out = NULL; 839 843 840 - if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) { 841 - fc_off = journal->j_fc_off; 842 - blocknr = journal->j_fc_first + fc_off; 843 - journal->j_fc_off++; 844 - } else { 845 - ret = -EINVAL; 846 - } 844 + if (journal->j_fc_off + journal->j_fc_first >= journal->j_fc_last) 845 + return -EINVAL; 847 846 848 - if (ret) 849 - return ret; 850 - 847 + fc_off = journal->j_fc_off; 848 + blocknr = journal->j_fc_first + fc_off; 849 + journal->j_fc_off++; 851 850 ret = jbd2_journal_bmap(journal, blocknr, &pblock); 852 851 if (ret) 853 852 return ret; ··· 850 859 bh = __getblk(journal->j_dev, pblock, journal->j_blocksize); 851 860 if (!bh) 852 861 return -ENOMEM; 853 - 854 862 855 863 journal->j_fc_wbuf[fc_off] = bh; 856 864 ··· 893 903 } 894 904 EXPORT_SYMBOL(jbd2_fc_wait_bufs); 895 905 896 - int jbd2_fc_release_bufs(journal_t *journal) 906 + void jbd2_fc_release_bufs(journal_t *journal) 897 907 { 898 908 struct buffer_head *bh; 899 909 int i, j_fc_off; ··· 907 917 put_bh(bh); 908 918 journal->j_fc_wbuf[i] = NULL; 909 919 } 910 - 911 - return 0; 912 920 } 913 921 EXPORT_SYMBOL(jbd2_fc_release_bufs); 914 922 ··· 1932 1944 if (had_fast_commit) 1933 1945 jbd2_set_feature_fast_commit(journal); 1934 1946 1935 - /* Log is no longer empty */ 1947 + /* Log is empty */ 1936 1948 write_lock(&journal->j_state_lock); 1937 1949 journal->j_flags |= JBD2_FLUSHED; 1938 1950 write_unlock(&journal->j_state_lock); ··· 2854 2866 ret = kmem_cache_zalloc(jbd2_journal_head_cache, 2855 2867 GFP_NOFS | __GFP_NOFAIL); 2856 2868 } 2857 - if (ret) 2858 - spin_lock_init(&ret->b_state_lock); 2869 + spin_lock_init(&ret->b_state_lock); 2859 2870 return ret; 2860 2871 } 2861 2872
+2 -2
include/linux/jbd2.h
··· 1086 1086 int j_revoke_records_per_block; 1087 1087 1088 1088 /** 1089 - * @j_transaction_overhead: 1089 + * @j_transaction_overhead_buffers: 1090 1090 * 1091 1091 * Number of blocks each transaction needs for its own bookkeeping 1092 1092 */ ··· 1675 1675 int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode); 1676 1676 int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); 1677 1677 int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); 1678 - int jbd2_fc_release_bufs(journal_t *journal); 1678 + void jbd2_fc_release_bufs(journal_t *journal); 1679 1679 1680 1680 /* 1681 1681 * is_journal_abort