Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ext4_for_linus-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"Cleanup ext4's multi-block allocator, including adding some unit
tests, as well as cleaning up how we update the backup superblock after
online resizes or when updating the label or UUID.

Optimize handling of released data blocks in ext4's commit machinery
to avoid potential lock contention on the s_md_lock spinlock.

Fix a number of ext4 bugs:

- fix race between writepages and remount

- fix racy MAY_INLINE_DATA state check in direct I/O (DIO) write

- add missed brelse in an error path in update_backups

- fix umask handling when ACL support is disabled

- fix lost EIO error when a journal commit races with an fsync of the
block device

- fix potentially incorrect i_size when there is a crash right after an
O_SYNC direct write

- check extent node for validity before potentially using what might
be an invalid pointer

- fix potential stale data exposure when writing to an unwritten
extent and the file system is nearly out of space

- fix potential accounting error around block reservations when
writing partial delayed allocation writes to a bigalloc cluster

- avoid memory allocation failure when tracking partial delayed
allocation writes to a bigalloc cluster

- fix various debugging print messages"

* tag 'ext4_for_linus-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (41 commits)
ext4: properly sync file size update after O_SYNC direct IO
ext4: fix racy may inline data check in dio write
ext4: run mballoc test with different layouts setting
ext4: add first unit test for ext4_mb_new_blocks_simple in mballoc
ext4: add some kunit stub for mballoc kunit test
ext4: call ext4_mb_mark_context in ext4_group_add_blocks()
ext4: Separate block bitmap and buddy bitmap freeing in ext4_group_add_blocks()
ext4: call ext4_mb_mark_context in ext4_mb_clear_bb
ext4: Separate block bitmap and buddy bitmap freeing in ext4_mb_clear_bb()
ext4: call ext4_mb_mark_context in ext4_mb_mark_diskspace_used
ext4: extend ext4_mb_mark_context to support allocation under journal
ext4: call ext4_mb_mark_context in ext4_free_blocks_simple
ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb
ext4: make state in ext4_mb_mark_bb to be bool
jbd2: fix potential data lost in recovering journal raced with synchronizing fs bdev
ext4: apply umask if ACL support is disabled
ext4: mark buffer new if it is unwritten to avoid stale data exposure
ext4: move 'ix' sanity check to correct position
jbd2: fix printk format type for 'io_block' in do_one_pass()
jbd2: print io_block if check data block checksum failed when do recovery
...

+828 -590
+5
fs/ext4/acl.h
··· 68 68 static inline int 69 69 ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) 70 70 { 71 + /* usually, the umask is applied by posix_acl_create(), but if 72 + ext4 ACL support is disabled at compile time, we need to do 73 + it here, because posix_acl_create() will never be called */ 74 + inode->i_mode &= ~current_umask(); 75 + 71 76 return 0; 72 77 } 73 78 #endif /* CONFIG_EXT4_FS_POSIX_ACL */
+12 -4
fs/ext4/balloc.c
··· 22 22 #include "mballoc.h" 23 23 24 24 #include <trace/events/ext4.h> 25 + #include <kunit/static_stub.h> 25 26 26 27 static unsigned ext4_num_base_meta_clusters(struct super_block *sb, 27 28 ext4_group_t block_group); ··· 112 111 itbl_blk_start = ext4_inode_table(sb, gdp); 113 112 itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1; 114 113 if (itbl_blk_start <= end && itbl_blk_end >= start) { 115 - itbl_blk_start = itbl_blk_start >= start ? 116 - itbl_blk_start : start; 117 - itbl_blk_end = itbl_blk_end <= end ? 118 - itbl_blk_end : end; 114 + itbl_blk_start = max(itbl_blk_start, start); 115 + itbl_blk_end = min(itbl_blk_end, end); 119 116 120 117 itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start); 121 118 itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start); ··· 272 273 struct ext4_group_desc *desc; 273 274 struct ext4_sb_info *sbi = EXT4_SB(sb); 274 275 struct buffer_head *bh_p; 276 + 277 + KUNIT_STATIC_STUB_REDIRECT(ext4_get_group_desc, 278 + sb, block_group, bh); 275 279 276 280 if (block_group >= ngroups) { 277 281 ext4_error(sb, "block_group >= groups_count - block_group = %u," ··· 470 468 ext4_fsblk_t bitmap_blk; 471 469 int err; 472 470 471 + KUNIT_STATIC_STUB_REDIRECT(ext4_read_block_bitmap_nowait, 472 + sb, block_group, ignore_locked); 473 + 473 474 desc = ext4_get_group_desc(sb, block_group, NULL); 474 475 if (!desc) 475 476 return ERR_PTR(-EFSCORRUPTED); ··· 567 562 struct buffer_head *bh) 568 563 { 569 564 struct ext4_group_desc *desc; 565 + 566 + KUNIT_STATIC_STUB_REDIRECT(ext4_wait_block_bitmap, 567 + sb, block_group, bh); 570 568 571 569 if (!buffer_new(bh)) 572 570 return 0;
+5 -3
fs/ext4/ext4.h
··· 1504 1504 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ 1505 1505 struct buffer_head * s_sbh; /* Buffer containing the super block */ 1506 1506 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ 1507 + /* Array of bh's for the block group descriptors */ 1507 1508 struct buffer_head * __rcu *s_group_desc; 1508 1509 unsigned int s_mount_opt; 1509 1510 unsigned int s_mount_opt2; ··· 1575 1574 unsigned int *s_mb_maxs; 1576 1575 unsigned int s_group_info_size; 1577 1576 unsigned int s_mb_free_pending; 1578 - struct list_head s_freed_data_list; /* List of blocks to be freed 1577 + struct list_head s_freed_data_list[2]; /* List of blocks to be freed 1579 1578 after commit completed */ 1580 1579 struct list_head s_discard_list; 1581 1580 struct work_struct s_discard_work; ··· 1687 1686 1688 1687 /* 1689 1688 * Barrier between writepages ops and changing any inode's JOURNAL_DATA 1690 - * or EXTENTS flag. 1689 + * or EXTENTS flag or between writepages ops and changing DELALLOC or 1690 + * DIOREAD_NOLOCK mount options on remount. 1691 1691 */ 1692 1692 struct percpu_rw_semaphore s_writepages_rwsem; 1693 1693 struct dax_device *s_daxdev; ··· 2936 2934 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); 2937 2935 extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid); 2938 2936 extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, 2939 - int len, int state); 2937 + int len, bool state); 2940 2938 static inline bool ext4_mb_cr_expensive(enum criteria cr) 2941 2939 { 2942 2940 return cr >= CR_GOAL_LEN_SLOW;
+7 -7
fs/ext4/extents.c
··· 1010 1010 ix = curp->p_idx; 1011 1011 } 1012 1012 1013 + if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { 1014 + EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); 1015 + return -EFSCORRUPTED; 1016 + } 1017 + 1013 1018 len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; 1014 1019 BUG_ON(len < 0); 1015 1020 if (len > 0) { ··· 1022 1017 "move %d indices from 0x%p to 0x%p\n", 1023 1018 logical, len, ix, ix + 1); 1024 1019 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); 1025 - } 1026 - 1027 - if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { 1028 - EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); 1029 - return -EFSCORRUPTED; 1030 1020 } 1031 1021 1032 1022 ix->ei_block = cpu_to_le32(logical); ··· 6081 6081 for (j = 0; j < path->p_depth; j++) { 6082 6082 6083 6083 ext4_mb_mark_bb(inode->i_sb, 6084 - path[j].p_block, 1, 0); 6084 + path[j].p_block, 1, false); 6085 6085 ext4_fc_record_regions(inode->i_sb, inode->i_ino, 6086 6086 0, path[j].p_block, 1, 1); 6087 6087 } 6088 6088 ext4_free_ext_path(path); 6089 6089 } 6090 - ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 6090 + ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false); 6091 6091 ext4_fc_record_regions(inode->i_sb, inode->i_ino, 6092 6092 map.m_lblk, map.m_pblk, map.m_len, 1); 6093 6093 }
+91 -36
fs/ext4/extents_status.c
··· 152 152 static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); 153 153 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, 154 154 struct ext4_inode_info *locked_ei); 155 - static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, 156 - ext4_lblk_t len); 155 + static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, 156 + ext4_lblk_t len, 157 + struct pending_reservation **prealloc); 157 158 158 159 int __init ext4_init_es(void) 159 160 { ··· 447 446 WARN_ON_ONCE(sbi->s_es_nr_inode < 0); 448 447 } 449 448 spin_unlock(&sbi->s_es_lock); 449 + } 450 + 451 + static inline struct pending_reservation *__alloc_pending(bool nofail) 452 + { 453 + if (!nofail) 454 + return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); 455 + 456 + return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL); 457 + } 458 + 459 + static inline void __free_pending(struct pending_reservation *pr) 460 + { 461 + kmem_cache_free(ext4_pending_cachep, pr); 450 462 } 451 463 452 464 /* ··· 850 836 { 851 837 struct extent_status newes; 852 838 ext4_lblk_t end = lblk + len - 1; 853 - int err1 = 0; 854 - int err2 = 0; 839 + int err1 = 0, err2 = 0, err3 = 0; 855 840 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 856 841 struct extent_status *es1 = NULL; 857 842 struct extent_status *es2 = NULL; 843 + struct pending_reservation *pr = NULL; 844 + bool revise_pending = false; 858 845 859 846 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 860 847 return; ··· 883 868 884 869 ext4_es_insert_extent_check(inode, &newes); 885 870 871 + revise_pending = sbi->s_cluster_ratio > 1 && 872 + test_opt(inode->i_sb, DELALLOC) && 873 + (status & (EXTENT_STATUS_WRITTEN | 874 + EXTENT_STATUS_UNWRITTEN)); 886 875 retry: 887 876 if (err1 && !es1) 888 877 es1 = __es_alloc_extent(true); 889 878 if ((err1 || err2) && !es2) 890 879 es2 = __es_alloc_extent(true); 880 + if ((err1 || err2 || err3) && revise_pending && !pr) 881 + pr = 
__alloc_pending(true); 891 882 write_lock(&EXT4_I(inode)->i_es_lock); 892 883 893 884 err1 = __es_remove_extent(inode, lblk, end, NULL, es1); ··· 918 897 es2 = NULL; 919 898 } 920 899 921 - if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) && 922 - (status & EXTENT_STATUS_WRITTEN || 923 - status & EXTENT_STATUS_UNWRITTEN)) 924 - __revise_pending(inode, lblk, len); 900 + if (revise_pending) { 901 + err3 = __revise_pending(inode, lblk, len, &pr); 902 + if (err3 != 0) 903 + goto error; 904 + if (pr) { 905 + __free_pending(pr); 906 + pr = NULL; 907 + } 908 + } 925 909 error: 926 910 write_unlock(&EXT4_I(inode)->i_es_lock); 927 - if (err1 || err2) 911 + if (err1 || err2 || err3) 928 912 goto retry; 929 913 930 914 ext4_es_print_tree(inode); ··· 1337 1311 rc->ndelonly--; 1338 1312 node = rb_next(&pr->rb_node); 1339 1313 rb_erase(&pr->rb_node, &tree->root); 1340 - kmem_cache_free(ext4_pending_cachep, pr); 1314 + __free_pending(pr); 1341 1315 if (!node) 1342 1316 break; 1343 1317 pr = rb_entry(node, struct pending_reservation, ··· 1431 1405 } 1432 1406 } 1433 1407 if (count_reserved) 1434 - count_rsvd(inode, lblk, orig_es.es_len - len1 - len2, 1435 - &orig_es, &rc); 1408 + count_rsvd(inode, orig_es.es_lblk + len1, 1409 + orig_es.es_len - len1 - len2, &orig_es, &rc); 1436 1410 goto out_get_reserved; 1437 1411 } 1438 1412 ··· 1933 1907 * 1934 1908 * @inode - file containing the cluster 1935 1909 * @lblk - logical block in the cluster to be added 1910 + * @prealloc - preallocated pending entry 1936 1911 * 1937 1912 * Returns 0 on successful insertion and -ENOMEM on failure. If the 1938 1913 * pending reservation is already in the set, returns successfully. 
1939 1914 */ 1940 - static int __insert_pending(struct inode *inode, ext4_lblk_t lblk) 1915 + static int __insert_pending(struct inode *inode, ext4_lblk_t lblk, 1916 + struct pending_reservation **prealloc) 1941 1917 { 1942 1918 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1943 1919 struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree; ··· 1965 1937 } 1966 1938 } 1967 1939 1968 - pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); 1969 - if (pr == NULL) { 1970 - ret = -ENOMEM; 1971 - goto out; 1940 + if (likely(*prealloc == NULL)) { 1941 + pr = __alloc_pending(false); 1942 + if (!pr) { 1943 + ret = -ENOMEM; 1944 + goto out; 1945 + } 1946 + } else { 1947 + pr = *prealloc; 1948 + *prealloc = NULL; 1972 1949 } 1973 1950 pr->lclu = lclu; 1974 1951 ··· 2003 1970 if (pr != NULL) { 2004 1971 tree = &EXT4_I(inode)->i_pending_tree; 2005 1972 rb_erase(&pr->rb_node, &tree->root); 2006 - kmem_cache_free(ext4_pending_cachep, pr); 1973 + __free_pending(pr); 2007 1974 } 2008 1975 } 2009 1976 ··· 2062 2029 bool allocated) 2063 2030 { 2064 2031 struct extent_status newes; 2065 - int err1 = 0; 2066 - int err2 = 0; 2032 + int err1 = 0, err2 = 0, err3 = 0; 2067 2033 struct extent_status *es1 = NULL; 2068 2034 struct extent_status *es2 = NULL; 2035 + struct pending_reservation *pr = NULL; 2069 2036 2070 2037 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 2071 2038 return; ··· 2085 2052 es1 = __es_alloc_extent(true); 2086 2053 if ((err1 || err2) && !es2) 2087 2054 es2 = __es_alloc_extent(true); 2055 + if ((err1 || err2 || err3) && allocated && !pr) 2056 + pr = __alloc_pending(true); 2088 2057 write_lock(&EXT4_I(inode)->i_es_lock); 2089 2058 2090 2059 err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); ··· 2109 2074 es2 = NULL; 2110 2075 } 2111 2076 2112 - if (allocated) 2113 - __insert_pending(inode, lblk); 2077 + if (allocated) { 2078 + err3 = __insert_pending(inode, lblk, &pr); 2079 + if (err3 != 0) 2080 + goto error; 2081 + if (pr) { 2082 + 
__free_pending(pr); 2083 + pr = NULL; 2084 + } 2085 + } 2114 2086 error: 2115 2087 write_unlock(&EXT4_I(inode)->i_es_lock); 2116 - if (err1 || err2) 2088 + if (err1 || err2 || err3) 2117 2089 goto retry; 2118 2090 2119 2091 ext4_es_print_tree(inode); ··· 2226 2184 * @inode - file containing the range 2227 2185 * @lblk - logical block defining the start of range 2228 2186 * @len - length of range in blocks 2187 + * @prealloc - preallocated pending entry 2229 2188 * 2230 2189 * Used after a newly allocated extent is added to the extents status tree. 2231 2190 * Requires that the extents in the range have either written or unwritten 2232 2191 * status. Must be called while holding i_es_lock. 2233 2192 */ 2234 - static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, 2235 - ext4_lblk_t len) 2193 + static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, 2194 + ext4_lblk_t len, 2195 + struct pending_reservation **prealloc) 2236 2196 { 2237 2197 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2238 2198 ext4_lblk_t end = lblk + len - 1; 2239 2199 ext4_lblk_t first, last; 2240 2200 bool f_del = false, l_del = false; 2201 + int ret = 0; 2241 2202 2242 2203 if (len == 0) 2243 - return; 2204 + return 0; 2244 2205 2245 2206 /* 2246 2207 * Two cases - block range within single cluster and block range ··· 2264 2219 f_del = __es_scan_range(inode, &ext4_es_is_delonly, 2265 2220 first, lblk - 1); 2266 2221 if (f_del) { 2267 - __insert_pending(inode, first); 2222 + ret = __insert_pending(inode, first, prealloc); 2223 + if (ret < 0) 2224 + goto out; 2268 2225 } else { 2269 2226 last = EXT4_LBLK_CMASK(sbi, end) + 2270 2227 sbi->s_cluster_ratio - 1; ··· 2274 2227 l_del = __es_scan_range(inode, 2275 2228 &ext4_es_is_delonly, 2276 2229 end + 1, last); 2277 - if (l_del) 2278 - __insert_pending(inode, last); 2279 - else 2230 + if (l_del) { 2231 + ret = __insert_pending(inode, last, prealloc); 2232 + if (ret < 0) 2233 + goto out; 2234 + } else 2280 2235 
__remove_pending(inode, last); 2281 2236 } 2282 2237 } else { ··· 2286 2237 if (first != lblk) 2287 2238 f_del = __es_scan_range(inode, &ext4_es_is_delonly, 2288 2239 first, lblk - 1); 2289 - if (f_del) 2290 - __insert_pending(inode, first); 2291 - else 2240 + if (f_del) { 2241 + ret = __insert_pending(inode, first, prealloc); 2242 + if (ret < 0) 2243 + goto out; 2244 + } else 2292 2245 __remove_pending(inode, first); 2293 2246 2294 2247 last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1; 2295 2248 if (last != end) 2296 2249 l_del = __es_scan_range(inode, &ext4_es_is_delonly, 2297 2250 end + 1, last); 2298 - if (l_del) 2299 - __insert_pending(inode, last); 2300 - else 2251 + if (l_del) { 2252 + ret = __insert_pending(inode, last, prealloc); 2253 + if (ret < 0) 2254 + goto out; 2255 + } else 2301 2256 __remove_pending(inode, last); 2302 2257 } 2258 + out: 2259 + return ret; 2303 2260 }
+4 -4
fs/ext4/fast_commit.c
··· 1806 1806 * at the end of the FC replay using our array of 1807 1807 * modified inodes. 1808 1808 */ 1809 - ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 1809 + ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false); 1810 1810 goto next; 1811 1811 } 1812 1812 ··· 1875 1875 if (ret > 0) { 1876 1876 remaining -= ret; 1877 1877 cur += ret; 1878 - ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 1878 + ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false); 1879 1879 } else { 1880 1880 remaining -= map.m_len; 1881 1881 cur += map.m_len; ··· 1934 1934 if (!IS_ERR(path)) { 1935 1935 for (j = 0; j < path->p_depth; j++) 1936 1936 ext4_mb_mark_bb(inode->i_sb, 1937 - path[j].p_block, 1, 1); 1937 + path[j].p_block, 1, true); 1938 1938 ext4_free_ext_path(path); 1939 1939 } 1940 1940 cur += ret; 1941 1941 ext4_mb_mark_bb(inode->i_sb, map.m_pblk, 1942 - map.m_len, 1); 1942 + map.m_len, true); 1943 1943 } else { 1944 1944 cur = cur + (map.m_len ? map.m_len : 1); 1945 1945 }
+74 -95
fs/ext4/file.c
··· 306 306 } 307 307 308 308 static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, 309 - ssize_t written, size_t count) 309 + ssize_t count) 310 310 { 311 311 handle_t *handle; 312 - bool truncate = false; 313 - u8 blkbits = inode->i_blkbits; 314 - ext4_lblk_t written_blk, end_blk; 315 - int ret; 316 312 317 - /* 318 - * Note that EXT4_I(inode)->i_disksize can get extended up to 319 - * inode->i_size while the I/O was running due to writeback of delalloc 320 - * blocks. But, the code in ext4_iomap_alloc() is careful to use 321 - * zeroed/unwritten extents if this is possible; thus we won't leave 322 - * uninitialized blocks in a file even if we didn't succeed in writing 323 - * as much as we intended. 324 - */ 325 - WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize); 326 - if (offset + count <= EXT4_I(inode)->i_disksize) { 327 - /* 328 - * We need to ensure that the inode is removed from the orphan 329 - * list if it has been added prematurely, due to writeback of 330 - * delalloc blocks. 
331 - */ 332 - if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { 333 - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 334 - 335 - if (IS_ERR(handle)) { 336 - ext4_orphan_del(NULL, inode); 337 - return PTR_ERR(handle); 338 - } 339 - 340 - ext4_orphan_del(handle, inode); 341 - ext4_journal_stop(handle); 342 - } 343 - 344 - return written; 345 - } 346 - 347 - if (written < 0) 348 - goto truncate; 349 - 313 + lockdep_assert_held_write(&inode->i_rwsem); 350 314 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 351 - if (IS_ERR(handle)) { 352 - written = PTR_ERR(handle); 353 - goto truncate; 354 - } 315 + if (IS_ERR(handle)) 316 + return PTR_ERR(handle); 355 317 356 - if (ext4_update_inode_size(inode, offset + written)) { 357 - ret = ext4_mark_inode_dirty(handle, inode); 318 + if (ext4_update_inode_size(inode, offset + count)) { 319 + int ret = ext4_mark_inode_dirty(handle, inode); 358 320 if (unlikely(ret)) { 359 - written = ret; 360 321 ext4_journal_stop(handle); 361 - goto truncate; 322 + return ret; 362 323 } 363 324 } 364 325 365 - /* 366 - * We may need to truncate allocated but not written blocks beyond EOF. 367 - */ 368 - written_blk = ALIGN(offset + written, 1 << blkbits); 369 - end_blk = ALIGN(offset + count, 1 << blkbits); 370 - if (written_blk < end_blk && ext4_can_truncate(inode)) 371 - truncate = true; 372 - 373 - /* 374 - * Remove the inode from the orphan list if it has been extended and 375 - * everything went OK. 376 - */ 377 - if (!truncate && inode->i_nlink) 326 + if (inode->i_nlink) 378 327 ext4_orphan_del(handle, inode); 379 328 ext4_journal_stop(handle); 380 329 381 - if (truncate) { 382 - truncate: 330 + return count; 331 + } 332 + 333 + /* 334 + * Clean up the inode after DIO or DAX extending write has completed and the 335 + * inode size has been updated using ext4_handle_inode_extension(). 
336 + */ 337 + static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count) 338 + { 339 + lockdep_assert_held_write(&inode->i_rwsem); 340 + if (count < 0) { 383 341 ext4_truncate_failed_write(inode); 384 342 /* 385 343 * If the truncate operation failed early, then the inode may ··· 346 388 */ 347 389 if (inode->i_nlink) 348 390 ext4_orphan_del(NULL, inode); 391 + return; 349 392 } 393 + /* 394 + * If i_disksize got extended due to writeback of delalloc blocks while 395 + * the DIO was running we could fail to cleanup the orphan list in 396 + * ext4_handle_inode_extension(). Do it now. 397 + */ 398 + if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { 399 + handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 350 400 351 - return written; 401 + if (IS_ERR(handle)) { 402 + /* 403 + * The write has successfully completed. Not much to 404 + * do with the error here so just cleanup the orphan 405 + * list and hope for the best. 406 + */ 407 + ext4_orphan_del(NULL, inode); 408 + return; 409 + } 410 + ext4_orphan_del(handle, inode); 411 + ext4_journal_stop(handle); 412 + } 352 413 } 353 414 354 415 static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, ··· 376 399 loff_t pos = iocb->ki_pos; 377 400 struct inode *inode = file_inode(iocb->ki_filp); 378 401 402 + if (!error && size && flags & IOMAP_DIO_UNWRITTEN) 403 + error = ext4_convert_unwritten_extents(NULL, inode, pos, size); 379 404 if (error) 380 405 return error; 381 - 382 - if (size && flags & IOMAP_DIO_UNWRITTEN) { 383 - error = ext4_convert_unwritten_extents(NULL, inode, pos, size); 384 - if (error < 0) 385 - return error; 386 - } 387 406 /* 388 - * If we are extending the file, we have to update i_size here before 389 - * page cache gets invalidated in iomap_dio_rw(). Otherwise racing 390 - * buffered reads could zero out too much from page cache pages. 
Update 391 - * of on-disk size will happen later in ext4_dio_write_iter() where 392 - * we have enough information to also perform orphan list handling etc. 393 - * Note that we perform all extending writes synchronously under 394 - * i_rwsem held exclusively so i_size update is safe here in that case. 395 - * If the write was not extending, we cannot see pos > i_size here 396 - * because operations reducing i_size like truncate wait for all 397 - * outstanding DIO before updating i_size. 407 + * Note that EXT4_I(inode)->i_disksize can get extended up to 408 + * inode->i_size while the I/O was running due to writeback of delalloc 409 + * blocks. But the code in ext4_iomap_alloc() is careful to use 410 + * zeroed/unwritten extents if this is possible; thus we won't leave 411 + * uninitialized blocks in a file even if we didn't succeed in writing 412 + * as much as we intended. 398 413 */ 399 - pos += size; 400 - if (pos > i_size_read(inode)) 401 - i_size_write(inode, pos); 402 - 403 - return 0; 414 + WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize)); 415 + if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize)) 416 + return size; 417 + return ext4_handle_inode_extension(inode, pos, size); 404 418 } 405 419 406 420 static const struct iomap_dio_ops ext4_dio_write_ops = { ··· 537 569 return ext4_buffered_write_iter(iocb, from); 538 570 } 539 571 572 + /* 573 + * Prevent inline data from being created since we are going to allocate 574 + * blocks for DIO. We know the inode does not currently have inline data 575 + * because ext4_should_use_dio() checked for it, but we have to clear 576 + * the state flag before the write checks because a lock cycle could 577 + * introduce races with other writers. 
578 + */ 579 + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 580 + 540 581 ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend, 541 582 &unwritten, &dio_flags); 542 583 if (ret <= 0) 543 584 return ret; 544 - 545 - /* 546 - * Make sure inline data cannot be created anymore since we are going 547 - * to allocate blocks for DIO. We know the inode does not have any 548 - * inline data now because ext4_dio_supported() checked for that. 549 - */ 550 - ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 551 585 552 586 offset = iocb->ki_pos; 553 587 count = ret; ··· 576 606 dio_flags, NULL, 0); 577 607 if (ret == -ENOTBLK) 578 608 ret = 0; 579 - 580 - if (extend) 581 - ret = ext4_handle_inode_extension(inode, offset, ret, count); 609 + if (extend) { 610 + /* 611 + * We always perform extending DIO write synchronously so by 612 + * now the IO is completed and ext4_handle_inode_extension() 613 + * was called. Cleanup the inode in case of error or race with 614 + * writeback of delalloc blocks. 615 + */ 616 + WARN_ON_ONCE(ret == -EIOCBQUEUED); 617 + ext4_inode_extension_cleanup(inode, ret); 618 + } 582 619 583 620 out: 584 621 if (ilock_shared) ··· 666 689 667 690 ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); 668 691 669 - if (extend) 670 - ret = ext4_handle_inode_extension(inode, offset, ret, count); 692 + if (extend) { 693 + ret = ext4_handle_inode_extension(inode, offset, ret); 694 + ext4_inode_extension_cleanup(inode, ret); 695 + } 671 696 out: 672 697 inode_unlock(inode); 673 698 if (ret > 0)
+13 -1
fs/ext4/inode.c
··· 789 789 int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, 790 790 struct buffer_head *bh_result, int create) 791 791 { 792 + int ret = 0; 793 + 792 794 ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n", 793 795 inode->i_ino, create); 794 - return _ext4_get_block(inode, iblock, bh_result, 796 + ret = _ext4_get_block(inode, iblock, bh_result, 795 797 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT); 798 + 799 + /* 800 + * If the buffer is marked unwritten, mark it as new to make sure it is 801 + * zeroed out correctly in case of partial writes. Otherwise, there is 802 + * a chance of stale data getting exposed. 803 + */ 804 + if (ret == 0 && buffer_unwritten(bh_result)) 805 + set_buffer_new(bh_result); 806 + 807 + return ret; 796 808 } 797 809 798 810 /* Maximum number of blocks we map for direct IO at once. */
+349
fs/ext4/mballoc-test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * KUnit test of ext4 multiblocks allocation. 4 + */ 5 + 6 + #include <kunit/test.h> 7 + #include <kunit/static_stub.h> 8 + 9 + #include "ext4.h" 10 + 11 + struct mbt_grp_ctx { 12 + struct buffer_head bitmap_bh; 13 + /* desc and gd_bh are just the place holders for now */ 14 + struct ext4_group_desc desc; 15 + struct buffer_head gd_bh; 16 + }; 17 + 18 + struct mbt_ctx { 19 + struct mbt_grp_ctx *grp_ctx; 20 + }; 21 + 22 + struct mbt_ext4_super_block { 23 + struct super_block sb; 24 + struct mbt_ctx mbt_ctx; 25 + }; 26 + 27 + #define MBT_CTX(_sb) (&(container_of((_sb), struct mbt_ext4_super_block, sb)->mbt_ctx)) 28 + #define MBT_GRP_CTX(_sb, _group) (&MBT_CTX(_sb)->grp_ctx[_group]) 29 + 30 + static struct super_block *mbt_ext4_alloc_super_block(void) 31 + { 32 + struct ext4_super_block *es = kzalloc(sizeof(*es), GFP_KERNEL); 33 + struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 34 + struct mbt_ext4_super_block *fsb = kzalloc(sizeof(*fsb), GFP_KERNEL); 35 + 36 + if (fsb == NULL || sbi == NULL || es == NULL) 37 + goto out; 38 + 39 + sbi->s_es = es; 40 + fsb->sb.s_fs_info = sbi; 41 + return &fsb->sb; 42 + 43 + out: 44 + kfree(fsb); 45 + kfree(sbi); 46 + kfree(es); 47 + return NULL; 48 + } 49 + 50 + static void mbt_ext4_free_super_block(struct super_block *sb) 51 + { 52 + struct mbt_ext4_super_block *fsb = 53 + container_of(sb, struct mbt_ext4_super_block, sb); 54 + struct ext4_sb_info *sbi = EXT4_SB(sb); 55 + 56 + kfree(sbi->s_es); 57 + kfree(sbi); 58 + kfree(fsb); 59 + } 60 + 61 + struct mbt_ext4_block_layout { 62 + unsigned char blocksize_bits; 63 + unsigned int cluster_bits; 64 + uint32_t blocks_per_group; 65 + ext4_group_t group_count; 66 + uint16_t desc_size; 67 + }; 68 + 69 + static void mbt_init_sb_layout(struct super_block *sb, 70 + struct mbt_ext4_block_layout *layout) 71 + { 72 + struct ext4_sb_info *sbi = EXT4_SB(sb); 73 + struct ext4_super_block *es = sbi->s_es; 74 + 75 + sb->s_blocksize = 
1UL << layout->blocksize_bits; 76 + sb->s_blocksize_bits = layout->blocksize_bits; 77 + 78 + sbi->s_groups_count = layout->group_count; 79 + sbi->s_blocks_per_group = layout->blocks_per_group; 80 + sbi->s_cluster_bits = layout->cluster_bits; 81 + sbi->s_cluster_ratio = 1U << layout->cluster_bits; 82 + sbi->s_clusters_per_group = layout->blocks_per_group >> 83 + layout->cluster_bits; 84 + sbi->s_desc_size = layout->desc_size; 85 + 86 + es->s_first_data_block = cpu_to_le32(0); 87 + es->s_blocks_count_lo = cpu_to_le32(layout->blocks_per_group * 88 + layout->group_count); 89 + } 90 + 91 + static int mbt_grp_ctx_init(struct super_block *sb, 92 + struct mbt_grp_ctx *grp_ctx) 93 + { 94 + grp_ctx->bitmap_bh.b_data = kzalloc(EXT4_BLOCK_SIZE(sb), GFP_KERNEL); 95 + if (grp_ctx->bitmap_bh.b_data == NULL) 96 + return -ENOMEM; 97 + 98 + return 0; 99 + } 100 + 101 + static void mbt_grp_ctx_release(struct mbt_grp_ctx *grp_ctx) 102 + { 103 + kfree(grp_ctx->bitmap_bh.b_data); 104 + grp_ctx->bitmap_bh.b_data = NULL; 105 + } 106 + 107 + static void mbt_ctx_mark_used(struct super_block *sb, ext4_group_t group, 108 + unsigned int start, unsigned int len) 109 + { 110 + struct mbt_grp_ctx *grp_ctx = MBT_GRP_CTX(sb, group); 111 + 112 + mb_set_bits(grp_ctx->bitmap_bh.b_data, start, len); 113 + } 114 + 115 + /* called after mbt_init_sb_layout */ 116 + static int mbt_ctx_init(struct super_block *sb) 117 + { 118 + struct mbt_ctx *ctx = MBT_CTX(sb); 119 + ext4_group_t i, ngroups = ext4_get_groups_count(sb); 120 + 121 + ctx->grp_ctx = kcalloc(ngroups, sizeof(struct mbt_grp_ctx), 122 + GFP_KERNEL); 123 + if (ctx->grp_ctx == NULL) 124 + return -ENOMEM; 125 + 126 + for (i = 0; i < ngroups; i++) 127 + if (mbt_grp_ctx_init(sb, &ctx->grp_ctx[i])) 128 + goto out; 129 + 130 + /* 131 + * first data block(first cluster in first group) is used by 132 + * metadata, mark it used to avoid to alloc data block at first 133 + * block which will fail ext4_sb_block_valid check. 
134 + */ 135 + mb_set_bits(ctx->grp_ctx[0].bitmap_bh.b_data, 0, 1); 136 + 137 + return 0; 138 + out: 139 + while (i-- > 0) 140 + mbt_grp_ctx_release(&ctx->grp_ctx[i]); 141 + kfree(ctx->grp_ctx); 142 + return -ENOMEM; 143 + } 144 + 145 + static void mbt_ctx_release(struct super_block *sb) 146 + { 147 + struct mbt_ctx *ctx = MBT_CTX(sb); 148 + ext4_group_t i, ngroups = ext4_get_groups_count(sb); 149 + 150 + for (i = 0; i < ngroups; i++) 151 + mbt_grp_ctx_release(&ctx->grp_ctx[i]); 152 + kfree(ctx->grp_ctx); 153 + } 154 + 155 + static struct buffer_head * 156 + ext4_read_block_bitmap_nowait_stub(struct super_block *sb, ext4_group_t block_group, 157 + bool ignore_locked) 158 + { 159 + struct mbt_grp_ctx *grp_ctx = MBT_GRP_CTX(sb, block_group); 160 + 161 + /* paired with brelse from caller of ext4_read_block_bitmap_nowait */ 162 + get_bh(&grp_ctx->bitmap_bh); 163 + return &grp_ctx->bitmap_bh; 164 + } 165 + 166 + static int ext4_wait_block_bitmap_stub(struct super_block *sb, 167 + ext4_group_t block_group, 168 + struct buffer_head *bh) 169 + { 170 + return 0; 171 + } 172 + 173 + static struct ext4_group_desc * 174 + ext4_get_group_desc_stub(struct super_block *sb, ext4_group_t block_group, 175 + struct buffer_head **bh) 176 + { 177 + struct mbt_grp_ctx *grp_ctx = MBT_GRP_CTX(sb, block_group); 178 + 179 + if (bh != NULL) 180 + *bh = &grp_ctx->gd_bh; 181 + 182 + return &grp_ctx->desc; 183 + } 184 + 185 + static int 186 + ext4_mb_mark_context_stub(handle_t *handle, struct super_block *sb, bool state, 187 + ext4_group_t group, ext4_grpblk_t blkoff, 188 + ext4_grpblk_t len, int flags, 189 + ext4_grpblk_t *ret_changed) 190 + { 191 + struct mbt_grp_ctx *grp_ctx = MBT_GRP_CTX(sb, group); 192 + struct buffer_head *bitmap_bh = &grp_ctx->bitmap_bh; 193 + 194 + if (state) 195 + mb_set_bits(bitmap_bh->b_data, blkoff, len); 196 + else 197 + mb_clear_bits(bitmap_bh->b_data, blkoff, len); 198 + 199 + return 0; 200 + } 201 + 202 + #define TEST_GOAL_GROUP 1 203 + static int 
mbt_kunit_init(struct kunit *test) 204 + { 205 + struct mbt_ext4_block_layout *layout = 206 + (struct mbt_ext4_block_layout *)(test->param_value); 207 + struct super_block *sb; 208 + int ret; 209 + 210 + sb = mbt_ext4_alloc_super_block(); 211 + if (sb == NULL) 212 + return -ENOMEM; 213 + 214 + mbt_init_sb_layout(sb, layout); 215 + 216 + ret = mbt_ctx_init(sb); 217 + if (ret != 0) { 218 + mbt_ext4_free_super_block(sb); 219 + return ret; 220 + } 221 + 222 + test->priv = sb; 223 + kunit_activate_static_stub(test, 224 + ext4_read_block_bitmap_nowait, 225 + ext4_read_block_bitmap_nowait_stub); 226 + kunit_activate_static_stub(test, 227 + ext4_wait_block_bitmap, 228 + ext4_wait_block_bitmap_stub); 229 + kunit_activate_static_stub(test, 230 + ext4_get_group_desc, 231 + ext4_get_group_desc_stub); 232 + kunit_activate_static_stub(test, 233 + ext4_mb_mark_context, 234 + ext4_mb_mark_context_stub); 235 + return 0; 236 + } 237 + 238 + static void mbt_kunit_exit(struct kunit *test) 239 + { 240 + struct super_block *sb = (struct super_block *)test->priv; 241 + 242 + mbt_ctx_release(sb); 243 + mbt_ext4_free_super_block(sb); 244 + } 245 + 246 + static void test_new_blocks_simple(struct kunit *test) 247 + { 248 + struct super_block *sb = (struct super_block *)test->priv; 249 + struct inode inode = { .i_sb = sb, }; 250 + struct ext4_allocation_request ar; 251 + ext4_group_t i, goal_group = TEST_GOAL_GROUP; 252 + int err = 0; 253 + ext4_fsblk_t found; 254 + struct ext4_sb_info *sbi = EXT4_SB(sb); 255 + 256 + ar.inode = &inode; 257 + 258 + /* get block at goal */ 259 + ar.goal = ext4_group_first_block_no(sb, goal_group); 260 + found = ext4_mb_new_blocks_simple(&ar, &err); 261 + KUNIT_ASSERT_EQ_MSG(test, ar.goal, found, 262 + "failed to alloc block at goal, expected %llu found %llu", 263 + ar.goal, found); 264 + 265 + /* get block after goal in goal group */ 266 + ar.goal = ext4_group_first_block_no(sb, goal_group); 267 + found = ext4_mb_new_blocks_simple(&ar, &err); 268 + 
KUNIT_ASSERT_EQ_MSG(test, ar.goal + EXT4_C2B(sbi, 1), found, 269 + "failed to alloc block after goal in goal group, expected %llu found %llu", 270 + ar.goal + 1, found); 271 + 272 + /* get block after goal group */ 273 + mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb)); 274 + ar.goal = ext4_group_first_block_no(sb, goal_group); 275 + found = ext4_mb_new_blocks_simple(&ar, &err); 276 + KUNIT_ASSERT_EQ_MSG(test, 277 + ext4_group_first_block_no(sb, goal_group + 1), found, 278 + "failed to alloc block after goal group, expected %llu found %llu", 279 + ext4_group_first_block_no(sb, goal_group + 1), found); 280 + 281 + /* get block before goal group */ 282 + for (i = goal_group; i < ext4_get_groups_count(sb); i++) 283 + mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); 284 + ar.goal = ext4_group_first_block_no(sb, goal_group); 285 + found = ext4_mb_new_blocks_simple(&ar, &err); 286 + KUNIT_ASSERT_EQ_MSG(test, 287 + ext4_group_first_block_no(sb, 0) + EXT4_C2B(sbi, 1), found, 288 + "failed to alloc block before goal group, expected %llu found %llu", 289 + ext4_group_first_block_no(sb, 0 + EXT4_C2B(sbi, 1)), found); 290 + 291 + /* no block available, fail to allocate block */ 292 + for (i = 0; i < ext4_get_groups_count(sb); i++) 293 + mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); 294 + ar.goal = ext4_group_first_block_no(sb, goal_group); 295 + found = ext4_mb_new_blocks_simple(&ar, &err); 296 + KUNIT_ASSERT_NE_MSG(test, err, 0, 297 + "unexpectedly get block when no block is available"); 298 + } 299 + 300 + static const struct mbt_ext4_block_layout mbt_test_layouts[] = { 301 + { 302 + .blocksize_bits = 10, 303 + .cluster_bits = 3, 304 + .blocks_per_group = 8192, 305 + .group_count = 4, 306 + .desc_size = 64, 307 + }, 308 + { 309 + .blocksize_bits = 12, 310 + .cluster_bits = 3, 311 + .blocks_per_group = 8192, 312 + .group_count = 4, 313 + .desc_size = 64, 314 + }, 315 + { 316 + .blocksize_bits = 16, 317 + .cluster_bits = 3, 318 + 
.blocks_per_group = 8192, 319 + .group_count = 4, 320 + .desc_size = 64, 321 + }, 322 + }; 323 + 324 + static void mbt_show_layout(const struct mbt_ext4_block_layout *layout, 325 + char *desc) 326 + { 327 + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "block_bits=%d cluster_bits=%d " 328 + "blocks_per_group=%d group_count=%d desc_size=%d\n", 329 + layout->blocksize_bits, layout->cluster_bits, 330 + layout->blocks_per_group, layout->group_count, 331 + layout->desc_size); 332 + } 333 + KUNIT_ARRAY_PARAM(mbt_layouts, mbt_test_layouts, mbt_show_layout); 334 + 335 + static struct kunit_case mbt_test_cases[] = { 336 + KUNIT_CASE_PARAM(test_new_blocks_simple, mbt_layouts_gen_params), 337 + {} 338 + }; 339 + 340 + static struct kunit_suite mbt_test_suite = { 341 + .name = "ext4_mballoc_test", 342 + .init = mbt_kunit_init, 343 + .exit = mbt_kunit_exit, 344 + .test_cases = mbt_test_cases, 345 + }; 346 + 347 + kunit_test_suites(&mbt_test_suite); 348 + 349 + MODULE_LICENSE("GPL");
+203 -378
fs/ext4/mballoc.c
··· 18 18 #include <linux/backing-dev.h> 19 19 #include <linux/freezer.h> 20 20 #include <trace/events/ext4.h> 21 + #include <kunit/static_stub.h> 21 22 22 23 /* 23 24 * MUSTDO: ··· 418 417 419 418 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 420 419 ext4_group_t group); 421 - static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 422 - ext4_group_t group); 423 420 static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac); 424 421 425 422 static bool ext4_mb_good_group(struct ext4_allocation_context *ac, ··· 1360 1361 * We place the buddy block and bitmap block 1361 1362 * close together 1362 1363 */ 1364 + grinfo = ext4_get_group_info(sb, group); 1365 + if (!grinfo) { 1366 + err = -EFSCORRUPTED; 1367 + goto out; 1368 + } 1363 1369 if ((first_block + i) & 1) { 1364 1370 /* this is block of buddy */ 1365 1371 BUG_ON(incore == NULL); 1366 1372 mb_debug(sb, "put buddy for group %u in page %lu/%x\n", 1367 1373 group, page->index, i * blocksize); 1368 1374 trace_ext4_mb_buddy_bitmap_load(sb, group); 1369 - grinfo = ext4_get_group_info(sb, group); 1370 - if (!grinfo) { 1371 - err = -EFSCORRUPTED; 1372 - goto out; 1373 - } 1374 1375 grinfo->bb_fragments = 0; 1375 1376 memset(grinfo->bb_counters, 0, 1376 1377 sizeof(*grinfo->bb_counters) * ··· 1397 1398 1398 1399 /* mark all preallocated blks used in in-core bitmap */ 1399 1400 ext4_mb_generate_from_pa(sb, data, group); 1400 - ext4_mb_generate_from_freelist(sb, data, group); 1401 + WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root)); 1401 1402 ext4_unlock_group(sb, group); 1402 1403 1403 1404 /* set incore so that the buddy information can be ··· 3630 3631 3631 3632 spin_lock_init(&sbi->s_md_lock); 3632 3633 sbi->s_mb_free_pending = 0; 3633 - INIT_LIST_HEAD(&sbi->s_freed_data_list); 3634 + INIT_LIST_HEAD(&sbi->s_freed_data_list[0]); 3635 + INIT_LIST_HEAD(&sbi->s_freed_data_list[1]); 3634 3636 INIT_LIST_HEAD(&sbi->s_discard_list); 3635 3637 
INIT_WORK(&sbi->s_discard_work, ext4_discard_work); 3636 3638 atomic_set(&sbi->s_retry_alloc_pending, 0); ··· 3883 3883 struct ext4_sb_info *sbi = EXT4_SB(sb); 3884 3884 struct ext4_free_data *entry, *tmp; 3885 3885 LIST_HEAD(freed_data_list); 3886 - struct list_head *cut_pos = NULL; 3886 + struct list_head *s_freed_head = &sbi->s_freed_data_list[commit_tid & 1]; 3887 3887 bool wake; 3888 3888 3889 - spin_lock(&sbi->s_md_lock); 3890 - list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) { 3891 - if (entry->efd_tid != commit_tid) 3892 - break; 3893 - cut_pos = &entry->efd_list; 3894 - } 3895 - if (cut_pos) 3896 - list_cut_position(&freed_data_list, &sbi->s_freed_data_list, 3897 - cut_pos); 3898 - spin_unlock(&sbi->s_md_lock); 3889 + list_replace_init(s_freed_head, &freed_data_list); 3899 3890 3900 3891 list_for_each_entry(entry, &freed_data_list, efd_list) 3901 3892 ext4_free_data_in_buddy(sb, entry); ··· 3944 3953 ext4_groupinfo_destroy_slabs(); 3945 3954 } 3946 3955 3956 + #define EXT4_MB_BITMAP_MARKED_CHECK 0x0001 3957 + #define EXT4_MB_SYNC_UPDATE 0x0002 3958 + static int 3959 + ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state, 3960 + ext4_group_t group, ext4_grpblk_t blkoff, 3961 + ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed) 3962 + { 3963 + struct ext4_sb_info *sbi = EXT4_SB(sb); 3964 + struct buffer_head *bitmap_bh = NULL; 3965 + struct ext4_group_desc *gdp; 3966 + struct buffer_head *gdp_bh; 3967 + int err; 3968 + unsigned int i, already, changed = len; 3969 + 3970 + KUNIT_STATIC_STUB_REDIRECT(ext4_mb_mark_context, 3971 + handle, sb, state, group, blkoff, len, 3972 + flags, ret_changed); 3973 + 3974 + if (ret_changed) 3975 + *ret_changed = 0; 3976 + bitmap_bh = ext4_read_block_bitmap(sb, group); 3977 + if (IS_ERR(bitmap_bh)) 3978 + return PTR_ERR(bitmap_bh); 3979 + 3980 + if (handle) { 3981 + BUFFER_TRACE(bitmap_bh, "getting write access"); 3982 + err = ext4_journal_get_write_access(handle, sb, bitmap_bh, 3983 + 
EXT4_JTR_NONE); 3984 + if (err) 3985 + goto out_err; 3986 + } 3987 + 3988 + err = -EIO; 3989 + gdp = ext4_get_group_desc(sb, group, &gdp_bh); 3990 + if (!gdp) 3991 + goto out_err; 3992 + 3993 + if (handle) { 3994 + BUFFER_TRACE(gdp_bh, "get_write_access"); 3995 + err = ext4_journal_get_write_access(handle, sb, gdp_bh, 3996 + EXT4_JTR_NONE); 3997 + if (err) 3998 + goto out_err; 3999 + } 4000 + 4001 + ext4_lock_group(sb, group); 4002 + if (ext4_has_group_desc_csum(sb) && 4003 + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { 4004 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 4005 + ext4_free_group_clusters_set(sb, gdp, 4006 + ext4_free_clusters_after_init(sb, group, gdp)); 4007 + } 4008 + 4009 + if (flags & EXT4_MB_BITMAP_MARKED_CHECK) { 4010 + already = 0; 4011 + for (i = 0; i < len; i++) 4012 + if (mb_test_bit(blkoff + i, bitmap_bh->b_data) == 4013 + state) 4014 + already++; 4015 + changed = len - already; 4016 + } 4017 + 4018 + if (state) { 4019 + mb_set_bits(bitmap_bh->b_data, blkoff, len); 4020 + ext4_free_group_clusters_set(sb, gdp, 4021 + ext4_free_group_clusters(sb, gdp) - changed); 4022 + } else { 4023 + mb_clear_bits(bitmap_bh->b_data, blkoff, len); 4024 + ext4_free_group_clusters_set(sb, gdp, 4025 + ext4_free_group_clusters(sb, gdp) + changed); 4026 + } 4027 + 4028 + ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); 4029 + ext4_group_desc_csum_set(sb, group, gdp); 4030 + ext4_unlock_group(sb, group); 4031 + if (ret_changed) 4032 + *ret_changed = changed; 4033 + 4034 + if (sbi->s_log_groups_per_flex) { 4035 + ext4_group_t flex_group = ext4_flex_group(sbi, group); 4036 + struct flex_groups *fg = sbi_array_rcu_deref(sbi, 4037 + s_flex_groups, flex_group); 4038 + 4039 + if (state) 4040 + atomic64_sub(changed, &fg->free_clusters); 4041 + else 4042 + atomic64_add(changed, &fg->free_clusters); 4043 + } 4044 + 4045 + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 4046 + if (err) 4047 + goto out_err; 4048 + err = 
ext4_handle_dirty_metadata(handle, NULL, gdp_bh); 4049 + if (err) 4050 + goto out_err; 4051 + 4052 + if (flags & EXT4_MB_SYNC_UPDATE) { 4053 + sync_dirty_buffer(bitmap_bh); 4054 + sync_dirty_buffer(gdp_bh); 4055 + } 4056 + 4057 + out_err: 4058 + brelse(bitmap_bh); 4059 + return err; 4060 + } 3947 4061 3948 4062 /* 3949 4063 * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps ··· 4058 3962 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 4059 3963 handle_t *handle, unsigned int reserv_clstrs) 4060 3964 { 4061 - struct buffer_head *bitmap_bh = NULL; 4062 3965 struct ext4_group_desc *gdp; 4063 - struct buffer_head *gdp_bh; 4064 3966 struct ext4_sb_info *sbi; 4065 3967 struct super_block *sb; 4066 3968 ext4_fsblk_t block; 4067 3969 int err, len; 3970 + int flags = 0; 3971 + ext4_grpblk_t changed; 4068 3972 4069 3973 BUG_ON(ac->ac_status != AC_STATUS_FOUND); 4070 3974 BUG_ON(ac->ac_b_ex.fe_len <= 0); ··· 4072 3976 sb = ac->ac_sb; 4073 3977 sbi = EXT4_SB(sb); 4074 3978 4075 - bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); 4076 - if (IS_ERR(bitmap_bh)) { 4077 - return PTR_ERR(bitmap_bh); 4078 - } 4079 - 4080 - BUFFER_TRACE(bitmap_bh, "getting write access"); 4081 - err = ext4_journal_get_write_access(handle, sb, bitmap_bh, 4082 - EXT4_JTR_NONE); 4083 - if (err) 4084 - goto out_err; 4085 - 4086 - err = -EIO; 4087 - gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh); 3979 + gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, NULL); 4088 3980 if (!gdp) 4089 - goto out_err; 4090 - 3981 + return -EIO; 4091 3982 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, 4092 3983 ext4_free_group_clusters(sb, gdp)); 4093 3984 4094 - BUFFER_TRACE(gdp_bh, "get_write_access"); 4095 - err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE); 4096 - if (err) 4097 - goto out_err; 4098 - 4099 3985 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 4100 - 4101 3986 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); 4102 
3987 if (!ext4_inode_block_valid(ac->ac_inode, block, len)) { 4103 3988 ext4_error(sb, "Allocating blocks %llu-%llu which overlap " ··· 4087 4010 * Fix the bitmap and return EFSCORRUPTED 4088 4011 * We leak some of the blocks here. 4089 4012 */ 4090 - ext4_lock_group(sb, ac->ac_b_ex.fe_group); 4091 - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, 4092 - ac->ac_b_ex.fe_len); 4093 - ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 4094 - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 4013 + err = ext4_mb_mark_context(handle, sb, true, 4014 + ac->ac_b_ex.fe_group, 4015 + ac->ac_b_ex.fe_start, 4016 + ac->ac_b_ex.fe_len, 4017 + 0, NULL); 4095 4018 if (!err) 4096 4019 err = -EFSCORRUPTED; 4097 - goto out_err; 4020 + return err; 4098 4021 } 4099 4022 4100 - ext4_lock_group(sb, ac->ac_b_ex.fe_group); 4101 4023 #ifdef AGGRESSIVE_CHECK 4102 - { 4103 - int i; 4104 - for (i = 0; i < ac->ac_b_ex.fe_len; i++) { 4105 - BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i, 4106 - bitmap_bh->b_data)); 4107 - } 4108 - } 4024 + flags |= EXT4_MB_BITMAP_MARKED_CHECK; 4109 4025 #endif 4110 - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, 4111 - ac->ac_b_ex.fe_len); 4112 - if (ext4_has_group_desc_csum(sb) && 4113 - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { 4114 - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 4115 - ext4_free_group_clusters_set(sb, gdp, 4116 - ext4_free_clusters_after_init(sb, 4117 - ac->ac_b_ex.fe_group, gdp)); 4118 - } 4119 - len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; 4120 - ext4_free_group_clusters_set(sb, gdp, len); 4121 - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); 4122 - ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp); 4026 + err = ext4_mb_mark_context(handle, sb, true, ac->ac_b_ex.fe_group, 4027 + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len, 4028 + flags, &changed); 4123 4029 4124 - ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 4030 + if (err && changed == 0) 4031 + return err; 4032 + 4033 + #ifdef 
AGGRESSIVE_CHECK 4034 + BUG_ON(changed != ac->ac_b_ex.fe_len); 4035 + #endif 4125 4036 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); 4126 4037 /* 4127 4038 * Now reduce the dirty block count also. Should not go negative ··· 4119 4054 percpu_counter_sub(&sbi->s_dirtyclusters_counter, 4120 4055 reserv_clstrs); 4121 4056 4122 - if (sbi->s_log_groups_per_flex) { 4123 - ext4_group_t flex_group = ext4_flex_group(sbi, 4124 - ac->ac_b_ex.fe_group); 4125 - atomic64_sub(ac->ac_b_ex.fe_len, 4126 - &sbi_array_rcu_deref(sbi, s_flex_groups, 4127 - flex_group)->free_clusters); 4128 - } 4129 - 4130 - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 4131 - if (err) 4132 - goto out_err; 4133 - err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); 4134 - 4135 - out_err: 4136 - brelse(bitmap_bh); 4137 4057 return err; 4138 4058 } 4139 4059 ··· 4127 4077 * blocks in bitmaps and update counters. 4128 4078 */ 4129 4079 void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, 4130 - int len, int state) 4080 + int len, bool state) 4131 4081 { 4132 - struct buffer_head *bitmap_bh = NULL; 4133 - struct ext4_group_desc *gdp; 4134 - struct buffer_head *gdp_bh; 4135 4082 struct ext4_sb_info *sbi = EXT4_SB(sb); 4136 4083 ext4_group_t group; 4137 4084 ext4_grpblk_t blkoff; 4138 - int i, err = 0; 4139 - int already; 4140 - unsigned int clen, clen_changed, thisgrp_len; 4085 + int err = 0; 4086 + unsigned int clen, thisgrp_len; 4141 4087 4142 4088 while (len > 0) { 4143 4089 ext4_get_group_no_and_offset(sb, block, &group, &blkoff); ··· 4154 4108 ext4_error(sb, "Marking blocks in system zone - " 4155 4109 "Block = %llu, len = %u", 4156 4110 block, thisgrp_len); 4157 - bitmap_bh = NULL; 4158 4111 break; 4159 4112 } 4160 4113 4161 - bitmap_bh = ext4_read_block_bitmap(sb, group); 4162 - if (IS_ERR(bitmap_bh)) { 4163 - err = PTR_ERR(bitmap_bh); 4164 - bitmap_bh = NULL; 4165 - break; 4166 - } 4167 - 4168 - err = -EIO; 4169 - gdp = ext4_get_group_desc(sb, group, 
&gdp_bh); 4170 - if (!gdp) 4171 - break; 4172 - 4173 - ext4_lock_group(sb, group); 4174 - already = 0; 4175 - for (i = 0; i < clen; i++) 4176 - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == 4177 - !state) 4178 - already++; 4179 - 4180 - clen_changed = clen - already; 4181 - if (state) 4182 - mb_set_bits(bitmap_bh->b_data, blkoff, clen); 4183 - else 4184 - mb_clear_bits(bitmap_bh->b_data, blkoff, clen); 4185 - if (ext4_has_group_desc_csum(sb) && 4186 - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { 4187 - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 4188 - ext4_free_group_clusters_set(sb, gdp, 4189 - ext4_free_clusters_after_init(sb, group, gdp)); 4190 - } 4191 - if (state) 4192 - clen = ext4_free_group_clusters(sb, gdp) - clen_changed; 4193 - else 4194 - clen = ext4_free_group_clusters(sb, gdp) + clen_changed; 4195 - 4196 - ext4_free_group_clusters_set(sb, gdp, clen); 4197 - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); 4198 - ext4_group_desc_csum_set(sb, group, gdp); 4199 - 4200 - ext4_unlock_group(sb, group); 4201 - 4202 - if (sbi->s_log_groups_per_flex) { 4203 - ext4_group_t flex_group = ext4_flex_group(sbi, group); 4204 - struct flex_groups *fg = sbi_array_rcu_deref(sbi, 4205 - s_flex_groups, flex_group); 4206 - 4207 - if (state) 4208 - atomic64_sub(clen_changed, &fg->free_clusters); 4209 - else 4210 - atomic64_add(clen_changed, &fg->free_clusters); 4211 - 4212 - } 4213 - 4214 - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); 4215 - if (err) 4216 - break; 4217 - sync_dirty_buffer(bitmap_bh); 4218 - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); 4219 - sync_dirty_buffer(gdp_bh); 4114 + err = ext4_mb_mark_context(NULL, sb, state, 4115 + group, blkoff, clen, 4116 + EXT4_MB_BITMAP_MARKED_CHECK | 4117 + EXT4_MB_SYNC_UPDATE, 4118 + NULL); 4220 4119 if (err) 4221 4120 break; 4222 4121 4223 4122 block += thisgrp_len; 4224 4123 len -= thisgrp_len; 4225 - brelse(bitmap_bh); 4226 4124 BUG_ON(len < 0); 4227 4125 } 4228 - 4229 - if 
(err) 4230 - brelse(bitmap_bh); 4231 4126 } 4232 4127 4233 4128 /* ··· 4943 4956 return true; 4944 4957 } 4945 4958 return false; 4946 - } 4947 - 4948 - /* 4949 - * the function goes through all block freed in the group 4950 - * but not yet committed and marks them used in in-core bitmap. 4951 - * buddy must be generated from this bitmap 4952 - * Need to be called with the ext4 group lock held 4953 - */ 4954 - static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 4955 - ext4_group_t group) 4956 - { 4957 - struct rb_node *n; 4958 - struct ext4_group_info *grp; 4959 - struct ext4_free_data *entry; 4960 - 4961 - grp = ext4_get_group_info(sb, group); 4962 - if (!grp) 4963 - return; 4964 - n = rb_first(&(grp->bb_free_root)); 4965 - 4966 - while (n) { 4967 - entry = rb_entry(n, struct ext4_free_data, efd_node); 4968 - mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); 4969 - n = rb_next(n); 4970 - } 4971 4959 } 4972 4960 4973 4961 /* ··· 6092 6130 } 6093 6131 6094 6132 block = ext4_group_first_block_no(sb, group) + EXT4_C2B(sbi, i); 6095 - ext4_mb_mark_bb(sb, block, 1, 1); 6133 + ext4_mb_mark_bb(sb, block, 1, true); 6096 6134 ar->len = 1; 6097 6135 6098 6136 return block; ··· 6340 6378 } 6341 6379 6342 6380 spin_lock(&sbi->s_md_lock); 6343 - list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list); 6381 + list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list[new_entry->efd_tid & 1]); 6344 6382 sbi->s_mb_free_pending += clusters; 6345 6383 spin_unlock(&sbi->s_md_lock); 6346 6384 } ··· 6348 6386 static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, 6349 6387 unsigned long count) 6350 6388 { 6351 - struct buffer_head *bitmap_bh; 6352 6389 struct super_block *sb = inode->i_sb; 6353 - struct ext4_group_desc *gdp; 6354 - struct buffer_head *gdp_bh; 6355 6390 ext4_group_t group; 6356 6391 ext4_grpblk_t blkoff; 6357 - int already_freed = 0, err, i; 6358 6392 6359 6393 ext4_get_group_no_and_offset(sb, 
block, &group, &blkoff); 6360 - bitmap_bh = ext4_read_block_bitmap(sb, group); 6361 - if (IS_ERR(bitmap_bh)) { 6362 - pr_warn("Failed to read block bitmap\n"); 6363 - return; 6364 - } 6365 - gdp = ext4_get_group_desc(sb, group, &gdp_bh); 6366 - if (!gdp) 6367 - goto err_out; 6368 - 6369 - for (i = 0; i < count; i++) { 6370 - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data)) 6371 - already_freed++; 6372 - } 6373 - mb_clear_bits(bitmap_bh->b_data, blkoff, count); 6374 - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); 6375 - if (err) 6376 - goto err_out; 6377 - ext4_free_group_clusters_set( 6378 - sb, gdp, ext4_free_group_clusters(sb, gdp) + 6379 - count - already_freed); 6380 - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); 6381 - ext4_group_desc_csum_set(sb, group, gdp); 6382 - ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); 6383 - sync_dirty_buffer(bitmap_bh); 6384 - sync_dirty_buffer(gdp_bh); 6385 - 6386 - err_out: 6387 - brelse(bitmap_bh); 6394 + ext4_mb_mark_context(NULL, sb, false, group, blkoff, count, 6395 + EXT4_MB_BITMAP_MARKED_CHECK | 6396 + EXT4_MB_SYNC_UPDATE, 6397 + NULL); 6388 6398 } 6389 6399 6390 6400 /** ··· 6372 6438 ext4_fsblk_t block, unsigned long count, 6373 6439 int flags) 6374 6440 { 6375 - struct buffer_head *bitmap_bh = NULL; 6376 6441 struct super_block *sb = inode->i_sb; 6377 - struct ext4_group_desc *gdp; 6378 6442 struct ext4_group_info *grp; 6379 6443 unsigned int overflow; 6380 6444 ext4_grpblk_t bit; 6381 - struct buffer_head *gd_bh; 6382 6445 ext4_group_t block_group; 6383 6446 struct ext4_sb_info *sbi; 6384 6447 struct ext4_buddy e4b; 6385 6448 unsigned int count_clusters; 6386 6449 int err = 0; 6387 - int ret; 6450 + int mark_flags = 0; 6451 + ext4_grpblk_t changed; 6388 6452 6389 6453 sbi = EXT4_SB(sb); 6390 6454 ··· 6391 6459 ext4_error(sb, "Freeing blocks in system zone - " 6392 6460 "Block = %llu, count = %lu", block, count); 6393 6461 /* err = 0. 
ext4_std_error should be a no op */ 6394 - goto error_return; 6462 + goto error_out; 6395 6463 } 6396 6464 flags |= EXT4_FREE_BLOCKS_VALIDATED; 6397 6465 ··· 6415 6483 flags &= ~EXT4_FREE_BLOCKS_VALIDATED; 6416 6484 } 6417 6485 count_clusters = EXT4_NUM_B2C(sbi, count); 6418 - bitmap_bh = ext4_read_block_bitmap(sb, block_group); 6419 - if (IS_ERR(bitmap_bh)) { 6420 - err = PTR_ERR(bitmap_bh); 6421 - bitmap_bh = NULL; 6422 - goto error_return; 6423 - } 6424 - gdp = ext4_get_group_desc(sb, block_group, &gd_bh); 6425 - if (!gdp) { 6426 - err = -EIO; 6427 - goto error_return; 6428 - } 6429 - 6430 - if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && 6431 - !ext4_inode_block_valid(inode, block, count)) { 6432 - ext4_error(sb, "Freeing blocks in system zone - " 6433 - "Block = %llu, count = %lu", block, count); 6434 - /* err = 0. ext4_std_error should be a no op */ 6435 - goto error_return; 6436 - } 6437 - 6438 - BUFFER_TRACE(bitmap_bh, "getting write access"); 6439 - err = ext4_journal_get_write_access(handle, sb, bitmap_bh, 6440 - EXT4_JTR_NONE); 6441 - if (err) 6442 - goto error_return; 6443 - 6444 - /* 6445 - * We are about to modify some metadata. Call the journal APIs 6446 - * to unshare ->b_data if a currently-committing transaction is 6447 - * using it 6448 - */ 6449 - BUFFER_TRACE(gd_bh, "get_write_access"); 6450 - err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE); 6451 - if (err) 6452 - goto error_return; 6453 - #ifdef AGGRESSIVE_CHECK 6454 - { 6455 - int i; 6456 - for (i = 0; i < count_clusters; i++) 6457 - BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 6458 - } 6459 - #endif 6460 6486 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters); 6461 6487 6462 6488 /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. 
*/ 6463 6489 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b, 6464 6490 GFP_NOFS|__GFP_NOFAIL); 6465 6491 if (err) 6466 - goto error_return; 6492 + goto error_out; 6493 + 6494 + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && 6495 + !ext4_inode_block_valid(inode, block, count)) { 6496 + ext4_error(sb, "Freeing blocks in system zone - " 6497 + "Block = %llu, count = %lu", block, count); 6498 + /* err = 0. ext4_std_error should be a no op */ 6499 + goto error_clean; 6500 + } 6501 + 6502 + #ifdef AGGRESSIVE_CHECK 6503 + mark_flags |= EXT4_MB_BITMAP_MARKED_CHECK; 6504 + #endif 6505 + err = ext4_mb_mark_context(handle, sb, false, block_group, bit, 6506 + count_clusters, mark_flags, &changed); 6507 + 6508 + 6509 + if (err && changed == 0) 6510 + goto error_clean; 6511 + 6512 + #ifdef AGGRESSIVE_CHECK 6513 + BUG_ON(changed != count_clusters); 6514 + #endif 6467 6515 6468 6516 /* 6469 6517 * We need to make sure we don't reuse the freed block until after the ··· 6467 6555 new_entry->efd_tid = handle->h_transaction->t_tid; 6468 6556 6469 6557 ext4_lock_group(sb, block_group); 6470 - mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); 6471 6558 ext4_mb_free_metadata(handle, &e4b, new_entry); 6472 6559 } else { 6473 - /* need to update group_info->bb_free and bitmap 6474 - * with group lock held. 
generate_buddy look at 6475 - * them with group lock_held 6476 - */ 6477 6560 if (test_opt(sb, DISCARD)) { 6478 6561 err = ext4_issue_discard(sb, block_group, bit, 6479 6562 count_clusters, NULL); ··· 6481 6574 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); 6482 6575 6483 6576 ext4_lock_group(sb, block_group); 6484 - mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); 6485 6577 mb_free_blocks(inode, &e4b, bit, count_clusters); 6486 6578 } 6487 6579 6488 - ret = ext4_free_group_clusters(sb, gdp) + count_clusters; 6489 - ext4_free_group_clusters_set(sb, gdp, ret); 6490 - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); 6491 - ext4_group_desc_csum_set(sb, block_group, gdp); 6492 6580 ext4_unlock_group(sb, block_group); 6493 - 6494 - if (sbi->s_log_groups_per_flex) { 6495 - ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 6496 - atomic64_add(count_clusters, 6497 - &sbi_array_rcu_deref(sbi, s_flex_groups, 6498 - flex_group)->free_clusters); 6499 - } 6500 6581 6501 6582 /* 6502 6583 * on a bigalloc file system, defer the s_freeclusters_counter ··· 6498 6603 count_clusters); 6499 6604 } 6500 6605 6501 - ext4_mb_unload_buddy(&e4b); 6502 - 6503 - /* We dirtied the bitmap block */ 6504 - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 6505 - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 6506 - 6507 - /* And the group descriptor block */ 6508 - BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); 6509 - ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); 6510 - if (!err) 6511 - err = ret; 6512 - 6513 6606 if (overflow && !err) { 6514 6607 block += count; 6515 6608 count = overflow; 6516 - put_bh(bitmap_bh); 6609 + ext4_mb_unload_buddy(&e4b); 6517 6610 /* The range changed so it's no longer validated */ 6518 6611 flags &= ~EXT4_FREE_BLOCKS_VALIDATED; 6519 6612 goto do_more; 6520 6613 } 6521 - error_return: 6522 - brelse(bitmap_bh); 6614 + 6615 + error_clean: 6616 + ext4_mb_unload_buddy(&e4b); 6617 + error_out: 6523 6618 ext4_std_error(sb, err); 6524 
6619 } 6525 6620 ··· 6627 6742 int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, 6628 6743 ext4_fsblk_t block, unsigned long count) 6629 6744 { 6630 - struct buffer_head *bitmap_bh = NULL; 6631 - struct buffer_head *gd_bh; 6632 6745 ext4_group_t block_group; 6633 6746 ext4_grpblk_t bit; 6634 - unsigned int i; 6635 - struct ext4_group_desc *desc; 6636 6747 struct ext4_sb_info *sbi = EXT4_SB(sb); 6637 6748 struct ext4_buddy e4b; 6638 - int err = 0, ret, free_clusters_count; 6639 - ext4_grpblk_t clusters_freed; 6749 + int err = 0; 6640 6750 ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block); 6641 6751 ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1); 6642 6752 unsigned long cluster_count = last_cluster - first_cluster + 1; 6753 + ext4_grpblk_t changed; 6643 6754 6644 6755 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); 6645 6756 6646 - if (count == 0) 6757 + if (cluster_count == 0) 6647 6758 return 0; 6648 6759 6649 6760 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); ··· 6651 6770 ext4_warning(sb, "too many blocks added to group %u", 6652 6771 block_group); 6653 6772 err = -EINVAL; 6654 - goto error_return; 6773 + goto error_out; 6655 6774 } 6656 6775 6657 - bitmap_bh = ext4_read_block_bitmap(sb, block_group); 6658 - if (IS_ERR(bitmap_bh)) { 6659 - err = PTR_ERR(bitmap_bh); 6660 - bitmap_bh = NULL; 6661 - goto error_return; 6662 - } 6663 - 6664 - desc = ext4_get_group_desc(sb, block_group, &gd_bh); 6665 - if (!desc) { 6666 - err = -EIO; 6667 - goto error_return; 6668 - } 6776 + err = ext4_mb_load_buddy(sb, block_group, &e4b); 6777 + if (err) 6778 + goto error_out; 6669 6779 6670 6780 if (!ext4_sb_block_valid(sb, NULL, block, count)) { 6671 6781 ext4_error(sb, "Adding blocks in system zones - " 6672 6782 "Block = %llu, count = %lu", 6673 6783 block, count); 6674 6784 err = -EINVAL; 6675 - goto error_return; 6785 + goto error_clean; 6676 6786 } 6677 6787 6678 - BUFFER_TRACE(bitmap_bh, "getting write 
access"); 6679 - err = ext4_journal_get_write_access(handle, sb, bitmap_bh, 6680 - EXT4_JTR_NONE); 6681 - if (err) 6682 - goto error_return; 6788 + err = ext4_mb_mark_context(handle, sb, false, block_group, bit, 6789 + cluster_count, EXT4_MB_BITMAP_MARKED_CHECK, 6790 + &changed); 6791 + if (err && changed == 0) 6792 + goto error_clean; 6683 6793 6684 - /* 6685 - * We are about to modify some metadata. Call the journal APIs 6686 - * to unshare ->b_data if a currently-committing transaction is 6687 - * using it 6688 - */ 6689 - BUFFER_TRACE(gd_bh, "get_write_access"); 6690 - err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE); 6691 - if (err) 6692 - goto error_return; 6794 + if (changed != cluster_count) 6795 + ext4_error(sb, "bit already cleared in group %u", block_group); 6693 6796 6694 - for (i = 0, clusters_freed = 0; i < cluster_count; i++) { 6695 - BUFFER_TRACE(bitmap_bh, "clear bit"); 6696 - if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { 6697 - ext4_error(sb, "bit already cleared for block %llu", 6698 - (ext4_fsblk_t)(block + i)); 6699 - BUFFER_TRACE(bitmap_bh, "bit already cleared"); 6700 - } else { 6701 - clusters_freed++; 6702 - } 6703 - } 6704 - 6705 - err = ext4_mb_load_buddy(sb, block_group, &e4b); 6706 - if (err) 6707 - goto error_return; 6708 - 6709 - /* 6710 - * need to update group_info->bb_free and bitmap 6711 - * with group lock held. 
generate_buddy look at 6712 - * them with group lock_held 6713 - */ 6714 6797 ext4_lock_group(sb, block_group); 6715 - mb_clear_bits(bitmap_bh->b_data, bit, cluster_count); 6716 6798 mb_free_blocks(NULL, &e4b, bit, cluster_count); 6717 - free_clusters_count = clusters_freed + 6718 - ext4_free_group_clusters(sb, desc); 6719 - ext4_free_group_clusters_set(sb, desc, free_clusters_count); 6720 - ext4_block_bitmap_csum_set(sb, desc, bitmap_bh); 6721 - ext4_group_desc_csum_set(sb, block_group, desc); 6722 6799 ext4_unlock_group(sb, block_group); 6723 6800 percpu_counter_add(&sbi->s_freeclusters_counter, 6724 - clusters_freed); 6801 + changed); 6725 6802 6726 - if (sbi->s_log_groups_per_flex) { 6727 - ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 6728 - atomic64_add(clusters_freed, 6729 - &sbi_array_rcu_deref(sbi, s_flex_groups, 6730 - flex_group)->free_clusters); 6731 - } 6732 - 6803 + error_clean: 6733 6804 ext4_mb_unload_buddy(&e4b); 6734 - 6735 - /* We dirtied the bitmap block */ 6736 - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 6737 - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 6738 - 6739 - /* And the group descriptor block */ 6740 - BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); 6741 - ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); 6742 - if (!err) 6743 - err = ret; 6744 - 6745 - error_return: 6746 - brelse(bitmap_bh); 6805 + error_out: 6747 6806 ext4_std_error(sb, err); 6748 6807 return err; 6749 6808 } ··· 6991 7170 6992 7171 return error; 6993 7172 } 7173 + 7174 + #ifdef CONFIG_EXT4_KUNIT_TESTS 7175 + #include "mballoc-test.c" 7176 + #endif
+1 -2
fs/ext4/namei.c
··· 2280 2280 top = data2 + len; 2281 2281 while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { 2282 2282 if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, 2283 - (data2 + (blocksize - csum_size) - 2284 - (char *) de))) { 2283 + (char *)de - data2)) { 2285 2284 brelse(bh2); 2286 2285 brelse(bh); 2287 2286 return -EFSCORRUPTED;
+37 -57
fs/ext4/resize.c
··· 10 10 */ 11 11 12 12 13 - #define EXT4FS_DEBUG 14 - 15 13 #include <linux/errno.h> 16 14 #include <linux/slab.h> 17 15 #include <linux/jiffies.h> ··· 55 57 * If the reserved GDT blocks is non-zero, the resize_inode feature 56 58 * should always be set. 57 59 */ 58 - if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks && 60 + if (sbi->s_es->s_reserved_gdt_blocks && 59 61 !ext4_has_feature_resize_inode(sb)) { 60 62 ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero"); 61 63 return -EFSCORRUPTED; ··· 67 69 * bad time to do it anyways. 68 70 */ 69 71 if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) != 70 - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 72 + le32_to_cpu(sbi->s_es->s_first_data_block)) { 71 73 ext4_warning(sb, "won't resize using backup superblock at %llu", 72 - (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 74 + (unsigned long long)sbi->s_sbh->b_blocknr); 73 75 return -EPERM; 74 76 } 75 77 ··· 77 79 * We are not allowed to do online-resizing on a filesystem mounted 78 80 * with error, because it can destroy the filesystem easily. 
79 81 */ 80 - if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 82 + if (sbi->s_mount_state & EXT4_ERROR_FS) { 81 83 ext4_warning(sb, "There are errors in the filesystem, " 82 84 "so online resizing is not allowed"); 83 85 return -EPERM; ··· 89 91 } 90 92 91 93 if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, 92 - &EXT4_SB(sb)->s_ext4_flags)) 94 + &sbi->s_ext4_flags)) 93 95 ret = -EBUSY; 94 96 95 97 return ret; ··· 102 104 if (update_backups) 103 105 return ext4_update_overhead(sb, true); 104 106 return 0; 105 - } 106 - 107 - static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, 108 - ext4_group_t group) { 109 - return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) << 110 - EXT4_DESC_PER_BLOCK_BITS(sb); 111 - } 112 - 113 - static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb, 114 - ext4_group_t group) { 115 - group = ext4_meta_bg_first_group(sb, group); 116 - return ext4_group_first_block_no(sb, group); 117 107 } 118 108 119 109 static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb, ··· 140 154 141 155 overhead = ext4_group_overhead_blocks(sb, group); 142 156 metaend = start + overhead; 143 - input->free_clusters_count = free_blocks_count = 144 - input->blocks_count - 2 - overhead - sbi->s_itb_per_group; 157 + free_blocks_count = input->blocks_count - 2 - overhead - 158 + sbi->s_itb_per_group; 159 + input->free_clusters_count = EXT4_B2C(sbi, free_blocks_count); 145 160 146 161 if (test_opt(sb, DEBUG)) 147 162 printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks " ··· 447 460 448 461 ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster, 449 462 last_cluster); 450 - for (count2 = count; count > 0; 451 - count -= count2, first_cluster += count2) { 463 + for (; count > 0; count -= count2, first_cluster += count2) { 452 464 ext4_fsblk_t start; 453 465 struct buffer_head *bh; 454 466 ext4_group_t group; ··· 546 560 if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) 547 561 goto handle_itb; 548 562 549 - if (meta_bg == 
1) { 550 - ext4_group_t first_group; 551 - first_group = ext4_meta_bg_first_group(sb, group); 552 - if (first_group != group + 1 && 553 - first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) 554 - goto handle_itb; 555 - } 563 + if (meta_bg == 1) 564 + goto handle_itb; 556 565 557 566 block = start + ext4_bg_has_super(sb, group); 558 567 /* Copy all of the GDT blocks into the backup in this group */ ··· 595 614 } 596 615 597 616 handle_itb: 598 - /* Initialize group tables of the grop @group */ 617 + /* Initialize group tables of the group @group */ 599 618 if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) 600 619 goto handle_bb; 601 620 ··· 685 704 block = start; 686 705 } 687 706 688 - if (count) { 689 - err = set_flexbg_block_bitmap(sb, handle, 690 - flex_gd, 691 - EXT4_B2C(sbi, start), 692 - EXT4_B2C(sbi, 693 - start + count 694 - - 1)); 695 - if (err) 696 - goto out; 697 - } 707 + err = set_flexbg_block_bitmap(sb, handle, 708 + flex_gd, 709 + EXT4_B2C(sbi, start), 710 + EXT4_B2C(sbi, 711 + start + count 712 + - 1)); 713 + if (err) 714 + goto out; 698 715 } 699 716 700 717 out: ··· 931 952 } 932 953 933 954 /* 934 - * add_new_gdb_meta_bg is the sister of add_new_gdb. 955 + * If there is no available space in the existing block group descriptors for 956 + * the new block group and there are no reserved block group descriptors, then 957 + * the meta_bg feature will get enabled, and es->s_first_meta_bg will get set 958 + * to the first block group that is managed using meta_bg and s_first_meta_bg 959 + * must be a multiple of EXT4_DESC_PER_BLOCK(sb). 960 + * This function will be called when first group of meta_bg is added to bring 961 + * new group descriptors block of new added meta_bg. 
935 962 */ 936 963 static int add_new_gdb_meta_bg(struct super_block *sb, 937 964 handle_t *handle, ext4_group_t group) { ··· 947 962 unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); 948 963 int err; 949 964 950 - gdblock = ext4_meta_bg_first_block_no(sb, group) + 951 - ext4_bg_has_super(sb, group); 965 + gdblock = ext4_group_first_block_no(sb, group) + 966 + ext4_bg_has_super(sb, group); 952 967 gdb_bh = ext4_sb_bread(sb, gdblock, 0); 953 968 if (IS_ERR(gdb_bh)) 954 969 return PTR_ERR(gdb_bh); ··· 1072 1087 for (i = 0; i < reserved_gdb; i++) { 1073 1088 int err2; 1074 1089 data = (__le32 *)primary[i]->b_data; 1075 - /* printk("reserving backup %lu[%u] = %lu\n", 1076 - primary[i]->b_blocknr, gdbackups, 1077 - blk + primary[i]->b_blocknr); */ 1078 1090 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); 1079 1091 err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]); 1080 1092 if (!err) ··· 1173 1191 ext4_group_first_block_no(sb, group)); 1174 1192 BUFFER_TRACE(bh, "get_write_access"); 1175 1193 if ((err = ext4_journal_get_write_access(handle, sb, bh, 1176 - EXT4_JTR_NONE))) 1194 + EXT4_JTR_NONE))) { 1195 + brelse(bh); 1177 1196 break; 1197 + } 1178 1198 lock_buffer(bh); 1179 1199 memcpy(bh->b_data, data, size); 1180 1200 if (rest) ··· 1585 1601 int gdb_num_end = ((group + flex_gd->count - 1) / 1586 1602 EXT4_DESC_PER_BLOCK(sb)); 1587 1603 int meta_bg = ext4_has_feature_meta_bg(sb); 1588 - sector_t old_gdb = 0; 1604 + sector_t padding_blocks = meta_bg ? 
0 : sbi->s_sbh->b_blocknr - 1605 + ext4_group_first_block_no(sb, 0); 1589 1606 1590 1607 update_backups(sb, ext4_group_first_block_no(sb, 0), 1591 1608 (char *)es, sizeof(struct ext4_super_block), 0); ··· 1595 1610 1596 1611 gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, 1597 1612 gdb_num); 1598 - if (old_gdb == gdb_bh->b_blocknr) 1599 - continue; 1600 - update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, 1601 - gdb_bh->b_size, meta_bg); 1602 - old_gdb = gdb_bh->b_blocknr; 1613 + update_backups(sb, gdb_bh->b_blocknr - padding_blocks, 1614 + gdb_bh->b_data, gdb_bh->b_size, meta_bg); 1603 1615 } 1604 1616 } 1605 1617 exit: ··· 1962 1980 1963 1981 errout: 1964 1982 ret = ext4_journal_stop(handle); 1965 - if (!err) 1966 - err = ret; 1967 - return ret; 1983 + return err ? err : ret; 1968 1984 1969 1985 invalid_resize_inode: 1970 1986 ext4_error(sb, "corrupted/inconsistent resize inode");
+16 -1
fs/ext4/super.c
··· 768 768 */ 769 769 if (!sb_rdonly(sbi->s_sb) && journal) { 770 770 struct buffer_head *sbh = sbi->s_sbh; 771 - bool call_notify_err; 771 + bool call_notify_err = false; 772 + 772 773 handle = jbd2_journal_start(journal, 1); 773 774 if (IS_ERR(handle)) 774 775 goto write_directly; ··· 6445 6444 struct ext4_mount_options old_opts; 6446 6445 ext4_group_t g; 6447 6446 int err = 0; 6447 + int alloc_ctx; 6448 6448 #ifdef CONFIG_QUOTA 6449 6449 int enable_quota = 0; 6450 6450 int i, j; ··· 6486 6484 6487 6485 } 6488 6486 6487 + /* 6488 + * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause 6489 + * two calls to ext4_should_dioread_nolock() to return inconsistent 6490 + * values, triggering WARN_ON in ext4_add_complete_io(). we grab 6491 + * here s_writepages_rwsem to avoid race between writepages ops and 6492 + * remount. 6493 + */ 6494 + alloc_ctx = ext4_writepages_down_write(sb); 6489 6495 ext4_apply_options(fc, sb); 6496 + ext4_writepages_up_write(sb, alloc_ctx); 6490 6497 6491 6498 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ 6492 6499 test_opt(sb, JOURNAL_CHECKSUM)) { ··· 6713 6702 if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) && 6714 6703 sb_any_quota_suspended(sb)) 6715 6704 dquot_resume(sb, -1); 6705 + 6706 + alloc_ctx = ext4_writepages_down_write(sb); 6716 6707 sb->s_flags = old_sb_flags; 6717 6708 sbi->s_mount_opt = old_opts.s_mount_opt; 6718 6709 sbi->s_mount_opt2 = old_opts.s_mount_opt2; ··· 6723 6710 sbi->s_commit_interval = old_opts.s_commit_interval; 6724 6711 sbi->s_min_batch_time = old_opts.s_min_batch_time; 6725 6712 sbi->s_max_batch_time = old_opts.s_max_batch_time; 6713 + ext4_writepages_up_write(sb, alloc_ctx); 6714 + 6726 6715 if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks) 6727 6716 ext4_release_system_zone(sb); 6728 6717 #ifdef CONFIG_QUOTA
+11 -2
fs/jbd2/recovery.c
··· 289 289 journal_superblock_t * sb; 290 290 291 291 struct recovery_info info; 292 + errseq_t wb_err; 293 + struct address_space *mapping; 292 294 293 295 memset(&info, 0, sizeof(info)); 294 296 sb = journal->j_superblock; ··· 308 306 return 0; 309 307 } 310 308 309 + wb_err = 0; 310 + mapping = journal->j_fs_dev->bd_inode->i_mapping; 311 + errseq_check_and_advance(&mapping->wb_err, &wb_err); 311 312 err = do_one_pass(journal, &info, PASS_SCAN); 312 313 if (!err) 313 314 err = do_one_pass(journal, &info, PASS_REVOKE); ··· 332 327 333 328 jbd2_journal_clear_revoke(journal); 334 329 err2 = sync_blockdev(journal->j_fs_dev); 330 + if (!err) 331 + err = err2; 332 + err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err); 335 333 if (!err) 336 334 err = err2; 337 335 /* Make sure all replayed data is on permanent storage */ ··· 640 632 success = err; 641 633 printk(KERN_ERR 642 634 "JBD2: IO error %d recovering " 643 - "block %ld in log\n", 635 + "block %lu in log\n", 644 636 err, io_block); 645 637 } else { 646 638 unsigned long long blocknr; ··· 669 661 printk(KERN_ERR "JBD2: Invalid " 670 662 "checksum recovering " 671 663 "data block %llu in " 672 - "log\n", blocknr); 664 + "journal block %lu\n", 665 + blocknr, io_block); 673 666 block_error = 1; 674 667 goto skip_write; 675 668 }