Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"The first two changes involve files outside of fs/ext4:

- submit_bh() can never return an error, so change it to return void,
and remove the unused checks from its callers

- fix I_DIRTY_TIME handling so it will be set even if the inode
already has I_DIRTY_INODE

Performance:

- Always enable i_version counter (as btrfs and xfs already do).
Remove some unneeded i_version bumps to avoid unnecessary NFS cache
invalidations

- Wake up journal waiters in FIFO order, so that no journal user is
left waiting for a journal handle for an unfairly long time

- In ext4_write_begin() allocate any necessary buffer heads before
starting the journal handle

- Don't try to prefetch the block allocation bitmaps for a read-only
file system

Bug Fixes:

- Fix a number of fast commit bugs, including resource leaks and
out-of-bounds references in various error handling paths and/or when
the fast commit log is corrupted

- Avoid stopping the online resize early when expanding a file system
which is less than 16TiB to a size greater than 16TiB

- Fix apparent metadata corruption caused by a race with a metadata
buffer head getting migrated while it was being read

- Mark the lazy initialization thread freezable to prevent suspend
failures

- Other miscellaneous bug fixes

Cleanups:

- Break up the incredibly long ext4_fill_super() function by
refactoring to move code into more understandable, smaller
functions

- Remove the deprecated (and ignored) noacl and nouser_attr mount
options

- Factor out some common code in fast commit handling

- Other miscellaneous cleanups"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (53 commits)
ext4: fix potential out of bound read in ext4_fc_replay_scan()
ext4: factor out ext4_fc_get_tl()
ext4: introduce EXT4_FC_TAG_BASE_LEN helper
ext4: factor out ext4_free_ext_path()
ext4: remove unnecessary drop path references in mext_check_coverage()
ext4: update 'state->fc_regions_size' after successful memory allocation
ext4: fix potential memory leak in ext4_fc_record_regions()
ext4: fix potential memory leak in ext4_fc_record_modified_inode()
ext4: remove redundant checking in ext4_ioctl_checkpoint
jbd2: add miss release buffer head in fc_do_one_pass()
ext4: move DIOREAD_NOLOCK setting to ext4_set_def_opts()
ext4: remove useless local variable 'blocksize'
ext4: unify the ext4 super block loading operation
ext4: factor out ext4_journal_data_mode_check()
ext4: factor out ext4_load_and_init_journal()
ext4: factor out ext4_group_desc_init() and ext4_group_desc_free()
ext4: factor out ext4_geometry_check()
ext4: factor out ext4_check_feature_compatibility()
ext4: factor out ext4_init_metadata_csum()
ext4: factor out ext4_encoding_init()
...

+993 -806
+3
Documentation/filesystems/vfs.rst
··· 274 274 This is specifically for the inode itself being marked dirty, 275 275 not its data. If the update needs to be persisted by fdatasync(), 276 276 then I_DIRTY_DATASYNC will be set in the flags argument. 277 + I_DIRTY_TIME will be set in the flags in case lazytime is enabled 278 + and struct inode has times updated since the last ->dirty_inode 279 + call. 277 280 278 281 ``write_inode`` 279 282 this method is called when the VFS needs to write an inode to
+10 -13
fs/buffer.c
··· 52 52 #include "internal.h" 53 53 54 54 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 55 - static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, 56 - struct writeback_control *wbc); 55 + static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, 56 + struct writeback_control *wbc); 57 57 58 58 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) 59 59 ··· 2673 2673 bio_put(bio); 2674 2674 } 2675 2675 2676 - static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, 2677 - struct writeback_control *wbc) 2676 + static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, 2677 + struct writeback_control *wbc) 2678 2678 { 2679 2679 const enum req_op op = opf & REQ_OP_MASK; 2680 2680 struct bio *bio; ··· 2717 2717 } 2718 2718 2719 2719 submit_bio(bio); 2720 - return 0; 2721 2720 } 2722 2721 2723 - int submit_bh(blk_opf_t opf, struct buffer_head *bh) 2722 + void submit_bh(blk_opf_t opf, struct buffer_head *bh) 2724 2723 { 2725 - return submit_bh_wbc(opf, bh, NULL); 2724 + submit_bh_wbc(opf, bh, NULL); 2726 2725 } 2727 2726 EXPORT_SYMBOL(submit_bh); 2728 2727 ··· 2800 2801 */ 2801 2802 int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) 2802 2803 { 2803 - int ret = 0; 2804 - 2805 2804 WARN_ON(atomic_read(&bh->b_count) < 1); 2806 2805 lock_buffer(bh); 2807 2806 if (test_clear_buffer_dirty(bh)) { ··· 2814 2817 2815 2818 get_bh(bh); 2816 2819 bh->b_end_io = end_buffer_write_sync; 2817 - ret = submit_bh(REQ_OP_WRITE | op_flags, bh); 2820 + submit_bh(REQ_OP_WRITE | op_flags, bh); 2818 2821 wait_on_buffer(bh); 2819 - if (!ret && !buffer_uptodate(bh)) 2820 - ret = -EIO; 2822 + if (!buffer_uptodate(bh)) 2823 + return -EIO; 2821 2824 } else { 2822 2825 unlock_buffer(bh); 2823 2826 } 2824 - return ret; 2827 + return 0; 2825 2828 } 2826 2829 EXPORT_SYMBOL(__sync_dirty_buffer); 2827 2830
+1 -4
fs/ext4/ext4.h
··· 3592 3592 extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, 3593 3593 struct ext4_dir_entry_2 **parent_de, 3594 3594 int *retval); 3595 - extern int ext4_inline_data_fiemap(struct inode *inode, 3596 - struct fiemap_extent_info *fieinfo, 3597 - int *has_inline, __u64 start, __u64 len); 3598 3595 extern void *ext4_read_inline_link(struct inode *inode); 3599 3596 3600 3597 struct iomap; ··· 3710 3713 extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, 3711 3714 struct ext4_ext_path **, 3712 3715 int flags); 3713 - extern void ext4_ext_drop_refs(struct ext4_ext_path *); 3716 + extern void ext4_free_ext_path(struct ext4_ext_path *); 3714 3717 extern int ext4_ext_check_inode(struct inode *inode); 3715 3718 extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); 3716 3719 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+44 -63
fs/ext4/extents.c
··· 106 106 return 0; 107 107 } 108 108 109 + static void ext4_ext_drop_refs(struct ext4_ext_path *path) 110 + { 111 + int depth, i; 112 + 113 + if (!path) 114 + return; 115 + depth = path->p_depth; 116 + for (i = 0; i <= depth; i++, path++) { 117 + brelse(path->p_bh); 118 + path->p_bh = NULL; 119 + } 120 + } 121 + 122 + void ext4_free_ext_path(struct ext4_ext_path *path) 123 + { 124 + ext4_ext_drop_refs(path); 125 + kfree(path); 126 + } 127 + 109 128 /* 110 129 * Make sure 'handle' has at least 'check_cred' credits. If not, restart 111 130 * transaction with 'restart_cred' credits. The function drops i_data_sem ··· 655 636 ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED); 656 637 out: 657 638 up_read(&ei->i_data_sem); 658 - ext4_ext_drop_refs(path); 659 - kfree(path); 639 + ext4_free_ext_path(path); 660 640 return ret; 661 641 } 662 642 ··· 741 723 #define ext4_ext_show_leaf(inode, path) 742 724 #define ext4_ext_show_move(inode, path, newblock, level) 743 725 #endif 744 - 745 - void ext4_ext_drop_refs(struct ext4_ext_path *path) 746 - { 747 - int depth, i; 748 - 749 - if (!path) 750 - return; 751 - depth = path->p_depth; 752 - for (i = 0; i <= depth; i++, path++) { 753 - brelse(path->p_bh); 754 - path->p_bh = NULL; 755 - } 756 - } 757 726 758 727 /* 759 728 * ext4_ext_binsearch_idx: ··· 960 955 return path; 961 956 962 957 err: 963 - ext4_ext_drop_refs(path); 964 - kfree(path); 958 + ext4_free_ext_path(path); 965 959 if (orig_path) 966 960 *orig_path = NULL; 967 961 return ERR_PTR(ret); ··· 2178 2174 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 2179 2175 2180 2176 cleanup: 2181 - ext4_ext_drop_refs(npath); 2182 - kfree(npath); 2177 + ext4_free_ext_path(npath); 2183 2178 return err; 2184 2179 } 2185 2180 ··· 3064 3061 } 3065 3062 } 3066 3063 out: 3067 - ext4_ext_drop_refs(path); 3068 - kfree(path); 3064 + ext4_free_ext_path(path); 3069 3065 path = NULL; 3070 3066 if (err == -EAGAIN) 3071 3067 goto again; ··· 4377 4375 allocated = map->m_len; 4378 
4376 ext4_ext_show_leaf(inode, path); 4379 4377 out: 4380 - ext4_ext_drop_refs(path); 4381 - kfree(path); 4378 + ext4_free_ext_path(path); 4382 4379 4383 4380 trace_ext4_ext_map_blocks_exit(inode, flags, map, 4384 4381 err ? err : allocated); ··· 5246 5245 break; 5247 5246 } 5248 5247 out: 5249 - ext4_ext_drop_refs(path); 5250 - kfree(path); 5248 + ext4_free_ext_path(path); 5251 5249 return ret; 5252 5250 } 5253 5251 ··· 5538 5538 EXT4_GET_BLOCKS_METADATA_NOFAIL); 5539 5539 } 5540 5540 5541 - ext4_ext_drop_refs(path); 5542 - kfree(path); 5541 + ext4_free_ext_path(path); 5543 5542 if (ret < 0) { 5544 5543 up_write(&EXT4_I(inode)->i_data_sem); 5545 5544 goto out_stop; 5546 5545 } 5547 5546 } else { 5548 - ext4_ext_drop_refs(path); 5549 - kfree(path); 5547 + ext4_free_ext_path(path); 5550 5548 } 5551 5549 5552 5550 ret = ext4_es_remove_extent(inode, offset_lblk, ··· 5764 5766 count -= len; 5765 5767 5766 5768 repeat: 5767 - ext4_ext_drop_refs(path1); 5768 - kfree(path1); 5769 - ext4_ext_drop_refs(path2); 5770 - kfree(path2); 5769 + ext4_free_ext_path(path1); 5770 + ext4_free_ext_path(path2); 5771 5771 path1 = path2 = NULL; 5772 5772 } 5773 5773 return replaced_count; ··· 5844 5848 } 5845 5849 5846 5850 out: 5847 - ext4_ext_drop_refs(path); 5848 - kfree(path); 5851 + ext4_free_ext_path(path); 5849 5852 5850 5853 return err ? 
err : mapped; 5851 5854 } ··· 5911 5916 ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]); 5912 5917 up_write(&EXT4_I(inode)->i_data_sem); 5913 5918 out: 5914 - ext4_ext_drop_refs(path); 5915 - kfree(path); 5919 + ext4_free_ext_path(path); 5916 5920 ext4_mark_inode_dirty(NULL, inode); 5917 5921 return ret; 5918 5922 } ··· 5929 5935 return; 5930 5936 ex = path[path->p_depth].p_ext; 5931 5937 if (!ex) { 5932 - ext4_ext_drop_refs(path); 5933 - kfree(path); 5938 + ext4_free_ext_path(path); 5934 5939 ext4_mark_inode_dirty(NULL, inode); 5935 5940 return; 5936 5941 } ··· 5942 5949 ext4_ext_dirty(NULL, inode, &path[path->p_depth]); 5943 5950 up_write(&EXT4_I(inode)->i_data_sem); 5944 5951 ext4_mark_inode_dirty(NULL, inode); 5945 - ext4_ext_drop_refs(path); 5946 - kfree(path); 5952 + ext4_free_ext_path(path); 5947 5953 } 5948 5954 } 5949 5955 ··· 5981 5989 return PTR_ERR(path); 5982 5990 ex = path[path->p_depth].p_ext; 5983 5991 if (!ex) { 5984 - ext4_ext_drop_refs(path); 5985 - kfree(path); 5992 + ext4_free_ext_path(path); 5986 5993 goto out; 5987 5994 } 5988 5995 end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); 5989 - ext4_ext_drop_refs(path); 5990 - kfree(path); 5996 + ext4_free_ext_path(path); 5991 5997 5992 5998 /* Count the number of data blocks */ 5993 5999 cur = 0; ··· 6015 6025 if (IS_ERR(path)) 6016 6026 goto out; 6017 6027 numblks += path->p_depth; 6018 - ext4_ext_drop_refs(path); 6019 - kfree(path); 6028 + ext4_free_ext_path(path); 6020 6029 while (cur < end) { 6021 6030 path = ext4_find_extent(inode, cur, NULL, 0); 6022 6031 if (IS_ERR(path)) 6023 6032 break; 6024 6033 ex = path[path->p_depth].p_ext; 6025 6034 if (!ex) { 6026 - ext4_ext_drop_refs(path); 6027 - kfree(path); 6035 + ext4_free_ext_path(path); 6028 6036 return 0; 6029 6037 } 6030 6038 cur = max(cur + 1, le32_to_cpu(ex->ee_block) + 6031 6039 ext4_ext_get_actual_len(ex)); 6032 6040 ret = skip_hole(inode, &cur); 6033 6041 if (ret < 0) { 6034 - ext4_ext_drop_refs(path); 6035 - 
kfree(path); 6042 + ext4_free_ext_path(path); 6036 6043 break; 6037 6044 } 6038 6045 path2 = ext4_find_extent(inode, cur, NULL, 0); 6039 6046 if (IS_ERR(path2)) { 6040 - ext4_ext_drop_refs(path); 6041 - kfree(path); 6047 + ext4_free_ext_path(path); 6042 6048 break; 6043 6049 } 6044 6050 for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { ··· 6048 6062 if (cmp1 != cmp2 && cmp2 != 0) 6049 6063 numblks++; 6050 6064 } 6051 - ext4_ext_drop_refs(path); 6052 - ext4_ext_drop_refs(path2); 6053 - kfree(path); 6054 - kfree(path2); 6065 + ext4_free_ext_path(path); 6066 + ext4_free_ext_path(path2); 6055 6067 } 6056 6068 6057 6069 out: ··· 6076 6092 return PTR_ERR(path); 6077 6093 ex = path[path->p_depth].p_ext; 6078 6094 if (!ex) { 6079 - ext4_ext_drop_refs(path); 6080 - kfree(path); 6095 + ext4_free_ext_path(path); 6081 6096 return 0; 6082 6097 } 6083 6098 end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); 6084 - ext4_ext_drop_refs(path); 6085 - kfree(path); 6099 + ext4_free_ext_path(path); 6086 6100 6087 6101 cur = 0; 6088 6102 while (cur < end) { ··· 6099 6117 ext4_fc_record_regions(inode->i_sb, inode->i_ino, 6100 6118 0, path[j].p_block, 1, 1); 6101 6119 } 6102 - ext4_ext_drop_refs(path); 6103 - kfree(path); 6120 + ext4_free_ext_path(path); 6104 6121 } 6105 6122 ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 6106 6123 ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+1 -2
fs/ext4/extents_status.c
··· 667 667 } 668 668 } 669 669 out: 670 - ext4_ext_drop_refs(path); 671 - kfree(path); 670 + ext4_free_ext_path(path); 672 671 } 673 672 674 673 static void ext4_es_insert_extent_ind_check(struct inode *inode,
+124 -86
fs/ext4/fast_commit.c
··· 229 229 finish_wait(wq, &wait.wq_entry); 230 230 } 231 231 232 + static bool ext4_fc_disabled(struct super_block *sb) 233 + { 234 + return (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 235 + (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)); 236 + } 237 + 232 238 /* 233 239 * Inform Ext4's fast about start of an inode update 234 240 * ··· 246 240 { 247 241 struct ext4_inode_info *ei = EXT4_I(inode); 248 242 249 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 250 - (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 243 + if (ext4_fc_disabled(inode->i_sb)) 251 244 return; 252 245 253 246 restart: ··· 270 265 { 271 266 struct ext4_inode_info *ei = EXT4_I(inode); 272 267 273 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 274 - (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 268 + if (ext4_fc_disabled(inode->i_sb)) 275 269 return; 276 270 277 271 if (atomic_dec_and_test(&ei->i_fc_updates)) ··· 287 283 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 288 284 struct ext4_fc_dentry_update *fc_dentry; 289 285 290 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 291 - (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) 286 + if (ext4_fc_disabled(inode->i_sb)) 292 287 return; 293 288 294 289 restart: ··· 340 337 struct ext4_sb_info *sbi = EXT4_SB(sb); 341 338 tid_t tid; 342 339 343 - if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 344 - (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) 340 + if (ext4_fc_disabled(sb)) 345 341 return; 346 342 347 343 ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); ··· 495 493 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) 496 494 { 497 495 struct inode *inode = d_inode(dentry); 498 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 499 496 500 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 501 - (sbi->s_mount_state & EXT4_FC_REPLAY)) 497 + if (ext4_fc_disabled(inode->i_sb)) 502 498 return; 503 499 504 500 if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) ··· 522 522 void 
ext4_fc_track_link(handle_t *handle, struct dentry *dentry) 523 523 { 524 524 struct inode *inode = d_inode(dentry); 525 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 526 525 527 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 528 - (sbi->s_mount_state & EXT4_FC_REPLAY)) 526 + if (ext4_fc_disabled(inode->i_sb)) 529 527 return; 530 528 531 529 if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) ··· 549 551 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) 550 552 { 551 553 struct inode *inode = d_inode(dentry); 552 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 553 554 554 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 555 - (sbi->s_mount_state & EXT4_FC_REPLAY)) 555 + if (ext4_fc_disabled(inode->i_sb)) 556 556 return; 557 557 558 558 if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) ··· 572 576 573 577 void ext4_fc_track_inode(handle_t *handle, struct inode *inode) 574 578 { 575 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 576 579 int ret; 577 580 578 581 if (S_ISDIR(inode->i_mode)) 582 + return; 583 + 584 + if (ext4_fc_disabled(inode->i_sb)) 579 585 return; 580 586 581 587 if (ext4_should_journal_data(inode)) { ··· 585 587 EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); 586 588 return; 587 589 } 588 - 589 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 590 - (sbi->s_mount_state & EXT4_FC_REPLAY)) 591 - return; 592 590 593 591 if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 594 592 return; ··· 628 634 void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, 629 635 ext4_lblk_t end) 630 636 { 631 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 632 637 struct __track_range_args args; 633 638 int ret; 634 639 635 640 if (S_ISDIR(inode->i_mode)) 636 641 return; 637 642 638 - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || 639 - (sbi->s_mount_state & EXT4_FC_REPLAY)) 643 + if (ext4_fc_disabled(inode->i_sb)) 640 644 return; 641 645 642 646 if 
(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) ··· 702 710 * After allocating len, we should have space at least for a 0 byte 703 711 * padding. 704 712 */ 705 - if (len + sizeof(struct ext4_fc_tl) > bsize) 713 + if (len + EXT4_FC_TAG_BASE_LEN > bsize) 706 714 return NULL; 707 715 708 - if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) { 716 + if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) { 709 717 /* 710 718 * Only allocate from current buffer if we have enough space for 711 719 * this request AND we have space to add a zero byte padding. ··· 722 730 /* Need to add PAD tag */ 723 731 tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off); 724 732 tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); 725 - pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl); 733 + pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN; 726 734 tl->fc_len = cpu_to_le16(pad_len); 727 735 if (crc) 728 - *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); 736 + *crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN); 729 737 if (pad_len > 0) 730 738 ext4_fc_memzero(sb, tl + 1, pad_len, crc); 731 739 ext4_fc_submit_bh(sb, false); ··· 767 775 * ext4_fc_reserve_space takes care of allocating an extra block if 768 776 * there's no enough space on this block for accommodating this tail. 
769 777 */ 770 - dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc); 778 + dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc); 771 779 if (!dst) 772 780 return -ENOSPC; 773 781 ··· 777 785 tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail)); 778 786 sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); 779 787 780 - ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc); 781 - dst += sizeof(tl); 788 + ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc); 789 + dst += EXT4_FC_TAG_BASE_LEN; 782 790 tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); 783 791 ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc); 784 792 dst += sizeof(tail.fc_tid); ··· 800 808 struct ext4_fc_tl tl; 801 809 u8 *dst; 802 810 803 - dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc); 811 + dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc); 804 812 if (!dst) 805 813 return false; 806 814 807 815 tl.fc_tag = cpu_to_le16(tag); 808 816 tl.fc_len = cpu_to_le16(len); 809 817 810 - ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 811 - ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc); 818 + ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); 819 + ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc); 812 820 813 821 return true; 814 822 } ··· 820 828 struct ext4_fc_dentry_info fcd; 821 829 struct ext4_fc_tl tl; 822 830 int dlen = fc_dentry->fcd_name.len; 823 - u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen, 824 - crc); 831 + u8 *dst = ext4_fc_reserve_space(sb, 832 + EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc); 825 833 826 834 if (!dst) 827 835 return false; ··· 830 838 fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); 831 839 tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); 832 840 tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); 833 - ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); 834 - dst += sizeof(tl); 841 + ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); 
842 + dst += EXT4_FC_TAG_BASE_LEN; 835 843 ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); 836 844 dst += sizeof(fcd); 837 845 ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc); ··· 866 874 tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); 867 875 tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); 868 876 877 + ret = -ECANCELED; 869 878 dst = ext4_fc_reserve_space(inode->i_sb, 870 - sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); 879 + EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc); 871 880 if (!dst) 872 - return -ECANCELED; 881 + goto err; 873 882 874 - if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) 875 - return -ECANCELED; 876 - dst += sizeof(tl); 883 + if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc)) 884 + goto err; 885 + dst += EXT4_FC_TAG_BASE_LEN; 877 886 if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) 878 - return -ECANCELED; 887 + goto err; 879 888 dst += sizeof(fc_inode); 880 889 if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc), 881 890 inode_len, crc)) 882 - return -ECANCELED; 883 - 884 - return 0; 891 + goto err; 892 + ret = 0; 893 + err: 894 + brelse(iloc.bh); 895 + return ret; 885 896 } 886 897 887 898 /* ··· 1338 1343 }; 1339 1344 1340 1345 static inline void tl_to_darg(struct dentry_info_args *darg, 1341 - struct ext4_fc_tl *tl, u8 *val) 1346 + struct ext4_fc_tl *tl, u8 *val) 1342 1347 { 1343 1348 struct ext4_fc_dentry_info fcd; 1344 1349 ··· 1347 1352 darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); 1348 1353 darg->ino = le32_to_cpu(fcd.fc_ino); 1349 1354 darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); 1350 - darg->dname_len = le16_to_cpu(tl->fc_len) - 1351 - sizeof(struct ext4_fc_dentry_info); 1355 + darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info); 1356 + } 1357 + 1358 + static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val) 1359 + { 1360 + memcpy(tl, val, EXT4_FC_TAG_BASE_LEN); 
1361 + tl->fc_len = le16_to_cpu(tl->fc_len); 1362 + tl->fc_tag = le16_to_cpu(tl->fc_tag); 1352 1363 } 1353 1364 1354 1365 /* Unlink replay function */ ··· 1492 1491 if (state->fc_modified_inodes[i] == ino) 1493 1492 return 0; 1494 1493 if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { 1495 - state->fc_modified_inodes = krealloc( 1496 - state->fc_modified_inodes, 1494 + int *fc_modified_inodes; 1495 + 1496 + fc_modified_inodes = krealloc(state->fc_modified_inodes, 1497 1497 sizeof(int) * (state->fc_modified_inodes_size + 1498 1498 EXT4_FC_REPLAY_REALLOC_INCREMENT), 1499 1499 GFP_KERNEL); 1500 - if (!state->fc_modified_inodes) 1500 + if (!fc_modified_inodes) 1501 1501 return -ENOMEM; 1502 + state->fc_modified_inodes = fc_modified_inodes; 1502 1503 state->fc_modified_inodes_size += 1503 1504 EXT4_FC_REPLAY_REALLOC_INCREMENT; 1504 1505 } ··· 1519 1516 struct ext4_inode *raw_fc_inode; 1520 1517 struct inode *inode = NULL; 1521 1518 struct ext4_iloc iloc; 1522 - int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag); 1519 + int inode_len, ino, ret, tag = tl->fc_tag; 1523 1520 struct ext4_extent_header *eh; 1524 1521 1525 1522 memcpy(&fc_inode, val, sizeof(fc_inode)); ··· 1544 1541 if (ret) 1545 1542 goto out; 1546 1543 1547 - inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode); 1544 + inode_len = tl->fc_len - sizeof(struct ext4_fc_inode); 1548 1545 raw_inode = ext4_raw_inode(&iloc); 1549 1546 1550 1547 memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); ··· 1685 1682 if (replay && state->fc_regions_used != state->fc_regions_valid) 1686 1683 state->fc_regions_used = state->fc_regions_valid; 1687 1684 if (state->fc_regions_used == state->fc_regions_size) { 1685 + struct ext4_fc_alloc_region *fc_regions; 1686 + 1687 + fc_regions = krealloc(state->fc_regions, 1688 + sizeof(struct ext4_fc_alloc_region) * 1689 + (state->fc_regions_size + 1690 + EXT4_FC_REPLAY_REALLOC_INCREMENT), 1691 + GFP_KERNEL); 1692 + if (!fc_regions) 
1693 + return -ENOMEM; 1688 1694 state->fc_regions_size += 1689 1695 EXT4_FC_REPLAY_REALLOC_INCREMENT; 1690 - state->fc_regions = krealloc( 1691 - state->fc_regions, 1692 - state->fc_regions_size * 1693 - sizeof(struct ext4_fc_alloc_region), 1694 - GFP_KERNEL); 1695 - if (!state->fc_regions) 1696 - return -ENOMEM; 1696 + state->fc_regions = fc_regions; 1697 1697 } 1698 1698 region = &state->fc_regions[state->fc_regions_used++]; 1699 1699 region->ino = ino; ··· 1776 1770 ret = ext4_ext_insert_extent( 1777 1771 NULL, inode, &path, &newex, 0); 1778 1772 up_write((&EXT4_I(inode)->i_data_sem)); 1779 - ext4_ext_drop_refs(path); 1780 - kfree(path); 1773 + ext4_free_ext_path(path); 1781 1774 if (ret) 1782 1775 goto out; 1783 1776 goto next; ··· 1931 1926 for (j = 0; j < path->p_depth; j++) 1932 1927 ext4_mb_mark_bb(inode->i_sb, 1933 1928 path[j].p_block, 1, 1); 1934 - ext4_ext_drop_refs(path); 1935 - kfree(path); 1929 + ext4_free_ext_path(path); 1936 1930 } 1937 1931 cur += ret; 1938 1932 ext4_mb_mark_bb(inode->i_sb, map.m_pblk, ··· 1974 1970 sbi->s_mount_state &= ~EXT4_FC_REPLAY; 1975 1971 kfree(sbi->s_fc_replay_state.fc_regions); 1976 1972 kfree(sbi->s_fc_replay_state.fc_modified_inodes); 1973 + } 1974 + 1975 + static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl, 1976 + u8 *val, u8 *end) 1977 + { 1978 + if (val + tl->fc_len > end) 1979 + return false; 1980 + 1981 + /* Here only check ADD_RANGE/TAIL/HEAD which will read data when do 1982 + * journal rescan before do CRC check. Other tags length check will 1983 + * rely on CRC check. 
1984 + */ 1985 + switch (tl->fc_tag) { 1986 + case EXT4_FC_TAG_ADD_RANGE: 1987 + return (sizeof(struct ext4_fc_add_range) == tl->fc_len); 1988 + case EXT4_FC_TAG_TAIL: 1989 + return (sizeof(struct ext4_fc_tail) <= tl->fc_len); 1990 + case EXT4_FC_TAG_HEAD: 1991 + return (sizeof(struct ext4_fc_head) == tl->fc_len); 1992 + case EXT4_FC_TAG_DEL_RANGE: 1993 + case EXT4_FC_TAG_LINK: 1994 + case EXT4_FC_TAG_UNLINK: 1995 + case EXT4_FC_TAG_CREAT: 1996 + case EXT4_FC_TAG_INODE: 1997 + case EXT4_FC_TAG_PAD: 1998 + default: 1999 + return true; 2000 + } 1977 2001 } 1978 2002 1979 2003 /* ··· 2060 2028 } 2061 2029 2062 2030 state->fc_replay_expected_off++; 2063 - for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { 2064 - memcpy(&tl, cur, sizeof(tl)); 2065 - val = cur + sizeof(tl); 2031 + for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN; 2032 + cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { 2033 + ext4_fc_get_tl(&tl, cur); 2034 + val = cur + EXT4_FC_TAG_BASE_LEN; 2035 + if (!ext4_fc_tag_len_isvalid(&tl, val, end)) { 2036 + ret = state->fc_replay_num_tags ? 
2037 + JBD2_FC_REPLAY_STOP : -ECANCELED; 2038 + goto out_err; 2039 + } 2066 2040 ext4_debug("Scan phase, tag:%s, blk %lld\n", 2067 - tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr); 2068 - switch (le16_to_cpu(tl.fc_tag)) { 2041 + tag2str(tl.fc_tag), bh->b_blocknr); 2042 + switch (tl.fc_tag) { 2069 2043 case EXT4_FC_TAG_ADD_RANGE: 2070 2044 memcpy(&ext, val, sizeof(ext)); 2071 2045 ex = (struct ext4_extent *)&ext.fc_ex; ··· 2091 2053 case EXT4_FC_TAG_PAD: 2092 2054 state->fc_cur_tag++; 2093 2055 state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2094 - sizeof(tl) + le16_to_cpu(tl.fc_len)); 2056 + EXT4_FC_TAG_BASE_LEN + tl.fc_len); 2095 2057 break; 2096 2058 case EXT4_FC_TAG_TAIL: 2097 2059 state->fc_cur_tag++; 2098 2060 memcpy(&tail, val, sizeof(tail)); 2099 2061 state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2100 - sizeof(tl) + 2062 + EXT4_FC_TAG_BASE_LEN + 2101 2063 offsetof(struct ext4_fc_tail, 2102 2064 fc_crc)); 2103 2065 if (le32_to_cpu(tail.fc_tid) == expected_tid && ··· 2124 2086 } 2125 2087 state->fc_cur_tag++; 2126 2088 state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 2127 - sizeof(tl) + le16_to_cpu(tl.fc_len)); 2089 + EXT4_FC_TAG_BASE_LEN + tl.fc_len); 2128 2090 break; 2129 2091 default: 2130 2092 ret = state->fc_replay_num_tags ? 
··· 2179 2141 start = (u8 *)bh->b_data; 2180 2142 end = (__u8 *)bh->b_data + journal->j_blocksize - 1; 2181 2143 2182 - for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { 2183 - memcpy(&tl, cur, sizeof(tl)); 2184 - val = cur + sizeof(tl); 2144 + for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN; 2145 + cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { 2146 + ext4_fc_get_tl(&tl, cur); 2147 + val = cur + EXT4_FC_TAG_BASE_LEN; 2185 2148 2186 2149 if (state->fc_replay_num_tags == 0) { 2187 2150 ret = JBD2_FC_REPLAY_STOP; 2188 2151 ext4_fc_set_bitmaps_and_counters(sb); 2189 2152 break; 2190 2153 } 2191 - ext4_debug("Replay phase, tag:%s\n", 2192 - tag2str(le16_to_cpu(tl.fc_tag))); 2154 + 2155 + ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag)); 2193 2156 state->fc_replay_num_tags--; 2194 - switch (le16_to_cpu(tl.fc_tag)) { 2157 + switch (tl.fc_tag) { 2195 2158 case EXT4_FC_TAG_LINK: 2196 2159 ret = ext4_fc_replay_link(sb, &tl, val); 2197 2160 break; ··· 2213 2174 break; 2214 2175 case EXT4_FC_TAG_PAD: 2215 2176 trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, 2216 - le16_to_cpu(tl.fc_len), 0); 2177 + tl.fc_len, 0); 2217 2178 break; 2218 2179 case EXT4_FC_TAG_TAIL: 2219 - trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0, 2220 - le16_to_cpu(tl.fc_len), 0); 2180 + trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 2181 + 0, tl.fc_len, 0); 2221 2182 memcpy(&tail, val, sizeof(tail)); 2222 2183 WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); 2223 2184 break; 2224 2185 case EXT4_FC_TAG_HEAD: 2225 2186 break; 2226 2187 default: 2227 - trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0, 2228 - le16_to_cpu(tl.fc_len), 0); 2188 + trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0); 2229 2189 ret = -ECANCELED; 2230 2190 break; 2231 2191 }
+3
fs/ext4/fast_commit.h
··· 70 70 __le32 fc_crc; 71 71 }; 72 72 73 + /* Tag base length */ 74 + #define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl)) 75 + 73 76 /* 74 77 * Fast commit status codes 75 78 */
+6
fs/ext4/file.c
··· 543 543 ret = -EAGAIN; 544 544 goto out; 545 545 } 546 + /* 547 + * Make sure inline data cannot be created anymore since we are going 548 + * to allocate blocks for DIO. We know the inode does not have any 549 + * inline data now because ext4_dio_supported() checked for that. 550 + */ 551 + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 546 552 547 553 offset = iocb->ki_pos; 548 554 count = ret;
+12 -5
fs/ext4/inode.c
··· 1188 1188 page = grab_cache_page_write_begin(mapping, index); 1189 1189 if (!page) 1190 1190 return -ENOMEM; 1191 + /* 1192 + * The same as page allocation, we prealloc buffer heads before 1193 + * starting the handle. 1194 + */ 1195 + if (!page_has_buffers(page)) 1196 + create_empty_buffers(page, inode->i_sb->s_blocksize, 0); 1197 + 1191 1198 unlock_page(page); 1192 1199 1193 1200 retry_journal: ··· 5349 5342 int error, rc = 0; 5350 5343 int orphan = 0; 5351 5344 const unsigned int ia_valid = attr->ia_valid; 5345 + bool inc_ivers = true; 5352 5346 5353 5347 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 5354 5348 return -EIO; ··· 5433 5425 return -EINVAL; 5434 5426 } 5435 5427 5436 - if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) 5437 - inode_inc_iversion(inode); 5428 + if (attr->ia_size == inode->i_size) 5429 + inc_ivers = false; 5438 5430 5439 5431 if (shrink) { 5440 5432 if (ext4_should_order_data(inode)) { ··· 5536 5528 } 5537 5529 5538 5530 if (!error) { 5531 + if (inc_ivers) 5532 + inode_inc_iversion(inode); 5539 5533 setattr_copy(mnt_userns, inode, attr); 5540 5534 mark_inode_dirty(inode); 5541 5535 } ··· 5777 5767 return -EIO; 5778 5768 } 5779 5769 ext4_fc_track_inode(handle, inode); 5780 - 5781 - if (IS_I_VERSION(inode)) 5782 - inode_inc_iversion(inode); 5783 5770 5784 5771 /* the do_update_inode consumes one bh->b_count */ 5785 5772 get_bh(iloc->bh);
+4 -3
fs/ext4/ioctl.c
··· 452 452 swap_inode_data(inode, inode_bl); 453 453 454 454 inode->i_ctime = inode_bl->i_ctime = current_time(inode); 455 + inode_inc_iversion(inode); 455 456 456 457 inode->i_generation = prandom_u32(); 457 458 inode_bl->i_generation = prandom_u32(); ··· 666 665 ext4_set_inode_flags(inode, false); 667 666 668 667 inode->i_ctime = current_time(inode); 668 + inode_inc_iversion(inode); 669 669 670 670 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 671 671 flags_err: ··· 777 775 778 776 EXT4_I(inode)->i_projid = kprojid; 779 777 inode->i_ctime = current_time(inode); 778 + inode_inc_iversion(inode); 780 779 out_dirty: 781 780 rc = ext4_mark_iloc_dirty(handle, inode, &iloc); 782 781 if (!err) ··· 1063 1060 if (!EXT4_SB(sb)->s_journal) 1064 1061 return -ENODEV; 1065 1062 1066 - if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID) 1067 - return -EINVAL; 1068 - 1069 1063 if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && 1070 1064 !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev)) 1071 1065 return -EOPNOTSUPP; ··· 1257 1257 err = ext4_reserve_inode_write(handle, inode, &iloc); 1258 1258 if (err == 0) { 1259 1259 inode->i_ctime = current_time(inode); 1260 + inode_inc_iversion(inode); 1260 1261 inode->i_generation = generation; 1261 1262 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 1262 1263 }
+1 -2
fs/ext4/migrate.c
··· 56 56 retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0); 57 57 err_out: 58 58 up_write((&EXT4_I(inode)->i_data_sem)); 59 - ext4_ext_drop_refs(path); 60 - kfree(path); 59 + ext4_free_ext_path(path); 61 60 lb->first_pblock = 0; 62 61 return retval; 63 62 }
+10 -16
fs/ext4/move_extent.c
··· 32 32 if (IS_ERR(path)) 33 33 return PTR_ERR(path); 34 34 if (path[ext_depth(inode)].p_ext == NULL) { 35 - ext4_ext_drop_refs(path); 36 - kfree(path); 35 + ext4_free_ext_path(path); 37 36 *ppath = NULL; 38 37 return -ENODATA; 39 38 } ··· 102 103 if (unwritten != ext4_ext_is_unwritten(ext)) 103 104 goto out; 104 105 from += ext4_ext_get_actual_len(ext); 105 - ext4_ext_drop_refs(path); 106 106 } 107 107 ret = 1; 108 108 out: 109 - ext4_ext_drop_refs(path); 110 - kfree(path); 109 + ext4_free_ext_path(path); 111 110 return ret; 112 111 } 113 112 ··· 469 472 if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) 470 473 return -EPERM; 471 474 472 - /* Ext4 move extent does not support swapfile */ 475 + /* Ext4 move extent does not support swap files */ 473 476 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { 474 - ext4_debug("ext4 move extent: The argument files should " 475 - "not be swapfile [ino:orig %lu, donor %lu]\n", 477 + ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n", 476 478 orig_inode->i_ino, donor_inode->i_ino); 477 - return -EBUSY; 479 + return -ETXTBSY; 478 480 } 479 481 480 482 if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) { 481 - ext4_debug("ext4 move extent: The argument files should " 482 - "not be quota files [ino:orig %lu, donor %lu]\n", 483 + ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n", 483 484 orig_inode->i_ino, donor_inode->i_ino); 484 - return -EBUSY; 485 + return -EOPNOTSUPP; 485 486 } 486 487 487 488 /* Ext4 move extent supports only extent based file */ ··· 626 631 if (ret) 627 632 goto out; 628 633 ex = path[path->p_depth].p_ext; 629 - next_blk = ext4_ext_next_allocated_block(path); 630 634 cur_blk = le32_to_cpu(ex->ee_block); 631 635 cur_len = ext4_ext_get_actual_len(ex); 632 636 /* Check hole before the start pos */ 633 637 if (cur_blk + cur_len - 1 < o_start) { 638 + next_blk = 
ext4_ext_next_allocated_block(path); 634 639 if (next_blk == EXT_MAX_BLOCKS) { 635 640 ret = -ENODATA; 636 641 goto out; ··· 658 663 donor_page_index = d_start >> (PAGE_SHIFT - 659 664 donor_inode->i_blkbits); 660 665 offset_in_page = o_start % blocks_per_page; 661 - if (cur_len > blocks_per_page- offset_in_page) 666 + if (cur_len > blocks_per_page - offset_in_page) 662 667 cur_len = blocks_per_page - offset_in_page; 663 668 /* 664 669 * Up semaphore to avoid following problems: ··· 689 694 ext4_discard_preallocations(donor_inode, 0); 690 695 } 691 696 692 - ext4_ext_drop_refs(path); 693 - kfree(path); 697 + ext4_free_ext_path(path); 694 698 ext4_double_up_write_data_sem(orig_inode, donor_inode); 695 699 unlock_two_nondirectories(orig_inode, donor_inode); 696 700
+11 -6
fs/ext4/namei.c
··· 85 85 return bh; 86 86 inode->i_size += inode->i_sb->s_blocksize; 87 87 EXT4_I(inode)->i_disksize = inode->i_size; 88 + err = ext4_mark_inode_dirty(handle, inode); 89 + if (err) 90 + goto out; 88 91 BUFFER_TRACE(bh, "get_write_access"); 89 92 err = ext4_journal_get_write_access(handle, inode->i_sb, bh, 90 93 EXT4_JTR_NONE); 91 - if (err) { 92 - brelse(bh); 93 - ext4_std_error(inode->i_sb, err); 94 - return ERR_PTR(err); 95 - } 94 + if (err) 95 + goto out; 96 96 return bh; 97 + 98 + out: 99 + brelse(bh); 100 + ext4_std_error(inode->i_sb, err); 101 + return ERR_PTR(err); 97 102 } 98 103 99 104 static int ext4_dx_csum_verify(struct inode *inode, ··· 131 126 struct ext4_dir_entry *dirent; 132 127 int is_dx_block = 0; 133 128 134 - if (block >= inode->i_size) { 129 + if (block >= inode->i_size >> inode->i_blkbits) { 135 130 ext4_error_inode(inode, func, line, block, 136 131 "Attempting to read directory block (%u) that is past i_size (%llu)", 137 132 block, inode->i_size);
+1 -1
fs/ext4/resize.c
··· 2122 2122 goto out; 2123 2123 } 2124 2124 2125 - if (ext4_blocks_count(es) == n_blocks_count) 2125 + if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0) 2126 2126 goto out; 2127 2127 2128 2128 err = ext4_alloc_flex_bg_array(sb, n_group + 1);
+687 -556
fs/ext4/super.c
··· 205 205 206 206 int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait) 207 207 { 208 - if (trylock_buffer(bh)) { 209 - if (wait) 210 - return ext4_read_bh(bh, op_flags, NULL); 208 + lock_buffer(bh); 209 + if (!wait) { 211 210 ext4_read_bh_nowait(bh, op_flags, NULL); 212 211 return 0; 213 212 } 214 - if (wait) { 215 - wait_on_buffer(bh); 216 - if (buffer_uptodate(bh)) 217 - return 0; 218 - return -EIO; 219 - } 220 - return 0; 213 + return ext4_read_bh(bh, op_flags, NULL); 221 214 } 222 215 223 216 /* ··· 257 264 struct buffer_head *bh = sb_getblk_gfp(sb, block, 0); 258 265 259 266 if (likely(bh)) { 260 - ext4_read_bh_lock(bh, REQ_RAHEAD, false); 267 + if (trylock_buffer(bh)) 268 + ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL); 261 269 brelse(bh); 262 270 } 263 271 } ··· 1570 1576 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1571 1577 Opt_resgid, Opt_resuid, Opt_sb, 1572 1578 Opt_nouid32, Opt_debug, Opt_removed, 1573 - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1579 + Opt_user_xattr, Opt_acl, 1574 1580 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, 1575 1581 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, 1576 1582 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, ··· 1579 1585 Opt_inlinecrypt, 1580 1586 Opt_usrjquota, Opt_grpjquota, Opt_quota, 1581 1587 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, 1582 - Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, 1588 + Opt_usrquota, Opt_grpquota, Opt_prjquota, 1583 1589 Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never, 1584 1590 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, 1585 1591 Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize, ··· 1656 1662 fsparam_flag ("oldalloc", Opt_removed), 1657 1663 fsparam_flag ("orlov", Opt_removed), 1658 1664 fsparam_flag ("user_xattr", Opt_user_xattr), 1659 - fsparam_flag ("nouser_xattr", Opt_nouser_xattr), 1660 1665 fsparam_flag ("acl", Opt_acl), 1661 - 
fsparam_flag ("noacl", Opt_noacl), 1662 1666 fsparam_flag ("norecovery", Opt_noload), 1663 1667 fsparam_flag ("noload", Opt_noload), 1664 1668 fsparam_flag ("bh", Opt_removed), ··· 1686 1694 fsparam_flag ("barrier", Opt_barrier), 1687 1695 fsparam_u32 ("barrier", Opt_barrier), 1688 1696 fsparam_flag ("nobarrier", Opt_nobarrier), 1689 - fsparam_flag ("i_version", Opt_i_version), 1697 + fsparam_flag ("i_version", Opt_removed), 1690 1698 fsparam_flag ("dax", Opt_dax), 1691 1699 fsparam_enum ("dax", Opt_dax_type, ext4_param_dax), 1692 1700 fsparam_u32 ("stripe", Opt_stripe), ··· 1806 1814 {Opt_journal_ioprio, 0, MOPT_NO_EXT2}, 1807 1815 {Opt_data, 0, MOPT_NO_EXT2}, 1808 1816 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, 1809 - {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, 1810 1817 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1811 1818 {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, 1812 - {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, 1813 1819 #else 1814 1820 {Opt_acl, 0, MOPT_NOSUPPORT}, 1815 - {Opt_noacl, 0, MOPT_NOSUPPORT}, 1816 1821 #endif 1817 1822 {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, 1818 1823 {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, ··· 2109 2120 else 2110 2121 return note_qf_name(fc, GRPQUOTA, param); 2111 2122 #endif 2112 - case Opt_noacl: 2113 - case Opt_nouser_xattr: 2114 - ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5"); 2115 - break; 2116 2123 case Opt_sb: 2117 2124 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { 2118 2125 ext4_msg(NULL, KERN_WARNING, ··· 2124 2139 return 0; 2125 2140 case Opt_abort: 2126 2141 ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); 2127 - return 0; 2128 - case Opt_i_version: 2129 - ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); 2130 - ext4_msg(NULL, KERN_WARNING, "Use iversion instead\n"); 2131 - ctx_set_flags(ctx, SB_I_VERSION); 2132 2142 return 0; 2133 2143 case Opt_inlinecrypt: 2134 2144 #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT ··· 2794 2814 sb->s_flags &= ~ctx->mask_s_flags; 2795 2815 
sb->s_flags |= ctx->vals_s_flags; 2796 2816 2797 - /* 2798 - * i_version differs from common mount option iversion so we have 2799 - * to let vfs know that it was set, otherwise it would get cleared 2800 - * on remount 2801 - */ 2802 - if (ctx->mask_s_flags & SB_I_VERSION) 2803 - fc->sb_flags |= SB_I_VERSION; 2804 - 2805 2817 #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; }) 2806 2818 APPLY(s_commit_interval); 2807 2819 APPLY(s_stripe); ··· 2942 2970 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); 2943 2971 if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) 2944 2972 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); 2945 - if (sb->s_flags & SB_I_VERSION) 2946 - SEQ_OPTS_PUTS("i_version"); 2947 2973 if (nodefs || sbi->s_stripe) 2948 2974 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); 2949 2975 if (nodefs || EXT4_MOUNT_DATA_FLAGS & ··· 3737 3767 unsigned long next_wakeup, cur; 3738 3768 3739 3769 BUG_ON(NULL == eli); 3770 + set_freezable(); 3740 3771 3741 3772 cont_thread: 3742 3773 while (true) { ··· 3953 3982 goto out; 3954 3983 } 3955 3984 3956 - if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) && 3957 - (first_not_zeroed == ngroups || sb_rdonly(sb) || 3958 - !test_opt(sb, INIT_INODE_TABLE))) 3985 + if (sb_rdonly(sb) || 3986 + (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) && 3987 + (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE)))) 3959 3988 goto out; 3960 3989 3961 3990 elr = ext4_li_request_new(sb, first_not_zeroed); ··· 4282 4311 return NULL; 4283 4312 } 4284 4313 4285 - static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) 4314 + static void ext4_set_def_opts(struct super_block *sb, 4315 + struct ext4_super_block *es) 4286 4316 { 4287 - struct buffer_head *bh, **group_desc; 4288 - struct ext4_super_block *es = NULL; 4289 - struct ext4_sb_info *sbi = EXT4_SB(sb); 4290 - struct flex_groups **flex_groups; 4291 - ext4_fsblk_t block; 4292 - ext4_fsblk_t logical_sb_block; 4293 - 
unsigned long offset = 0; 4294 4317 unsigned long def_mount_opts; 4295 - struct inode *root; 4296 - int ret = -ENOMEM; 4297 - int blocksize, clustersize; 4298 - unsigned int db_count; 4299 - unsigned int i; 4300 - int needs_recovery, has_huge_files; 4301 - __u64 blocks_count; 4302 - int err = 0; 4303 - ext4_group_t first_not_zeroed; 4304 - struct ext4_fs_context *ctx = fc->fs_private; 4305 - int silent = fc->sb_flags & SB_SILENT; 4306 - 4307 - /* Set defaults for the variables that will be set during parsing */ 4308 - if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) 4309 - ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4310 - 4311 - sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 4312 - sbi->s_sectors_written_start = 4313 - part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); 4314 - 4315 - /* -EINVAL is default */ 4316 - ret = -EINVAL; 4317 - blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 4318 - if (!blocksize) { 4319 - ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 4320 - goto out_fail; 4321 - } 4322 - 4323 - /* 4324 - * The ext4 superblock will not be buffer aligned for other than 1kB 4325 - * block sizes. We need to calculate the offset from buffer start. 
4326 - */ 4327 - if (blocksize != EXT4_MIN_BLOCK_SIZE) { 4328 - logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; 4329 - offset = do_div(logical_sb_block, blocksize); 4330 - } else { 4331 - logical_sb_block = sbi->s_sb_block; 4332 - } 4333 - 4334 - bh = ext4_sb_bread_unmovable(sb, logical_sb_block); 4335 - if (IS_ERR(bh)) { 4336 - ext4_msg(sb, KERN_ERR, "unable to read superblock"); 4337 - ret = PTR_ERR(bh); 4338 - goto out_fail; 4339 - } 4340 - /* 4341 - * Note: s_es must be initialized as soon as possible because 4342 - * some ext4 macro-instructions depend on its value 4343 - */ 4344 - es = (struct ext4_super_block *) (bh->b_data + offset); 4345 - sbi->s_es = es; 4346 - sb->s_magic = le16_to_cpu(es->s_magic); 4347 - if (sb->s_magic != EXT4_SUPER_MAGIC) 4348 - goto cantfind_ext4; 4349 - sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 4350 - 4351 - /* Warn if metadata_csum and gdt_csum are both set. */ 4352 - if (ext4_has_feature_metadata_csum(sb) && 4353 - ext4_has_feature_gdt_csum(sb)) 4354 - ext4_warning(sb, "metadata_csum and uninit_bg are " 4355 - "redundant flags; please run fsck."); 4356 - 4357 - /* Check for a known checksum algorithm */ 4358 - if (!ext4_verify_csum_type(sb, es)) { 4359 - ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 4360 - "unknown checksum algorithm."); 4361 - silent = 1; 4362 - goto cantfind_ext4; 4363 - } 4364 - ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, 4365 - ext4_orphan_file_block_trigger); 4366 - 4367 - /* Load the checksum driver */ 4368 - sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 4369 - if (IS_ERR(sbi->s_chksum_driver)) { 4370 - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); 4371 - ret = PTR_ERR(sbi->s_chksum_driver); 4372 - sbi->s_chksum_driver = NULL; 4373 - goto failed_mount; 4374 - } 4375 - 4376 - /* Check superblock checksum */ 4377 - if (!ext4_superblock_csum_verify(sb, es)) { 4378 - ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 4379 - "invalid 
superblock checksum. Run e2fsck?"); 4380 - silent = 1; 4381 - ret = -EFSBADCRC; 4382 - goto cantfind_ext4; 4383 - } 4384 - 4385 - /* Precompute checksum seed for all metadata */ 4386 - if (ext4_has_feature_csum_seed(sb)) 4387 - sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); 4388 - else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb)) 4389 - sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 4390 - sizeof(es->s_uuid)); 4391 4318 4392 4319 /* Set defaults before we parse the mount options */ 4393 4320 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); ··· 4314 4445 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 4315 4446 set_opt(sb, WRITEBACK_DATA); 4316 4447 4317 - if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 4448 + if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC) 4318 4449 set_opt(sb, ERRORS_PANIC); 4319 - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 4450 + else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE) 4320 4451 set_opt(sb, ERRORS_CONT); 4321 4452 else 4322 4453 set_opt(sb, ERRORS_RO); ··· 4324 4455 set_opt(sb, BLOCK_VALIDITY); 4325 4456 if (def_mount_opts & EXT4_DEFM_DISCARD) 4326 4457 set_opt(sb, DISCARD); 4327 - 4328 - sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); 4329 - sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); 4330 - sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 4331 - sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 4332 - sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 4333 4458 4334 4459 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) 4335 4460 set_opt(sb, BARRIER); ··· 4336 4473 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 4337 4474 set_opt(sb, DELALLOC); 4338 4475 4339 - /* 4340 - * set default s_li_wait_mult for lazyinit, for the case there is 4341 - * no mount option specified. 
4342 - */ 4343 - sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; 4344 - 4345 - if (le32_to_cpu(es->s_log_block_size) > 4346 - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { 4347 - ext4_msg(sb, KERN_ERR, 4348 - "Invalid log block size: %u", 4349 - le32_to_cpu(es->s_log_block_size)); 4350 - goto failed_mount; 4351 - } 4352 - if (le32_to_cpu(es->s_log_cluster_size) > 4353 - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { 4354 - ext4_msg(sb, KERN_ERR, 4355 - "Invalid log cluster size: %u", 4356 - le32_to_cpu(es->s_log_cluster_size)); 4357 - goto failed_mount; 4358 - } 4359 - 4360 - blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 4361 - 4362 - if (blocksize == PAGE_SIZE) 4476 + if (sb->s_blocksize == PAGE_SIZE) 4363 4477 set_opt(sb, DIOREAD_NOLOCK); 4478 + } 4479 + 4480 + static int ext4_handle_clustersize(struct super_block *sb) 4481 + { 4482 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4483 + struct ext4_super_block *es = sbi->s_es; 4484 + int clustersize; 4485 + 4486 + /* Handle clustersize */ 4487 + clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); 4488 + if (ext4_has_feature_bigalloc(sb)) { 4489 + if (clustersize < sb->s_blocksize) { 4490 + ext4_msg(sb, KERN_ERR, 4491 + "cluster size (%d) smaller than " 4492 + "block size (%lu)", clustersize, sb->s_blocksize); 4493 + return -EINVAL; 4494 + } 4495 + sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - 4496 + le32_to_cpu(es->s_log_block_size); 4497 + sbi->s_clusters_per_group = 4498 + le32_to_cpu(es->s_clusters_per_group); 4499 + if (sbi->s_clusters_per_group > sb->s_blocksize * 8) { 4500 + ext4_msg(sb, KERN_ERR, 4501 + "#clusters per group too big: %lu", 4502 + sbi->s_clusters_per_group); 4503 + return -EINVAL; 4504 + } 4505 + if (sbi->s_blocks_per_group != 4506 + (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) { 4507 + ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " 4508 + "clusters per group (%lu) inconsistent", 4509 + 
sbi->s_blocks_per_group, 4510 + sbi->s_clusters_per_group); 4511 + return -EINVAL; 4512 + } 4513 + } else { 4514 + if (clustersize != sb->s_blocksize) { 4515 + ext4_msg(sb, KERN_ERR, 4516 + "fragment/cluster size (%d) != " 4517 + "block size (%lu)", clustersize, sb->s_blocksize); 4518 + return -EINVAL; 4519 + } 4520 + if (sbi->s_blocks_per_group > sb->s_blocksize * 8) { 4521 + ext4_msg(sb, KERN_ERR, 4522 + "#blocks per group too big: %lu", 4523 + sbi->s_blocks_per_group); 4524 + return -EINVAL; 4525 + } 4526 + sbi->s_clusters_per_group = sbi->s_blocks_per_group; 4527 + sbi->s_cluster_bits = 0; 4528 + } 4529 + sbi->s_cluster_ratio = clustersize / sb->s_blocksize; 4530 + 4531 + /* Do we have standard group size of clustersize * 8 blocks ? */ 4532 + if (sbi->s_blocks_per_group == clustersize << 3) 4533 + set_opt2(sb, STD_GROUP_SIZE); 4534 + 4535 + return 0; 4536 + } 4537 + 4538 + static void ext4_fast_commit_init(struct super_block *sb) 4539 + { 4540 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4541 + 4542 + /* Initialize fast commit stuff */ 4543 + atomic_set(&sbi->s_fc_subtid, 0); 4544 + INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); 4545 + INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); 4546 + INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); 4547 + INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); 4548 + sbi->s_fc_bytes = 0; 4549 + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 4550 + sbi->s_fc_ineligible_tid = 0; 4551 + spin_lock_init(&sbi->s_fc_lock); 4552 + memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); 4553 + sbi->s_fc_replay_state.fc_regions = NULL; 4554 + sbi->s_fc_replay_state.fc_regions_size = 0; 4555 + sbi->s_fc_replay_state.fc_regions_used = 0; 4556 + sbi->s_fc_replay_state.fc_regions_valid = 0; 4557 + sbi->s_fc_replay_state.fc_modified_inodes = NULL; 4558 + sbi->s_fc_replay_state.fc_modified_inodes_size = 0; 4559 + sbi->s_fc_replay_state.fc_modified_inodes_used = 0; 4560 + } 4561 + 4562 + static int ext4_inode_info_init(struct super_block *sb, 4563 + struct 
ext4_super_block *es) 4564 + { 4565 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4364 4566 4365 4567 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 4366 4568 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; ··· 4436 4508 if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { 4437 4509 ext4_msg(sb, KERN_ERR, "invalid first ino: %u", 4438 4510 sbi->s_first_ino); 4439 - goto failed_mount; 4511 + return -EINVAL; 4440 4512 } 4441 4513 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 4442 4514 (!is_power_of_2(sbi->s_inode_size)) || 4443 - (sbi->s_inode_size > blocksize)) { 4515 + (sbi->s_inode_size > sb->s_blocksize)) { 4444 4516 ext4_msg(sb, KERN_ERR, 4445 4517 "unsupported inode size: %d", 4446 4518 sbi->s_inode_size); 4447 - ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); 4448 - goto failed_mount; 4519 + ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize); 4520 + return -EINVAL; 4449 4521 } 4450 4522 /* 4451 4523 * i_atime_extra is the last extra field available for ··· 4463 4535 } 4464 4536 sb->s_time_min = EXT4_TIMESTAMP_MIN; 4465 4537 } 4538 + 4466 4539 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 4467 4540 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 4468 4541 EXT4_GOOD_OLD_INODE_SIZE; ··· 4475 4546 if (v > max) { 4476 4547 ext4_msg(sb, KERN_ERR, 4477 4548 "bad s_want_extra_isize: %d", v); 4478 - goto failed_mount; 4549 + return -EINVAL; 4479 4550 } 4480 4551 if (sbi->s_want_extra_isize < v) 4481 4552 sbi->s_want_extra_isize = v; ··· 4484 4555 if (v > max) { 4485 4556 ext4_msg(sb, KERN_ERR, 4486 4557 "bad s_min_extra_isize: %d", v); 4487 - goto failed_mount; 4558 + return -EINVAL; 4488 4559 } 4489 4560 if (sbi->s_want_extra_isize < v) 4490 4561 sbi->s_want_extra_isize = v; 4491 4562 } 4492 4563 } 4493 4564 4494 - err = parse_apply_sb_mount_options(sb, ctx); 4495 - if (err < 0) 4496 - goto failed_mount; 4497 - 4498 - sbi->s_def_mount_opt = sbi->s_mount_opt; 4499 - 4500 - err = ext4_check_opt_consistency(fc, sb); 4501 - if (err < 0) 
4502 - goto failed_mount; 4503 - 4504 - ext4_apply_options(fc, sb); 4565 + return 0; 4566 + } 4505 4567 4506 4568 #if IS_ENABLED(CONFIG_UNICODE) 4507 - if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { 4508 - const struct ext4_sb_encodings *encoding_info; 4509 - struct unicode_map *encoding; 4510 - __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags); 4569 + static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es) 4570 + { 4571 + const struct ext4_sb_encodings *encoding_info; 4572 + struct unicode_map *encoding; 4573 + __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags); 4511 4574 4512 - encoding_info = ext4_sb_read_encoding(es); 4513 - if (!encoding_info) { 4514 - ext4_msg(sb, KERN_ERR, 4515 - "Encoding requested by superblock is unknown"); 4516 - goto failed_mount; 4517 - } 4575 + if (!ext4_has_feature_casefold(sb) || sb->s_encoding) 4576 + return 0; 4518 4577 4519 - encoding = utf8_load(encoding_info->version); 4520 - if (IS_ERR(encoding)) { 4521 - ext4_msg(sb, KERN_ERR, 4522 - "can't mount with superblock charset: %s-%u.%u.%u " 4523 - "not supported by the kernel. 
flags: 0x%x.", 4524 - encoding_info->name, 4525 - unicode_major(encoding_info->version), 4526 - unicode_minor(encoding_info->version), 4527 - unicode_rev(encoding_info->version), 4528 - encoding_flags); 4529 - goto failed_mount; 4530 - } 4531 - ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " 4532 - "%s-%u.%u.%u with flags 0x%hx", encoding_info->name, 4533 - unicode_major(encoding_info->version), 4534 - unicode_minor(encoding_info->version), 4535 - unicode_rev(encoding_info->version), 4536 - encoding_flags); 4537 - 4538 - sb->s_encoding = encoding; 4539 - sb->s_encoding_flags = encoding_flags; 4578 + encoding_info = ext4_sb_read_encoding(es); 4579 + if (!encoding_info) { 4580 + ext4_msg(sb, KERN_ERR, 4581 + "Encoding requested by superblock is unknown"); 4582 + return -EINVAL; 4540 4583 } 4584 + 4585 + encoding = utf8_load(encoding_info->version); 4586 + if (IS_ERR(encoding)) { 4587 + ext4_msg(sb, KERN_ERR, 4588 + "can't mount with superblock charset: %s-%u.%u.%u " 4589 + "not supported by the kernel. 
flags: 0x%x.", 4590 + encoding_info->name, 4591 + unicode_major(encoding_info->version), 4592 + unicode_minor(encoding_info->version), 4593 + unicode_rev(encoding_info->version), 4594 + encoding_flags); 4595 + return -EINVAL; 4596 + } 4597 + ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " 4598 + "%s-%u.%u.%u with flags 0x%hx", encoding_info->name, 4599 + unicode_major(encoding_info->version), 4600 + unicode_minor(encoding_info->version), 4601 + unicode_rev(encoding_info->version), 4602 + encoding_flags); 4603 + 4604 + sb->s_encoding = encoding; 4605 + sb->s_encoding_flags = encoding_flags; 4606 + 4607 + return 0; 4608 + } 4609 + #else 4610 + static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es) 4611 + { 4612 + return 0; 4613 + } 4541 4614 #endif 4542 4615 4543 - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 4544 - printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n"); 4545 - /* can't mount with both data=journal and dioread_nolock. 
*/ 4546 - clear_opt(sb, DIOREAD_NOLOCK); 4547 - clear_opt2(sb, JOURNAL_FAST_COMMIT); 4548 - if (test_opt2(sb, EXPLICIT_DELALLOC)) { 4549 - ext4_msg(sb, KERN_ERR, "can't mount with " 4550 - "both data=journal and delalloc"); 4551 - goto failed_mount; 4552 - } 4553 - if (test_opt(sb, DAX_ALWAYS)) { 4554 - ext4_msg(sb, KERN_ERR, "can't mount with " 4555 - "both data=journal and dax"); 4556 - goto failed_mount; 4557 - } 4558 - if (ext4_has_feature_encrypt(sb)) { 4559 - ext4_msg(sb, KERN_WARNING, 4560 - "encrypted files will use data=ordered " 4561 - "instead of data journaling mode"); 4562 - } 4563 - if (test_opt(sb, DELALLOC)) 4564 - clear_opt(sb, DELALLOC); 4565 - } else { 4566 - sb->s_iflags |= SB_I_CGROUPWB; 4616 + static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es) 4617 + { 4618 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4619 + 4620 + /* Warn if metadata_csum and gdt_csum are both set. */ 4621 + if (ext4_has_feature_metadata_csum(sb) && 4622 + ext4_has_feature_gdt_csum(sb)) 4623 + ext4_warning(sb, "metadata_csum and uninit_bg are " 4624 + "redundant flags; please run fsck."); 4625 + 4626 + /* Check for a known checksum algorithm */ 4627 + if (!ext4_verify_csum_type(sb, es)) { 4628 + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 4629 + "unknown checksum algorithm."); 4630 + return -EINVAL; 4631 + } 4632 + ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, 4633 + ext4_orphan_file_block_trigger); 4634 + 4635 + /* Load the checksum driver */ 4636 + sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 4637 + if (IS_ERR(sbi->s_chksum_driver)) { 4638 + int ret = PTR_ERR(sbi->s_chksum_driver); 4639 + ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); 4640 + sbi->s_chksum_driver = NULL; 4641 + return ret; 4567 4642 } 4568 4643 4569 - sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | 4570 - (test_opt(sb, POSIX_ACL) ? 
SB_POSIXACL : 0); 4644 + /* Check superblock checksum */ 4645 + if (!ext4_superblock_csum_verify(sb, es)) { 4646 + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 4647 + "invalid superblock checksum. Run e2fsck?"); 4648 + return -EFSBADCRC; 4649 + } 4571 4650 4651 + /* Precompute checksum seed for all metadata */ 4652 + if (ext4_has_feature_csum_seed(sb)) 4653 + sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); 4654 + else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb)) 4655 + sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 4656 + sizeof(es->s_uuid)); 4657 + return 0; 4658 + } 4659 + 4660 + static int ext4_check_feature_compatibility(struct super_block *sb, 4661 + struct ext4_super_block *es, 4662 + int silent) 4663 + { 4572 4664 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 4573 4665 (ext4_has_compat_features(sb) || 4574 4666 ext4_has_ro_compat_features(sb) || ··· 4603 4653 if (ext4_has_feature_64bit(sb)) { 4604 4654 ext4_msg(sb, KERN_ERR, 4605 4655 "The Hurd can't support 64-bit file systems"); 4606 - goto failed_mount; 4656 + return -EINVAL; 4607 4657 } 4608 4658 4609 4659 /* ··· 4613 4663 if (ext4_has_feature_ea_inode(sb)) { 4614 4664 ext4_msg(sb, KERN_ERR, 4615 4665 "ea_inode feature is not supported for Hurd"); 4616 - goto failed_mount; 4666 + return -EINVAL; 4617 4667 } 4618 4668 } 4619 4669 ··· 4627 4677 * it's actually an ext[34] filesystem. 4628 4678 */ 4629 4679 if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) 4630 - goto failed_mount; 4680 + return -EINVAL; 4631 4681 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " 4632 4682 "to feature incompatibilities"); 4633 - goto failed_mount; 4683 + return -EINVAL; 4634 4684 } 4635 4685 } 4636 4686 ··· 4644 4694 * it's actually an ext4 filesystem. 
4645 4695 */ 4646 4696 if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) 4647 - goto failed_mount; 4697 + return -EINVAL; 4648 4698 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " 4649 4699 "to feature incompatibilities"); 4650 - goto failed_mount; 4700 + return -EINVAL; 4651 4701 } 4652 4702 } 4653 4703 ··· 4657 4707 * so there is a chance incompat flags are set on a rev 0 filesystem. 4658 4708 */ 4659 4709 if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) 4710 + return -EINVAL; 4711 + 4712 + return 0; 4713 + } 4714 + 4715 + static int ext4_geometry_check(struct super_block *sb, 4716 + struct ext4_super_block *es) 4717 + { 4718 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4719 + __u64 blocks_count; 4720 + 4721 + /* check blocks count against device size */ 4722 + blocks_count = sb_bdev_nr_blocks(sb); 4723 + if (blocks_count && ext4_blocks_count(es) > blocks_count) { 4724 + ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 4725 + "exceeds size of device (%llu blocks)", 4726 + ext4_blocks_count(es), blocks_count); 4727 + return -EINVAL; 4728 + } 4729 + 4730 + /* 4731 + * It makes no sense for the first data block to be beyond the end 4732 + * of the filesystem. 
4733 + */ 4734 + if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 4735 + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " 4736 + "block %u is beyond end of filesystem (%llu)", 4737 + le32_to_cpu(es->s_first_data_block), 4738 + ext4_blocks_count(es)); 4739 + return -EINVAL; 4740 + } 4741 + if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && 4742 + (sbi->s_cluster_ratio == 1)) { 4743 + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " 4744 + "block is 0 with a 1k block and cluster size"); 4745 + return -EINVAL; 4746 + } 4747 + 4748 + blocks_count = (ext4_blocks_count(es) - 4749 + le32_to_cpu(es->s_first_data_block) + 4750 + EXT4_BLOCKS_PER_GROUP(sb) - 1); 4751 + do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 4752 + if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 4753 + ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " 4754 + "(block count %llu, first data block %u, " 4755 + "blocks per group %lu)", blocks_count, 4756 + ext4_blocks_count(es), 4757 + le32_to_cpu(es->s_first_data_block), 4758 + EXT4_BLOCKS_PER_GROUP(sb)); 4759 + return -EINVAL; 4760 + } 4761 + sbi->s_groups_count = blocks_count; 4762 + sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 4763 + (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 4764 + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != 4765 + le32_to_cpu(es->s_inodes_count)) { 4766 + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", 4767 + le32_to_cpu(es->s_inodes_count), 4768 + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); 4769 + return -EINVAL; 4770 + } 4771 + 4772 + return 0; 4773 + } 4774 + 4775 + static void ext4_group_desc_free(struct ext4_sb_info *sbi) 4776 + { 4777 + struct buffer_head **group_desc; 4778 + int i; 4779 + 4780 + rcu_read_lock(); 4781 + group_desc = rcu_dereference(sbi->s_group_desc); 4782 + for (i = 0; i < sbi->s_gdb_count; i++) 4783 + brelse(group_desc[i]); 4784 + kvfree(group_desc); 4785 + 
rcu_read_unlock(); 4786 + } 4787 + 4788 + static int ext4_group_desc_init(struct super_block *sb, 4789 + struct ext4_super_block *es, 4790 + ext4_fsblk_t logical_sb_block, 4791 + ext4_group_t *first_not_zeroed) 4792 + { 4793 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4794 + unsigned int db_count; 4795 + ext4_fsblk_t block; 4796 + int ret; 4797 + int i; 4798 + 4799 + db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 4800 + EXT4_DESC_PER_BLOCK(sb); 4801 + if (ext4_has_feature_meta_bg(sb)) { 4802 + if (le32_to_cpu(es->s_first_meta_bg) > db_count) { 4803 + ext4_msg(sb, KERN_WARNING, 4804 + "first meta block group too large: %u " 4805 + "(group descriptor block count %u)", 4806 + le32_to_cpu(es->s_first_meta_bg), db_count); 4807 + return -EINVAL; 4808 + } 4809 + } 4810 + rcu_assign_pointer(sbi->s_group_desc, 4811 + kvmalloc_array(db_count, 4812 + sizeof(struct buffer_head *), 4813 + GFP_KERNEL)); 4814 + if (sbi->s_group_desc == NULL) { 4815 + ext4_msg(sb, KERN_ERR, "not enough memory"); 4816 + return -ENOMEM; 4817 + } 4818 + 4819 + bgl_lock_init(sbi->s_blockgroup_lock); 4820 + 4821 + /* Pre-read the descriptors into the buffer cache */ 4822 + for (i = 0; i < db_count; i++) { 4823 + block = descriptor_loc(sb, logical_sb_block, i); 4824 + ext4_sb_breadahead_unmovable(sb, block); 4825 + } 4826 + 4827 + for (i = 0; i < db_count; i++) { 4828 + struct buffer_head *bh; 4829 + 4830 + block = descriptor_loc(sb, logical_sb_block, i); 4831 + bh = ext4_sb_bread_unmovable(sb, block); 4832 + if (IS_ERR(bh)) { 4833 + ext4_msg(sb, KERN_ERR, 4834 + "can't read group descriptor %d", i); 4835 + sbi->s_gdb_count = i; 4836 + ret = PTR_ERR(bh); 4837 + goto out; 4838 + } 4839 + rcu_read_lock(); 4840 + rcu_dereference(sbi->s_group_desc)[i] = bh; 4841 + rcu_read_unlock(); 4842 + } 4843 + sbi->s_gdb_count = db_count; 4844 + if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) { 4845 + ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 4846 + ret = 
-EFSCORRUPTED; 4847 + goto out; 4848 + } 4849 + return 0; 4850 + out: 4851 + ext4_group_desc_free(sbi); 4852 + return ret; 4853 + } 4854 + 4855 + static int ext4_load_and_init_journal(struct super_block *sb, 4856 + struct ext4_super_block *es, 4857 + struct ext4_fs_context *ctx) 4858 + { 4859 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4860 + int err; 4861 + 4862 + err = ext4_load_journal(sb, es, ctx->journal_devnum); 4863 + if (err) 4864 + return err; 4865 + 4866 + if (ext4_has_feature_64bit(sb) && 4867 + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 4868 + JBD2_FEATURE_INCOMPAT_64BIT)) { 4869 + ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 4870 + goto out; 4871 + } 4872 + 4873 + if (!set_journal_csum_feature_set(sb)) { 4874 + ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " 4875 + "feature set"); 4876 + goto out; 4877 + } 4878 + 4879 + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && 4880 + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 4881 + JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) { 4882 + ext4_msg(sb, KERN_ERR, 4883 + "Failed to set fast commit journal feature"); 4884 + goto out; 4885 + } 4886 + 4887 + /* We have now updated the journal if required, so we can 4888 + * validate the data journaling mode. 
*/ 4889 + switch (test_opt(sb, DATA_FLAGS)) { 4890 + case 0: 4891 + /* No mode set, assume a default based on the journal 4892 + * capabilities: ORDERED_DATA if the journal can 4893 + * cope, else JOURNAL_DATA 4894 + */ 4895 + if (jbd2_journal_check_available_features 4896 + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 4897 + set_opt(sb, ORDERED_DATA); 4898 + sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 4899 + } else { 4900 + set_opt(sb, JOURNAL_DATA); 4901 + sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 4902 + } 4903 + break; 4904 + 4905 + case EXT4_MOUNT_ORDERED_DATA: 4906 + case EXT4_MOUNT_WRITEBACK_DATA: 4907 + if (!jbd2_journal_check_available_features 4908 + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 4909 + ext4_msg(sb, KERN_ERR, "Journal does not support " 4910 + "requested data journaling mode"); 4911 + goto out; 4912 + } 4913 + break; 4914 + default: 4915 + break; 4916 + } 4917 + 4918 + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && 4919 + test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 4920 + ext4_msg(sb, KERN_ERR, "can't mount with " 4921 + "journal_async_commit in data=ordered mode"); 4922 + goto out; 4923 + } 4924 + 4925 + set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); 4926 + 4927 + sbi->s_journal->j_submit_inode_data_buffers = 4928 + ext4_journal_submit_inode_data_buffers; 4929 + sbi->s_journal->j_finish_inode_data_buffers = 4930 + ext4_journal_finish_inode_data_buffers; 4931 + 4932 + return 0; 4933 + 4934 + out: 4935 + /* flush s_error_work before journal destroy. 
*/ 4936 + flush_work(&sbi->s_error_work); 4937 + jbd2_journal_destroy(sbi->s_journal); 4938 + sbi->s_journal = NULL; 4939 + return err; 4940 + } 4941 + 4942 + static int ext4_journal_data_mode_check(struct super_block *sb) 4943 + { 4944 + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 4945 + printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with " 4946 + "data=journal disables delayed allocation, " 4947 + "dioread_nolock, O_DIRECT and fast_commit support!\n"); 4948 + /* can't mount with both data=journal and dioread_nolock. */ 4949 + clear_opt(sb, DIOREAD_NOLOCK); 4950 + clear_opt2(sb, JOURNAL_FAST_COMMIT); 4951 + if (test_opt2(sb, EXPLICIT_DELALLOC)) { 4952 + ext4_msg(sb, KERN_ERR, "can't mount with " 4953 + "both data=journal and delalloc"); 4954 + return -EINVAL; 4955 + } 4956 + if (test_opt(sb, DAX_ALWAYS)) { 4957 + ext4_msg(sb, KERN_ERR, "can't mount with " 4958 + "both data=journal and dax"); 4959 + return -EINVAL; 4960 + } 4961 + if (ext4_has_feature_encrypt(sb)) { 4962 + ext4_msg(sb, KERN_WARNING, 4963 + "encrypted files will use data=ordered " 4964 + "instead of data journaling mode"); 4965 + } 4966 + if (test_opt(sb, DELALLOC)) 4967 + clear_opt(sb, DELALLOC); 4968 + } else { 4969 + sb->s_iflags |= SB_I_CGROUPWB; 4970 + } 4971 + 4972 + return 0; 4973 + } 4974 + 4975 + static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb, 4976 + int silent) 4977 + { 4978 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4979 + struct ext4_super_block *es; 4980 + ext4_fsblk_t logical_sb_block; 4981 + unsigned long offset = 0; 4982 + struct buffer_head *bh; 4983 + int ret = -EINVAL; 4984 + int blocksize; 4985 + 4986 + blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 4987 + if (!blocksize) { 4988 + ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 4989 + return -EINVAL; 4990 + } 4991 + 4992 + /* 4993 + * The ext4 superblock will not be buffer aligned for other than 1kB 4994 + * block sizes. We need to calculate the offset from buffer start. 
4995 + */ 4996 + if (blocksize != EXT4_MIN_BLOCK_SIZE) { 4997 + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; 4998 + offset = do_div(logical_sb_block, blocksize); 4999 + } else { 5000 + logical_sb_block = sbi->s_sb_block; 5001 + } 5002 + 5003 + bh = ext4_sb_bread_unmovable(sb, logical_sb_block); 5004 + if (IS_ERR(bh)) { 5005 + ext4_msg(sb, KERN_ERR, "unable to read superblock"); 5006 + return PTR_ERR(bh); 5007 + } 5008 + /* 5009 + * Note: s_es must be initialized as soon as possible because 5010 + * some ext4 macro-instructions depend on its value 5011 + */ 5012 + es = (struct ext4_super_block *) (bh->b_data + offset); 5013 + sbi->s_es = es; 5014 + sb->s_magic = le16_to_cpu(es->s_magic); 5015 + if (sb->s_magic != EXT4_SUPER_MAGIC) { 5016 + if (!silent) 5017 + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 5018 + goto out; 5019 + } 5020 + 5021 + if (le32_to_cpu(es->s_log_block_size) > 5022 + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { 5023 + ext4_msg(sb, KERN_ERR, 5024 + "Invalid log block size: %u", 5025 + le32_to_cpu(es->s_log_block_size)); 5026 + goto out; 5027 + } 5028 + if (le32_to_cpu(es->s_log_cluster_size) > 5029 + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { 5030 + ext4_msg(sb, KERN_ERR, 5031 + "Invalid log cluster size: %u", 5032 + le32_to_cpu(es->s_log_cluster_size)); 5033 + goto out; 5034 + } 5035 + 5036 + blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 5037 + 5038 + /* 5039 + * If the default block size is not the same as the real block size, 5040 + * we need to reload it. 5041 + */ 5042 + if (sb->s_blocksize == blocksize) { 5043 + *lsb = logical_sb_block; 5044 + sbi->s_sbh = bh; 5045 + return 0; 5046 + } 5047 + 5048 + /* 5049 + * bh must be released before kill_bdev(), otherwise 5050 + * it won't be freed and its page also. kill_bdev() 5051 + * is called by sb_set_blocksize(). 
5052 + */ 5053 + brelse(bh); 5054 + /* Validate the filesystem blocksize */ 5055 + if (!sb_set_blocksize(sb, blocksize)) { 5056 + ext4_msg(sb, KERN_ERR, "bad block size %d", 5057 + blocksize); 5058 + bh = NULL; 5059 + goto out; 5060 + } 5061 + 5062 + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; 5063 + offset = do_div(logical_sb_block, blocksize); 5064 + bh = ext4_sb_bread_unmovable(sb, logical_sb_block); 5065 + if (IS_ERR(bh)) { 5066 + ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try"); 5067 + ret = PTR_ERR(bh); 5068 + bh = NULL; 5069 + goto out; 5070 + } 5071 + es = (struct ext4_super_block *)(bh->b_data + offset); 5072 + sbi->s_es = es; 5073 + if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 5074 + ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!"); 5075 + goto out; 5076 + } 5077 + *lsb = logical_sb_block; 5078 + sbi->s_sbh = bh; 5079 + return 0; 5080 + out: 5081 + brelse(bh); 5082 + return ret; 5083 + } 5084 + 5085 + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) 5086 + { 5087 + struct ext4_super_block *es = NULL; 5088 + struct ext4_sb_info *sbi = EXT4_SB(sb); 5089 + struct flex_groups **flex_groups; 5090 + ext4_fsblk_t block; 5091 + ext4_fsblk_t logical_sb_block; 5092 + struct inode *root; 5093 + int ret = -ENOMEM; 5094 + unsigned int i; 5095 + int needs_recovery, has_huge_files; 5096 + int err = 0; 5097 + ext4_group_t first_not_zeroed; 5098 + struct ext4_fs_context *ctx = fc->fs_private; 5099 + int silent = fc->sb_flags & SB_SILENT; 5100 + 5101 + /* Set defaults for the variables that will be set during parsing */ 5102 + if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) 5103 + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 5104 + 5105 + sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 5106 + sbi->s_sectors_written_start = 5107 + part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); 5108 + 5109 + /* -EINVAL is default */ 5110 + ret = -EINVAL; 5111 + err = ext4_load_super(sb, &logical_sb_block, silent); 
5112 + if (err) 5113 + goto out_fail; 5114 + 5115 + es = sbi->s_es; 5116 + sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 5117 + 5118 + err = ext4_init_metadata_csum(sb, es); 5119 + if (err) 4660 5120 goto failed_mount; 4661 5121 4662 - if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { 5122 + ext4_set_def_opts(sb, es); 5123 + 5124 + sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); 5125 + sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); 5126 + sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 5127 + sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 5128 + sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 5129 + 5130 + /* 5131 + * set default s_li_wait_mult for lazyinit, for the case there is 5132 + * no mount option specified. 5133 + */ 5134 + sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; 5135 + 5136 + if (ext4_inode_info_init(sb, es)) 5137 + goto failed_mount; 5138 + 5139 + err = parse_apply_sb_mount_options(sb, ctx); 5140 + if (err < 0) 5141 + goto failed_mount; 5142 + 5143 + sbi->s_def_mount_opt = sbi->s_mount_opt; 5144 + 5145 + err = ext4_check_opt_consistency(fc, sb); 5146 + if (err < 0) 5147 + goto failed_mount; 5148 + 5149 + ext4_apply_options(fc, sb); 5150 + 5151 + if (ext4_encoding_init(sb, es)) 5152 + goto failed_mount; 5153 + 5154 + if (ext4_journal_data_mode_check(sb)) 5155 + goto failed_mount; 5156 + 5157 + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | 5158 + (test_opt(sb, POSIX_ACL) ? 
SB_POSIXACL : 0); 5159 + 5160 + /* i_version is always enabled now */ 5161 + sb->s_flags |= SB_I_VERSION; 5162 + 5163 + if (ext4_check_feature_compatibility(sb, es, silent)) 5164 + goto failed_mount; 5165 + 5166 + if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) { 4663 5167 ext4_msg(sb, KERN_ERR, 4664 5168 "Number of reserved GDT blocks insanely large: %d", 4665 5169 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks)); ··· 5121 4717 } 5122 4718 5123 4719 if (sbi->s_daxdev) { 5124 - if (blocksize == PAGE_SIZE) 4720 + if (sb->s_blocksize == PAGE_SIZE) 5125 4721 set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); 5126 4722 else 5127 4723 ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); ··· 5146 4742 goto failed_mount; 5147 4743 } 5148 4744 5149 - if (sb->s_blocksize != blocksize) { 5150 - /* 5151 - * bh must be released before kill_bdev(), otherwise 5152 - * it won't be freed and its page also. kill_bdev() 5153 - * is called by sb_set_blocksize(). 5154 - */ 5155 - brelse(bh); 5156 - /* Validate the filesystem blocksize */ 5157 - if (!sb_set_blocksize(sb, blocksize)) { 5158 - ext4_msg(sb, KERN_ERR, "bad block size %d", 5159 - blocksize); 5160 - bh = NULL; 5161 - goto failed_mount; 5162 - } 5163 - 5164 - logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; 5165 - offset = do_div(logical_sb_block, blocksize); 5166 - bh = ext4_sb_bread_unmovable(sb, logical_sb_block); 5167 - if (IS_ERR(bh)) { 5168 - ext4_msg(sb, KERN_ERR, 5169 - "Can't read superblock on 2nd try"); 5170 - ret = PTR_ERR(bh); 5171 - bh = NULL; 5172 - goto failed_mount; 5173 - } 5174 - es = (struct ext4_super_block *)(bh->b_data + offset); 5175 - sbi->s_es = es; 5176 - if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 5177 - ext4_msg(sb, KERN_ERR, 5178 - "Magic mismatch, very weird!"); 5179 - goto failed_mount; 5180 - } 5181 - } 5182 - 5183 4745 has_huge_files = ext4_has_feature_huge_file(sb); 5184 4746 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 
5185 4747 has_huge_files); ··· 5167 4797 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 5168 4798 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 5169 4799 5170 - sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 5171 - if (sbi->s_inodes_per_block == 0) 5172 - goto cantfind_ext4; 4800 + sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb); 4801 + if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) { 4802 + if (!silent) 4803 + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 4804 + goto failed_mount; 4805 + } 5173 4806 if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || 5174 - sbi->s_inodes_per_group > blocksize * 8) { 4807 + sbi->s_inodes_per_group > sb->s_blocksize * 8) { 5175 4808 ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", 5176 4809 sbi->s_inodes_per_group); 5177 4810 goto failed_mount; 5178 4811 } 5179 4812 sbi->s_itb_per_group = sbi->s_inodes_per_group / 5180 4813 sbi->s_inodes_per_block; 5181 - sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 5182 - sbi->s_sbh = bh; 4814 + sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb); 5183 4815 sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY; 5184 4816 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 5185 4817 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); ··· 5207 4835 } 5208 4836 } 5209 4837 5210 - /* Handle clustersize */ 5211 - clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); 5212 - if (ext4_has_feature_bigalloc(sb)) { 5213 - if (clustersize < blocksize) { 5214 - ext4_msg(sb, KERN_ERR, 5215 - "cluster size (%d) smaller than " 5216 - "block size (%d)", clustersize, blocksize); 5217 - goto failed_mount; 5218 - } 5219 - sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - 5220 - le32_to_cpu(es->s_log_block_size); 5221 - sbi->s_clusters_per_group = 5222 - le32_to_cpu(es->s_clusters_per_group); 5223 - if (sbi->s_clusters_per_group > blocksize * 8) 
{ 5224 - ext4_msg(sb, KERN_ERR, 5225 - "#clusters per group too big: %lu", 5226 - sbi->s_clusters_per_group); 5227 - goto failed_mount; 5228 - } 5229 - if (sbi->s_blocks_per_group != 5230 - (sbi->s_clusters_per_group * (clustersize / blocksize))) { 5231 - ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " 5232 - "clusters per group (%lu) inconsistent", 5233 - sbi->s_blocks_per_group, 5234 - sbi->s_clusters_per_group); 5235 - goto failed_mount; 5236 - } 5237 - } else { 5238 - if (clustersize != blocksize) { 5239 - ext4_msg(sb, KERN_ERR, 5240 - "fragment/cluster size (%d) != " 5241 - "block size (%d)", clustersize, blocksize); 5242 - goto failed_mount; 5243 - } 5244 - if (sbi->s_blocks_per_group > blocksize * 8) { 5245 - ext4_msg(sb, KERN_ERR, 5246 - "#blocks per group too big: %lu", 5247 - sbi->s_blocks_per_group); 5248 - goto failed_mount; 5249 - } 5250 - sbi->s_clusters_per_group = sbi->s_blocks_per_group; 5251 - sbi->s_cluster_bits = 0; 5252 - } 5253 - sbi->s_cluster_ratio = clustersize / blocksize; 5254 - 5255 - /* Do we have standard group size of clustersize * 8 blocks ? 
*/ 5256 - if (sbi->s_blocks_per_group == clustersize << 3) 5257 - set_opt2(sb, STD_GROUP_SIZE); 4838 + if (ext4_handle_clustersize(sb)) 4839 + goto failed_mount; 5258 4840 5259 4841 /* 5260 4842 * Test whether we have more sectors than will fit in sector_t, ··· 5222 4896 goto failed_mount; 5223 4897 } 5224 4898 5225 - if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 5226 - goto cantfind_ext4; 5227 - 5228 - /* check blocks count against device size */ 5229 - blocks_count = sb_bdev_nr_blocks(sb); 5230 - if (blocks_count && ext4_blocks_count(es) > blocks_count) { 5231 - ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 5232 - "exceeds size of device (%llu blocks)", 5233 - ext4_blocks_count(es), blocks_count); 4899 + if (ext4_geometry_check(sb, es)) 5234 4900 goto failed_mount; 5235 - } 5236 4901 5237 - /* 5238 - * It makes no sense for the first data block to be beyond the end 5239 - * of the filesystem. 5240 - */ 5241 - if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 5242 - ext4_msg(sb, KERN_WARNING, "bad geometry: first data " 5243 - "block %u is beyond end of filesystem (%llu)", 5244 - le32_to_cpu(es->s_first_data_block), 5245 - ext4_blocks_count(es)); 4902 + err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed); 4903 + if (err) 5246 4904 goto failed_mount; 5247 - } 5248 - if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && 5249 - (sbi->s_cluster_ratio == 1)) { 5250 - ext4_msg(sb, KERN_WARNING, "bad geometry: first data " 5251 - "block is 0 with a 1k block and cluster size"); 5252 - goto failed_mount; 5253 - } 5254 - 5255 - blocks_count = (ext4_blocks_count(es) - 5256 - le32_to_cpu(es->s_first_data_block) + 5257 - EXT4_BLOCKS_PER_GROUP(sb) - 1); 5258 - do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 5259 - if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 5260 - ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " 5261 - "(block count %llu, first data block %u, " 5262 - "blocks per group 
%lu)", blocks_count, 5263 - ext4_blocks_count(es), 5264 - le32_to_cpu(es->s_first_data_block), 5265 - EXT4_BLOCKS_PER_GROUP(sb)); 5266 - goto failed_mount; 5267 - } 5268 - sbi->s_groups_count = blocks_count; 5269 - sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 5270 - (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 5271 - if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != 5272 - le32_to_cpu(es->s_inodes_count)) { 5273 - ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", 5274 - le32_to_cpu(es->s_inodes_count), 5275 - ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); 5276 - ret = -EINVAL; 5277 - goto failed_mount; 5278 - } 5279 - db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 5280 - EXT4_DESC_PER_BLOCK(sb); 5281 - if (ext4_has_feature_meta_bg(sb)) { 5282 - if (le32_to_cpu(es->s_first_meta_bg) > db_count) { 5283 - ext4_msg(sb, KERN_WARNING, 5284 - "first meta block group too large: %u " 5285 - "(group descriptor block count %u)", 5286 - le32_to_cpu(es->s_first_meta_bg), db_count); 5287 - goto failed_mount; 5288 - } 5289 - } 5290 - rcu_assign_pointer(sbi->s_group_desc, 5291 - kvmalloc_array(db_count, 5292 - sizeof(struct buffer_head *), 5293 - GFP_KERNEL)); 5294 - if (sbi->s_group_desc == NULL) { 5295 - ext4_msg(sb, KERN_ERR, "not enough memory"); 5296 - ret = -ENOMEM; 5297 - goto failed_mount; 5298 - } 5299 - 5300 - bgl_lock_init(sbi->s_blockgroup_lock); 5301 - 5302 - /* Pre-read the descriptors into the buffer cache */ 5303 - for (i = 0; i < db_count; i++) { 5304 - block = descriptor_loc(sb, logical_sb_block, i); 5305 - ext4_sb_breadahead_unmovable(sb, block); 5306 - } 5307 - 5308 - for (i = 0; i < db_count; i++) { 5309 - struct buffer_head *bh; 5310 - 5311 - block = descriptor_loc(sb, logical_sb_block, i); 5312 - bh = ext4_sb_bread_unmovable(sb, block); 5313 - if (IS_ERR(bh)) { 5314 - ext4_msg(sb, KERN_ERR, 5315 - "can't read group descriptor %d", i); 5316 - db_count = i; 5317 - ret = 
PTR_ERR(bh); 5318 - goto failed_mount2; 5319 - } 5320 - rcu_read_lock(); 5321 - rcu_dereference(sbi->s_group_desc)[i] = bh; 5322 - rcu_read_unlock(); 5323 - } 5324 - sbi->s_gdb_count = db_count; 5325 - if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { 5326 - ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 5327 - ret = -EFSCORRUPTED; 5328 - goto failed_mount2; 5329 - } 5330 4905 5331 4906 timer_setup(&sbi->s_err_report, print_daily_error_info, 0); 5332 4907 spin_lock_init(&sbi->s_error_lock); ··· 5265 5038 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 5266 5039 mutex_init(&sbi->s_orphan_lock); 5267 5040 5268 - /* Initialize fast commit stuff */ 5269 - atomic_set(&sbi->s_fc_subtid, 0); 5270 - INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); 5271 - INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); 5272 - INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); 5273 - INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); 5274 - sbi->s_fc_bytes = 0; 5275 - ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 5276 - sbi->s_fc_ineligible_tid = 0; 5277 - spin_lock_init(&sbi->s_fc_lock); 5278 - memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); 5279 - sbi->s_fc_replay_state.fc_regions = NULL; 5280 - sbi->s_fc_replay_state.fc_regions_size = 0; 5281 - sbi->s_fc_replay_state.fc_regions_used = 0; 5282 - sbi->s_fc_replay_state.fc_regions_valid = 0; 5283 - sbi->s_fc_replay_state.fc_modified_inodes = NULL; 5284 - sbi->s_fc_replay_state.fc_modified_inodes_size = 0; 5285 - sbi->s_fc_replay_state.fc_modified_inodes_used = 0; 5041 + ext4_fast_commit_init(sb); 5286 5042 5287 5043 sb->s_root = NULL; 5288 5044 ··· 5282 5072 * root first: it may be modified in the journal! 
5283 5073 */ 5284 5074 if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { 5285 - err = ext4_load_journal(sb, es, ctx->journal_devnum); 5075 + err = ext4_load_and_init_journal(sb, es, ctx); 5286 5076 if (err) 5287 5077 goto failed_mount3a; 5288 5078 } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && 5289 5079 ext4_has_feature_journal_needs_recovery(sb)) { 5290 5080 ext4_msg(sb, KERN_ERR, "required journal recovery " 5291 5081 "suppressed and not mounted read-only"); 5292 - goto failed_mount_wq; 5082 + goto failed_mount3a; 5293 5083 } else { 5294 5084 /* Nojournal mode, all journal mount options are illegal */ 5295 5085 if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { 5296 5086 ext4_msg(sb, KERN_ERR, "can't mount with " 5297 5087 "journal_checksum, fs mounted w/o journal"); 5298 - goto failed_mount_wq; 5088 + goto failed_mount3a; 5299 5089 } 5300 5090 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 5301 5091 ext4_msg(sb, KERN_ERR, "can't mount with " 5302 5092 "journal_async_commit, fs mounted w/o journal"); 5303 - goto failed_mount_wq; 5093 + goto failed_mount3a; 5304 5094 } 5305 5095 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 5306 5096 ext4_msg(sb, KERN_ERR, "can't mount with " 5307 5097 "commit=%lu, fs mounted w/o journal", 5308 5098 sbi->s_commit_interval / HZ); 5309 - goto failed_mount_wq; 5099 + goto failed_mount3a; 5310 5100 } 5311 5101 if (EXT4_MOUNT_DATA_FLAGS & 5312 5102 (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { 5313 5103 ext4_msg(sb, KERN_ERR, "can't mount with " 5314 5104 "data=, fs mounted w/o journal"); 5315 - goto failed_mount_wq; 5105 + goto failed_mount3a; 5316 5106 } 5317 5107 sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; 5318 5108 clear_opt(sb, JOURNAL_CHECKSUM); ··· 5320 5110 clear_opt2(sb, JOURNAL_FAST_COMMIT); 5321 5111 sbi->s_journal = NULL; 5322 5112 needs_recovery = 0; 5323 - goto no_journal; 5324 5113 } 5325 5114 5326 - if (ext4_has_feature_64bit(sb) && 5327 - 
!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 5328 - JBD2_FEATURE_INCOMPAT_64BIT)) { 5329 - ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 5330 - goto failed_mount_wq; 5331 - } 5332 - 5333 - if (!set_journal_csum_feature_set(sb)) { 5334 - ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " 5335 - "feature set"); 5336 - goto failed_mount_wq; 5337 - } 5338 - 5339 - if (test_opt2(sb, JOURNAL_FAST_COMMIT) && 5340 - !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 5341 - JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) { 5342 - ext4_msg(sb, KERN_ERR, 5343 - "Failed to set fast commit journal feature"); 5344 - goto failed_mount_wq; 5345 - } 5346 - 5347 - /* We have now updated the journal if required, so we can 5348 - * validate the data journaling mode. */ 5349 - switch (test_opt(sb, DATA_FLAGS)) { 5350 - case 0: 5351 - /* No mode set, assume a default based on the journal 5352 - * capabilities: ORDERED_DATA if the journal can 5353 - * cope, else JOURNAL_DATA 5354 - */ 5355 - if (jbd2_journal_check_available_features 5356 - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 5357 - set_opt(sb, ORDERED_DATA); 5358 - sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 5359 - } else { 5360 - set_opt(sb, JOURNAL_DATA); 5361 - sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 5362 - } 5363 - break; 5364 - 5365 - case EXT4_MOUNT_ORDERED_DATA: 5366 - case EXT4_MOUNT_WRITEBACK_DATA: 5367 - if (!jbd2_journal_check_available_features 5368 - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 5369 - ext4_msg(sb, KERN_ERR, "Journal does not support " 5370 - "requested data journaling mode"); 5371 - goto failed_mount_wq; 5372 - } 5373 - break; 5374 - default: 5375 - break; 5376 - } 5377 - 5378 - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && 5379 - test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 5380 - ext4_msg(sb, KERN_ERR, "can't mount with " 5381 - "journal_async_commit in data=ordered mode"); 5382 - goto failed_mount_wq; 5383 - } 5384 - 5385 - 
set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); 5386 - 5387 - sbi->s_journal->j_submit_inode_data_buffers = 5388 - ext4_journal_submit_inode_data_buffers; 5389 - sbi->s_journal->j_finish_inode_data_buffers = 5390 - ext4_journal_finish_inode_data_buffers; 5391 - 5392 - no_journal: 5393 5115 if (!test_opt(sb, NO_MBCACHE)) { 5394 5116 sbi->s_ea_block_cache = ext4_xattr_create_cache(); 5395 5117 if (!sbi->s_ea_block_cache) { ··· 5340 5198 } 5341 5199 } 5342 5200 5343 - if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) { 5201 + if (ext4_has_feature_verity(sb) && sb->s_blocksize != PAGE_SIZE) { 5344 5202 ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity"); 5345 5203 goto failed_mount_wq; 5346 5204 } ··· 5550 5408 5551 5409 return 0; 5552 5410 5553 - cantfind_ext4: 5554 - if (!silent) 5555 - ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 5556 - goto failed_mount; 5557 - 5558 5411 failed_mount9: 5559 5412 ext4_release_orphan_info(sb); 5560 5413 failed_mount8: ··· 5603 5466 flush_work(&sbi->s_error_work); 5604 5467 del_timer_sync(&sbi->s_err_report); 5605 5468 ext4_stop_mmpd(sbi); 5606 - failed_mount2: 5607 - rcu_read_lock(); 5608 - group_desc = rcu_dereference(sbi->s_group_desc); 5609 - for (i = 0; i < db_count; i++) 5610 - brelse(group_desc[i]); 5611 - kvfree(group_desc); 5612 - rcu_read_unlock(); 5469 + ext4_group_desc_free(sbi); 5613 5470 failed_mount: 5614 5471 if (sbi->s_chksum_driver) 5615 5472 crypto_free_shash(sbi->s_chksum_driver); ··· 5618 5487 #endif 5619 5488 fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); 5620 5489 /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. 
*/ 5621 - brelse(bh); 5490 + brelse(sbi->s_sbh); 5622 5491 ext4_blkdev_remove(sbi); 5623 5492 out_fail: 5624 5493 sb->s_fs_info = NULL; ··· 6784 6653 handle_t *handle; 6785 6654 6786 6655 /* Data block + inode block */ 6787 - handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2); 6656 + handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2); 6788 6657 if (IS_ERR(handle)) 6789 6658 return PTR_ERR(handle); 6790 6659 ret = dquot_commit_info(sb, type);
+2 -4
fs/ext4/verity.c
··· 298 298 last_extent = path[path->p_depth].p_ext; 299 299 if (!last_extent) { 300 300 EXT4_ERROR_INODE(inode, "verity file has no extents"); 301 - ext4_ext_drop_refs(path); 302 - kfree(path); 301 + ext4_free_ext_path(path); 303 302 return -EFSCORRUPTED; 304 303 } 305 304 306 305 end_lblk = le32_to_cpu(last_extent->ee_block) + 307 306 ext4_ext_get_actual_len(last_extent); 308 307 desc_size_pos = (u64)end_lblk << inode->i_blkbits; 309 - ext4_ext_drop_refs(path); 310 - kfree(path); 308 + ext4_free_ext_path(path); 311 309 312 310 if (desc_size_pos < sizeof(desc_size_disk)) 313 311 goto bad;
+1
fs/ext4/xattr.c
··· 2412 2412 if (!error) { 2413 2413 ext4_xattr_update_super_block(handle, inode->i_sb); 2414 2414 inode->i_ctime = current_time(inode); 2415 + inode_inc_iversion(inode); 2415 2416 if (!value) 2416 2417 no_expand = 0; 2417 2418 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
+25 -12
fs/fs-writeback.c
··· 1718 1718 */ 1719 1719 if (!(inode->i_state & I_DIRTY_ALL)) 1720 1720 inode_cgwb_move_to_attached(inode, wb); 1721 - else if (!(inode->i_state & I_SYNC_QUEUED) && 1722 - (inode->i_state & I_DIRTY)) 1723 - redirty_tail_locked(inode, wb); 1721 + else if (!(inode->i_state & I_SYNC_QUEUED)) { 1722 + if ((inode->i_state & I_DIRTY)) 1723 + redirty_tail_locked(inode, wb); 1724 + else if (inode->i_state & I_DIRTY_TIME) { 1725 + inode->dirtied_when = jiffies; 1726 + inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); 1727 + } 1728 + } 1724 1729 1725 1730 spin_unlock(&wb->list_lock); 1726 1731 inode_sync_complete(inode); ··· 2375 2370 2376 2371 if (flags & I_DIRTY_INODE) { 2377 2372 /* 2373 + * Inode timestamp update will piggyback on this dirtying. 2374 + * We tell ->dirty_inode callback that timestamps need to 2375 + * be updated by setting I_DIRTY_TIME in flags. 2376 + */ 2377 + if (inode->i_state & I_DIRTY_TIME) { 2378 + spin_lock(&inode->i_lock); 2379 + if (inode->i_state & I_DIRTY_TIME) { 2380 + inode->i_state &= ~I_DIRTY_TIME; 2381 + flags |= I_DIRTY_TIME; 2382 + } 2383 + spin_unlock(&inode->i_lock); 2384 + } 2385 + 2386 + /* 2378 2387 * Notify the filesystem about the inode being dirtied, so that 2379 2388 * (if needed) it can update on-disk fields and journal the 2380 2389 * inode. This is only needed when the inode itself is being ··· 2397 2378 */ 2398 2379 trace_writeback_dirty_inode_start(inode, flags); 2399 2380 if (sb->s_op->dirty_inode) 2400 - sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE); 2381 + sb->s_op->dirty_inode(inode, 2382 + flags & (I_DIRTY_INODE | I_DIRTY_TIME)); 2401 2383 trace_writeback_dirty_inode(inode, flags); 2402 2384 2403 2385 /* I_DIRTY_INODE supersedes I_DIRTY_TIME. 
*/ ··· 2419 2399 */ 2420 2400 smp_mb(); 2421 2401 2422 - if (((inode->i_state & flags) == flags) || 2423 - (dirtytime && (inode->i_state & I_DIRTY_INODE))) 2402 + if ((inode->i_state & flags) == flags) 2424 2403 return; 2425 2404 2426 2405 spin_lock(&inode->i_lock); 2427 - if (dirtytime && (inode->i_state & I_DIRTY_INODE)) 2428 - goto out_unlock_inode; 2429 2406 if ((inode->i_state & flags) != flags) { 2430 2407 const int was_dirty = inode->i_state & I_DIRTY; 2431 2408 2432 2409 inode_attach_wb(inode, NULL); 2433 2410 2434 - /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */ 2435 - if (flags & I_DIRTY_INODE) 2436 - inode->i_state &= ~I_DIRTY_TIME; 2437 2411 inode->i_state |= flags; 2438 2412 2439 2413 /* ··· 2500 2486 out_unlock: 2501 2487 if (wb) 2502 2488 spin_unlock(&wb->list_lock); 2503 - out_unlock_inode: 2504 2489 spin_unlock(&inode->i_lock); 2505 2490 } 2506 2491 EXPORT_SYMBOL(__mark_inode_dirty);
+5 -7
fs/jbd2/commit.c
··· 122 122 { 123 123 struct commit_header *tmp; 124 124 struct buffer_head *bh; 125 - int ret; 126 125 struct timespec64 now; 126 + blk_opf_t write_flags = REQ_OP_WRITE | REQ_SYNC; 127 127 128 128 *cbh = NULL; 129 129 ··· 155 155 156 156 if (journal->j_flags & JBD2_BARRIER && 157 157 !jbd2_has_feature_async_commit(journal)) 158 - ret = submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | 159 - REQ_FUA, bh); 160 - else 161 - ret = submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); 158 + write_flags |= REQ_PREFLUSH | REQ_FUA; 162 159 160 + submit_bh(write_flags, bh); 163 161 *cbh = bh; 164 - return ret; 162 + return 0; 165 163 } 166 164 167 165 /* ··· 568 570 journal->j_running_transaction = NULL; 569 571 start_time = ktime_get(); 570 572 commit_transaction->t_log_start = journal->j_head; 571 - wake_up(&journal->j_wait_transaction_locked); 573 + wake_up_all(&journal->j_wait_transaction_locked); 572 574 write_unlock(&journal->j_state_lock); 573 575 574 576 jbd2_debug(3, "JBD2: commit phase 2a\n");
+12 -7
fs/jbd2/journal.c
··· 923 923 for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) { 924 924 bh = journal->j_fc_wbuf[i]; 925 925 wait_on_buffer(bh); 926 + /* 927 + * Update j_fc_off so jbd2_fc_release_bufs can release the remaining 928 + * buffer heads. 929 + */ 930 + if (unlikely(!buffer_uptodate(bh))) { 931 + journal->j_fc_off = i + 1; 932 + return -EIO; 933 + } 926 934 put_bh(bh); 927 935 journal->j_fc_wbuf[i] = NULL; 928 - if (unlikely(!buffer_uptodate(bh))) 929 - return -EIO; 930 936 } 931 937 932 938 return 0; ··· 1612 1606 { 1613 1607 struct buffer_head *bh = journal->j_sb_buffer; 1614 1608 journal_superblock_t *sb = journal->j_superblock; 1615 - int ret; 1609 + int ret = 0; 1616 1610 1617 1611 /* Buffer got discarded which means block device got invalidated */ 1618 1612 if (!buffer_mapped(bh)) { ··· 1642 1636 sb->s_checksum = jbd2_superblock_csum(journal, sb); 1643 1637 get_bh(bh); 1644 1638 bh->b_end_io = end_buffer_write_sync; 1645 - ret = submit_bh(REQ_OP_WRITE | write_flags, bh); 1639 + submit_bh(REQ_OP_WRITE | write_flags, bh); 1646 1640 wait_on_buffer(bh); 1647 1641 if (buffer_write_io_error(bh)) { 1648 1642 clear_buffer_write_io_error(bh); ··· 1650 1644 ret = -EIO; 1651 1645 } 1652 1646 if (ret) { 1653 - printk(KERN_ERR "JBD2: Error %d detected when updating " 1654 - "journal superblock for %s.\n", ret, 1655 - journal->j_devname); 1647 + printk(KERN_ERR "JBD2: I/O error when updating journal superblock for %s.\n", 1648 + journal->j_devname); 1649 if (!is_journal_aborted(journal)) 1657 1650 jbd2_journal_abort(journal, ret); 1658 1651 }
+1
fs/jbd2/recovery.c
··· 256 256 err = journal->j_fc_replay_callback(journal, bh, pass, 257 257 next_fc_block - journal->j_fc_first, 258 258 expected_commit_id); 259 + brelse(bh); 259 260 next_fc_block++; 260 261 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 261 262 break;
+3 -3
fs/jbd2/transaction.c
··· 168 168 int need_to_start; 169 169 tid_t tid = journal->j_running_transaction->t_tid; 170 170 171 - prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 171 + prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, 172 172 TASK_UNINTERRUPTIBLE); 173 173 need_to_start = !tid_geq(journal->j_commit_request, tid); 174 174 read_unlock(&journal->j_state_lock); ··· 194 194 read_unlock(&journal->j_state_lock); 195 195 return; 196 196 } 197 - prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 197 + prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, 198 198 TASK_UNINTERRUPTIBLE); 199 199 read_unlock(&journal->j_state_lock); 200 200 /* ··· 920 920 write_lock(&journal->j_state_lock); 921 921 --journal->j_barrier_count; 922 922 write_unlock(&journal->j_state_lock); 923 - wake_up(&journal->j_wait_transaction_locked); 923 + wake_up_all(&journal->j_wait_transaction_locked); 924 924 } 925 925 926 926 static void warn_dirty_buffer(struct buffer_head *bh)
+10 -7
fs/mbcache.c
··· 90 90 return -ENOMEM; 91 91 92 92 INIT_LIST_HEAD(&entry->e_list); 93 - /* Initial hash reference */ 94 - atomic_set(&entry->e_refcnt, 1); 93 + /* 94 + * We create entry with two references. One reference is kept by the 95 + * hash table, the other reference is used to protect us from 96 + * mb_cache_entry_delete_or_get() until the entry is fully setup. This 97 + * avoids nesting of cache->c_list_lock into hash table bit locks which 98 + * is problematic for RT. 99 + */ 100 + atomic_set(&entry->e_refcnt, 2); 95 101 entry->e_key = key; 96 102 entry->e_value = value; 97 103 entry->e_reusable = reusable; ··· 112 106 } 113 107 } 114 108 hlist_bl_add_head(&entry->e_hash_list, head); 115 - /* 116 - * Add entry to LRU list before it can be found by 117 - * mb_cache_entry_delete() to avoid races 118 - */ 109 + hlist_bl_unlock(head); 119 110 spin_lock(&cache->c_list_lock); 120 111 list_add_tail(&entry->e_list, &cache->c_list); 121 112 cache->c_entry_count++; 122 113 spin_unlock(&cache->c_list_lock); 123 - hlist_bl_unlock(head); 114 + mb_cache_entry_put(cache, entry); 124 115 125 116 return 0; 126 117 }
+2 -2
fs/ntfs/file.c
··· 527 527 goto out; 528 528 } 529 529 530 - static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) 530 + static inline void ntfs_submit_bh_for_read(struct buffer_head *bh) 531 531 { 532 532 lock_buffer(bh); 533 533 get_bh(bh); 534 534 bh->b_end_io = end_buffer_read_sync; 535 - return submit_bh(REQ_OP_READ, bh); 535 + submit_bh(REQ_OP_READ, bh); 536 536 } 537 537 538 538 /**
+8 -2
fs/xfs/xfs_super.c
··· 653 653 static void 654 654 xfs_fs_dirty_inode( 655 655 struct inode *inode, 656 - int flag) 656 + int flags) 657 657 { 658 658 struct xfs_inode *ip = XFS_I(inode); 659 659 struct xfs_mount *mp = ip->i_mount; ··· 661 661 662 662 if (!(inode->i_sb->s_flags & SB_LAZYTIME)) 663 663 return; 664 - if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME)) 664 + 665 + /* 666 + * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC) 667 + * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed 668 + * in flags possibly together with I_DIRTY_SYNC. 669 + */ 670 + if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME)) 665 671 return; 666 672 667 673 if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
+1 -1
include/linux/buffer_head.h
··· 240 240 int sync_dirty_buffer(struct buffer_head *bh); 241 241 int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); 242 242 void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); 243 - int submit_bh(blk_opf_t, struct buffer_head *); 243 + void submit_bh(blk_opf_t, struct buffer_head *); 244 244 void write_boundary_block(struct block_device *bdev, 245 245 sector_t bblock, unsigned blocksize); 246 246 int bh_uptodate_or_lock(struct buffer_head *bh);
+5 -4
include/linux/fs.h
··· 2372 2372 * don't have to write inode on fdatasync() when only 2373 2373 * e.g. the timestamps have changed. 2374 2374 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. 2375 - * I_DIRTY_TIME The inode itself only has dirty timestamps, and the 2375 + * I_DIRTY_TIME The inode itself has dirty timestamps, and the 2376 2376 * lazytime mount option is enabled. We keep track of this 2377 2377 * separately from I_DIRTY_SYNC in order to implement 2378 2378 * lazytime. This gets cleared if I_DIRTY_INODE 2379 - * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e. 2380 - * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in 2381 - * i_state, but not both. I_DIRTY_PAGES may still be set. 2379 + * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But 2380 + * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already 2381 + * in place because writeback might already be in progress 2382 + * and we don't want to lose the time update 2382 2383 * I_NEW Serves as both a mutex and completion notification. 2383 2384 * New inodes set I_NEW. If two processes both create 2384 2385 * the same inode, one of them will release its inode and