Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"Various bug fixes and cleanups for ext4.

In particular, move the crypto related functions from fs/ext4/super.c
into a new fs/ext4/crypto.c, and fix a number of bugs found by fuzzers
and error injection tools"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
ext4: only allow test_dummy_encryption when supported
ext4: fix bug_on in __es_tree_search
ext4: avoid cycles in directory h-tree
ext4: verify dir block before splitting it
ext4: filter out EXT4_FC_REPLAY from on-disk superblock field s_state
ext4: fix bug_on in ext4_writepages
ext4: refactor and move ext4_ioctl_get_encryption_pwsalt()
ext4: cleanup function defs from ext4.h into crypto.c
ext4: move ext4 crypto code to its own file crypto.c
ext4: fix memory leak in parse_apply_sb_mount_options()
ext4: reject the 'commit' option on ext2 filesystems
ext4: remove duplicated #include of dax.h in inode.c
ext4: fix race condition between ext4_write and ext4_convert_inline_data
ext4: convert symlink external data block mapping to bdev
ext4: add nowait mode for ext4_getblk()
ext4: fix journal_ioprio mount option handling
ext4: mark group as trimmed only if it was fully scanned
ext4: fix use-after-free in ext4_rename_dir_prepare
ext4: add unmount filesystem message
ext4: remove unnecessary conditionals
...

+564 -429
+1
fs/ext4/Makefile
··· 17 17 ext4-inode-test-objs += inode-test.o 18 18 obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o 19 19 ext4-$(CONFIG_FS_VERITY) += verity.o 20 + ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o
+246
fs/ext4/crypto.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/quotaops.h> 4 + #include <linux/uuid.h> 5 + 6 + #include "ext4.h" 7 + #include "xattr.h" 8 + #include "ext4_jbd2.h" 9 + 10 + static void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, 11 + const struct fscrypt_name *src) 12 + { 13 + memset(dst, 0, sizeof(*dst)); 14 + 15 + dst->usr_fname = src->usr_fname; 16 + dst->disk_name = src->disk_name; 17 + dst->hinfo.hash = src->hash; 18 + dst->hinfo.minor_hash = src->minor_hash; 19 + dst->crypto_buf = src->crypto_buf; 20 + } 21 + 22 + int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, 23 + int lookup, struct ext4_filename *fname) 24 + { 25 + struct fscrypt_name name; 26 + int err; 27 + 28 + err = fscrypt_setup_filename(dir, iname, lookup, &name); 29 + if (err) 30 + return err; 31 + 32 + ext4_fname_from_fscrypt_name(fname, &name); 33 + 34 + #if IS_ENABLED(CONFIG_UNICODE) 35 + err = ext4_fname_setup_ci_filename(dir, iname, fname); 36 + #endif 37 + return err; 38 + } 39 + 40 + int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry, 41 + struct ext4_filename *fname) 42 + { 43 + struct fscrypt_name name; 44 + int err; 45 + 46 + err = fscrypt_prepare_lookup(dir, dentry, &name); 47 + if (err) 48 + return err; 49 + 50 + ext4_fname_from_fscrypt_name(fname, &name); 51 + 52 + #if IS_ENABLED(CONFIG_UNICODE) 53 + err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); 54 + #endif 55 + return err; 56 + } 57 + 58 + void ext4_fname_free_filename(struct ext4_filename *fname) 59 + { 60 + struct fscrypt_name name; 61 + 62 + name.crypto_buf = fname->crypto_buf; 63 + fscrypt_free_filename(&name); 64 + 65 + fname->crypto_buf.name = NULL; 66 + fname->usr_fname = NULL; 67 + fname->disk_name.name = NULL; 68 + 69 + #if IS_ENABLED(CONFIG_UNICODE) 70 + kfree(fname->cf_name.name); 71 + fname->cf_name.name = NULL; 72 + #endif 73 + } 74 + 75 + static bool uuid_is_zero(__u8 u[16]) 76 + { 77 + int i; 78 + 79 + for (i = 0; i < 16; 
i++) 80 + if (u[i]) 81 + return false; 82 + return true; 83 + } 84 + 85 + int ext4_ioctl_get_encryption_pwsalt(struct file *filp, void __user *arg) 86 + { 87 + struct super_block *sb = file_inode(filp)->i_sb; 88 + struct ext4_sb_info *sbi = EXT4_SB(sb); 89 + int err, err2; 90 + handle_t *handle; 91 + 92 + if (!ext4_has_feature_encrypt(sb)) 93 + return -EOPNOTSUPP; 94 + 95 + if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) { 96 + err = mnt_want_write_file(filp); 97 + if (err) 98 + return err; 99 + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); 100 + if (IS_ERR(handle)) { 101 + err = PTR_ERR(handle); 102 + goto pwsalt_err_exit; 103 + } 104 + err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh, 105 + EXT4_JTR_NONE); 106 + if (err) 107 + goto pwsalt_err_journal; 108 + lock_buffer(sbi->s_sbh); 109 + generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); 110 + ext4_superblock_csum_set(sb); 111 + unlock_buffer(sbi->s_sbh); 112 + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); 113 + pwsalt_err_journal: 114 + err2 = ext4_journal_stop(handle); 115 + if (err2 && !err) 116 + err = err2; 117 + pwsalt_err_exit: 118 + mnt_drop_write_file(filp); 119 + if (err) 120 + return err; 121 + } 122 + 123 + if (copy_to_user(arg, sbi->s_es->s_encrypt_pw_salt, 16)) 124 + return -EFAULT; 125 + return 0; 126 + } 127 + 128 + static int ext4_get_context(struct inode *inode, void *ctx, size_t len) 129 + { 130 + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, 131 + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); 132 + } 133 + 134 + static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, 135 + void *fs_data) 136 + { 137 + handle_t *handle = fs_data; 138 + int res, res2, credits, retries = 0; 139 + 140 + /* 141 + * Encrypting the root directory is not allowed because e2fsck expects 142 + * lost+found to exist and be unencrypted, and encrypting the root 143 + * directory would imply encrypting the lost+found directory as well as 144 + * the filename 
"lost+found" itself. 145 + */ 146 + if (inode->i_ino == EXT4_ROOT_INO) 147 + return -EPERM; 148 + 149 + if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode))) 150 + return -EINVAL; 151 + 152 + if (ext4_test_inode_flag(inode, EXT4_INODE_DAX)) 153 + return -EOPNOTSUPP; 154 + 155 + res = ext4_convert_inline_data(inode); 156 + if (res) 157 + return res; 158 + 159 + /* 160 + * If a journal handle was specified, then the encryption context is 161 + * being set on a new inode via inheritance and is part of a larger 162 + * transaction to create the inode. Otherwise the encryption context is 163 + * being set on an existing inode in its own transaction. Only in the 164 + * latter case should the "retry on ENOSPC" logic be used. 165 + */ 166 + 167 + if (handle) { 168 + res = ext4_xattr_set_handle(handle, inode, 169 + EXT4_XATTR_INDEX_ENCRYPTION, 170 + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, 171 + ctx, len, 0); 172 + if (!res) { 173 + ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); 174 + ext4_clear_inode_state(inode, 175 + EXT4_STATE_MAY_INLINE_DATA); 176 + /* 177 + * Update inode->i_flags - S_ENCRYPTED will be enabled, 178 + * S_DAX may be disabled 179 + */ 180 + ext4_set_inode_flags(inode, false); 181 + } 182 + return res; 183 + } 184 + 185 + res = dquot_initialize(inode); 186 + if (res) 187 + return res; 188 + retry: 189 + res = ext4_xattr_set_credits(inode, len, false /* is_create */, 190 + &credits); 191 + if (res) 192 + return res; 193 + 194 + handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); 195 + if (IS_ERR(handle)) 196 + return PTR_ERR(handle); 197 + 198 + res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, 199 + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, 200 + ctx, len, 0); 201 + if (!res) { 202 + ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); 203 + /* 204 + * Update inode->i_flags - S_ENCRYPTED will be enabled, 205 + * S_DAX may be disabled 206 + */ 207 + ext4_set_inode_flags(inode, false); 208 + res = ext4_mark_inode_dirty(handle, inode); 209 + 
if (res) 210 + EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); 211 + } 212 + res2 = ext4_journal_stop(handle); 213 + 214 + if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 215 + goto retry; 216 + if (!res) 217 + res = res2; 218 + return res; 219 + } 220 + 221 + static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb) 222 + { 223 + return EXT4_SB(sb)->s_dummy_enc_policy.policy; 224 + } 225 + 226 + static bool ext4_has_stable_inodes(struct super_block *sb) 227 + { 228 + return ext4_has_feature_stable_inodes(sb); 229 + } 230 + 231 + static void ext4_get_ino_and_lblk_bits(struct super_block *sb, 232 + int *ino_bits_ret, int *lblk_bits_ret) 233 + { 234 + *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); 235 + *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); 236 + } 237 + 238 + const struct fscrypt_operations ext4_cryptops = { 239 + .key_prefix = "ext4:", 240 + .get_context = ext4_get_context, 241 + .set_context = ext4_set_context, 242 + .get_dummy_policy = ext4_get_dummy_policy, 243 + .empty_dir = ext4_empty_dir, 244 + .has_stable_inodes = ext4_has_stable_inodes, 245 + .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, 246 + };
+3 -3
fs/ext4/dir.c
··· 412 412 }; 413 413 414 414 /* 415 - * This functoin implements a non-recursive way of freeing all of the 415 + * This function implements a non-recursive way of freeing all of the 416 416 * nodes in the red-black tree. 417 417 */ 418 418 static void free_rb_tree_fname(struct rb_root *root) ··· 515 515 516 516 /* 517 517 * This is a helper function for ext4_dx_readdir. It calls filldir 518 - * for all entres on the fname linked list. (Normally there is only 518 + * for all entries on the fname linked list. (Normally there is only 519 519 * one entry on the linked list, unless there are 62 bit hash collisions.) 520 520 */ 521 521 static int call_filldir(struct file *file, struct dir_context *ctx, ··· 648 648 unsigned int offset = 0; 649 649 char *top; 650 650 651 - de = (struct ext4_dir_entry_2 *)buf; 651 + de = buf; 652 652 top = buf + buf_size; 653 653 while ((char *) de < top) { 654 654 if (ext4_check_dir_entry(dir, NULL, de, bh,
+16 -68
fs/ext4/ext4.h
··· 673 673 /* Caller will submit data before dropping transaction handle. This 674 674 * allows jbd2 to avoid submitting data before commit. */ 675 675 #define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400 676 + /* Caller is in the atomic contex, find extent if it has been cached */ 677 + #define EXT4_GET_BLOCKS_CACHED_NOWAIT 0x0800 676 678 677 679 /* 678 680 * The bit position of these flags must not overlap with any of the ··· 1441 1439 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) 1442 1440 1443 1441 #ifdef __KERNEL__ 1444 - 1445 - #ifdef CONFIG_FS_ENCRYPTION 1446 - #define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL) 1447 - #else 1448 - #define DUMMY_ENCRYPTION_ENABLED(sbi) (0) 1449 - #endif 1450 1442 1451 1443 /* Number of quota types we support */ 1452 1444 #define EXT4_MAXQUOTAS 3 ··· 2727 2731 struct ext4_filename *fname); 2728 2732 #endif 2729 2733 2734 + /* ext4 encryption related stuff goes here crypto.c */ 2730 2735 #ifdef CONFIG_FS_ENCRYPTION 2731 - static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, 2732 - const struct fscrypt_name *src) 2733 - { 2734 - memset(dst, 0, sizeof(*dst)); 2736 + extern const struct fscrypt_operations ext4_cryptops; 2735 2737 2736 - dst->usr_fname = src->usr_fname; 2737 - dst->disk_name = src->disk_name; 2738 - dst->hinfo.hash = src->hash; 2739 - dst->hinfo.minor_hash = src->minor_hash; 2740 - dst->crypto_buf = src->crypto_buf; 2741 - } 2738 + int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, 2739 + int lookup, struct ext4_filename *fname); 2742 2740 2743 - static inline int ext4_fname_setup_filename(struct inode *dir, 2744 - const struct qstr *iname, 2745 - int lookup, 2746 - struct ext4_filename *fname) 2747 - { 2748 - struct fscrypt_name name; 2749 - int err; 2741 + int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry, 2742 + struct ext4_filename *fname); 2750 2743 2751 - err = fscrypt_setup_filename(dir, iname, lookup, &name); 
2752 - if (err) 2753 - return err; 2744 + void ext4_fname_free_filename(struct ext4_filename *fname); 2754 2745 2755 - ext4_fname_from_fscrypt_name(fname, &name); 2746 + int ext4_ioctl_get_encryption_pwsalt(struct file *filp, void __user *arg); 2756 2747 2757 - #if IS_ENABLED(CONFIG_UNICODE) 2758 - err = ext4_fname_setup_ci_filename(dir, iname, fname); 2759 - #endif 2760 - return err; 2761 - } 2762 - 2763 - static inline int ext4_fname_prepare_lookup(struct inode *dir, 2764 - struct dentry *dentry, 2765 - struct ext4_filename *fname) 2766 - { 2767 - struct fscrypt_name name; 2768 - int err; 2769 - 2770 - err = fscrypt_prepare_lookup(dir, dentry, &name); 2771 - if (err) 2772 - return err; 2773 - 2774 - ext4_fname_from_fscrypt_name(fname, &name); 2775 - 2776 - #if IS_ENABLED(CONFIG_UNICODE) 2777 - err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); 2778 - #endif 2779 - return err; 2780 - } 2781 - 2782 - static inline void ext4_fname_free_filename(struct ext4_filename *fname) 2783 - { 2784 - struct fscrypt_name name; 2785 - 2786 - name.crypto_buf = fname->crypto_buf; 2787 - fscrypt_free_filename(&name); 2788 - 2789 - fname->crypto_buf.name = NULL; 2790 - fname->usr_fname = NULL; 2791 - fname->disk_name.name = NULL; 2792 - 2793 - #if IS_ENABLED(CONFIG_UNICODE) 2794 - kfree(fname->cf_name.name); 2795 - fname->cf_name.name = NULL; 2796 - #endif 2797 - } 2798 2748 #else /* !CONFIG_FS_ENCRYPTION */ 2799 2749 static inline int ext4_fname_setup_filename(struct inode *dir, 2800 2750 const struct qstr *iname, ··· 2772 2830 kfree(fname->cf_name.name); 2773 2831 fname->cf_name.name = NULL; 2774 2832 #endif 2833 + } 2834 + 2835 + static inline int ext4_ioctl_get_encryption_pwsalt(struct file *filp, 2836 + void __user *arg) 2837 + { 2838 + return -EOPNOTSUPP; 2775 2839 } 2776 2840 #endif /* !CONFIG_FS_ENCRYPTION */ 2777 2841
+11 -9
fs/ext4/extents.c
··· 372 372 { 373 373 unsigned short entries; 374 374 ext4_lblk_t lblock = 0; 375 - ext4_lblk_t prev = 0; 375 + ext4_lblk_t cur = 0; 376 376 377 377 if (eh->eh_entries == 0) 378 378 return 1; ··· 396 396 397 397 /* Check for overlapping extents */ 398 398 lblock = le32_to_cpu(ext->ee_block); 399 - if ((lblock <= prev) && prev) { 399 + if (lblock < cur) { 400 400 *pblk = ext4_ext_pblock(ext); 401 401 return 0; 402 402 } 403 - prev = lblock + ext4_ext_get_actual_len(ext) - 1; 403 + cur = lblock + ext4_ext_get_actual_len(ext); 404 404 ext++; 405 405 entries--; 406 406 } ··· 420 420 421 421 /* Check for overlapping index extents */ 422 422 lblock = le32_to_cpu(ext_idx->ei_block); 423 - if ((lblock <= prev) && prev) { 423 + if (lblock < cur) { 424 424 *pblk = ext4_idx_pblock(ext_idx); 425 425 return 0; 426 426 } 427 427 ext_idx++; 428 428 entries--; 429 - prev = lblock; 429 + cur = lblock + 1; 430 430 } 431 431 } 432 432 return 1; ··· 4693 4693 FALLOC_FL_INSERT_RANGE)) 4694 4694 return -EOPNOTSUPP; 4695 4695 4696 + inode_lock(inode); 4697 + ret = ext4_convert_inline_data(inode); 4698 + inode_unlock(inode); 4699 + if (ret) 4700 + goto exit; 4701 + 4696 4702 if (mode & FALLOC_FL_PUNCH_HOLE) { 4697 4703 ret = ext4_punch_hole(file, offset, len); 4698 4704 goto exit; 4699 4705 } 4700 - 4701 - ret = ext4_convert_inline_data(inode); 4702 - if (ret) 4703 - goto exit; 4704 4706 4705 4707 if (mode & FALLOC_FL_COLLAPSE_RANGE) { 4706 4708 ret = ext4_collapse_range(file, offset, len);
+6 -7
fs/ext4/fast_commit.c
··· 970 970 /* Submit data for all the fast commit inodes */ 971 971 static int ext4_fc_submit_inode_data_all(journal_t *journal) 972 972 { 973 - struct super_block *sb = (struct super_block *)(journal->j_private); 973 + struct super_block *sb = journal->j_private; 974 974 struct ext4_sb_info *sbi = EXT4_SB(sb); 975 975 struct ext4_inode_info *ei; 976 976 int ret = 0; ··· 1004 1004 /* Wait for completion of data for all the fast commit inodes */ 1005 1005 static int ext4_fc_wait_inode_data_all(journal_t *journal) 1006 1006 { 1007 - struct super_block *sb = (struct super_block *)(journal->j_private); 1007 + struct super_block *sb = journal->j_private; 1008 1008 struct ext4_sb_info *sbi = EXT4_SB(sb); 1009 1009 struct ext4_inode_info *pos, *n; 1010 1010 int ret = 0; ··· 1031 1031 __acquires(&sbi->s_fc_lock) 1032 1032 __releases(&sbi->s_fc_lock) 1033 1033 { 1034 - struct super_block *sb = (struct super_block *)(journal->j_private); 1034 + struct super_block *sb = journal->j_private; 1035 1035 struct ext4_sb_info *sbi = EXT4_SB(sb); 1036 1036 struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; 1037 1037 struct inode *inode; ··· 1093 1093 1094 1094 static int ext4_fc_perform_commit(journal_t *journal) 1095 1095 { 1096 - struct super_block *sb = (struct super_block *)(journal->j_private); 1096 + struct super_block *sb = journal->j_private; 1097 1097 struct ext4_sb_info *sbi = EXT4_SB(sb); 1098 1098 struct ext4_inode_info *iter; 1099 1099 struct ext4_fc_head head; ··· 1198 1198 */ 1199 1199 int ext4_fc_commit(journal_t *journal, tid_t commit_tid) 1200 1200 { 1201 - struct super_block *sb = (struct super_block *)(journal->j_private); 1201 + struct super_block *sb = journal->j_private; 1202 1202 struct ext4_sb_info *sbi = EXT4_SB(sb); 1203 1203 int nblks = 0, ret, bsize = journal->j_blocksize; 1204 1204 int subtid = atomic_read(&sbi->s_fc_subtid); ··· 1659 1659 set_nlink(inode, 1); 1660 1660 ext4_mark_inode_dirty(NULL, inode); 1661 1661 out: 1662 - if (inode) 1663 - 
iput(inode); 1662 + iput(inode); 1664 1663 return ret; 1665 1664 } 1666 1665
+15 -3
fs/ext4/inline.c
··· 1083 1083 void *limit; 1084 1084 int de_len; 1085 1085 1086 - de = (struct ext4_dir_entry_2 *)de_buf; 1086 + de = de_buf; 1087 1087 if (old_size) { 1088 1088 limit = de_buf + old_size; 1089 1089 do { 1090 1090 prev_de = de; 1091 1091 de_len = ext4_rec_len_from_disk(de->rec_len, old_size); 1092 1092 de_buf += de_len; 1093 - de = (struct ext4_dir_entry_2 *)de_buf; 1093 + de = de_buf; 1094 1094 } while (de_buf < limit); 1095 1095 1096 1096 prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size - ··· 1155 1155 * First create "." and ".." and then copy the dir information 1156 1156 * back to the block. 1157 1157 */ 1158 - de = (struct ext4_dir_entry_2 *)target; 1158 + de = target; 1159 1159 de = ext4_init_dot_dotdot(inode, de, 1160 1160 inode->i_sb->s_blocksize, csum_size, 1161 1161 le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1); ··· 2005 2005 if (!ext4_has_inline_data(inode)) { 2006 2006 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 2007 2007 return 0; 2008 + } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 2009 + /* 2010 + * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is 2011 + * cleared. This means we are in the middle of moving of 2012 + * inline data to delay allocated block. Just force writeout 2013 + * here to finish conversion. 2014 + */ 2015 + error = filemap_flush(inode->i_mapping); 2016 + if (error) 2017 + return error; 2018 + if (!ext4_has_inline_data(inode)) 2019 + return 0; 2008 2020 } 2009 2021 2010 2022 needed_blocks = ext4_writepage_trans_blocks(inode);
+20 -17
fs/ext4/inode.c
··· 41 41 #include <linux/bitops.h> 42 42 #include <linux/iomap.h> 43 43 #include <linux/iversion.h> 44 - #include <linux/dax.h> 45 44 46 45 #include "ext4_jbd2.h" 47 46 #include "xattr.h" ··· 198 199 */ 199 200 if (inode->i_ino != EXT4_JOURNAL_INO && 200 201 ext4_should_journal_data(inode) && 201 - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && 202 - inode->i_data.nrpages) { 202 + S_ISREG(inode->i_mode) && inode->i_data.nrpages) { 203 203 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 204 204 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; 205 205 ··· 543 545 } else { 544 546 BUG(); 545 547 } 548 + 549 + if (flags & EXT4_GET_BLOCKS_CACHED_NOWAIT) 550 + return retval; 546 551 #ifdef ES_AGGRESSIVE_TEST 547 552 ext4_map_blocks_es_recheck(handle, inode, map, 548 553 &orig_map, flags); 549 554 #endif 550 555 goto found; 551 556 } 557 + /* 558 + * In the query cache no-wait mode, nothing we can do more if we 559 + * cannot find extent in the cache. 560 + */ 561 + if (flags & EXT4_GET_BLOCKS_CACHED_NOWAIT) 562 + return 0; 552 563 553 564 /* 554 565 * Try to see if we can get the block without requesting a new ··· 844 837 struct ext4_map_blocks map; 845 838 struct buffer_head *bh; 846 839 int create = map_flags & EXT4_GET_BLOCKS_CREATE; 840 + bool nowait = map_flags & EXT4_GET_BLOCKS_CACHED_NOWAIT; 847 841 int err; 848 842 849 843 ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 850 844 || handle != NULL || create == 0); 845 + ASSERT(create == 0 || !nowait); 851 846 852 847 map.m_lblk = block; 853 848 map.m_len = 1; ··· 859 850 return create ? 
ERR_PTR(-ENOSPC) : NULL; 860 851 if (err < 0) 861 852 return ERR_PTR(err); 853 + 854 + if (nowait) 855 + return sb_find_get_block(inode->i_sb, map.m_pblk); 862 856 863 857 bh = sb_getblk(inode->i_sb, map.m_pblk); 864 858 if (unlikely(!bh)) ··· 2956 2944 2957 2945 index = pos >> PAGE_SHIFT; 2958 2946 2959 - if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) || 2960 - ext4_verity_in_progress(inode)) { 2947 + if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) { 2961 2948 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; 2962 2949 return ext4_write_begin(file, mapping, pos, 2963 2950 len, flags, pagep, fsdata); ··· 3978 3967 3979 3968 trace_ext4_punch_hole(inode, offset, length, 0); 3980 3969 3981 - ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 3982 - if (ext4_has_inline_data(inode)) { 3983 - filemap_invalidate_lock(mapping); 3984 - ret = ext4_convert_inline_data(inode); 3985 - filemap_invalidate_unlock(mapping); 3986 - if (ret) 3987 - return ret; 3988 - } 3989 - 3990 3970 /* 3991 3971 * Write out all dirty pages to avoid race conditions 3992 3972 * Then release them. 
··· 4993 4991 } 4994 4992 if (IS_ENCRYPTED(inode)) { 4995 4993 inode->i_op = &ext4_encrypted_symlink_inode_operations; 4996 - ext4_set_aops(inode); 4997 4994 } else if (ext4_inode_is_fast_symlink(inode)) { 4998 4995 inode->i_link = (char *)ei->i_data; 4999 4996 inode->i_op = &ext4_fast_symlink_inode_operations; ··· 5000 4999 sizeof(ei->i_data) - 1); 5001 5000 } else { 5002 5001 inode->i_op = &ext4_symlink_inode_operations; 5003 - ext4_set_aops(inode); 5004 5002 } 5005 - inode_nohighmem(inode); 5006 5003 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 5007 5004 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 5008 5005 inode->i_op = &ext4_special_inode_operations; ··· 5397 5398 if (attr->ia_valid & ATTR_SIZE) { 5398 5399 handle_t *handle; 5399 5400 loff_t oldsize = inode->i_size; 5401 + loff_t old_disksize; 5400 5402 int shrink = (attr->ia_size < inode->i_size); 5401 5403 5402 5404 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { ··· 5469 5469 inode->i_sb->s_blocksize_bits); 5470 5470 5471 5471 down_write(&EXT4_I(inode)->i_data_sem); 5472 + old_disksize = EXT4_I(inode)->i_disksize; 5472 5473 EXT4_I(inode)->i_disksize = attr->ia_size; 5473 5474 rc = ext4_mark_inode_dirty(handle, inode); 5474 5475 if (!error) ··· 5481 5480 */ 5482 5481 if (!error) 5483 5482 i_size_write(inode, attr->ia_size); 5483 + else 5484 + EXT4_I(inode)->i_disksize = old_disksize; 5484 5485 up_write(&EXT4_I(inode)->i_data_sem); 5485 5486 ext4_journal_stop(handle); 5486 5487 if (error)
+2 -57
fs/ext4/ioctl.c
··· 16 16 #include <linux/file.h> 17 17 #include <linux/quotaops.h> 18 18 #include <linux/random.h> 19 - #include <linux/uuid.h> 20 19 #include <linux/uaccess.h> 21 20 #include <linux/delay.h> 22 21 #include <linux/iversion.h> ··· 502 503 iput(inode_bl); 503 504 return err; 504 505 } 505 - 506 - #ifdef CONFIG_FS_ENCRYPTION 507 - static int uuid_is_zero(__u8 u[16]) 508 - { 509 - int i; 510 - 511 - for (i = 0; i < 16; i++) 512 - if (u[i]) 513 - return 0; 514 - return 1; 515 - } 516 - #endif 517 506 518 507 /* 519 508 * If immutable is set and we are not clearing it, we're not allowed to change ··· 1415 1428 return -EOPNOTSUPP; 1416 1429 return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); 1417 1430 1418 - case FS_IOC_GET_ENCRYPTION_PWSALT: { 1419 - #ifdef CONFIG_FS_ENCRYPTION 1420 - int err, err2; 1421 - struct ext4_sb_info *sbi = EXT4_SB(sb); 1422 - handle_t *handle; 1431 + case FS_IOC_GET_ENCRYPTION_PWSALT: 1432 + return ext4_ioctl_get_encryption_pwsalt(filp, (void __user *)arg); 1423 1433 1424 - if (!ext4_has_feature_encrypt(sb)) 1425 - return -EOPNOTSUPP; 1426 - if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) { 1427 - err = mnt_want_write_file(filp); 1428 - if (err) 1429 - return err; 1430 - handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); 1431 - if (IS_ERR(handle)) { 1432 - err = PTR_ERR(handle); 1433 - goto pwsalt_err_exit; 1434 - } 1435 - err = ext4_journal_get_write_access(handle, sb, 1436 - sbi->s_sbh, 1437 - EXT4_JTR_NONE); 1438 - if (err) 1439 - goto pwsalt_err_journal; 1440 - lock_buffer(sbi->s_sbh); 1441 - generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); 1442 - ext4_superblock_csum_set(sb); 1443 - unlock_buffer(sbi->s_sbh); 1444 - err = ext4_handle_dirty_metadata(handle, NULL, 1445 - sbi->s_sbh); 1446 - pwsalt_err_journal: 1447 - err2 = ext4_journal_stop(handle); 1448 - if (err2 && !err) 1449 - err = err2; 1450 - pwsalt_err_exit: 1451 - mnt_drop_write_file(filp); 1452 - if (err) 1453 - return err; 1454 - } 1455 - if (copy_to_user((void 
__user *) arg, 1456 - sbi->s_es->s_encrypt_pw_salt, 16)) 1457 - return -EFAULT; 1458 - return 0; 1459 - #else 1460 - return -EOPNOTSUPP; 1461 - #endif 1462 - } 1463 1434 case FS_IOC_GET_ENCRYPTION_POLICY: 1464 1435 if (!ext4_has_feature_encrypt(sb)) 1465 1436 return -EOPNOTSUPP;
+14 -11
fs/ext4/mballoc.c
··· 695 695 for (i = 0; i < max; i++) { 696 696 697 697 if (mb_test_bit(i, buddy)) { 698 - /* only single bit in buddy2 may be 1 */ 698 + /* only single bit in buddy2 may be 0 */ 699 699 if (!mb_test_bit(i << 1, buddy2)) { 700 700 MB_CHECK_ASSERT( 701 701 mb_test_bit((i<<1)+1, buddy2)); 702 - } else if (!mb_test_bit((i << 1) + 1, buddy2)) { 703 - MB_CHECK_ASSERT( 704 - mb_test_bit(i << 1, buddy2)); 705 702 } 706 703 continue; 707 704 } ··· 2916 2919 2917 2920 int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) 2918 2921 { 2919 - struct super_block *sb = (struct super_block *)seq->private; 2922 + struct super_block *sb = seq->private; 2920 2923 struct ext4_sb_info *sbi = EXT4_SB(sb); 2921 2924 2922 2925 seq_puts(seq, "mballoc:\n"); ··· 6395 6398 * @start: first group block to examine 6396 6399 * @max: last group block to examine 6397 6400 * @minblocks: minimum extent block count 6401 + * @set_trimmed: set the trimmed flag if at least one block is trimmed 6398 6402 * 6399 6403 * ext4_trim_all_free walks through group's block bitmap searching for free 6400 6404 * extents. 
When the free extent is found, mark it as used in group buddy ··· 6405 6407 static ext4_grpblk_t 6406 6408 ext4_trim_all_free(struct super_block *sb, ext4_group_t group, 6407 6409 ext4_grpblk_t start, ext4_grpblk_t max, 6408 - ext4_grpblk_t minblocks) 6410 + ext4_grpblk_t minblocks, bool set_trimmed) 6409 6411 { 6410 6412 struct ext4_buddy e4b; 6411 6413 int ret; ··· 6424 6426 if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || 6425 6427 minblocks < EXT4_SB(sb)->s_last_trim_minblks) { 6426 6428 ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); 6427 - if (ret >= 0) 6429 + if (ret >= 0 && set_trimmed) 6428 6430 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); 6429 6431 } else { 6430 6432 ret = 0; ··· 6461 6463 ext4_fsblk_t first_data_blk = 6462 6464 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 6463 6465 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); 6466 + bool whole_group, eof = false; 6464 6467 int ret = 0; 6465 6468 6466 6469 start = range->start >> sb->s_blocksize_bits; ··· 6480 6481 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) 6481 6482 goto out; 6482 6483 } 6483 - if (end >= max_blks) 6484 + if (end >= max_blks - 1) { 6484 6485 end = max_blks - 1; 6486 + eof = true; 6487 + } 6485 6488 if (end <= first_data_blk) 6486 6489 goto out; 6487 6490 if (start < first_data_blk) ··· 6497 6496 6498 6497 /* end now represents the last cluster to discard in this group */ 6499 6498 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; 6499 + whole_group = true; 6500 6500 6501 6501 for (group = first_group; group <= last_group; group++) { 6502 6502 grp = ext4_get_group_info(sb, group); ··· 6514 6512 * change it for the last group, note that last_cluster is 6515 6513 * already computed earlier by ext4_get_group_no_and_offset() 6516 6514 */ 6517 - if (group == last_group) 6515 + if (group == last_group) { 6518 6516 end = last_cluster; 6519 - 6517 + whole_group = eof ? 
true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1; 6518 + } 6520 6519 if (grp->bb_free >= minlen) { 6521 6520 cnt = ext4_trim_all_free(sb, group, first_cluster, 6522 - end, minlen); 6521 + end, minlen, whole_group); 6523 6522 if (cnt < 0) { 6524 6523 ret = cnt; 6525 6524 break;
+1 -1
fs/ext4/mmp.c
··· 127 127 */ 128 128 static int kmmpd(void *data) 129 129 { 130 - struct super_block *sb = (struct super_block *) data; 130 + struct super_block *sb = data; 131 131 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 132 132 struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh; 133 133 struct mmp_struct *mmp;
+125 -89
fs/ext4/namei.c
··· 277 277 struct dx_hash_info *hinfo, 278 278 struct dx_frame *frame); 279 279 static void dx_release(struct dx_frame *frames); 280 - static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, 281 - unsigned blocksize, struct dx_hash_info *hinfo, 282 - struct dx_map_entry map[]); 280 + static int dx_make_map(struct inode *dir, struct buffer_head *bh, 281 + struct dx_hash_info *hinfo, 282 + struct dx_map_entry *map_tail); 283 283 static void dx_sort_map(struct dx_map_entry *map, unsigned count); 284 284 static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from, 285 285 char *to, struct dx_map_entry *offsets, ··· 777 777 dx_probe(struct ext4_filename *fname, struct inode *dir, 778 778 struct dx_hash_info *hinfo, struct dx_frame *frame_in) 779 779 { 780 - unsigned count, indirect; 780 + unsigned count, indirect, level, i; 781 781 struct dx_entry *at, *entries, *p, *q, *m; 782 782 struct dx_root *root; 783 783 struct dx_frame *frame = frame_in; 784 784 struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); 785 785 u32 hash; 786 + ext4_lblk_t block; 787 + ext4_lblk_t blocks[EXT4_HTREE_LEVEL]; 786 788 787 789 memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); 788 790 frame->bh = ext4_read_dirblock(dir, 0, INDEX); ··· 856 854 } 857 855 858 856 dxtrace(printk("Look up %x", hash)); 857 + level = 0; 858 + blocks[0] = 0; 859 859 while (1) { 860 860 count = dx_get_count(entries); 861 861 if (!count || count > dx_get_limit(entries)) { ··· 886 882 dx_get_block(at))); 887 883 frame->entries = entries; 888 884 frame->at = at; 889 - if (!indirect--) 885 + 886 + block = dx_get_block(at); 887 + for (i = 0; i <= level; i++) { 888 + if (blocks[i] == block) { 889 + ext4_warning_inode(dir, 890 + "dx entry: tree cycle block %u points back to block %u", 891 + blocks[level], block); 892 + goto fail; 893 + } 894 + } 895 + if (++level > indirect) 890 896 return frame; 897 + blocks[level] = block; 891 898 frame++; 892 - frame->bh = 
ext4_read_dirblock(dir, dx_get_block(at), INDEX); 899 + frame->bh = ext4_read_dirblock(dir, block, INDEX); 893 900 if (IS_ERR(frame->bh)) { 894 901 ret_err = (struct dx_frame *) frame->bh; 895 902 frame->bh = NULL; 896 903 goto fail; 897 904 } 905 + 898 906 entries = ((struct dx_node *) frame->bh->b_data)->entries; 899 907 900 908 if (dx_get_limit(entries) != dx_node_limit(dir)) { ··· 1265 1249 * Create map of hash values, offsets, and sizes, stored at end of block. 1266 1250 * Returns number of entries mapped. 1267 1251 */ 1268 - static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, 1269 - unsigned blocksize, struct dx_hash_info *hinfo, 1252 + static int dx_make_map(struct inode *dir, struct buffer_head *bh, 1253 + struct dx_hash_info *hinfo, 1270 1254 struct dx_map_entry *map_tail) 1271 1255 { 1272 1256 int count = 0; 1273 - char *base = (char *) de; 1257 + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data; 1258 + unsigned int buflen = bh->b_size; 1259 + char *base = bh->b_data; 1274 1260 struct dx_hash_info h = *hinfo; 1275 1261 1276 - while ((char *) de < base + blocksize) { 1262 + if (ext4_has_metadata_csum(dir->i_sb)) 1263 + buflen -= sizeof(struct ext4_dir_entry_tail); 1264 + 1265 + while ((char *) de < base + buflen) { 1266 + if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen, 1267 + ((char *)de) - base)) 1268 + return -EFSCORRUPTED; 1277 1269 if (de->name_len && de->inode) { 1278 1270 if (ext4_hash_in_dirent(dir)) 1279 1271 h.hash = EXT4_DIRENT_HASH(de); ··· 1294 1270 count++; 1295 1271 cond_resched(); 1296 1272 } 1297 - /* XXX: do we need to check rec_len == 0 case? 
-Chris */ 1298 - de = ext4_next_entry(de, blocksize); 1273 + de = ext4_next_entry(de, dir->i_sb->s_blocksize); 1299 1274 } 1300 1275 return count; 1301 1276 } ··· 1966 1943 1967 1944 /* create map in the end of data2 block */ 1968 1945 map = (struct dx_map_entry *) (data2 + blocksize); 1969 - count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1, 1970 - blocksize, hinfo, map); 1946 + count = dx_make_map(dir, *bh, hinfo, map); 1947 + if (count < 0) { 1948 + err = count; 1949 + goto journal_error; 1950 + } 1971 1951 map -= count; 1972 1952 dx_sort_map(map, count); 1973 1953 /* Ensure that neither split block is over half full */ ··· 2057 2031 unsigned int offset = 0; 2058 2032 char *top; 2059 2033 2060 - de = (struct ext4_dir_entry_2 *)buf; 2034 + de = buf; 2061 2035 top = buf + buf_size - reclen; 2062 2036 while ((char *) de <= top) { 2063 2037 if (ext4_check_dir_entry(dir, NULL, de, bh, ··· 2613 2587 2614 2588 i = 0; 2615 2589 pde = NULL; 2616 - de = (struct ext4_dir_entry_2 *)entry_buf; 2590 + de = entry_buf; 2617 2591 while (i < buf_size - csum_size) { 2618 2592 if (ext4_check_dir_entry(dir, NULL, de, bh, 2619 2593 entry_buf, buf_size, i)) ··· 3275 3249 return retval; 3276 3250 } 3277 3251 3252 + static int ext4_init_symlink_block(handle_t *handle, struct inode *inode, 3253 + struct fscrypt_str *disk_link) 3254 + { 3255 + struct buffer_head *bh; 3256 + char *kaddr; 3257 + int err = 0; 3258 + 3259 + bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE); 3260 + if (IS_ERR(bh)) 3261 + return PTR_ERR(bh); 3262 + 3263 + BUFFER_TRACE(bh, "get_write_access"); 3264 + err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); 3265 + if (err) 3266 + goto out; 3267 + 3268 + kaddr = (char *)bh->b_data; 3269 + memcpy(kaddr, disk_link->name, disk_link->len); 3270 + inode->i_size = disk_link->len - 1; 3271 + EXT4_I(inode)->i_disksize = inode->i_size; 3272 + err = ext4_handle_dirty_metadata(handle, inode, bh); 3273 + out: 3274 + brelse(bh); 3275 + 
return err; 3276 + } 3277 + 3278 3278 static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, 3279 3279 struct dentry *dentry, const char *symname) 3280 3280 { ··· 3309 3257 int err, len = strlen(symname); 3310 3258 int credits; 3311 3259 struct fscrypt_str disk_link; 3260 + int retries = 0; 3312 3261 3313 3262 if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) 3314 3263 return -EIO; ··· 3323 3270 if (err) 3324 3271 return err; 3325 3272 3326 - if ((disk_link.len > EXT4_N_BLOCKS * 4)) { 3327 - /* 3328 - * For non-fast symlinks, we just allocate inode and put it on 3329 - * orphan list in the first transaction => we need bitmap, 3330 - * group descriptor, sb, inode block, quota blocks, and 3331 - * possibly selinux xattr blocks. 3332 - */ 3333 - credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + 3334 - EXT4_XATTR_TRANS_BLOCKS; 3335 - } else { 3336 - /* 3337 - * Fast symlink. We have to add entry to directory 3338 - * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS), 3339 - * allocate new inode (bitmap, group descriptor, inode block, 3340 - * quota blocks, sb is already counted in previous macros). 3341 - */ 3342 - credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 3343 - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; 3344 - } 3345 - 3273 + /* 3274 + * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the 3275 + * directory. +3 for inode, inode bitmap, group descriptor allocation. 3276 + * EXT4_DATA_TRANS_BLOCKS for the data block allocation and 3277 + * modification. 
3278 + */ 3279 + credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 3280 + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; 3281 + retry: 3346 3282 inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO, 3347 3283 &dentry->d_name, 0, NULL, 3348 3284 EXT4_HT_DIR, credits); ··· 3339 3297 if (IS_ERR(inode)) { 3340 3298 if (handle) 3341 3299 ext4_journal_stop(handle); 3342 - return PTR_ERR(inode); 3300 + err = PTR_ERR(inode); 3301 + goto out_retry; 3343 3302 } 3344 3303 3345 3304 if (IS_ENCRYPTED(inode)) { ··· 3348 3305 if (err) 3349 3306 goto err_drop_inode; 3350 3307 inode->i_op = &ext4_encrypted_symlink_inode_operations; 3308 + } else { 3309 + if ((disk_link.len > EXT4_N_BLOCKS * 4)) { 3310 + inode->i_op = &ext4_symlink_inode_operations; 3311 + } else { 3312 + inode->i_op = &ext4_fast_symlink_inode_operations; 3313 + inode->i_link = (char *)&EXT4_I(inode)->i_data; 3314 + } 3351 3315 } 3352 3316 3353 3317 if ((disk_link.len > EXT4_N_BLOCKS * 4)) { 3354 - if (!IS_ENCRYPTED(inode)) 3355 - inode->i_op = &ext4_symlink_inode_operations; 3356 - inode_nohighmem(inode); 3357 - ext4_set_aops(inode); 3358 - /* 3359 - * We cannot call page_symlink() with transaction started 3360 - * because it calls into ext4_write_begin() which can wait 3361 - * for transaction commit if we are running out of space 3362 - * and thus we deadlock. So we have to stop transaction now 3363 - * and restart it when symlink contents is written. 3364 - * 3365 - * To keep fs consistent in case of crash, we have to put inode 3366 - * to orphan list in the mean time. 
3367 - */ 3368 - drop_nlink(inode); 3369 - err = ext4_orphan_add(handle, inode); 3370 - if (handle) 3371 - ext4_journal_stop(handle); 3372 - handle = NULL; 3373 - if (err) 3374 - goto err_drop_inode; 3375 - err = __page_symlink(inode, disk_link.name, disk_link.len, 1); 3376 - if (err) 3377 - goto err_drop_inode; 3378 - /* 3379 - * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS 3380 - * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified 3381 - */ 3382 - handle = ext4_journal_start(dir, EXT4_HT_DIR, 3383 - EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 3384 - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); 3385 - if (IS_ERR(handle)) { 3386 - err = PTR_ERR(handle); 3387 - handle = NULL; 3388 - goto err_drop_inode; 3389 - } 3390 - set_nlink(inode, 1); 3391 - err = ext4_orphan_del(handle, inode); 3318 + /* alloc symlink block and fill it */ 3319 + err = ext4_init_symlink_block(handle, inode, &disk_link); 3392 3320 if (err) 3393 3321 goto err_drop_inode; 3394 3322 } else { 3395 3323 /* clear the extent format for fast symlink */ 3396 3324 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 3397 - if (!IS_ENCRYPTED(inode)) { 3398 - inode->i_op = &ext4_fast_symlink_inode_operations; 3399 - inode->i_link = (char *)&EXT4_I(inode)->i_data; 3400 - } 3401 3325 memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name, 3402 3326 disk_link.len); 3403 3327 inode->i_size = disk_link.len - 1; 3328 + EXT4_I(inode)->i_disksize = inode->i_size; 3404 3329 } 3405 - EXT4_I(inode)->i_disksize = inode->i_size; 3406 3330 err = ext4_add_nondir(handle, dentry, &inode); 3407 3331 if (handle) 3408 3332 ext4_journal_stop(handle); 3409 - if (inode) 3410 - iput(inode); 3411 - goto out_free_encrypted_link; 3333 + iput(inode); 3334 + goto out_retry; 3412 3335 3413 3336 err_drop_inode: 3337 + clear_nlink(inode); 3338 + ext4_orphan_add(handle, inode); 3339 + unlock_new_inode(inode); 3414 3340 if (handle) 3415 3341 ext4_journal_stop(handle); 3416 - clear_nlink(inode); 3417 - unlock_new_inode(inode); 3418 3342 
iput(inode); 3419 - out_free_encrypted_link: 3343 + out_retry: 3344 + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 3345 + goto retry; 3420 3346 if (disk_link.name != (unsigned char *)symname) 3421 3347 kfree(disk_link.name); 3422 3348 return err; ··· 3467 3455 struct buffer_head *bh; 3468 3456 3469 3457 if (!ext4_has_inline_data(inode)) { 3458 + struct ext4_dir_entry_2 *de; 3459 + unsigned int offset; 3460 + 3470 3461 /* The first directory block must not be a hole, so 3471 3462 * treat it as DIRENT_HTREE 3472 3463 */ ··· 3478 3463 *retval = PTR_ERR(bh); 3479 3464 return NULL; 3480 3465 } 3481 - *parent_de = ext4_next_entry( 3482 - (struct ext4_dir_entry_2 *)bh->b_data, 3483 - inode->i_sb->s_blocksize); 3466 + 3467 + de = (struct ext4_dir_entry_2 *) bh->b_data; 3468 + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, 3469 + bh->b_size, 0) || 3470 + le32_to_cpu(de->inode) != inode->i_ino || 3471 + strcmp(".", de->name)) { 3472 + EXT4_ERROR_INODE(inode, "directory missing '.'"); 3473 + brelse(bh); 3474 + *retval = -EFSCORRUPTED; 3475 + return NULL; 3476 + } 3477 + offset = ext4_rec_len_from_disk(de->rec_len, 3478 + inode->i_sb->s_blocksize); 3479 + de = ext4_next_entry(de, inode->i_sb->s_blocksize); 3480 + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, 3481 + bh->b_size, offset) || 3482 + le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { 3483 + EXT4_ERROR_INODE(inode, "directory missing '..'"); 3484 + brelse(bh); 3485 + *retval = -EFSCORRUPTED; 3486 + return NULL; 3487 + } 3488 + *parent_de = de; 3489 + 3484 3490 return bh; 3485 3491 } 3486 3492
+61 -156
fs/ext4/super.c
··· 1211 1211 */ 1212 1212 ext4_unregister_sysfs(sb); 1213 1213 1214 + if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount")) 1215 + ext4_msg(sb, KERN_INFO, "unmounting filesystem."); 1216 + 1214 1217 ext4_unregister_li_request(sb); 1215 1218 ext4_quota_off_umount(sb); 1216 1219 ··· 1400 1397 1401 1398 static void init_once(void *foo) 1402 1399 { 1403 - struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 1400 + struct ext4_inode_info *ei = foo; 1404 1401 1405 1402 INIT_LIST_HEAD(&ei->i_orphan); 1406 1403 init_rwsem(&ei->xattr_sem); ··· 1494 1491 trace_ext4_nfs_commit_metadata(inode); 1495 1492 return ext4_write_inode(inode, &wbc); 1496 1493 } 1497 - 1498 - #ifdef CONFIG_FS_ENCRYPTION 1499 - static int ext4_get_context(struct inode *inode, void *ctx, size_t len) 1500 - { 1501 - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, 1502 - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); 1503 - } 1504 - 1505 - static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, 1506 - void *fs_data) 1507 - { 1508 - handle_t *handle = fs_data; 1509 - int res, res2, credits, retries = 0; 1510 - 1511 - /* 1512 - * Encrypting the root directory is not allowed because e2fsck expects 1513 - * lost+found to exist and be unencrypted, and encrypting the root 1514 - * directory would imply encrypting the lost+found directory as well as 1515 - * the filename "lost+found" itself. 1516 - */ 1517 - if (inode->i_ino == EXT4_ROOT_INO) 1518 - return -EPERM; 1519 - 1520 - if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode))) 1521 - return -EINVAL; 1522 - 1523 - if (ext4_test_inode_flag(inode, EXT4_INODE_DAX)) 1524 - return -EOPNOTSUPP; 1525 - 1526 - res = ext4_convert_inline_data(inode); 1527 - if (res) 1528 - return res; 1529 - 1530 - /* 1531 - * If a journal handle was specified, then the encryption context is 1532 - * being set on a new inode via inheritance and is part of a larger 1533 - * transaction to create the inode. 
Otherwise the encryption context is 1534 - * being set on an existing inode in its own transaction. Only in the 1535 - * latter case should the "retry on ENOSPC" logic be used. 1536 - */ 1537 - 1538 - if (handle) { 1539 - res = ext4_xattr_set_handle(handle, inode, 1540 - EXT4_XATTR_INDEX_ENCRYPTION, 1541 - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, 1542 - ctx, len, 0); 1543 - if (!res) { 1544 - ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); 1545 - ext4_clear_inode_state(inode, 1546 - EXT4_STATE_MAY_INLINE_DATA); 1547 - /* 1548 - * Update inode->i_flags - S_ENCRYPTED will be enabled, 1549 - * S_DAX may be disabled 1550 - */ 1551 - ext4_set_inode_flags(inode, false); 1552 - } 1553 - return res; 1554 - } 1555 - 1556 - res = dquot_initialize(inode); 1557 - if (res) 1558 - return res; 1559 - retry: 1560 - res = ext4_xattr_set_credits(inode, len, false /* is_create */, 1561 - &credits); 1562 - if (res) 1563 - return res; 1564 - 1565 - handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); 1566 - if (IS_ERR(handle)) 1567 - return PTR_ERR(handle); 1568 - 1569 - res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, 1570 - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, 1571 - ctx, len, 0); 1572 - if (!res) { 1573 - ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); 1574 - /* 1575 - * Update inode->i_flags - S_ENCRYPTED will be enabled, 1576 - * S_DAX may be disabled 1577 - */ 1578 - ext4_set_inode_flags(inode, false); 1579 - res = ext4_mark_inode_dirty(handle, inode); 1580 - if (res) 1581 - EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); 1582 - } 1583 - res2 = ext4_journal_stop(handle); 1584 - 1585 - if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 1586 - goto retry; 1587 - if (!res) 1588 - res = res2; 1589 - return res; 1590 - } 1591 - 1592 - static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb) 1593 - { 1594 - return EXT4_SB(sb)->s_dummy_enc_policy.policy; 1595 - } 1596 - 1597 - static bool 
ext4_has_stable_inodes(struct super_block *sb) 1598 - { 1599 - return ext4_has_feature_stable_inodes(sb); 1600 - } 1601 - 1602 - static void ext4_get_ino_and_lblk_bits(struct super_block *sb, 1603 - int *ino_bits_ret, int *lblk_bits_ret) 1604 - { 1605 - *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); 1606 - *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); 1607 - } 1608 - 1609 - static const struct fscrypt_operations ext4_cryptops = { 1610 - .key_prefix = "ext4:", 1611 - .get_context = ext4_get_context, 1612 - .set_context = ext4_set_context, 1613 - .get_dummy_policy = ext4_get_dummy_policy, 1614 - .empty_dir = ext4_empty_dir, 1615 - .has_stable_inodes = ext4_has_stable_inodes, 1616 - .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, 1617 - }; 1618 - #endif 1619 1494 1620 1495 #ifdef CONFIG_QUOTA 1621 1496 static const char * const quotatypes[] = INITQFNAMES; ··· 1748 1867 }; 1749 1868 1750 1869 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1751 - #define DEFAULT_MB_OPTIMIZE_SCAN (-1) 1752 1870 1753 1871 static const char deprecated_msg[] = 1754 1872 "Mount option \"%s\" will be removed by %s\n" ··· 1793 1913 MOPT_EXT4_ONLY | MOPT_CLEAR}, 1794 1914 {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, 1795 1915 {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, 1916 + {Opt_commit, 0, MOPT_NO_EXT2}, 1796 1917 {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, 1797 1918 MOPT_EXT4_ONLY | MOPT_CLEAR}, 1798 1919 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, ··· 2308 2427 ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; 2309 2428 ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size, 2310 2429 GFP_KERNEL); 2430 + return 0; 2311 2431 #else 2312 2432 ext4_msg(NULL, KERN_WARNING, 2313 - "Test dummy encryption mount option ignored"); 2433 + "test_dummy_encryption option not supported"); 2434 + return -EINVAL; 2314 2435 #endif 2315 - return 0; 2316 2436 case Opt_dax: 2317 2437 case Opt_dax_type: 2318 2438 #ifdef 
CONFIG_FS_DAX ··· 2507 2625 ret = ext4_apply_options(fc, sb); 2508 2626 2509 2627 out_free: 2510 - kfree(s_ctx); 2511 - kfree(fc); 2628 + if (fc) { 2629 + ext4_fc_free(fc); 2630 + kfree(fc); 2631 + } 2512 2632 kfree(s_mount_opts); 2513 2633 return ret; 2514 2634 } ··· 2670 2786 #endif 2671 2787 } 2672 2788 2789 + static int ext4_check_test_dummy_encryption(const struct fs_context *fc, 2790 + struct super_block *sb) 2791 + { 2792 + #ifdef CONFIG_FS_ENCRYPTION 2793 + const struct ext4_fs_context *ctx = fc->fs_private; 2794 + const struct ext4_sb_info *sbi = EXT4_SB(sb); 2795 + 2796 + if (!(ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION)) 2797 + return 0; 2798 + 2799 + if (!ext4_has_feature_encrypt(sb)) { 2800 + ext4_msg(NULL, KERN_WARNING, 2801 + "test_dummy_encryption requires encrypt feature"); 2802 + return -EINVAL; 2803 + } 2804 + /* 2805 + * This mount option is just for testing, and it's not worthwhile to 2806 + * implement the extra complexity (e.g. RCU protection) that would be 2807 + * needed to allow it to be set or changed during remount. We do allow 2808 + * it to be specified during remount, but only if there is no change. 
2809 + */ 2810 + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE && 2811 + !sbi->s_dummy_enc_policy.policy) { 2812 + ext4_msg(NULL, KERN_WARNING, 2813 + "Can't set test_dummy_encryption on remount"); 2814 + return -EINVAL; 2815 + } 2816 + #endif /* CONFIG_FS_ENCRYPTION */ 2817 + return 0; 2818 + } 2819 + 2673 2820 static int ext4_check_opt_consistency(struct fs_context *fc, 2674 2821 struct super_block *sb) 2675 2822 { 2676 2823 struct ext4_fs_context *ctx = fc->fs_private; 2677 2824 struct ext4_sb_info *sbi = fc->s_fs_info; 2678 2825 int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; 2826 + int err; 2679 2827 2680 2828 if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { 2681 2829 ext4_msg(NULL, KERN_ERR, ··· 2737 2821 "for blocksize < PAGE_SIZE"); 2738 2822 } 2739 2823 2740 - #ifdef CONFIG_FS_ENCRYPTION 2741 - /* 2742 - * This mount option is just for testing, and it's not worthwhile to 2743 - * implement the extra complexity (e.g. RCU protection) that would be 2744 - * needed to allow it to be set or changed during remount. We do allow 2745 - * it to be specified during remount, but only if there is no change. 
2746 - */ 2747 - if ((ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) && 2748 - is_remount && !sbi->s_dummy_enc_policy.policy) { 2749 - ext4_msg(NULL, KERN_WARNING, 2750 - "Can't set test_dummy_encryption on remount"); 2751 - return -1; 2752 - } 2753 - #endif 2824 + err = ext4_check_test_dummy_encryption(fc, sb); 2825 + if (err) 2826 + return err; 2754 2827 2755 2828 if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) { 2756 2829 if (!sbi->s_journal) { ··· 3742 3837 */ 3743 3838 static int ext4_lazyinit_thread(void *arg) 3744 3839 { 3745 - struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; 3840 + struct ext4_lazy_init *eli = arg; 3746 3841 struct list_head *pos, *n; 3747 3842 struct ext4_li_request *elr; 3748 3843 unsigned long next_wakeup, cur; ··· 4314 4409 int silent = fc->sb_flags & SB_SILENT; 4315 4410 4316 4411 /* Set defaults for the variables that will be set during parsing */ 4317 - ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4412 + if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) 4413 + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4318 4414 4319 4415 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 4320 4416 sbi->s_sectors_written_start = ··· 4792 4886 sbi->s_inodes_per_block; 4793 4887 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 4794 4888 sbi->s_sbh = bh; 4795 - sbi->s_mount_state = le16_to_cpu(es->s_state); 4889 + sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY; 4796 4890 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 4797 4891 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 4798 4892 ··· 5183 5277 if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) { 5184 5278 ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity"); 5185 5279 goto failed_mount_wq; 5186 - } 5187 - 5188 - if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && 5189 - !ext4_has_feature_encrypt(sb)) { 5190 - ext4_set_feature_encrypt(sb); 5191 - ext4_commit_super(sb); 5192 5280 } 5193 5281 5194 5282 /* ··· 6172 6272 
char *to_free[EXT4_MAXQUOTAS]; 6173 6273 #endif 6174 6274 6175 - ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 6176 6275 6177 6276 /* Store the original options */ 6178 6277 old_sb_flags = sb->s_flags; ··· 6197 6298 } else 6198 6299 old_opts.s_qf_names[i] = NULL; 6199 6300 #endif 6200 - if (sbi->s_journal && sbi->s_journal->j_task->io_context) 6201 - ctx->journal_ioprio = 6202 - sbi->s_journal->j_task->io_context->ioprio; 6301 + if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) { 6302 + if (sbi->s_journal && sbi->s_journal->j_task->io_context) 6303 + ctx->journal_ioprio = 6304 + sbi->s_journal->j_task->io_context->ioprio; 6305 + else 6306 + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 6307 + 6308 + } 6203 6309 6204 6310 ext4_apply_options(fc, sb); 6205 6311 ··· 6345 6441 if (err) 6346 6442 goto restore_opts; 6347 6443 } 6348 - sbi->s_mount_state = le16_to_cpu(es->s_state); 6444 + sbi->s_mount_state = (le16_to_cpu(es->s_state) & 6445 + ~EXT4_FC_REPLAY); 6349 6446 6350 6447 err = ext4_setup_super(sb, es, 0); 6351 6448 if (err)
+43 -8
fs/ext4/symlink.c
··· 27 27 struct inode *inode, 28 28 struct delayed_call *done) 29 29 { 30 - struct page *cpage = NULL; 30 + struct buffer_head *bh = NULL; 31 31 const void *caddr; 32 32 unsigned int max_size; 33 33 const char *paddr; ··· 39 39 caddr = EXT4_I(inode)->i_data; 40 40 max_size = sizeof(EXT4_I(inode)->i_data); 41 41 } else { 42 - cpage = read_mapping_page(inode->i_mapping, 0, NULL); 43 - if (IS_ERR(cpage)) 44 - return ERR_CAST(cpage); 45 - caddr = page_address(cpage); 42 + bh = ext4_bread(NULL, inode, 0, 0); 43 + if (IS_ERR(bh)) 44 + return ERR_CAST(bh); 45 + if (!bh) { 46 + EXT4_ERROR_INODE(inode, "bad symlink."); 47 + return ERR_PTR(-EFSCORRUPTED); 48 + } 49 + caddr = bh->b_data; 46 50 max_size = inode->i_sb->s_blocksize; 47 51 } 48 52 49 53 paddr = fscrypt_get_symlink(inode, caddr, max_size, done); 50 - if (cpage) 51 - put_page(cpage); 54 + brelse(bh); 52 55 return paddr; 53 56 } 54 57 ··· 65 62 return fscrypt_symlink_getattr(path, stat); 66 63 } 67 64 65 + static void ext4_free_link(void *bh) 66 + { 67 + brelse(bh); 68 + } 69 + 70 + static const char *ext4_get_link(struct dentry *dentry, struct inode *inode, 71 + struct delayed_call *callback) 72 + { 73 + struct buffer_head *bh; 74 + 75 + if (!dentry) { 76 + bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT); 77 + if (IS_ERR(bh)) 78 + return ERR_CAST(bh); 79 + if (!bh || !ext4_buffer_uptodate(bh)) 80 + return ERR_PTR(-ECHILD); 81 + } else { 82 + bh = ext4_bread(NULL, inode, 0, 0); 83 + if (IS_ERR(bh)) 84 + return ERR_CAST(bh); 85 + if (!bh) { 86 + EXT4_ERROR_INODE(inode, "bad symlink."); 87 + return ERR_PTR(-EFSCORRUPTED); 88 + } 89 + } 90 + 91 + set_delayed_call(callback, ext4_free_link, bh); 92 + nd_terminate_link(bh->b_data, inode->i_size, 93 + inode->i_sb->s_blocksize - 1); 94 + return bh->b_data; 95 + } 96 + 68 97 const struct inode_operations ext4_encrypted_symlink_inode_operations = { 69 98 .get_link = ext4_encrypted_get_link, 70 99 .setattr = ext4_setattr, ··· 105 70 }; 106 71 107 72 const 
struct inode_operations ext4_symlink_inode_operations = { 108 - .get_link = page_get_link, 73 + .get_link = ext4_get_link, 109 74 .setattr = ext4_setattr, 110 75 .getattr = ext4_getattr, 111 76 .listxattr = ext4_listxattr,