Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.18-rc1.inode' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs inode updates from Christian Brauner:
"This contains a series I originally wrote and that Eric brought over
the finish line. It moves the i_crypt_info and i_verity_info
pointers out of 'struct inode' and into the fs-specific part of the
inode.

So now the few filesystems that actually make use of this pay the price
in their own private inode storage instead of forcing it upon every
user of struct inode.

The pointer for the crypt and verity info is simply found by storing
an offset to its address in struct fsverity_operations and struct
fscrypt_operations. This shrinks struct inode by 16 bytes.

I hope to move a lot more out of it in the future so that struct inode
becomes really just about very core stuff that we need, much like
struct dentry and struct file, instead of the dumping ground it has
become over the years.

On top of this are various changes associated with the ongoing inode
lifetime handling rework that multiple people are pushing forward:

- Stop accessing inode->i_count directly in f2fs and gfs2. They
simply should use the __iget() and iput() helpers

- Make the i_state flags an enum

- Rework the iput() logic

Currently, if we are the last iput, and we have the I_DIRTY_TIME
bit set, we will grab a reference on the inode again and then mark
it dirty and then redo the put. This is to make sure we delay the
time update for as long as possible

We can rework this logic to simply dec i_count if it is not 1, and
if it is do the time update while still holding the i_count
reference

Then we can replace the atomic_dec_and_lock with locking the
->i_lock and doing atomic_dec_and_test, since we did the
atomic_add_unless above

- Add an icount_read() helper and convert everyone that accesses
inode->i_count directly for this purpose to use the helper

- Expand dump_inode() to dump more information about an inode, to help
with debugging

- Add some might_sleep() annotations to iput() and associated
helpers"

* tag 'vfs-6.18-rc1.inode' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
fs: add might_sleep() annotation to iput() and more
fs: expand dump_inode()
inode: fix whitespace issues
fs: add an icount_read helper
fs: rework iput logic
fs: make the i_state flags an enum
fs: stop accessing ->i_count directly in f2fs and gfs2
fsverity: check IS_VERITY() in fsverity_cleanup_inode()
fs: remove inode::i_verity_info
btrfs: move verity info pointer to fs-specific part of inode
f2fs: move verity info pointer to fs-specific part of inode
ext4: move verity info pointer to fs-specific part of inode
fsverity: add support for info in fs-specific part of inode
fs: remove inode::i_crypt_info
ceph: move crypt info pointer to fs-specific part of inode
ubifs: move crypt info pointer to fs-specific part of inode
f2fs: move crypt info pointer to fs-specific part of inode
ext4: move crypt info pointer to fs-specific part of inode
fscrypt: add support for info in fs-specific part of inode
fscrypt: replace raw loads of info pointer with helper function

+428 -234
+1 -1
arch/powerpc/platforms/cell/spufs/file.c
··· 1430 1430 if (ctx->owner != current->mm) 1431 1431 return -EINVAL; 1432 1432 1433 - if (atomic_read(&inode->i_count) != 1) 1433 + if (icount_read(inode) != 1) 1434 1434 return -EBUSY; 1435 1435 1436 1436 mutex_lock(&ctx->mapping_lock);
+5
fs/btrfs/btrfs_inode.h
··· 338 338 struct list_head delayed_iput; 339 339 340 340 struct rw_semaphore i_mmap_lock; 341 + 342 + #ifdef CONFIG_FS_VERITY 343 + struct fsverity_info *i_verity_info; 344 + #endif 345 + 341 346 struct inode vfs_inode; 342 347 }; 343 348
+4 -1
fs/btrfs/inode.c
··· 4556 4556 4557 4557 inode = btrfs_find_first_inode(root, min_ino); 4558 4558 while (inode) { 4559 - if (atomic_read(&inode->vfs_inode.i_count) > 1) 4559 + if (icount_read(&inode->vfs_inode) > 1) 4560 4560 d_prune_aliases(&inode->vfs_inode); 4561 4561 4562 4562 min_ino = btrfs_ino(inode) + 1; ··· 7981 7981 struct btrfs_inode *ei = foo; 7982 7982 7983 7983 inode_init_once(&ei->vfs_inode); 7984 + #ifdef CONFIG_FS_VERITY 7985 + ei->i_verity_info = NULL; 7986 + #endif 7984 7987 } 7985 7988 7986 7989 void __cold btrfs_destroy_cachep(void)
+2
fs/btrfs/verity.c
··· 802 802 } 803 803 804 804 const struct fsverity_operations btrfs_verityops = { 805 + .inode_info_offs = (int)offsetof(struct btrfs_inode, i_verity_info) - 806 + (int)offsetof(struct btrfs_inode, vfs_inode), 805 807 .begin_enable_verity = btrfs_begin_enable_verity, 806 808 .end_enable_verity = btrfs_end_enable_verity, 807 809 .get_verity_descriptor = btrfs_get_verity_descriptor,
+2
fs/ceph/crypto.c
··· 133 133 } 134 134 135 135 static struct fscrypt_operations ceph_fscrypt_ops = { 136 + .inode_info_offs = (int)offsetof(struct ceph_inode_info, i_crypt_info) - 137 + (int)offsetof(struct ceph_inode_info, netfs.inode), 136 138 .needs_bounce_pages = 1, 137 139 .get_context = ceph_crypt_get_context, 138 140 .set_context = ceph_crypt_set_context,
+1
fs/ceph/inode.c
··· 711 711 ci->i_work_mask = 0; 712 712 memset(&ci->i_btime, '\0', sizeof(ci->i_btime)); 713 713 #ifdef CONFIG_FS_ENCRYPTION 714 + ci->i_crypt_info = NULL; 714 715 ci->fscrypt_auth = NULL; 715 716 ci->fscrypt_auth_len = 0; 716 717 #endif
+1 -1
fs/ceph/mds_client.c
··· 2221 2221 int count; 2222 2222 dput(dentry); 2223 2223 d_prune_aliases(inode); 2224 - count = atomic_read(&inode->i_count); 2224 + count = icount_read(inode); 2225 2225 if (count == 1) 2226 2226 (*remaining)--; 2227 2227 doutc(cl, "%p %llx.%llx cap %p pruned, count now %d\n",
+1
fs/ceph/super.h
··· 463 463 unsigned long i_work_mask; 464 464 465 465 #ifdef CONFIG_FS_ENCRYPTION 466 + struct fscrypt_inode_info *i_crypt_info; 466 467 u32 fscrypt_auth_len; 467 468 u32 fscrypt_file_len; 468 469 u8 *fscrypt_auth;
+1 -1
fs/crypto/bio.c
··· 113 113 int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, 114 114 sector_t pblk, unsigned int len) 115 115 { 116 - const struct fscrypt_inode_info *ci = inode->i_crypt_info; 116 + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); 117 117 const unsigned int du_bits = ci->ci_data_unit_bits; 118 118 const unsigned int du_size = 1U << du_bits; 119 119 const unsigned int du_per_page_bits = PAGE_SHIFT - du_bits;
+8 -6
fs/crypto/crypto.c
··· 173 173 size_t len, size_t offs, gfp_t gfp_flags) 174 174 { 175 175 const struct inode *inode = folio->mapping->host; 176 - const struct fscrypt_inode_info *ci = inode->i_crypt_info; 176 + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); 177 177 const unsigned int du_bits = ci->ci_data_unit_bits; 178 178 const unsigned int du_size = 1U << du_bits; 179 179 struct page *ciphertext_page; ··· 232 232 { 233 233 if (WARN_ON_ONCE(inode->i_sb->s_cop->supports_subblock_data_units)) 234 234 return -EOPNOTSUPP; 235 - return fscrypt_crypt_data_unit(inode->i_crypt_info, FS_ENCRYPT, 236 - lblk_num, page, page, len, offs); 235 + return fscrypt_crypt_data_unit(fscrypt_get_inode_info_raw(inode), 236 + FS_ENCRYPT, lblk_num, page, page, len, 237 + offs); 237 238 } 238 239 EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); 239 240 ··· 256 255 size_t offs) 257 256 { 258 257 const struct inode *inode = folio->mapping->host; 259 - const struct fscrypt_inode_info *ci = inode->i_crypt_info; 258 + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); 260 259 const unsigned int du_bits = ci->ci_data_unit_bits; 261 260 const unsigned int du_size = 1U << du_bits; 262 261 u64 index = ((u64)folio->index << (PAGE_SHIFT - du_bits)) + ··· 306 305 { 307 306 if (WARN_ON_ONCE(inode->i_sb->s_cop->supports_subblock_data_units)) 308 307 return -EOPNOTSUPP; 309 - return fscrypt_crypt_data_unit(inode->i_crypt_info, FS_DECRYPT, 310 - lblk_num, page, page, len, offs); 308 + return fscrypt_crypt_data_unit(fscrypt_get_inode_info_raw(inode), 309 + FS_DECRYPT, lblk_num, page, page, len, 310 + offs); 311 311 } 312 312 EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); 313 313
+6 -5
fs/crypto/fname.c
··· 94 94 int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, 95 95 u8 *out, unsigned int olen) 96 96 { 97 - const struct fscrypt_inode_info *ci = inode->i_crypt_info; 97 + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); 98 98 struct crypto_sync_skcipher *tfm = ci->ci_enc_key.tfm; 99 99 SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); 100 100 union fscrypt_iv iv; ··· 138 138 const struct fscrypt_str *iname, 139 139 struct fscrypt_str *oname) 140 140 { 141 - const struct fscrypt_inode_info *ci = inode->i_crypt_info; 141 + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); 142 142 struct crypto_sync_skcipher *tfm = ci->ci_enc_key.tfm; 143 143 SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); 144 144 union fscrypt_iv iv; ··· 274 274 bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, 275 275 u32 max_len, u32 *encrypted_len_ret) 276 276 { 277 - return __fscrypt_fname_encrypted_size(&inode->i_crypt_info->ci_policy, 278 - orig_len, max_len, 277 + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); 278 + 279 + return __fscrypt_fname_encrypted_size(&ci->ci_policy, orig_len, max_len, 279 280 encrypted_len_ret); 280 281 } 281 282 EXPORT_SYMBOL_GPL(fscrypt_fname_encrypted_size); ··· 544 543 */ 545 544 u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) 546 545 { 547 - const struct fscrypt_inode_info *ci = dir->i_crypt_info; 546 + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(dir); 548 547 549 548 WARN_ON_ONCE(!ci->ci_dirhash_key_initialized); 550 549
+2 -2
fs/crypto/fscrypt_private.h
··· 249 249 * fscrypt_inode_info - the "encryption key" for an inode 250 250 * 251 251 * When an encrypted file's key is made available, an instance of this struct is 252 - * allocated and stored in ->i_crypt_info. Once created, it remains until the 253 - * inode is evicted. 252 + * allocated and a pointer to it is stored in the file's in-memory inode. Once 253 + * created, it remains until the inode is evicted. 254 254 */ 255 255 struct fscrypt_inode_info { 256 256
+1 -1
fs/crypto/hooks.c
··· 199 199 err = fscrypt_require_key(inode); 200 200 if (err) 201 201 return err; 202 - ci = inode->i_crypt_info; 202 + ci = fscrypt_get_inode_info_raw(inode); 203 203 if (ci->ci_policy.version != FSCRYPT_POLICY_V2) 204 204 return -EINVAL; 205 205 mk = ci->ci_master_key;
+7 -5
fs/crypto/inline_crypt.c
··· 263 263 264 264 bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) 265 265 { 266 - return inode->i_crypt_info->ci_inlinecrypt; 266 + return fscrypt_get_inode_info_raw(inode)->ci_inlinecrypt; 267 267 } 268 268 EXPORT_SYMBOL_GPL(__fscrypt_inode_uses_inline_crypto); 269 269 ··· 307 307 308 308 if (!fscrypt_inode_uses_inline_crypto(inode)) 309 309 return; 310 - ci = inode->i_crypt_info; 310 + ci = fscrypt_get_inode_info_raw(inode); 311 311 312 312 fscrypt_generate_dun(ci, first_lblk, dun); 313 313 bio_crypt_set_ctx(bio, ci->ci_enc_key.blk_key, dun, gfp_mask); ··· 385 385 u64 next_lblk) 386 386 { 387 387 const struct bio_crypt_ctx *bc = bio->bi_crypt_context; 388 + const struct fscrypt_inode_info *ci; 388 389 u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; 389 390 390 391 if (!!bc != fscrypt_inode_uses_inline_crypto(inode)) 391 392 return false; 392 393 if (!bc) 393 394 return true; 395 + ci = fscrypt_get_inode_info_raw(inode); 394 396 395 397 /* 396 398 * Comparing the key pointers is good enough, as all I/O for each key 397 399 * uses the same pointer. I.e., there's currently no need to support 398 400 * merging requests where the keys are the same but the pointers differ. 399 401 */ 400 - if (bc->bc_key != inode->i_crypt_info->ci_enc_key.blk_key) 402 + if (bc->bc_key != ci->ci_enc_key.blk_key) 401 403 return false; 402 404 403 - fscrypt_generate_dun(inode->i_crypt_info, next_lblk, next_dun); 405 + fscrypt_generate_dun(ci, next_lblk, next_dun); 404 406 return bio_crypt_dun_is_contiguous(bc, bio->bi_iter.bi_size, next_dun); 405 407 } 406 408 EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio); ··· 504 502 if (nr_blocks <= 1) 505 503 return nr_blocks; 506 504 507 - ci = inode->i_crypt_info; 505 + ci = fscrypt_get_inode_info_raw(inode); 508 506 if (!(fscrypt_policy_flags(&ci->ci_policy) & 509 507 FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) 510 508 return nr_blocks;
+26 -17
fs/crypto/keysetup.c
··· 642 642 goto out; 643 643 644 644 /* 645 - * For existing inodes, multiple tasks may race to set ->i_crypt_info. 646 - * So use cmpxchg_release(). This pairs with the smp_load_acquire() in 647 - * fscrypt_get_inode_info(). I.e., here we publish ->i_crypt_info with 648 - * a RELEASE barrier so that other tasks can ACQUIRE it. 645 + * For existing inodes, multiple tasks may race to set the inode's 646 + * fscrypt info pointer. So use cmpxchg_release(). This pairs with the 647 + * smp_load_acquire() in fscrypt_get_inode_info(). I.e., publish the 648 + * pointer with a RELEASE barrier so that other tasks can ACQUIRE it. 649 649 */ 650 - if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL) { 650 + if (cmpxchg_release(fscrypt_inode_info_addr(inode), NULL, crypt_info) == 651 + NULL) { 651 652 /* 652 - * We won the race and set ->i_crypt_info to our crypt_info. 653 - * Now link it into the master key's inode list. 653 + * We won the race and set the inode's fscrypt info to our 654 + * crypt_info. Now link it into the master key's inode list. 654 655 */ 655 656 if (mk) { 656 657 crypt_info->ci_master_key = mk; ··· 682 681 * %false unless the operation being performed is needed in 683 682 * order for files (or directories) to be deleted. 684 683 * 685 - * Set up ->i_crypt_info, if it hasn't already been done. 684 + * Set up the inode's encryption key, if it hasn't already been done. 686 685 * 687 - * Note: unless ->i_crypt_info is already set, this isn't %GFP_NOFS-safe. So 686 + * Note: unless the key setup was already done, this isn't %GFP_NOFS-safe. So 688 687 * generally this shouldn't be called from within a filesystem transaction. 689 688 * 690 - * Return: 0 if ->i_crypt_info was set or was already set, *or* if the 691 - * encryption key is unavailable. (Use fscrypt_has_encryption_key() to 689 + * Return: 0 if the key is now set up, *or* if it couldn't be set up because the 690 + * needed master key is absent. 
(Use fscrypt_has_encryption_key() to 692 691 * distinguish these cases.) Also can return another -errno code. 693 692 */ 694 693 int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported) ··· 742 741 * ->i_ino doesn't need to be set yet. 743 742 * @encrypt_ret: (output) set to %true if the new inode will be encrypted 744 743 * 745 - * If the directory is encrypted, set up its ->i_crypt_info in preparation for 744 + * If the directory is encrypted, set up its encryption key in preparation for 746 745 * encrypting the name of the new file. Also, if the new inode will be 747 - * encrypted, set up its ->i_crypt_info and set *encrypt_ret=true. 746 + * encrypted, set up its encryption key too and set *encrypt_ret=true. 748 747 * 749 748 * This isn't %GFP_NOFS-safe, and therefore it should be called before starting 750 749 * any filesystem transaction to create the inode. For this reason, ->i_ino ··· 753 752 * This doesn't persist the new inode's encryption context. That still needs to 754 753 * be done later by calling fscrypt_set_context(). 755 754 * 756 - * Return: 0 on success, -ENOKEY if the encryption key is missing, or another 757 - * -errno code 755 + * Return: 0 on success, -ENOKEY if a key needs to be set up for @dir or @inode 756 + * but the needed master key is absent, or another -errno code 758 757 */ 759 758 int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, 760 759 bool *encrypt_ret) ··· 801 800 */ 802 801 void fscrypt_put_encryption_info(struct inode *inode) 803 802 { 804 - put_crypt_info(inode->i_crypt_info); 805 - inode->i_crypt_info = NULL; 803 + /* 804 + * Ideally we'd start with a lightweight IS_ENCRYPTED() check here 805 + * before proceeding to retrieve and check the pointer. However, during 806 + * inode creation, the fscrypt_inode_info is set before S_ENCRYPTED. If 807 + * an error occurs, it needs to be cleaned up regardless. 
808 + */ 809 + struct fscrypt_inode_info **ci_addr = fscrypt_inode_info_addr(inode); 810 + 811 + put_crypt_info(*ci_addr); 812 + *ci_addr = NULL; 806 813 } 807 814 EXPORT_SYMBOL(fscrypt_put_encryption_info); 808 815
+4 -3
fs/crypto/policy.c
··· 727 727 err = fscrypt_require_key(dir); 728 728 if (err) 729 729 return ERR_PTR(err); 730 - return &dir->i_crypt_info->ci_policy; 730 + return &fscrypt_get_inode_info_raw(dir)->ci_policy; 731 731 } 732 732 733 733 return fscrypt_get_dummy_policy(dir->i_sb); ··· 746 746 */ 747 747 int fscrypt_context_for_new_inode(void *ctx, struct inode *inode) 748 748 { 749 - struct fscrypt_inode_info *ci = inode->i_crypt_info; 749 + struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); 750 750 751 751 BUILD_BUG_ON(sizeof(union fscrypt_context) != 752 752 FSCRYPT_SET_CONTEXT_MAX_SIZE); ··· 771 771 */ 772 772 int fscrypt_set_context(struct inode *inode, void *fs_data) 773 773 { 774 - struct fscrypt_inode_info *ci = inode->i_crypt_info; 774 + struct fscrypt_inode_info *ci; 775 775 union fscrypt_context ctx; 776 776 int ctxsize; 777 777 ··· 783 783 * This may be the first time the inode number is available, so do any 784 784 * delayed key setup that requires the inode number. 785 785 */ 786 + ci = fscrypt_get_inode_info_raw(inode); 786 787 if (ci->ci_policy.version == FSCRYPT_POLICY_V2 && 787 788 (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) 788 789 fscrypt_hash_inode_number(ci, ci->ci_master_key);
+2
fs/ext4/crypto.c
··· 227 227 } 228 228 229 229 const struct fscrypt_operations ext4_cryptops = { 230 + .inode_info_offs = (int)offsetof(struct ext4_inode_info, i_crypt_info) - 231 + (int)offsetof(struct ext4_inode_info, vfs_inode), 230 232 .needs_bounce_pages = 1, 231 233 .has_32bit_inodes = 1, 232 234 .supports_subblock_data_units = 1,
+8
fs/ext4/ext4.h
··· 1182 1182 __u32 i_csum_seed; 1183 1183 1184 1184 kprojid_t i_projid; 1185 + 1186 + #ifdef CONFIG_FS_ENCRYPTION 1187 + struct fscrypt_inode_info *i_crypt_info; 1188 + #endif 1189 + 1190 + #ifdef CONFIG_FS_VERITY 1191 + struct fsverity_info *i_verity_info; 1192 + #endif 1185 1193 }; 1186 1194 1187 1195 /*
+2 -2
fs/ext4/ialloc.c
··· 252 252 "nonexistent device\n", __func__, __LINE__); 253 253 return; 254 254 } 255 - if (atomic_read(&inode->i_count) > 1) { 255 + if (icount_read(inode) > 1) { 256 256 ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d", 257 257 __func__, __LINE__, inode->i_ino, 258 - atomic_read(&inode->i_count)); 258 + icount_read(inode)); 259 259 return; 260 260 } 261 261 if (inode->i_nlink) {
+6
fs/ext4/super.c
··· 1470 1470 init_rwsem(&ei->i_data_sem); 1471 1471 inode_init_once(&ei->vfs_inode); 1472 1472 ext4_fc_init_inode(&ei->vfs_inode); 1473 + #ifdef CONFIG_FS_ENCRYPTION 1474 + ei->i_crypt_info = NULL; 1475 + #endif 1476 + #ifdef CONFIG_FS_VERITY 1477 + ei->i_verity_info = NULL; 1478 + #endif 1473 1479 } 1474 1480 1475 1481 static int __init init_inodecache(void)
+2
fs/ext4/verity.c
··· 389 389 } 390 390 391 391 const struct fsverity_operations ext4_verityops = { 392 + .inode_info_offs = (int)offsetof(struct ext4_inode_info, i_verity_info) - 393 + (int)offsetof(struct ext4_inode_info, vfs_inode), 392 394 .begin_enable_verity = ext4_begin_enable_verity, 393 395 .end_enable_verity = ext4_end_enable_verity, 394 396 .get_verity_descriptor = ext4_get_verity_descriptor,
+6
fs/f2fs/f2fs.h
··· 907 907 908 908 unsigned int atomic_write_cnt; 909 909 loff_t original_i_size; /* original i_size before atomic write */ 910 + #ifdef CONFIG_FS_ENCRYPTION 911 + struct fscrypt_inode_info *i_crypt_info; /* filesystem encryption info */ 912 + #endif 913 + #ifdef CONFIG_FS_VERITY 914 + struct fsverity_info *i_verity_info; /* filesystem verity info */ 915 + #endif 910 916 }; 911 917 912 918 static inline void get_read_extent_info(struct extent_info *ext,
+11 -3
fs/f2fs/super.c
··· 480 480 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; 481 481 482 482 inode_init_once(&fi->vfs_inode); 483 + #ifdef CONFIG_FS_ENCRYPTION 484 + fi->i_crypt_info = NULL; 485 + #endif 486 + #ifdef CONFIG_FS_VERITY 487 + fi->i_verity_info = NULL; 488 + #endif 483 489 } 484 490 485 491 #ifdef CONFIG_QUOTA ··· 1750 1744 if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) { 1751 1745 if (!inode->i_nlink && !is_bad_inode(inode)) { 1752 1746 /* to avoid evict_inode call simultaneously */ 1753 - atomic_inc(&inode->i_count); 1747 + __iget(inode); 1754 1748 spin_unlock(&inode->i_lock); 1755 1749 1756 1750 /* should remain fi->extent_tree for writepage */ ··· 1769 1763 sb_end_intwrite(inode->i_sb); 1770 1764 1771 1765 spin_lock(&inode->i_lock); 1772 - atomic_dec(&inode->i_count); 1766 + iput(inode); 1773 1767 } 1774 1768 trace_f2fs_drop_inode(inode, 0); 1775 1769 return 0; ··· 3576 3570 } 3577 3571 3578 3572 static const struct fscrypt_operations f2fs_cryptops = { 3573 + .inode_info_offs = (int)offsetof(struct f2fs_inode_info, i_crypt_info) - 3574 + (int)offsetof(struct f2fs_inode_info, vfs_inode), 3579 3575 .needs_bounce_pages = 1, 3580 3576 .has_32bit_inodes = 1, 3581 3577 .supports_subblock_data_units = 1, ··· 3589 3581 .has_stable_inodes = f2fs_has_stable_inodes, 3590 3582 .get_devices = f2fs_get_devices, 3591 3583 }; 3592 - #endif 3584 + #endif /* CONFIG_FS_ENCRYPTION */ 3593 3585 3594 3586 static struct inode *f2fs_nfs_get_inode(struct super_block *sb, 3595 3587 u64 ino, u32 generation)
+2
fs/f2fs/verity.c
··· 287 287 } 288 288 289 289 const struct fsverity_operations f2fs_verityops = { 290 + .inode_info_offs = (int)offsetof(struct f2fs_inode_info, i_verity_info) - 291 + (int)offsetof(struct f2fs_inode_info, vfs_inode), 290 292 .begin_enable_verity = f2fs_begin_enable_verity, 291 293 .end_enable_verity = f2fs_end_enable_verity, 292 294 .get_verity_descriptor = f2fs_get_verity_descriptor,
+1 -1
fs/fs-writeback.c
··· 1767 1767 int ret = 0; 1768 1768 1769 1769 spin_lock(&inode->i_lock); 1770 - if (!atomic_read(&inode->i_count)) 1770 + if (!icount_read(inode)) 1771 1771 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); 1772 1772 else 1773 1773 WARN_ON(inode->i_state & I_WILL_FREE);
+1 -1
fs/gfs2/ops_fstype.c
··· 1754 1754 spin_unlock(&inode->i_lock); 1755 1755 continue; 1756 1756 } 1757 - atomic_inc(&inode->i_count); 1757 + __iget(inode); 1758 1758 spin_unlock(&inode->i_lock); 1759 1759 spin_unlock(&sb->s_inode_list_lock); 1760 1760
+1 -1
fs/hpfs/inode.c
··· 184 184 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 185 185 struct inode *parent; 186 186 if (i->i_ino == hpfs_sb(i->i_sb)->sb_root) return; 187 - if (hpfs_inode->i_rddir_off && !atomic_read(&i->i_count)) { 187 + if (hpfs_inode->i_rddir_off && !icount_read(i)) { 188 188 if (*hpfs_inode->i_rddir_off) 189 189 pr_err("write_inode: some position still there\n"); 190 190 kfree(hpfs_inode->i_rddir_off);
+70 -20
fs/inode.c
··· 534 534 { 535 535 if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) 536 536 return; 537 - if (atomic_read(&inode->i_count)) 537 + if (icount_read(inode)) 538 538 return; 539 539 if (!(inode->i_sb->s_flags & SB_ACTIVE)) 540 540 return; ··· 550 550 struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe, 551 551 struct inode *inode, u32 bit) 552 552 { 553 - void *bit_address; 553 + void *bit_address; 554 554 555 - bit_address = inode_state_wait_address(inode, bit); 556 - init_wait_var_entry(wqe, bit_address, 0); 557 - return __var_waitqueue(bit_address); 555 + bit_address = inode_state_wait_address(inode, bit); 556 + init_wait_var_entry(wqe, bit_address, 0); 557 + return __var_waitqueue(bit_address); 558 558 } 559 559 EXPORT_SYMBOL(inode_bit_waitqueue); 560 560 ··· 871 871 again: 872 872 spin_lock(&sb->s_inode_list_lock); 873 873 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 874 - if (atomic_read(&inode->i_count)) 874 + if (icount_read(inode)) 875 875 continue; 876 876 877 877 spin_lock(&inode->i_lock); 878 - if (atomic_read(&inode->i_count)) { 878 + if (icount_read(inode)) { 879 879 spin_unlock(&inode->i_lock); 880 880 continue; 881 881 } ··· 937 937 * unreclaimable for a while. Remove them lazily here; iput, 938 938 * sync, or the last page cache deletion will requeue them. 
939 939 */ 940 - if (atomic_read(&inode->i_count) || 940 + if (icount_read(inode) || 941 941 (inode->i_state & ~I_REFERENCED) || 942 942 !mapping_shrinkable(&inode->i_data)) { 943 943 list_lru_isolate(lru, &inode->i_lru); ··· 1279 1279 struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); 1280 1280 struct inode *old; 1281 1281 1282 + might_sleep(); 1283 + 1282 1284 again: 1283 1285 spin_lock(&inode_hash_lock); 1284 1286 old = find_inode(inode->i_sb, head, test, data, true); ··· 1384 1382 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1385 1383 struct inode *inode, *new; 1386 1384 1385 + might_sleep(); 1386 + 1387 1387 again: 1388 1388 inode = find_inode(sb, head, test, data, false); 1389 1389 if (inode) { ··· 1426 1422 { 1427 1423 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1428 1424 struct inode *inode; 1425 + 1426 + might_sleep(); 1427 + 1429 1428 again: 1430 1429 inode = find_inode_fast(sb, head, ino, false); 1431 1430 if (inode) { ··· 1612 1605 int (*test)(struct inode *, void *), void *data) 1613 1606 { 1614 1607 struct inode *inode; 1608 + 1609 + might_sleep(); 1610 + 1615 1611 again: 1616 1612 inode = ilookup5_nowait(sb, hashval, test, data); 1617 1613 if (inode) { ··· 1640 1630 { 1641 1631 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1642 1632 struct inode *inode; 1633 + 1634 + might_sleep(); 1635 + 1643 1636 again: 1644 1637 inode = find_inode_fast(sb, head, ino, false); 1645 1638 ··· 1793 1780 ino_t ino = inode->i_ino; 1794 1781 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1795 1782 1783 + might_sleep(); 1784 + 1796 1785 while (1) { 1797 1786 struct inode *old = NULL; 1798 1787 spin_lock(&inode_hash_lock); ··· 1840 1825 int (*test)(struct inode *, void *), void *data) 1841 1826 { 1842 1827 struct inode *old; 1828 + 1829 + might_sleep(); 1843 1830 1844 1831 inode->i_state |= I_CREATING; 1845 1832 old = inode_insert5(inode, hashval, test, NULL, data); ··· 1925 1908 */ 1926 1909 
void iput(struct inode *inode) 1927 1910 { 1928 - if (!inode) 1911 + might_sleep(); 1912 + if (unlikely(!inode)) 1929 1913 return; 1930 - BUG_ON(inode->i_state & I_CLEAR); 1914 + 1931 1915 retry: 1932 - if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) { 1933 - if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) { 1934 - atomic_inc(&inode->i_count); 1935 - spin_unlock(&inode->i_lock); 1936 - trace_writeback_lazytime_iput(inode); 1937 - mark_inode_dirty_sync(inode); 1938 - goto retry; 1939 - } 1940 - iput_final(inode); 1916 + lockdep_assert_not_held(&inode->i_lock); 1917 + VFS_BUG_ON_INODE(inode->i_state & I_CLEAR, inode); 1918 + /* 1919 + * Note this assert is technically racy as if the count is bogusly 1920 + * equal to one, then two CPUs racing to further drop it can both 1921 + * conclude it's fine. 1922 + */ 1923 + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) < 1, inode); 1924 + 1925 + if (atomic_add_unless(&inode->i_count, -1, 1)) 1926 + return; 1927 + 1928 + if ((inode->i_state & I_DIRTY_TIME) && inode->i_nlink) { 1929 + trace_writeback_lazytime_iput(inode); 1930 + mark_inode_dirty_sync(inode); 1931 + goto retry; 1941 1932 } 1933 + 1934 + spin_lock(&inode->i_lock); 1935 + if (unlikely((inode->i_state & I_DIRTY_TIME) && inode->i_nlink)) { 1936 + spin_unlock(&inode->i_lock); 1937 + goto retry; 1938 + } 1939 + 1940 + if (!atomic_dec_and_test(&inode->i_count)) { 1941 + spin_unlock(&inode->i_lock); 1942 + return; 1943 + } 1944 + 1945 + /* 1946 + * iput_final() drops ->i_lock, we can't assert on it as the inode may 1947 + * be deallocated by the time the call returns. 1948 + */ 1949 + iput_final(inode); 1942 1950 } 1943 1951 EXPORT_SYMBOL(iput); 1944 1952 ··· 2959 2917 * 2960 2918 * TODO: add a proper inode dumping routine, this is a stub to get debug off the 2961 2919 * ground. 2920 + * 2921 + * TODO: handle getting to fs type with get_kernel_nofault()? 2922 + * See dump_mapping() above. 
2962 2923 */ 2963 2924 void dump_inode(struct inode *inode, const char *reason) 2964 2925 { 2965 - pr_warn("%s encountered for inode %px (%s)\n", reason, inode, inode->i_sb->s_type->name); 2926 + struct super_block *sb = inode->i_sb; 2927 + 2928 + pr_warn("%s encountered for inode %px\n" 2929 + "fs %s mode %ho opflags 0x%hx flags 0x%x state 0x%x count %d\n", 2930 + reason, inode, sb->s_type->name, inode->i_mode, inode->i_opflags, 2931 + inode->i_flags, inode->i_state, atomic_read(&inode->i_count)); 2966 2932 } 2967 2933 2968 2934 EXPORT_SYMBOL(dump_inode);
+2 -2
fs/nfs/inode.c
··· 608 608 inode->i_sb->s_id, 609 609 (unsigned long long)NFS_FILEID(inode), 610 610 nfs_display_fhandle_hash(fh), 611 - atomic_read(&inode->i_count)); 611 + icount_read(inode)); 612 612 613 613 out: 614 614 return inode; ··· 2236 2236 dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%llx)\n", 2237 2237 __func__, inode->i_sb->s_id, inode->i_ino, 2238 2238 nfs_display_fhandle_hash(NFS_FH(inode)), 2239 - atomic_read(&inode->i_count), fattr->valid); 2239 + icount_read(inode), fattr->valid); 2240 2240 2241 2241 if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { 2242 2242 /* Only a mounted-on-fileid? Just exit */
+1 -1
fs/notify/fsnotify.c
··· 66 66 * removed all zero refcount inodes, in any case. Test to 67 67 * be sure. 68 68 */ 69 - if (!atomic_read(&inode->i_count)) { 69 + if (!icount_read(inode)) { 70 70 spin_unlock(&inode->i_lock); 71 71 continue; 72 72 }
+1 -1
fs/smb/client/inode.c
··· 2844 2844 } 2845 2845 2846 2846 cifs_dbg(FYI, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time %ld jiffies %ld\n", 2847 - full_path, inode, inode->i_count.counter, 2847 + full_path, inode, icount_read(inode), 2848 2848 dentry, cifs_get_time(dentry), jiffies); 2849 2849 2850 2850 again:
+2
fs/ubifs/crypto.c
··· 88 88 } 89 89 90 90 const struct fscrypt_operations ubifs_crypt_operations = { 91 + .inode_info_offs = (int)offsetof(struct ubifs_inode, i_crypt_info) - 92 + (int)offsetof(struct ubifs_inode, vfs_inode), 91 93 .legacy_key_prefix = "ubifs:", 92 94 .get_context = ubifs_crypt_get_context, 93 95 .set_context = ubifs_crypt_set_context,
+1 -1
fs/ubifs/super.c
··· 358 358 goto out; 359 359 360 360 dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); 361 - ubifs_assert(c, !atomic_read(&inode->i_count)); 361 + ubifs_assert(c, !icount_read(inode)); 362 362 363 363 truncate_inode_pages_final(&inode->i_data); 364 364
+4
fs/ubifs/ubifs.h
··· 365 365 * @read_in_a_row: number of consecutive pages read in a row (for bulk read) 366 366 * @data_len: length of the data attached to the inode 367 367 * @data: inode's data 368 + * @i_crypt_info: inode's fscrypt information 368 369 * 369 370 * @ui_mutex exists for two main reasons. At first it prevents inodes from 370 371 * being written back while UBIFS changing them, being in the middle of an VFS ··· 417 416 pgoff_t read_in_a_row; 418 417 int data_len; 419 418 void *data; 419 + #ifdef CONFIG_FS_ENCRYPTION 420 + struct fscrypt_inode_info *i_crypt_info; 421 + #endif 420 422 }; 421 423 422 424 /**
+3 -3
fs/verity/enable.c
··· 284 284 /* Successfully enabled verity */ 285 285 286 286 /* 287 - * Readers can start using ->i_verity_info immediately, so it 288 - * can't be rolled back once set. So don't set it until just 289 - * after the filesystem has successfully enabled verity. 287 + * Readers can start using the inode's verity info immediately, 288 + * so it can't be rolled back once set. So don't set it until 289 + * just after the filesystem has successfully enabled verity. 290 290 */ 291 291 fsverity_set_info(inode, vi); 292 292 }
+5 -4
fs/verity/fsverity_private.h
··· 63 63 * fsverity_info - cached verity metadata for an inode 64 64 * 65 65 * When a verity file is first opened, an instance of this struct is allocated 66 - * and stored in ->i_verity_info; it remains until the inode is evicted. It 67 - * caches information about the Merkle tree that's needed to efficiently verify 68 - * data read from the file. It also caches the file digest. The Merkle tree 69 - * pages themselves are not cached here, but the filesystem may cache them. 66 + * and a pointer to it is stored in the file's in-memory inode. It remains 67 + * until the inode is evicted. It caches information about the Merkle tree 68 + * that's needed to efficiently verify data read from the file. It also caches 69 + * the file digest. The Merkle tree pages themselves are not cached here, but 70 + * the filesystem may cache them. 70 71 */ 71 72 struct fsverity_info { 72 73 struct merkle_tree_params tree_params;
+12 -11
fs/verity/open.c
··· 244 244 void fsverity_set_info(struct inode *inode, struct fsverity_info *vi) 245 245 { 246 246 /* 247 - * Multiple tasks may race to set ->i_verity_info, so use 248 - * cmpxchg_release(). This pairs with the smp_load_acquire() in 249 - * fsverity_get_info(). I.e., here we publish ->i_verity_info with a 250 - * RELEASE barrier so that other tasks can ACQUIRE it. 247 + * Multiple tasks may race to set the inode's verity info pointer, so 248 + * use cmpxchg_release(). This pairs with the smp_load_acquire() in 249 + * fsverity_get_info(). I.e., publish the pointer with a RELEASE 250 + * barrier so that other tasks can ACQUIRE it. 251 251 */ 252 - if (cmpxchg_release(&inode->i_verity_info, NULL, vi) != NULL) { 253 - /* Lost the race, so free the fsverity_info we allocated. */ 252 + if (cmpxchg_release(fsverity_info_addr(inode), NULL, vi) != NULL) { 253 + /* Lost the race, so free the verity info we allocated. */ 254 254 fsverity_free_info(vi); 255 255 /* 256 - * Afterwards, the caller may access ->i_verity_info directly, 257 - * so make sure to ACQUIRE the winning fsverity_info. 256 + * Afterwards, the caller may access the inode's verity info 257 + * directly, so make sure to ACQUIRE the winning verity info. 258 258 */ 259 259 (void)fsverity_get_info(inode); 260 260 } ··· 350 350 return 0; 351 351 } 352 352 353 - /* Ensure the inode has an ->i_verity_info */ 354 353 static int ensure_verity_info(struct inode *inode) 355 354 { 356 355 struct fsverity_info *vi = fsverity_get_info(inode); ··· 394 395 395 396 void __fsverity_cleanup_inode(struct inode *inode) 396 397 { 397 - fsverity_free_info(inode->i_verity_info); 398 - inode->i_verity_info = NULL; 398 + struct fsverity_info **vi_addr = fsverity_info_addr(inode); 399 + 400 + fsverity_free_info(*vi_addr); 401 + *vi_addr = NULL; 399 402 } 400 403 EXPORT_SYMBOL_GPL(__fsverity_cleanup_inode); 401 404
+1 -1
fs/verity/verify.c
··· 245 245 unsigned long max_ra_pages) 246 246 { 247 247 struct inode *inode = data_folio->mapping->host; 248 - struct fsverity_info *vi = inode->i_verity_info; 248 + struct fsverity_info *vi = *fsverity_info_addr(inode); 249 249 const unsigned int block_size = vi->tree_params.block_size; 250 250 u64 pos = (u64)data_folio->index << PAGE_SHIFT; 251 251
+1 -1
fs/xfs/xfs_inode.c
··· 1035 1035 int error = 0; 1036 1036 1037 1037 xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); 1038 - if (atomic_read(&VFS_I(ip)->i_count)) 1038 + if (icount_read(VFS_I(ip))) 1039 1039 xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL); 1040 1040 ASSERT(new_size <= XFS_ISIZE(ip)); 1041 1041 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+1 -1
fs/xfs/xfs_trace.h
··· 1152 1152 TP_fast_assign( 1153 1153 __entry->dev = VFS_I(ip)->i_sb->s_dev; 1154 1154 __entry->ino = ip->i_ino; 1155 - __entry->count = atomic_read(&VFS_I(ip)->i_count); 1155 + __entry->count = icount_read(VFS_I(ip)); 1156 1156 __entry->pincount = atomic_read(&ip->i_pincount); 1157 1157 __entry->iflags = ip->i_flags; 1158 1158 __entry->caller_ip = caller_ip;
+124 -122
include/linux/fs.h
··· 72 72 struct seq_file; 73 73 struct workqueue_struct; 74 74 struct iov_iter; 75 - struct fscrypt_inode_info; 76 75 struct fscrypt_operations; 77 - struct fsverity_info; 78 76 struct fsverity_operations; 79 77 struct fsnotify_mark_connector; 80 78 struct fsnotify_sb_info; ··· 667 669 #define IOP_CACHED_LINK 0x0040 668 670 669 671 /* 672 + * Inode state bits. Protected by inode->i_lock 673 + * 674 + * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, 675 + * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. 676 + * 677 + * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, 678 + * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at 679 + * various stages of removing an inode. 680 + * 681 + * Two bits are used for locking and completion notification, I_NEW and I_SYNC. 682 + * 683 + * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on 684 + * fdatasync() (unless I_DIRTY_DATASYNC is also set). 685 + * Timestamp updates are the usual cause. 686 + * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of 687 + * these changes separately from I_DIRTY_SYNC so that we 688 + * don't have to write inode on fdatasync() when only 689 + * e.g. the timestamps have changed. 690 + * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. 691 + * I_DIRTY_TIME The inode itself has dirty timestamps, and the 692 + * lazytime mount option is enabled. We keep track of this 693 + * separately from I_DIRTY_SYNC in order to implement 694 + * lazytime. This gets cleared if I_DIRTY_INODE 695 + * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But 696 + * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already 697 + * in place because writeback might already be in progress 698 + * and we don't want to lose the time update 699 + * I_NEW Serves as both a mutex and completion notification. 700 + * New inodes set I_NEW. 
If two processes both create 701 + * the same inode, one of them will release its inode and 702 + * wait for I_NEW to be released before returning. 703 + * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can 704 + * also cause waiting on I_NEW, without I_NEW actually 705 + * being set. find_inode() uses this to prevent returning 706 + * nearly-dead inodes. 707 + * I_WILL_FREE Must be set when calling write_inode_now() if i_count 708 + * is zero. I_FREEING must be set when I_WILL_FREE is 709 + * cleared. 710 + * I_FREEING Set when inode is about to be freed but still has dirty 711 + * pages or buffers attached or the inode itself is still 712 + * dirty. 713 + * I_CLEAR Added by clear_inode(). In this state the inode is 714 + * clean and can be destroyed. Inode keeps I_FREEING. 715 + * 716 + * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are 717 + * prohibited for many purposes. iget() must wait for 718 + * the inode to be completely released, then create it 719 + * anew. Other functions will just ignore such inodes, 720 + * if appropriate. I_NEW is used for waiting. 721 + * 722 + * I_SYNC Writeback of inode is running. The bit is set during 723 + * data writeback, and cleared with a wakeup on the bit 724 + * address once it is done. The bit is also used to pin 725 + * the inode in memory for flusher thread. 726 + * 727 + * I_REFERENCED Marks the inode as recently references on the LRU list. 728 + * 729 + * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to 730 + * synchronize competing switching instances and to tell 731 + * wb stat updates to grab the i_pages lock. See 732 + * inode_switch_wbs_work_fn() for details. 733 + * 734 + * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper 735 + * and work dirs among overlayfs mounts. 736 + * 737 + * I_CREATING New object's inode in the middle of setting up. 738 + * 739 + * I_DONTCACHE Evict inode as soon as it is not used anymore. 
740 + * 741 + * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. 742 + * Used to detect that mark_inode_dirty() should not move 743 + * inode between dirty lists. 744 + * 745 + * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. 746 + * 747 + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding 748 + * i_count. 749 + * 750 + * Q: What is the difference between I_WILL_FREE and I_FREEING? 751 + * 752 + * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait 753 + * upon. There's one free address left. 754 + */ 755 + 756 + enum inode_state_bits { 757 + __I_NEW = 0U, 758 + __I_SYNC = 1U, 759 + __I_LRU_ISOLATING = 2U 760 + /* reserved wait address bit 3 */ 761 + }; 762 + 763 + enum inode_state_flags_t { 764 + I_NEW = (1U << __I_NEW), 765 + I_SYNC = (1U << __I_SYNC), 766 + I_LRU_ISOLATING = (1U << __I_LRU_ISOLATING), 767 + /* reserved flag bit 3 */ 768 + I_DIRTY_SYNC = (1U << 4), 769 + I_DIRTY_DATASYNC = (1U << 5), 770 + I_DIRTY_PAGES = (1U << 6), 771 + I_WILL_FREE = (1U << 7), 772 + I_FREEING = (1U << 8), 773 + I_CLEAR = (1U << 9), 774 + I_REFERENCED = (1U << 10), 775 + I_LINKABLE = (1U << 11), 776 + I_DIRTY_TIME = (1U << 12), 777 + I_WB_SWITCH = (1U << 13), 778 + I_OVL_INUSE = (1U << 14), 779 + I_CREATING = (1U << 15), 780 + I_DONTCACHE = (1U << 16), 781 + I_SYNC_QUEUED = (1U << 17), 782 + I_PINNING_NETFS_WB = (1U << 18) 783 + }; 784 + 785 + #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) 786 + #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) 787 + #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) 788 + 789 + /* 670 790 * Keep mostly read-only and often accessed (especially for 671 791 * the RCU path lookup and 'stat' data) fields at the beginning 672 792 * of the 'struct inode' ··· 842 726 #endif 843 727 844 728 /* Misc */ 845 - u32 i_state; 729 + enum inode_state_flags_t i_state; 846 730 /* 32-bit hole */ 847 731 struct rw_semaphore i_rwsem; 848 732 ··· 896 780 __u32 i_fsnotify_mask; /* 
all events this inode cares about */ 897 781 /* 32-bit hole reserved for expanding i_fsnotify_mask */ 898 782 struct fsnotify_mark_connector __rcu *i_fsnotify_marks; 899 - #endif 900 - 901 - #ifdef CONFIG_FS_ENCRYPTION 902 - struct fscrypt_inode_info *i_crypt_info; 903 - #endif 904 - 905 - #ifdef CONFIG_FS_VERITY 906 - struct fsverity_info *i_verity_info; 907 783 #endif 908 784 909 785 void *i_private; /* fs or device private pointer */ ··· 2600 2492 }; 2601 2493 } 2602 2494 2603 - /* 2604 - * Inode state bits. Protected by inode->i_lock 2605 - * 2606 - * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, 2607 - * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. 2608 - * 2609 - * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, 2610 - * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at 2611 - * various stages of removing an inode. 2612 - * 2613 - * Two bits are used for locking and completion notification, I_NEW and I_SYNC. 2614 - * 2615 - * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on 2616 - * fdatasync() (unless I_DIRTY_DATASYNC is also set). 2617 - * Timestamp updates are the usual cause. 2618 - * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of 2619 - * these changes separately from I_DIRTY_SYNC so that we 2620 - * don't have to write inode on fdatasync() when only 2621 - * e.g. the timestamps have changed. 2622 - * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. 2623 - * I_DIRTY_TIME The inode itself has dirty timestamps, and the 2624 - * lazytime mount option is enabled. We keep track of this 2625 - * separately from I_DIRTY_SYNC in order to implement 2626 - * lazytime. This gets cleared if I_DIRTY_INODE 2627 - * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. 
But 2628 - * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already 2629 - * in place because writeback might already be in progress 2630 - * and we don't want to lose the time update 2631 - * I_NEW Serves as both a mutex and completion notification. 2632 - * New inodes set I_NEW. If two processes both create 2633 - * the same inode, one of them will release its inode and 2634 - * wait for I_NEW to be released before returning. 2635 - * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can 2636 - * also cause waiting on I_NEW, without I_NEW actually 2637 - * being set. find_inode() uses this to prevent returning 2638 - * nearly-dead inodes. 2639 - * I_WILL_FREE Must be set when calling write_inode_now() if i_count 2640 - * is zero. I_FREEING must be set when I_WILL_FREE is 2641 - * cleared. 2642 - * I_FREEING Set when inode is about to be freed but still has dirty 2643 - * pages or buffers attached or the inode itself is still 2644 - * dirty. 2645 - * I_CLEAR Added by clear_inode(). In this state the inode is 2646 - * clean and can be destroyed. Inode keeps I_FREEING. 2647 - * 2648 - * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are 2649 - * prohibited for many purposes. iget() must wait for 2650 - * the inode to be completely released, then create it 2651 - * anew. Other functions will just ignore such inodes, 2652 - * if appropriate. I_NEW is used for waiting. 2653 - * 2654 - * I_SYNC Writeback of inode is running. The bit is set during 2655 - * data writeback, and cleared with a wakeup on the bit 2656 - * address once it is done. The bit is also used to pin 2657 - * the inode in memory for flusher thread. 2658 - * 2659 - * I_REFERENCED Marks the inode as recently references on the LRU list. 2660 - * 2661 - * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to 2662 - * synchronize competing switching instances and to tell 2663 - * wb stat updates to grab the i_pages lock. See 2664 - * inode_switch_wbs_work_fn() for details. 
2665 - * 2666 - * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper 2667 - * and work dirs among overlayfs mounts. 2668 - * 2669 - * I_CREATING New object's inode in the middle of setting up. 2670 - * 2671 - * I_DONTCACHE Evict inode as soon as it is not used anymore. 2672 - * 2673 - * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. 2674 - * Used to detect that mark_inode_dirty() should not move 2675 - * inode between dirty lists. 2676 - * 2677 - * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. 2678 - * 2679 - * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding 2680 - * i_count. 2681 - * 2682 - * Q: What is the difference between I_WILL_FREE and I_FREEING? 2683 - * 2684 - * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait 2685 - * upon. There's one free address left. 2686 - */ 2687 - #define __I_NEW 0 2688 - #define I_NEW (1 << __I_NEW) 2689 - #define __I_SYNC 1 2690 - #define I_SYNC (1 << __I_SYNC) 2691 - #define __I_LRU_ISOLATING 2 2692 - #define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) 2693 - 2694 - #define I_DIRTY_SYNC (1 << 3) 2695 - #define I_DIRTY_DATASYNC (1 << 4) 2696 - #define I_DIRTY_PAGES (1 << 5) 2697 - #define I_WILL_FREE (1 << 6) 2698 - #define I_FREEING (1 << 7) 2699 - #define I_CLEAR (1 << 8) 2700 - #define I_REFERENCED (1 << 9) 2701 - #define I_LINKABLE (1 << 10) 2702 - #define I_DIRTY_TIME (1 << 11) 2703 - #define I_WB_SWITCH (1 << 12) 2704 - #define I_OVL_INUSE (1 << 13) 2705 - #define I_CREATING (1 << 14) 2706 - #define I_DONTCACHE (1 << 15) 2707 - #define I_SYNC_QUEUED (1 << 16) 2708 - #define I_PINNING_NETFS_WB (1 << 17) 2709 - 2710 - #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) 2711 - #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) 2712 - #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) 2713 - 2714 2495 extern void __mark_inode_dirty(struct inode *, int); 2715 2496 static inline void mark_inode_dirty(struct inode *inode) 2716 2497 
{ ··· 2609 2612 static inline void mark_inode_dirty_sync(struct inode *inode) 2610 2613 { 2611 2614 __mark_inode_dirty(inode, I_DIRTY_SYNC); 2615 + } 2616 + 2617 + static inline int icount_read(const struct inode *inode) 2618 + { 2619 + return atomic_read(&inode->i_count); 2612 2620 } 2613 2621 2614 2622 /*
+37 -3
include/linux/fscrypt.h
··· 61 61 62 62 /* Crypto operations for filesystems */ 63 63 struct fscrypt_operations { 64 + /* 65 + * The offset of the pointer to struct fscrypt_inode_info in the 66 + * filesystem-specific part of the inode, relative to the beginning of 67 + * the common part of the inode (the 'struct inode'). 68 + */ 69 + ptrdiff_t inode_info_offs; 64 70 65 71 /* 66 72 * If set, then fs/crypto/ will allocate a global bounce page pool the ··· 201 195 int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, 202 196 struct dentry *dentry, unsigned int flags); 203 197 198 + /* 199 + * Returns the address of the fscrypt info pointer within the 200 + * filesystem-specific part of the inode. (To save memory on filesystems that 201 + * don't support fscrypt, a field in 'struct inode' itself is no longer used.) 202 + */ 203 + static inline struct fscrypt_inode_info ** 204 + fscrypt_inode_info_addr(const struct inode *inode) 205 + { 206 + VFS_WARN_ON_ONCE(inode->i_sb->s_cop->inode_info_offs == 0); 207 + return (void *)inode + inode->i_sb->s_cop->inode_info_offs; 208 + } 209 + 210 + /* 211 + * Load the inode's fscrypt info pointer, using a raw dereference. Since this 212 + * uses a raw dereference with no memory barrier, it is appropriate to use only 213 + * when the caller knows the inode's key setup already happened, resulting in 214 + * non-NULL fscrypt info. E.g., the file contents en/decryption functions use 215 + * this, since fscrypt_file_open() set up the key. 216 + */ 217 + static inline struct fscrypt_inode_info * 218 + fscrypt_get_inode_info_raw(const struct inode *inode) 219 + { 220 + struct fscrypt_inode_info *ci = *fscrypt_inode_info_addr(inode); 221 + 222 + VFS_WARN_ON_ONCE(ci == NULL); 223 + return ci; 224 + } 225 + 204 226 static inline struct fscrypt_inode_info * 205 227 fscrypt_get_inode_info(const struct inode *inode) 206 228 { 207 229 /* 208 230 * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). 
209 - * I.e., another task may publish ->i_crypt_info concurrently, executing 210 - * a RELEASE barrier. We need to use smp_load_acquire() here to safely 231 + * I.e., another task may publish the fscrypt info concurrently, 232 + * executing a RELEASE barrier. Use smp_load_acquire() here to safely 211 233 * ACQUIRE the memory the other task published. 212 234 */ 213 - return smp_load_acquire(&inode->i_crypt_info); 235 + return smp_load_acquire(fscrypt_inode_info_addr(inode)); 214 236 } 215 237 216 238 /**
+47 -10
include/linux/fsverity.h
··· 26 26 /* Arbitrary limit to bound the kmalloc() size. Can be changed. */ 27 27 #define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384 28 28 29 + struct fsverity_info; 30 + 29 31 /* Verity operations for filesystems */ 30 32 struct fsverity_operations { 33 + /** 34 + * The offset of the pointer to struct fsverity_info in the 35 + * filesystem-specific part of the inode, relative to the beginning of 36 + * the common part of the inode (the 'struct inode'). 37 + */ 38 + ptrdiff_t inode_info_offs; 31 39 32 40 /** 33 41 * Begin enabling verity on the given file. ··· 132 124 133 125 #ifdef CONFIG_FS_VERITY 134 126 127 + /* 128 + * Returns the address of the verity info pointer within the filesystem-specific 129 + * part of the inode. (To save memory on filesystems that don't support 130 + * fsverity, a field in 'struct inode' itself is no longer used.) 131 + */ 132 + static inline struct fsverity_info ** 133 + fsverity_info_addr(const struct inode *inode) 134 + { 135 + VFS_WARN_ON_ONCE(inode->i_sb->s_vop->inode_info_offs == 0); 136 + return (void *)inode + inode->i_sb->s_vop->inode_info_offs; 137 + } 138 + 135 139 static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) 136 140 { 137 141 /* 138 - * Pairs with the cmpxchg_release() in fsverity_set_info(). 139 - * I.e., another task may publish ->i_verity_info concurrently, 140 - * executing a RELEASE barrier. We need to use smp_load_acquire() here 141 - * to safely ACQUIRE the memory the other task published. 142 + * Since this function can be called on inodes belonging to filesystems 143 + * that don't support fsverity at all, and fsverity_info_addr() doesn't 144 + * work on such filesystems, we have to start with an IS_VERITY() check. 145 + * Checking IS_VERITY() here is also useful to minimize the overhead of 146 + * fsverity_active() on non-verity files. 
142 147 */ 143 - return smp_load_acquire(&inode->i_verity_info); 148 + if (!IS_VERITY(inode)) 149 + return NULL; 150 + 151 + /* 152 + * Pairs with the cmpxchg_release() in fsverity_set_info(). I.e., 153 + * another task may publish the inode's verity info concurrently, 154 + * executing a RELEASE barrier. Use smp_load_acquire() here to safely 155 + * ACQUIRE the memory the other task published. 156 + */ 157 + return smp_load_acquire(fsverity_info_addr(inode)); 144 158 } 145 159 146 160 /* enable.c */ ··· 186 156 * fsverity_cleanup_inode() - free the inode's verity info, if present 187 157 * @inode: an inode being evicted 188 158 * 189 - * Filesystems must call this on inode eviction to free ->i_verity_info. 159 + * Filesystems must call this on inode eviction to free the inode's verity info. 190 160 */ 191 161 static inline void fsverity_cleanup_inode(struct inode *inode) 192 162 { 193 - if (inode->i_verity_info) 163 + /* 164 + * Only IS_VERITY() inodes can have verity info, so start by checking 165 + * for IS_VERITY() (which is faster than retrieving the pointer to the 166 + * verity info). This minimizes overhead for non-verity inodes. 167 + */ 168 + if (IS_VERITY(inode)) 194 169 __fsverity_cleanup_inode(inode); 170 + else 171 + VFS_WARN_ON_ONCE(*fsverity_info_addr(inode) != NULL); 195 172 } 196 173 197 174 /* read_metadata.c */ ··· 304 267 * fsverity_active() - do reads from the inode need to go through fs-verity? 305 268 * @inode: inode to check 306 269 * 307 - * This checks whether ->i_verity_info has been set. 270 + * This checks whether the inode's verity info has been set. 308 271 * 309 272 * Filesystems call this from ->readahead() to check whether the pages need to 310 273 * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to 311 274 * a race condition where the file is being read concurrently with 312 - * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) 275 + * FS_IOC_ENABLE_VERITY completing. 
(S_VERITY is set before the verity info.) 313 276 * 314 277 * Return: true if reads need to go through fs-verity, otherwise false 315 278 */ ··· 324 287 * @filp: the struct file being set up 325 288 * 326 289 * When opening a verity file, deny the open if it is for writing. Otherwise, 327 - * set up the inode's ->i_verity_info if not already done. 290 + * set up the inode's verity info if not already done. 328 291 * 329 292 * When combined with fscrypt, this must be called after fscrypt_file_open(). 330 293 * Otherwise, we won't have the key set up to decrypt the verity metadata.
+1 -1
include/trace/events/filelock.h
··· 190 190 __entry->i_ino = inode->i_ino; 191 191 __entry->wcount = atomic_read(&inode->i_writecount); 192 192 __entry->rcount = atomic_read(&inode->i_readcount); 193 - __entry->icount = atomic_read(&inode->i_count); 193 + __entry->icount = icount_read(inode); 194 194 __entry->owner = fl->c.flc_owner; 195 195 __entry->flags = fl->c.flc_flags; 196 196 __entry->type = fl->c.flc_type;
+1 -1
security/landlock/fs.c
··· 1281 1281 struct landlock_object *object; 1282 1282 1283 1283 /* Only handles referenced inodes. */ 1284 - if (!atomic_read(&inode->i_count)) 1284 + if (!icount_read(inode)) 1285 1285 continue; 1286 1286 1287 1287 /*