Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

btrfs: introduce a new shutdown state

A new fs state EMERGENCY_SHUTDOWN is introduced, which is btrfs'
equivalent of XFS_IOC_GOINGDOWN or EXT4_IOC_SHUTDOWN. After entering the
emergency shutdown state, all operations will return errors (-EIO), and
the fs cannot be brought back to a normal state until unmount.

The new state will reject the following file operations:

- read_iter()
- write_iter()
- mmap()
- open()
- fallocate()
- remap_file_range()
- uring_cmd()
- splice_read()
  This requires a small wrapper to do the extra shutdown check, which then
  calls the regular filemap_splice_read() function.

This should reject most of the file operations on a shutdown btrfs.

And for the existing dirty folios, extra shutdown checks are introduced
to the following functions:

- run_delalloc_nocow()
- run_delalloc_compressed()
- cow_file_range()

So that dirty ranges will still be properly cleaned without being
submitted.

Finally the shutdown state will also set the fs error, so that no new
transaction will be committed, protecting the metadata from any possible
further corruption.

And when the fs enters shutdown mode for the first time, a critical-level
kernel message will show up to indicate the incident.

That message will be important for end users, as rejected delalloc ranges
will output error messages. Hopefully that shutdown message, and the fact
that all fs operations are returning errors, will prevent end users from
getting too confused about the delalloc error messages.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Anand Jain <asj@kernel.org>
Tested-by: Anand Jain <asj@kernel.org>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

authored by

Qu Wenruo and committed by
David Sterba
9b283945 892794c0

+73 -3
+24 -1
fs/btrfs/file.c
··· 1440 1440 struct btrfs_inode *inode = BTRFS_I(file_inode(file)); 1441 1441 ssize_t num_written, num_sync; 1442 1442 1443 + if (unlikely(btrfs_is_shutdown(inode->root->fs_info))) 1444 + return -EIO; 1443 1445 /* 1444 1446 * If the fs flips readonly due to some impossible error, although we 1445 1447 * have opened a file as writable, we have to stop this write operation ··· 2044 2042 struct file *filp = desc->file; 2045 2043 struct address_space *mapping = filp->f_mapping; 2046 2044 2045 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(filp))))) 2046 + return -EIO; 2047 2047 if (!mapping->a_ops->read_folio) 2048 2048 return -ENOEXEC; 2049 2049 ··· 3115 3111 int blocksize = BTRFS_I(inode)->root->fs_info->sectorsize; 3116 3112 int ret; 3117 3113 3114 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode)))) 3115 + return -EIO; 3116 + 3118 3117 /* Do not allow fallocate in ZONED mode */ 3119 3118 if (btrfs_is_zoned(inode_to_fs_info(inode))) 3120 3119 return -EOPNOTSUPP; ··· 3809 3802 { 3810 3803 int ret; 3811 3804 3805 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode)))) 3806 + return -EIO; 3807 + 3812 3808 filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; 3813 3809 3814 3810 ret = fsverity_file_open(inode, filp); ··· 3824 3814 { 3825 3815 ssize_t ret = 0; 3826 3816 3817 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(iocb->ki_filp))))) 3818 + return -EIO; 3819 + 3827 3820 if (iocb->ki_flags & IOCB_DIRECT) { 3828 3821 ret = btrfs_direct_read(iocb, to); 3829 3822 if (ret < 0 || !iov_iter_count(to) || ··· 3837 3824 return filemap_read(iocb, to, ret); 3838 3825 } 3839 3826 3827 + static ssize_t btrfs_file_splice_read(struct file *in, loff_t *ppos, 3828 + struct pipe_inode_info *pipe, 3829 + size_t len, unsigned int flags) 3830 + { 3831 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(in))))) 3832 + return -EIO; 3833 + 3834 + return filemap_splice_read(in, ppos, pipe, len, flags); 3835 + } 3836 + 3840 3837 const struct 
file_operations btrfs_file_operations = { 3841 3838 .llseek = btrfs_file_llseek, 3842 3839 .read_iter = btrfs_file_read_iter, 3843 - .splice_read = filemap_splice_read, 3840 + .splice_read = btrfs_file_splice_read, 3844 3841 .write_iter = btrfs_file_write_iter, 3845 3842 .splice_write = iter_file_splice_write, 3846 3843 .mmap_prepare = btrfs_file_mmap_prepare,
+28
fs/btrfs/fs.h
··· 29 29 #include "extent-io-tree.h" 30 30 #include "async-thread.h" 31 31 #include "block-rsv.h" 32 + #include "messages.h" 32 33 33 34 struct inode; 34 35 struct super_block; ··· 124 123 125 124 /* No more delayed iput can be queued. */ 126 125 BTRFS_FS_STATE_NO_DELAYED_IPUT, 126 + 127 + /* 128 + * Emergency shutdown, a step further than transaction aborted by 129 + * rejecting all operations. 130 + */ 131 + BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, 127 132 128 133 BTRFS_FS_STATE_COUNT 129 134 }; ··· 1126 1119 #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \ 1127 1120 (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ 1128 1121 &(fs_info)->fs_state))) 1122 + 1123 + static inline bool btrfs_is_shutdown(struct btrfs_fs_info *fs_info) 1124 + { 1125 + return test_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state); 1126 + } 1127 + 1128 + static inline void btrfs_force_shutdown(struct btrfs_fs_info *fs_info) 1129 + { 1130 + /* 1131 + * Here we do not want to use handle_fs_error(), which will mark the fs 1132 + * read-only. 1133 + * Some call sites like shutdown ioctl will mark the fs shutdown when 1134 + * the fs is frozen. But thaw path will handle RO and RW fs 1135 + * differently. 1136 + * 1137 + * So here we only mark the fs error without flipping it RO. 1138 + */ 1139 + WRITE_ONCE(fs_info->fs_error, -EIO); 1140 + if (!test_and_set_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state)) 1141 + btrfs_crit(fs_info, "emergency shutdown"); 1142 + } 1129 1143 1130 1144 /* 1131 1145 * We use folio flag owner_2 to indicate there is an ordered extent with
+14 -2
fs/btrfs/inode.c
··· 864 864 u64 actual_end; 865 865 u64 i_size; 866 866 int ret = 0; 867 - struct folio **folios; 867 + struct folio **folios = NULL; 868 868 unsigned long nr_folios; 869 869 unsigned long total_compressed = 0; 870 870 unsigned long total_in = 0; ··· 872 872 int i; 873 873 int compress_type = fs_info->compress_type; 874 874 int compress_level = fs_info->compress_level; 875 + 876 + if (unlikely(btrfs_is_shutdown(fs_info))) 877 + goto cleanup_and_bail_uncompressed; 875 878 876 879 inode_should_defrag(inode, start, end, end - start + 1, SZ_16K); 877 880 ··· 1290 1287 unsigned clear_bits; 1291 1288 unsigned long page_ops; 1292 1289 int ret = 0; 1290 + 1291 + if (unlikely(btrfs_is_shutdown(fs_info))) { 1292 + ret = -EIO; 1293 + goto out_unlock; 1294 + } 1293 1295 1294 1296 if (btrfs_is_free_space_inode(inode)) { 1295 1297 ret = -EINVAL; ··· 2014 2006 { 2015 2007 struct btrfs_fs_info *fs_info = inode->root->fs_info; 2016 2008 struct btrfs_root *root = inode->root; 2017 - struct btrfs_path *path; 2009 + struct btrfs_path *path = NULL; 2018 2010 u64 cow_start = (u64)-1; 2019 2011 /* 2020 2012 * If not 0, represents the inclusive end of the last fallback_to_cow() ··· 2044 2036 */ 2045 2037 ASSERT(!btrfs_is_zoned(fs_info) || btrfs_is_data_reloc_root(root)); 2046 2038 2039 + if (unlikely(btrfs_is_shutdown(fs_info))) { 2040 + ret = -EIO; 2041 + goto error; 2042 + } 2047 2043 path = btrfs_alloc_path(); 2048 2044 if (!path) { 2049 2045 ret = -ENOMEM;
+3
fs/btrfs/ioctl.c
··· 5077 5077 5078 5078 int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 5079 5079 { 5080 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(cmd->file))))) 5081 + return -EIO; 5082 + 5080 5083 switch (cmd->cmd_op) { 5081 5084 case BTRFS_IOC_ENCODED_READ: 5082 5085 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+1
fs/btrfs/messages.c
··· 24 24 [BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C', 25 25 [BTRFS_FS_STATE_SKIP_META_CSUMS] = 'S', 26 26 [BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L', 27 + [BTRFS_FS_STATE_EMERGENCY_SHUTDOWN] = 'E', 27 28 }; 28 29 29 30 static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+3
fs/btrfs/reflink.c
··· 868 868 bool same_inode = dst_inode == src_inode; 869 869 int ret; 870 870 871 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(src_file))))) 872 + return -EIO; 873 + 871 874 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 872 875 return -EINVAL; 873 876