Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ext4: move ordered data handling out of ext4_block_do_zero_range()

Remove the handle parameter from ext4_block_do_zero_range() and move the
ordered data handling to ext4_block_zero_eof().

Ordered data handling is necessary for truncate up and append writes
across a range extending beyond EOF. The ordered data must be committed
before updating i_disksize to prevent exposing stale on-disk data from
concurrent post-EOF mmap writes during previous folio writeback or in
case of a system crash during append writes.

Ordered data handling is unnecessary for partial block hole punching
because the entire punch operation does not provide atomicity guarantees
and can already expose intermediate results in case of a crash.

Hole punching can only ever expose data that was there before the punch,
but a missed zeroing during append / truncate could expose data that was
not visible in the file before the operation.

Since ordered data handling is no longer performed inside
ext4_zero_partial_blocks(), ext4_punch_hole() no longer needs to attach
jinode.

This prepares for the conversion to the iomap infrastructure, which
does not use ordered data mode while zeroing post-EOF partial blocks.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260327102939.1095257-6-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

authored by

Zhang Yi and committed by
Theodore Ts'o
69e2d5c1 ad11526d

+32 -29
+32 -29
fs/ext4/inode.c
··· 4105 4105 return err ? ERR_PTR(err) : NULL; 4106 4106 } 4107 4107 4108 - static int ext4_block_do_zero_range(handle_t *handle, struct inode *inode, 4109 - loff_t from, loff_t length, bool *did_zero) 4108 + static int ext4_block_do_zero_range(struct inode *inode, loff_t from, 4109 + loff_t length, bool *did_zero, 4110 + bool *zero_written) 4110 4111 { 4111 4112 struct buffer_head *bh; 4112 4113 struct folio *folio; 4113 - int err = 0; 4114 4114 4115 4115 bh = ext4_load_tail_bh(inode, from); 4116 4116 if (IS_ERR_OR_NULL(bh)) ··· 4121 4121 BUFFER_TRACE(bh, "zeroed end of block"); 4122 4122 4123 4123 mark_buffer_dirty(bh); 4124 - /* 4125 - * Only the written block requires ordered data to prevent exposing 4126 - * stale data. 4127 - */ 4128 - if (ext4_should_order_data(inode) && 4129 - !buffer_unwritten(bh) && !buffer_delay(bh)) 4130 - err = ext4_jbd2_inode_add_write(handle, inode, from, length); 4131 - if (!err && did_zero) 4124 + if (did_zero) 4132 4125 *did_zero = true; 4126 + if (zero_written && !buffer_unwritten(bh) && !buffer_delay(bh)) 4127 + *zero_written = true; 4133 4128 4134 4129 folio_unlock(folio); 4135 4130 folio_put(folio); 4136 - return err; 4131 + return 0; 4137 4132 } 4138 4133 4139 4134 static int ext4_block_journalled_zero_range(handle_t *handle, ··· 4171 4176 * shortened to end of the block that corresponds to 'from'. 
4172 4177 */ 4173 4178 static int ext4_block_zero_range(handle_t *handle, struct inode *inode, 4174 - loff_t from, loff_t length, bool *did_zero) 4179 + loff_t from, loff_t length, bool *did_zero, 4180 + bool *zero_written) 4175 4181 { 4176 4182 unsigned blocksize = inode->i_sb->s_blocksize; 4177 4183 unsigned int max = blocksize - (from & (blocksize - 1)); ··· 4191 4195 return ext4_block_journalled_zero_range(handle, inode, from, 4192 4196 length, did_zero); 4193 4197 } 4194 - return ext4_block_do_zero_range(handle, inode, from, length, did_zero); 4198 + return ext4_block_do_zero_range(inode, from, length, did_zero, 4199 + zero_written); 4195 4200 } 4196 4201 4197 4202 /* ··· 4208 4211 unsigned int blocksize = i_blocksize(inode); 4209 4212 unsigned int offset; 4210 4213 loff_t length = end - from; 4214 + bool did_zero = false; 4215 + bool zero_written = false; 4216 + int err; 4211 4217 4212 4218 offset = from & (blocksize - 1); 4213 4219 if (!offset || from >= end) ··· 4222 4222 if (length > blocksize - offset) 4223 4223 length = blocksize - offset; 4224 4224 4225 - return ext4_block_zero_range(handle, inode, from, length, NULL); 4225 + err = ext4_block_zero_range(handle, inode, from, length, 4226 + &did_zero, &zero_written); 4227 + if (err) 4228 + return err; 4229 + /* 4230 + * It's necessary to order zeroed data before update i_disksize when 4231 + * truncating up or performing an append write, because there might be 4232 + * exposing stale on-disk data which may caused by concurrent post-EOF 4233 + * mmap write during folio writeback. 
4234 + */ 4235 + if (ext4_should_order_data(inode) && 4236 + did_zero && zero_written && !IS_DAX(inode)) 4237 + err = ext4_jbd2_inode_add_write(handle, inode, from, length); 4238 + 4239 + return err; 4226 4240 } 4227 4241 4228 4242 int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, ··· 4258 4244 if (start == end && 4259 4245 (partial_start || (partial_end != sb->s_blocksize - 1))) { 4260 4246 err = ext4_block_zero_range(handle, inode, lstart, 4261 - length, NULL); 4247 + length, NULL, NULL); 4262 4248 return err; 4263 4249 } 4264 4250 /* Handle partial zero out on the start of the range */ 4265 4251 if (partial_start) { 4266 4252 err = ext4_block_zero_range(handle, inode, lstart, 4267 - sb->s_blocksize, NULL); 4253 + sb->s_blocksize, NULL, NULL); 4268 4254 if (err) 4269 4255 return err; 4270 4256 } ··· 4272 4258 if (partial_end != sb->s_blocksize - 1) 4273 4259 err = ext4_block_zero_range(handle, inode, 4274 4260 byte_end - partial_end, 4275 - partial_end + 1, NULL); 4261 + partial_end + 1, NULL, NULL); 4276 4262 return err; 4277 4263 } 4278 4264 ··· 4446 4432 if (end > max_end) 4447 4433 end = max_end; 4448 4434 length = end - offset; 4449 - 4450 - /* 4451 - * Attach jinode to inode for jbd2 if we do any zeroing of partial 4452 - * block. 4453 - */ 4454 - if (!IS_ALIGNED(offset | end, sb->s_blocksize)) { 4455 - ret = ext4_inode_attach_jinode(inode); 4456 - if (ret < 0) 4457 - return ret; 4458 - } 4459 - 4460 4435 4461 4436 ret = ext4_update_disksize_before_punch(inode, offset, length); 4462 4437 if (ret)