Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

jbd2: store jinode dirty range in PAGE_SIZE units

jbd2_inode fields are updated under journal->j_list_lock, but some paths
read them without holding the lock (e.g. fast commit helpers and ordered
truncate helpers).

READ_ONCE() alone is not sufficient for the dirty range fields when they
are stored as loff_t, because a 64-bit load is not atomic on 32-bit
platforms and lockless readers can observe torn values.
Store the dirty range in PAGE_SIZE units as pgoff_t instead.

Represent the dirty range end as an exclusive end page. This avoids a
special sentinel value and keeps MAX_LFS_FILESIZE representable on 32-bit
platforms.

Publish a new dirty range by updating end_page before start_page, and
treat start_page >= end_page as empty in the accessor for robustness.

Use READ_ONCE() on the read side and WRITE_ONCE() on the write side for the
dirty range and i_flags to match the existing lockless access pattern.

Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Li Chen <me@linux.beauty>
Link: https://patch.msgid.link/20260306085643.465275-5-me@linux.beauty
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

Authored by Li Chen and committed by Theodore Ts'o
4edafa81 be81084e

+81 -36
+42 -13
fs/jbd2/commit.c
··· 180 180 /* Send all the data buffers related to an inode */ 181 181 int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode) 182 182 { 183 - if (!jinode || !(jinode->i_flags & JI_WRITE_DATA)) 183 + unsigned long flags; 184 + 185 + if (!jinode) 186 + return 0; 187 + 188 + flags = READ_ONCE(jinode->i_flags); 189 + if (!(flags & JI_WRITE_DATA)) 184 190 return 0; 185 191 186 192 trace_jbd2_submit_inode_data(jinode->i_vfs_inode); ··· 197 191 198 192 int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode) 199 193 { 200 - if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) || 201 - !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping) 194 + struct address_space *mapping; 195 + struct inode *inode; 196 + unsigned long flags; 197 + loff_t start_byte, end_byte; 198 + 199 + if (!jinode) 200 + return 0; 201 + 202 + flags = READ_ONCE(jinode->i_flags); 203 + if (!(flags & JI_WAIT_DATA)) 204 + return 0; 205 + 206 + inode = jinode->i_vfs_inode; 207 + if (!inode) 208 + return 0; 209 + 210 + mapping = inode->i_mapping; 211 + if (!mapping) 212 + return 0; 213 + 214 + if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte)) 202 215 return 0; 203 216 return filemap_fdatawait_range_keep_errors( 204 - jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start, 205 - jinode->i_dirty_end); 217 + mapping, start_byte, end_byte); 206 218 } 207 219 EXPORT_SYMBOL(jbd2_wait_inode_data); 208 220 ··· 242 218 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 243 219 if (!(jinode->i_flags & JI_WRITE_DATA)) 244 220 continue; 245 - jinode->i_flags |= JI_COMMIT_RUNNING; 221 + WRITE_ONCE(jinode->i_flags, 222 + jinode->i_flags | JI_COMMIT_RUNNING); 246 223 spin_unlock(&journal->j_list_lock); 247 224 /* submit the inode data buffers. 
*/ 248 225 trace_jbd2_submit_inode_data(jinode->i_vfs_inode); ··· 254 229 } 255 230 spin_lock(&journal->j_list_lock); 256 231 J_ASSERT(jinode->i_transaction == commit_transaction); 257 - jinode->i_flags &= ~JI_COMMIT_RUNNING; 232 + WRITE_ONCE(jinode->i_flags, 233 + jinode->i_flags & ~JI_COMMIT_RUNNING); 258 234 smp_mb(); 259 235 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 260 236 } ··· 266 240 int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) 267 241 { 268 242 struct address_space *mapping = jinode->i_vfs_inode->i_mapping; 243 + loff_t start_byte, end_byte; 244 + 245 + if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte)) 246 + return 0; 269 247 270 248 return filemap_fdatawait_range_keep_errors(mapping, 271 - jinode->i_dirty_start, 272 - jinode->i_dirty_end); 249 + start_byte, end_byte); 273 250 } 274 251 275 252 /* ··· 291 262 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 292 263 if (!(jinode->i_flags & JI_WAIT_DATA)) 293 264 continue; 294 - jinode->i_flags |= JI_COMMIT_RUNNING; 265 + WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING); 295 266 spin_unlock(&journal->j_list_lock); 296 267 /* wait for the inode data buffers writeout. */ 297 268 if (journal->j_finish_inode_data_buffers) { ··· 301 272 } 302 273 cond_resched(); 303 274 spin_lock(&journal->j_list_lock); 304 - jinode->i_flags &= ~JI_COMMIT_RUNNING; 275 + WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING); 305 276 smp_mb(); 306 277 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 307 278 } ··· 317 288 &jinode->i_transaction->t_inode_list); 318 289 } else { 319 290 jinode->i_transaction = NULL; 320 - jinode->i_dirty_start = 0; 321 - jinode->i_dirty_end = 0; 291 + WRITE_ONCE(jinode->i_dirty_start_page, 0); 292 + WRITE_ONCE(jinode->i_dirty_end_page, 0); 322 293 } 323 294 } 324 295 spin_unlock(&journal->j_list_lock);
+2 -3
fs/jbd2/journal.c
··· 3018 3018 jinode->i_next_transaction = NULL; 3019 3019 jinode->i_vfs_inode = inode; 3020 3020 jinode->i_flags = 0; 3021 - jinode->i_dirty_start = 0; 3022 - jinode->i_dirty_end = 0; 3021 + jinode->i_dirty_start_page = 0; 3022 + jinode->i_dirty_end_page = 0; 3023 3023 INIT_LIST_HEAD(&jinode->i_list); 3024 3024 } 3025 3025 ··· 3176 3176 MODULE_LICENSE("GPL"); 3177 3177 module_init(journal_init); 3178 3178 module_exit(journal_exit); 3179 -
+15 -8
fs/jbd2/transaction.c
··· 2694 2694 { 2695 2695 transaction_t *transaction = handle->h_transaction; 2696 2696 journal_t *journal; 2697 + pgoff_t start_page, end_page; 2697 2698 int err = 0; 2698 2699 int abort_transaction = 0; 2699 2700 ··· 2705 2704 jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, 2706 2705 transaction->t_tid); 2707 2706 2708 - spin_lock(&journal->j_list_lock); 2709 - jinode->i_flags |= flags; 2707 + start_page = (pgoff_t)(start_byte >> PAGE_SHIFT); 2708 + end_page = (pgoff_t)(end_byte >> PAGE_SHIFT) + 1; 2710 2709 2711 - if (jinode->i_dirty_end) { 2712 - jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); 2713 - jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); 2710 + spin_lock(&journal->j_list_lock); 2711 + WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags); 2712 + 2713 + if (jinode->i_dirty_start_page != jinode->i_dirty_end_page) { 2714 + WRITE_ONCE(jinode->i_dirty_start_page, 2715 + min(jinode->i_dirty_start_page, start_page)); 2716 + WRITE_ONCE(jinode->i_dirty_end_page, 2717 + max(jinode->i_dirty_end_page, end_page)); 2714 2718 } else { 2715 - jinode->i_dirty_start = start_byte; 2716 - jinode->i_dirty_end = end_byte; 2719 + /* Publish a new non-empty range by making end visible first. */ 2720 + WRITE_ONCE(jinode->i_dirty_end_page, end_page); 2721 + WRITE_ONCE(jinode->i_dirty_start_page, start_page); 2717 2722 } 2718 2723 2719 2724 /* Is inode already attached where we need it? */ ··· 2809 2802 int ret = 0; 2810 2803 2811 2804 /* This is a quick check to avoid locking if not necessary */ 2812 - if (!jinode->i_transaction) 2805 + if (!READ_ONCE(jinode->i_transaction)) 2813 2806 goto out; 2814 2807 /* Locks are here just to force reading of recent values, it is 2815 2808 * enough that the transaction was not committing before we started
+22 -12
include/linux/jbd2.h
··· 429 429 unsigned long i_flags; 430 430 431 431 /** 432 - * @i_dirty_start: 432 + * @i_dirty_start_page: 433 433 * 434 - * Offset in bytes where the dirty range for this inode starts. 434 + * Dirty range start in PAGE_SIZE units. 435 + * 436 + * The dirty range is empty if @i_dirty_start_page is greater than or 437 + * equal to @i_dirty_end_page. 438 + * 435 439 * [j_list_lock] 436 440 */ 437 - loff_t i_dirty_start; 441 + pgoff_t i_dirty_start_page; 438 442 439 443 /** 440 - * @i_dirty_end: 444 + * @i_dirty_end_page: 441 445 * 442 - * Inclusive offset in bytes where the dirty range for this inode 443 - * ends. [j_list_lock] 446 + * Dirty range end in PAGE_SIZE units (exclusive). 447 + * 448 + * [j_list_lock] 444 449 */ 445 - loff_t i_dirty_end; 450 + pgoff_t i_dirty_end_page; 446 451 }; 447 452 453 + /* 454 + * Lockless readers treat start_page >= end_page as an empty range. 455 + * Writers publish a new non-empty range by storing i_dirty_end_page before 456 + * i_dirty_start_page. 457 + */ 448 458 static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode, 449 459 loff_t *start, loff_t *end) 450 460 { 451 - loff_t start_byte = jinode->i_dirty_start; 452 - loff_t end_byte = jinode->i_dirty_end; 461 + pgoff_t start_page = READ_ONCE(jinode->i_dirty_start_page); 462 + pgoff_t end_page = READ_ONCE(jinode->i_dirty_end_page); 453 463 454 - if (!end_byte) 464 + if (start_page >= end_page) 455 465 return false; 456 466 457 - *start = start_byte; 458 - *end = end_byte; 467 + *start = (loff_t)start_page << PAGE_SHIFT; 468 + *end = ((loff_t)end_page << PAGE_SHIFT) - 1; 459 469 return true; 460 470 } 461 471