Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
"More miscellaneous ext4 bug fixes (all stable fodder)"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: fix mount failure with quota configured as module
jbd2: fix ocfs2 corrupt when clearing block group bits
ext4: fix race between writepages and enabling EXT4_EXTENTS_FL
ext4: rename s_journal_flag_rwsem to s_writepages_rwsem
ext4: fix potential race between s_flex_groups online resizing and access
ext4: fix potential race between s_group_info online resizing and access
ext4: fix potential race between online resizing and write operations
ext4: add cond_resched() to __ext4_find_entry()
ext4: fix a data race in EXT4_I(inode)->i_disksize

+256 -108
+11 -3
fs/ext4/balloc.c
··· 270 270 ext4_group_t ngroups = ext4_get_groups_count(sb); 271 271 struct ext4_group_desc *desc; 272 272 struct ext4_sb_info *sbi = EXT4_SB(sb); 273 + struct buffer_head *bh_p; 273 274 274 275 if (block_group >= ngroups) { 275 276 ext4_error(sb, "block_group >= groups_count - block_group = %u," ··· 281 280 282 281 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 283 282 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 284 - if (!sbi->s_group_desc[group_desc]) { 283 + bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); 284 + /* 285 + * sbi_array_rcu_deref returns with rcu unlocked, this is ok since 286 + * the pointer being dereferenced won't be dereferenced again. By 287 + * looking at the usage in add_new_gdb() the value isn't modified, 288 + * just the pointer, and so it remains valid. 289 + */ 290 + if (!bh_p) { 285 291 ext4_error(sb, "Group descriptor not loaded - " 286 292 "block_group = %u, group_desc = %u, desc = %u", 287 293 block_group, group_desc, offset); ··· 296 288 } 297 289 298 290 desc = (struct ext4_group_desc *)( 299 - (__u8 *)sbi->s_group_desc[group_desc]->b_data + 291 + (__u8 *)bh_p->b_data + 300 292 offset * EXT4_DESC_SIZE(sb)); 301 293 if (bh) 302 - *bh = sbi->s_group_desc[group_desc]; 294 + *bh = bh_p; 303 295 return desc; 304 296 } 305 297
+30 -9
fs/ext4/ext4.h
··· 1400 1400 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ 1401 1401 struct buffer_head * s_sbh; /* Buffer containing the super block */ 1402 1402 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ 1403 - struct buffer_head **s_group_desc; 1403 + struct buffer_head * __rcu *s_group_desc; 1404 1404 unsigned int s_mount_opt; 1405 1405 unsigned int s_mount_opt2; 1406 1406 unsigned int s_mount_flags; ··· 1462 1462 #endif 1463 1463 1464 1464 /* for buddy allocator */ 1465 - struct ext4_group_info ***s_group_info; 1465 + struct ext4_group_info ** __rcu *s_group_info; 1466 1466 struct inode *s_buddy_cache; 1467 1467 spinlock_t s_md_lock; 1468 1468 unsigned short *s_mb_offsets; ··· 1512 1512 unsigned int s_extent_max_zeroout_kb; 1513 1513 1514 1514 unsigned int s_log_groups_per_flex; 1515 - struct flex_groups *s_flex_groups; 1515 + struct flex_groups * __rcu *s_flex_groups; 1516 1516 ext4_group_t s_flex_groups_allocated; 1517 1517 1518 1518 /* workqueue for reserved extent conversions (buffered io) */ ··· 1552 1552 struct ratelimit_state s_warning_ratelimit_state; 1553 1553 struct ratelimit_state s_msg_ratelimit_state; 1554 1554 1555 - /* Barrier between changing inodes' journal flags and writepages ops. */ 1556 - struct percpu_rw_semaphore s_journal_flag_rwsem; 1555 + /* 1556 + * Barrier between writepages ops and changing any inode's JOURNAL_DATA 1557 + * or EXTENTS flag. 1558 + */ 1559 + struct percpu_rw_semaphore s_writepages_rwsem; 1557 1560 struct dax_device *s_daxdev; 1558 1561 #ifdef CONFIG_EXT4_DEBUG 1559 1562 unsigned long s_simulate_fail; ··· 1578 1575 (ino >= EXT4_FIRST_INO(sb) && 1579 1576 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); 1580 1577 } 1578 + 1579 + /* 1580 + * Returns: sbi->field[index] 1581 + * Used to access an array element from the following sbi fields which require 1582 + * rcu protection to avoid dereferencing an invalid pointer due to reassignment 1583 + * - s_group_desc 1584 + * - s_group_info 1585 + * - s_flex_group 1586 + */ 1587 + #define sbi_array_rcu_deref(sbi, field, index) \ 1588 + ({ \ 1589 + typeof(*((sbi)->field)) _v; \ 1590 + rcu_read_lock(); \ 1591 + _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ 1592 + rcu_read_unlock(); \ 1593 + _v; \ 1594 + }) 1581 1595 1582 1596 /* 1583 1597 * Simulate_fail codes ··· 2750 2730 extern bool ext4_empty_dir(struct inode *inode); 2751 2731 2752 2732 /* resize.c */ 2733 + extern void ext4_kvfree_array_rcu(void *to_free); 2753 2734 extern int ext4_group_add(struct super_block *sb, 2754 2735 struct ext4_new_group_data *input); 2755 2736 extern int ext4_group_extend(struct super_block *sb, ··· 2997 2976 struct ext4_group_info *ext4_get_group_info(struct super_block *sb, 2998 2977 ext4_group_t group) 2999 2978 { 3000 - struct ext4_group_info ***grp_info; 2979 + struct ext4_group_info **grp_info; 3001 2980 long indexv, indexh; 3002 2981 BUG_ON(group >= EXT4_SB(sb)->s_groups_count); 3003 - grp_info = EXT4_SB(sb)->s_group_info; 3004 2982 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); 3005 2983 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); 3006 - return grp_info[indexv][indexh]; 2984 + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); 2985 + return grp_info[indexh]; 3007 2986 } 3008 2987 3009 2988 /* ··· 3053 3032 !inode_is_locked(inode)); 3054 3033 down_write(&EXT4_I(inode)->i_data_sem); 3055 3034 if (newsize > EXT4_I(inode)->i_disksize) 3056 - EXT4_I(inode)->i_disksize = newsize; 3035 + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); 3057 3036 up_write(&EXT4_I(inode)->i_data_sem); 3058 3037 } 3059 3038
+14 -9
fs/ext4/ialloc.c
··· 328 328 329 329 percpu_counter_inc(&sbi->s_freeinodes_counter); 330 330 if (sbi->s_log_groups_per_flex) { 331 - ext4_group_t f = ext4_flex_group(sbi, block_group); 331 + struct flex_groups *fg; 332 332 333 - atomic_inc(&sbi->s_flex_groups[f].free_inodes); 333 + fg = sbi_array_rcu_deref(sbi, s_flex_groups, 334 + ext4_flex_group(sbi, block_group)); 335 + atomic_inc(&fg->free_inodes); 334 336 if (is_directory) 335 - atomic_dec(&sbi->s_flex_groups[f].used_dirs); 337 + atomic_dec(&fg->used_dirs); 336 338 } 337 339 BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); 338 340 fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); ··· 370 368 int flex_size, struct orlov_stats *stats) 371 369 { 372 370 struct ext4_group_desc *desc; 373 - struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; 374 371 375 372 if (flex_size > 1) { 376 - stats->free_inodes = atomic_read(&flex_group[g].free_inodes); 377 - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); 378 - stats->used_dirs = atomic_read(&flex_group[g].used_dirs); 373 + struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), 374 + s_flex_groups, g); 375 + stats->free_inodes = atomic_read(&fg->free_inodes); 376 + stats->free_clusters = atomic64_read(&fg->free_clusters); 377 + stats->used_dirs = atomic_read(&fg->used_dirs); 379 378 return; 380 379 } 381 380 ··· 1057 1054 if (sbi->s_log_groups_per_flex) { 1058 1055 ext4_group_t f = ext4_flex_group(sbi, group); 1059 1056 1060 - atomic_inc(&sbi->s_flex_groups[f].used_dirs); 1057 + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, 1058 + f)->used_dirs); 1061 1059 } 1062 1060 } 1063 1061 if (ext4_has_group_desc_csum(sb)) { ··· 1081 1077 1082 1078 if (sbi->s_log_groups_per_flex) { 1083 1079 flex_group = ext4_flex_group(sbi, group); 1084 - atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); 1080 + atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, 1081 + flex_group)->free_inodes); 1085 1082 } 1086 1083 1087 1084 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
+8 -8
fs/ext4/inode.c
··· 2465 2465 * truncate are avoided by checking i_size under i_data_sem. 2466 2466 */ 2467 2467 disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; 2468 - if (disksize > EXT4_I(inode)->i_disksize) { 2468 + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { 2469 2469 int err2; 2470 2470 loff_t i_size; 2471 2471 ··· 2628 2628 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 2629 2629 return -EIO; 2630 2630 2631 - percpu_down_read(&sbi->s_journal_flag_rwsem); 2631 + percpu_down_read(&sbi->s_writepages_rwsem); 2632 2632 trace_ext4_writepages(inode, wbc); 2633 2633 2634 2634 /* ··· 2849 2849 out_writepages: 2850 2850 trace_ext4_writepages_result(inode, wbc, ret, 2851 2851 nr_to_write - wbc->nr_to_write); 2852 - percpu_up_read(&sbi->s_journal_flag_rwsem); 2852 + percpu_up_read(&sbi->s_writepages_rwsem); 2853 2853 return ret; 2854 2854 } 2855 2855 ··· 2864 2864 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 2865 2865 return -EIO; 2866 2866 2867 - percpu_down_read(&sbi->s_journal_flag_rwsem); 2867 + percpu_down_read(&sbi->s_writepages_rwsem); 2868 2868 trace_ext4_writepages(inode, wbc); 2869 2869 2870 2870 ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); 2871 2871 trace_ext4_writepages_result(inode, wbc, ret, 2872 2872 nr_to_write - wbc->nr_to_write); 2873 - percpu_up_read(&sbi->s_journal_flag_rwsem); 2873 + percpu_up_read(&sbi->s_writepages_rwsem); 2874 2874 return ret; 2875 2875 } 2876 2876 ··· 5861 5861 } 5862 5862 } 5863 5863 5864 - percpu_down_write(&sbi->s_journal_flag_rwsem); 5864 + percpu_down_write(&sbi->s_writepages_rwsem); 5865 5865 jbd2_journal_lock_updates(journal); 5866 5866 5867 5867 /* ··· 5878 5878 err = jbd2_journal_flush(journal); 5879 5879 if (err < 0) { 5880 5880 jbd2_journal_unlock_updates(journal); 5881 - percpu_up_write(&sbi->s_journal_flag_rwsem); 5881 + percpu_up_write(&sbi->s_writepages_rwsem); 5882 5882 return err; 5883 5883 } 5884 5884 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); ··· 5886 5886 ext4_set_aops(inode); 5887 5887 5888 5888 jbd2_journal_unlock_updates(journal); 5889 - percpu_up_write(&sbi->s_journal_flag_rwsem); 5889 + percpu_up_write(&sbi->s_writepages_rwsem); 5890 5890 5891 5891 if (val) 5892 5892 up_write(&EXT4_I(inode)->i_mmap_sem);
+41 -20
fs/ext4/mballoc.c
··· 2356 2356 { 2357 2357 struct ext4_sb_info *sbi = EXT4_SB(sb); 2358 2358 unsigned size; 2359 - struct ext4_group_info ***new_groupinfo; 2359 + struct ext4_group_info ***old_groupinfo, ***new_groupinfo; 2360 2360 2361 2361 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> 2362 2362 EXT4_DESC_PER_BLOCK_BITS(sb); ··· 2369 2369 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); 2370 2370 return -ENOMEM; 2371 2371 } 2372 - if (sbi->s_group_info) { 2373 - memcpy(new_groupinfo, sbi->s_group_info, 2372 + rcu_read_lock(); 2373 + old_groupinfo = rcu_dereference(sbi->s_group_info); 2374 + if (old_groupinfo) 2375 + memcpy(new_groupinfo, old_groupinfo, 2374 2376 sbi->s_group_info_size * sizeof(*sbi->s_group_info)); 2375 - kvfree(sbi->s_group_info); 2376 - } 2377 - sbi->s_group_info = new_groupinfo; 2377 + rcu_read_unlock(); 2378 + rcu_assign_pointer(sbi->s_group_info, new_groupinfo); 2378 2379 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); 2380 + if (old_groupinfo) 2381 + ext4_kvfree_array_rcu(old_groupinfo); 2379 2382 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", 2380 2383 sbi->s_group_info_size); 2381 2384 return 0; ··· 2390 2387 { 2391 2388 int i; 2392 2389 int metalen = 0; 2390 + int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); 2393 2391 struct ext4_sb_info *sbi = EXT4_SB(sb); 2394 2392 struct ext4_group_info **meta_group_info; 2395 2393 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); ··· 2409 2405 "for a buddy group"); 2410 2406 goto exit_meta_group_info; 2411 2407 } 2412 - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = 2413 - meta_group_info; 2408 + rcu_read_lock(); 2409 + rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; 2410 + rcu_read_unlock(); 2414 2411 } 2415 2412 2416 - meta_group_info = 2417 - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; 2413 + meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); 2418 2414 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); 2419 2415 2420 2416 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); ··· 2462 2458 exit_group_info: 2463 2459 /* If a meta_group_info table has been allocated, release it now */ 2464 2460 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { 2465 - kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); 2466 - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; 2461 + struct ext4_group_info ***group_info; 2462 + 2463 + rcu_read_lock(); 2464 + group_info = rcu_dereference(sbi->s_group_info); 2465 + kfree(group_info[idx]); 2466 + group_info[idx] = NULL; 2467 + rcu_read_unlock(); 2467 2468 } 2468 2469 exit_meta_group_info: 2469 2470 return -ENOMEM; ··· 2481 2472 struct ext4_sb_info *sbi = EXT4_SB(sb); 2482 2473 int err; 2483 2474 struct ext4_group_desc *desc; 2475 + struct ext4_group_info ***group_info; 2484 2476 struct kmem_cache *cachep; 2485 2477 2486 2478 err = ext4_mb_alloc_groupinfo(sb, ngroups); ··· 2517 2507 while (i-- > 0) 2518 2508 kmem_cache_free(cachep, ext4_get_group_info(sb, i)); 2519 2509 i = sbi->s_group_info_size; 2510 + rcu_read_lock(); 2511 + group_info = rcu_dereference(sbi->s_group_info); 2520 2512 while (i-- > 0) 2521 - kfree(sbi->s_group_info[i]); 2513 + kfree(group_info[i]); 2514 + rcu_read_unlock(); 2522 2515 iput(sbi->s_buddy_cache); 2523 2516 err_freesgi: 2524 - kvfree(sbi->s_group_info); 2517 + rcu_read_lock(); 2518 + kvfree(rcu_dereference(sbi->s_group_info)); 2519 + rcu_read_unlock(); 2525 2520 return -ENOMEM; 2526 2521 } 2527 2522 ··· 2715 2700 ext4_group_t ngroups = ext4_get_groups_count(sb); 2716 2701 ext4_group_t i; 2717 2702 int num_meta_group_infos; 2718 - struct ext4_group_info *grinfo; 2703 + struct ext4_group_info *grinfo, ***group_info; 2719 2704 struct ext4_sb_info *sbi = EXT4_SB(sb); 2720 2705 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); 2721 2706 ··· 2734 2719 num_meta_group_infos = (ngroups + 2735 2720 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2736 2721 EXT4_DESC_PER_BLOCK_BITS(sb); 2722 + rcu_read_lock(); 2723 + group_info = rcu_dereference(sbi->s_group_info); 2737 2724 for (i = 0; i < num_meta_group_infos; i++) 2738 - kfree(sbi->s_group_info[i]); 2739 - kvfree(sbi->s_group_info); 2725 + kfree(group_info[i]); 2726 + kvfree(group_info); 2727 + rcu_read_unlock(); 2740 2728 } 2741 2729 kfree(sbi->s_mb_offsets); 2742 2730 kfree(sbi->s_mb_maxs); ··· 3038 3020 ext4_group_t flex_group = ext4_flex_group(sbi, 3039 3021 ac->ac_b_ex.fe_group); 3040 3022 atomic64_sub(ac->ac_b_ex.fe_len, 3041 - &sbi->s_flex_groups[flex_group].free_clusters); 3023 + &sbi_array_rcu_deref(sbi, s_flex_groups, 3024 + flex_group)->free_clusters); 3042 3025 } 3043 3026 3044 3027 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); ··· 4937 4918 if (sbi->s_log_groups_per_flex) { 4938 4919 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4939 4920 atomic64_add(count_clusters, 4940 - &sbi->s_flex_groups[flex_group].free_clusters); 4921 + &sbi_array_rcu_deref(sbi, s_flex_groups, 4922 + flex_group)->free_clusters); 4941 4923 } 4942 4924 4943 4925 /* ··· 5095 5075 if (sbi->s_log_groups_per_flex) { 5096 5076 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 5097 5077 atomic64_add(clusters_freed, 5098 - &sbi->s_flex_groups[flex_group].free_clusters); 5078 + &sbi_array_rcu_deref(sbi, s_flex_groups, 5079 + flex_group)->free_clusters); 5099 5080 } 5100 5081 5101 5082 ext4_mb_unload_buddy(&e4b);
+19 -8
fs/ext4/migrate.c
··· 407 407 408 408 int ext4_ext_migrate(struct inode *inode) 409 409 { 410 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 410 411 handle_t *handle; 411 412 int retval = 0, i; 412 413 __le32 *i_data; ··· 432 431 */ 433 432 return retval; 434 433 434 + percpu_down_write(&sbi->s_writepages_rwsem); 435 + 435 436 /* 436 437 * Worst case we can touch the allocation bitmaps, a bgd 437 438 * block, and a block to link in the orphan list. We do need ··· 444 441 445 442 if (IS_ERR(handle)) { 446 443 retval = PTR_ERR(handle); 447 - return retval; 444 + goto out_unlock; 448 445 } 449 446 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * 450 447 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; ··· 455 452 if (IS_ERR(tmp_inode)) { 456 453 retval = PTR_ERR(tmp_inode); 457 454 ext4_journal_stop(handle); 458 - return retval; 455 + goto out_unlock; 459 456 } 460 457 i_size_write(tmp_inode, i_size_read(inode)); 461 458 /* ··· 497 494 */ 498 495 ext4_orphan_del(NULL, tmp_inode); 499 496 retval = PTR_ERR(handle); 500 - goto out; 497 + goto out_tmp_inode; 501 498 } 502 499 503 500 ei = EXT4_I(inode); ··· 579 576 ext4_ext_tree_init(handle, tmp_inode); 580 577 out_stop: 581 578 ext4_journal_stop(handle); 582 - out: 579 + out_tmp_inode: 583 580 unlock_new_inode(tmp_inode); 584 581 iput(tmp_inode); 585 - 582 + out_unlock: 583 + percpu_up_write(&sbi->s_writepages_rwsem); 586 584 return retval; 587 585 } 588 586 ··· 593 589 int ext4_ind_migrate(struct inode *inode) 594 590 { 595 591 struct ext4_extent_header *eh; 596 - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 592 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 593 + struct ext4_super_block *es = sbi->s_es; 597 594 struct ext4_inode_info *ei = EXT4_I(inode); 598 595 struct ext4_extent *ex; 599 596 unsigned int i, len; ··· 618 613 if (test_opt(inode->i_sb, DELALLOC)) 619 614 ext4_alloc_da_blocks(inode); 620 615 616 + percpu_down_write(&sbi->s_writepages_rwsem); 617 + 621 618 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); 622 - if (IS_ERR(handle)) 623 - return PTR_ERR(handle); 619 + if (IS_ERR(handle)) { 620 + ret = PTR_ERR(handle); 621 + goto out_unlock; 622 + } 624 623 625 624 down_write(&EXT4_I(inode)->i_data_sem); 626 625 ret = ext4_ext_check_inode(inode); ··· 659 650 errout: 660 651 ext4_journal_stop(handle); 661 652 up_write(&EXT4_I(inode)->i_data_sem); 653 + out_unlock: 654 + percpu_up_write(&sbi->s_writepages_rwsem); 662 655 return ret; 663 656 }
+1
fs/ext4/namei.c
··· 1511 1511 /* 1512 1512 * We deal with the read-ahead logic here. 1513 1513 */ 1514 + cond_resched(); 1514 1515 if (ra_ptr >= ra_max) { 1515 1516 /* Refill the readahead buffer */ 1516 1517 ra_ptr = 0;
+49 -13
fs/ext4/resize.c
··· 17 17 18 18 #include "ext4_jbd2.h" 19 19 20 + struct ext4_rcu_ptr { 21 + struct rcu_head rcu; 22 + void *ptr; 23 + }; 24 + 25 + static void ext4_rcu_ptr_callback(struct rcu_head *head) 26 + { 27 + struct ext4_rcu_ptr *ptr; 28 + 29 + ptr = container_of(head, struct ext4_rcu_ptr, rcu); 30 + kvfree(ptr->ptr); 31 + kfree(ptr); 32 + } 33 + 34 + void ext4_kvfree_array_rcu(void *to_free) 35 + { 36 + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 37 + 38 + if (ptr) { 39 + ptr->ptr = to_free; 40 + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); 41 + return; 42 + } 43 + synchronize_rcu(); 44 + kvfree(to_free); 45 + } 46 + 20 47 int ext4_resize_begin(struct super_block *sb) 21 48 { 22 49 struct ext4_sb_info *sbi = EXT4_SB(sb); ··· 569 542 brelse(gdb); 570 543 goto out; 571 544 } 572 - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, 573 - gdb->b_size); 545 + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, 546 + s_group_desc, j)->b_data, gdb->b_size); 574 547 set_buffer_uptodate(gdb); 575 548 576 549 err = ext4_handle_dirty_metadata(handle, NULL, gdb); ··· 887 860 } 888 861 brelse(dind); 889 862 890 - o_group_desc = EXT4_SB(sb)->s_group_desc; 863 + rcu_read_lock(); 864 + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); 891 865 memcpy(n_group_desc, o_group_desc, 892 866 EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); 867 + rcu_read_unlock(); 893 868 n_group_desc[gdb_num] = gdb_bh; 894 - EXT4_SB(sb)->s_group_desc = n_group_desc; 869 + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); 895 870 EXT4_SB(sb)->s_gdb_count++; 896 - kvfree(o_group_desc); 871 + ext4_kvfree_array_rcu(o_group_desc); 897 872 898 873 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 899 874 err = ext4_handle_dirty_super(handle, sb); ··· 938 909 return err; 939 910 } 940 911 941 - o_group_desc = EXT4_SB(sb)->s_group_desc; 912 + rcu_read_lock(); 913 + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); 942 914 memcpy(n_group_desc, o_group_desc, 943 915 EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); 916 + rcu_read_unlock(); 944 917 n_group_desc[gdb_num] = gdb_bh; 945 918 946 919 BUFFER_TRACE(gdb_bh, "get_write_access"); ··· 953 922 return err; 954 923 } 955 924 956 - EXT4_SB(sb)->s_group_desc = n_group_desc; 925 + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); 957 926 EXT4_SB(sb)->s_gdb_count++; 958 - kvfree(o_group_desc); 927 + ext4_kvfree_array_rcu(o_group_desc); 959 928 return err; 960 929 } 961 930 ··· 1219 1188 * use non-sparse filesystems anymore. This is already checked above. 1220 1189 */ 1221 1190 if (gdb_off) { 1222 - gdb_bh = sbi->s_group_desc[gdb_num]; 1191 + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, 1192 + gdb_num); 1223 1193 BUFFER_TRACE(gdb_bh, "get_write_access"); 1224 1194 err = ext4_journal_get_write_access(handle, gdb_bh); 1225 1195 ··· 1302 1270 /* 1303 1271 * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). 1304 1272 */ 1305 - gdb_bh = sbi->s_group_desc[gdb_num]; 1273 + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); 1306 1274 /* Update group descriptor block for new group */ 1307 1275 gdp = (struct ext4_group_desc *)(gdb_bh->b_data + 1308 1276 gdb_off * EXT4_DESC_SIZE(sb)); ··· 1430 1398 percpu_counter_read(&sbi->s_freeclusters_counter)); 1431 1399 if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { 1432 1400 ext4_group_t flex_group; 1401 + struct flex_groups *fg; 1402 + 1433 1403 flex_group = ext4_flex_group(sbi, group_data[0].group); 1404 + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); 1434 1405 atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), 1435 - &sbi->s_flex_groups[flex_group].free_clusters); 1406 + &fg->free_clusters); 1436 1407 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, 1437 - &sbi->s_flex_groups[flex_group].free_inodes); 1408 + &fg->free_inodes); 1438 1409 } 1439 1410 1440 1411 /* ··· 1532 1497 for (; gdb_num <= gdb_num_end; gdb_num++) { 1533 1498 struct buffer_head *gdb_bh; 1534 1499 1535 - gdb_bh = sbi->s_group_desc[gdb_num]; 1500 + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, 1501 + gdb_num); 1536 1502 if (old_gdb == gdb_bh->b_blocknr) 1537 1503 continue; 1538 1504 update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
+77 -36
fs/ext4/super.c
··· 1014 1014 { 1015 1015 struct ext4_sb_info *sbi = EXT4_SB(sb); 1016 1016 struct ext4_super_block *es = sbi->s_es; 1017 + struct buffer_head **group_desc; 1018 + struct flex_groups **flex_groups; 1017 1019 int aborted = 0; 1018 1020 int i, err; 1019 1021 ··· 1048 1046 if (!sb_rdonly(sb)) 1049 1047 ext4_commit_super(sb, 1); 1050 1048 1049 + rcu_read_lock(); 1050 + group_desc = rcu_dereference(sbi->s_group_desc); 1051 1051 for (i = 0; i < sbi->s_gdb_count; i++) 1052 - brelse(sbi->s_group_desc[i]); 1053 - kvfree(sbi->s_group_desc); 1054 - kvfree(sbi->s_flex_groups); 1052 + brelse(group_desc[i]); 1053 + kvfree(group_desc); 1054 + flex_groups = rcu_dereference(sbi->s_flex_groups); 1055 + if (flex_groups) { 1056 + for (i = 0; i < sbi->s_flex_groups_allocated; i++) 1057 + kvfree(flex_groups[i]); 1058 + kvfree(flex_groups); 1059 + } 1060 + rcu_read_unlock(); 1055 1061 percpu_counter_destroy(&sbi->s_freeclusters_counter); 1056 1062 percpu_counter_destroy(&sbi->s_freeinodes_counter); 1057 1063 percpu_counter_destroy(&sbi->s_dirs_counter); 1058 1064 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 1059 - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); 1065 + percpu_free_rwsem(&sbi->s_writepages_rwsem); 1060 1066 #ifdef CONFIG_QUOTA 1061 1067 for (i = 0; i < EXT4_MAXQUOTAS; i++) 1062 1068 kfree(get_qf_name(sb, sbi, i)); ··· 2390 2380 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) 2391 2381 { 2392 2382 struct ext4_sb_info *sbi = EXT4_SB(sb); 2393 - struct flex_groups *new_groups; 2394 - int size; 2383 + struct flex_groups **old_groups, **new_groups; 2384 + int size, i; 2395 2385 2396 2386 if (!sbi->s_log_groups_per_flex) 2397 2387 return 0; ··· 2400 2390 if (size <= sbi->s_flex_groups_allocated) 2401 2391 return 0; 2402 2392 2403 - size = roundup_pow_of_two(size * sizeof(struct flex_groups)); 2404 - new_groups = kvzalloc(size, GFP_KERNEL); 2393 + new_groups = kvzalloc(roundup_pow_of_two(size * 2394 + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); 2405 2395 if (!new_groups) { 2406 - ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", 2407 - size / (int) sizeof(struct flex_groups)); 2396 + ext4_msg(sb, KERN_ERR, 2397 + "not enough memory for %d flex group pointers", size); 2408 2398 return -ENOMEM; 2409 2399 } 2410 - 2411 - if (sbi->s_flex_groups) { 2412 - memcpy(new_groups, sbi->s_flex_groups, 2413 - (sbi->s_flex_groups_allocated * 2414 - sizeof(struct flex_groups))); 2415 - kvfree(sbi->s_flex_groups); 2400 + for (i = sbi->s_flex_groups_allocated; i < size; i++) { 2401 + new_groups[i] = kvzalloc(roundup_pow_of_two( 2402 + sizeof(struct flex_groups)), 2403 + GFP_KERNEL); 2404 + if (!new_groups[i]) { 2405 + for (i--; i >= sbi->s_flex_groups_allocated; i--) 2406 + kvfree(new_groups[i]); 2407 + kvfree(new_groups); 2408 + ext4_msg(sb, KERN_ERR, 2409 + "not enough memory for %d flex groups", size); 2410 + return -ENOMEM; 2411 + } 2416 2412 } 2417 - sbi->s_flex_groups = new_groups; 2418 - sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); 2413 + rcu_read_lock(); 2414 + old_groups = rcu_dereference(sbi->s_flex_groups); 2415 + if (old_groups) 2416 + memcpy(new_groups, old_groups, 2417 + (sbi->s_flex_groups_allocated * 2418 + sizeof(struct flex_groups *))); 2419 + rcu_read_unlock(); 2420 + rcu_assign_pointer(sbi->s_flex_groups, new_groups); 2421 + sbi->s_flex_groups_allocated = size; 2422 + if (old_groups) 2423 + ext4_kvfree_array_rcu(old_groups); 2419 2424 return 0; 2420 2425 } 2421 2426 ··· 2438 2413 { 2439 2414 struct ext4_sb_info *sbi = EXT4_SB(sb); 2440 2415 struct ext4_group_desc *gdp = NULL; 2416 + struct flex_groups *fg; 2441 2417 ext4_group_t flex_group; 2442 2418 int i, err; 2443 2419 ··· 2456 2430 gdp = ext4_get_group_desc(sb, i, NULL); 2457 2431 2458 2432 flex_group = ext4_flex_group(sbi, i); 2459 - atomic_add(ext4_free_inodes_count(sb, gdp), 2460 - &sbi->s_flex_groups[flex_group].free_inodes); 2433 + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); 2434 + atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); 2461 2435 atomic64_add(ext4_free_group_clusters(sb, gdp), 2462 - &sbi->s_flex_groups[flex_group].free_clusters); 2463 - atomic_add(ext4_used_dirs_count(sb, gdp), 2464 - &sbi->s_flex_groups[flex_group].used_dirs); 2436 + &fg->free_clusters); 2437 + atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); 2465 2438 } 2466 2439 2467 2440 return 1; ··· 3034 3009 return 0; 3035 3010 } 3036 3011 3037 - #if !defined(CONFIG_QUOTA) || !defined(CONFIG_QFMT_V2) 3012 + #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) 3038 3013 if (!readonly && (ext4_has_feature_quota(sb) || 3039 3014 ext4_has_feature_project(sb))) { 3040 3015 ext4_msg(sb, KERN_ERR, ··· 3659 3634 { 3660 3635 struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); 3661 3636 char *orig_data = kstrdup(data, GFP_KERNEL); 3662 - struct buffer_head *bh; 3637 + struct buffer_head *bh, **group_desc; 3663 3638 struct ext4_super_block *es = NULL; 3664 3639 struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 3640 + struct flex_groups **flex_groups; 3665 3641 ext4_fsblk_t block; 3666 3642 ext4_fsblk_t sb_block = get_sb_block(&data); 3667 3643 ext4_fsblk_t logical_sb_block; ··· 4316 4290 goto failed_mount; 4317 4291 } 4318 4292 } 4319 - sbi->s_group_desc = kvmalloc_array(db_count, 4320 - sizeof(struct buffer_head *), 4321 - GFP_KERNEL); 4293 + rcu_assign_pointer(sbi->s_group_desc, 4294 + kvmalloc_array(db_count, 4295 + sizeof(struct buffer_head *), 4296 + GFP_KERNEL)); 4322 4297 if (sbi->s_group_desc == NULL) { 4323 4298 ext4_msg(sb, KERN_ERR, "not enough memory"); 4324 4299 ret = -ENOMEM; ··· 4335 4308 } 4336 4309 4337 4310 for (i = 0; i < db_count; i++) { 4311 + struct buffer_head *bh; 4312 + 4338 4313 block = descriptor_loc(sb, logical_sb_block, i); 4339 - sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); 4340 - if (!sbi->s_group_desc[i]) { 4314 + bh = sb_bread_unmovable(sb, block); 4315 + if (!bh) { 4341 4316 ext4_msg(sb, KERN_ERR, 4342 4317 "can't read group descriptor %d", i); 4343 4318 db_count = i; 4344 4319 goto failed_mount2; 4345 4320 } 4321 + rcu_read_lock(); 4322 + rcu_dereference(sbi->s_group_desc)[i] = bh; 4323 + rcu_read_unlock(); 4346 4324 } 4347 4325 sbi->s_gdb_count = db_count; 4348 4326 if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { ··· 4626 4594 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, 4627 4595 GFP_KERNEL); 4628 4596 if (!err) 4629 - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); 4597 + err = percpu_init_rwsem(&sbi->s_writepages_rwsem); 4630 4598 4631 4599 if (err) { 4632 4600 ext4_msg(sb, KERN_ERR, "insufficient memory"); ··· 4714 4682 ext4_unregister_li_request(sb); 4715 4683 failed_mount6: 4716 4684 ext4_mb_release(sb); 4717 - if (sbi->s_flex_groups) 4718 - kvfree(sbi->s_flex_groups); 4685 + rcu_read_lock(); 4686 + flex_groups = rcu_dereference(sbi->s_flex_groups); 4687 + if (flex_groups) { 4688 + for (i = 0; i < sbi->s_flex_groups_allocated; i++) 4689 + kvfree(flex_groups[i]); 4690 + kvfree(flex_groups); 4691 + } 4692 + rcu_read_unlock(); 4719 4693 percpu_counter_destroy(&sbi->s_freeclusters_counter); 4720 4694 percpu_counter_destroy(&sbi->s_freeinodes_counter); 4721 4695 percpu_counter_destroy(&sbi->s_dirs_counter); 4722 4696 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 4723 - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); 4697 + percpu_free_rwsem(&sbi->s_writepages_rwsem); 4724 4698 failed_mount5: 4725 4699 ext4_ext_release(sb); 4726 4700 ext4_release_system_zone(sb); ··· 4755 4717 if (sbi->s_mmp_tsk) 4756 4718 kthread_stop(sbi->s_mmp_tsk); 4757 4719 failed_mount2: 4720 + rcu_read_lock(); 4721 + group_desc = rcu_dereference(sbi->s_group_desc); 4758 4722 for (i = 0; i < db_count; i++) 4759 - brelse(sbi->s_group_desc[i]); 4760 - kvfree(sbi->s_group_desc); 4723 + brelse(group_desc[i]); 4724 + kvfree(group_desc); 4725 + rcu_read_unlock(); 4761 4726 failed_mount: 4762 4727 if (sbi->s_chksum_driver) 4763 4728 crypto_free_shash(sbi->s_chksum_driver);
+6 -2
fs/jbd2/transaction.c
··· 936 936 char *frozen_buffer = NULL; 937 937 unsigned long start_lock, time_lock; 938 938 939 - if (is_handle_aborted(handle)) 940 - return -EROFS; 941 939 journal = transaction->t_journal; 942 940 943 941 jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); ··· 1187 1189 struct journal_head *jh; 1188 1190 int rc; 1189 1191 1192 + if (is_handle_aborted(handle)) 1193 + return -EROFS; 1194 + 1190 1195 if (jbd2_write_access_granted(handle, bh, false)) 1191 1196 return 0; 1192 1197 ··· 1326 1325 int err; 1327 1326 struct journal_head *jh; 1328 1327 char *committed_data = NULL; 1328 + 1329 + if (is_handle_aborted(handle)) 1330 + return -EROFS; 1329 1331 1330 1332 if (jbd2_write_access_granted(handle, bh, true)) 1331 1333 return 0;