Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

f2fs: block cache/dio write during f2fs_enable_checkpoint()

If there is heavy background IO during f2fs_enable_checkpoint(),
sync_inodes_sb() may be blocked for a long time, because it keeps
looping to write back dirty data that is continuously generated by
parallel write() calls.

Let's make the following changes to resolve this issue:
- hold cp_enable_rwsem write lock to block any cache/dio write
- decrease DEF_ENABLE_INTERVAL from 16 to 5

In addition, dump more logs during f2fs_enable_checkpoint().

Testcase:
1. fill data into filesystem until 90% usage.
2. mount -o remount,checkpoint=disable:10% /data
3. fio --rw=randwrite --bs=4kb --size=1GB --numjobs=10 \
--iodepth=64 --ioengine=psync --time_based --runtime=600 \
--directory=/data/fio_dir/ &
4. mount -o remount,checkpoint=enable /data

Before:
F2FS-fs (dm-51): f2fs_enable_checkpoint() finishes, writeback:7232, sync:39793, cp:457

After:
F2FS-fs (dm-51): f2fs_enable_checkpoint end, writeback:5032, lock:0, sync_inode:5552, sync_fs:84

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

authored by

Chao Yu and committed by
Jaegeuk Kim
196c81fd be112e74

+34 -9
+2
fs/f2fs/data.c
··· 1418 1418 1419 1419 static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag) 1420 1420 { 1421 + f2fs_down_read(&sbi->cp_enable_rwsem); 1421 1422 if (flag == F2FS_GET_BLOCK_PRE_AIO) 1422 1423 f2fs_down_read(&sbi->node_change); 1423 1424 else ··· 1431 1430 f2fs_up_read(&sbi->node_change); 1432 1431 else 1433 1432 f2fs_unlock_op(sbi); 1433 + f2fs_up_read(&sbi->cp_enable_rwsem); 1434 1434 } 1435 1435 1436 1436 int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
+2 -1
fs/f2fs/f2fs.h
··· 281 281 #define DEF_CP_INTERVAL 60 /* 60 secs */ 282 282 #define DEF_IDLE_INTERVAL 5 /* 5 secs */ 283 283 #define DEF_DISABLE_INTERVAL 5 /* 5 secs */ 284 - #define DEF_ENABLE_INTERVAL 16 /* 16 secs */ 284 + #define DEF_ENABLE_INTERVAL 5 /* 5 secs */ 285 285 #define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */ 286 286 #define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */ 287 287 ··· 1695 1695 long interval_time[MAX_TIME]; /* to store thresholds */ 1696 1696 struct ckpt_req_control cprc_info; /* for checkpoint request control */ 1697 1697 struct cp_stats cp_stats; /* for time stat of checkpoint */ 1698 + struct f2fs_rwsem cp_enable_rwsem; /* block cache/dio write */ 1698 1699 1699 1700 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 1700 1701
+30 -8
fs/f2fs/super.c
··· 2635 2635 static int f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) 2636 2636 { 2637 2637 unsigned int nr_pages = get_pages(sbi, F2FS_DIRTY_DATA) / 16; 2638 - long long start, writeback, end; 2638 + long long start, writeback, lock, sync_inode, end; 2639 2639 int ret; 2640 2640 2641 - f2fs_info(sbi, "f2fs_enable_checkpoint() starts, meta: %lld, node: %lld, data: %lld", 2641 + f2fs_info(sbi, "%s start, meta: %lld, node: %lld, data: %lld", 2642 + __func__, 2642 2643 get_pages(sbi, F2FS_DIRTY_META), 2643 2644 get_pages(sbi, F2FS_DIRTY_NODES), 2644 2645 get_pages(sbi, F2FS_DIRTY_DATA)); ··· 2658 2657 } 2659 2658 writeback = ktime_get(); 2660 2659 2661 - sync_inodes_sb(sbi->sb); 2660 + f2fs_down_write(&sbi->cp_enable_rwsem); 2661 + 2662 + lock = ktime_get(); 2663 + 2664 + if (get_pages(sbi, F2FS_DIRTY_DATA)) 2665 + sync_inodes_sb(sbi->sb); 2662 2666 2663 2667 if (unlikely(get_pages(sbi, F2FS_DIRTY_DATA))) 2664 - f2fs_warn(sbi, "checkpoint=enable has some unwritten data: %lld", 2665 - get_pages(sbi, F2FS_DIRTY_DATA)); 2668 + f2fs_warn(sbi, "%s: has some unwritten data: %lld", 2669 + __func__, get_pages(sbi, F2FS_DIRTY_DATA)); 2670 + 2671 + sync_inode = ktime_get(); 2666 2672 2667 2673 f2fs_down_write(&sbi->gc_lock); 2668 2674 f2fs_dirty_to_prefree(sbi); ··· 2678 2670 set_sbi_flag(sbi, SBI_IS_DIRTY); 2679 2671 f2fs_up_write(&sbi->gc_lock); 2680 2672 2673 + f2fs_info(sbi, "%s sync_fs, meta: %lld, imeta: %lld, node: %lld, dents: %lld, qdata: %lld", 2674 + __func__, 2675 + get_pages(sbi, F2FS_DIRTY_META), 2676 + get_pages(sbi, F2FS_DIRTY_IMETA), 2677 + get_pages(sbi, F2FS_DIRTY_NODES), 2678 + get_pages(sbi, F2FS_DIRTY_DENTS), 2679 + get_pages(sbi, F2FS_DIRTY_QDATA)); 2681 2680 ret = f2fs_sync_fs(sbi->sb, 1); 2682 2681 if (ret) 2683 2682 f2fs_err(sbi, "%s sync_fs failed, ret: %d", __func__, ret); ··· 2692 2677 /* Let's ensure there's no pending checkpoint anymore */ 2693 2678 f2fs_flush_ckpt_thread(sbi); 2694 2679 2680 + f2fs_up_write(&sbi->cp_enable_rwsem); 2681 + 2695 
2682 end = ktime_get(); 2696 2683 2697 - f2fs_info(sbi, "f2fs_enable_checkpoint() finishes, writeback:%llu, sync:%llu", 2698 - ktime_ms_delta(writeback, start), 2699 - ktime_ms_delta(end, writeback)); 2684 + f2fs_info(sbi, "%s end, writeback:%llu, " 2685 + "lock:%llu, sync_inode:%llu, sync_fs:%llu", 2686 + __func__, 2687 + ktime_ms_delta(writeback, start), 2688 + ktime_ms_delta(lock, writeback), 2689 + ktime_ms_delta(sync_inode, lock), 2690 + ktime_ms_delta(end, sync_inode)); 2700 2691 return ret; 2701 2692 } 2702 2693 ··· 4891 4870 init_f2fs_rwsem(&sbi->node_change); 4892 4871 spin_lock_init(&sbi->stat_lock); 4893 4872 init_f2fs_rwsem(&sbi->cp_rwsem); 4873 + init_f2fs_rwsem(&sbi->cp_enable_rwsem); 4894 4874 init_f2fs_rwsem(&sbi->quota_sem); 4895 4875 init_waitqueue_head(&sbi->cp_wait); 4896 4876 spin_lock_init(&sbi->error_lock);