Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull writeback updates from Christian Brauner:
"Features:

- Allow file systems to increase the minimum writeback chunk size.

The relatively low minimum writeback size of 4MiB means that
written-back inodes on rotational media are switched a lot. Besides
introducing additional seeks, this can also lead to extreme file
fragmentation on zoned devices when a lot of files are cached
relative to the available writeback bandwidth.

This adds a superblock field that allows the file system to
override the default size, and sets it to the zone size for zoned
XFS.

- Add logging for slow writeback when it exceeds
sysctl_hung_task_timeout_secs. This helps identify tasks waiting
for a long time and pinpoint potential issues. Recording the
starting jiffies is also useful when debugging a crashed vmcore.

- Wake up waiting tasks when finishing the writeback of a chunk.

Cleanups:

- filemap_* writeback interface cleanups.

Adding filemap_fdatawrite_wbc ended up being a mistake, as all but
the original btrfs caller should be using better high level
interfaces instead.

This series removes all these low-level interfaces, switches btrfs
to a more specific interface, and cleans up other too low-level
interfaces. With this the writeback_control that is passed to the
writeback code is only initialized in three places.

- Remove __filemap_fdatawrite, __filemap_fdatawrite_range, and
filemap_fdatawrite_wbc

- Add filemap_flush_nr helper for btrfs

- Push struct writeback_control into start_delalloc_inodes in btrfs

- Rename filemap_fdatawrite_range_kick to filemap_flush_range

- Stop opencoding filemap_fdatawrite_range in 9p, ocfs2, and mm

- Make wbc_to_tag() inline and use it in fs"

* tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
fs: Make wbc_to_tag() inline and use it in fs.
xfs: set s_min_writeback_pages for zoned file systems
writeback: allow the file system to override MIN_WRITEBACK_PAGES
writeback: cleanup writeback_chunk_size
mm: rename filemap_fdatawrite_range_kick to filemap_flush_range
mm: remove __filemap_fdatawrite_range
mm: remove filemap_fdatawrite_wbc
mm: remove __filemap_fdatawrite
mm,btrfs: add a filemap_flush_nr helper
btrfs: push struct writeback_control into start_delalloc_inodes
btrfs: use the local tmp_inode variable in start_delalloc_inodes
ocfs2: don't opencode filemap_fdatawrite_range in ocfs2_journal_submit_inode_data_buffers
9p: don't opencode filemap_fdatawrite_range in v9fs_mmap_vm_close
mm: don't opencode filemap_fdatawrite_range in filemap_invalidate_inode
writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs)
writeback: Wake up waiting tasks when finishing the writeback of a chunk.

+157 -187
+4 -13
fs/9p/vfs_file.c
··· 483 483 484 484 static void v9fs_mmap_vm_close(struct vm_area_struct *vma) 485 485 { 486 - struct inode *inode; 487 - 488 - struct writeback_control wbc = { 489 - .nr_to_write = LONG_MAX, 490 - .sync_mode = WB_SYNC_ALL, 491 - .range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE, 492 - /* absolute end, byte at end included */ 493 - .range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE + 494 - (vma->vm_end - vma->vm_start - 1), 495 - }; 496 - 497 486 if (!(vma->vm_flags & VM_SHARED)) 498 487 return; 499 488 500 489 p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma); 501 490 502 - inode = file_inode(vma->vm_file); 503 - filemap_fdatawrite_wbc(inode->i_mapping, &wbc); 491 + filemap_fdatawrite_range(file_inode(vma->vm_file)->i_mapping, 492 + (loff_t)vma->vm_pgoff * PAGE_SIZE, 493 + (loff_t)vma->vm_pgoff * PAGE_SIZE + 494 + (vma->vm_end - vma->vm_start - 1)); 504 495 } 505 496 506 497 static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
+1 -4
fs/btrfs/extent_io.c
··· 2468 2468 &BTRFS_I(inode)->runtime_flags)) 2469 2469 wbc->tagged_writepages = 1; 2470 2470 2471 - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2472 - tag = PAGECACHE_TAG_TOWRITE; 2473 - else 2474 - tag = PAGECACHE_TAG_DIRTY; 2471 + tag = wbc_to_tag(wbc); 2475 2472 retry: 2476 2473 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2477 2474 tag_pages_for_writeback(mapping, index, end);
+14 -32
fs/btrfs/inode.c
··· 8715 8715 * some fairly slow code that needs optimization. This walks the list 8716 8716 * of all the inodes with pending delalloc and forces them to disk. 8717 8717 */ 8718 - static int start_delalloc_inodes(struct btrfs_root *root, 8719 - struct writeback_control *wbc, bool snapshot, 8720 - bool in_reclaim_context) 8718 + static int start_delalloc_inodes(struct btrfs_root *root, long *nr_to_write, 8719 + bool snapshot, bool in_reclaim_context) 8721 8720 { 8722 8721 struct btrfs_delalloc_work *work, *next; 8723 8722 LIST_HEAD(works); 8724 8723 LIST_HEAD(splice); 8725 8724 int ret = 0; 8726 - bool full_flush = wbc->nr_to_write == LONG_MAX; 8727 8725 8728 8726 mutex_lock(&root->delalloc_mutex); 8729 8727 spin_lock(&root->delalloc_lock); ··· 8747 8749 8748 8750 if (snapshot) 8749 8751 set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags); 8750 - if (full_flush) { 8751 - work = btrfs_alloc_delalloc_work(&inode->vfs_inode); 8752 + if (nr_to_write == NULL) { 8753 + work = btrfs_alloc_delalloc_work(tmp_inode); 8752 8754 if (!work) { 8753 - iput(&inode->vfs_inode); 8755 + iput(tmp_inode); 8754 8756 ret = -ENOMEM; 8755 8757 goto out; 8756 8758 } ··· 8758 8760 btrfs_queue_work(root->fs_info->flush_workers, 8759 8761 &work->work); 8760 8762 } else { 8761 - ret = filemap_fdatawrite_wbc(inode->vfs_inode.i_mapping, wbc); 8763 + ret = filemap_flush_nr(tmp_inode->i_mapping, 8764 + nr_to_write); 8762 8765 btrfs_add_delayed_iput(inode); 8763 - if (ret || wbc->nr_to_write <= 0) 8766 + 8767 + if (ret || *nr_to_write <= 0) 8764 8768 goto out; 8765 8769 } 8766 8770 cond_resched(); ··· 8788 8788 8789 8789 int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context) 8790 8790 { 8791 - struct writeback_control wbc = { 8792 - .nr_to_write = LONG_MAX, 8793 - .sync_mode = WB_SYNC_NONE, 8794 - .range_start = 0, 8795 - .range_end = LLONG_MAX, 8796 - }; 8797 8791 struct btrfs_fs_info *fs_info = root->fs_info; 8798 8792 8799 8793 if (BTRFS_FS_ERROR(fs_info)) 8800 
8794 return -EROFS; 8801 - 8802 - return start_delalloc_inodes(root, &wbc, true, in_reclaim_context); 8795 + return start_delalloc_inodes(root, NULL, true, in_reclaim_context); 8803 8796 } 8804 8797 8805 8798 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, 8806 8799 bool in_reclaim_context) 8807 8800 { 8808 - struct writeback_control wbc = { 8809 - .nr_to_write = nr, 8810 - .sync_mode = WB_SYNC_NONE, 8811 - .range_start = 0, 8812 - .range_end = LLONG_MAX, 8813 - }; 8801 + long *nr_to_write = nr == LONG_MAX ? NULL : &nr; 8814 8802 struct btrfs_root *root; 8815 8803 LIST_HEAD(splice); 8816 8804 int ret; ··· 8810 8822 spin_lock(&fs_info->delalloc_root_lock); 8811 8823 list_splice_init(&fs_info->delalloc_roots, &splice); 8812 8824 while (!list_empty(&splice)) { 8813 - /* 8814 - * Reset nr_to_write here so we know that we're doing a full 8815 - * flush. 8816 - */ 8817 - if (nr == LONG_MAX) 8818 - wbc.nr_to_write = LONG_MAX; 8819 - 8820 8825 root = list_first_entry(&splice, struct btrfs_root, 8821 8826 delalloc_root); 8822 8827 root = btrfs_grab_root(root); ··· 8818 8837 &fs_info->delalloc_roots); 8819 8838 spin_unlock(&fs_info->delalloc_root_lock); 8820 8839 8821 - ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); 8840 + ret = start_delalloc_inodes(root, nr_to_write, false, 8841 + in_reclaim_context); 8822 8842 btrfs_put_root(root); 8823 - if (ret < 0 || wbc.nr_to_write <= 0) 8843 + if (ret < 0 || nr <= 0) 8824 8844 goto out; 8825 8845 spin_lock(&fs_info->delalloc_root_lock); 8826 8846 }
+1 -5
fs/ceph/addr.c
··· 1045 1045 ceph_wbc->index = ceph_wbc->start_index; 1046 1046 ceph_wbc->end = -1; 1047 1047 1048 - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) { 1049 - ceph_wbc->tag = PAGECACHE_TAG_TOWRITE; 1050 - } else { 1051 - ceph_wbc->tag = PAGECACHE_TAG_DIRTY; 1052 - } 1048 + ceph_wbc->tag = wbc_to_tag(wbc); 1053 1049 1054 1050 ceph_wbc->op_idx = -1; 1055 1051 ceph_wbc->num_ops = 0;
+1 -4
fs/ext4/inode.c
··· 2618 2618 handle_t *handle = NULL; 2619 2619 int bpp = ext4_journal_blocks_per_folio(mpd->inode); 2620 2620 2621 - if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) 2622 - tag = PAGECACHE_TAG_TOWRITE; 2623 - else 2624 - tag = PAGECACHE_TAG_DIRTY; 2621 + tag = wbc_to_tag(mpd->wbc); 2625 2622 2626 2623 mpd->map.m_len = 0; 2627 2624 mpd->next_pos = mpd->start_pos;
+1 -4
fs/f2fs/data.c
··· 2986 2986 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2987 2987 range_whole = 1; 2988 2988 } 2989 - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2990 - tag = PAGECACHE_TAG_TOWRITE; 2991 - else 2992 - tag = PAGECACHE_TAG_DIRTY; 2989 + tag = wbc_to_tag(wbc); 2993 2990 retry: 2994 2991 retry = 0; 2995 2992 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+34 -21
fs/fs-writeback.c
··· 14 14 * Additions for address_space-based writeback 15 15 */ 16 16 17 + #include <linux/sched/sysctl.h> 17 18 #include <linux/kernel.h> 18 19 #include <linux/export.h> 19 20 #include <linux/spinlock.h> ··· 31 30 #include <linux/device.h> 32 31 #include <linux/memcontrol.h> 33 32 #include "internal.h" 34 - 35 - /* 36 - * 4MB minimal write chunk size 37 - */ 38 - #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) 39 33 40 34 /* 41 35 * Passed into wb_writeback(), essentially a subset of writeback_control ··· 196 200 spin_unlock_irq(&wb->work_lock); 197 201 } 198 202 203 + static bool wb_wait_for_completion_cb(struct wb_completion *done) 204 + { 205 + unsigned long waited_secs = (jiffies - done->wait_start) / HZ; 206 + 207 + done->progress_stamp = jiffies; 208 + if (waited_secs > sysctl_hung_task_timeout_secs) 209 + pr_info("INFO: The task %s:%d has been waiting for writeback " 210 + "completion for more than %lu seconds.", 211 + current->comm, current->pid, waited_secs); 212 + 213 + return !atomic_read(&done->cnt); 214 + } 215 + 199 216 /** 200 217 * wb_wait_for_completion - wait for completion of bdi_writeback_works 201 218 * @done: target wb_completion ··· 221 212 */ 222 213 void wb_wait_for_completion(struct wb_completion *done) 223 214 { 215 + done->wait_start = jiffies; 224 216 atomic_dec(&done->cnt); /* put down the initial count */ 225 - wait_event(*done->waitq, !atomic_read(&done->cnt)); 217 + wait_event(*done->waitq, wb_wait_for_completion_cb(done)); 226 218 } 227 219 228 220 #ifdef CONFIG_CGROUP_WRITEBACK ··· 818 808 * @wbc: writeback_control of interest 819 809 * @inode: target inode 820 810 * 821 - * This function is to be used by __filemap_fdatawrite_range(), which is an 822 - * alternative entry point into writeback code, and first ensures @inode is 823 - * associated with a bdi_writeback and attaches it to @wbc. 
811 + * This function is to be used by filemap_writeback(), which is an alternative 812 + * entry point into writeback code, and first ensures @inode is associated with 813 + * a bdi_writeback and attaches it to @wbc. 824 814 */ 825 815 void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, 826 816 struct inode *inode) ··· 1892 1882 return ret; 1893 1883 } 1894 1884 1895 - static long writeback_chunk_size(struct bdi_writeback *wb, 1896 - struct wb_writeback_work *work) 1885 + static long writeback_chunk_size(struct super_block *sb, 1886 + struct bdi_writeback *wb, struct wb_writeback_work *work) 1897 1887 { 1898 1888 long pages; 1899 1889 ··· 1911 1901 * (maybe slowly) sync all tagged pages 1912 1902 */ 1913 1903 if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) 1914 - pages = LONG_MAX; 1915 - else { 1916 - pages = min(wb->avg_write_bandwidth / 2, 1917 - global_wb_domain.dirty_limit / DIRTY_SCOPE); 1918 - pages = min(pages, work->nr_pages); 1919 - pages = round_down(pages + MIN_WRITEBACK_PAGES, 1920 - MIN_WRITEBACK_PAGES); 1921 - } 1904 + return LONG_MAX; 1922 1905 1923 - return pages; 1906 + pages = min(wb->avg_write_bandwidth / 2, 1907 + global_wb_domain.dirty_limit / DIRTY_SCOPE); 1908 + pages = min(pages, work->nr_pages); 1909 + return round_down(pages + sb->s_min_writeback_pages, 1910 + sb->s_min_writeback_pages); 1924 1911 } 1925 1912 1926 1913 /* ··· 2019 2012 inode_state_set(inode, I_SYNC); 2020 2013 wbc_attach_and_unlock_inode(&wbc, inode); 2021 2014 2022 - write_chunk = writeback_chunk_size(wb, work); 2015 + write_chunk = writeback_chunk_size(inode->i_sb, wb, work); 2023 2016 wbc.nr_to_write = write_chunk; 2024 2017 wbc.pages_skipped = 0; 2025 2018 ··· 2028 2021 * evict_inode() will wait so the inode cannot be freed. 2029 2022 */ 2030 2023 __writeback_single_inode(inode, &wbc); 2024 + 2025 + /* Report progress to inform the hung task detector of the progress. 
*/ 2026 + if (work->done && work->done->progress_stamp && 2027 + (jiffies - work->done->progress_stamp) > HZ * 2028 + sysctl_hung_task_timeout_secs / 2) 2029 + wake_up_all(work->done->waitq); 2031 2030 2032 2031 wbc_detach_inode(&wbc); 2033 2032 work->nr_pages -= write_chunk - wbc.nr_to_write;
+1 -4
fs/gfs2/aops.c
··· 311 311 range_whole = 1; 312 312 cycled = 1; /* ignore range_cyclic tests */ 313 313 } 314 - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 315 - tag = PAGECACHE_TAG_TOWRITE; 316 - else 317 - tag = PAGECACHE_TAG_DIRTY; 314 + tag = wbc_to_tag(wbc); 318 315 319 316 retry: 320 317 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+2 -9
fs/ocfs2/journal.c
··· 902 902 903 903 static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) 904 904 { 905 - struct address_space *mapping = jinode->i_vfs_inode->i_mapping; 906 - struct writeback_control wbc = { 907 - .sync_mode = WB_SYNC_ALL, 908 - .nr_to_write = mapping->nrpages * 2, 909 - .range_start = jinode->i_dirty_start, 910 - .range_end = jinode->i_dirty_end, 911 - }; 912 - 913 - return filemap_fdatawrite_wbc(mapping, &wbc); 905 + return filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, 906 + jinode->i_dirty_start, jinode->i_dirty_end); 914 907 } 915 908 916 909 int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
+1
fs/super.c
··· 389 389 goto fail; 390 390 if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink)) 391 391 goto fail; 392 + s->s_min_writeback_pages = MIN_WRITEBACK_PAGES; 392 393 return s; 393 394 394 395 fail:
+4 -6
fs/sync.c
··· 281 281 } 282 282 283 283 if (flags & SYNC_FILE_RANGE_WRITE) { 284 - int sync_mode = WB_SYNC_NONE; 285 - 286 284 if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) == 287 285 SYNC_FILE_RANGE_WRITE_AND_WAIT) 288 - sync_mode = WB_SYNC_ALL; 289 - 290 - ret = __filemap_fdatawrite_range(mapping, offset, endbyte, 291 - sync_mode); 286 + ret = filemap_fdatawrite_range(mapping, offset, 287 + endbyte); 288 + else 289 + ret = filemap_flush_range(mapping, offset, endbyte); 292 290 if (ret < 0) 293 291 goto out; 294 292 }
+26 -2
fs/xfs/xfs_zone_alloc.c
··· 1204 1204 .mp = mp, 1205 1205 }; 1206 1206 struct xfs_buftarg *bt = mp->m_rtdev_targp; 1207 + xfs_extlen_t zone_blocks = mp->m_groups[XG_TYPE_RTG].blocks; 1207 1208 int error; 1208 1209 1209 1210 if (!bt) { ··· 1235 1234 return -ENOMEM; 1236 1235 1237 1236 xfs_info(mp, "%u zones of %u blocks (%u max open zones)", 1238 - mp->m_sb.sb_rgcount, mp->m_groups[XG_TYPE_RTG].blocks, 1239 - mp->m_max_open_zones); 1237 + mp->m_sb.sb_rgcount, zone_blocks, mp->m_max_open_zones); 1240 1238 trace_xfs_zones_mount(mp); 1239 + 1240 + /* 1241 + * The writeback code switches between inodes regularly to provide 1242 + * fairness. The default lower bound is 4MiB, but for zoned file 1243 + * systems we want to increase that both to reduce seeks, but also more 1244 + * importantly so that workloads that writes files in a multiple of the 1245 + * zone size do not get fragmented and require garbage collection when 1246 + * they shouldn't. Increase is to the zone size capped by the max 1247 + * extent len. 1248 + * 1249 + * Note that because s_min_writeback_pages is a superblock field, this 1250 + * value also get applied to non-zoned files on the data device if 1251 + * there are any. On typical zoned setup all data is on the RT device 1252 + * because using the more efficient sequential write required zones 1253 + * is the reason for using the zone allocator, and either the RT device 1254 + * and the (meta)data device are on the same block device, or the 1255 + * (meta)data device is on a fast SSD while the data on the RT device 1256 + * is on a SMR HDD. In any combination of the above cases enforcing 1257 + * the higher min_writeback_pages for non-RT inodes is either a noop 1258 + * or beneficial. 1259 + */ 1260 + mp->m_super->s_min_writeback_pages = 1261 + XFS_FSB_TO_B(mp, min(zone_blocks, XFS_MAX_BMBT_EXTLEN)) >> 1262 + PAGE_SHIFT; 1241 1263 1242 1264 if (bdev_is_zoned(bt->bt_bdev)) { 1243 1265 error = blkdev_report_zones(bt->bt_bdev,
+2
include/linux/backing-dev-defs.h
··· 63 63 struct wb_completion { 64 64 atomic_t cnt; 65 65 wait_queue_head_t *waitq; 66 + unsigned long progress_stamp; /* The jiffies when slow progress is detected */ 67 + unsigned long wait_start; /* The jiffies when waiting for the writeback work to finish */ 66 68 }; 67 69 68 70 #define __WB_COMPLETION_INIT(_waitq) \
+4 -3
include/linux/fs.h
··· 1642 1642 1643 1643 spinlock_t s_inode_wblist_lock; 1644 1644 struct list_head s_inodes_wb; /* writeback inodes */ 1645 + long s_min_writeback_pages; 1645 1646 } __randomize_layout; 1646 1647 1647 1648 static inline struct user_namespace *i_user_ns(const struct inode *inode) ··· 3076 3075 extern int __must_check file_check_and_advance_wb_err(struct file *file); 3077 3076 extern int __must_check file_write_and_wait_range(struct file *file, 3078 3077 loff_t start, loff_t end); 3079 - int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start, 3078 + int filemap_flush_range(struct address_space *mapping, loff_t start, 3080 3079 loff_t end); 3081 3080 3082 3081 static inline int file_write_and_wait(struct file *file) ··· 3113 3112 } else if (iocb->ki_flags & IOCB_DONTCACHE) { 3114 3113 struct address_space *mapping = iocb->ki_filp->f_mapping; 3115 3114 3116 - filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, 3117 - iocb->ki_pos - 1); 3115 + filemap_flush_range(mapping, iocb->ki_pos - count, 3116 + iocb->ki_pos - 1); 3118 3117 } 3119 3118 3120 3119 return count;
+1 -4
include/linux/pagemap.h
··· 38 38 int write_inode_now(struct inode *, int sync); 39 39 int filemap_fdatawrite(struct address_space *); 40 40 int filemap_flush(struct address_space *); 41 + int filemap_flush_nr(struct address_space *mapping, long *nr_to_write); 41 42 int filemap_fdatawait_keep_errors(struct address_space *mapping); 42 43 int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); 43 44 int filemap_fdatawait_range_keep_errors(struct address_space *mapping, ··· 54 53 bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); 55 54 int filemap_write_and_wait_range(struct address_space *mapping, 56 55 loff_t lstart, loff_t lend); 57 - int __filemap_fdatawrite_range(struct address_space *mapping, 58 - loff_t start, loff_t end, int sync_mode); 59 56 int filemap_fdatawrite_range(struct address_space *mapping, 60 57 loff_t start, loff_t end); 61 58 int filemap_check_errors(struct address_space *mapping); 62 59 void __filemap_set_wb_err(struct address_space *mapping, int err); 63 - int filemap_fdatawrite_wbc(struct address_space *mapping, 64 - struct writeback_control *wbc); 65 60 int kiocb_write_and_wait(struct kiocb *iocb, size_t count); 66 61 67 62 static inline int filemap_write_and_wait(struct address_space *mapping)
+12
include/linux/writeback.h
··· 189 189 void inode_wait_for_writeback(struct inode *inode); 190 190 void inode_io_list_del(struct inode *inode); 191 191 192 + static inline xa_mark_t wbc_to_tag(struct writeback_control *wbc) 193 + { 194 + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 195 + return PAGECACHE_TAG_TOWRITE; 196 + return PAGECACHE_TAG_DIRTY; 197 + } 198 + 192 199 #ifdef CONFIG_CGROUP_WRITEBACK 193 200 194 201 #include <linux/cgroup.h> ··· 373 366 374 367 void sb_mark_inode_writeback(struct inode *inode); 375 368 void sb_clear_inode_writeback(struct inode *inode); 369 + 370 + /* 371 + * 4MB minimal write chunk size 372 + */ 373 + #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) 376 374 377 375 #endif /* WRITEBACK_H */
+1 -2
mm/fadvise.c
··· 111 111 spin_unlock(&file->f_lock); 112 112 break; 113 113 case POSIX_FADV_DONTNEED: 114 - __filemap_fdatawrite_range(mapping, offset, endbyte, 115 - WB_SYNC_NONE); 114 + filemap_flush_range(mapping, offset, endbyte); 116 115 117 116 /* 118 117 * First and last FULL page! Partial pages are deliberately
+47 -68
mm/filemap.c
··· 366 366 return 0; 367 367 } 368 368 369 - /** 370 - * filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range 371 - * @mapping: address space structure to write 372 - * @wbc: the writeback_control controlling the writeout 373 - * 374 - * Call writepages on the mapping using the provided wbc to control the 375 - * writeout. 376 - * 377 - * Return: %0 on success, negative error code otherwise. 378 - */ 379 - int filemap_fdatawrite_wbc(struct address_space *mapping, 380 - struct writeback_control *wbc) 369 + static int filemap_writeback(struct address_space *mapping, loff_t start, 370 + loff_t end, enum writeback_sync_modes sync_mode, 371 + long *nr_to_write) 381 372 { 373 + struct writeback_control wbc = { 374 + .sync_mode = sync_mode, 375 + .nr_to_write = nr_to_write ? *nr_to_write : LONG_MAX, 376 + .range_start = start, 377 + .range_end = end, 378 + }; 382 379 int ret; 383 380 384 381 if (!mapping_can_writeback(mapping) || 385 382 !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 386 383 return 0; 387 384 388 - wbc_attach_fdatawrite_inode(wbc, mapping->host); 389 - ret = do_writepages(mapping, wbc); 390 - wbc_detach_inode(wbc); 385 + wbc_attach_fdatawrite_inode(&wbc, mapping->host); 386 + ret = do_writepages(mapping, &wbc); 387 + wbc_detach_inode(&wbc); 388 + 389 + if (!ret && nr_to_write) 390 + *nr_to_write = wbc.nr_to_write; 391 391 return ret; 392 392 } 393 - EXPORT_SYMBOL(filemap_fdatawrite_wbc); 394 393 395 394 /** 396 - * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range 395 + * filemap_fdatawrite_range - start writeback on mapping dirty pages in range 397 396 * @mapping: address space structure to write 398 397 * @start: offset in bytes where the range starts 399 398 * @end: offset in bytes where the range ends (inclusive) 400 - * @sync_mode: enable synchronous operation 401 399 * 402 400 * Start writeback against all of a mapping's dirty pages that lie 403 401 * within the byte offsets <start, end> inclusive. 
404 402 * 405 - * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as 406 - * opposed to a regular memory cleansing writeback. The difference between 407 - * these two operations is that if a dirty page/buffer is encountered, it must 408 - * be waited upon, and not just skipped over. 403 + * This is a data integrity operation that waits upon dirty or in writeback 404 + * pages. 409 405 * 410 406 * Return: %0 on success, negative error code otherwise. 411 407 */ 412 - int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, 413 - loff_t end, int sync_mode) 414 - { 415 - struct writeback_control wbc = { 416 - .sync_mode = sync_mode, 417 - .nr_to_write = LONG_MAX, 418 - .range_start = start, 419 - .range_end = end, 420 - }; 421 - 422 - return filemap_fdatawrite_wbc(mapping, &wbc); 423 - } 424 - 425 - static inline int __filemap_fdatawrite(struct address_space *mapping, 426 - int sync_mode) 427 - { 428 - return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode); 429 - } 430 - 431 - int filemap_fdatawrite(struct address_space *mapping) 432 - { 433 - return __filemap_fdatawrite(mapping, WB_SYNC_ALL); 434 - } 435 - EXPORT_SYMBOL(filemap_fdatawrite); 436 - 437 408 int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, 438 - loff_t end) 409 + loff_t end) 439 410 { 440 - return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL); 411 + return filemap_writeback(mapping, start, end, WB_SYNC_ALL, NULL); 441 412 } 442 413 EXPORT_SYMBOL(filemap_fdatawrite_range); 443 414 415 + int filemap_fdatawrite(struct address_space *mapping) 416 + { 417 + return filemap_fdatawrite_range(mapping, 0, LLONG_MAX); 418 + } 419 + EXPORT_SYMBOL(filemap_fdatawrite); 420 + 444 421 /** 445 - * filemap_fdatawrite_range_kick - start writeback on a range 422 + * filemap_flush_range - start writeback on a range 446 423 * @mapping: target address_space 447 424 * @start: index to start writeback on 448 425 * @end: last 
(inclusive) index for writeback ··· 429 452 * 430 453 * Return: %0 on success, negative error code otherwise. 431 454 */ 432 - int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start, 455 + int filemap_flush_range(struct address_space *mapping, loff_t start, 433 456 loff_t end) 434 457 { 435 - return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE); 458 + return filemap_writeback(mapping, start, end, WB_SYNC_NONE, NULL); 436 459 } 437 - EXPORT_SYMBOL_GPL(filemap_fdatawrite_range_kick); 460 + EXPORT_SYMBOL_GPL(filemap_flush_range); 438 461 439 462 /** 440 463 * filemap_flush - mostly a non-blocking flush ··· 447 470 */ 448 471 int filemap_flush(struct address_space *mapping) 449 472 { 450 - return __filemap_fdatawrite(mapping, WB_SYNC_NONE); 473 + return filemap_flush_range(mapping, 0, LLONG_MAX); 451 474 } 452 475 EXPORT_SYMBOL(filemap_flush); 476 + 477 + /* 478 + * Start writeback on @nr_to_write pages from @mapping. No one but the existing 479 + * btrfs caller should be using this. Talk to linux-mm if you think adding a 480 + * new caller is a good idea. 481 + */ 482 + int filemap_flush_nr(struct address_space *mapping, long *nr_to_write) 483 + { 484 + return filemap_writeback(mapping, 0, LLONG_MAX, WB_SYNC_NONE, 485 + nr_to_write); 486 + } 487 + EXPORT_SYMBOL_FOR_MODULES(filemap_flush_nr, "btrfs"); 453 488 454 489 /** 455 490 * filemap_range_has_page - check if a page exists in range. ··· 680 691 return 0; 681 692 682 693 if (mapping_needs_writeback(mapping)) { 683 - err = __filemap_fdatawrite_range(mapping, lstart, lend, 684 - WB_SYNC_ALL); 694 + err = filemap_fdatawrite_range(mapping, lstart, lend); 685 695 /* 686 696 * Even if the above returned error, the pages may be 687 697 * written partially (e.g. -ENOSPC), so we wait for it. 
··· 782 794 return 0; 783 795 784 796 if (mapping_needs_writeback(mapping)) { 785 - err = __filemap_fdatawrite_range(mapping, lstart, lend, 786 - WB_SYNC_ALL); 797 + err = filemap_fdatawrite_range(mapping, lstart, lend); 787 798 /* See comment of filemap_write_and_wait() */ 788 799 if (err != -EIO) 789 800 __filemap_fdatawait_range(mapping, lstart, lend); ··· 4515 4528 unmap_mapping_pages(mapping, first, nr, false); 4516 4529 4517 4530 /* Write back the data if we're asked to. */ 4518 - if (flush) { 4519 - struct writeback_control wbc = { 4520 - .sync_mode = WB_SYNC_ALL, 4521 - .nr_to_write = LONG_MAX, 4522 - .range_start = start, 4523 - .range_end = end, 4524 - }; 4525 - 4526 - filemap_fdatawrite_wbc(mapping, &wbc); 4527 - } 4531 + if (flush) 4532 + filemap_fdatawrite_range(mapping, start, end); 4528 4533 4529 4534 /* Wait for writeback to complete on all folios and discard. */ 4530 4535 invalidate_inode_pages2_range(mapping, start / PAGE_SIZE, end / PAGE_SIZE);
-6
mm/page-writeback.c
··· 2434 2434 return true; 2435 2435 } 2436 2436 2437 - static xa_mark_t wbc_to_tag(struct writeback_control *wbc) 2438 - { 2439 - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2440 - return PAGECACHE_TAG_TOWRITE; 2441 - return PAGECACHE_TAG_DIRTY; 2442 - } 2443 2437 2444 2438 static pgoff_t wbc_end(struct writeback_control *wbc) 2445 2439 {