Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

writeback: collect stats of all wb of bdi in bdi_debug_stats_show

Patch series "Improve visibility of writeback", v5.

This series tries to improve the visibility of writeback. Patch 1 makes
/sys/kernel/debug/bdi/xxx/stats show writeback info of the whole bdi instead
of only the writeback info in the root cgroup. Patch 2 adds a new debug file
/sys/kernel/debug/bdi/xxx/wb_stats to show per-wb writeback info. Patch 3
adds wb_monitor.py to monitor basic writeback info of a running system; more
info could be added on demand. Patch 4 is a random cleanup. More details
can be found in the respective patches.

The following domain hierarchy is tested:
                   global domain (320G)
                   /                  \
        cgroup domain1(10G)     cgroup domain2(10G)
                |                        |
bdi            wb1                      wb2

/* all writeback info of bdi is successfully collected */
cat stats
BdiWriteback: 4704 kB
BdiReclaimable: 1294496 kB
BdiDirtyThresh: 204208088 kB
DirtyThresh: 195259944 kB
BackgroundThresh: 32503588 kB
BdiDirtied: 48519296 kB
BdiWritten: 47225696 kB
BdiWriteBandwidth: 1173892 kBps
b_dirty: 1
b_io: 0
b_more_io: 1
b_dirty_time: 0
bdi_list: 1
state: 1

/* per wb writeback info of bdi is collected */
cat /sys/kernel/debug/bdi/252:16/wb_stats
WbCgIno: 1
WbWriteback: 0 kB
WbReclaimable: 0 kB
WbDirtyThresh: 0 kB
WbDirtied: 0 kB
WbWritten: 0 kB
WbWriteBandwidth: 102400 kBps
b_dirty: 0
b_io: 0
b_more_io: 0
b_dirty_time: 0
state: 1

WbCgIno: 4208
WbWriteback: 59808 kB
WbReclaimable: 676480 kB
WbDirtyThresh: 6004624 kB
WbDirtied: 23348192 kB
WbWritten: 22614592 kB
WbWriteBandwidth: 593204 kBps
b_dirty: 1
b_io: 1
b_more_io: 0
b_dirty_time: 0
state: 7

WbCgIno: 4249
WbWriteback: 144256 kB
WbReclaimable: 432096 kB
WbDirtyThresh: 6004344 kB
WbDirtied: 25727744 kB
WbWritten: 25154752 kB
WbWriteBandwidth: 577904 kBps
b_dirty: 0
b_io: 1
b_more_io: 0
b_dirty_time: 0
state: 7

The wb_monitor.py script output is as follows:
./wb_monitor.py 252:16 -c
writeback reclaimable dirtied written avg_bw
252:16_1 0 0 0 0 102400
252:16_4284 672 820064 9230368 8410304 685612
252:16_4325 896 819840 10491264 9671648 652348
252:16 1568 1639904 19721632 18081952 1440360

writeback reclaimable dirtied written avg_bw
252:16_1 0 0 0 0 102400
252:16_4284 672 820064 9230368 8410304 685612
252:16_4325 896 819840 10491264 9671648 652348
252:16 1568 1639904 19721632 18081952 1440360
...


This patch (of 5):

/sys/kernel/debug/bdi/xxx/stats is supposed to show writeback information
of the whole bdi, but only writeback information of the bdi in the root
cgroup is collected. So writeback information in non-root cgroups is
currently missing. To be more specific, consider the following case:

/* create writeback cgroup */
cd /sys/fs/cgroup
echo "+memory +io" > cgroup.subtree_control
mkdir group1
cd group1
echo $$ > cgroup.procs
/* do writeback in cgroup */
fio -name test -filename=/dev/vdb ...
/* get writeback info of bdi */
cat /sys/kernel/debug/bdi/xxx/stats
The cat result unexpectedly implies that there is no writeback on the
target bdi.

Fix this by collecting stats of all wbs in the bdi instead of only the wb
in the root cgroup.

The following domain hierarchy is tested:
                   global domain (320G)
                   /                  \
        cgroup domain1(10G)     cgroup domain2(10G)
                |                        |
bdi            wb1                      wb2

/* all writeback info of bdi is successfully collected */
cat stats
BdiWriteback: 2912 kB
BdiReclaimable: 1598464 kB
BdiDirtyThresh: 167479028 kB
DirtyThresh: 195038532 kB
BackgroundThresh: 32466728 kB
BdiDirtied: 19141696 kB
BdiWritten: 17543456 kB
BdiWriteBandwidth: 1136172 kBps
b_dirty: 2
b_io: 0
b_more_io: 1
b_dirty_time: 0
bdi_list: 1
state: 1

Link: https://lkml.kernel.org/r/20240423034643.141219-1-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20240423034643.141219-2-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Brian Foster <bfoster@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Kemeng Shi and committed by
Andrew Morton
e32e2700 4673ad3b

+78 -28
+78 -28
mm/backing-dev.c
··· 39 39 #include <linux/debugfs.h> 40 40 #include <linux/seq_file.h> 41 41 42 + struct wb_stats { 43 + unsigned long nr_dirty; 44 + unsigned long nr_io; 45 + unsigned long nr_more_io; 46 + unsigned long nr_dirty_time; 47 + unsigned long nr_writeback; 48 + unsigned long nr_reclaimable; 49 + unsigned long nr_dirtied; 50 + unsigned long nr_written; 51 + unsigned long dirty_thresh; 52 + unsigned long wb_thresh; 53 + }; 54 + 42 55 static struct dentry *bdi_debug_root; 43 56 44 57 static void bdi_debug_init(void) ··· 59 46 bdi_debug_root = debugfs_create_dir("bdi", NULL); 60 47 } 61 48 49 + static void collect_wb_stats(struct wb_stats *stats, 50 + struct bdi_writeback *wb) 51 + { 52 + struct inode *inode; 53 + 54 + spin_lock(&wb->list_lock); 55 + list_for_each_entry(inode, &wb->b_dirty, i_io_list) 56 + stats->nr_dirty++; 57 + list_for_each_entry(inode, &wb->b_io, i_io_list) 58 + stats->nr_io++; 59 + list_for_each_entry(inode, &wb->b_more_io, i_io_list) 60 + stats->nr_more_io++; 61 + list_for_each_entry(inode, &wb->b_dirty_time, i_io_list) 62 + if (inode->i_state & I_DIRTY_TIME) 63 + stats->nr_dirty_time++; 64 + spin_unlock(&wb->list_lock); 65 + 66 + stats->nr_writeback += wb_stat(wb, WB_WRITEBACK); 67 + stats->nr_reclaimable += wb_stat(wb, WB_RECLAIMABLE); 68 + stats->nr_dirtied += wb_stat(wb, WB_DIRTIED); 69 + stats->nr_written += wb_stat(wb, WB_WRITTEN); 70 + stats->wb_thresh += wb_calc_thresh(wb, stats->dirty_thresh); 71 + } 72 + 73 + #ifdef CONFIG_CGROUP_WRITEBACK 74 + static void bdi_collect_stats(struct backing_dev_info *bdi, 75 + struct wb_stats *stats) 76 + { 77 + struct bdi_writeback *wb; 78 + 79 + rcu_read_lock(); 80 + list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) { 81 + if (!wb_tryget(wb)) 82 + continue; 83 + 84 + collect_wb_stats(stats, wb); 85 + wb_put(wb); 86 + } 87 + rcu_read_unlock(); 88 + } 89 + #else 90 + static void bdi_collect_stats(struct backing_dev_info *bdi, 91 + struct wb_stats *stats) 92 + { 93 + collect_wb_stats(stats, &bdi->wb); 94 + 
} 95 + #endif 96 + 62 97 static int bdi_debug_stats_show(struct seq_file *m, void *v) 63 98 { 64 99 struct backing_dev_info *bdi = m->private; 65 - struct bdi_writeback *wb = &bdi->wb; 66 100 unsigned long background_thresh; 67 101 unsigned long dirty_thresh; 68 - unsigned long wb_thresh; 69 - unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time; 70 - struct inode *inode; 71 - 72 - nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0; 73 - spin_lock(&wb->list_lock); 74 - list_for_each_entry(inode, &wb->b_dirty, i_io_list) 75 - nr_dirty++; 76 - list_for_each_entry(inode, &wb->b_io, i_io_list) 77 - nr_io++; 78 - list_for_each_entry(inode, &wb->b_more_io, i_io_list) 79 - nr_more_io++; 80 - list_for_each_entry(inode, &wb->b_dirty_time, i_io_list) 81 - if (inode->i_state & I_DIRTY_TIME) 82 - nr_dirty_time++; 83 - spin_unlock(&wb->list_lock); 102 + struct wb_stats stats; 103 + unsigned long tot_bw; 84 104 85 105 global_dirty_limits(&background_thresh, &dirty_thresh); 86 - wb_thresh = wb_calc_thresh(wb, dirty_thresh); 106 + 107 + memset(&stats, 0, sizeof(stats)); 108 + stats.dirty_thresh = dirty_thresh; 109 + bdi_collect_stats(bdi, &stats); 110 + tot_bw = atomic_long_read(&bdi->tot_write_bandwidth); 87 111 88 112 seq_printf(m, 89 113 "BdiWriteback: %10lu kB\n" ··· 137 87 "b_dirty_time: %10lu\n" 138 88 "bdi_list: %10u\n" 139 89 "state: %10lx\n", 140 - (unsigned long) K(wb_stat(wb, WB_WRITEBACK)), 141 - (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)), 142 - K(wb_thresh), 90 + K(stats.nr_writeback), 91 + K(stats.nr_reclaimable), 92 + K(stats.wb_thresh), 143 93 K(dirty_thresh), 144 94 K(background_thresh), 145 - (unsigned long) K(wb_stat(wb, WB_DIRTIED)), 146 - (unsigned long) K(wb_stat(wb, WB_WRITTEN)), 147 - (unsigned long) K(wb->write_bandwidth), 148 - nr_dirty, 149 - nr_io, 150 - nr_more_io, 151 - nr_dirty_time, 95 + K(stats.nr_dirtied), 96 + K(stats.nr_written), 97 + K(tot_bw), 98 + stats.nr_dirty, 99 + stats.nr_io, 100 + stats.nr_more_io, 101 + stats.nr_dirty_time, 152 
102 !list_empty(&bdi->bdi_list), bdi->wb.state); 153 103 154 104 return 0;