Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

block: enable passthrough command statistics

Applications using the passthrough interfaces for IO want to continue
seeing the disk stats. These requests had been fenced off from this
block layer feature. While the block layer doesn't necessarily know what
a passthrough command does, we do know the data size and direction,
which is enough to account for the command's stats.

Since tracking these has the potential to produce unexpected results,
the passthrough stats are locked behind a new queue flag that needs to
be enabled with the /sys/block/<dev>/queue/iostats_passthrough
attribute.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241007153236.2818562-1-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Keith Busch and committed by Jens Axboe
110234da d51c9cdf

+73 -1
+7
Documentation/ABI/stable/sysfs-block
··· 424 424 [RW] This file is used to control (on/off) the iostats 425 425 accounting of the disk. 426 426 427 + What: /sys/block/<disk>/queue/iostats_passthrough 428 + Date: October 2024 429 + Contact: linux-block@vger.kernel.org 430 + Description: 431 + [RW] This file is used to control (on/off) the iostats 432 + accounting of the disk for passthrough commands. 433 + 427 434 428 435 What: /sys/block/<disk>/queue/logical_block_size 429 436 Date: May 2009
+31 -1
block/blk-mq.c
··· 988 988 } 989 989 } 990 990 991 + static inline bool blk_rq_passthrough_stats(struct request *req) 992 + { 993 + struct bio *bio = req->bio; 994 + 995 + if (!blk_queue_passthrough_stat(req->q)) 996 + return false; 997 + 998 + /* Requests without a bio do not transfer data. */ 999 + if (!bio) 1000 + return false; 1001 + 1002 + /* 1003 + * Stats are accumulated in the bdev, so must have one attached to a 1004 + * bio to track stats. Most drivers do not set the bdev for passthrough 1005 + * requests, but nvme is one that will set it. 1006 + */ 1007 + if (!bio->bi_bdev) 1008 + return false; 1009 + 1010 + /* 1011 + * We don't know what a passthrough command does, but we know the 1012 + * payload size and data direction. Ensuring the size is aligned to the 1013 + * block size filters out most commands with payloads that don't 1014 + * represent sector access. 1015 + */ 1016 + if (blk_rq_bytes(req) & (bdev_logical_block_size(bio->bi_bdev) - 1)) 1017 + return false; 1018 + return true; 1019 + } 1020 + 991 1021 static inline void blk_account_io_start(struct request *req) 992 1022 { 993 1023 trace_block_io_start(req); 994 1024 995 1025 if (!blk_queue_io_stat(req->q)) 996 1026 return; 997 - if (blk_rq_is_passthrough(req)) 1027 + if (blk_rq_is_passthrough(req) && !blk_rq_passthrough_stats(req)) 998 1028 return; 999 1029 1000 1030 req->rq_flags |= RQF_IO_STAT;
+30
block/blk-sysfs.c
··· 272 272 return queue_var_show(disk_nr_zones(disk), page); 273 273 } 274 274 275 + static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) 276 + { 277 + return queue_var_show(blk_queue_passthrough_stat(disk->queue), page); 278 + } 279 + 280 + static ssize_t queue_iostats_passthrough_store(struct gendisk *disk, 281 + const char *page, size_t count) 282 + { 283 + struct queue_limits lim; 284 + unsigned long ios; 285 + ssize_t ret; 286 + 287 + ret = queue_var_store(&ios, page, count); 288 + if (ret < 0) 289 + return ret; 290 + 291 + lim = queue_limits_start_update(disk->queue); 292 + if (ios) 293 + lim.flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; 294 + else 295 + lim.flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; 296 + 297 + ret = queue_limits_commit_update(disk->queue, &lim); 298 + if (ret) 299 + return ret; 300 + 301 + return count; 302 + } 275 303 static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) 276 304 { 277 305 return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | ··· 488 460 QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones"); 489 461 490 462 QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); 463 + QUEUE_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); 491 464 QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); 492 465 QUEUE_RW_ENTRY(queue_poll, "io_poll"); 493 466 QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay"); ··· 615 586 &queue_max_open_zones_entry.attr, 616 587 &queue_max_active_zones_entry.attr, 617 588 &queue_nomerges_entry.attr, 589 + &queue_iostats_passthrough_entry.attr, 618 590 &queue_iostats_entry.attr, 619 591 &queue_stable_writes_entry.attr, 620 592 &queue_add_random_entry.attr,
+5
include/linux/blkdev.h
··· 349 349 /* I/O topology is misaligned */ 350 350 #define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1)) 351 351 352 + /* passthrough command IO accounting */ 353 + #define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2)) 354 + 352 355 struct queue_limits { 353 356 blk_features_t features; 354 357 blk_flags_t flags; ··· 620 617 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) 621 618 #define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) 622 619 #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) 620 + #define blk_queue_passthrough_stat(q) \ 621 + ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH) 623 622 #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) 624 623 #define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) 625 624 #ifdef CONFIG_BLK_RQ_ALLOC_TIME