block: improve struct request_queue layout

It's clearly been a while since someone looked at this, so I gave it a
quick shot. There are a few issues in here:

- Random bundling of members that are mostly read-only with members that
  are often written
- Random holes that need not be there

This moves the most frequently used bits into cachelines 1 and 2, with
the 2nd one being more write intensive than the first one, which is
basically read-only.

Outside of making this work a bit more efficiently, it also reduces the
size of struct request_queue for my test setup from 864 bytes (spanning
14 cachelines!) to 832 bytes and 13 cachelines.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/d2b7b61c-4868-45c0-9060-4f9c73de9d7e@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Jens Axboe 2 years ago 0c734c5e 6ef02df1

+49 -46

1 changed file

expand all

include

linux

blkdev.h

+49 -46

include/linux/blkdev.h

··· 367 367 }; 368 368 369 369 struct request_queue { 370 - struct request *last_merge; 371 - struct elevator_queue *elevator; 372 - 373 - struct percpu_ref q_usage_counter; 374 - 375 - struct blk_queue_stats *stats; 376 - struct rq_qos *rq_qos; 377 - struct mutex rq_qos_mutex; 378 - 379 - const struct blk_mq_ops *mq_ops; 380 - 381 - /* sw queues */ 382 - struct blk_mq_ctx __percpu *queue_ctx; 383 - 384 - unsigned int queue_depth; 385 - 386 - /* hw dispatch queues */ 387 - struct xarray hctx_table; 388 - unsigned int nr_hw_queues; 389 - 390 370 /* 391 371 * The queue owner gets to use this for whatever they like. 392 372 * ll_rw_blk doesn't touch it. 393 373 */ 394 374 void *queuedata; 395 375 376 + struct elevator_queue *elevator; 377 + 378 + const struct blk_mq_ops *mq_ops; 379 + 380 + /* sw queues */ 381 + struct blk_mq_ctx __percpu *queue_ctx; 382 + 396 383 /* 397 384 * various queue flags, see QUEUE_* below 398 385 */ 399 386 unsigned long queue_flags; 400 - /* 401 - * Number of contexts that have called blk_set_pm_only(). If this 402 - * counter is above zero then only RQF_PM requests are processed. 403 - */ 404 - atomic_t pm_only; 405 387 406 - /* 407 - * ida allocated id for this queue. Used to index queues from 408 - * ioctx. 
409 - */ 410 - int id; 388 + unsigned int rq_timeout; 389 + 390 + unsigned int queue_depth; 391 + 392 + refcount_t refs; 393 + 394 + /* hw dispatch queues */ 395 + unsigned int nr_hw_queues; 396 + struct xarray hctx_table; 397 + 398 + struct percpu_ref q_usage_counter; 399 + 400 + struct request *last_merge; 411 401 412 402 spinlock_t queue_lock; 413 403 414 - struct gendisk *disk; 404 + int quiesce_depth; 415 405 416 - refcount_t refs; 406 + struct gendisk *disk; 417 407 418 408 /* 419 409 * mq queue kobject 420 410 */ 421 411 struct kobject *mq_kobj; 412 + 413 + struct queue_limits limits; 422 414 423 415 #ifdef CONFIG_BLK_DEV_INTEGRITY 424 416 struct blk_integrity integrity; ··· 422 430 #endif 423 431 424 432 /* 433 + * Number of contexts that have called blk_set_pm_only(). If this 434 + * counter is above zero then only RQF_PM requests are processed. 435 + */ 436 + atomic_t pm_only; 437 + 438 + struct blk_queue_stats *stats; 439 + struct rq_qos *rq_qos; 440 + struct mutex rq_qos_mutex; 441 + 442 + /* 443 + * ida allocated id for this queue. Used to index queues from 444 + * ioctx. 
445 + */ 446 + int id; 447 + 448 + unsigned int dma_pad_mask; 449 + 450 + /* 425 451 * queue settings 426 452 */ 427 453 unsigned long nr_requests; /* Max # of requests */ 428 - 429 - unsigned int dma_pad_mask; 430 454 431 455 #ifdef CONFIG_BLK_INLINE_ENCRYPTION 432 456 struct blk_crypto_profile *crypto_profile; 433 457 struct kobject *crypto_kobject; 434 458 #endif 435 459 436 - unsigned int rq_timeout; 437 - 438 460 struct timer_list timeout; 439 461 struct work_struct timeout_work; 440 462 441 463 atomic_t nr_active_requests_shared_tags; 464 + 465 + unsigned int required_elevator_features; 442 466 443 467 struct blk_mq_tags *sched_shared_tags; 444 468 ··· 466 458 struct mutex blkcg_mutex; 467 459 #endif 468 460 469 - struct queue_limits limits; 470 - 471 - unsigned int required_elevator_features; 472 - 473 461 int node; 462 + 463 + spinlock_t requeue_lock; 464 + struct list_head requeue_list; 465 + struct delayed_work requeue_work; 466 + 474 467 #ifdef CONFIG_BLK_DEV_IO_TRACE 475 468 struct blk_trace __rcu *blk_trace; 476 469 #endif ··· 480 471 */ 481 472 struct blk_flush_queue *fq; 482 473 struct list_head flush_list; 483 - 484 - struct list_head requeue_list; 485 - spinlock_t requeue_lock; 486 - struct delayed_work requeue_work; 487 474 488 475 struct mutex sysfs_lock; 489 476 struct mutex sysfs_dir_lock; ··· 504 499 * percpu_ref_kill() and percpu_ref_reinit(). 505 500 */ 506 501 struct mutex mq_freeze_lock; 507 - 508 - int quiesce_depth; 509 502 510 503 struct blk_mq_tag_set *tag_set; 511 504 struct list_head tag_set_list;

Configure Feed

Configure Feed