block: remove q->sysfs_lock for attributes which don't need it

There are a few sysfs attributes in the block layer which don't really
need q->sysfs_lock to be acquired while accessing them. The reason is
that reading/writing a value from/to such attributes is either atomic or
could be easily protected using READ_ONCE()/WRITE_ONCE(). Moreover, sysfs
attributes are inherently protected with sysfs/kernfs internal locking.

So this change helps segregate all existing sysfs attributes for which
we could avoid acquiring q->sysfs_lock. For all read-only attributes
we removed the q->sysfs_lock from the show method of such attributes. In
case an attribute is read/write, we removed the q->sysfs_lock from
both the show and store methods of that attribute.

We audited all block sysfs attributes and found the following list of
attributes which shouldn't require q->sysfs_lock protection:

1. io_poll:
Write to this attribute is ignored. So, we don't need q->sysfs_lock.

2. io_poll_delay:
Write to this attribute is NOP, so we don't need q->sysfs_lock.

3. io_timeout:
Write to this attribute updates q->rq_timeout and read of this
attribute returns the value stored in q->rq_timeout. Moreover, the
q->rq_timeout is set only once when we init the queue (under blk_mq_
init_allocated_queue()) even before disk is added. So that means
that we don't need to protect it with q->sysfs_lock. As this
attribute is not directly correlated with anything else simply using
READ_ONCE/WRITE_ONCE should be enough.

4. nomerges:
Write to this attribute file updates two q->flags : QUEUE_FLAG_
NOMERGES and QUEUE_FLAG_NOXMERGES. These flags are accessed during
bio-merge which anyways doesn't run with q->sysfs_lock held.
Moreover, the q->flags are updated/accessed with bitops which are
atomic. So, protecting it with q->sysfs_lock is not necessary.

5. rq_affinity:
Write to this attribute file makes atomic updates to q->flags:
QUEUE_FLAG_SAME_COMP and QUEUE_FLAG_SAME_FORCE. These flags are
also accessed from blk_mq_complete_need_ipi() using test_bit macro.
As read/write to q->flags uses bitops which are atomic, protecting
it with q->sysfs_lock is not necessary.

6. nr_zones:
Write to this attribute happens in the driver probe method (except
nvme) before disk is added and outside of q->sysfs_lock or any other
lock. Moreover nr_zones is defined as "unsigned int" and so reading
this attribute, even when it's simultaneously being updated on other
cpu, should not return torn value on any architecture supported by
linux. So we can avoid using q->sysfs_lock or any other lock/
protection while reading this attribute.

7. discard_zeroes_data:
Reading of this attribute always returns 0, so we don't require
holding q->sysfs_lock.

8. write_same_max_bytes:
Reading of this attribute always returns 0, so we don't require
holding q->sysfs_lock.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Link: https://lore.kernel.org/r/20250304102551.2533767-4-nilay@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Nilay Shroff and committed by

Jens Axboe 1 year ago d23977fe b07a889e

+29 -54

2 changed files

expand all

block

blk-settings.c

blk-sysfs.c

+1 -1

block/blk-settings.c

··· 21 21 22 22 void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) 23 23 { 24 - q->rq_timeout = timeout; 24 + WRITE_ONCE(q->rq_timeout, timeout); 25 25 } 26 26 EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); 27 27

+28 -53

block/blk-sysfs.c

··· 172 172 #define QUEUE_SYSFS_SHOW_CONST(_name, _val) \ 173 173 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 174 174 { \ 175 - ssize_t ret; \ 176 - \ 177 - mutex_lock(&disk->queue->sysfs_lock); \ 178 - ret = sysfs_emit(page, "%d\n", _val); \ 179 - mutex_unlock(&disk->queue->sysfs_lock); \ 180 - return ret; \ 175 + return sysfs_emit(page, "%d\n", _val); \ 181 176 } 182 177 183 178 /* deprecated fields */ ··· 261 266 262 267 static ssize_t queue_poll_show(struct gendisk *disk, char *page) 263 268 { 264 - ssize_t ret; 269 + if (queue_is_mq(disk->queue)) 270 + return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue)); 265 271 266 - mutex_lock(&disk->queue->sysfs_lock); 267 - if (queue_is_mq(disk->queue)) { 268 - ret = sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue)); 269 - } else { 270 - ret = sysfs_emit(page, "%u\n", 272 + return sysfs_emit(page, "%u\n", 271 273 !!(disk->queue->limits.features & BLK_FEAT_POLL)); 272 - } 273 - mutex_unlock(&disk->queue->sysfs_lock); 274 - return ret; 275 274 } 276 275 277 276 static ssize_t queue_zoned_show(struct gendisk *disk, char *page) ··· 277 288 278 289 static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page) 279 290 { 280 - ssize_t ret; 281 - 282 - mutex_lock(&disk->queue->sysfs_lock); 283 - ret = queue_var_show(disk_nr_zones(disk), page); 284 - mutex_unlock(&disk->queue->sysfs_lock); 285 - return ret; 291 + return queue_var_show(disk_nr_zones(disk), page); 286 292 } 287 293 288 294 static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) ··· 304 320 305 321 static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) 306 322 { 307 - ssize_t ret; 308 - 309 - mutex_lock(&disk->queue->sysfs_lock); 310 - ret = queue_var_show((blk_queue_nomerges(disk->queue) << 1) | 323 + return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | 311 324 blk_queue_noxmerges(disk->queue), page); 312 - mutex_unlock(&disk->queue->sysfs_lock); 313 - return ret; 314 
325 } 315 326 316 327 static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page, ··· 319 340 if (ret < 0) 320 341 return ret; 321 342 322 - mutex_lock(&q->sysfs_lock); 323 343 memflags = blk_mq_freeze_queue(q); 324 344 blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q); 325 345 blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); ··· 327 349 else if (nm) 328 350 blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); 329 351 blk_mq_unfreeze_queue(q, memflags); 330 - mutex_unlock(&q->sysfs_lock); 331 352 332 353 return ret; 333 354 } 334 355 335 356 static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page) 336 357 { 337 - ssize_t ret; 338 - bool set, force; 358 + bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags); 359 + bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags); 339 360 340 - mutex_lock(&disk->queue->sysfs_lock); 341 - set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags); 342 - force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags); 343 - ret = queue_var_show(set << force, page); 344 - mutex_unlock(&disk->queue->sysfs_lock); 345 - return ret; 361 + return queue_var_show(set << force, page); 346 362 } 347 363 348 364 static ssize_t ··· 352 380 if (ret < 0) 353 381 return ret; 354 382 355 - mutex_lock(&q->sysfs_lock); 383 + /* 384 + * Here we update two queue flags each using atomic bitops, although 385 + * updating two flags isn't atomic it should be harmless as those flags 386 + * are accessed individually using atomic test_bit operation. So we 387 + * don't grab any lock while updating these flags. 
388 + */ 356 389 memflags = blk_mq_freeze_queue(q); 357 390 if (val == 2) { 358 391 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); ··· 370 393 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 371 394 } 372 395 blk_mq_unfreeze_queue(q, memflags); 373 - mutex_unlock(&q->sysfs_lock); 374 396 #endif 375 397 return ret; 376 398 } ··· 387 411 ssize_t ret = count; 388 412 struct request_queue *q = disk->queue; 389 413 390 - mutex_lock(&q->sysfs_lock); 391 414 memflags = blk_mq_freeze_queue(q); 392 415 if (!(q->limits.features & BLK_FEAT_POLL)) { 393 416 ret = -EINVAL; 394 417 goto out; 395 418 } 419 + 396 420 pr_info_ratelimited("writes to the poll attribute are ignored.\n"); 397 421 pr_info_ratelimited("please use driver specific parameters instead.\n"); 398 422 out: 399 423 blk_mq_unfreeze_queue(q, memflags); 400 - mutex_unlock(&q->sysfs_lock); 401 - 402 424 return ret; 403 425 } 404 426 405 427 static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page) 406 428 { 407 - ssize_t ret; 408 - 409 - mutex_lock(&disk->queue->sysfs_lock); 410 - ret = sysfs_emit(page, "%u\n", 411 - jiffies_to_msecs(disk->queue->rq_timeout)); 412 - mutex_unlock(&disk->queue->sysfs_lock); 413 - return ret; 429 + return sysfs_emit(page, "%u\n", 430 + jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout))); 414 431 } 415 432 416 433 static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, ··· 417 448 if (err || val == 0) 418 449 return -EINVAL; 419 450 420 - mutex_lock(&q->sysfs_lock); 421 451 memflags = blk_mq_freeze_queue(q); 422 452 blk_queue_rq_timeout(q, msecs_to_jiffies(val)); 423 453 blk_mq_unfreeze_queue(q, memflags); 424 - mutex_unlock(&q->sysfs_lock); 425 454 426 455 return count; 427 456 } ··· 673 706 * Attributes which are protected with q->sysfs_lock. 674 707 */ 675 708 &queue_ra_entry.attr, 709 + 710 + /* 711 + * Attributes which don't require locking. 
712 + */ 676 713 &queue_discard_zeroes_data_entry.attr, 677 714 &queue_write_same_max_entry.attr, 678 715 &queue_nr_zones_entry.attr, ··· 694 723 */ 695 724 &queue_requests_entry.attr, 696 725 &elv_iosched_entry.attr, 697 - &queue_rq_affinity_entry.attr, 698 - &queue_io_timeout_entry.attr, 699 726 #ifdef CONFIG_BLK_WBT 700 727 &queue_wb_lat_entry.attr, 701 728 #endif 729 + /* 730 + * Attributes which don't require locking. 731 + */ 732 + &queue_rq_affinity_entry.attr, 733 + &queue_io_timeout_entry.attr, 734 + 702 735 NULL, 703 736 }; 704 737

Configure Feed

Configure Feed