Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

block: avoid cpu_hotplug_lock depedency on freeze_lock

A recent lockdep[1] splat observed while running blktest block/005
reveals a potential deadlock caused by the cpu_hotplug_lock dependency
on ->freeze_lock. This dependency was introduced by commit 033b667a823e
("block: blk-rq-qos: guard rq-qos helpers by static key").

That change added a static key to avoid fetching q->rq_qos when
neither blk-wbt nor blk-iolatency is configured. The static key
dynamically patches kernel text to a NOP when disabled, eliminating
overhead of fetching q->rq_qos in the I/O hot path. However, enabling
a static key at runtime requires acquiring both cpu_hotplug_lock and
jump_label_mutex. When this happens after the queue has already been
frozen (i.e., while holding ->freeze_lock), it creates a locking
dependency from cpu_hotplug_lock to ->freeze_lock, which leads to a
potential deadlock reported by lockdep [1].

To resolve this, replace the static key mechanism with q->queue_flags:
QUEUE_FLAG_QOS_ENABLED. This flag is evaluated in the fast path before
accessing q->rq_qos. If the flag is set, we proceed to fetch q->rq_qos;
otherwise, the access is skipped.

Since q->queue_flags is commonly accessed in IO hotpath and resides in
the first cacheline of struct request_queue, checking it imposes minimal
overhead while eliminating the deadlock risk.

This change avoids the lockdep splat without introducing performance
regressions.

[1] https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/

Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Closes: https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/
Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key")
Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20250814082612.500845-4-nilay@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Nilay Shroff and committed by
Jens Axboe
370ac285 ade1beea

+36 -27
+1
block/blk-mq-debugfs.c
··· 95 95 QUEUE_FLAG_NAME(SQ_SCHED), 96 96 QUEUE_FLAG_NAME(DISABLE_WBT_DEF), 97 97 QUEUE_FLAG_NAME(NO_ELV_SWITCH), 98 + QUEUE_FLAG_NAME(QOS_ENABLED), 98 99 }; 99 100 #undef QUEUE_FLAG_NAME 100 101
+4 -5
block/blk-rq-qos.c
··· 2 2 3 3 #include "blk-rq-qos.h" 4 4 5 - __read_mostly DEFINE_STATIC_KEY_FALSE(block_rq_qos); 6 - 7 5 /* 8 6 * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, 9 7 * false if 'v' + 1 would be bigger than 'below'. ··· 317 319 struct rq_qos *rqos = q->rq_qos; 318 320 q->rq_qos = rqos->next; 319 321 rqos->ops->exit(rqos); 320 - static_branch_dec(&block_rq_qos); 321 322 } 323 + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); 322 324 mutex_unlock(&q->rq_qos_mutex); 323 325 } 324 326 ··· 344 346 goto ebusy; 345 347 rqos->next = q->rq_qos; 346 348 q->rq_qos = rqos; 347 - static_branch_inc(&block_rq_qos); 349 + blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q); 348 350 349 351 blk_mq_unfreeze_queue(q, memflags); 350 352 ··· 372 374 for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { 373 375 if (*cur == rqos) { 374 376 *cur = rqos->next; 375 - static_branch_dec(&block_rq_qos); 376 377 break; 377 378 } 378 379 } 380 + if (!q->rq_qos) 381 + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); 379 382 blk_mq_unfreeze_queue(q, memflags); 380 383 381 384 mutex_lock(&q->debugfs_mutex);
+30 -22
block/blk-rq-qos.h
··· 12 12 #include "blk-mq-debugfs.h" 13 13 14 14 struct blk_mq_debugfs_attr; 15 - extern struct static_key_false block_rq_qos; 16 15 17 16 enum rq_qos_id { 18 17 RQ_QOS_WBT, ··· 112 113 113 114 static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio) 114 115 { 115 - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) 116 + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && 117 + q->rq_qos) 116 118 __rq_qos_cleanup(q->rq_qos, bio); 117 119 } 118 120 119 121 static inline void rq_qos_done(struct request_queue *q, struct request *rq) 120 122 { 121 - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos && 122 - !blk_rq_is_passthrough(rq)) 123 + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && 124 + q->rq_qos && !blk_rq_is_passthrough(rq)) 123 125 __rq_qos_done(q->rq_qos, rq); 124 126 } 125 127 126 128 static inline void rq_qos_issue(struct request_queue *q, struct request *rq) 127 129 { 128 - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) 130 + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && 131 + q->rq_qos) 129 132 __rq_qos_issue(q->rq_qos, rq); 130 133 } 131 134 132 135 static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) 133 136 { 134 - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) 137 + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && 138 + q->rq_qos) 135 139 __rq_qos_requeue(q->rq_qos, rq); 136 140 } 137 141 138 142 static inline void rq_qos_done_bio(struct bio *bio) 139 143 { 140 - if (static_branch_unlikely(&block_rq_qos) && 141 - bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || 142 - bio_flagged(bio, BIO_QOS_MERGED))) { 143 - struct request_queue *q = bdev_get_queue(bio->bi_bdev); 144 + struct request_queue *q; 144 145 145 - /* 146 - * If a bio has BIO_QOS_xxx set, it implicitly implies that 147 - * q->rq_qos is present. So, we skip re-checking q->rq_qos 148 - * here as an extra optimization and directly call 149 - * __rq_qos_done_bio(). 150 - */ 151 - __rq_qos_done_bio(q->rq_qos, bio); 152 - } 146 + if (!bio->bi_bdev || (!bio_flagged(bio, BIO_QOS_THROTTLED) && 147 + !bio_flagged(bio, BIO_QOS_MERGED))) 148 + return; 149 + 150 + q = bdev_get_queue(bio->bi_bdev); 151 + 152 + /* 153 + * If a bio has BIO_QOS_xxx set, it implicitly implies that 154 + * q->rq_qos is present. So, we skip re-checking q->rq_qos 155 + * here as an extra optimization and directly call 156 + * __rq_qos_done_bio(). 157 + */ 158 + __rq_qos_done_bio(q->rq_qos, bio); 153 159 } 154 160 155 161 static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) 156 162 { 157 - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { 163 + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && 164 + q->rq_qos) { 158 165 bio_set_flag(bio, BIO_QOS_THROTTLED); 159 166 __rq_qos_throttle(q->rq_qos, bio); 160 167 } ··· 169 164 static inline void rq_qos_track(struct request_queue *q, struct request *rq, 170 165 struct bio *bio) 171 166 { 172 - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) 167 + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && 168 + q->rq_qos) 173 169 __rq_qos_track(q->rq_qos, rq, bio); 174 170 } 175 171 176 172 static inline void rq_qos_merge(struct request_queue *q, struct request *rq, 177 173 struct bio *bio) 178 174 { 179 - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { 175 + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && 176 + q->rq_qos) { 180 177 bio_set_flag(bio, BIO_QOS_MERGED); 181 178 __rq_qos_merge(q->rq_qos, rq, bio); 182 179 } ··· 186 179 187 180 static inline void rq_qos_queue_depth_changed(struct request_queue *q) 188 181 { 189 - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) 182 + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && 183 + q->rq_qos) 190 184 __rq_qos_queue_depth_changed(q->rq_qos); 191 185 } 192 186
+1
include/linux/blkdev.h
··· 656 656 QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ 657 657 QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ 658 658 QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ 659 + QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ 659 660 QUEUE_FLAG_MAX 660 661 }; 661 662