Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

block: break pcpu_alloc_mutex dependency on freeze_lock

While the nr_hw_queue update allocates tagset tags, it acquires
->pcpu_alloc_mutex after ->freeze_lock is acquired or the queue is frozen.
This potentially creates a circular dependency involving ->fs_reclaim if
reclaim is triggered simultaneously in a code path which first acquires
->pcpu_alloc_mutex. As the queue is already frozen while the nr_hw_queue
update allocates tagsets, the reclaim can't make forward progress, and thus
it could cause a potential deadlock, as reported in the lockdep splat[1].

Fix this by pre-allocating the tagset tags before we freeze the queue during
the nr_hw_queue update. The pre-allocated tagset tags can then be safely
installed and used once the queue is frozen.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/ [1]
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Yu Kuai <yukuai@fnnas.com>
[axboe: fix brace style issue]
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Nilay Shroff and committed by
Jens Axboe
539d1b47 da46b5df

+30 -15
+30 -15
block/blk-mq.c
··· 4793 4793 } 4794 4794 } 4795 4795 4796 - static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, 4797 - int new_nr_hw_queues) 4796 + static struct blk_mq_tags **blk_mq_prealloc_tag_set_tags( 4797 + struct blk_mq_tag_set *set, 4798 + int new_nr_hw_queues) 4798 4799 { 4799 4800 struct blk_mq_tags **new_tags; 4800 4801 int i; 4801 4802 4802 4803 if (set->nr_hw_queues >= new_nr_hw_queues) 4803 - goto done; 4804 + return NULL; 4804 4805 4805 4806 new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), 4806 4807 GFP_KERNEL, set->numa_node); 4807 4808 if (!new_tags) 4808 - return -ENOMEM; 4809 + return ERR_PTR(-ENOMEM); 4809 4810 4810 4811 if (set->tags) 4811 4812 memcpy(new_tags, set->tags, set->nr_hw_queues * 4812 4813 sizeof(*set->tags)); 4813 - kfree(set->tags); 4814 - set->tags = new_tags; 4815 4814 4816 4815 for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { 4817 - if (!__blk_mq_alloc_map_and_rqs(set, i)) { 4818 - while (--i >= set->nr_hw_queues) 4819 - __blk_mq_free_map_and_rqs(set, i); 4820 - return -ENOMEM; 4816 + if (blk_mq_is_shared_tags(set->flags)) { 4817 + new_tags[i] = set->shared_tags; 4818 + } else { 4819 + new_tags[i] = blk_mq_alloc_map_and_rqs(set, i, 4820 + set->queue_depth); 4821 + if (!new_tags[i]) 4822 + goto out_unwind; 4821 4823 } 4822 4824 cond_resched(); 4823 4825 } 4824 4826 4825 - done: 4826 - set->nr_hw_queues = new_nr_hw_queues; 4827 - return 0; 4827 + return new_tags; 4828 + out_unwind: 4829 + while (--i >= set->nr_hw_queues) { 4830 + if (!blk_mq_is_shared_tags(set->flags)) 4831 + blk_mq_free_map_and_rqs(set, new_tags[i], i); 4832 + } 4833 + kfree(new_tags); 4834 + return ERR_PTR(-ENOMEM); 4828 4835 } 4829 4836 4830 4837 /* ··· 5120 5113 unsigned int memflags; 5121 5114 int i; 5122 5115 struct xarray elv_tbl; 5116 + struct blk_mq_tags **new_tags; 5123 5117 bool queues_frozen = false; 5124 5118 5125 5119 lockdep_assert_held(&set->tag_list_lock); ··· 5155 5147 if (blk_mq_elv_switch_none(q, &elv_tbl)) 5156 
5148 goto switch_back; 5157 5149 5150 + new_tags = blk_mq_prealloc_tag_set_tags(set, nr_hw_queues); 5151 + if (IS_ERR(new_tags)) 5152 + goto switch_back; 5153 + 5158 5154 list_for_each_entry(q, &set->tag_list, tag_set_list) 5159 5155 blk_mq_freeze_queue_nomemsave(q); 5160 5156 queues_frozen = true; 5161 - if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) 5162 - goto switch_back; 5157 + if (new_tags) { 5158 + kfree(set->tags); 5159 + set->tags = new_tags; 5160 + } 5161 + set->nr_hw_queues = nr_hw_queues; 5163 5162 5164 5163 fallback: 5165 5164 blk_mq_update_queue_map(set);