Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

blk-mq: Defer freeing of tags page_list to SRCU callback

Tag iterators can race with the freeing of the request pages (tags->page_list),
potentially leading to use-after-free issues.

Defer the freeing of the page list and the tags structure itself until
after an SRCU grace period has passed. This ensures that any concurrent
tag iterators have completed before the memory is released. This way,
we can replace the big tags->lock in the tag iterator code path with
SRCU to solve the issue.

This is achieved by:
- Adding a new `srcu_struct tags_srcu` to `blk_mq_tag_set` to protect
tag map iteration.
- Adding an `rcu_head` to `struct blk_mq_tags` to be used with
`call_srcu`.
- Moving the page list freeing logic and the `kfree(tags)` call into a
new callback function, `blk_mq_free_tags_callback`.
- In `blk_mq_free_tags`, invoking `call_srcu` to schedule the new
callback for deferred execution.

The read-side protection for the tag iterators will be added in a
subsequent patch.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Ming Lei and committed by
Jens Axboe
ad0d05db 9ad8e5af

+38 -14
+23 -1
block/blk-mq-tag.c
··· 8 8 */ 9 9 #include <linux/kernel.h> 10 10 #include <linux/module.h> 11 + #include <linux/slab.h> 12 + #include <linux/mm.h> 13 + #include <linux/kmemleak.h> 11 14 12 15 #include <linux/delay.h> 13 16 #include "blk.h" ··· 579 576 return NULL; 580 577 } 581 578 579 + static void blk_mq_free_tags_callback(struct rcu_head *head) 580 + { 581 + struct blk_mq_tags *tags = container_of(head, struct blk_mq_tags, 582 + rcu_head); 583 + struct page *page; 584 + 585 + while (!list_empty(&tags->page_list)) { 586 + page = list_first_entry(&tags->page_list, struct page, lru); 587 + list_del_init(&page->lru); 588 + /* 589 + * Remove kmemleak object previously allocated in 590 + * blk_mq_alloc_rqs(). 591 + */ 592 + kmemleak_free(page_address(page)); 593 + __free_pages(page, page->private); 594 + } 595 + kfree(tags); 596 + } 597 + 582 598 void blk_mq_free_tags(struct blk_mq_tag_set *set, struct blk_mq_tags *tags) 583 599 { 584 600 sbitmap_queue_free(&tags->bitmap_tags); 585 601 sbitmap_queue_free(&tags->breserved_tags); 586 - kfree(tags); 602 + call_srcu(&set->tags_srcu, &tags->rcu_head, blk_mq_free_tags_callback); 587 603 } 588 604 589 605 int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
+13 -13
block/blk-mq.c
··· 3454 3454 unsigned int hctx_idx) 3455 3455 { 3456 3456 struct blk_mq_tags *drv_tags; 3457 - struct page *page; 3458 3457 3459 3458 if (list_empty(&tags->page_list)) 3460 3459 return; ··· 3477 3478 } 3478 3479 3479 3480 blk_mq_clear_rq_mapping(drv_tags, tags); 3480 - 3481 - while (!list_empty(&tags->page_list)) { 3482 - page = list_first_entry(&tags->page_list, struct page, lru); 3483 - list_del_init(&page->lru); 3484 - /* 3485 - * Remove kmemleak object previously allocated in 3486 - * blk_mq_alloc_rqs(). 3487 - */ 3488 - kmemleak_free(page_address(page)); 3489 - __free_pages(page, page->private); 3490 - } 3481 + /* 3482 + * Free request pages in SRCU callback, which is called from 3483 + * blk_mq_free_tags(). 3484 + */ 3491 3485 } 3492 3486 3493 3487 void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags) ··· 4826 4834 if (ret) 4827 4835 goto out_free_srcu; 4828 4836 } 4837 + ret = init_srcu_struct(&set->tags_srcu); 4838 + if (ret) 4839 + goto out_cleanup_srcu; 4829 4840 4830 4841 init_rwsem(&set->update_nr_hwq_lock); 4831 4842 ··· 4837 4842 sizeof(struct blk_mq_tags *), GFP_KERNEL, 4838 4843 set->numa_node); 4839 4844 if (!set->tags) 4840 - goto out_cleanup_srcu; 4845 + goto out_cleanup_tags_srcu; 4841 4846 4842 4847 for (i = 0; i < set->nr_maps; i++) { 4843 4848 set->map[i].mq_map = kcalloc_node(nr_cpu_ids, ··· 4866 4871 } 4867 4872 kfree(set->tags); 4868 4873 set->tags = NULL; 4874 + out_cleanup_tags_srcu: 4875 + cleanup_srcu_struct(&set->tags_srcu); 4869 4876 out_cleanup_srcu: 4870 4877 if (set->flags & BLK_MQ_F_BLOCKING) 4871 4878 cleanup_srcu_struct(set->srcu); ··· 4913 4916 4914 4917 kfree(set->tags); 4915 4918 set->tags = NULL; 4919 + 4920 + srcu_barrier(&set->tags_srcu); 4921 + cleanup_srcu_struct(&set->tags_srcu); 4916 4922 if (set->flags & BLK_MQ_F_BLOCKING) { 4917 4923 cleanup_srcu_struct(set->srcu); 4918 4924 kfree(set->srcu);
+2
include/linux/blk-mq.h
··· 531 531 struct mutex tag_list_lock; 532 532 struct list_head tag_list; 533 533 struct srcu_struct *srcu; 534 + struct srcu_struct tags_srcu; 534 535 535 536 struct rw_semaphore update_nr_hwq_lock; 536 537 }; ··· 768 767 * request pool 769 768 */ 770 769 spinlock_t lock; 770 + struct rcu_head rcu_head; 771 771 }; 772 772 773 773 static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,