Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

block: split bio_alloc_bioset more clearly into a fast and slowpath

bio_alloc_bioset tries non-waiting slab allocations first for the bio and
bvec array, but does so in a somewhat convoluted way.

Restructure the function so that it first open codes these slab
allocations, and then falls back to the mempools with the original
gfp mask.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com> -ck
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://patch.msgid.link/20260316161144.1607877-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
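
The restructuring described above follows a common two-phase allocation pattern: a cheap, non-blocking attempt that is allowed to fail, followed by a fallback to a reserve that may block but is guaranteed to make progress. The sketch below is a simplified userspace model of that pattern, not the kernel code; every identifier in it (try_fast_alloc, reserve_alloc, RESERVE_SLOTS, and so on) is invented for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

#define RESERVE_SLOTS	4
#define OBJ_SIZE	64

/* Stand-in for the mempool reserve: a small, pre-filled pool of objects. */
static void *reserve_pool[RESERVE_SLOTS];

/* Fast path: a cheap attempt that is allowed to fail (no retries, no reclaim). */
static void *try_fast_alloc(bool simulate_pressure)
{
	if (simulate_pressure)
		return NULL;		/* pretend the slab allocation failed */
	return malloc(OBJ_SIZE);
}

/* Slow path: fall back to the pre-filled reserve, which guarantees progress. */
static void *reserve_alloc(void)
{
	for (int i = 0; i < RESERVE_SLOTS; i++) {
		if (reserve_pool[i]) {
			void *p = reserve_pool[i];

			reserve_pool[i] = NULL;
			return p;
		}
	}
	return NULL;
}

static void *alloc_obj(bool simulate_pressure)
{
	void *p = try_fast_alloc(simulate_pressure);

	if (p)
		return p;		/* common case: fast path succeeded */
	return reserve_alloc();		/* rare case: dip into the reserve */
}

int main(void)
{
	for (int i = 0; i < RESERVE_SLOTS; i++)
		reserve_pool[i] = malloc(OBJ_SIZE);

	void *a = alloc_obj(false);	/* fast path */
	void *b = alloc_obj(true);	/* forced onto the slow path */

	printf("fast path: %s, slow path: %s\n",
	       a ? "ok" : "failed", b ? "ok" : "failed");

	free(a);
	free(b);
	for (int i = 0; i < RESERVE_SLOTS; i++)
		free(reserve_pool[i]);
	return 0;
}

In the patch itself the fast path is the open-coded kmem_cache_alloc() calls with the restricted try_alloc_gfp() mask, and the slow path is the mempool_alloc() fallback with the caller's original gfp mask.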

Authored by Christoph Hellwig and committed by Jens Axboe
b520c4ee fed406f3

+79 -114 total

block/bio.c (+78 -112)
···
  * Make the first allocation restricted and don't dump info on allocation
  * failures, since we'll fall back to the mempool in case of failure.
  */
-static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
+static inline gfp_t try_alloc_gfp(gfp_t gfp)
 {
        return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
                __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
-}
-
-static struct bio_vec *bvec_alloc(struct mempool *pool, unsigned short *nr_vecs,
-               gfp_t gfp_mask)
-{
-       struct biovec_slab *bvs = biovec_slab(*nr_vecs);
-
-       if (WARN_ON_ONCE(!bvs))
-               return NULL;
-
-       /*
-        * Upgrade the nr_vecs request to take full advantage of the allocation.
-        * We also rely on this in the bvec_free path.
-        */
-       *nr_vecs = bvs->nr_vecs;
-
-       /*
-        * Try a slab allocation first for all smaller allocations. If that
-        * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
-        * The mempool is sized to handle up to BIO_MAX_VECS entries.
-        */
-       if (*nr_vecs < BIO_MAX_VECS) {
-               struct bio_vec *bvl;
-
-               bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
-               if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
-                       return bvl;
-               *nr_vecs = BIO_MAX_VECS;
-       }
-
-       return mempool_alloc(pool, gfp_mask);
 }
 
 void bio_uninit(struct bio *bio)
···
        }
 }
 
+/*
+ * submit_bio_noacct() converts recursion to iteration; this means if we're
+ * running beneath it, any bios we allocate and submit will not be submitted
+ * (and thus freed) until after we return.
+ *
+ * This exposes us to a potential deadlock if we allocate multiple bios from the
+ * same bio_set while running underneath submit_bio_noacct(). If we were to
+ * allocate multiple bios (say a stacking block driver that was splitting bios),
+ * we would deadlock if we exhausted the mempool's reserve.
+ *
+ * We solve this, and guarantee forward progress by punting the bios on
+ * current->bio_list to a per bio_set rescuer workqueue before blocking to wait
+ * for elements being returned to the mempool.
+ */
 static void punt_bios_to_rescuer(struct bio_set *bs)
 {
        struct bio_list punt, nopunt;
        struct bio *bio;
 
-       if (WARN_ON_ONCE(!bs->rescue_workqueue))
+       if (!current->bio_list || !bs->rescue_workqueue)
                return;
+       if (bio_list_empty(&current->bio_list[0]) &&
+           bio_list_empty(&current->bio_list[1]))
+               return;
+
        /*
         * In order to guarantee forward progress we must punt only bios that
         * were allocated from this bio_set; otherwise, if there was a bio on
···
        local_irq_restore(flags);
 }
 
-static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
-               unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
-               struct bio_set *bs)
+static struct bio *bio_alloc_percpu_cache(struct bio_set *bs)
 {
        struct bio_alloc_cache *cache;
        struct bio *bio;
···
        cache->free_list = bio->bi_next;
        cache->nr--;
        put_cpu();
-
-       if (nr_vecs)
-               bio_init_inline(bio, bdev, nr_vecs, opf);
-       else
-               bio_init(bio, bdev, NULL, nr_vecs, opf);
        bio->bi_pool = bs;
        return bio;
 }
···
  * @bdev:      block device to allocate the bio for (can be %NULL)
  * @nr_vecs:   number of bvecs to pre-allocate
  * @opf:       operation and flags for bio
- * @gfp_mask:  the GFP_* mask given to the slab allocator
+ * @gfp:       the GFP_* mask given to the slab allocator
  * @bs:        the bio_set to allocate from.
  *
  * Allocate a bio from the mempools in @bs.
···
  * Returns: Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-               blk_opf_t opf, gfp_t gfp_mask,
-               struct bio_set *bs)
+               blk_opf_t opf, gfp_t gfp, struct bio_set *bs)
 {
-       gfp_t saved_gfp = gfp_mask;
-       struct bio *bio;
+       struct bio_vec *bvecs = NULL;
+       struct bio *bio = NULL;
+       gfp_t saved_gfp = gfp;
        void *p;
 
        /* should not use nobvec bioset for nr_vecs > 0 */
        if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
                return NULL;
 
+       gfp = try_alloc_gfp(gfp);
        if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
-               opf |= REQ_ALLOC_CACHE;
-               bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
-                               gfp_mask, bs);
-               if (bio)
-                       return bio;
                /*
-                * No cached bio available, bio returned below marked with
-                * REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
+                * Set REQ_ALLOC_CACHE even if no cached bio is available to
+                * return the allocated bio to the percpu cache when done.
                 */
-       } else
-               opf &= ~REQ_ALLOC_CACHE;
-
-       /*
-        * submit_bio_noacct() converts recursion to iteration; this means if
-        * we're running beneath it, any bios we allocate and submit will not be
-        * submitted (and thus freed) until after we return.
-        *
-        * This exposes us to a potential deadlock if we allocate multiple bios
-        * from the same bio_set() while running underneath submit_bio_noacct().
-        * If we were to allocate multiple bios (say a stacking block driver
-        * that was splitting bios), we would deadlock if we exhausted the
-        * mempool's reserve.
-        *
-        * We solve this, and guarantee forward progress, with a rescuer
-        * workqueue per bio_set. If we go to allocate and there are bios on
-        * current->bio_list, we first try the allocation without
-        * __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
-        * blocking to the rescuer workqueue before we retry with the original
-        * gfp_flags.
-        */
-       if (current->bio_list &&
-           (!bio_list_empty(&current->bio_list[0]) ||
-            !bio_list_empty(&current->bio_list[1])) &&
-           bs->rescue_workqueue)
-               gfp_mask &= ~__GFP_DIRECT_RECLAIM;
-
-       p = mempool_alloc(&bs->bio_pool, gfp_mask);
-       if (!p && gfp_mask != saved_gfp) {
-               punt_bios_to_rescuer(bs);
-               gfp_mask = saved_gfp;
-               p = mempool_alloc(&bs->bio_pool, gfp_mask);
-       }
-       if (unlikely(!p))
-               return NULL;
-       if (!mempool_is_saturated(&bs->bio_pool))
-               opf &= ~REQ_ALLOC_CACHE;
-
-       bio = p + bs->front_pad;
-       if (nr_vecs > BIO_INLINE_VECS) {
-               struct bio_vec *bvl = NULL;
-
-               bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
-               if (!bvl && gfp_mask != saved_gfp) {
-                       punt_bios_to_rescuer(bs);
-                       gfp_mask = saved_gfp;
-                       bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
-               }
-               if (unlikely(!bvl))
-                       goto err_free;
-
-               bio_init(bio, bdev, bvl, nr_vecs, opf);
-       } else if (nr_vecs) {
-               bio_init_inline(bio, bdev, BIO_INLINE_VECS, opf);
+               opf |= REQ_ALLOC_CACHE;
+               bio = bio_alloc_percpu_cache(bs);
        } else {
-               bio_init(bio, bdev, NULL, 0, opf);
+               opf &= ~REQ_ALLOC_CACHE;
+               p = kmem_cache_alloc(bs->bio_slab, gfp);
+               if (p)
+                       bio = p + bs->front_pad;
        }
 
+       if (bio && nr_vecs > BIO_INLINE_VECS) {
+               struct biovec_slab *bvs = biovec_slab(nr_vecs);
+
+               /*
+                * Upgrade nr_vecs to take full advantage of the allocation.
+                * We also rely on this in bvec_free().
+                */
+               nr_vecs = bvs->nr_vecs;
+               bvecs = kmem_cache_alloc(bvs->slab, gfp);
+               if (unlikely(!bvecs)) {
+                       kmem_cache_free(bs->bio_slab, p);
+                       bio = NULL;
+               }
+       }
+
+       if (unlikely(!bio)) {
+               /*
+                * Give up if we are not allow to sleep as non-blocking mempool
+                * allocations just go back to the slab allocation.
+                */
+               if (!(saved_gfp & __GFP_DIRECT_RECLAIM))
+                       return NULL;
+
+               punt_bios_to_rescuer(bs);
+
+               /*
+                * Don't rob the mempools by returning to the per-CPU cache if
+                * we're tight on memory.
+                */
+               opf &= ~REQ_ALLOC_CACHE;
+
+               p = mempool_alloc(&bs->bio_pool, gfp);
+               bio = p + bs->front_pad;
+               if (nr_vecs > BIO_INLINE_VECS) {
+                       nr_vecs = BIO_MAX_VECS;
+                       bvecs = mempool_alloc(&bs->bvec_pool, gfp);
+               }
+       }
+
+       if (nr_vecs && nr_vecs <= BIO_INLINE_VECS)
+               bio_init_inline(bio, bdev, nr_vecs, opf);
+       else
+               bio_init(bio, bdev, bvecs, nr_vecs, opf);
        bio->bi_pool = bs;
        return bio;
-
-err_free:
-       mempool_free(p, &bs->bio_pool);
-       return NULL;
 }
 EXPORT_SYMBOL(bio_alloc_bioset);
 
include/linux/bio.h (+1 -2)
···
 extern int biovec_init_pool(mempool_t *pool, int pool_entries);
 
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-               blk_opf_t opf, gfp_t gfp_mask,
-               struct bio_set *bs);
+               blk_opf_t opf, gfp_t gfp, struct bio_set *bs);
 struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
 extern void bio_put(struct bio *);
 
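
The comment block that this patch moves above punt_bios_to_rescuer() describes a deadlock: bios allocated under submit_bio_noacct() sit on current->bio_list and cannot complete (and so cannot refill the mempool) until we return. The toy model below, which is not kernel code and uses only invented names and numbers, walks through that counting argument and shows how handing the queued work off before blocking restores forward progress.

#include <stdio.h>

#define RESERVE_SIZE	2	/* stand-in for the mempool's reserve */

int main(void)
{
	int reserve = RESERVE_SIZE;
	int deferred = 0;	/* "bios" parked on the deferred submission list */

	/* A stacking driver splits one request into several child bios. */
	for (int child = 1; child <= 4; child++) {
		if (reserve == 0) {
			/*
			 * Without a rescuer this allocation would block
			 * forever: the reserve only refills when the deferred
			 * bios complete, and they only run after we return.
			 */
			printf("child %d: would deadlock (%d bios deferred)\n",
			       child, deferred);
			/*
			 * Punting the deferred bios to a separate worker lets
			 * them complete and refill the reserve before we
			 * block on it, breaking the cycle.
			 */
			reserve += deferred;
			deferred = 0;
			printf("punted to rescuer, reserve back to %d\n",
			       reserve);
		}
		reserve--;	/* allocate a child bio from the reserve */
		deferred++;	/* it is queued, not yet submitted or freed */
		printf("child %d allocated, reserve now %d\n", child, reserve);
	}
	return 0;
}

In the patched bio_alloc_bioset() this is why punt_bios_to_rescuer() is called on the slow path before mempool_alloc() is allowed to sleep, and why the function now checks current->bio_list itself instead of relying on the caller to clear __GFP_DIRECT_RECLAIM.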