mm/readahead: store folio order in struct file_ra_state

Previously the folio order of the previous readahead request was inferred
from the folio whose readahead marker was hit. But due to the way we have
to round to non-natural boundaries sometimes, this first folio in the
readahead block is often smaller than the preferred order for that
request. This means that for cases where the initial sync readahead is
poorly aligned, the folio order will ramp up much more slowly.

So instead, let's store the order in struct file_ra_state so we are not
affected by any required alignment. We previously made enough room in the
struct for a 16-bit order field. This should be plenty big enough since we
are limited to MAX_PAGECACHE_ORDER anyway, which is certainly never larger
than ~20.

Since we now pass order in struct file_ra_state, page_cache_ra_order() no
longer needs its new_order parameter, so let's remove that.

Worked example:

Here we are touching pages 17-256 sequentially just as we did in the
previous commit, but now that we are remembering the preferred order
explicitly, we no longer have the slow ramp up problem. Note specifically
that we no longer have 2 rounds (2x ~128K) of order-2 folios:

TYPE STARTOFFS ENDOFFS SIZE STARTPG ENDPG NRPG ORDER RA
----- ---------- ---------- ---------- ------- ------- ----- ----- --
HOLE 0x00000000 0x00001000 4096 0 1 1
FOLIO 0x00001000 0x00002000 4096 1 2 1 0
FOLIO 0x00002000 0x00003000 4096 2 3 1 0
FOLIO 0x00003000 0x00004000 4096 3 4 1 0
FOLIO 0x00004000 0x00005000 4096 4 5 1 0
FOLIO 0x00005000 0x00006000 4096 5 6 1 0
FOLIO 0x00006000 0x00007000 4096 6 7 1 0
FOLIO 0x00007000 0x00008000 4096 7 8 1 0
FOLIO 0x00008000 0x00009000 4096 8 9 1 0
FOLIO 0x00009000 0x0000a000 4096 9 10 1 0
FOLIO 0x0000a000 0x0000b000 4096 10 11 1 0
FOLIO 0x0000b000 0x0000c000 4096 11 12 1 0
FOLIO 0x0000c000 0x0000d000 4096 12 13 1 0
FOLIO 0x0000d000 0x0000e000 4096 13 14 1 0
FOLIO 0x0000e000 0x0000f000 4096 14 15 1 0
FOLIO 0x0000f000 0x00010000 4096 15 16 1 0
FOLIO 0x00010000 0x00011000 4096 16 17 1 0
FOLIO 0x00011000 0x00012000 4096 17 18 1 0
FOLIO 0x00012000 0x00013000 4096 18 19 1 0
FOLIO 0x00013000 0x00014000 4096 19 20 1 0
FOLIO 0x00014000 0x00015000 4096 20 21 1 0
FOLIO 0x00015000 0x00016000 4096 21 22 1 0
FOLIO 0x00016000 0x00017000 4096 22 23 1 0
FOLIO 0x00017000 0x00018000 4096 23 24 1 0
FOLIO 0x00018000 0x00019000 4096 24 25 1 0
FOLIO 0x00019000 0x0001a000 4096 25 26 1 0
FOLIO 0x0001a000 0x0001b000 4096 26 27 1 0
FOLIO 0x0001b000 0x0001c000 4096 27 28 1 0
FOLIO 0x0001c000 0x0001d000 4096 28 29 1 0
FOLIO 0x0001d000 0x0001e000 4096 29 30 1 0
FOLIO 0x0001e000 0x0001f000 4096 30 31 1 0
FOLIO 0x0001f000 0x00020000 4096 31 32 1 0
FOLIO 0x00020000 0x00021000 4096 32 33 1 0
FOLIO 0x00021000 0x00022000 4096 33 34 1 0
FOLIO 0x00022000 0x00024000 8192 34 36 2 1
FOLIO 0x00024000 0x00028000 16384 36 40 4 2
FOLIO 0x00028000 0x0002c000 16384 40 44 4 2
FOLIO 0x0002c000 0x00030000 16384 44 48 4 2
FOLIO 0x00030000 0x00034000 16384 48 52 4 2
FOLIO 0x00034000 0x00038000 16384 52 56 4 2
FOLIO 0x00038000 0x0003c000 16384 56 60 4 2
FOLIO 0x0003c000 0x00040000 16384 60 64 4 2
FOLIO 0x00040000 0x00050000 65536 64 80 16 4
FOLIO 0x00050000 0x00060000 65536 80 96 16 4
FOLIO 0x00060000 0x00080000 131072 96 128 32 5
FOLIO 0x00080000 0x000a0000 131072 128 160 32 5
FOLIO 0x000a0000 0x000c0000 131072 160 192 32 5
FOLIO 0x000c0000 0x000e0000 131072 192 224 32 5
FOLIO 0x000e0000 0x00100000 131072 224 256 32 5
FOLIO 0x00100000 0x00120000 131072 256 288 32 5
FOLIO 0x00120000 0x00140000 131072 288 320 32 5 Y
HOLE 0x00140000 0x00800000 7077888 320 2048 1728

Link: https://lkml.kernel.org/r/20250609092729.274960-5-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Chaitanya S Prakash <chaitanyas.prakash@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Ryan Roberts and committed by

Andrew Morton 11 months ago c4602f9f f5e8b140

+20 -12

4 changed files

expand all

include

linux

fs.h

filemap.c

internal.h

readahead.c

include/linux/fs.h

··· 1043 1043 * and so were/are genuinely "ahead". Start next readahead when 1044 1044 * the first of these pages is accessed. 1045 1045 * @ra_pages: Maximum size of a readahead request, copied from the bdi. 1046 + * @order: Preferred folio order used for most recent readahead. 1046 1047 * @mmap_miss: How many mmap accesses missed in the page cache. 1047 1048 * @prev_pos: The last byte in the most recent read request. 1048 1049 * ··· 1055 1054 unsigned int size; 1056 1055 unsigned int async_size; 1057 1056 unsigned int ra_pages; 1057 + unsigned short order; 1058 1058 unsigned short mmap_miss; 1059 1059 loff_t prev_pos; 1060 1060 };

+4 -2

mm/filemap.c

··· 3232 3232 if (!(vm_flags & VM_RAND_READ)) 3233 3233 ra->size *= 2; 3234 3234 ra->async_size = HPAGE_PMD_NR; 3235 - page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER); 3235 + ra->order = HPAGE_PMD_ORDER; 3236 + page_cache_ra_order(&ractl, ra); 3236 3237 return fpin; 3237 3238 } 3238 3239 #endif ··· 3269 3268 ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2); 3270 3269 ra->size = ra->ra_pages; 3271 3270 ra->async_size = ra->ra_pages / 4; 3271 + ra->order = 0; 3272 3272 ractl._index = ra->start; 3273 - page_cache_ra_order(&ractl, ra, 0); 3273 + page_cache_ra_order(&ractl, ra); 3274 3274 return fpin; 3275 3275 } 3276 3276

+1 -2

mm/internal.h

··· 436 436 int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, 437 437 gfp_t gfp); 438 438 439 - void page_cache_ra_order(struct readahead_control *, struct file_ra_state *, 440 - unsigned int order); 439 + void page_cache_ra_order(struct readahead_control *, struct file_ra_state *); 441 440 void force_page_cache_ra(struct readahead_control *, unsigned long nr); 442 441 static inline void force_page_cache_readahead(struct address_space *mapping, 443 442 struct file *file, pgoff_t index, unsigned long nr_to_read)

+13 -8

mm/readahead.c

··· 457 457 } 458 458 459 459 void page_cache_ra_order(struct readahead_control *ractl, 460 - struct file_ra_state *ra, unsigned int new_order) 460 + struct file_ra_state *ra) 461 461 { 462 462 struct address_space *mapping = ractl->mapping; 463 463 pgoff_t start = readahead_index(ractl); ··· 468 468 unsigned int nofs; 469 469 int err = 0; 470 470 gfp_t gfp = readahead_gfp_mask(mapping); 471 + unsigned int new_order = ra->order; 471 472 472 - if (!mapping_large_folio_support(mapping)) 473 + if (!mapping_large_folio_support(mapping)) { 474 + ra->order = 0; 473 475 goto fallback; 476 + } 474 477 475 478 limit = min(limit, index + ra->size - 1); 476 479 477 480 new_order = min(mapping_max_folio_order(mapping), new_order); 478 481 new_order = min_t(unsigned int, new_order, ilog2(ra->size)); 479 482 new_order = max(new_order, min_order); 483 + 484 + ra->order = new_order; 480 485 481 486 /* See comment in page_cache_ra_unbounded() */ 482 487 nofs = memalloc_nofs_save(); ··· 614 609 ra->size = min(contig_count + req_count, max_pages); 615 610 ra->async_size = 1; 616 611 readit: 612 + ra->order = 0; 617 613 ractl->_index = ra->start; 618 - page_cache_ra_order(ractl, ra, 0); 614 + page_cache_ra_order(ractl, ra); 619 615 } 620 616 EXPORT_SYMBOL_GPL(page_cache_sync_ra); 621 617 ··· 627 621 struct file_ra_state *ra = ractl->ra; 628 622 pgoff_t index = readahead_index(ractl); 629 623 pgoff_t expected, start, end, aligned_end, align; 630 - unsigned int order = folio_order(folio); 631 624 632 625 /* no readahead */ 633 626 if (!ra->ra_pages) ··· 649 644 * Ramp up sizes, and push forward the readahead window. 
650 645 */ 651 646 expected = round_down(ra->start + ra->size - ra->async_size, 652 - 1UL << order); 647 + 1UL << folio_order(folio)); 653 648 if (index == expected) { 654 649 ra->start += ra->size; 655 650 /* ··· 678 673 ra->size += req_count; 679 674 ra->size = get_next_ra_size(ra, max_pages); 680 675 readit: 681 - order += 2; 682 - align = 1UL << min(order, ffs(max_pages) - 1); 676 + ra->order += 2; 677 + align = 1UL << min(ra->order, ffs(max_pages) - 1); 683 678 end = ra->start + ra->size; 684 679 aligned_end = round_down(end, align); 685 680 if (aligned_end > ra->start) 686 681 ra->size -= end - aligned_end; 687 682 ra->async_size = ra->size; 688 683 ractl->_index = ra->start; 689 - page_cache_ra_order(ractl, ra, order); 684 + page_cache_ra_order(ractl, ra); 690 685 } 691 686 EXPORT_SYMBOL_GPL(page_cache_async_ra); 692 687

Configure Feed

Configure Feed