Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

gen_init_cpio: add -a <data_align> as reflink optimization

As described in buffer-format.rst, the existing initramfs.c extraction
logic works fine if the cpio filename field is padded out with trailing
zeros, with a caveat that the padded namesize can't exceed PATH_MAX.

Add filename zero-padding logic to gen_init_cpio, which can be triggered
via the new -a <data_align> parameter. Performance and storage
utilization are improved for Btrfs and XFS workloads, as copy_file_range
can reflink the entire source file into a filesystem block-size aligned
destination offset within the cpio archive.

Btrfs benchmarks run on 6.15.8-1-default (Tumbleweed) x86_64 host:
> truncate --size=2G /tmp/backing.img
> /sbin/mkfs.btrfs /tmp/backing.img
...
Sector size: 4096 (CPU page size: 4096)
...
> sudo mount /tmp/backing.img mnt
> sudo chown $USER mnt
> cd mnt
mnt> dd if=/dev/urandom of=foo bs=1M count=20 && cat foo >/dev/null
...
mnt> echo "file /foo foo 0755 0 0" > list
mnt> perf stat -r 10 gen_init_cpio -o unaligned_btrfs list
...
0.023496 +- 0.000472 seconds time elapsed ( +- 2.01% )

mnt> perf stat -r 10 gen_init_cpio -o aligned_btrfs -a 4096 list
...
0.0010010 +- 0.0000565 seconds time elapsed ( +- 5.65% )

mnt> /sbin/xfs_io -c "fiemap -v" unaligned_btrfs
unaligned_btrfs:
EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
0: [0..40967]: 695040..736007 40968 0x1
mnt> /sbin/xfs_io -c "fiemap -v" aligned_btrfs
aligned_btrfs:
EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
0: [0..7]: 26768..26775 8 0x0
1: [8..40967]: 269056..310015 40960 0x2000
2: [40968..40975]: 26776..26783 8 0x1
mnt> /sbin/btrfs fi du unaligned_btrfs aligned_btrfs
Total Exclusive Set shared Filename
20.00MiB 20.00MiB 0.00B unaligned_btrfs
20.01MiB 8.00KiB 20.00MiB aligned_btrfs

XFS benchmarks run on the same host:
> sudo umount mnt && rm /tmp/backing.img
> truncate --size=2G /tmp/backing.img
> /sbin/mkfs.xfs /tmp/backing.img
...
= reflink=1 ...
data = bsize=4096 blocks=524288, imaxpct=25
...
> sudo mount /tmp/backing.img mnt
> sudo chown $USER mnt
> cd mnt
mnt> dd if=/dev/urandom of=foo bs=1M count=20 && cat foo >/dev/null
...
mnt> echo "file /foo foo 0755 0 0" > list
mnt> perf stat -r 10 gen_init_cpio -o unaligned_xfs list
...
0.011069 +- 0.000469 seconds time elapsed ( +- 4.24% )

mnt> perf stat -r 10 gen_init_cpio -o aligned_xfs -a 4096 list
...
0.001273 +- 0.000288 seconds time elapsed ( +- 22.60% )

mnt> /sbin/xfs_io -c "fiemap -v" unaligned_xfs
unaligned_xfs:
EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
0: [0..40967]: 106176..147143 40968 0x0
1: [40968..65023]: 147144..171199 24056 0x801
mnt> /sbin/xfs_io -c "fiemap -v" aligned_xfs
aligned_xfs:
EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
0: [0..7]: 120..127 8 0x0
1: [8..40967]: 192..41151 40960 0x2000
2: [40968..40975]: 236728..236735 8 0x0
3: [40976..106495]: 236736..302255 65520 0x801

The alignment is best-effort; a stderr message is printed if alignment
can't be achieved due to PATH_MAX overrun, with fallback to a non-padded
filename. This allows it to still be useful for opportunistic alignment,
e.g. on aarch64 Btrfs with 64K block-size. Alignment failure messages
provide an indicator that reordering of the cpio-manifest may be
beneficial.

Archive read performance for reflinked initramfs images may suffer due
to the effects of fragmentation, particularly on spinning disks. To
mitigate excessive fragmentation, files with lengths less than or equal
to data_align aren't padded.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Nicolas Schier <nsc@kernel.org>
Link: https://lore.kernel.org/r/20250819032607.28727-8-ddiss@suse.de
Signed-off-by: Nathan Chancellor <nathan@kernel.org>

authored by

David Disseldorp and committed by
Nathan Chancellor
5467e855 7c1f14f6

+38 -11
+38 -11
usr/gen_init_cpio.c
··· 28 28 #define CPIO_TRAILER "TRAILER!!!" 29 29 #define padlen(_off, _align) (((_align) - ((_off) & ((_align) - 1))) % (_align)) 30 30 31 - static char padding[512]; 31 + /* zero-padding the filename field for data alignment is limited by PATH_MAX */ 32 + static char padding[PATH_MAX]; 32 33 static unsigned int offset; 33 34 static unsigned int ino = 721; 34 35 static time_t default_mtime; 35 36 static bool do_file_mtime; 36 37 static bool do_csum = false; 37 38 static int outfd = STDOUT_FILENO; 39 + static unsigned int dalign; 38 40 39 41 struct file_handler { 40 42 const char *type; ··· 361 359 int file, retval, len; 362 360 int rc = -1; 363 361 time_t mtime; 364 - int namesize; 362 + int namesize, namepadlen; 365 363 unsigned int i; 366 364 uint32_t csum = 0; 367 365 ssize_t this_read; ··· 409 407 } 410 408 411 409 size = 0; 410 + namepadlen = 0; 412 411 for (i = 1; i <= nlinks; i++) { 413 - /* data goes on last link */ 414 - if (i == nlinks) 415 - size = buf.st_size; 416 - 417 412 if (name[0] == '/') 418 413 name++; 419 414 namesize = strlen(name) + 1; 415 + 416 + /* data goes on last link, after any alignment padding */ 417 + if (i == nlinks) 418 + size = buf.st_size; 419 + 420 + if (dalign && size > dalign) { 421 + namepadlen = padlen(offset + CPIO_HDR_LEN + namesize, 422 + dalign); 423 + if (namesize + namepadlen > PATH_MAX) { 424 + fprintf(stderr, 425 + "%s: best-effort alignment %u missed\n", 426 + name, dalign); 427 + namepadlen = 0; 428 + } 429 + } 430 + 420 431 len = dprintf(outfd, "%s%08X%08X%08lX%08lX%08X%08lX" 421 432 "%08lX%08X%08X%08X%08X%08X%08X", 422 433 do_csum ? "070702" : "070701", /* magic */ ··· 444 429 1, /* minor */ 445 430 0, /* rmajor */ 446 431 0, /* rminor */ 447 - namesize, /* namesize */ 432 + namesize + namepadlen, /* namesize */ 448 433 size ? 
csum : 0); /* chksum */ 449 434 offset += len; 450 435 451 436 if (len != CPIO_HDR_LEN || 452 437 push_buf(name, namesize) < 0 || 453 - push_pad(padlen(offset, 4)) < 0) 438 + push_pad(namepadlen ? namepadlen : padlen(offset, 4)) < 0) 454 439 goto error; 455 440 456 441 if (size) { ··· 567 552 static void usage(const char *prog) 568 553 { 569 554 fprintf(stderr, "Usage:\n" 570 - "\t%s [-t <timestamp>] [-c] [-o <output_file>] <cpio_list>\n" 555 + "\t%s [-t <timestamp>] [-c] [-o <output_file>] [-a <data_align>] <cpio_list>\n" 571 556 "\n" 572 557 "<cpio_list> is a file containing newline separated entries that\n" 573 558 "describe the files to be included in the initramfs archive:\n" ··· 605 590 "The default is to use the current time for all files, but\n" 606 591 "preserve modification time for regular files.\n" 607 592 "-c: calculate and store 32-bit checksums for file data.\n" 608 - "<output_file>: write cpio to this file instead of stdout\n", 593 + "<output_file>: write cpio to this file instead of stdout\n" 594 + "<data_align>: attempt to align file data by zero-padding the\n" 595 + "filename field up to data_align. Must be a multiple of 4.\n" 596 + "Alignment is best-effort; PATH_MAX limits filename padding.\n", 609 597 prog); 610 598 } 611 599 ··· 650 632 651 633 default_mtime = time(NULL); 652 634 while (1) { 653 - int opt = getopt(argc, argv, "t:cho:"); 635 + int opt = getopt(argc, argv, "t:cho:a:"); 654 636 char *invalid; 655 637 656 638 if (opt == -1) ··· 675 657 0600); 676 658 if (outfd < 0) { 677 659 fprintf(stderr, "failed to open %s\n", optarg); 660 + usage(argv[0]); 661 + exit(1); 662 + } 663 + break; 664 + case 'a': 665 + dalign = strtoul(optarg, &invalid, 10); 666 + if (!*optarg || *invalid || (dalign & 3)) { 667 + fprintf(stderr, "Invalid data_align: %s\n", 668 + optarg); 678 669 usage(argv[0]); 679 670 exit(1); 680 671 }