Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.15-rc1.pagesize' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs pagesize updates from Christian Brauner:
"This enables block sizes greater than the page size for block devices.

With this we can start supporting block devices with logical block
sizes larger than 4k.

It also allows lifting the device cache sector size support to 64k.
This allows filesystems which can use larger sector sizes up to 64k to
ensure that the filesystem will not generate writes that are smaller
than the specified sector size."

* tag 'vfs-6.15-rc1.pagesize' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
bdev: add back PAGE_SIZE block size validation for sb_set_blocksize()
bdev: use bdev_io_min() for statx block size
block/bdev: lift block size restrictions to 64k
block/bdev: enable large folio support for large logical block sizes
fs/buffer fs/mpage: remove large folio restriction
fs/mpage: use blocks_per_folio instead of blocks_per_page
fs/mpage: avoid negative shift for large blocksize
fs/buffer: remove batching from async read
fs/buffer: simplify block_read_full_folio() with bh_offset()

+65 -69
+8 -5
block/bdev.c
··· 148 148 bsize <<= 1; 149 149 } 150 150 BD_INODE(bdev)->i_blkbits = blksize_bits(bsize); 151 + mapping_set_folio_min_order(BD_INODE(bdev)->i_mapping, 152 + get_order(bsize)); 151 153 } 152 154 153 155 int set_blocksize(struct file *file, int size) ··· 171 169 if (inode->i_blkbits != blksize_bits(size)) { 172 170 sync_blockdev(bdev); 173 171 inode->i_blkbits = blksize_bits(size); 172 + mapping_set_folio_min_order(inode->i_mapping, get_order(size)); 174 173 kill_bdev(bdev); 175 174 } 176 175 return 0; ··· 181 178 182 179 int sb_set_blocksize(struct super_block *sb, int size) 183 180 { 181 + if (!(sb->s_type->fs_flags & FS_LBS) && size > PAGE_SIZE) 182 + return 0; 184 183 if (set_blocksize(sb->s_bdev_file, size)) 185 184 return 0; 186 - /* If we get here, we know size is power of two 187 - * and it's value is between 512 and PAGE_SIZE */ 185 + /* If we get here, we know size is validated */ 188 186 sb->s_blocksize = size; 189 187 sb->s_blocksize_bits = blksize_bits(size); 190 188 return sb->s_blocksize; ··· 1278 1274 struct inode *backing_inode; 1279 1275 struct block_device *bdev; 1280 1276 1281 - if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC))) 1282 - return; 1283 - 1284 1277 backing_inode = d_backing_inode(path->dentry); 1285 1278 1286 1279 /* ··· 1303 1302 queue_atomic_write_unit_min_bytes(bd_queue), 1304 1303 queue_atomic_write_unit_max_bytes(bd_queue)); 1305 1304 } 1305 + 1306 + stat->blksize = bdev_io_min(bdev); 1306 1307 1307 1308 blkdev_put_no_open(bdev); 1308 1309 }
+1 -1
fs/bcachefs/fs.c
··· 2396 2396 .name = "bcachefs", 2397 2397 .init_fs_context = bch2_init_fs_context, 2398 2398 .kill_sb = bch2_kill_sb, 2399 - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, 2399 + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_LBS, 2400 2400 }; 2401 2401 2402 2402 MODULE_ALIAS_FS("bcachefs");
+23 -35
fs/buffer.c
··· 2361 2361 { 2362 2362 struct inode *inode = folio->mapping->host; 2363 2363 sector_t iblock, lblock; 2364 - struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 2364 + struct buffer_head *bh, *head, *prev = NULL; 2365 2365 size_t blocksize; 2366 - int nr, i; 2367 2366 int fully_mapped = 1; 2368 2367 bool page_error = false; 2369 2368 loff_t limit = i_size_read(inode); ··· 2371 2372 if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) 2372 2373 limit = inode->i_sb->s_maxbytes; 2373 2374 2374 - VM_BUG_ON_FOLIO(folio_test_large(folio), folio); 2375 - 2376 2375 head = folio_create_buffers(folio, inode, 0); 2377 2376 blocksize = head->b_size; 2378 2377 2379 2378 iblock = div_u64(folio_pos(folio), blocksize); 2380 2379 lblock = div_u64(limit + blocksize - 1, blocksize); 2381 2380 bh = head; 2382 - nr = 0; 2383 - i = 0; 2384 2381 2385 2382 do { 2386 2383 if (buffer_uptodate(bh)) ··· 2393 2398 page_error = true; 2394 2399 } 2395 2400 if (!buffer_mapped(bh)) { 2396 - folio_zero_range(folio, i * blocksize, 2401 + folio_zero_range(folio, bh_offset(bh), 2397 2402 blocksize); 2398 2403 if (!err) 2399 2404 set_buffer_uptodate(bh); ··· 2406 2411 if (buffer_uptodate(bh)) 2407 2412 continue; 2408 2413 } 2409 - arr[nr++] = bh; 2410 - } while (i++, iblock++, (bh = bh->b_this_page) != head); 2414 + 2415 + lock_buffer(bh); 2416 + if (buffer_uptodate(bh)) { 2417 + unlock_buffer(bh); 2418 + continue; 2419 + } 2420 + 2421 + mark_buffer_async_read(bh); 2422 + if (prev) 2423 + submit_bh(REQ_OP_READ, prev); 2424 + prev = bh; 2425 + } while (iblock++, (bh = bh->b_this_page) != head); 2411 2426 2412 2427 if (fully_mapped) 2413 2428 folio_set_mappedtodisk(folio); 2414 2429 2415 - if (!nr) { 2416 - /* 2417 - * All buffers are uptodate or get_block() returned an 2418 - * error when trying to map them - we can finish the read. 
2419 - */ 2420 - folio_end_read(folio, !page_error); 2421 - return 0; 2422 - } 2423 - 2424 - /* Stage two: lock the buffers */ 2425 - for (i = 0; i < nr; i++) { 2426 - bh = arr[i]; 2427 - lock_buffer(bh); 2428 - mark_buffer_async_read(bh); 2429 - } 2430 - 2431 2430 /* 2432 - * Stage 3: start the IO. Check for uptodateness 2433 - * inside the buffer lock in case another process reading 2434 - * the underlying blockdev brought it uptodate (the sct fix). 2431 + * All buffers are uptodate or get_block() returned an error 2432 + * when trying to map them - we must finish the read because 2433 + * end_buffer_async_read() will never be called on any buffer 2434 + * in this folio. 2435 2435 */ 2436 - for (i = 0; i < nr; i++) { 2437 - bh = arr[i]; 2438 - if (buffer_uptodate(bh)) 2439 - end_buffer_async_read(bh, 1); 2440 - else 2441 - submit_bh(REQ_OP_READ, bh); 2442 - } 2436 + if (prev) 2437 + submit_bh(REQ_OP_READ, prev); 2438 + else 2439 + folio_end_read(folio, !page_error); 2440 + 2443 2441 return 0; 2444 2442 } 2445 2443 EXPORT_SYMBOL(block_read_full_folio);
+23 -26
fs/mpage.c
··· 107 107 * don't make any buffers if there is only one buffer on 108 108 * the folio and the folio just needs to be set up to date 109 109 */ 110 - if (inode->i_blkbits == PAGE_SHIFT && 110 + if (inode->i_blkbits == folio_shift(folio) && 111 111 buffer_uptodate(bh)) { 112 112 folio_mark_uptodate(folio); 113 113 return; ··· 153 153 struct folio *folio = args->folio; 154 154 struct inode *inode = folio->mapping->host; 155 155 const unsigned blkbits = inode->i_blkbits; 156 - const unsigned blocks_per_page = PAGE_SIZE >> blkbits; 156 + const unsigned blocks_per_folio = folio_size(folio) >> blkbits; 157 157 const unsigned blocksize = 1 << blkbits; 158 158 struct buffer_head *map_bh = &args->map_bh; 159 159 sector_t block_in_file; ··· 161 161 sector_t last_block_in_file; 162 162 sector_t first_block; 163 163 unsigned page_block; 164 - unsigned first_hole = blocks_per_page; 164 + unsigned first_hole = blocks_per_folio; 165 165 struct block_device *bdev = NULL; 166 166 int length; 167 167 int fully_mapped = 1; ··· 169 169 unsigned nblocks; 170 170 unsigned relative_block; 171 171 gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); 172 - 173 - /* MAX_BUF_PER_PAGE, for example */ 174 - VM_BUG_ON_FOLIO(folio_test_large(folio), folio); 175 172 176 173 if (args->is_readahead) { 177 174 opf |= REQ_RAHEAD; ··· 178 181 if (folio_buffers(folio)) 179 182 goto confused; 180 183 181 - block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits); 182 - last_block = block_in_file + args->nr_pages * blocks_per_page; 184 + block_in_file = folio_pos(folio) >> blkbits; 185 + last_block = block_in_file + ((args->nr_pages * PAGE_SIZE) >> blkbits); 183 186 last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; 184 187 if (last_block > last_block_in_file) 185 188 last_block = last_block_in_file; ··· 201 204 clear_buffer_mapped(map_bh); 202 205 break; 203 206 } 204 - if (page_block == blocks_per_page) 207 + if (page_block == blocks_per_folio) 205 208 break; 206 
209 page_block++; 207 210 block_in_file++; ··· 213 216 * Then do more get_blocks calls until we are done with this folio. 214 217 */ 215 218 map_bh->b_folio = folio; 216 - while (page_block < blocks_per_page) { 219 + while (page_block < blocks_per_folio) { 217 220 map_bh->b_state = 0; 218 221 map_bh->b_size = 0; 219 222 ··· 226 229 227 230 if (!buffer_mapped(map_bh)) { 228 231 fully_mapped = 0; 229 - if (first_hole == blocks_per_page) 232 + if (first_hole == blocks_per_folio) 230 233 first_hole = page_block; 231 234 page_block++; 232 235 block_in_file++; ··· 244 247 goto confused; 245 248 } 246 249 247 - if (first_hole != blocks_per_page) 250 + if (first_hole != blocks_per_folio) 248 251 goto confused; /* hole -> non-hole */ 249 252 250 253 /* Contiguous blocks? */ ··· 257 260 if (relative_block == nblocks) { 258 261 clear_buffer_mapped(map_bh); 259 262 break; 260 - } else if (page_block == blocks_per_page) 263 + } else if (page_block == blocks_per_folio) 261 264 break; 262 265 page_block++; 263 266 block_in_file++; ··· 265 268 bdev = map_bh->b_bdev; 266 269 } 267 270 268 - if (first_hole != blocks_per_page) { 269 - folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE); 271 + if (first_hole != blocks_per_folio) { 272 + folio_zero_segment(folio, first_hole << blkbits, folio_size(folio)); 270 273 if (first_hole == 0) { 271 274 folio_mark_uptodate(folio); 272 275 folio_unlock(folio); ··· 300 303 relative_block = block_in_file - args->first_logical_block; 301 304 nblocks = map_bh->b_size >> blkbits; 302 305 if ((buffer_boundary(map_bh) && relative_block == nblocks) || 303 - (first_hole != blocks_per_page)) 306 + (first_hole != blocks_per_folio)) 304 307 args->bio = mpage_bio_submit_read(args->bio); 305 308 else 306 - args->last_block_in_bio = first_block + blocks_per_page - 1; 309 + args->last_block_in_bio = first_block + blocks_per_folio - 1; 307 310 out: 308 311 return args->bio; 309 312 ··· 382 385 { 383 386 struct mpage_readpage_args args = { 384 387 .folio = 
folio, 385 - .nr_pages = 1, 388 + .nr_pages = folio_nr_pages(folio), 386 389 .get_block = get_block, 387 390 }; 388 391 ··· 453 456 struct address_space *mapping = folio->mapping; 454 457 struct inode *inode = mapping->host; 455 458 const unsigned blkbits = inode->i_blkbits; 456 - const unsigned blocks_per_page = PAGE_SIZE >> blkbits; 459 + const unsigned blocks_per_folio = folio_size(folio) >> blkbits; 457 460 sector_t last_block; 458 461 sector_t block_in_file; 459 462 sector_t first_block; 460 463 unsigned page_block; 461 - unsigned first_unmapped = blocks_per_page; 464 + unsigned first_unmapped = blocks_per_folio; 462 465 struct block_device *bdev = NULL; 463 466 int boundary = 0; 464 467 sector_t boundary_block = 0; ··· 483 486 */ 484 487 if (buffer_dirty(bh)) 485 488 goto confused; 486 - if (first_unmapped == blocks_per_page) 489 + if (first_unmapped == blocks_per_folio) 487 490 first_unmapped = page_block; 488 491 continue; 489 492 } 490 493 491 - if (first_unmapped != blocks_per_page) 494 + if (first_unmapped != blocks_per_folio) 492 495 goto confused; /* hole -> non-hole */ 493 496 494 497 if (!buffer_dirty(bh) || !buffer_uptodate(bh)) ··· 524 527 * The page has no buffers: map it to disk 525 528 */ 526 529 BUG_ON(!folio_test_uptodate(folio)); 527 - block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits); 530 + block_in_file = folio_pos(folio) >> blkbits; 528 531 /* 529 532 * Whole page beyond EOF? Skip allocating blocks to avoid leaking 530 533 * space. 
··· 533 536 goto page_is_mapped; 534 537 last_block = (i_size - 1) >> blkbits; 535 538 map_bh.b_folio = folio; 536 - for (page_block = 0; page_block < blocks_per_page; ) { 539 + for (page_block = 0; page_block < blocks_per_folio; ) { 537 540 538 541 map_bh.b_state = 0; 539 542 map_bh.b_size = 1 << blkbits; ··· 615 618 BUG_ON(folio_test_writeback(folio)); 616 619 folio_start_writeback(folio); 617 620 folio_unlock(folio); 618 - if (boundary || (first_unmapped != blocks_per_page)) { 621 + if (boundary || (first_unmapped != blocks_per_folio)) { 619 622 bio = mpage_bio_submit_write(bio); 620 623 if (boundary_block) { 621 624 write_boundary_block(boundary_bdev, 622 625 boundary_block, 1 << blkbits); 623 626 } 624 627 } else { 625 - mpd->last_block_in_bio = first_block + blocks_per_page - 1; 628 + mpd->last_block_in_bio = first_block + blocks_per_folio - 1; 626 629 } 627 630 goto out; 628 631
+2 -1
fs/xfs/xfs_super.c
··· 2122 2122 .init_fs_context = xfs_init_fs_context, 2123 2123 .parameters = xfs_fs_parameters, 2124 2124 .kill_sb = xfs_kill_sb, 2125 - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME, 2125 + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME | 2126 + FS_LBS, 2126 2127 }; 2127 2128 MODULE_ALIAS_FS("xfs"); 2128 2129
+7 -1
include/linux/blkdev.h
··· 268 268 return MKDEV(disk->major, disk->first_minor); 269 269 } 270 270 271 + /* 272 + * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) 273 + * however we constrain this to what we can validate and test. 274 + */ 275 + #define BLK_MAX_BLOCK_SIZE SZ_64K 276 + 271 277 /* blk_validate_limits() validates bsize, so drivers don't usually need to */ 272 278 static inline int blk_validate_block_size(unsigned long bsize) 273 279 { 274 - if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) 280 + if (bsize < 512 || bsize > BLK_MAX_BLOCK_SIZE || !is_power_of_2(bsize)) 275 281 return -EINVAL; 276 282 277 283 return 0;
+1
include/linux/fs.h
··· 2606 2606 #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ 2607 2607 #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ 2608 2608 #define FS_MGTIME 64 /* FS uses multigrain timestamps */ 2609 + #define FS_LBS 128 /* FS supports LBS */ 2609 2610 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ 2610 2611 int (*init_fs_context)(struct fs_context *); 2611 2612 const struct fs_parameter_spec *parameters;