Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-7.0-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs iomap updates from Christian Brauner:

- Erofs page cache sharing preliminaries:

  Plumb a void *private parameter through iomap_read_folio() and
  iomap_readahead() into iomap_iter->private, matching what iomap direct
  I/O (iomap_dio_rw()) already does with its own private argument. EROFS
  uses this to replace a bogus kmap_to_page() call, as preparatory work
  for page cache sharing.

- Fix for invalid folio access:

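  Fix an invalid folio access when a folio without iomap_folio_state
  is fully submitted to the IO helper. Once the entire folio has been
  submitted, the helper owns it and may call folio_end_read() at any
  time, so ctx->cur_folio must be invalidated (set to NULL) after full
  submission and must not be touched again by the caller.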

* tag 'vfs-7.0-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
iomap: fix invalid folio access after folio_end_read()
erofs: hold read context in iomap_iter if needed
iomap: stash iomap read ctx in the private field of iomap_iter

+83 -53
+46 -21
fs/erofs/data.c
··· 267 267 folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO))); 268 268 } 269 269 270 + struct erofs_iomap_iter_ctx { 271 + struct page *page; 272 + void *base; 273 + }; 274 + 270 275 static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 271 276 unsigned int flags, struct iomap *iomap, struct iomap *srcmap) 272 277 { 273 - int ret; 278 + struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap); 279 + struct erofs_iomap_iter_ctx *ctx = iter->private; 274 280 struct super_block *sb = inode->i_sb; 275 281 struct erofs_map_blocks map; 276 282 struct erofs_map_dev mdev; 283 + int ret; 277 284 278 285 map.m_la = offset; 279 286 map.m_llen = length; ··· 291 284 iomap->offset = map.m_la; 292 285 iomap->length = map.m_llen; 293 286 iomap->flags = 0; 294 - iomap->private = NULL; 295 287 iomap->addr = IOMAP_NULL_ADDR; 296 288 if (!(map.m_flags & EROFS_MAP_MAPPED)) { 297 289 iomap->type = IOMAP_HOLE; ··· 316 310 } 317 311 318 312 if (map.m_flags & EROFS_MAP_META) { 319 - void *ptr; 320 - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 321 - 322 313 iomap->type = IOMAP_INLINE; 323 - ptr = erofs_read_metabuf(&buf, sb, map.m_pa, 324 - erofs_inode_in_metabox(inode)); 325 - if (IS_ERR(ptr)) 326 - return PTR_ERR(ptr); 327 - iomap->inline_data = ptr; 328 - iomap->private = buf.base; 314 + /* read context should read the inlined data */ 315 + if (ctx) { 316 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 317 + void *ptr; 318 + 319 + ptr = erofs_read_metabuf(&buf, sb, map.m_pa, 320 + erofs_inode_in_metabox(inode)); 321 + if (IS_ERR(ptr)) 322 + return PTR_ERR(ptr); 323 + iomap->inline_data = ptr; 324 + ctx->page = buf.page; 325 + ctx->base = buf.base; 326 + } 329 327 } else { 330 328 iomap->type = IOMAP_MAPPED; 331 329 } ··· 339 329 static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length, 340 330 ssize_t written, unsigned int flags, struct iomap *iomap) 341 331 { 342 - void *ptr = iomap->private; 332 + struct iomap_iter 
*iter = container_of(iomap, struct iomap_iter, iomap); 333 + struct erofs_iomap_iter_ctx *ctx = iter->private; 343 334 344 - if (ptr) { 335 + if (ctx && ctx->base) { 345 336 struct erofs_buf buf = { 346 - .page = kmap_to_page(ptr), 347 - .base = ptr, 337 + .page = ctx->page, 338 + .base = ctx->base, 348 339 }; 349 340 350 341 DBG_BUGON(iomap->type != IOMAP_INLINE); 351 342 erofs_put_metabuf(&buf); 352 - } else { 353 - DBG_BUGON(iomap->type == IOMAP_INLINE); 343 + ctx->base = NULL; 354 344 } 355 345 return written; 356 346 } ··· 380 370 */ 381 371 static int erofs_read_folio(struct file *file, struct folio *folio) 382 372 { 373 + struct iomap_read_folio_ctx read_ctx = { 374 + .ops = &iomap_bio_read_ops, 375 + .cur_folio = folio, 376 + }; 377 + struct erofs_iomap_iter_ctx iter_ctx = {}; 378 + 383 379 trace_erofs_read_folio(folio, true); 384 380 385 - iomap_bio_read_folio(folio, &erofs_iomap_ops); 381 + iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx); 386 382 return 0; 387 383 } 388 384 389 385 static void erofs_readahead(struct readahead_control *rac) 390 386 { 387 + struct iomap_read_folio_ctx read_ctx = { 388 + .ops = &iomap_bio_read_ops, 389 + .rac = rac, 390 + }; 391 + struct erofs_iomap_iter_ctx iter_ctx = {}; 392 + 391 393 trace_erofs_readahead(rac->mapping->host, readahead_index(rac), 392 394 readahead_count(rac), true); 393 395 394 - iomap_bio_readahead(rac, &erofs_iomap_ops); 396 + iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx); 395 397 } 396 398 397 399 static sector_t erofs_bmap(struct address_space *mapping, sector_t block) ··· 423 401 if (IS_DAX(inode)) 424 402 return dax_iomap_rw(iocb, to, &erofs_iomap_ops); 425 403 #endif 426 - if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) 404 + if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) { 405 + struct erofs_iomap_iter_ctx iter_ctx = {}; 406 + 427 407 return iomap_dio_rw(iocb, to, &erofs_iomap_ops, 428 - NULL, 0, NULL, 0); 408 + NULL, 0, &iter_ctx, 0); 409 + } 429 410 
return filemap_read(iocb, to, 0); 430 411 } 431 412
+2 -2
fs/fuse/file.c
··· 979 979 return -EIO; 980 980 } 981 981 982 - iomap_read_folio(&fuse_iomap_ops, &ctx); 982 + iomap_read_folio(&fuse_iomap_ops, &ctx, NULL); 983 983 fuse_invalidate_atime(inode); 984 984 return 0; 985 985 } ··· 1081 1081 if (fuse_is_bad(inode)) 1082 1082 return; 1083 1083 1084 - iomap_readahead(&fuse_iomap_ops, &ctx); 1084 + iomap_readahead(&fuse_iomap_ops, &ctx, NULL); 1085 1085 } 1086 1086 1087 1087 static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
+31 -26
fs/iomap/buffered-io.c
··· 418 418 struct iomap_folio_state *ifs = folio->private; 419 419 420 420 if (ifs) { 421 - size_t len = folio_size(folio); 422 - 423 421 /* 424 422 * ifs->read_bytes_pending is used to track how many bytes are 425 423 * read in asynchronously by the IO helper. We need to track ··· 425 427 * reading in all the necessary ranges of the folio and can end 426 428 * the read. 427 429 * 428 - * Increase ->read_bytes_pending by the folio size to start, and 429 - * add a +1 bias. We'll subtract the bias and any uptodate / 430 - * zeroed ranges that did not require IO in iomap_read_end() 431 - * after we're done processing the folio. 430 + * Increase ->read_bytes_pending by the folio size to start. 431 + * We'll subtract any uptodate / zeroed ranges that did not 432 + * require IO in iomap_read_end() after we're done processing 433 + * the folio. 432 434 * 433 435 * We do this because otherwise, we would have to increment 434 436 * ifs->read_bytes_pending every time a range in the folio needs 435 437 * to be read in, which can get expensive since the spinlock 436 438 * needs to be held whenever modifying ifs->read_bytes_pending. 437 - * 438 - * We add the bias to ensure the read has not been ended on the 439 - * folio when iomap_read_end() is called, even if the IO helper 440 - * has already finished reading in the entire folio. 441 439 */ 442 440 spin_lock_irq(&ifs->state_lock); 443 441 WARN_ON_ONCE(ifs->read_bytes_pending != 0); 444 - ifs->read_bytes_pending = len + 1; 442 + ifs->read_bytes_pending = folio_size(folio); 445 443 spin_unlock_irq(&ifs->state_lock); 446 444 } 447 445 } ··· 468 474 469 475 /* 470 476 * Subtract any bytes that were initially accounted to 471 - * read_bytes_pending but skipped for IO. The +1 accounts for 472 - * the bias we added in iomap_read_init(). 477 + * read_bytes_pending but skipped for IO. 
473 478 */ 474 - ifs->read_bytes_pending -= 475 - (folio_size(folio) + 1 - bytes_submitted); 479 + ifs->read_bytes_pending -= folio_size(folio) - bytes_submitted; 476 480 477 481 /* 478 482 * If !ifs->read_bytes_pending, this means all pending reads by ··· 484 492 spin_unlock_irq(&ifs->state_lock); 485 493 if (end_read) 486 494 folio_end_read(folio, uptodate); 487 - } else if (!bytes_submitted) { 495 + } else { 488 496 /* 489 - * If there were no bytes submitted, this means we are 490 - * responsible for unlocking the folio here, since no IO helper 491 - * has taken ownership of it. If there were bytes submitted, 492 - * then the IO helper will end the read via 493 - * iomap_finish_folio_read(). 497 + * If a folio without an ifs is submitted to the IO helper, the 498 + * read must be on the entire folio and the IO helper takes 499 + * ownership of the folio. This means we should only enter 500 + * iomap_read_end() for the !ifs case if no bytes were submitted 501 + * to the IO helper, in which case we are responsible for 502 + * unlocking the folio here. 
494 503 */ 504 + WARN_ON_ONCE(bytes_submitted); 495 505 folio_unlock(folio); 496 506 } 497 507 } ··· 505 511 loff_t pos = iter->pos; 506 512 loff_t length = iomap_length(iter); 507 513 struct folio *folio = ctx->cur_folio; 514 + size_t folio_len = folio_size(folio); 508 515 size_t poff, plen; 509 516 loff_t pos_diff; 510 517 int ret; ··· 519 524 520 525 ifs_alloc(iter->inode, folio, iter->flags); 521 526 522 - length = min_t(loff_t, length, 523 - folio_size(folio) - offset_in_folio(folio, pos)); 527 + length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos)); 524 528 while (length) { 525 529 iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, 526 530 &plen); ··· 549 555 plen, ret, GFP_NOFS); 550 556 if (ret) 551 557 return ret; 558 + 552 559 *bytes_submitted += plen; 560 + /* 561 + * If the entire folio has been read in by the IO 562 + * helper, then the helper owns the folio and will end 563 + * the read on it. 564 + */ 565 + if (*bytes_submitted == folio_len) 566 + ctx->cur_folio = NULL; 553 567 } 554 568 555 569 ret = iomap_iter_advance(iter, plen); ··· 570 568 } 571 569 572 570 void iomap_read_folio(const struct iomap_ops *ops, 573 - struct iomap_read_folio_ctx *ctx) 571 + struct iomap_read_folio_ctx *ctx, void *private) 574 572 { 575 573 struct folio *folio = ctx->cur_folio; 576 574 struct iomap_iter iter = { 577 575 .inode = folio->mapping->host, 578 576 .pos = folio_pos(folio), 579 577 .len = folio_size(folio), 578 + .private = private, 580 579 }; 581 580 size_t bytes_submitted = 0; 582 581 int ret; ··· 591 588 if (ctx->ops->submit_read) 592 589 ctx->ops->submit_read(ctx); 593 590 594 - iomap_read_end(folio, bytes_submitted); 591 + if (ctx->cur_folio) 592 + iomap_read_end(ctx->cur_folio, bytes_submitted); 595 593 } 596 594 EXPORT_SYMBOL_GPL(iomap_read_folio); 597 595 ··· 637 633 * the filesystem to be reentered. 
638 634 */ 639 635 void iomap_readahead(const struct iomap_ops *ops, 640 - struct iomap_read_folio_ctx *ctx) 636 + struct iomap_read_folio_ctx *ctx, void *private) 641 637 { 642 638 struct readahead_control *rac = ctx->rac; 643 639 struct iomap_iter iter = { 644 640 .inode = rac->mapping->host, 645 641 .pos = readahead_pos(rac), 646 642 .len = readahead_length(rac), 643 + .private = private, 647 644 }; 648 645 size_t cur_bytes_submitted; 649 646
+4 -4
include/linux/iomap.h
··· 345 345 const struct iomap_ops *ops, 346 346 const struct iomap_write_ops *write_ops, void *private); 347 347 void iomap_read_folio(const struct iomap_ops *ops, 348 - struct iomap_read_folio_ctx *ctx); 348 + struct iomap_read_folio_ctx *ctx, void *private); 349 349 void iomap_readahead(const struct iomap_ops *ops, 350 - struct iomap_read_folio_ctx *ctx); 350 + struct iomap_read_folio_ctx *ctx, void *private); 351 351 bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); 352 352 struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); 353 353 bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); ··· 599 599 .cur_folio = folio, 600 600 }; 601 601 602 - iomap_read_folio(ops, &ctx); 602 + iomap_read_folio(ops, &ctx, NULL); 603 603 } 604 604 605 605 static inline void iomap_bio_readahead(struct readahead_control *rac, ··· 610 610 .rac = rac, 611 611 }; 612 612 613 - iomap_readahead(ops, &ctx); 613 + iomap_readahead(ops, &ctx, NULL); 614 614 } 615 615 #endif /* CONFIG_BLOCK */ 616 616