Merge tag 'erofs-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

+1 -22

fs/erofs/compress.h

··· 29 29 char *name; 30 30 }; 31 31 32 - /* some special page->private (unsigned long, see below) */ 33 32 #define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2) 34 - #define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2) 35 - 36 - /* 37 - * For all pages in a pcluster, page->private should be one of 38 - * Type Last 2bits page->private 39 - * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE 40 - * preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE 41 - * cached/managed page 00 pointer to z_erofs_pcluster 42 - * online page (file-backed, 01/10/11 sub-index << 2 | count 43 - * some pages can be used for inplace I/O) 44 - * 45 - * page->mapping should be one of 46 - * Type page->mapping 47 - * short-lived page NULL 48 - * preallocated page NULL 49 - * cached/managed page non-NULL or NULL (invalidated/truncated page) 50 - * online page non-NULL 51 - * 52 - * For all managed pages, PG_private should be set with 1 extra refcount, 53 - * which is used for page reclaim / migration. 54 - */ 33 + #define Z_EROFS_PREALLOCATED_FOLIO ((void *)(-2UL << 2)) 55 34 56 35 /* 57 36 * Currently, short-lived pages are pages directly from buddy system

+2 -1

fs/erofs/erofs_fs.h

··· 9 9 #ifndef __EROFS_FS_H 10 10 #define __EROFS_FS_H 11 11 12 + /* to allow for x86 boot sectors and other oddities. */ 12 13 #define EROFS_SUPER_OFFSET 1024 13 14 14 15 #define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 ··· 55 54 /* erofs on-disk super block (currently 128 bytes) */ 56 55 struct erofs_super_block { 57 56 __le32 magic; /* file system magic number */ 58 - __le32 checksum; /* crc32c(super_block) */ 57 + __le32 checksum; /* crc32c to avoid unexpected on-disk overlap */ 59 58 __le32 feature_compat; 60 59 __u8 blkszbits; /* filesystem block size in bit shift */ 61 60 __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */

+2 -2

fs/erofs/fileio.c

··· 6 6 #include <trace/events/erofs.h> 7 7 8 8 struct erofs_fileio_rq { 9 - struct bio_vec bvecs[BIO_MAX_VECS]; 9 + struct bio_vec bvecs[16]; 10 10 struct bio bio; 11 11 struct kiocb iocb; 12 12 struct super_block *sb; ··· 68 68 struct erofs_fileio_rq *rq = kzalloc(sizeof(*rq), 69 69 GFP_KERNEL | __GFP_NOFAIL); 70 70 71 - bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ); 71 + bio_init(&rq->bio, NULL, rq->bvecs, ARRAY_SIZE(rq->bvecs), REQ_OP_READ); 72 72 rq->iocb.ki_filp = mdev->m_dif->file; 73 73 rq->sb = mdev->m_sb; 74 74 return rq;

+11 -21

fs/erofs/super.c

··· 39 39 40 40 static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata) 41 41 { 42 - size_t len = 1 << EROFS_SB(sb)->blkszbits; 43 - struct erofs_super_block *dsb; 44 - u32 expected_crc, crc; 42 + struct erofs_super_block *dsb = sbdata + EROFS_SUPER_OFFSET; 43 + u32 len = 1 << EROFS_SB(sb)->blkszbits, crc; 45 44 46 45 if (len > EROFS_SUPER_OFFSET) 47 46 len -= EROFS_SUPER_OFFSET; 47 + len -= offsetof(struct erofs_super_block, checksum) + 48 + sizeof(dsb->checksum); 48 49 49 - dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, len, GFP_KERNEL); 50 - if (!dsb) 51 - return -ENOMEM; 52 - 53 - expected_crc = le32_to_cpu(dsb->checksum); 54 - dsb->checksum = 0; 55 - /* to allow for x86 boot sectors and other oddities. */ 56 - crc = crc32c(~0, dsb, len); 57 - kfree(dsb); 58 - 59 - if (crc != expected_crc) { 60 - erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", 61 - crc, expected_crc); 62 - return -EBADMSG; 63 - } 64 - return 0; 50 + /* skip .magic(pre-verified) and .checksum(0) fields */ 51 + crc = crc32c(0x5045B54A, (&dsb->checksum) + 1, len); 52 + if (crc == le32_to_cpu(dsb->checksum)) 53 + return 0; 54 + erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", 55 + crc, le32_to_cpu(dsb->checksum)); 56 + return -EBADMSG; 65 57 } 66 58 67 59 static void erofs_inode_init_once(void *ptr) ··· 508 516 errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); 509 517 #endif 510 518 break; 511 - default: 512 - return -ENOPARAM; 513 519 } 514 520 return 0; 515 521 }

+1 -1

fs/erofs/xattr.c

··· 478 478 if (!sbi->xattr_prefix_count) 479 479 return 0; 480 480 481 - pfs = kzalloc(sbi->xattr_prefix_count * sizeof(*pfs), GFP_KERNEL); 481 + pfs = kcalloc(sbi->xattr_prefix_count, sizeof(*pfs), GFP_KERNEL); 482 482 if (!pfs) 483 483 return -ENOMEM; 484 484

+92 -151

fs/erofs/zdata.c

··· 12 12 #define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE) 13 13 #define Z_EROFS_INLINE_BVECS 2 14 14 15 - /* 16 - * let's leave a type here in case of introducing 17 - * another tagged pointer later. 18 - */ 19 - typedef void *z_erofs_next_pcluster_t; 20 - 21 15 struct z_erofs_bvec { 22 16 struct page *page; 23 17 int offset; ··· 42 48 struct lockref lockref; 43 49 44 50 /* A: point to next chained pcluster or TAILs */ 45 - z_erofs_next_pcluster_t next; 51 + struct z_erofs_pcluster *next; 46 52 47 53 /* I: start block address of this pcluster */ 48 54 erofs_off_t index; ··· 88 94 89 95 /* the end of a chain of pclusters */ 90 96 #define Z_EROFS_PCLUSTER_TAIL ((void *) 0x700 + POISON_POINTER_DELTA) 91 - #define Z_EROFS_PCLUSTER_NIL (NULL) 92 97 93 98 struct z_erofs_decompressqueue { 94 99 struct super_block *sb; 100 + struct z_erofs_pcluster *head; 95 101 atomic_t pending_bios; 96 - z_erofs_next_pcluster_t head; 97 102 98 103 union { 99 104 struct completion done; ··· 455 462 } 456 463 457 464 enum z_erofs_pclustermode { 465 + /* It has previously been linked into another processing chain */ 458 466 Z_EROFS_PCLUSTER_INFLIGHT, 459 467 /* 460 - * a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it 461 - * could be dispatched into bypass queue later due to uptodated managed 462 - * pages. All related online pages cannot be reused for inplace I/O (or 463 - * bvpage) since it can be directly decoded without I/O submission. 468 + * A weaker form of Z_EROFS_PCLUSTER_FOLLOWED; the difference is that it 469 + * may be dispatched to the bypass queue later due to uptodated managed 470 + * folios. All file-backed folios related to this pcluster cannot be 471 + * reused for in-place I/O (or bvpage) since the pcluster may be decoded 472 + * in a separate queue (and thus out of order). 464 473 */ 465 474 Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE, 466 475 /* 467 - * The pcluster was just linked to a decompression chain by us. It can 468 - * also be linked with the remaining pclusters, which means if the 469 - * processing page is the tail page of a pcluster, this pcluster can 470 - * safely use the whole page (since the previous pcluster is within the 471 - * same chain) for in-place I/O, as illustrated below: 472 - * ___________________________________________________ 473 - * | tail (partial) page | head (partial) page | 474 - * | (of the current pcl) | (of the previous pcl) | 475 - * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____| 476 - * 477 - * [ (*) the page above can be used as inplace I/O. ] 476 + * The pcluster has just been linked to our processing chain. 477 + * File-backed folios (except for the head page) related to it can be 478 + * used for in-place I/O (or bvpage). 478 479 */ 479 480 Z_EROFS_PCLUSTER_FOLLOWED, 480 481 }; 481 482 482 - struct z_erofs_decompress_frontend { 483 + struct z_erofs_frontend { 483 484 struct inode *const inode; 484 485 struct erofs_map_blocks map; 485 486 struct z_erofs_bvec_iter biter; 486 487 487 488 struct page *pagepool; 488 489 struct page *candidate_bvpage; 489 - struct z_erofs_pcluster *pcl; 490 - z_erofs_next_pcluster_t owned_head; 490 + struct z_erofs_pcluster *pcl, *head; 491 491 enum z_erofs_pclustermode mode; 492 492 493 493 erofs_off_t headoffset; ··· 489 503 unsigned int icur; 490 504 }; 491 505 492 - #define DECOMPRESS_FRONTEND_INIT(__i) { \ 493 - .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ 494 - .mode = Z_EROFS_PCLUSTER_FOLLOWED } 506 + #define Z_EROFS_DEFINE_FRONTEND(fe, i, ho) struct z_erofs_frontend fe = { \ 507 + .inode = i, .head = Z_EROFS_PCLUSTER_TAIL, \ 508 + .mode = Z_EROFS_PCLUSTER_FOLLOWED, .headoffset = ho } 495 509 496 - static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe) 510 + static bool z_erofs_should_alloc_cache(struct z_erofs_frontend *fe) 497 511 { 498 512 unsigned int cachestrategy = EROFS_I_SB(fe->inode)->opt.cache_strategy; 499 513 ··· 510 524 return false; 511 525 } 512 526 513 - static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) 527 + static void z_erofs_bind_cache(struct z_erofs_frontend *fe) 514 528 { 515 529 struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); 516 530 struct z_erofs_pcluster *pcl = fe->pcl; 517 531 unsigned int pclusterpages = z_erofs_pclusterpages(pcl); 518 532 bool shouldalloc = z_erofs_should_alloc_cache(fe); 519 - bool standalone = true; 520 - /* 521 - * optimistic allocation without direct reclaim since inplace I/O 522 - * can be used if low memory otherwise. 523 - */ 533 + bool may_bypass = true; 534 + /* Optimistic allocation, as in-place I/O can be used as a fallback */ 524 535 gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | 525 536 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; 537 + struct folio *folio, *newfolio; 526 538 unsigned int i; 527 539 528 540 if (i_blocksize(fe->inode) != PAGE_SIZE || ··· 528 544 return; 529 545 530 546 for (i = 0; i < pclusterpages; ++i) { 531 - struct page *page, *newpage; 532 - 533 547 /* Inaccurate check w/o locking to avoid unneeded lookups */ 534 548 if (READ_ONCE(pcl->compressed_bvecs[i].page)) 535 549 continue; 536 550 537 - page = find_get_page(mc, pcl->index + i); 538 - if (!page) { 539 - /* I/O is needed, no possible to decompress directly */ 540 - standalone = false; 551 + folio = filemap_get_folio(mc, pcl->index + i); 552 + if (IS_ERR(folio)) { 553 + may_bypass = false; 541 554 if (!shouldalloc) 542 555 continue; 543 556 544 557 /* 545 - * Try cached I/O if allocation succeeds or fallback to 546 - * in-place I/O instead to avoid any direct reclaim. 558 + * Allocate a managed folio for cached I/O, or it may be 559 + * then filled with a file-backed folio for in-place I/O 547 560 */ 548 - newpage = erofs_allocpage(&fe->pagepool, gfp); 549 - if (!newpage) 561 + newfolio = filemap_alloc_folio(gfp, 0); 562 + if (!newfolio) 550 563 continue; 551 - set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); 564 + newfolio->private = Z_EROFS_PREALLOCATED_FOLIO; 565 + folio = NULL; 552 566 } 553 567 spin_lock(&pcl->lockref.lock); 554 568 if (!pcl->compressed_bvecs[i].page) { 555 - pcl->compressed_bvecs[i].page = page ? page : newpage; 569 + pcl->compressed_bvecs[i].page = 570 + folio_page(folio ?: newfolio, 0); 556 571 spin_unlock(&pcl->lockref.lock); 557 572 continue; 558 573 } 559 574 spin_unlock(&pcl->lockref.lock); 560 - 561 - if (page) 562 - put_page(page); 563 - else if (newpage) 564 - erofs_pagepool_add(&fe->pagepool, newpage); 575 + folio_put(folio ?: newfolio); 565 576 } 566 577 567 578 /* 568 - * don't do inplace I/O if all compressed pages are available in 569 - * managed cache since it can be moved to the bypass queue instead. 579 + * Don't perform in-place I/O if all compressed pages are available in 580 + * the managed cache, as the pcluster can be moved to the bypass queue. 570 581 */ 571 - if (standalone) 582 + if (may_bypass) 572 583 fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; 573 584 } 574 585 ··· 660 681 } 661 682 662 683 /* callers must be with pcluster lock held */ 663 - static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, 684 + static int z_erofs_attach_page(struct z_erofs_frontend *fe, 664 685 struct z_erofs_bvec *bvec, bool exclusive) 665 686 { 666 687 struct z_erofs_pcluster *pcl = fe->pcl; ··· 706 727 return true; 707 728 } 708 729 709 - static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) 730 + static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) 710 731 { 711 732 struct erofs_map_blocks *map = &fe->map; 712 733 struct super_block *sb = fe->inode->i_sb; ··· 730 751 pcl->algorithmformat = map->m_algorithmformat; 731 752 pcl->length = 0; 732 753 pcl->partial = true; 733 - 734 - /* new pclusters should be claimed as type 1, primary and followed */ 735 - pcl->next = fe->owned_head; 754 + pcl->next = fe->head; 736 755 pcl->pageofs_out = map->m_la & ~PAGE_MASK; 737 756 fe->mode = Z_EROFS_PCLUSTER_FOLLOWED; 738 757 ··· 766 789 goto err_out; 767 790 } 768 791 } 769 - fe->owned_head = &pcl->next; 770 - fe->pcl = pcl; 792 + fe->head = fe->pcl = pcl; 771 793 return 0; 772 794 773 795 err_out: ··· 775 799 return err; 776 800 } 777 801 778 - static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) 802 + static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) 779 803 { 780 804 struct erofs_map_blocks *map = &fe->map; 781 805 struct super_block *sb = fe->inode->i_sb; ··· 785 809 786 810 DBG_BUGON(fe->pcl); 787 811 /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */ 788 - DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); 812 + DBG_BUGON(!fe->head); 789 813 790 814 if (!(map->m_flags & EROFS_MAP_META)) { 791 815 while (1) { ··· 813 837 if (ret == -EEXIST) { 814 838 mutex_lock(&fe->pcl->lock); 815 839 /* check if this pcluster hasn't been linked into any chain. */ 816 - if (cmpxchg(&fe->pcl->next, Z_EROFS_PCLUSTER_NIL, 817 - fe->owned_head) == Z_EROFS_PCLUSTER_NIL) { 840 + if (!cmpxchg(&fe->pcl->next, NULL, fe->head)) { 818 841 /* .. so it can be attached to our submission chain */ 819 - fe->owned_head = &fe->pcl->next; 842 + fe->head = fe->pcl; 820 843 fe->mode = Z_EROFS_PCLUSTER_FOLLOWED; 821 844 } else { /* otherwise, it belongs to an inflight chain */ 822 845 fe->mode = Z_EROFS_PCLUSTER_INFLIGHT; ··· 848 873 return 0; 849 874 } 850 875 851 - /* 852 - * keep in mind that no referenced pclusters will be freed 853 - * only after a RCU grace period. 854 - */ 855 876 static void z_erofs_rcu_callback(struct rcu_head *head) 856 877 { 857 - z_erofs_free_pcluster(container_of(head, 858 - struct z_erofs_pcluster, rcu)); 878 + z_erofs_free_pcluster(container_of(head, struct z_erofs_pcluster, rcu)); 859 879 } 860 880 861 881 static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi, ··· 892 922 return free; 893 923 } 894 924 895 - unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, 896 - unsigned long nr_shrink) 925 + unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, unsigned long nr) 897 926 { 898 927 struct z_erofs_pcluster *pcl; 899 - unsigned int freed = 0; 900 - unsigned long index; 928 + unsigned long index, freed = 0; 901 929 902 930 xa_lock(&sbi->managed_pslots); 903 931 xa_for_each(&sbi->managed_pslots, index, pcl) { ··· 905 937 xa_unlock(&sbi->managed_pslots); 906 938 907 939 ++freed; 908 - if (!--nr_shrink) 940 + if (!--nr) 909 941 return freed; 910 942 xa_lock(&sbi->managed_pslots); 911 943 } ··· 934 966 call_rcu(&pcl->rcu, z_erofs_rcu_callback); 935 967 } 936 968 937 - static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe) 969 + static void z_erofs_pcluster_end(struct z_erofs_frontend *fe) 938 970 { 939 971 struct z_erofs_pcluster *pcl = fe->pcl; 940 972 ··· 947 979 if (fe->candidate_bvpage) 948 980 fe->candidate_bvpage = NULL; 949 981 950 - /* 951 - * if all pending pages are added, don't hold its reference 952 - * any longer if the pcluster isn't hosted by ourselves. 953 - */ 982 + /* Drop refcount if it doesn't belong to our processing chain */ 954 983 if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE) 955 984 z_erofs_put_pcluster(EROFS_I_SB(fe->inode), pcl, false); 956 - 957 985 fe->pcl = NULL; 958 986 } 959 987 ··· 978 1014 return 0; 979 1015 } 980 1016 981 - static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f, 1017 + static int z_erofs_scan_folio(struct z_erofs_frontend *f, 982 1018 struct folio *folio, bool ra) 983 1019 { 984 1020 struct inode *const inode = f->inode; ··· 1093 1129 return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page); 1094 1130 } 1095 1131 1096 - struct z_erofs_decompress_backend { 1132 + struct z_erofs_backend { 1097 1133 struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES]; 1098 1134 struct super_block *sb; 1099 1135 struct z_erofs_pcluster *pcl; ··· 1113 1149 struct list_head list; 1114 1150 }; 1115 1151 1116 - static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, 1152 + static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be, 1117 1153 struct z_erofs_bvec *bvec) 1118 1154 { 1119 1155 struct z_erofs_bvec_item *item; ··· 1136 1172 list_add(&item->list, &be->decompressed_secondary_bvecs); 1137 1173 } 1138 1174 1139 - static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, 1140 - int err) 1175 + static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err) 1141 1176 { 1142 1177 unsigned int off0 = be->pcl->pageofs_out; 1143 1178 struct list_head *p, *n; ··· 1177 1214 } 1178 1215 } 1179 1216 1180 - static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be) 1217 + static void z_erofs_parse_out_bvecs(struct z_erofs_backend *be) 1181 1218 { 1182 1219 struct z_erofs_pcluster *pcl = be->pcl; 1183 1220 struct z_erofs_bvec_iter biter; ··· 1202 1239 z_erofs_put_shortlivedpage(be->pagepool, old_bvpage); 1203 1240 } 1204 1241 1205 - static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be, 1206 - bool *overlapped) 1242 + static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped) 1207 1243 { 1208 1244 struct z_erofs_pcluster *pcl = be->pcl; 1209 1245 unsigned int pclusterpages = z_erofs_pclusterpages(pcl); ··· 1237 1275 return err; 1238 1276 } 1239 1277 1240 - static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, 1241 - int err) 1278 + static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) 1242 1279 { 1243 1280 struct erofs_sb_info *const sbi = EROFS_SB(be->sb); 1244 1281 struct z_erofs_pcluster *pcl = be->pcl; ··· 1354 1393 pcl->vcnt = 0; 1355 1394 1356 1395 /* pcluster lock MUST be taken before the following line */ 1357 - WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL); 1396 + WRITE_ONCE(pcl->next, NULL); 1358 1397 mutex_unlock(&pcl->lock); 1359 1398 1360 1399 if (z_erofs_is_inline_pcluster(pcl)) ··· 1367 1406 static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, 1368 1407 struct page **pagepool) 1369 1408 { 1370 - struct z_erofs_decompress_backend be = { 1409 + struct z_erofs_backend be = { 1371 1410 .sb = io->sb, 1372 1411 .pagepool = pagepool, 1373 1412 .decompressed_secondary_bvecs = 1374 1413 LIST_HEAD_INIT(be.decompressed_secondary_bvecs), 1414 + .pcl = io->head, 1375 1415 }; 1376 - z_erofs_next_pcluster_t owned = io->head; 1416 + struct z_erofs_pcluster *next; 1377 1417 int err = io->eio ? -EIO : 0; 1378 1418 1379 - while (owned != Z_EROFS_PCLUSTER_TAIL) { 1380 - DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL); 1381 - 1382 - be.pcl = container_of(owned, struct z_erofs_pcluster, next); 1383 - owned = READ_ONCE(be.pcl->next); 1384 - 1419 + for (; be.pcl != Z_EROFS_PCLUSTER_TAIL; be.pcl = next) { 1420 + DBG_BUGON(!be.pcl); 1421 + next = READ_ONCE(be.pcl->next); 1385 1422 err = z_erofs_decompress_pcluster(&be, err) ?: err; 1386 1423 } 1387 1424 return err; ··· 1445 1486 } 1446 1487 1447 1488 static void z_erofs_fill_bio_vec(struct bio_vec *bvec, 1448 - struct z_erofs_decompress_frontend *f, 1489 + struct z_erofs_frontend *f, 1449 1490 struct z_erofs_pcluster *pcl, 1450 1491 unsigned int nr, 1451 1492 struct address_space *mc) ··· 1472 1513 DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page)); 1473 1514 1474 1515 folio = page_folio(zbv.page); 1475 - /* 1476 - * Handle preallocated cached folios. We tried to allocate such folios 1477 - * without triggering direct reclaim. If allocation failed, inplace 1478 - * file-backed folios will be used instead. 1479 - */ 1480 - if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) { 1516 + /* For preallocated managed folios, add them to page cache here */ 1517 + if (folio->private == Z_EROFS_PREALLOCATED_FOLIO) { 1481 1518 tocache = true; 1482 1519 goto out_tocache; 1483 1520 } ··· 1585 1630 NR_JOBQUEUES, 1586 1631 }; 1587 1632 1588 - static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, 1589 - z_erofs_next_pcluster_t qtail[], 1590 - z_erofs_next_pcluster_t owned_head) 1633 + static void z_erofs_move_to_bypass_queue(struct z_erofs_pcluster *pcl, 1634 + struct z_erofs_pcluster *next, 1635 + struct z_erofs_pcluster **qtail[]) 1591 1636 { 1592 - z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT]; 1593 - z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS]; 1594 - 1595 1637 WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL); 1596 - 1597 - WRITE_ONCE(*submit_qtail, owned_head); 1598 - WRITE_ONCE(*bypass_qtail, &pcl->next); 1599 - 1638 + WRITE_ONCE(*qtail[JQ_SUBMIT], next); 1639 + WRITE_ONCE(*qtail[JQ_BYPASS], pcl); 1600 1640 qtail[JQ_BYPASS] = &pcl->next; 1601 1641 } 1602 1642 ··· 1620 1670 bio_put(bio); 1621 1671 } 1622 1672 1623 - static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, 1673 + static void z_erofs_submit_queue(struct z_erofs_frontend *f, 1624 1674 struct z_erofs_decompressqueue *fgq, 1625 1675 bool *force_fg, bool readahead) 1626 1676 { 1627 1677 struct super_block *sb = f->inode->i_sb; 1628 1678 struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb)); 1629 - z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; 1679 + struct z_erofs_pcluster **qtail[NR_JOBQUEUES]; 1630 1680 struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; 1631 - z_erofs_next_pcluster_t owned_head = f->owned_head; 1681 + struct z_erofs_pcluster *pcl, *next; 1632 1682 /* bio is NULL initially, so no need to initialize last_{index,bdev} */ 1633 1683 erofs_off_t last_pa; 1634 1684 unsigned int nr_bios = 0; ··· 1644 1694 qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head; 1645 1695 1646 1696 /* by default, all need io submission */ 1647 - q[JQ_SUBMIT]->head = owned_head; 1697 + q[JQ_SUBMIT]->head = next = f->head; 1648 1698 1649 1699 do { 1650 1700 struct erofs_map_dev mdev; 1651 - struct z_erofs_pcluster *pcl; 1652 1701 erofs_off_t cur, end; 1653 1702 struct bio_vec bvec; 1654 1703 unsigned int i = 0; 1655 1704 bool bypass = true; 1656 1705 1657 - DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL); 1658 - pcl = container_of(owned_head, struct z_erofs_pcluster, next); 1659 - owned_head = READ_ONCE(pcl->next); 1660 - 1706 + pcl = next; 1707 + next = READ_ONCE(pcl->next); 1661 1708 if (z_erofs_is_inline_pcluster(pcl)) { 1662 - move_to_bypass_jobqueue(pcl, qtail, owned_head); 1709 + z_erofs_move_to_bypass_queue(pcl, next, qtail); 1663 1710 continue; 1664 1711 } 1665 1712 ··· 1728 1781 if (!bypass) 1729 1782 qtail[JQ_SUBMIT] = &pcl->next; 1730 1783 else 1731 - move_to_bypass_jobqueue(pcl, qtail, owned_head); 1732 - } while (owned_head != Z_EROFS_PCLUSTER_TAIL); 1784 + z_erofs_move_to_bypass_queue(pcl, next, qtail); 1785 + } while (next != Z_EROFS_PCLUSTER_TAIL); 1733 1786 1734 1787 if (bio) { 1735 1788 if (erofs_is_fileio_mode(EROFS_SB(sb))) ··· 1753 1806 z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios); 1754 1807 } 1755 1808 1756 - static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f, 1757 - unsigned int ra_folios) 1809 + static int z_erofs_runqueue(struct z_erofs_frontend *f, unsigned int rapages) 1758 1810 { 1759 1811 struct z_erofs_decompressqueue io[NR_JOBQUEUES]; 1760 1812 struct erofs_sb_info *sbi = EROFS_I_SB(f->inode); 1761 - bool force_fg = z_erofs_is_sync_decompress(sbi, ra_folios); 1813 + bool force_fg = z_erofs_is_sync_decompress(sbi, rapages); 1762 1814 int err; 1763 1815 1764 - if (f->owned_head == Z_EROFS_PCLUSTER_TAIL) 1816 + if (f->head == Z_EROFS_PCLUSTER_TAIL) 1765 1817 return 0; 1766 - z_erofs_submit_queue(f, io, &force_fg, !!ra_folios); 1818 + z_erofs_submit_queue(f, io, &force_fg, !!rapages); 1767 1819 1768 1820 /* handle bypass queue (no i/o pclusters) immediately */ 1769 1821 err = z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool); ··· 1780 1834 * Since partial uptodate is still unimplemented for now, we have to use 1781 1835 * approximate readmore strategies as a start. 1782 1836 */ 1783 - static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, 1837 + static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f, 1784 1838 struct readahead_control *rac, bool backmost) 1785 1839 { 1786 1840 struct inode *inode = f->inode; ··· 1835 1889 static int z_erofs_read_folio(struct file *file, struct folio *folio) 1836 1890 { 1837 1891 struct inode *const inode = folio->mapping->host; 1838 - struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); 1892 + Z_EROFS_DEFINE_FRONTEND(f, inode, folio_pos(folio)); 1839 1893 int err; 1840 1894 1841 1895 trace_erofs_read_folio(folio, false); 1842 - f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; 1843 - 1844 1896 z_erofs_pcluster_readmore(&f, NULL, true); 1845 1897 err = z_erofs_scan_folio(&f, folio, false); 1846 1898 z_erofs_pcluster_readmore(&f, NULL, false); ··· 1858 1914 static void z_erofs_readahead(struct readahead_control *rac) 1859 1915 { 1860 1916 struct inode *const inode = rac->mapping->host; 1861 - struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); 1917 + Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac)); 1862 1918 struct folio *head = NULL, *folio; 1863 - unsigned int nr_folios; 1919 + unsigned int nrpages = readahead_count(rac); 1864 1920 int err; 1865 1921 1866 - f.headoffset = readahead_pos(rac); 1867 - 1868 1922 z_erofs_pcluster_readmore(&f, rac, true); 1869 - nr_folios = readahead_count(rac); 1870 - trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false); 1871 - 1923 + nrpages = readahead_count(rac); 1924 + trace_erofs_readpages(inode, readahead_index(rac), nrpages, false); 1872 1925 while ((folio = readahead_folio(rac))) { 1873 1926 folio->private = head; 1874 1927 head = folio; ··· 1884 1943 z_erofs_pcluster_readmore(&f, rac, false); 1885 1944 z_erofs_pcluster_end(&f); 1886 1945 1887 - (void)z_erofs_runqueue(&f, nr_folios); 1946 + (void)z_erofs_runqueue(&f, nrpages); 1888 1947 erofs_put_metabuf(&f.map.buf); 1889 1948 erofs_release_pages(&f.pagepool); 1890 1949 }

+52 -73

fs/erofs/zmap.c

··· 97 97 return d1; 98 98 } 99 99 100 - static int unpack_compacted_index(struct z_erofs_maprecorder *m, 101 - unsigned int amortizedshift, 102 - erofs_off_t pos, bool lookahead) 100 + static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, 101 + unsigned long lcn, bool lookahead) 103 102 { 104 - struct erofs_inode *const vi = EROFS_I(m->inode); 103 + struct inode *const inode = m->inode; 104 + struct erofs_inode *const vi = EROFS_I(inode); 105 + const erofs_off_t ebase = sizeof(struct z_erofs_map_header) + 106 + ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); 105 107 const unsigned int lclusterbits = vi->z_logical_clusterbits; 108 + const unsigned int totalidx = erofs_iblks(inode); 109 + unsigned int compacted_4b_initial, compacted_2b, amortizedshift; 106 110 unsigned int vcnt, lo, lobits, encodebits, nblk, bytes; 107 - bool big_pcluster; 111 + bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; 112 + erofs_off_t pos; 108 113 u8 *in, type; 109 114 int i; 110 115 116 + if (lcn >= totalidx || lclusterbits > 14) 117 + return -EINVAL; 118 + 119 + m->lcn = lcn; 120 + /* used to align to 32-byte (compacted_2b) alignment */ 121 + compacted_4b_initial = ((32 - ebase % 32) / 4) & 7; 122 + compacted_2b = 0; 123 + if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) && 124 + compacted_4b_initial < totalidx) 125 + compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); 126 + 127 + pos = ebase; 128 + amortizedshift = 2; /* compact_4b */ 129 + if (lcn >= compacted_4b_initial) { 130 + pos += compacted_4b_initial * 4; 131 + lcn -= compacted_4b_initial; 132 + if (lcn < compacted_2b) { 133 + amortizedshift = 1; 134 + } else { 135 + pos += compacted_2b * 2; 136 + lcn -= compacted_2b; 137 + } 138 + } 139 + pos += lcn * (1 << amortizedshift); 140 + 141 + /* figure out the lcluster count in this pack */ 111 142 if (1 << amortizedshift == 4 && lclusterbits <= 14) 112 143 vcnt = 2; 113 144 else if (1 << amortizedshift == 2 && lclusterbits <= 12) ··· 153 122 /* it doesn't equal to round_up(..) */ 154 123 m->nextpackoff = round_down(pos, vcnt << amortizedshift) + 155 124 (vcnt << amortizedshift); 156 - big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; 157 125 lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U); 158 126 encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; 159 127 bytes = pos & ((vcnt << amortizedshift) - 1); ··· 237 207 return 0; 238 208 } 239 209 240 - static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, 241 - unsigned long lcn, bool lookahead) 242 - { 243 - struct inode *const inode = m->inode; 244 - struct erofs_inode *const vi = EROFS_I(inode); 245 - const erofs_off_t ebase = sizeof(struct z_erofs_map_header) + 246 - ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); 247 - unsigned int totalidx = erofs_iblks(inode); 248 - unsigned int compacted_4b_initial, compacted_2b; 249 - unsigned int amortizedshift; 250 - erofs_off_t pos; 251 - 252 - if (lcn >= totalidx || vi->z_logical_clusterbits > 14) 253 - return -EINVAL; 254 - 255 - m->lcn = lcn; 256 - /* used to align to 32-byte (compacted_2b) alignment */ 257 - compacted_4b_initial = (32 - ebase % 32) / 4; 258 - if (compacted_4b_initial == 32 / 4) 259 - compacted_4b_initial = 0; 260 - 261 - if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) && 262 - compacted_4b_initial < totalidx) 263 - compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); 264 - else 265 - compacted_2b = 0; 266 - 267 - pos = ebase; 268 - if (lcn < compacted_4b_initial) { 269 - amortizedshift = 2; 270 - goto out; 271 - } 272 - pos += compacted_4b_initial * 4; 273 - lcn -= compacted_4b_initial; 274 - 275 - if (lcn < compacted_2b) { 276 - amortizedshift = 1; 277 - goto out; 278 - } 279 - pos += compacted_2b * 2; 280 - lcn -= compacted_2b; 281 - amortizedshift = 2; 282 - out: 283 - pos += lcn * (1 << amortizedshift); 284 - return unpack_compacted_index(m, amortizedshift, pos, lookahead); 285 - } 286 - 287 210 static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, 288 211 unsigned int lcn, bool lookahead) 289 212 { ··· 294 311 static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, 295 312 unsigned int initial_lcn) 296 313 { 297 - struct super_block *sb = m->inode->i_sb; 298 - struct erofs_inode *const vi = EROFS_I(m->inode); 299 - struct erofs_map_blocks *const map = m->map; 300 - const unsigned int lclusterbits = vi->z_logical_clusterbits; 301 - unsigned long lcn; 314 + struct inode *inode = m->inode; 315 + struct super_block *sb = inode->i_sb; 316 + struct erofs_inode *vi = EROFS_I(inode); 317 + bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; 318 + bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2; 319 + unsigned long lcn = m->lcn + 1; 302 320 int err; 303 321 304 - DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN && 305 - m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 && 306 - m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2); 322 + DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); 307 323 DBG_BUGON(m->type != m->headtype); 308 324 309 - if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || 310 - ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) && 311 - !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) || 312 - ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && 313 - !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { 314 - map->m_plen = 1ULL << lclusterbits; 315 - return 0; 316 - } 317 - lcn = m->lcn + 1; 325 + if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) || 326 + ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || 327 + m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) || 328 + (lcn << vi->z_logical_clusterbits) >= inode->i_size) 329 + m->compressedblks = 1; 330 + 318 331 if (m->compressedblks) 319 332 goto out; 320 333 ··· 335 356 case Z_EROFS_LCLUSTER_TYPE_HEAD2: 336 357 /* 337 358 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type 338 - * rather than CBLKCNT, it's a 1 lcluster-sized pcluster. 359 + * rather than CBLKCNT, it's a 1 block-sized pcluster. 339 360 */ 340 - m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits); 361 + m->compressedblks = 1; 341 362 break; 342 363 case Z_EROFS_LCLUSTER_TYPE_NONHEAD: 343 364 if (m->delta[0] != 1) ··· 352 373 return -EFSCORRUPTED; 353 374 } 354 375 out: 355 - map->m_plen = erofs_pos(sb, m->compressedblks); 376 + m->map->m_plen = erofs_pos(sb, m->compressedblks); 356 377 return 0; 357 378 err_bonus_cblkcnt: 358 379 erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);

+1 -1

fs/erofs/zutil.c

··· 243 243 static unsigned long erofs_shrink_count(struct shrinker *shrink, 244 244 struct shrink_control *sc) 245 245 { 246 - return atomic_long_read(&erofs_global_shrink_cnt); 246 + return atomic_long_read(&erofs_global_shrink_cnt) ?: SHRINK_EMPTY; 247 247 } 248 248 249 249 static unsigned long erofs_shrink_scan(struct shrinker *shrink,

Configure Feed

Configure Feed