Merge tag 'erofs-for-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'erofs-for-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
"In this cycle, we continue converting to use meta buffers for all
remaining uncompressed paths to prepare for the upcoming subpage,
folio and fscache features.

We also fixed a double-free issue when sysfs initialization fails,
which was reported by syzbot.

Besides, to make it easier for userspace to control per-file
timestamps, we now record mtime instead of ctime, with a compatible
feature flag marking the change. There are also some code cleanups and
documentation updates as usual.

Summary:

- Avoid using page structure directly for all uncompressed paths

- Fix a double-free issue when sysfs initialization fails

- Complete DAX description for erofs

- Use mtime instead of ctime, since there's no (easy) way for users to
control ctime

- Several code cleanups"

* tag 'erofs-for-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: rename ctime to mtime
erofs: use meta buffers for inode lookup
erofs: use meta buffers for reading directories
fs: erofs: add sanity check for kobject in erofs_unregister_sysfs
erofs: refine managed inode stuffs
erofs: clean up z_erofs_extent_lookback
erofs: silence warnings related to impossible m_plen
Documentation/filesystem/dax: update DAX description on erofs
erofs: clean up preload_compressed_pages()
erofs: get rid of `struct z_erofs_collector'
erofs: use meta buffers for erofs_read_superblock()

Linus Torvalds 4 years ago aab4ed58 881b5687

+189 -201

12 changed files

expand all collapse all

Documentation

filesystems

dax.rst

erofs.rst

erofs

data.c

dir.c

erofs_fs.h

inode.c

internal.h

namei.c

super.c

sysfs.c

zdata.c

zmap.c

+3 -3

Documentation/filesystems/dax.rst

reviewed

··· 23 23 size equal to your kernel's `PAGE_SIZE`, so you may need to specify a block 24 24 size when creating the filesystem. 25 25 26 26 - Currently 4 filesystems support `DAX`: ext2, ext4, xfs and virtiofs. 26 26 + Currently 5 filesystems support `DAX`: ext2, ext4, xfs, virtiofs and erofs. 27 27 Enabling `DAX` on them is different. 28 28 29 29 - Enabling DAX on ext2 30 30 - -------------------- 29 29 + Enabling DAX on ext2 and erofs 30 30 + ------------------------------ 31 31 32 32 When mounting the filesystem, use the ``-o dax`` option on the command line or 33 33 add 'dax' to the options in ``/etc/fstab``. This works to enable `DAX` on all files

+1 -1

Documentation/filesystems/erofs.rst

reviewed

··· 40 40 Inode metadata size 32 bytes 64 bytes 41 41 Max file size 4 GB 16 EB (also limited by max. vol size) 42 42 Max uids/gids 65536 4294967296 43 43 - File change time no yes (64 + 32-bit timestamp) 43 43 + Per-inode timestamp no yes (64 + 32-bit timestamp) 44 44 Max hardlinks 65536 4294967296 45 45 Metadata reserved 4 bytes 14 bytes 46 46 ===================== ============ =====================================

+9 -3

fs/erofs/data.c

reviewed

··· 28 28 buf->page = NULL; 29 29 } 30 30 31 31 - void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, 32 32 - erofs_blk_t blkaddr, enum erofs_kmap_type type) 31 31 + void *erofs_bread(struct erofs_buf *buf, struct inode *inode, 32 32 + erofs_blk_t blkaddr, enum erofs_kmap_type type) 33 33 { 34 34 - struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; 34 34 + struct address_space *const mapping = inode->i_mapping; 35 35 erofs_off_t offset = blknr_to_addr(blkaddr); 36 36 pgoff_t index = offset >> PAGE_SHIFT; 37 37 struct page *page = buf->page; ··· 58 58 if (type == EROFS_NO_KMAP) 59 59 return NULL; 60 60 return buf->base + (offset & ~PAGE_MASK); 61 61 + } 62 62 + 63 63 + void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, 64 64 + erofs_blk_t blkaddr, enum erofs_kmap_type type) 65 65 + { 66 66 + return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type); 61 67 } 62 68 63 69 static int erofs_map_blocks_flatmode(struct inode *inode,

+6 -15

fs/erofs/dir.c

reviewed

··· 2 2 /* 3 3 * Copyright (C) 2017-2018 HUAWEI, Inc. 4 4 * https://www.huawei.com/ 5 5 + * Copyright (C) 2022, Alibaba Cloud 5 6 */ 6 7 #include "internal.h" 7 8 ··· 68 67 static int erofs_readdir(struct file *f, struct dir_context *ctx) 69 68 { 70 69 struct inode *dir = file_inode(f); 71 71 - struct address_space *mapping = dir->i_mapping; 70 70 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 72 71 const size_t dirsize = i_size_read(dir); 73 72 unsigned int i = ctx->pos / EROFS_BLKSIZ; 74 73 unsigned int ofs = ctx->pos % EROFS_BLKSIZ; ··· 76 75 bool initial = true; 77 76 78 77 while (ctx->pos < dirsize) { 79 79 - struct page *dentry_page; 80 78 struct erofs_dirent *de; 81 79 unsigned int nameoff, maxsize; 82 80 83 83 - dentry_page = read_mapping_page(mapping, i, NULL); 84 84 - if (dentry_page == ERR_PTR(-ENOMEM)) { 85 85 - err = -ENOMEM; 86 86 - break; 87 87 - } else if (IS_ERR(dentry_page)) { 81 81 + de = erofs_bread(&buf, dir, i, EROFS_KMAP); 82 82 + if (IS_ERR(de)) { 88 83 erofs_err(dir->i_sb, 89 84 "fail to readdir of logical block %u of nid %llu", 90 85 i, EROFS_I(dir)->nid); 91 91 - err = -EFSCORRUPTED; 86 86 + err = PTR_ERR(de); 92 87 break; 93 88 } 94 89 95 95 - de = (struct erofs_dirent *)kmap(dentry_page); 96 96 - 97 90 nameoff = le16_to_cpu(de->nameoff); 98 98 - 99 91 if (nameoff < sizeof(struct erofs_dirent) || 100 92 nameoff >= PAGE_SIZE) { 101 93 erofs_err(dir->i_sb, ··· 113 119 err = erofs_fill_dentries(dir, ctx, de, &ofs, 114 120 nameoff, maxsize); 115 121 skip_this: 116 116 - kunmap(dentry_page); 117 117 - 118 118 - put_page(dentry_page); 119 119 - 120 122 ctx->pos = blknr_to_addr(i) + ofs; 121 123 122 124 if (err) ··· 120 130 ++i; 121 131 ofs = 0; 122 132 } 133 133 + erofs_put_metabuf(&buf); 123 134 return err < 0 ? err : 0; 124 135 } 125 136

+3 -2

fs/erofs/erofs_fs.h

reviewed

··· 12 12 #define EROFS_SUPER_OFFSET 1024 13 13 14 14 #define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 15 15 + #define EROFS_FEATURE_COMPAT_MTIME 0x00000002 15 16 16 17 /* 17 18 * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should ··· 187 186 188 187 __le32 i_uid; 189 188 __le32 i_gid; 190 190 - __le64 i_ctime; 191 191 - __le32 i_ctime_nsec; 189 189 + __le64 i_mtime; 190 190 + __le32 i_mtime_nsec; 192 191 __le32 i_nlink; 193 192 __u8 i_reserved2[16]; 194 193 };

+2 -2

fs/erofs/inode.c

reviewed

··· 113 113 set_nlink(inode, le32_to_cpu(die->i_nlink)); 114 114 115 115 /* extended inode has its own timestamp */ 116 116 - inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime); 117 117 - inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec); 116 116 + inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime); 117 117 + inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec); 118 118 119 119 inode->i_size = le64_to_cpu(die->i_size); 120 120

fs/erofs/internal.h

reviewed

··· 479 479 extern const struct file_operations erofs_file_fops; 480 480 void erofs_unmap_metabuf(struct erofs_buf *buf); 481 481 void erofs_put_metabuf(struct erofs_buf *buf); 482 482 + void *erofs_bread(struct erofs_buf *buf, struct inode *inode, 483 483 + erofs_blk_t blkaddr, enum erofs_kmap_type type); 482 484 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, 483 485 erofs_blk_t blkaddr, enum erofs_kmap_type type); 484 486 int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);

+24 -30

fs/erofs/namei.c

reviewed

··· 2 2 /* 3 3 * Copyright (C) 2017-2018 HUAWEI, Inc. 4 4 * https://www.huawei.com/ 5 5 + * Copyright (C) 2022, Alibaba Cloud 5 6 */ 6 7 #include "xattr.h" 7 8 ··· 87 86 return ERR_PTR(-ENOENT); 88 87 } 89 88 90 90 - static struct page *find_target_block_classic(struct inode *dir, 91 91 - struct erofs_qstr *name, 92 92 - int *_ndirents) 89 89 + static void *find_target_block_classic(struct erofs_buf *target, 90 90 + struct inode *dir, 91 91 + struct erofs_qstr *name, 92 92 + int *_ndirents) 93 93 { 94 94 unsigned int startprfx, endprfx; 95 95 int head, back; 96 96 - struct address_space *const mapping = dir->i_mapping; 97 97 - struct page *candidate = ERR_PTR(-ENOENT); 96 96 + void *candidate = ERR_PTR(-ENOENT); 98 97 99 98 startprfx = endprfx = 0; 100 99 head = 0; ··· 102 101 103 102 while (head <= back) { 104 103 const int mid = head + (back - head) / 2; 105 105 - struct page *page = read_mapping_page(mapping, mid, NULL); 104 104 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 105 105 + struct erofs_dirent *de; 106 106 107 107 - if (!IS_ERR(page)) { 108 108 - struct erofs_dirent *de = kmap_atomic(page); 107 107 + de = erofs_bread(&buf, dir, mid, EROFS_KMAP); 108 108 + if (!IS_ERR(de)) { 109 109 const int nameoff = nameoff_from_disk(de->nameoff, 110 110 EROFS_BLKSIZ); 111 111 const int ndirents = nameoff / sizeof(*de); ··· 115 113 struct erofs_qstr dname; 116 114 117 115 if (!ndirents) { 118 118 - kunmap_atomic(de); 119 119 - put_page(page); 116 116 + erofs_put_metabuf(&buf); 120 117 erofs_err(dir->i_sb, 121 118 "corrupted dir block %d @ nid %llu", 122 119 mid, EROFS_I(dir)->nid); 123 120 DBG_BUGON(1); 124 124 - page = ERR_PTR(-EFSCORRUPTED); 121 121 + de = ERR_PTR(-EFSCORRUPTED); 125 122 goto out; 126 123 } 127 124 ··· 136 135 137 136 /* string comparison without already matched prefix */ 138 137 diff = erofs_dirnamecmp(name, &dname, &matched); 139 139 - kunmap_atomic(de); 140 138 141 139 if (!diff) { 142 140 *_ndirents = 0; ··· 145 145 startprfx = matched; 146 
146 147 147 if (!IS_ERR(candidate)) 148 148 - put_page(candidate); 149 149 - candidate = page; 148 148 + erofs_put_metabuf(target); 149 149 + *target = buf; 150 150 + candidate = de; 150 151 *_ndirents = ndirents; 151 152 } else { 152 152 - put_page(page); 153 153 + erofs_put_metabuf(&buf); 153 154 154 155 back = mid - 1; 155 156 endprfx = matched; ··· 159 158 } 160 159 out: /* free if the candidate is valid */ 161 160 if (!IS_ERR(candidate)) 162 162 - put_page(candidate); 163 163 - return page; 161 161 + erofs_put_metabuf(target); 162 162 + return de; 164 163 } 165 164 return candidate; 166 165 } ··· 170 169 erofs_nid_t *nid, unsigned int *d_type) 171 170 { 172 171 int ndirents; 173 173 - struct page *page; 174 174 - void *data; 172 172 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 175 173 struct erofs_dirent *de; 176 174 struct erofs_qstr qn; 177 175 ··· 181 181 qn.end = name->name + name->len; 182 182 183 183 ndirents = 0; 184 184 - page = find_target_block_classic(dir, &qn, &ndirents); 185 184 186 186 - if (IS_ERR(page)) 187 187 - return PTR_ERR(page); 185 185 + de = find_target_block_classic(&buf, dir, &qn, &ndirents); 186 186 + if (IS_ERR(de)) 187 187 + return PTR_ERR(de); 188 188 189 189 - data = kmap_atomic(page); 190 189 /* the target page has been mapped */ 191 190 if (ndirents) 192 192 - de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); 193 193 - else 194 194 - de = (struct erofs_dirent *)data; 191 191 + de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents); 195 192 196 193 if (!IS_ERR(de)) { 197 194 *nid = le64_to_cpu(de->nid); 198 195 *d_type = de->file_type; 199 196 } 200 200 - 201 201 - kunmap_atomic(data); 202 202 - put_page(page); 203 203 - 197 197 + erofs_put_metabuf(&buf); 204 198 return PTR_ERR_OR_ZERO(de); 205 199 } 206 200

+11 -10

fs/erofs/super.c

reviewed

··· 281 281 static int erofs_read_superblock(struct super_block *sb) 282 282 { 283 283 struct erofs_sb_info *sbi; 284 284 - struct page *page; 284 284 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 285 285 struct erofs_super_block *dsb; 286 286 unsigned int blkszbits; 287 287 void *data; 288 288 int ret; 289 289 290 290 - page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); 291 291 - if (IS_ERR(page)) { 290 290 + data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); 291 291 + if (IS_ERR(data)) { 292 292 erofs_err(sb, "cannot read erofs superblock"); 293 293 - return PTR_ERR(page); 293 293 + return PTR_ERR(data); 294 294 } 295 295 296 296 sbi = EROFS_SB(sb); 297 297 - 298 298 - data = kmap(page); 299 297 dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); 300 298 301 299 ret = -EINVAL; ··· 363 365 if (erofs_sb_has_ztailpacking(sbi)) 364 366 erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!"); 365 367 out: 366 366 - kunmap(page); 367 367 - put_page(page); 368 368 + erofs_put_metabuf(&buf); 368 369 return ret; 369 370 } 370 371 ··· 532 535 return ret; 533 536 } 534 537 538 538 + /* 539 539 + * It will be called only on inode eviction. In case that there are still some 540 540 + * decompression requests in progress, wait with rescheduling for a bit here. 541 541 + * We could introduce an extra locking instead but it seems unnecessary. 542 542 + */ 535 543 static void erofs_managed_cache_invalidatepage(struct page *page, 536 544 unsigned int offset, 537 545 unsigned int length) ··· 570 568 inode->i_size = OFFSET_MAX; 571 569 572 570 inode->i_mapping->a_ops = &managed_cache_aops; 573 573 - mapping_set_gfp_mask(inode->i_mapping, 574 574 - GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE); 571 571 + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 575 572 sbi->managed_cache = inode; 576 573 return 0; 577 574 }

+5 -3

fs/erofs/sysfs.c

reviewed

··· 221 221 { 222 222 struct erofs_sb_info *sbi = EROFS_SB(sb); 223 223 224 224 - kobject_del(&sbi->s_kobj); 225 225 - kobject_put(&sbi->s_kobj); 226 226 - wait_for_completion(&sbi->s_kobj_unregister); 224 224 + if (sbi->s_kobj.state_in_sysfs) { 225 225 + kobject_del(&sbi->s_kobj); 226 226 + kobject_put(&sbi->s_kobj); 227 227 + wait_for_completion(&sbi->s_kobj_unregister); 228 228 + } 227 229 } 228 230 229 231 int __init erofs_init_sysfs(void)

+88 -96

fs/erofs/zdata.c

reviewed

··· 192 192 COLLECT_PRIMARY_FOLLOWED, 193 193 }; 194 194 195 195 - struct z_erofs_collector { 195 195 + struct z_erofs_decompress_frontend { 196 196 + struct inode *const inode; 197 197 + struct erofs_map_blocks map; 198 198 + 196 199 struct z_erofs_pagevec_ctor vector; 197 200 198 201 struct z_erofs_pcluster *pcl, *tailpcl; ··· 205 202 z_erofs_next_pcluster_t owned_head; 206 203 207 204 enum z_erofs_collectmode mode; 208 208 - }; 209 209 - 210 210 - struct z_erofs_decompress_frontend { 211 211 - struct inode *const inode; 212 212 - 213 213 - struct z_erofs_collector clt; 214 214 - struct erofs_map_blocks map; 215 205 216 206 bool readahead; 217 207 /* used for applying cache strategy on the fly */ ··· 212 216 erofs_off_t headoffset; 213 217 }; 214 218 215 215 - #define COLLECTOR_INIT() { \ 216 216 - .owned_head = Z_EROFS_PCLUSTER_TAIL, \ 217 217 - .mode = COLLECT_PRIMARY_FOLLOWED } 218 218 - 219 219 #define DECOMPRESS_FRONTEND_INIT(__i) { \ 220 220 - .inode = __i, .clt = COLLECTOR_INIT(), \ 221 221 - .backmost = true, } 220 220 + .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ 221 221 + .mode = COLLECT_PRIMARY_FOLLOWED } 222 222 223 223 static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES]; 224 224 static DEFINE_MUTEX(z_pagemap_global_lock); 225 225 226 226 - static void preload_compressed_pages(struct z_erofs_collector *clt, 227 227 - struct address_space *mc, 228 228 - enum z_erofs_cache_alloctype type, 229 229 - struct page **pagepool) 226 226 + static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, 227 227 + enum z_erofs_cache_alloctype type, 228 228 + struct page **pagepool) 230 229 { 231 231 - struct z_erofs_pcluster *pcl = clt->pcl; 230 230 + struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); 231 231 + struct z_erofs_pcluster *pcl = fe->pcl; 232 232 bool standalone = true; 233 233 + /* 234 234 + * optimistic allocation without direct reclaim since inplace I/O 235 235 + * can be used if low memory otherwise. 
236 236 + */ 233 237 gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | 234 238 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; 235 239 struct page **pages; 236 240 pgoff_t index; 237 241 238 238 - if (clt->mode < COLLECT_PRIMARY_FOLLOWED) 242 242 + if (fe->mode < COLLECT_PRIMARY_FOLLOWED) 239 243 return; 240 244 241 245 pages = pcl->compressed_pages; ··· 284 288 * managed cache since it can be moved to the bypass queue instead. 285 289 */ 286 290 if (standalone) 287 287 - clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; 291 291 + fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; 288 292 } 289 293 290 294 /* called by erofs_shrinker to get rid of all compressed_pages */ ··· 346 350 } 347 351 348 352 /* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ 349 349 - static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, 353 353 + static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, 350 354 struct page *page) 351 355 { 352 352 - struct z_erofs_pcluster *const pcl = clt->pcl; 356 356 + struct z_erofs_pcluster *const pcl = fe->pcl; 353 357 354 354 - while (clt->icpage_ptr > pcl->compressed_pages) 355 355 - if (!cmpxchg(--clt->icpage_ptr, NULL, page)) 358 358 + while (fe->icpage_ptr > pcl->compressed_pages) 359 359 + if (!cmpxchg(--fe->icpage_ptr, NULL, page)) 356 360 return true; 357 361 return false; 358 362 } 359 363 360 364 /* callers must be with collection lock held */ 361 361 - static int z_erofs_attach_page(struct z_erofs_collector *clt, 365 365 + static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, 362 366 struct page *page, enum z_erofs_page_type type, 363 367 bool pvec_safereuse) 364 368 { 365 369 int ret; 366 370 367 371 /* give priority for inplaceio */ 368 368 - if (clt->mode >= COLLECT_PRIMARY && 372 372 + if (fe->mode >= COLLECT_PRIMARY && 369 373 type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && 370 370 - z_erofs_try_inplace_io(clt, page)) 374 374 + z_erofs_try_inplace_io(fe, page)) 371 375 return 0; 372 376 373 
373 - ret = z_erofs_pagevec_enqueue(&clt->vector, page, type, 377 377 + ret = z_erofs_pagevec_enqueue(&fe->vector, page, type, 374 378 pvec_safereuse); 375 375 - clt->cl->vcnt += (unsigned int)ret; 379 379 + fe->cl->vcnt += (unsigned int)ret; 376 380 return ret ? 0 : -EAGAIN; 377 381 } 378 382 379 379 - static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) 383 383 + static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f) 380 384 { 381 381 - struct z_erofs_pcluster *pcl = clt->pcl; 382 382 - z_erofs_next_pcluster_t *owned_head = &clt->owned_head; 385 385 + struct z_erofs_pcluster *pcl = f->pcl; 386 386 + z_erofs_next_pcluster_t *owned_head = &f->owned_head; 383 387 384 388 /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */ 385 389 if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, 386 390 *owned_head) == Z_EROFS_PCLUSTER_NIL) { 387 391 *owned_head = &pcl->next; 388 392 /* so we can attach this pcluster to our submission chain. */ 389 389 - clt->mode = COLLECT_PRIMARY_FOLLOWED; 393 393 + f->mode = COLLECT_PRIMARY_FOLLOWED; 390 394 return; 391 395 } 392 396 ··· 397 401 if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, 398 402 *owned_head) == Z_EROFS_PCLUSTER_TAIL) { 399 403 *owned_head = Z_EROFS_PCLUSTER_TAIL; 400 400 - clt->mode = COLLECT_PRIMARY_HOOKED; 401 401 - clt->tailpcl = NULL; 404 404 + f->mode = COLLECT_PRIMARY_HOOKED; 405 405 + f->tailpcl = NULL; 402 406 return; 403 407 } 404 408 /* type 3, it belongs to a chain, but it isn't the end of the chain */ 405 405 - clt->mode = COLLECT_PRIMARY; 409 409 + f->mode = COLLECT_PRIMARY; 406 410 } 407 411 408 408 - static int z_erofs_lookup_collection(struct z_erofs_collector *clt, 412 412 + static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe, 409 413 struct inode *inode, 410 414 struct erofs_map_blocks *map) 411 415 { 412 412 - struct z_erofs_pcluster *pcl = clt->pcl; 416 416 + struct z_erofs_pcluster *pcl = fe->pcl; 413 417 struct 
z_erofs_collection *cl; 414 418 unsigned int length; 415 419 416 420 /* to avoid unexpected loop formed by corrupted images */ 417 417 - if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) { 421 421 + if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) { 418 422 DBG_BUGON(1); 419 423 return -EFSCORRUPTED; 420 424 } ··· 445 449 } 446 450 mutex_lock(&cl->lock); 447 451 /* used to check tail merging loop due to corrupted images */ 448 448 - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) 449 449 - clt->tailpcl = pcl; 452 452 + if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL) 453 453 + fe->tailpcl = pcl; 450 454 451 451 - z_erofs_try_to_claim_pcluster(clt); 452 452 - clt->cl = cl; 455 455 + z_erofs_try_to_claim_pcluster(fe); 456 456 + fe->cl = cl; 453 457 return 0; 454 458 } 455 459 456 456 - static int z_erofs_register_collection(struct z_erofs_collector *clt, 460 460 + static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe, 457 461 struct inode *inode, 458 462 struct erofs_map_blocks *map) 459 463 { ··· 481 485 Z_EROFS_PCLUSTER_FULL_LENGTH : 0); 482 486 483 487 /* new pclusters should be claimed as type 1, primary and followed */ 484 484 - pcl->next = clt->owned_head; 485 485 - clt->mode = COLLECT_PRIMARY_FOLLOWED; 488 488 + pcl->next = fe->owned_head; 489 489 + fe->mode = COLLECT_PRIMARY_FOLLOWED; 486 490 487 491 cl = z_erofs_primarycollection(pcl); 488 492 cl->pageofs = map->m_la & ~PAGE_MASK; ··· 508 512 } 509 513 510 514 if (grp != &pcl->obj) { 511 511 - clt->pcl = container_of(grp, 515 515 + fe->pcl = container_of(grp, 512 516 struct z_erofs_pcluster, obj); 513 517 err = -EEXIST; 514 518 goto err_out; 515 519 } 516 520 } 517 521 /* used to check tail merging loop due to corrupted images */ 518 518 - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) 519 519 - clt->tailpcl = pcl; 520 520 - clt->owned_head = &pcl->next; 521 521 - clt->pcl = pcl; 522 522 - clt->cl = cl; 522 522 + if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL) 523 523 + 
fe->tailpcl = pcl; 524 524 + fe->owned_head = &pcl->next; 525 525 + fe->pcl = pcl; 526 526 + fe->cl = cl; 523 527 return 0; 524 528 525 529 err_out: ··· 528 532 return err; 529 533 } 530 534 531 531 - static int z_erofs_collector_begin(struct z_erofs_collector *clt, 535 535 + static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe, 532 536 struct inode *inode, 533 537 struct erofs_map_blocks *map) 534 538 { 535 539 struct erofs_workgroup *grp; 536 540 int ret; 537 541 538 538 - DBG_BUGON(clt->cl); 542 542 + DBG_BUGON(fe->cl); 539 543 540 544 /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */ 541 541 - DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL); 542 542 - DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); 545 545 + DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); 546 546 + DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); 543 547 544 548 if (map->m_flags & EROFS_MAP_META) { 545 549 if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) { ··· 551 555 552 556 grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT); 553 557 if (grp) { 554 554 - clt->pcl = container_of(grp, struct z_erofs_pcluster, obj); 558 558 + fe->pcl = container_of(grp, struct z_erofs_pcluster, obj); 555 559 } else { 556 560 tailpacking: 557 557 - ret = z_erofs_register_collection(clt, inode, map); 561 561 + ret = z_erofs_register_collection(fe, inode, map); 558 562 if (!ret) 559 563 goto out; 560 564 if (ret != -EEXIST) 561 565 return ret; 562 566 } 563 567 564 564 - ret = z_erofs_lookup_collection(clt, inode, map); 568 568 + ret = z_erofs_lookup_collection(fe, inode, map); 565 569 if (ret) { 566 566 - erofs_workgroup_put(&clt->pcl->obj); 570 570 + erofs_workgroup_put(&fe->pcl->obj); 567 571 return ret; 568 572 } 569 573 570 574 out: 571 571 - z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS, 572 572 - clt->cl->pagevec, clt->cl->vcnt); 575 575 + z_erofs_pagevec_ctor_init(&fe->vector, 
Z_EROFS_NR_INLINE_PAGEVECS, 576 576 + fe->cl->pagevec, fe->cl->vcnt); 573 577 /* since file-backed online pages are traversed in reverse order */ 574 574 - clt->icpage_ptr = clt->pcl->compressed_pages + 575 575 - z_erofs_pclusterpages(clt->pcl); 578 578 + fe->icpage_ptr = fe->pcl->compressed_pages + 579 579 + z_erofs_pclusterpages(fe->pcl); 576 580 return 0; 577 581 } 578 582 ··· 606 610 erofs_workgroup_put(&pcl->obj); 607 611 } 608 612 609 609 - static bool z_erofs_collector_end(struct z_erofs_collector *clt) 613 613 + static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) 610 614 { 611 611 - struct z_erofs_collection *cl = clt->cl; 615 615 + struct z_erofs_collection *cl = fe->cl; 612 616 613 617 if (!cl) 614 618 return false; 615 619 616 616 - z_erofs_pagevec_ctor_exit(&clt->vector, false); 620 620 + z_erofs_pagevec_ctor_exit(&fe->vector, false); 617 621 mutex_unlock(&cl->lock); 618 622 619 623 /* 620 624 * if all pending pages are added, don't hold its reference 621 625 * any longer if the pcluster isn't hosted by ourselves. 
622 626 */ 623 623 - if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) 627 627 + if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) 624 628 z_erofs_collection_put(cl); 625 629 626 626 - clt->cl = NULL; 630 630 + fe->cl = NULL; 627 631 return true; 628 632 } 629 633 ··· 647 651 struct inode *const inode = fe->inode; 648 652 struct erofs_sb_info *const sbi = EROFS_I_SB(inode); 649 653 struct erofs_map_blocks *const map = &fe->map; 650 650 - struct z_erofs_collector *const clt = &fe->clt; 651 654 const loff_t offset = page_offset(page); 652 655 bool tight = true; 653 656 ··· 667 672 if (offset + cur >= map->m_la && 668 673 offset + cur < map->m_la + map->m_llen) { 669 674 /* didn't get a valid collection previously (very rare) */ 670 670 - if (!clt->cl) 675 675 + if (!fe->cl) 671 676 goto restart_now; 672 677 goto hitted; 673 678 } ··· 675 680 /* go ahead the next map_blocks */ 676 681 erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur); 677 682 678 678 - if (z_erofs_collector_end(clt)) 683 683 + if (z_erofs_collector_end(fe)) 679 684 fe->backmost = false; 680 685 681 686 map->m_la = offset + cur; ··· 688 693 if (!(map->m_flags & EROFS_MAP_MAPPED)) 689 694 goto hitted; 690 695 691 691 - err = z_erofs_collector_begin(clt, inode, map); 696 696 + err = z_erofs_collector_begin(fe, inode, map); 692 697 if (err) 693 698 goto err_out; 694 699 695 695 - if (z_erofs_is_inline_pcluster(clt->pcl)) { 700 700 + if (z_erofs_is_inline_pcluster(fe->pcl)) { 696 701 void *mp; 697 702 698 703 mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb, ··· 704 709 goto err_out; 705 710 } 706 711 get_page(fe->map.buf.page); 707 707 - WRITE_ONCE(clt->pcl->compressed_pages[0], fe->map.buf.page); 708 708 - clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; 712 712 + WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page); 713 713 + fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; 709 714 } else { 710 710 - /* preload all compressed pages (can change mode if needed) */ 715 715 + /* bind 
cache first when cached decompression is preferred */ 711 716 if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, 712 717 map->m_la)) 713 718 cache_strategy = TRYALLOC; 714 719 else 715 720 cache_strategy = DONTALLOC; 716 721 717 717 - preload_compressed_pages(clt, MNGD_MAPPING(sbi), 718 718 - cache_strategy, pagepool); 722 722 + z_erofs_bind_cache(fe, cache_strategy, pagepool); 719 723 } 720 720 - 721 724 hitted: 722 725 /* 723 726 * Ensure the current partial page belongs to this submit chain rather ··· 723 730 * those chains are handled asynchronously thus the page cannot be used 724 731 * for inplace I/O or pagevec (should be processed in strict order.) 725 732 */ 726 726 - tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && 727 727 - clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); 733 733 + tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED && 734 734 + fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); 728 735 729 736 cur = end - min_t(unsigned int, offset + end - map->m_la, end); 730 737 if (!(map->m_flags & EROFS_MAP_MAPPED)) { ··· 739 746 Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); 740 747 741 748 if (cur) 742 742 - tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED); 749 749 + tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED); 743 750 744 751 retry: 745 745 - err = z_erofs_attach_page(clt, page, page_type, 746 746 - clt->mode >= COLLECT_PRIMARY_FOLLOWED); 752 752 + err = z_erofs_attach_page(fe, page, page_type, 753 753 + fe->mode >= COLLECT_PRIMARY_FOLLOWED); 747 754 /* should allocate an additional short-lived page for pagevec */ 748 755 if (err == -EAGAIN) { 749 756 struct page *const newpage = 750 757 alloc_page(GFP_NOFS | __GFP_NOFAIL); 751 758 752 759 set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE); 753 753 - err = z_erofs_attach_page(clt, newpage, 760 760 + err = z_erofs_attach_page(fe, newpage, 754 761 Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); 755 762 if (!err) 756 763 goto retry; ··· 766 773 /* bump up the number of spiltted parts of a page */ 767 774 ++spiltted; 
768 775 /* also update nr_pages */ 769 769 - clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1); 776 776 + fe->cl->nr_pages = max_t(pgoff_t, fe->cl->nr_pages, index + 1); 770 777 next_part: 771 778 /* can be used for verification */ 772 779 map->m_llen = offset + cur - map->m_la; ··· 1091 1098 static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, 1092 1099 unsigned int nr, 1093 1100 struct page **pagepool, 1094 1094 - struct address_space *mc, 1095 1095 - gfp_t gfp) 1101 1101 + struct address_space *mc) 1096 1102 { 1097 1103 const pgoff_t index = pcl->obj.index; 1104 1104 + gfp_t gfp = mapping_gfp_mask(mc); 1098 1105 bool tocache = false; 1099 1106 1100 1107 struct address_space *mapping; ··· 1302 1309 z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; 1303 1310 struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; 1304 1311 void *bi_private; 1305 1305 - z_erofs_next_pcluster_t owned_head = f->clt.owned_head; 1312 1312 + z_erofs_next_pcluster_t owned_head = f->owned_head; 1306 1313 /* bio is NULL initially, so no need to initialize last_{index,bdev} */ 1307 1314 pgoff_t last_index; 1308 1315 struct block_device *last_bdev; ··· 1350 1357 struct page *page; 1351 1358 1352 1359 page = pickup_page_for_submission(pcl, i++, pagepool, 1353 1353 - MNGD_MAPPING(sbi), 1354 1354 - GFP_NOFS); 1360 1360 + MNGD_MAPPING(sbi)); 1355 1361 if (!page) 1356 1362 continue; 1357 1363 ··· 1408 1416 { 1409 1417 struct z_erofs_decompressqueue io[NR_JOBQUEUES]; 1410 1418 1411 1411 - if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL) 1419 1419 + if (f->owned_head == Z_EROFS_PCLUSTER_TAIL) 1412 1420 return; 1413 1421 z_erofs_submit_queue(sb, f, pagepool, io, &force_fg); 1414 1422 ··· 1508 1516 err = z_erofs_do_read_page(&f, page, &pagepool); 1509 1517 z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false); 1510 1518 1511 1511 - (void)z_erofs_collector_end(&f.clt); 1519 1519 + (void)z_erofs_collector_end(&f); 1512 1520 1513 1521 /* if some compressed cluster ready, 
need submit them anyway */ 1514 1522 z_erofs_runqueue(inode->i_sb, &f, &pagepool, ··· 1558 1566 put_page(page); 1559 1567 } 1560 1568 z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false); 1561 1561 - (void)z_erofs_collector_end(&f.clt); 1569 1569 + (void)z_erofs_collector_end(&f); 1562 1570 1563 1571 z_erofs_runqueue(inode->i_sb, &f, &pagepool, 1564 1572 z_erofs_get_sync_decompress_policy(sbi, nr_pages));

+35 -36

fs/erofs/zmap.c

reviewed

··· 431 431 unsigned int lookback_distance) 432 432 { 433 433 struct erofs_inode *const vi = EROFS_I(m->inode); 434 434 - struct erofs_map_blocks *const map = m->map; 435 434 const unsigned int lclusterbits = vi->z_logical_clusterbits; 436 436 - unsigned long lcn = m->lcn; 437 437 - int err; 438 435 439 439 - if (lcn < lookback_distance) { 440 440 - erofs_err(m->inode->i_sb, 441 441 - "bogus lookback distance @ nid %llu", vi->nid); 442 442 - DBG_BUGON(1); 443 443 - return -EFSCORRUPTED; 444 444 - } 436 436 + while (m->lcn >= lookback_distance) { 437 437 + unsigned long lcn = m->lcn - lookback_distance; 438 438 + int err; 445 439 446 446 - /* load extent head logical cluster if needed */ 447 447 - lcn -= lookback_distance; 448 448 - err = z_erofs_load_cluster_from_disk(m, lcn, false); 449 449 - if (err) 450 450 - return err; 440 440 + /* load extent head logical cluster if needed */ 441 441 + err = z_erofs_load_cluster_from_disk(m, lcn, false); 442 442 + if (err) 443 443 + return err; 451 444 452 452 - switch (m->type) { 453 453 - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: 454 454 - if (!m->delta[0]) { 445 445 + switch (m->type) { 446 446 + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: 447 447 + if (!m->delta[0]) { 448 448 + erofs_err(m->inode->i_sb, 449 449 + "invalid lookback distance 0 @ nid %llu", 450 450 + vi->nid); 451 451 + DBG_BUGON(1); 452 452 + return -EFSCORRUPTED; 453 453 + } 454 454 + lookback_distance = m->delta[0]; 455 455 + continue; 456 456 + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: 457 457 + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: 458 458 + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: 459 459 + m->headtype = m->type; 460 460 + m->map->m_la = (lcn << lclusterbits) | m->clusterofs; 461 461 + return 0; 462 462 + default: 455 463 erofs_err(m->inode->i_sb, 456 456 - "invalid lookback distance 0 @ nid %llu", 457 457 - vi->nid); 464 464 + "unknown type %u @ lcn %lu of nid %llu", 465 465 + m->type, lcn, vi->nid); 458 466 DBG_BUGON(1); 459 459 - return -EFSCORRUPTED; 467 467 + 
return -EOPNOTSUPP; 460 468 } 461 461 - return z_erofs_extent_lookback(m, m->delta[0]); 462 462 - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: 463 463 - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: 464 464 - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: 465 465 - m->headtype = m->type; 466 466 - map->m_la = (lcn << lclusterbits) | m->clusterofs; 467 467 - break; 468 468 - default: 469 469 - erofs_err(m->inode->i_sb, 470 470 - "unknown type %u @ lcn %lu of nid %llu", 471 471 - m->type, lcn, vi->nid); 472 472 - DBG_BUGON(1); 473 473 - return -EOPNOTSUPP; 474 469 } 475 475 - return 0; 470 470 + 471 471 + erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu", 472 472 + vi->nid); 473 473 + DBG_BUGON(1); 474 474 + return -EFSCORRUPTED; 476 475 } 477 476 478 477 static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, ··· 493 494 !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) || 494 495 ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) && 495 496 !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { 496 496 - map->m_plen = 1 << lclusterbits; 497 497 + map->m_plen = 1ULL << lclusterbits; 497 498 return 0; 498 499 } 499 500 lcn = m->lcn + 1; ··· 539 540 return -EFSCORRUPTED; 540 541 } 541 542 out: 542 542 - map->m_plen = m->compressedlcs << lclusterbits; 543 543 + map->m_plen = (u64)m->compressedlcs << lclusterbits; 543 544 return 0; 544 545 err_bonus_cblkcnt: 545 546 erofs_err(m->inode->i_sb,