Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'erofs-for-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
"In this cycle, for container use cases, fscache-based shared domain is
introduced [1] so that data blobs in the same domain will be storage
deduplicated and it will also be used for page cache sharing later.

Also, a special packed inode is now introduced to record inode
fragments which keep the tail part of files by Yue Hu [2]. You can
keep arbitrary length or (at will) the whole file as a fragment and
then fragments can be optionally compressed in the packed inode
together and even deduplicated for smaller image sizes.

In addition to that, global compressed data deduplication by sharing
partial-referenced pclusters is also supported in this cycle.

Summary:

- Introduce fscache-based domain to share blobs between images

- Support recording fragments in a special packed inode

- Support partial-referenced pclusters for global compressed data
deduplication

- Fix an order >= MAX_ORDER warning due to crafted negative i_size

- Several cleanups"

Link: https://lore.kernel.org/r/20220916085940.89392-1-zhujia.zj@bytedance.com [1]
Link: https://lore.kernel.org/r/cover.1663065968.git.huyue2@coolpad.com [2]

* tag 'erofs-for-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: clean up erofs_iget()
erofs: clean up unnecessary code and comments
erofs: fold in z_erofs_reload_indexes()
erofs: introduce partial-referenced pclusters
erofs: support on-disk compressed fragments data
erofs: support interlaced uncompressed data for compressed files
erofs: clean up .read_folio() and .readahead() in fscache mode
erofs: introduce 'domain_id' mount option
erofs: Support sharing cookies in the same domain
erofs: introduce a pseudo mnt to manage shared cookies
erofs: introduce fscache-based domain
erofs: code clean up for fscache
erofs: use kill_anon_super() to kill super in fscache mode
erofs: fix order >= MAX_ORDER warning due to crafted negative i_size

+652 -300
+28 -19
fs/erofs/decompressor.c
··· 317 317 return ret; 318 318 } 319 319 320 - static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, 321 - struct page **pagepool) 320 + static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, 321 + struct page **pagepool) 322 322 { 323 - const unsigned int nrpages_out = 323 + const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; 324 + const unsigned int outpages = 324 325 PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; 325 326 const unsigned int righthalf = min_t(unsigned int, rq->outputsize, 326 327 PAGE_SIZE - rq->pageofs_out); 327 328 const unsigned int lefthalf = rq->outputsize - righthalf; 329 + const unsigned int interlaced_offset = 330 + rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out; 328 331 unsigned char *src, *dst; 329 332 330 - if (nrpages_out > 2) { 333 + if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) { 331 334 DBG_BUGON(1); 332 - return -EIO; 335 + return -EFSCORRUPTED; 333 336 } 334 337 335 338 if (rq->out[0] == *rq->in) { 336 - DBG_BUGON(nrpages_out != 1); 339 + DBG_BUGON(rq->pageofs_out); 337 340 return 0; 338 341 } 339 342 340 - src = kmap_atomic(*rq->in) + rq->pageofs_in; 343 + src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in; 341 344 if (rq->out[0]) { 342 - dst = kmap_atomic(rq->out[0]); 343 - memcpy(dst + rq->pageofs_out, src, righthalf); 344 - kunmap_atomic(dst); 345 + dst = kmap_local_page(rq->out[0]); 346 + memcpy(dst + rq->pageofs_out, src + interlaced_offset, 347 + righthalf); 348 + kunmap_local(dst); 345 349 } 346 350 347 - if (nrpages_out == 2) { 348 - DBG_BUGON(!rq->out[1]); 349 - if (rq->out[1] == *rq->in) { 351 + if (outpages > inpages) { 352 + DBG_BUGON(!rq->out[outpages - 1]); 353 + if (rq->out[outpages - 1] != rq->in[inpages - 1]) { 354 + dst = kmap_local_page(rq->out[outpages - 1]); 355 + memcpy(dst, interlaced_offset ? 
src : 356 + (src + righthalf), lefthalf); 357 + kunmap_local(dst); 358 + } else if (!interlaced_offset) { 350 359 memmove(src, src + righthalf, lefthalf); 351 - } else { 352 - dst = kmap_atomic(rq->out[1]); 353 - memcpy(dst, src + righthalf, lefthalf); 354 - kunmap_atomic(dst); 355 360 } 356 361 } 357 - kunmap_atomic(src); 362 + kunmap_local(src); 358 363 return 0; 359 364 } 360 365 361 366 static struct z_erofs_decompressor decompressors[] = { 362 367 [Z_EROFS_COMPRESSION_SHIFTED] = { 363 - .decompress = z_erofs_shifted_transform, 368 + .decompress = z_erofs_transform_plain, 364 369 .name = "shifted" 370 + }, 371 + [Z_EROFS_COMPRESSION_INTERLACED] = { 372 + .decompress = z_erofs_transform_plain, 373 + .name = "interlaced" 365 374 }, 366 375 [Z_EROFS_COMPRESSION_LZ4] = { 367 376 .decompress = z_erofs_lz4_decompress,
+3
fs/erofs/decompressor_lzma.c
··· 217 217 strm->buf.out_size = min_t(u32, outlen, 218 218 PAGE_SIZE - pageofs); 219 219 outlen -= strm->buf.out_size; 220 + if (!rq->out[no] && rq->fillgaps) /* deduped */ 221 + rq->out[no] = erofs_allocpage(pagepool, 222 + GFP_KERNEL | __GFP_NOFAIL); 220 223 if (rq->out[no]) 221 224 strm->buf.out = kmap(rq->out[no]) + pageofs; 222 225 pageofs = 0;
+34 -6
fs/erofs/erofs_fs.h
··· 25 25 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 26 26 #define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008 27 27 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010 28 + #define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020 29 + #define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 28 30 #define EROFS_ALL_FEATURE_INCOMPAT \ 29 31 (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ 30 32 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ ··· 34 32 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ 35 33 EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ 36 34 EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ 37 - EROFS_FEATURE_INCOMPAT_ZTAILPACKING) 35 + EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \ 36 + EROFS_FEATURE_INCOMPAT_FRAGMENTS | \ 37 + EROFS_FEATURE_INCOMPAT_DEDUPE) 38 38 39 39 #define EROFS_SB_EXTSLOT_SIZE 16 40 40 ··· 75 71 } __packed u1; 76 72 __le16 extra_devices; /* # of devices besides the primary device */ 77 73 __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ 78 - __u8 reserved2[38]; 74 + __u8 reserved[6]; 75 + __le64 packed_nid; /* nid of the special packed inode */ 76 + __u8 reserved2[24]; 79 77 }; 80 78 81 79 /* ··· 301 295 * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) 302 296 * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) 303 297 * bit 3 : tailpacking inline pcluster (0 - off; 1 - on) 298 + * bit 4 : interlaced plain pcluster (0 - off; 1 - on) 299 + * bit 5 : fragment pcluster (0 - off; 1 - on) 304 300 */ 305 301 #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 306 302 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 307 303 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 308 304 #define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008 305 + #define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010 306 + #define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020 309 307 308 + #define Z_EROFS_FRAGMENT_INODE_BIT 7 310 309 struct z_erofs_map_header { 311 - __le16 h_reserved1; 312 - /* indicates the encoded size of tailpacking data */ 313 - __le16 h_idata_size; 310 + union { 311 + /* fragment data offset in the packed 
inode */ 312 + __le32 h_fragmentoff; 313 + struct { 314 + __le16 h_reserved1; 315 + /* indicates the encoded size of tailpacking data */ 316 + __le16 h_idata_size; 317 + }; 318 + }; 314 319 __le16 h_advise; 315 320 /* 316 321 * bit 0-3 : algorithm type of head 1 (logical cluster type 01); ··· 330 313 __u8 h_algorithmtype; 331 314 /* 332 315 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; 333 - * bit 3-7 : reserved. 316 + * bit 3-6 : reserved; 317 + * bit 7 : move the whole file into packed inode or not. 334 318 */ 335 319 __u8 h_clusterbits; 336 320 }; ··· 372 354 373 355 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 374 356 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 357 + 358 + /* (noncompact only, HEAD) This pcluster refers to partial decompressed data */ 359 + #define Z_EROFS_VLE_DI_PARTIAL_REF (1 << 15) 375 360 376 361 /* 377 362 * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the ··· 423 402 /* check the EROFS on-disk layout strictly at compile time */ 424 403 static inline void erofs_check_ondisk_layout_definitions(void) 425 404 { 405 + const __le64 fmh = *(__le64 *)&(struct z_erofs_map_header) { 406 + .h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT 407 + }; 408 + 426 409 BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); 427 410 BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); 428 411 BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); ··· 444 419 445 420 BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < 446 421 Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); 422 + /* exclude old compiler versions like gcc 7.5.0 */ 423 + BUILD_BUG_ON(__builtin_constant_p(fmh) ? 424 + fmh != cpu_to_le64(1ULL << 63) : 0); 447 425 } 448 426 449 427 #endif
+313 -166
fs/erofs/fscache.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-or-later 2 2 /* 3 3 * Copyright (C) 2022, Alibaba Cloud 4 + * Copyright (C) 2022, Bytedance Inc. All rights reserved. 4 5 */ 5 6 #include <linux/fscache.h> 6 7 #include "internal.h" 8 + 9 + static DEFINE_MUTEX(erofs_domain_list_lock); 10 + static DEFINE_MUTEX(erofs_domain_cookies_lock); 11 + static LIST_HEAD(erofs_domain_list); 12 + static struct vfsmount *erofs_pseudo_mnt; 7 13 8 14 static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping, 9 15 loff_t start, size_t len) ··· 240 234 return ret; 241 235 } 242 236 243 - static int erofs_fscache_read_folio_inline(struct folio *folio, 244 - struct erofs_map_blocks *map) 237 + /* 238 + * Read into page cache in the range described by (@pos, @len). 239 + * 240 + * On return, the caller is responsible for page unlocking if the output @unlock 241 + * is true, or the callee will take this responsibility through netfs_io_request 242 + * interface. 243 + * 244 + * The return value is the number of bytes successfully handled, or negative 245 + * error code on failure. The only exception is that, the length of the range 246 + * instead of the error code is returned on failure after netfs_io_request is 247 + * allocated, so that .readahead() could advance rac accordingly. 248 + */ 249 + static int erofs_fscache_data_read(struct address_space *mapping, 250 + loff_t pos, size_t len, bool *unlock) 245 251 { 246 - struct super_block *sb = folio_mapping(folio)->host->i_sb; 247 - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 248 - erofs_blk_t blknr; 249 - size_t offset, len; 250 - void *src, *dst; 251 - 252 - /* For tail packing layout, the offset may be non-zero. 
*/ 253 - offset = erofs_blkoff(map->m_pa); 254 - blknr = erofs_blknr(map->m_pa); 255 - len = map->m_llen; 256 - 257 - src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); 258 - if (IS_ERR(src)) 259 - return PTR_ERR(src); 260 - 261 - dst = kmap_local_folio(folio, 0); 262 - memcpy(dst, src + offset, len); 263 - memset(dst + len, 0, PAGE_SIZE - len); 264 - kunmap_local(dst); 265 - 266 - erofs_put_metabuf(&buf); 267 - return 0; 268 - } 269 - 270 - static int erofs_fscache_read_folio(struct file *file, struct folio *folio) 271 - { 272 - struct inode *inode = folio_mapping(folio)->host; 252 + struct inode *inode = mapping->host; 273 253 struct super_block *sb = inode->i_sb; 254 + struct netfs_io_request *rreq; 274 255 struct erofs_map_blocks map; 275 256 struct erofs_map_dev mdev; 276 - struct netfs_io_request *rreq; 277 - erofs_off_t pos; 278 - loff_t pstart; 257 + struct iov_iter iter; 258 + size_t count; 279 259 int ret; 280 260 281 - DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ); 261 + *unlock = true; 282 262 283 - pos = folio_pos(folio); 284 263 map.m_la = pos; 285 - 286 264 ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); 287 265 if (ret) 288 - goto out_unlock; 289 - 290 - if (!(map.m_flags & EROFS_MAP_MAPPED)) { 291 - folio_zero_range(folio, 0, folio_size(folio)); 292 - goto out_uptodate; 293 - } 266 + return ret; 294 267 295 268 if (map.m_flags & EROFS_MAP_META) { 296 - ret = erofs_fscache_read_folio_inline(folio, &map); 297 - goto out_uptodate; 269 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 270 + erofs_blk_t blknr; 271 + size_t offset, size; 272 + void *src; 273 + 274 + /* For tail packing layout, the offset may be non-zero. 
*/ 275 + offset = erofs_blkoff(map.m_pa); 276 + blknr = erofs_blknr(map.m_pa); 277 + size = map.m_llen; 278 + 279 + src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); 280 + if (IS_ERR(src)) 281 + return PTR_ERR(src); 282 + 283 + iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE); 284 + if (copy_to_iter(src + offset, size, &iter) != size) 285 + return -EFAULT; 286 + iov_iter_zero(PAGE_SIZE - size, &iter); 287 + erofs_put_metabuf(&buf); 288 + return PAGE_SIZE; 289 + } 290 + 291 + count = min_t(size_t, map.m_llen - (pos - map.m_la), len); 292 + DBG_BUGON(!count || count % PAGE_SIZE); 293 + 294 + if (!(map.m_flags & EROFS_MAP_MAPPED)) { 295 + iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count); 296 + iov_iter_zero(count, &iter); 297 + return count; 298 298 } 299 299 300 300 mdev = (struct erofs_map_dev) { 301 301 .m_deviceid = map.m_deviceid, 302 302 .m_pa = map.m_pa, 303 303 }; 304 - 305 304 ret = erofs_map_dev(sb, &mdev); 306 305 if (ret) 307 - goto out_unlock; 306 + return ret; 308 307 308 + rreq = erofs_fscache_alloc_request(mapping, pos, count); 309 + if (IS_ERR(rreq)) 310 + return PTR_ERR(rreq); 309 311 310 - rreq = erofs_fscache_alloc_request(folio_mapping(folio), 311 - folio_pos(folio), folio_size(folio)); 312 - if (IS_ERR(rreq)) { 313 - ret = PTR_ERR(rreq); 314 - goto out_unlock; 315 - } 316 - 317 - pstart = mdev.m_pa + (pos - map.m_la); 318 - return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, 319 - rreq, pstart); 320 - 321 - out_uptodate: 322 - if (!ret) 323 - folio_mark_uptodate(folio); 324 - out_unlock: 325 - folio_unlock(folio); 326 - return ret; 312 + *unlock = false; 313 + erofs_fscache_read_folios_async(mdev.m_fscache->cookie, 314 + rreq, mdev.m_pa + (pos - map.m_la)); 315 + return count; 327 316 } 328 317 329 - static void erofs_fscache_advance_folios(struct readahead_control *rac, 330 - size_t len, bool unlock) 318 + static int erofs_fscache_read_folio(struct file *file, struct folio *folio) 331 319 { 332 - while 
(len) { 333 - struct folio *folio = readahead_folio(rac); 334 - len -= folio_size(folio); 335 - if (unlock) { 320 + bool unlock; 321 + int ret; 322 + 323 + DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ); 324 + 325 + ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio), 326 + folio_size(folio), &unlock); 327 + if (unlock) { 328 + if (ret > 0) 336 329 folio_mark_uptodate(folio); 337 - folio_unlock(folio); 338 - } 330 + folio_unlock(folio); 339 331 } 332 + return ret < 0 ? ret : 0; 340 333 } 341 334 342 335 static void erofs_fscache_readahead(struct readahead_control *rac) 343 336 { 344 - struct inode *inode = rac->mapping->host; 345 - struct super_block *sb = inode->i_sb; 346 - size_t len, count, done = 0; 347 - erofs_off_t pos; 348 - loff_t start, offset; 349 - int ret; 337 + struct folio *folio; 338 + size_t len, done = 0; 339 + loff_t start, pos; 340 + bool unlock; 341 + int ret, size; 350 342 351 343 if (!readahead_count(rac)) 352 344 return; ··· 353 349 len = readahead_length(rac); 354 350 355 351 do { 356 - struct erofs_map_blocks map; 357 - struct erofs_map_dev mdev; 358 - struct netfs_io_request *rreq; 359 - 360 352 pos = start + done; 361 - map.m_la = pos; 362 - 363 - ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); 364 - if (ret) 353 + ret = erofs_fscache_data_read(rac->mapping, pos, 354 + len - done, &unlock); 355 + if (ret <= 0) 365 356 return; 366 357 367 - offset = start + done; 368 - count = min_t(size_t, map.m_llen - (pos - map.m_la), 369 - len - done); 370 - 371 - if (!(map.m_flags & EROFS_MAP_MAPPED)) { 372 - struct iov_iter iter; 373 - 374 - iov_iter_xarray(&iter, READ, &rac->mapping->i_pages, 375 - offset, count); 376 - iov_iter_zero(count, &iter); 377 - 378 - erofs_fscache_advance_folios(rac, count, true); 379 - ret = count; 380 - continue; 381 - } 382 - 383 - if (map.m_flags & EROFS_MAP_META) { 384 - struct folio *folio = readahead_folio(rac); 385 - 386 - ret = erofs_fscache_read_folio_inline(folio, &map); 387 - if (!ret) 
{ 358 + size = ret; 359 + while (size) { 360 + folio = readahead_folio(rac); 361 + size -= folio_size(folio); 362 + if (unlock) { 388 363 folio_mark_uptodate(folio); 389 - ret = folio_size(folio); 364 + folio_unlock(folio); 390 365 } 391 - 392 - folio_unlock(folio); 393 - continue; 394 366 } 395 - 396 - mdev = (struct erofs_map_dev) { 397 - .m_deviceid = map.m_deviceid, 398 - .m_pa = map.m_pa, 399 - }; 400 - ret = erofs_map_dev(sb, &mdev); 401 - if (ret) 402 - return; 403 - 404 - rreq = erofs_fscache_alloc_request(rac->mapping, offset, count); 405 - if (IS_ERR(rreq)) 406 - return; 407 - /* 408 - * Drop the ref of folios here. Unlock them in 409 - * rreq_unlock_folios() when rreq complete. 410 - */ 411 - erofs_fscache_advance_folios(rac, count, false); 412 - ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, 413 - rreq, mdev.m_pa + (pos - map.m_la)); 414 - if (!ret) 415 - ret = count; 416 - } while (ret > 0 && ((done += ret) < len)); 367 + } while ((done += ret) < len); 417 368 } 418 369 419 370 static const struct address_space_operations erofs_fscache_meta_aops = { ··· 380 421 .readahead = erofs_fscache_readahead, 381 422 }; 382 423 383 - int erofs_fscache_register_cookie(struct super_block *sb, 384 - struct erofs_fscache **fscache, 385 - char *name, bool need_inode) 424 + static void erofs_fscache_domain_put(struct erofs_domain *domain) 425 + { 426 + if (!domain) 427 + return; 428 + mutex_lock(&erofs_domain_list_lock); 429 + if (refcount_dec_and_test(&domain->ref)) { 430 + list_del(&domain->list); 431 + if (list_empty(&erofs_domain_list)) { 432 + kern_unmount(erofs_pseudo_mnt); 433 + erofs_pseudo_mnt = NULL; 434 + } 435 + mutex_unlock(&erofs_domain_list_lock); 436 + fscache_relinquish_volume(domain->volume, NULL, false); 437 + kfree(domain->domain_id); 438 + kfree(domain); 439 + return; 440 + } 441 + mutex_unlock(&erofs_domain_list_lock); 442 + } 443 + 444 + static int erofs_fscache_register_volume(struct super_block *sb) 445 + { 446 + struct 
erofs_sb_info *sbi = EROFS_SB(sb); 447 + char *domain_id = sbi->opt.domain_id; 448 + struct fscache_volume *volume; 449 + char *name; 450 + int ret = 0; 451 + 452 + name = kasprintf(GFP_KERNEL, "erofs,%s", 453 + domain_id ? domain_id : sbi->opt.fsid); 454 + if (!name) 455 + return -ENOMEM; 456 + 457 + volume = fscache_acquire_volume(name, NULL, NULL, 0); 458 + if (IS_ERR_OR_NULL(volume)) { 459 + erofs_err(sb, "failed to register volume for %s", name); 460 + ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP; 461 + volume = NULL; 462 + } 463 + 464 + sbi->volume = volume; 465 + kfree(name); 466 + return ret; 467 + } 468 + 469 + static int erofs_fscache_init_domain(struct super_block *sb) 470 + { 471 + int err; 472 + struct erofs_domain *domain; 473 + struct erofs_sb_info *sbi = EROFS_SB(sb); 474 + 475 + domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL); 476 + if (!domain) 477 + return -ENOMEM; 478 + 479 + domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL); 480 + if (!domain->domain_id) { 481 + kfree(domain); 482 + return -ENOMEM; 483 + } 484 + 485 + err = erofs_fscache_register_volume(sb); 486 + if (err) 487 + goto out; 488 + 489 + if (!erofs_pseudo_mnt) { 490 + erofs_pseudo_mnt = kern_mount(&erofs_fs_type); 491 + if (IS_ERR(erofs_pseudo_mnt)) { 492 + err = PTR_ERR(erofs_pseudo_mnt); 493 + goto out; 494 + } 495 + } 496 + 497 + domain->volume = sbi->volume; 498 + refcount_set(&domain->ref, 1); 499 + list_add(&domain->list, &erofs_domain_list); 500 + sbi->domain = domain; 501 + return 0; 502 + out: 503 + kfree(domain->domain_id); 504 + kfree(domain); 505 + return err; 506 + } 507 + 508 + static int erofs_fscache_register_domain(struct super_block *sb) 509 + { 510 + int err; 511 + struct erofs_domain *domain; 512 + struct erofs_sb_info *sbi = EROFS_SB(sb); 513 + 514 + mutex_lock(&erofs_domain_list_lock); 515 + list_for_each_entry(domain, &erofs_domain_list, list) { 516 + if (!strcmp(domain->domain_id, sbi->opt.domain_id)) { 517 + sbi->domain = domain; 518 + 
sbi->volume = domain->volume; 519 + refcount_inc(&domain->ref); 520 + mutex_unlock(&erofs_domain_list_lock); 521 + return 0; 522 + } 523 + } 524 + err = erofs_fscache_init_domain(sb); 525 + mutex_unlock(&erofs_domain_list_lock); 526 + return err; 527 + } 528 + 529 + static 530 + struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb, 531 + char *name, bool need_inode) 386 532 { 387 533 struct fscache_volume *volume = EROFS_SB(sb)->volume; 388 534 struct erofs_fscache *ctx; ··· 496 432 497 433 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 498 434 if (!ctx) 499 - return -ENOMEM; 435 + return ERR_PTR(-ENOMEM); 500 436 501 437 cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE, 502 438 name, strlen(name), NULL, 0, 0); ··· 526 462 ctx->inode = inode; 527 463 } 528 464 529 - *fscache = ctx; 530 - return 0; 465 + return ctx; 531 466 532 467 err_cookie: 533 468 fscache_unuse_cookie(ctx->cookie, NULL, NULL); 534 469 fscache_relinquish_cookie(ctx->cookie, false); 535 - ctx->cookie = NULL; 536 470 err: 537 471 kfree(ctx); 538 - return ret; 472 + return ERR_PTR(ret); 539 473 } 540 474 541 - void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) 475 + static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx) 542 476 { 543 - struct erofs_fscache *ctx = *fscache; 477 + fscache_unuse_cookie(ctx->cookie, NULL, NULL); 478 + fscache_relinquish_cookie(ctx->cookie, false); 479 + iput(ctx->inode); 480 + kfree(ctx->name); 481 + kfree(ctx); 482 + } 483 + 484 + static 485 + struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb, 486 + char *name, bool need_inode) 487 + { 488 + int err; 489 + struct inode *inode; 490 + struct erofs_fscache *ctx; 491 + struct erofs_domain *domain = EROFS_SB(sb)->domain; 492 + 493 + ctx = erofs_fscache_acquire_cookie(sb, name, need_inode); 494 + if (IS_ERR(ctx)) 495 + return ctx; 496 + 497 + ctx->name = kstrdup(name, GFP_KERNEL); 498 + if (!ctx->name) { 499 + err = -ENOMEM; 500 + 
goto out; 501 + } 502 + 503 + inode = new_inode(erofs_pseudo_mnt->mnt_sb); 504 + if (!inode) { 505 + err = -ENOMEM; 506 + goto out; 507 + } 508 + 509 + ctx->domain = domain; 510 + ctx->anon_inode = inode; 511 + inode->i_private = ctx; 512 + refcount_inc(&domain->ref); 513 + return ctx; 514 + out: 515 + erofs_fscache_relinquish_cookie(ctx); 516 + return ERR_PTR(err); 517 + } 518 + 519 + static 520 + struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, 521 + char *name, bool need_inode) 522 + { 523 + struct inode *inode; 524 + struct erofs_fscache *ctx; 525 + struct erofs_domain *domain = EROFS_SB(sb)->domain; 526 + struct super_block *psb = erofs_pseudo_mnt->mnt_sb; 527 + 528 + mutex_lock(&erofs_domain_cookies_lock); 529 + list_for_each_entry(inode, &psb->s_inodes, i_sb_list) { 530 + ctx = inode->i_private; 531 + if (!ctx || ctx->domain != domain || strcmp(ctx->name, name)) 532 + continue; 533 + igrab(inode); 534 + mutex_unlock(&erofs_domain_cookies_lock); 535 + return ctx; 536 + } 537 + ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode); 538 + mutex_unlock(&erofs_domain_cookies_lock); 539 + return ctx; 540 + } 541 + 542 + struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, 543 + char *name, bool need_inode) 544 + { 545 + if (EROFS_SB(sb)->opt.domain_id) 546 + return erofs_domain_register_cookie(sb, name, need_inode); 547 + return erofs_fscache_acquire_cookie(sb, name, need_inode); 548 + } 549 + 550 + void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx) 551 + { 552 + bool drop; 553 + struct erofs_domain *domain; 544 554 545 555 if (!ctx) 546 556 return; 557 + domain = ctx->domain; 558 + if (domain) { 559 + mutex_lock(&erofs_domain_cookies_lock); 560 + drop = atomic_read(&ctx->anon_inode->i_count) == 1; 561 + iput(ctx->anon_inode); 562 + mutex_unlock(&erofs_domain_cookies_lock); 563 + if (!drop) 564 + return; 565 + } 547 566 548 - fscache_unuse_cookie(ctx->cookie, NULL, NULL); 549 - 
fscache_relinquish_cookie(ctx->cookie, false); 550 - ctx->cookie = NULL; 551 - 552 - iput(ctx->inode); 553 - ctx->inode = NULL; 554 - 555 - kfree(ctx); 556 - *fscache = NULL; 567 + erofs_fscache_relinquish_cookie(ctx); 568 + erofs_fscache_domain_put(domain); 557 569 } 558 570 559 571 int erofs_fscache_register_fs(struct super_block *sb) 560 572 { 573 + int ret; 561 574 struct erofs_sb_info *sbi = EROFS_SB(sb); 562 - struct fscache_volume *volume; 563 - char *name; 564 - int ret = 0; 575 + struct erofs_fscache *fscache; 565 576 566 - name = kasprintf(GFP_KERNEL, "erofs,%s", sbi->opt.fsid); 567 - if (!name) 568 - return -ENOMEM; 577 + if (sbi->opt.domain_id) 578 + ret = erofs_fscache_register_domain(sb); 579 + else 580 + ret = erofs_fscache_register_volume(sb); 581 + if (ret) 582 + return ret; 569 583 570 - volume = fscache_acquire_volume(name, NULL, NULL, 0); 571 - if (IS_ERR_OR_NULL(volume)) { 572 - erofs_err(sb, "failed to register volume for %s", name); 573 - ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP; 574 - volume = NULL; 575 - } 584 + /* acquired domain/volume will be relinquished in kill_sb() on error */ 585 + fscache = erofs_fscache_register_cookie(sb, sbi->opt.fsid, true); 586 + if (IS_ERR(fscache)) 587 + return PTR_ERR(fscache); 576 588 577 - sbi->volume = volume; 578 - kfree(name); 579 - return ret; 589 + sbi->s_fscache = fscache; 590 + return 0; 580 591 } 581 592 582 593 void erofs_fscache_unregister_fs(struct super_block *sb) 583 594 { 584 595 struct erofs_sb_info *sbi = EROFS_SB(sb); 585 596 586 - fscache_relinquish_volume(sbi->volume, NULL, false); 597 + erofs_fscache_unregister_cookie(sbi->s_fscache); 598 + 599 + if (sbi->domain) 600 + erofs_fscache_domain_put(sbi->domain); 601 + else 602 + fscache_relinquish_volume(sbi->volume, NULL, false); 603 + 604 + sbi->s_fscache = NULL; 587 605 sbi->volume = NULL; 606 + sbi->domain = NULL; 588 607 }
+9 -17
fs/erofs/inode.c
··· 214 214 215 215 /* if it cannot be handled with fast symlink scheme */ 216 216 if (vi->datalayout != EROFS_INODE_FLAT_INLINE || 217 - inode->i_size >= EROFS_BLKSIZ) { 217 + inode->i_size >= EROFS_BLKSIZ || inode->i_size < 0) { 218 218 inode->i_op = &erofs_symlink_iops; 219 219 return 0; 220 220 } ··· 241 241 return 0; 242 242 } 243 243 244 - static int erofs_fill_inode(struct inode *inode, int isdir) 244 + static int erofs_fill_inode(struct inode *inode) 245 245 { 246 246 struct erofs_inode *vi = EROFS_I(inode); 247 247 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; ··· 249 249 unsigned int ofs; 250 250 int err = 0; 251 251 252 - trace_erofs_fill_inode(inode, isdir); 252 + trace_erofs_fill_inode(inode); 253 253 254 254 /* read inode base data from disk */ 255 255 kaddr = erofs_read_inode(&buf, inode, &ofs); ··· 324 324 return 0; 325 325 } 326 326 327 - static inline struct inode *erofs_iget_locked(struct super_block *sb, 328 - erofs_nid_t nid) 327 + struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid) 329 328 { 330 329 const unsigned long hashval = erofs_inode_hash(nid); 330 + struct inode *inode; 331 331 332 - return iget5_locked(sb, hashval, erofs_ilookup_test_actor, 332 + inode = iget5_locked(sb, hashval, erofs_ilookup_test_actor, 333 333 erofs_iget_set_actor, &nid); 334 - } 335 - 336 - struct inode *erofs_iget(struct super_block *sb, 337 - erofs_nid_t nid, 338 - bool isdir) 339 - { 340 - struct inode *inode = erofs_iget_locked(sb, nid); 341 - 342 334 if (!inode) 343 335 return ERR_PTR(-ENOMEM); 344 336 ··· 340 348 341 349 vi->nid = nid; 342 350 343 - err = erofs_fill_inode(inode, isdir); 344 - if (!err) 351 + err = erofs_fill_inode(inode); 352 + if (!err) { 345 353 unlock_new_inode(inode); 346 - else { 354 + } else { 347 355 iget_failed(inode); 348 356 inode = ERR_PTR(err); 349 357 }
+41 -16
fs/erofs/internal.h
··· 76 76 #endif 77 77 unsigned int mount_opt; 78 78 char *fsid; 79 + char *domain_id; 79 80 }; 80 81 81 82 struct erofs_dev_context { ··· 99 98 u16 max_pclusterblks; 100 99 }; 101 100 101 + struct erofs_domain { 102 + refcount_t ref; 103 + struct list_head list; 104 + struct fscache_volume *volume; 105 + char *domain_id; 106 + }; 107 + 102 108 struct erofs_fscache { 103 109 struct fscache_cookie *cookie; 104 110 struct inode *inode; 111 + struct inode *anon_inode; 112 + struct erofs_domain *domain; 113 + char *name; 105 114 }; 106 115 107 116 struct erofs_sb_info { ··· 131 120 struct inode *managed_cache; 132 121 133 122 struct erofs_sb_lz4_info lz4; 123 + struct inode *packed_inode; 134 124 #endif /* CONFIG_EROFS_FS_ZIP */ 135 125 struct erofs_dev_context *devs; 136 126 struct dax_device *dax_dev; ··· 169 157 /* fscache support */ 170 158 struct fscache_volume *volume; 171 159 struct erofs_fscache *s_fscache; 160 + struct erofs_domain *domain; 172 161 }; 173 162 174 163 #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) ··· 196 183 EROFS_ZIP_CACHE_READAROUND 197 184 }; 198 185 199 - #ifdef CONFIG_EROFS_FS_ZIP 200 186 #define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) 201 187 202 188 /* basic unit of the workstation of a super_block */ ··· 235 223 return atomic_cond_read_relaxed(&grp->refcount, 236 224 VAL != EROFS_LOCKED_MAGIC); 237 225 } 238 - #endif /* !CONFIG_EROFS_FS_ZIP */ 239 226 240 227 /* we strictly follow PAGE_SIZE and no buffer head yet */ 241 228 #define LOG_BLOCK_SIZE PAGE_SHIFT ··· 288 277 EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) 289 278 EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2) 290 279 EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING) 280 + EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) 281 + EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) 291 282 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) 292 283 293 284 /* atomic flag definitions */ 
··· 325 312 unsigned char z_algorithmtype[2]; 326 313 unsigned char z_logical_clusterbits; 327 314 unsigned long z_tailextent_headlcn; 328 - erofs_off_t z_idataoff; 329 - unsigned short z_idata_size; 315 + union { 316 + struct { 317 + erofs_off_t z_idataoff; 318 + unsigned short z_idata_size; 319 + }; 320 + erofs_off_t z_fragmentoff; 321 + }; 330 322 }; 331 323 #endif /* CONFIG_EROFS_FS_ZIP */ 332 324 }; ··· 382 364 } 383 365 384 366 extern const struct super_operations erofs_sops; 367 + extern struct file_system_type erofs_fs_type; 385 368 386 369 extern const struct address_space_operations erofs_raw_access_aops; 387 370 extern const struct address_space_operations z_erofs_aops; ··· 390 371 enum { 391 372 BH_Encoded = BH_PrivateStart, 392 373 BH_FullMapped, 374 + BH_Fragment, 375 + BH_Partialref, 393 376 }; 394 377 395 378 /* Has a disk mapping */ ··· 402 381 #define EROFS_MAP_ENCODED (1 << BH_Encoded) 403 382 /* The length of extent is full */ 404 383 #define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) 384 + /* Located in the special packed inode */ 385 + #define EROFS_MAP_FRAGMENT (1 << BH_Fragment) 386 + /* The extent refers to partial decompressed data */ 387 + #define EROFS_MAP_PARTIAL_REF (1 << BH_Partialref) 405 388 406 389 struct erofs_map_blocks { 407 390 struct erofs_buf buf; ··· 427 402 #define EROFS_GET_BLOCKS_FIEMAP 0x0002 428 403 /* Used to map the whole extent if non-negligible data is requested for LZMA */ 429 404 #define EROFS_GET_BLOCKS_READMORE 0x0004 430 - /* Used to map tail extent for tailpacking inline pcluster */ 405 + /* Used to map tail extent for tailpacking inline or fragment pcluster */ 431 406 #define EROFS_GET_BLOCKS_FINDTAIL 0x0008 432 407 433 408 enum { 434 409 Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, 410 + Z_EROFS_COMPRESSION_INTERLACED, 435 411 Z_EROFS_COMPRESSION_RUNTIME_MAX 436 412 }; 437 413 ··· 492 466 extern const struct inode_operations erofs_symlink_iops; 493 467 extern const struct inode_operations 
erofs_fast_symlink_iops; 494 468 495 - struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir); 469 + struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); 496 470 int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, 497 471 struct kstat *stat, u32 request_mask, 498 472 unsigned int query_flags); ··· 607 581 int erofs_fscache_register_fs(struct super_block *sb); 608 582 void erofs_fscache_unregister_fs(struct super_block *sb); 609 583 610 - int erofs_fscache_register_cookie(struct super_block *sb, 611 - struct erofs_fscache **fscache, 612 - char *name, bool need_inode); 613 - void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache); 584 + struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, 585 + char *name, bool need_inode); 586 + void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); 614 587 615 588 extern const struct address_space_operations erofs_fscache_access_aops; 616 589 #else 617 590 static inline int erofs_fscache_register_fs(struct super_block *sb) 618 591 { 619 - return 0; 592 + return -EOPNOTSUPP; 620 593 } 621 594 static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} 622 595 623 - static inline int erofs_fscache_register_cookie(struct super_block *sb, 624 - struct erofs_fscache **fscache, 625 - char *name, bool need_inode) 596 + static inline 597 + struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, 598 + char *name, bool need_inode) 626 599 { 627 - return -EOPNOTSUPP; 600 + return ERR_PTR(-EOPNOTSUPP); 628 601 } 629 602 630 - static inline void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) 603 + static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache) 631 604 { 632 605 } 633 606 #endif
+2 -11
fs/erofs/namei.c
··· 185 185 if (IS_ERR(de)) 186 186 return PTR_ERR(de); 187 187 188 - /* the target page has been mapped */ 189 188 if (ndirents) 190 189 de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents); 191 190 ··· 196 197 return PTR_ERR_OR_ZERO(de); 197 198 } 198 199 199 - /* NOTE: i_mutex is already held by vfs */ 200 - static struct dentry *erofs_lookup(struct inode *dir, 201 - struct dentry *dentry, 200 + static struct dentry *erofs_lookup(struct inode *dir, struct dentry *dentry, 202 201 unsigned int flags) 203 202 { 204 203 int err; ··· 204 207 unsigned int d_type; 205 208 struct inode *inode; 206 209 207 - DBG_BUGON(!d_really_is_negative(dentry)); 208 - /* dentry must be unhashed in lookup, no need to worry about */ 209 - DBG_BUGON(!d_unhashed(dentry)); 210 - 211 210 trace_erofs_lookup(dir, dentry, flags); 212 211 213 - /* file name exceeds fs limit */ 214 212 if (dentry->d_name.len > EROFS_NAME_LEN) 215 213 return ERR_PTR(-ENAMETOOLONG); 216 214 217 - /* false uninitialized warnings on gcc 4.8.x */ 218 215 err = erofs_namei(dir, &dentry->d_name, &nid, &d_type); 219 216 220 217 if (err == -ENOENT) { ··· 219 228 } else { 220 229 erofs_dbg("%s, %pd (nid %llu) found, d_type %u", __func__, 221 230 dentry, nid, d_type); 222 - inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR); 231 + inode = erofs_iget(dir->i_sb, nid); 223 232 } 224 233 return d_splice_alias(inode, dentry); 225 234 }
+77 -19
fs/erofs/super.c
··· 224 224 struct erofs_device_info *dif, erofs_off_t *pos) 225 225 { 226 226 struct erofs_sb_info *sbi = EROFS_SB(sb); 227 + struct erofs_fscache *fscache; 227 228 struct erofs_deviceslot *dis; 228 229 struct block_device *bdev; 229 230 void *ptr; 230 - int ret; 231 231 232 232 ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP); 233 233 if (IS_ERR(ptr)) ··· 245 245 } 246 246 247 247 if (erofs_is_fscache_mode(sb)) { 248 - ret = erofs_fscache_register_cookie(sb, &dif->fscache, 249 - dif->path, false); 250 - if (ret) 251 - return ret; 248 + fscache = erofs_fscache_register_cookie(sb, dif->path, false); 249 + if (IS_ERR(fscache)) 250 + return PTR_ERR(fscache); 251 + dif->fscache = fscache; 252 252 } else { 253 253 bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, 254 254 sb->s_type); ··· 381 381 #endif 382 382 sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); 383 383 sbi->root_nid = le16_to_cpu(dsb->root_nid); 384 + #ifdef CONFIG_EROFS_FS_ZIP 385 + sbi->packed_inode = NULL; 386 + if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) { 387 + sbi->packed_inode = 388 + erofs_iget(sb, le64_to_cpu(dsb->packed_nid)); 389 + if (IS_ERR(sbi->packed_inode)) { 390 + ret = PTR_ERR(sbi->packed_inode); 391 + goto out; 392 + } 393 + } 394 + #endif 384 395 sbi->inos = le64_to_cpu(dsb->inos); 385 396 386 397 sbi->build_time = le64_to_cpu(dsb->build_time); ··· 422 411 erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!"); 423 412 if (erofs_is_fscache_mode(sb)) 424 413 erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!"); 414 + if (erofs_sb_has_fragments(sbi)) 415 + erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!"); 416 + if (erofs_sb_has_dedupe(sbi)) 417 + erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. 
Use at your own risk!"); 425 418 out: 426 419 erofs_put_metabuf(&buf); 427 420 return ret; ··· 455 440 Opt_dax_enum, 456 441 Opt_device, 457 442 Opt_fsid, 443 + Opt_domain_id, 458 444 Opt_err 459 445 }; 460 446 ··· 481 465 fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums), 482 466 fsparam_string("device", Opt_device), 483 467 fsparam_string("fsid", Opt_fsid), 468 + fsparam_string("domain_id", Opt_domain_id), 484 469 {} 485 470 }; 486 471 ··· 587 570 errorfc(fc, "fsid option not supported"); 588 571 #endif 589 572 break; 573 + case Opt_domain_id: 574 + #ifdef CONFIG_EROFS_FS_ONDEMAND 575 + kfree(ctx->opt.domain_id); 576 + ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL); 577 + if (!ctx->opt.domain_id) 578 + return -ENOMEM; 579 + #else 580 + errorfc(fc, "domain_id option not supported"); 581 + #endif 582 + break; 590 583 default: 591 584 return -ENOPARAM; 592 585 } ··· 668 641 static struct inode *erofs_nfs_get_inode(struct super_block *sb, 669 642 u64 ino, u32 generation) 670 643 { 671 - return erofs_iget(sb, ino, false); 644 + return erofs_iget(sb, ino); 672 645 } 673 646 674 647 static struct dentry *erofs_fh_to_dentry(struct super_block *sb, ··· 694 667 err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type); 695 668 if (err) 696 669 return ERR_PTR(err); 697 - return d_obtain_alias(erofs_iget(child->d_sb, nid, d_type == FT_DIR)); 670 + return d_obtain_alias(erofs_iget(child->d_sb, nid)); 698 671 } 699 672 700 673 static const struct export_operations erofs_export_ops = { ··· 702 675 .fh_to_parent = erofs_fh_to_parent, 703 676 .get_parent = erofs_get_parent, 704 677 }; 678 + 679 + static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc) 680 + { 681 + static const struct tree_descr empty_descr = {""}; 682 + 683 + return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr); 684 + } 705 685 706 686 static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) 707 687 { ··· 729 695 sb->s_fs_info = sbi; 
730 696 sbi->opt = ctx->opt; 731 697 ctx->opt.fsid = NULL; 698 + ctx->opt.domain_id = NULL; 732 699 sbi->devs = ctx->devs; 733 700 ctx->devs = NULL; 734 701 ··· 738 703 sb->s_blocksize_bits = LOG_BLOCK_SIZE; 739 704 740 705 err = erofs_fscache_register_fs(sb); 741 - if (err) 742 - return err; 743 - 744 - err = erofs_fscache_register_cookie(sb, &sbi->s_fscache, 745 - sbi->opt.fsid, true); 746 706 if (err) 747 707 return err; 748 708 ··· 782 752 #endif 783 753 784 754 /* get the root inode */ 785 - inode = erofs_iget(sb, ROOT_NID(sbi), true); 755 + inode = erofs_iget(sb, ROOT_NID(sbi)); 786 756 if (IS_ERR(inode)) 787 757 return PTR_ERR(inode); 788 758 ··· 809 779 810 780 erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi)); 811 781 return 0; 782 + } 783 + 784 + static int erofs_fc_anon_get_tree(struct fs_context *fc) 785 + { 786 + return get_tree_nodev(fc, erofs_fc_fill_pseudo_super); 812 787 } 813 788 814 789 static int erofs_fc_get_tree(struct fs_context *fc) ··· 852 817 fs_put_dax(dif->dax_dev, NULL); 853 818 if (dif->bdev) 854 819 blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); 855 - erofs_fscache_unregister_cookie(&dif->fscache); 820 + erofs_fscache_unregister_cookie(dif->fscache); 821 + dif->fscache = NULL; 856 822 kfree(dif->path); 857 823 kfree(dif); 858 824 return 0; ··· 874 838 875 839 erofs_free_dev_context(ctx->devs); 876 840 kfree(ctx->opt.fsid); 841 + kfree(ctx->opt.domain_id); 877 842 kfree(ctx); 878 843 } 879 844 ··· 885 848 .free = erofs_fc_free, 886 849 }; 887 850 851 + static const struct fs_context_operations erofs_anon_context_ops = { 852 + .get_tree = erofs_fc_anon_get_tree, 853 + }; 854 + 888 855 static int erofs_init_fs_context(struct fs_context *fc) 889 856 { 890 - struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 857 + struct erofs_fs_context *ctx; 891 858 859 + /* pseudo mount for anon inodes */ 860 + if (fc->sb_flags & SB_KERNMOUNT) { 861 + fc->ops = &erofs_anon_context_ops; 862 + return 0; 863 + } 864 + 865 
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 892 866 if (!ctx) 893 867 return -ENOMEM; 894 868 ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); ··· 926 878 927 879 WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); 928 880 881 + /* pseudo mount for anon inodes */ 882 + if (sb->s_flags & SB_KERNMOUNT) { 883 + kill_anon_super(sb); 884 + return; 885 + } 886 + 929 887 if (erofs_is_fscache_mode(sb)) 930 - generic_shutdown_super(sb); 888 + kill_anon_super(sb); 931 889 else 932 890 kill_block_super(sb); 933 891 ··· 943 889 944 890 erofs_free_dev_context(sbi->devs); 945 891 fs_put_dax(sbi->dax_dev, NULL); 946 - erofs_fscache_unregister_cookie(&sbi->s_fscache); 947 892 erofs_fscache_unregister_fs(sb); 948 893 kfree(sbi->opt.fsid); 894 + kfree(sbi->opt.domain_id); 949 895 kfree(sbi); 950 896 sb->s_fs_info = NULL; 951 897 } ··· 962 908 #ifdef CONFIG_EROFS_FS_ZIP 963 909 iput(sbi->managed_cache); 964 910 sbi->managed_cache = NULL; 911 + iput(sbi->packed_inode); 912 + sbi->packed_inode = NULL; 965 913 #endif 966 - erofs_fscache_unregister_cookie(&sbi->s_fscache); 914 + erofs_fscache_unregister_fs(sb); 967 915 } 968 916 969 - static struct file_system_type erofs_fs_type = { 917 + struct file_system_type erofs_fs_type = { 970 918 .owner = THIS_MODULE, 971 919 .name = "erofs", 972 920 .init_fs_context = erofs_init_fs_context, ··· 1100 1044 #ifdef CONFIG_EROFS_FS_ONDEMAND 1101 1045 if (opt->fsid) 1102 1046 seq_printf(seq, ",fsid=%s", opt->fsid); 1047 + if (opt->domain_id) 1048 + seq_printf(seq, ",domain_id=%s", opt->domain_id); 1103 1049 #endif 1104 1050 return 0; 1105 1051 }
+21 -2
fs/erofs/sysfs.c
··· 76 76 EROFS_ATTR_FEATURE(compr_head2); 77 77 EROFS_ATTR_FEATURE(sb_chksum); 78 78 EROFS_ATTR_FEATURE(ztailpacking); 79 + EROFS_ATTR_FEATURE(fragments); 80 + EROFS_ATTR_FEATURE(dedupe); 79 81 80 82 static struct attribute *erofs_feat_attrs[] = { 81 83 ATTR_LIST(zero_padding), ··· 88 86 ATTR_LIST(compr_head2), 89 87 ATTR_LIST(sb_chksum), 90 88 ATTR_LIST(ztailpacking), 89 + ATTR_LIST(fragments), 90 + ATTR_LIST(dedupe), 91 91 NULL, 92 92 }; 93 93 ATTRIBUTE_GROUPS(erofs_feat); ··· 205 201 int erofs_register_sysfs(struct super_block *sb) 206 202 { 207 203 struct erofs_sb_info *sbi = EROFS_SB(sb); 204 + char *name; 205 + char *str = NULL; 208 206 int err; 209 207 208 + if (erofs_is_fscache_mode(sb)) { 209 + if (sbi->opt.domain_id) { 210 + str = kasprintf(GFP_KERNEL, "%s,%s", sbi->opt.domain_id, 211 + sbi->opt.fsid); 212 + if (!str) 213 + return -ENOMEM; 214 + name = str; 215 + } else { 216 + name = sbi->opt.fsid; 217 + } 218 + } else { 219 + name = sb->s_id; 220 + } 210 221 sbi->s_kobj.kset = &erofs_root; 211 222 init_completion(&sbi->s_kobj_unregister); 212 - err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", 213 - erofs_is_fscache_mode(sb) ? sbi->opt.fsid : sb->s_id); 223 + err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", name); 224 + kfree(str); 214 225 if (err) 215 226 goto put_sb_kobj; 216 227 return 0;
-2
fs/erofs/xattr.h
··· 39 39 #ifdef CONFIG_EROFS_FS_XATTR 40 40 extern const struct xattr_handler erofs_xattr_user_handler; 41 41 extern const struct xattr_handler erofs_xattr_trusted_handler; 42 - #ifdef CONFIG_EROFS_FS_SECURITY 43 42 extern const struct xattr_handler erofs_xattr_security_handler; 44 - #endif 45 43 46 44 static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx) 47 45 {
+50 -1
fs/erofs/zdata.c
··· 650 650 la < fe->headoffset; 651 651 } 652 652 653 + static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, 654 + struct page *page, unsigned int pageofs, 655 + unsigned int len) 656 + { 657 + struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode; 658 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 659 + u8 *src, *dst; 660 + unsigned int i, cnt; 661 + 662 + pos += EROFS_I(inode)->z_fragmentoff; 663 + for (i = 0; i < len; i += cnt) { 664 + cnt = min_t(unsigned int, len - i, 665 + EROFS_BLKSIZ - erofs_blkoff(pos)); 666 + src = erofs_bread(&buf, packed_inode, 667 + erofs_blknr(pos), EROFS_KMAP); 668 + if (IS_ERR(src)) { 669 + erofs_put_metabuf(&buf); 670 + return PTR_ERR(src); 671 + } 672 + 673 + dst = kmap_local_page(page); 674 + memcpy(dst + pageofs + i, src + erofs_blkoff(pos), cnt); 675 + kunmap_local(dst); 676 + pos += cnt; 677 + } 678 + erofs_put_metabuf(&buf); 679 + return 0; 680 + } 681 + 653 682 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, 654 683 struct page *page, struct page **pagepool) 655 684 { ··· 717 688 /* didn't get a valid pcluster previously (very rare) */ 718 689 } 719 690 720 - if (!(map->m_flags & EROFS_MAP_MAPPED)) 691 + if (!(map->m_flags & EROFS_MAP_MAPPED) || 692 + map->m_flags & EROFS_MAP_FRAGMENT) 721 693 goto hitted; 722 694 723 695 err = z_erofs_collector_begin(fe); ··· 765 735 zero_user_segment(page, cur, end); 766 736 goto next_part; 767 737 } 738 + if (map->m_flags & EROFS_MAP_FRAGMENT) { 739 + unsigned int pageofs, skip, len; 740 + 741 + if (offset > map->m_la) { 742 + pageofs = 0; 743 + skip = offset - map->m_la; 744 + } else { 745 + pageofs = map->m_la & ~PAGE_MASK; 746 + skip = 0; 747 + } 748 + len = min_t(unsigned int, map->m_llen - skip, end - cur); 749 + err = z_erofs_read_fragment(inode, skip, page, pageofs, len); 750 + if (err) 751 + goto out; 752 + ++spiltted; 753 + tight = false; 754 + goto next_part; 755 + } 768 756 769 757 exclusive = (!cur && (!spiltted || tight)); 
770 758 if (cur) ··· 814 766 fe->pcl->multibases = true; 815 767 816 768 if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && 769 + !(map->m_flags & EROFS_MAP_PARTIAL_REF) && 817 770 fe->pcl->length == map->m_llen) 818 771 fe->pcl->partial = false; 819 772 if (fe->pcl->length < offset + end - map->m_la) {
+70 -34
fs/erofs/zmap.c
··· 17 17 struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); 18 18 19 19 if (!erofs_sb_has_big_pcluster(sbi) && 20 - !erofs_sb_has_ztailpacking(sbi) && 20 + !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) && 21 21 vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { 22 22 vi->z_advise = 0; 23 23 vi->z_algorithmtype[0] = 0; ··· 55 55 if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) 56 56 goto out_unlock; 57 57 58 - DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && 59 - !erofs_sb_has_ztailpacking(EROFS_SB(sb)) && 60 - vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); 61 - 62 58 pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + 63 59 vi->xattr_isize, 8); 64 60 kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), ··· 65 69 } 66 70 67 71 h = kaddr + erofs_blkoff(pos); 72 + /* 73 + * if the highest bit of the 8-byte map header is set, the whole file 74 + * is stored in the packed inode. The rest bits keeps z_fragmentoff. 75 + */ 76 + if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) { 77 + vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; 78 + vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63); 79 + vi->z_tailextent_headlcn = 0; 80 + goto unmap_done; 81 + } 68 82 vi->z_advise = le16_to_cpu(h->h_advise); 69 83 vi->z_algorithmtype[0] = h->h_algorithmtype & 15; 70 84 vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; ··· 129 123 if (err < 0) 130 124 goto out_unlock; 131 125 } 126 + 127 + if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && 128 + !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) { 129 + struct erofs_map_blocks map = { 130 + .buf = __EROFS_BUF_INITIALIZER 131 + }; 132 + 133 + vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); 134 + err = z_erofs_do_map_blocks(inode, &map, 135 + EROFS_GET_BLOCKS_FINDTAIL); 136 + erofs_put_metabuf(&map.buf); 137 + if (err < 0) 138 + goto out_unlock; 139 + } 132 140 /* paired with smp_mb() at the beginning of the function */ 133 141 smp_mb(); 134 142 
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); ··· 163 143 u16 delta[2]; 164 144 erofs_blk_t pblk, compressedblks; 165 145 erofs_off_t nextpackoff; 146 + bool partialref; 166 147 }; 167 - 168 - static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, 169 - erofs_blk_t eblk) 170 - { 171 - struct super_block *const sb = m->inode->i_sb; 172 - 173 - m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk, 174 - EROFS_KMAP_ATOMIC); 175 - if (IS_ERR(m->kaddr)) 176 - return PTR_ERR(m->kaddr); 177 - return 0; 178 - } 179 148 180 149 static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, 181 150 unsigned long lcn) ··· 178 169 lcn * sizeof(struct z_erofs_vle_decompressed_index); 179 170 struct z_erofs_vle_decompressed_index *di; 180 171 unsigned int advise, type; 181 - int err; 182 172 183 - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); 184 - if (err) 185 - return err; 173 + m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, 174 + erofs_blknr(pos), EROFS_KMAP_ATOMIC); 175 + if (IS_ERR(m->kaddr)) 176 + return PTR_ERR(m->kaddr); 186 177 187 178 m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index); 188 179 m->lcn = lcn; ··· 210 201 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: 211 202 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: 212 203 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: 204 + if (advise & Z_EROFS_VLE_DI_PARTIAL_REF) 205 + m->partialref = true; 213 206 m->clusterofs = le16_to_cpu(di->di_clusterofs); 214 207 m->pblk = le32_to_cpu(di->di_u.blkaddr); 215 208 break; ··· 381 370 unsigned int compacted_4b_initial, compacted_2b; 382 371 unsigned int amortizedshift; 383 372 erofs_off_t pos; 384 - int err; 385 373 386 374 if (lclusterbits != 12) 387 375 return -EOPNOTSUPP; ··· 417 407 amortizedshift = 2; 418 408 out: 419 409 pos += lcn * (1 << amortizedshift); 420 - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); 421 - if (err) 422 - return err; 410 + m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, 411 + erofs_blknr(pos), EROFS_KMAP_ATOMIC); 412 
+ if (IS_ERR(m->kaddr)) 413 + return PTR_ERR(m->kaddr); 423 414 return unpack_compacted_index(m, amortizedshift, pos, lookahead); 424 415 } 425 416 ··· 609 598 { 610 599 struct erofs_inode *const vi = EROFS_I(inode); 611 600 bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER; 601 + bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; 612 602 struct z_erofs_maprecorder m = { 613 603 .inode = inode, 614 604 .map = map, ··· 675 663 err = -EOPNOTSUPP; 676 664 goto unmap_out; 677 665 } 678 - 666 + if (m.partialref) 667 + map->m_flags |= EROFS_MAP_PARTIAL_REF; 679 668 map->m_llen = end - map->m_la; 680 669 681 - if (flags & EROFS_GET_BLOCKS_FINDTAIL) 670 + if (flags & EROFS_GET_BLOCKS_FINDTAIL) { 682 671 vi->z_tailextent_headlcn = m.lcn; 672 + /* for non-compact indexes, fragmentoff is 64 bits */ 673 + if (fragment && 674 + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) 675 + vi->z_fragmentoff |= (u64)m.pblk << 32; 676 + } 683 677 if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { 684 678 map->m_flags |= EROFS_MAP_META; 685 679 map->m_pa = vi->z_idataoff; 686 680 map->m_plen = vi->z_idata_size; 681 + } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { 682 + map->m_flags |= EROFS_MAP_FRAGMENT; 687 683 } else { 688 684 map->m_pa = blknr_to_addr(m.pblk); 689 685 err = z_erofs_get_extent_compressedlen(&m, initial_lcn); ··· 699 679 goto out; 700 680 } 701 681 702 - if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) 703 - map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; 704 - else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) 682 + if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) { 683 + if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) 684 + map->m_algorithmformat = 685 + Z_EROFS_COMPRESSION_INTERLACED; 686 + else 687 + map->m_algorithmformat = 688 + Z_EROFS_COMPRESSION_SHIFTED; 689 + } else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) { 705 690 map->m_algorithmformat = vi->z_algorithmtype[1]; 706 - else 691 
+ } else { 707 692 map->m_algorithmformat = vi->z_algorithmtype[0]; 693 + } 708 694 709 695 if ((flags & EROFS_GET_BLOCKS_FIEMAP) || 710 696 ((flags & EROFS_GET_BLOCKS_READMORE) && ··· 731 705 return err; 732 706 } 733 707 734 - int z_erofs_map_blocks_iter(struct inode *inode, 735 - struct erofs_map_blocks *map, 708 + int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, 736 709 int flags) 737 710 { 711 + struct erofs_inode *const vi = EROFS_I(inode); 738 712 int err = 0; 739 713 740 714 trace_z_erofs_map_blocks_iter_enter(inode, map, flags); ··· 750 724 err = z_erofs_fill_inode_lazy(inode); 751 725 if (err) 752 726 goto out; 727 + 728 + if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) && 729 + !vi->z_tailextent_headlcn) { 730 + map->m_la = 0; 731 + map->m_llen = inode->i_size; 732 + map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | 733 + EROFS_MAP_FRAGMENT; 734 + goto out; 735 + } 753 736 754 737 err = z_erofs_do_map_blocks(inode, map, flags); 755 738 out: ··· 786 751 iomap->length = map.m_llen; 787 752 if (map.m_flags & EROFS_MAP_MAPPED) { 788 753 iomap->type = IOMAP_MAPPED; 789 - iomap->addr = map.m_pa; 754 + iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ? 755 + IOMAP_NULL_ADDR : map.m_pa; 790 756 } else { 791 757 iomap->type = IOMAP_HOLE; 792 758 iomap->addr = IOMAP_NULL_ADDR;
+4 -7
include/trace/events/erofs.h
··· 53 53 ); 54 54 55 55 TRACE_EVENT(erofs_fill_inode, 56 - TP_PROTO(struct inode *inode, int isdir), 57 - TP_ARGS(inode, isdir), 56 + TP_PROTO(struct inode *inode), 57 + TP_ARGS(inode), 58 58 59 59 TP_STRUCT__entry( 60 60 __field(dev_t, dev ) 61 61 __field(erofs_nid_t, nid ) 62 62 __field(erofs_blk_t, blkaddr ) 63 63 __field(unsigned int, ofs ) 64 - __field(int, isdir ) 65 64 ), 66 65 67 66 TP_fast_assign( ··· 68 69 __entry->nid = EROFS_I(inode)->nid; 69 70 __entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid)); 70 71 __entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid)); 71 - __entry->isdir = isdir; 72 72 ), 73 73 74 - TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d", 74 + TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u", 75 75 show_dev_nid(__entry), 76 - __entry->blkaddr, __entry->ofs, 77 - __entry->isdir) 76 + __entry->blkaddr, __entry->ofs) 78 77 ); 79 78 80 79 TRACE_EVENT(erofs_readpage,