Merge tag 'erofs-for-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

+28 -19

fs/erofs/decompressor.c

··· 317 317 return ret; 318 318 } 319 319 320 - static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, 321 - struct page **pagepool) 320 + static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, 321 + struct page **pagepool) 322 322 { 323 - const unsigned int nrpages_out = 323 + const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; 324 + const unsigned int outpages = 324 325 PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; 325 326 const unsigned int righthalf = min_t(unsigned int, rq->outputsize, 326 327 PAGE_SIZE - rq->pageofs_out); 327 328 const unsigned int lefthalf = rq->outputsize - righthalf; 329 + const unsigned int interlaced_offset = 330 + rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out; 328 331 unsigned char *src, *dst; 329 332 330 - if (nrpages_out > 2) { 333 + if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) { 331 334 DBG_BUGON(1); 332 - return -EIO; 335 + return -EFSCORRUPTED; 333 336 } 334 337 335 338 if (rq->out[0] == *rq->in) { 336 - DBG_BUGON(nrpages_out != 1); 339 + DBG_BUGON(rq->pageofs_out); 337 340 return 0; 338 341 } 339 342 340 - src = kmap_atomic(*rq->in) + rq->pageofs_in; 343 + src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in; 341 344 if (rq->out[0]) { 342 - dst = kmap_atomic(rq->out[0]); 343 - memcpy(dst + rq->pageofs_out, src, righthalf); 344 - kunmap_atomic(dst); 345 + dst = kmap_local_page(rq->out[0]); 346 + memcpy(dst + rq->pageofs_out, src + interlaced_offset, 347 + righthalf); 348 + kunmap_local(dst); 345 349 } 346 350 347 - if (nrpages_out == 2) { 348 - DBG_BUGON(!rq->out[1]); 349 - if (rq->out[1] == *rq->in) { 351 + if (outpages > inpages) { 352 + DBG_BUGON(!rq->out[outpages - 1]); 353 + if (rq->out[outpages - 1] != rq->in[inpages - 1]) { 354 + dst = kmap_local_page(rq->out[outpages - 1]); 355 + memcpy(dst, interlaced_offset ? src : 356 + (src + righthalf), lefthalf); 357 + kunmap_local(dst); 358 + } else if (!interlaced_offset) { 350 359 memmove(src, src + righthalf, lefthalf); 351 - } else { 352 - dst = kmap_atomic(rq->out[1]); 353 - memcpy(dst, src + righthalf, lefthalf); 354 - kunmap_atomic(dst); 355 360 } 356 361 } 357 - kunmap_atomic(src); 362 + kunmap_local(src); 358 363 return 0; 359 364 } 360 365 361 366 static struct z_erofs_decompressor decompressors[] = { 362 367 [Z_EROFS_COMPRESSION_SHIFTED] = { 363 - .decompress = z_erofs_shifted_transform, 368 + .decompress = z_erofs_transform_plain, 364 369 .name = "shifted" 370 + }, 371 + [Z_EROFS_COMPRESSION_INTERLACED] = { 372 + .decompress = z_erofs_transform_plain, 373 + .name = "interlaced" 365 374 }, 366 375 [Z_EROFS_COMPRESSION_LZ4] = { 367 376 .decompress = z_erofs_lz4_decompress,

+3

fs/erofs/decompressor_lzma.c

··· 217 217 strm->buf.out_size = min_t(u32, outlen, 218 218 PAGE_SIZE - pageofs); 219 219 outlen -= strm->buf.out_size; 220 + if (!rq->out[no] && rq->fillgaps) /* deduped */ 221 + rq->out[no] = erofs_allocpage(pagepool, 222 + GFP_KERNEL | __GFP_NOFAIL); 220 223 if (rq->out[no]) 221 224 strm->buf.out = kmap(rq->out[no]) + pageofs; 222 225 pageofs = 0;

+34 -6

fs/erofs/erofs_fs.h

··· 25 25 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 26 26 #define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008 27 27 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010 28 + #define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020 29 + #define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 28 30 #define EROFS_ALL_FEATURE_INCOMPAT \ 29 31 (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ 30 32 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ ··· 34 32 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ 35 33 EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ 36 34 EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ 37 - EROFS_FEATURE_INCOMPAT_ZTAILPACKING) 35 + EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \ 36 + EROFS_FEATURE_INCOMPAT_FRAGMENTS | \ 37 + EROFS_FEATURE_INCOMPAT_DEDUPE) 38 38 39 39 #define EROFS_SB_EXTSLOT_SIZE 16 40 40 ··· 75 71 } __packed u1; 76 72 __le16 extra_devices; /* # of devices besides the primary device */ 77 73 __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ 78 - __u8 reserved2[38]; 74 + __u8 reserved[6]; 75 + __le64 packed_nid; /* nid of the special packed inode */ 76 + __u8 reserved2[24]; 79 77 }; 80 78 81 79 /* ··· 301 295 * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) 302 296 * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) 303 297 * bit 3 : tailpacking inline pcluster (0 - off; 1 - on) 298 + * bit 4 : interlaced plain pcluster (0 - off; 1 - on) 299 + * bit 5 : fragment pcluster (0 - off; 1 - on) 304 300 */ 305 301 #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 306 302 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 307 303 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 308 304 #define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008 305 + #define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010 306 + #define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020 309 307 308 + #define Z_EROFS_FRAGMENT_INODE_BIT 7 310 309 struct z_erofs_map_header { 311 - __le16 h_reserved1; 312 - /* indicates the encoded size of tailpacking data */ 313 - __le16 h_idata_size; 310 + union { 311 + /* fragment data offset in the packed inode */ 312 + __le32 h_fragmentoff; 313 + struct { 314 + __le16 h_reserved1; 315 + /* indicates the encoded size of tailpacking data */ 316 + __le16 h_idata_size; 317 + }; 318 + }; 314 319 __le16 h_advise; 315 320 /* 316 321 * bit 0-3 : algorithm type of head 1 (logical cluster type 01); ··· 330 313 __u8 h_algorithmtype; 331 314 /* 332 315 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; 333 - * bit 3-7 : reserved. 316 + * bit 3-6 : reserved; 317 + * bit 7 : move the whole file into packed inode or not. 334 318 */ 335 319 __u8 h_clusterbits; 336 320 }; ··· 372 354 373 355 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 374 356 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 357 + 358 + /* (noncompact only, HEAD) This pcluster refers to partial decompressed data */ 359 + #define Z_EROFS_VLE_DI_PARTIAL_REF (1 << 15) 375 360 376 361 /* 377 362 * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the ··· 423 402 /* check the EROFS on-disk layout strictly at compile time */ 424 403 static inline void erofs_check_ondisk_layout_definitions(void) 425 404 { 405 + const __le64 fmh = *(__le64 *)&(struct z_erofs_map_header) { 406 + .h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT 407 + }; 408 + 426 409 BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); 427 410 BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); 428 411 BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); ··· 444 419 445 420 BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < 446 421 Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); 422 + /* exclude old compiler versions like gcc 7.5.0 */ 423 + BUILD_BUG_ON(__builtin_constant_p(fmh) ? 424 + fmh != cpu_to_le64(1ULL << 63) : 0); 447 425 } 448 426 449 427 #endif

+313 -166

fs/erofs/fscache.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0-or-later 2 2 /* 3 3 * Copyright (C) 2022, Alibaba Cloud 4 + * Copyright (C) 2022, Bytedance Inc. All rights reserved. 4 5 */ 5 6 #include <linux/fscache.h> 6 7 #include "internal.h" 8 + 9 + static DEFINE_MUTEX(erofs_domain_list_lock); 10 + static DEFINE_MUTEX(erofs_domain_cookies_lock); 11 + static LIST_HEAD(erofs_domain_list); 12 + static struct vfsmount *erofs_pseudo_mnt; 7 13 8 14 static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping, 9 15 loff_t start, size_t len) ··· 240 234 return ret; 241 235 } 242 236 243 - static int erofs_fscache_read_folio_inline(struct folio *folio, 244 - struct erofs_map_blocks *map) 237 + /* 238 + * Read into page cache in the range described by (@pos, @len). 239 + * 240 + * On return, the caller is responsible for page unlocking if the output @unlock 241 + * is true, or the callee will take this responsibility through netfs_io_request 242 + * interface. 243 + * 244 + * The return value is the number of bytes successfully handled, or negative 245 + * error code on failure. The only exception is that, the length of the range 246 + * instead of the error code is returned on failure after netfs_io_request is 247 + * allocated, so that .readahead() could advance rac accordingly. 248 + */ 249 + static int erofs_fscache_data_read(struct address_space *mapping, 250 + loff_t pos, size_t len, bool *unlock) 245 251 { 246 - struct super_block *sb = folio_mapping(folio)->host->i_sb; 247 - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 248 - erofs_blk_t blknr; 249 - size_t offset, len; 250 - void *src, *dst; 251 - 252 - /* For tail packing layout, the offset may be non-zero. */ 253 - offset = erofs_blkoff(map->m_pa); 254 - blknr = erofs_blknr(map->m_pa); 255 - len = map->m_llen; 256 - 257 - src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); 258 - if (IS_ERR(src)) 259 - return PTR_ERR(src); 260 - 261 - dst = kmap_local_folio(folio, 0); 262 - memcpy(dst, src + offset, len); 263 - memset(dst + len, 0, PAGE_SIZE - len); 264 - kunmap_local(dst); 265 - 266 - erofs_put_metabuf(&buf); 267 - return 0; 268 - } 269 - 270 - static int erofs_fscache_read_folio(struct file *file, struct folio *folio) 271 - { 272 - struct inode *inode = folio_mapping(folio)->host; 252 + struct inode *inode = mapping->host; 273 253 struct super_block *sb = inode->i_sb; 254 + struct netfs_io_request *rreq; 274 255 struct erofs_map_blocks map; 275 256 struct erofs_map_dev mdev; 276 - struct netfs_io_request *rreq; 277 - erofs_off_t pos; 278 - loff_t pstart; 257 + struct iov_iter iter; 258 + size_t count; 279 259 int ret; 280 260 281 - DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ); 261 + *unlock = true; 282 262 283 - pos = folio_pos(folio); 284 263 map.m_la = pos; 285 - 286 264 ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); 287 265 if (ret) 288 - goto out_unlock; 289 - 290 - if (!(map.m_flags & EROFS_MAP_MAPPED)) { 291 - folio_zero_range(folio, 0, folio_size(folio)); 292 - goto out_uptodate; 293 - } 266 + return ret; 294 267 295 268 if (map.m_flags & EROFS_MAP_META) { 296 - ret = erofs_fscache_read_folio_inline(folio, &map); 297 - goto out_uptodate; 269 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 270 + erofs_blk_t blknr; 271 + size_t offset, size; 272 + void *src; 273 + 274 + /* For tail packing layout, the offset may be non-zero. */ 275 + offset = erofs_blkoff(map.m_pa); 276 + blknr = erofs_blknr(map.m_pa); 277 + size = map.m_llen; 278 + 279 + src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); 280 + if (IS_ERR(src)) 281 + return PTR_ERR(src); 282 + 283 + iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE); 284 + if (copy_to_iter(src + offset, size, &iter) != size) 285 + return -EFAULT; 286 + iov_iter_zero(PAGE_SIZE - size, &iter); 287 + erofs_put_metabuf(&buf); 288 + return PAGE_SIZE; 289 + } 290 + 291 + count = min_t(size_t, map.m_llen - (pos - map.m_la), len); 292 + DBG_BUGON(!count || count % PAGE_SIZE); 293 + 294 + if (!(map.m_flags & EROFS_MAP_MAPPED)) { 295 + iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count); 296 + iov_iter_zero(count, &iter); 297 + return count; 298 298 } 299 299 300 300 mdev = (struct erofs_map_dev) { 301 301 .m_deviceid = map.m_deviceid, 302 302 .m_pa = map.m_pa, 303 303 }; 304 - 305 304 ret = erofs_map_dev(sb, &mdev); 306 305 if (ret) 307 - goto out_unlock; 306 + return ret; 308 307 308 + rreq = erofs_fscache_alloc_request(mapping, pos, count); 309 + if (IS_ERR(rreq)) 310 + return PTR_ERR(rreq); 309 311 310 - rreq = erofs_fscache_alloc_request(folio_mapping(folio), 311 - folio_pos(folio), folio_size(folio)); 312 - if (IS_ERR(rreq)) { 313 - ret = PTR_ERR(rreq); 314 - goto out_unlock; 315 - } 316 - 317 - pstart = mdev.m_pa + (pos - map.m_la); 318 - return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, 319 - rreq, pstart); 320 - 321 - out_uptodate: 322 - if (!ret) 323 - folio_mark_uptodate(folio); 324 - out_unlock: 325 - folio_unlock(folio); 326 - return ret; 312 + *unlock = false; 313 + erofs_fscache_read_folios_async(mdev.m_fscache->cookie, 314 + rreq, mdev.m_pa + (pos - map.m_la)); 315 + return count; 327 316 } 328 317 329 - static void erofs_fscache_advance_folios(struct readahead_control *rac, 330 - size_t len, bool unlock) 318 + static int erofs_fscache_read_folio(struct file *file, struct folio *folio) 331 319 { 332 - while (len) { 333 - struct folio *folio = readahead_folio(rac); 334 - len -= folio_size(folio); 335 - if (unlock) { 320 + bool unlock; 321 + int ret; 322 + 323 + DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ); 324 + 325 + ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio), 326 + folio_size(folio), &unlock); 327 + if (unlock) { 328 + if (ret > 0) 336 329 folio_mark_uptodate(folio); 337 - folio_unlock(folio); 338 - } 330 + folio_unlock(folio); 339 331 } 332 + return ret < 0 ? ret : 0; 340 333 } 341 334 342 335 static void erofs_fscache_readahead(struct readahead_control *rac) 343 336 { 344 - struct inode *inode = rac->mapping->host; 345 - struct super_block *sb = inode->i_sb; 346 - size_t len, count, done = 0; 347 - erofs_off_t pos; 348 - loff_t start, offset; 349 - int ret; 337 + struct folio *folio; 338 + size_t len, done = 0; 339 + loff_t start, pos; 340 + bool unlock; 341 + int ret, size; 350 342 351 343 if (!readahead_count(rac)) 352 344 return; ··· 353 349 len = readahead_length(rac); 354 350 355 351 do { 356 - struct erofs_map_blocks map; 357 - struct erofs_map_dev mdev; 358 - struct netfs_io_request *rreq; 359 - 360 352 pos = start + done; 361 - map.m_la = pos; 362 - 363 - ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); 364 - if (ret) 353 + ret = erofs_fscache_data_read(rac->mapping, pos, 354 + len - done, &unlock); 355 + if (ret <= 0) 365 356 return; 366 357 367 - offset = start + done; 368 - count = min_t(size_t, map.m_llen - (pos - map.m_la), 369 - len - done); 370 - 371 - if (!(map.m_flags & EROFS_MAP_MAPPED)) { 372 - struct iov_iter iter; 373 - 374 - iov_iter_xarray(&iter, READ, &rac->mapping->i_pages, 375 - offset, count); 376 - iov_iter_zero(count, &iter); 377 - 378 - erofs_fscache_advance_folios(rac, count, true); 379 - ret = count; 380 - continue; 381 - } 382 - 383 - if (map.m_flags & EROFS_MAP_META) { 384 - struct folio *folio = readahead_folio(rac); 385 - 386 - ret = erofs_fscache_read_folio_inline(folio, &map); 387 - if (!ret) { 358 + size = ret; 359 + while (size) { 360 + folio = readahead_folio(rac); 361 + size -= folio_size(folio); 362 + if (unlock) { 388 363 folio_mark_uptodate(folio); 389 - ret = folio_size(folio); 364 + folio_unlock(folio); 390 365 } 391 - 392 - folio_unlock(folio); 393 - continue; 394 366 } 395 - 396 - mdev = (struct erofs_map_dev) { 397 - .m_deviceid = map.m_deviceid, 398 - .m_pa = map.m_pa, 399 - }; 400 - ret = erofs_map_dev(sb, &mdev); 401 - if (ret) 402 - return; 403 - 404 - rreq = erofs_fscache_alloc_request(rac->mapping, offset, count); 405 - if (IS_ERR(rreq)) 406 - return; 407 - /* 408 - * Drop the ref of folios here. Unlock them in 409 - * rreq_unlock_folios() when rreq complete. 410 - */ 411 - erofs_fscache_advance_folios(rac, count, false); 412 - ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, 413 - rreq, mdev.m_pa + (pos - map.m_la)); 414 - if (!ret) 415 - ret = count; 416 - } while (ret > 0 && ((done += ret) < len)); 367 + } while ((done += ret) < len); 417 368 } 418 369 419 370 static const struct address_space_operations erofs_fscache_meta_aops = { ··· 380 421 .readahead = erofs_fscache_readahead, 381 422 }; 382 423 383 - int erofs_fscache_register_cookie(struct super_block *sb, 384 - struct erofs_fscache **fscache, 385 - char *name, bool need_inode) 424 + static void erofs_fscache_domain_put(struct erofs_domain *domain) 425 + { 426 + if (!domain) 427 + return; 428 + mutex_lock(&erofs_domain_list_lock); 429 + if (refcount_dec_and_test(&domain->ref)) { 430 + list_del(&domain->list); 431 + if (list_empty(&erofs_domain_list)) { 432 + kern_unmount(erofs_pseudo_mnt); 433 + erofs_pseudo_mnt = NULL; 434 + } 435 + mutex_unlock(&erofs_domain_list_lock); 436 + fscache_relinquish_volume(domain->volume, NULL, false); 437 + kfree(domain->domain_id); 438 + kfree(domain); 439 + return; 440 + } 441 + mutex_unlock(&erofs_domain_list_lock); 442 + } 443 + 444 + static int erofs_fscache_register_volume(struct super_block *sb) 445 + { 446 + struct erofs_sb_info *sbi = EROFS_SB(sb); 447 + char *domain_id = sbi->opt.domain_id; 448 + struct fscache_volume *volume; 449 + char *name; 450 + int ret = 0; 451 + 452 + name = kasprintf(GFP_KERNEL, "erofs,%s", 453 + domain_id ? domain_id : sbi->opt.fsid); 454 + if (!name) 455 + return -ENOMEM; 456 + 457 + volume = fscache_acquire_volume(name, NULL, NULL, 0); 458 + if (IS_ERR_OR_NULL(volume)) { 459 + erofs_err(sb, "failed to register volume for %s", name); 460 + ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP; 461 + volume = NULL; 462 + } 463 + 464 + sbi->volume = volume; 465 + kfree(name); 466 + return ret; 467 + } 468 + 469 + static int erofs_fscache_init_domain(struct super_block *sb) 470 + { 471 + int err; 472 + struct erofs_domain *domain; 473 + struct erofs_sb_info *sbi = EROFS_SB(sb); 474 + 475 + domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL); 476 + if (!domain) 477 + return -ENOMEM; 478 + 479 + domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL); 480 + if (!domain->domain_id) { 481 + kfree(domain); 482 + return -ENOMEM; 483 + } 484 + 485 + err = erofs_fscache_register_volume(sb); 486 + if (err) 487 + goto out; 488 + 489 + if (!erofs_pseudo_mnt) { 490 + erofs_pseudo_mnt = kern_mount(&erofs_fs_type); 491 + if (IS_ERR(erofs_pseudo_mnt)) { 492 + err = PTR_ERR(erofs_pseudo_mnt); 493 + goto out; 494 + } 495 + } 496 + 497 + domain->volume = sbi->volume; 498 + refcount_set(&domain->ref, 1); 499 + list_add(&domain->list, &erofs_domain_list); 500 + sbi->domain = domain; 501 + return 0; 502 + out: 503 + kfree(domain->domain_id); 504 + kfree(domain); 505 + return err; 506 + } 507 + 508 + static int erofs_fscache_register_domain(struct super_block *sb) 509 + { 510 + int err; 511 + struct erofs_domain *domain; 512 + struct erofs_sb_info *sbi = EROFS_SB(sb); 513 + 514 + mutex_lock(&erofs_domain_list_lock); 515 + list_for_each_entry(domain, &erofs_domain_list, list) { 516 + if (!strcmp(domain->domain_id, sbi->opt.domain_id)) { 517 + sbi->domain = domain; 518 + sbi->volume = domain->volume; 519 + refcount_inc(&domain->ref); 520 + mutex_unlock(&erofs_domain_list_lock); 521 + return 0; 522 + } 523 + } 524 + err = erofs_fscache_init_domain(sb); 525 + mutex_unlock(&erofs_domain_list_lock); 526 + return err; 527 + } 528 + 529 + static 530 + struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb, 531 + char *name, bool need_inode) 386 532 { 387 533 struct fscache_volume *volume = EROFS_SB(sb)->volume; 388 534 struct erofs_fscache *ctx; ··· 496 432 497 433 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 498 434 if (!ctx) 499 - return -ENOMEM; 435 + return ERR_PTR(-ENOMEM); 500 436 501 437 cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE, 502 438 name, strlen(name), NULL, 0, 0); ··· 526 462 ctx->inode = inode; 527 463 } 528 464 529 - *fscache = ctx; 530 - return 0; 465 + return ctx; 531 466 532 467 err_cookie: 533 468 fscache_unuse_cookie(ctx->cookie, NULL, NULL); 534 469 fscache_relinquish_cookie(ctx->cookie, false); 535 - ctx->cookie = NULL; 536 470 err: 537 471 kfree(ctx); 538 - return ret; 472 + return ERR_PTR(ret); 539 473 } 540 474 541 - void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) 475 + static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx) 542 476 { 543 - struct erofs_fscache *ctx = *fscache; 477 + fscache_unuse_cookie(ctx->cookie, NULL, NULL); 478 + fscache_relinquish_cookie(ctx->cookie, false); 479 + iput(ctx->inode); 480 + kfree(ctx->name); 481 + kfree(ctx); 482 + } 483 + 484 + static 485 + struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb, 486 + char *name, bool need_inode) 487 + { 488 + int err; 489 + struct inode *inode; 490 + struct erofs_fscache *ctx; 491 + struct erofs_domain *domain = EROFS_SB(sb)->domain; 492 + 493 + ctx = erofs_fscache_acquire_cookie(sb, name, need_inode); 494 + if (IS_ERR(ctx)) 495 + return ctx; 496 + 497 + ctx->name = kstrdup(name, GFP_KERNEL); 498 + if (!ctx->name) { 499 + err = -ENOMEM; 500 + goto out; 501 + } 502 + 503 + inode = new_inode(erofs_pseudo_mnt->mnt_sb); 504 + if (!inode) { 505 + err = -ENOMEM; 506 + goto out; 507 + } 508 + 509 + ctx->domain = domain; 510 + ctx->anon_inode = inode; 511 + inode->i_private = ctx; 512 + refcount_inc(&domain->ref); 513 + return ctx; 514 + out: 515 + erofs_fscache_relinquish_cookie(ctx); 516 + return ERR_PTR(err); 517 + } 518 + 519 + static 520 + struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, 521 + char *name, bool need_inode) 522 + { 523 + struct inode *inode; 524 + struct erofs_fscache *ctx; 525 + struct erofs_domain *domain = EROFS_SB(sb)->domain; 526 + struct super_block *psb = erofs_pseudo_mnt->mnt_sb; 527 + 528 + mutex_lock(&erofs_domain_cookies_lock); 529 + list_for_each_entry(inode, &psb->s_inodes, i_sb_list) { 530 + ctx = inode->i_private; 531 + if (!ctx || ctx->domain != domain || strcmp(ctx->name, name)) 532 + continue; 533 + igrab(inode); 534 + mutex_unlock(&erofs_domain_cookies_lock); 535 + return ctx; 536 + } 537 + ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode); 538 + mutex_unlock(&erofs_domain_cookies_lock); 539 + return ctx; 540 + } 541 + 542 + struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, 543 + char *name, bool need_inode) 544 + { 545 + if (EROFS_SB(sb)->opt.domain_id) 546 + return erofs_domain_register_cookie(sb, name, need_inode); 547 + return erofs_fscache_acquire_cookie(sb, name, need_inode); 548 + } 549 + 550 + void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx) 551 + { 552 + bool drop; 553 + struct erofs_domain *domain; 544 554 545 555 if (!ctx) 546 556 return; 557 + domain = ctx->domain; 558 + if (domain) { 559 + mutex_lock(&erofs_domain_cookies_lock); 560 + drop = atomic_read(&ctx->anon_inode->i_count) == 1; 561 + iput(ctx->anon_inode); 562 + mutex_unlock(&erofs_domain_cookies_lock); 563 + if (!drop) 564 + return; 565 + } 547 566 548 - fscache_unuse_cookie(ctx->cookie, NULL, NULL); 549 - fscache_relinquish_cookie(ctx->cookie, false); 550 - ctx->cookie = NULL; 551 - 552 - iput(ctx->inode); 553 - ctx->inode = NULL; 554 - 555 - kfree(ctx); 556 - *fscache = NULL; 567 + erofs_fscache_relinquish_cookie(ctx); 568 + erofs_fscache_domain_put(domain); 557 569 } 558 570 559 571 int erofs_fscache_register_fs(struct super_block *sb) 560 572 { 573 + int ret; 561 574 struct erofs_sb_info *sbi = EROFS_SB(sb); 562 - struct fscache_volume *volume; 563 - char *name; 564 - int ret = 0; 575 + struct erofs_fscache *fscache; 565 576 566 - name = kasprintf(GFP_KERNEL, "erofs,%s", sbi->opt.fsid); 567 - if (!name) 568 - return -ENOMEM; 577 + if (sbi->opt.domain_id) 578 + ret = erofs_fscache_register_domain(sb); 579 + else 580 + ret = erofs_fscache_register_volume(sb); 581 + if (ret) 582 + return ret; 569 583 570 - volume = fscache_acquire_volume(name, NULL, NULL, 0); 571 - if (IS_ERR_OR_NULL(volume)) { 572 - erofs_err(sb, "failed to register volume for %s", name); 573 - ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP; 574 - volume = NULL; 575 - } 584 + /* acquired domain/volume will be relinquished in kill_sb() on error */ 585 + fscache = erofs_fscache_register_cookie(sb, sbi->opt.fsid, true); 586 + if (IS_ERR(fscache)) 587 + return PTR_ERR(fscache); 576 588 577 - sbi->volume = volume; 578 - kfree(name); 579 - return ret; 589 + sbi->s_fscache = fscache; 590 + return 0; 580 591 } 581 592 582 593 void erofs_fscache_unregister_fs(struct super_block *sb) 583 594 { 584 595 struct erofs_sb_info *sbi = EROFS_SB(sb); 585 596 586 - fscache_relinquish_volume(sbi->volume, NULL, false); 597 + erofs_fscache_unregister_cookie(sbi->s_fscache); 598 + 599 + if (sbi->domain) 600 + erofs_fscache_domain_put(sbi->domain); 601 + else 602 + fscache_relinquish_volume(sbi->volume, NULL, false); 603 + 604 + sbi->s_fscache = NULL; 587 605 sbi->volume = NULL; 606 + sbi->domain = NULL; 588 607 }

+9 -17

fs/erofs/inode.c

··· 214 214 215 215 /* if it cannot be handled with fast symlink scheme */ 216 216 if (vi->datalayout != EROFS_INODE_FLAT_INLINE || 217 - inode->i_size >= EROFS_BLKSIZ) { 217 + inode->i_size >= EROFS_BLKSIZ || inode->i_size < 0) { 218 218 inode->i_op = &erofs_symlink_iops; 219 219 return 0; 220 220 } ··· 241 241 return 0; 242 242 } 243 243 244 - static int erofs_fill_inode(struct inode *inode, int isdir) 244 + static int erofs_fill_inode(struct inode *inode) 245 245 { 246 246 struct erofs_inode *vi = EROFS_I(inode); 247 247 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; ··· 249 249 unsigned int ofs; 250 250 int err = 0; 251 251 252 - trace_erofs_fill_inode(inode, isdir); 252 + trace_erofs_fill_inode(inode); 253 253 254 254 /* read inode base data from disk */ 255 255 kaddr = erofs_read_inode(&buf, inode, &ofs); ··· 324 324 return 0; 325 325 } 326 326 327 - static inline struct inode *erofs_iget_locked(struct super_block *sb, 328 - erofs_nid_t nid) 327 + struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid) 329 328 { 330 329 const unsigned long hashval = erofs_inode_hash(nid); 330 + struct inode *inode; 331 331 332 - return iget5_locked(sb, hashval, erofs_ilookup_test_actor, 332 + inode = iget5_locked(sb, hashval, erofs_ilookup_test_actor, 333 333 erofs_iget_set_actor, &nid); 334 - } 335 - 336 - struct inode *erofs_iget(struct super_block *sb, 337 - erofs_nid_t nid, 338 - bool isdir) 339 - { 340 - struct inode *inode = erofs_iget_locked(sb, nid); 341 - 342 334 if (!inode) 343 335 return ERR_PTR(-ENOMEM); 344 336 ··· 340 348 341 349 vi->nid = nid; 342 350 343 - err = erofs_fill_inode(inode, isdir); 344 - if (!err) 351 + err = erofs_fill_inode(inode); 352 + if (!err) { 345 353 unlock_new_inode(inode); 346 - else { 354 + } else { 347 355 iget_failed(inode); 348 356 inode = ERR_PTR(err); 349 357 }

+41 -16

fs/erofs/internal.h

··· 76 76 #endif 77 77 unsigned int mount_opt; 78 78 char *fsid; 79 + char *domain_id; 79 80 }; 80 81 81 82 struct erofs_dev_context { ··· 99 98 u16 max_pclusterblks; 100 99 }; 101 100 101 + struct erofs_domain { 102 + refcount_t ref; 103 + struct list_head list; 104 + struct fscache_volume *volume; 105 + char *domain_id; 106 + }; 107 + 102 108 struct erofs_fscache { 103 109 struct fscache_cookie *cookie; 104 110 struct inode *inode; 111 + struct inode *anon_inode; 112 + struct erofs_domain *domain; 113 + char *name; 105 114 }; 106 115 107 116 struct erofs_sb_info { ··· 131 120 struct inode *managed_cache; 132 121 133 122 struct erofs_sb_lz4_info lz4; 123 + struct inode *packed_inode; 134 124 #endif /* CONFIG_EROFS_FS_ZIP */ 135 125 struct erofs_dev_context *devs; 136 126 struct dax_device *dax_dev; ··· 169 157 /* fscache support */ 170 158 struct fscache_volume *volume; 171 159 struct erofs_fscache *s_fscache; 160 + struct erofs_domain *domain; 172 161 }; 173 162 174 163 #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) ··· 196 183 EROFS_ZIP_CACHE_READAROUND 197 184 }; 198 185 199 - #ifdef CONFIG_EROFS_FS_ZIP 200 186 #define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) 201 187 202 188 /* basic unit of the workstation of a super_block */ ··· 235 223 return atomic_cond_read_relaxed(&grp->refcount, 236 224 VAL != EROFS_LOCKED_MAGIC); 237 225 } 238 - #endif /* !CONFIG_EROFS_FS_ZIP */ 239 226 240 227 /* we strictly follow PAGE_SIZE and no buffer head yet */ 241 228 #define LOG_BLOCK_SIZE PAGE_SHIFT ··· 288 277 EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) 289 278 EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2) 290 279 EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING) 280 + EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) 281 + EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) 291 282 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) 292 283 293 284 /* atomic flag definitions */ ··· 325 312 unsigned char z_algorithmtype[2]; 326 313 unsigned char z_logical_clusterbits; 327 314 unsigned long z_tailextent_headlcn; 328 - erofs_off_t z_idataoff; 329 - unsigned short z_idata_size; 315 + union { 316 + struct { 317 + erofs_off_t z_idataoff; 318 + unsigned short z_idata_size; 319 + }; 320 + erofs_off_t z_fragmentoff; 321 + }; 330 322 }; 331 323 #endif /* CONFIG_EROFS_FS_ZIP */ 332 324 }; ··· 382 364 } 383 365 384 366 extern const struct super_operations erofs_sops; 367 + extern struct file_system_type erofs_fs_type; 385 368 386 369 extern const struct address_space_operations erofs_raw_access_aops; 387 370 extern const struct address_space_operations z_erofs_aops; ··· 390 371 enum { 391 372 BH_Encoded = BH_PrivateStart, 392 373 BH_FullMapped, 374 + BH_Fragment, 375 + BH_Partialref, 393 376 }; 394 377 395 378 /* Has a disk mapping */ ··· 402 381 #define EROFS_MAP_ENCODED (1 << BH_Encoded) 403 382 /* The length of extent is full */ 404 383 #define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) 384 + /* Located in the special packed inode */ 385 + #define EROFS_MAP_FRAGMENT (1 << BH_Fragment) 386 + /* The extent refers to partial decompressed data */ 387 + #define EROFS_MAP_PARTIAL_REF (1 << BH_Partialref) 405 388 406 389 struct erofs_map_blocks { 407 390 struct erofs_buf buf; ··· 427 402 #define EROFS_GET_BLOCKS_FIEMAP 0x0002 428 403 /* Used to map the whole extent if non-negligible data is requested for LZMA */ 429 404 #define EROFS_GET_BLOCKS_READMORE 0x0004 430 - /* Used to map tail extent for tailpacking inline pcluster */ 405 + /* Used to map tail extent for tailpacking inline or fragment pcluster */ 431 406 #define EROFS_GET_BLOCKS_FINDTAIL 0x0008 432 407 433 408 enum { 434 409 Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, 410 + Z_EROFS_COMPRESSION_INTERLACED, 435 411 Z_EROFS_COMPRESSION_RUNTIME_MAX 436 412 }; 437 413 ··· 492 466 extern const struct inode_operations erofs_symlink_iops; 493 467 extern const struct inode_operations erofs_fast_symlink_iops; 494 468 495 - struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir); 469 + struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); 496 470 int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, 497 471 struct kstat *stat, u32 request_mask, 498 472 unsigned int query_flags); ··· 607 581 int erofs_fscache_register_fs(struct super_block *sb); 608 582 void erofs_fscache_unregister_fs(struct super_block *sb); 609 583 610 - int erofs_fscache_register_cookie(struct super_block *sb, 611 - struct erofs_fscache **fscache, 612 - char *name, bool need_inode); 613 - void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache); 584 + struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, 585 + char *name, bool need_inode); 586 + void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); 614 587 615 588 extern const struct address_space_operations erofs_fscache_access_aops; 616 589 #else 617 590 static inline int erofs_fscache_register_fs(struct super_block *sb) 618 591 { 619 - return 0; 592 + return -EOPNOTSUPP; 620 593 } 621 594 static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} 622 595 623 - static inline int erofs_fscache_register_cookie(struct super_block *sb, 624 - struct erofs_fscache **fscache, 625 - char *name, bool need_inode) 596 + static inline 597 + struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, 598 + char *name, bool need_inode) 626 599 { 627 - return -EOPNOTSUPP; 600 + return ERR_PTR(-EOPNOTSUPP); 628 601 } 629 602 630 - static inline void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) 603 + static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache) 631 604 { 632 605 } 633 606 #endif

+2 -11

fs/erofs/namei.c

··· 185 185 if (IS_ERR(de)) 186 186 return PTR_ERR(de); 187 187 188 - /* the target page has been mapped */ 189 188 if (ndirents) 190 189 de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents); 191 190 ··· 196 197 return PTR_ERR_OR_ZERO(de); 197 198 } 198 199 199 - /* NOTE: i_mutex is already held by vfs */ 200 - static struct dentry *erofs_lookup(struct inode *dir, 201 - struct dentry *dentry, 200 + static struct dentry *erofs_lookup(struct inode *dir, struct dentry *dentry, 202 201 unsigned int flags) 203 202 { 204 203 int err; ··· 204 207 unsigned int d_type; 205 208 struct inode *inode; 206 209 207 - DBG_BUGON(!d_really_is_negative(dentry)); 208 - /* dentry must be unhashed in lookup, no need to worry about */ 209 - DBG_BUGON(!d_unhashed(dentry)); 210 - 211 210 trace_erofs_lookup(dir, dentry, flags); 212 211 213 - /* file name exceeds fs limit */ 214 212 if (dentry->d_name.len > EROFS_NAME_LEN) 215 213 return ERR_PTR(-ENAMETOOLONG); 216 214 217 - /* false uninitialized warnings on gcc 4.8.x */ 218 215 err = erofs_namei(dir, &dentry->d_name, &nid, &d_type); 219 216 220 217 if (err == -ENOENT) { ··· 219 228 } else { 220 229 erofs_dbg("%s, %pd (nid %llu) found, d_type %u", __func__, 221 230 dentry, nid, d_type); 222 - inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR); 231 + inode = erofs_iget(dir->i_sb, nid); 223 232 } 224 233 return d_splice_alias(inode, dentry); 225 234 }

+77 -19

fs/erofs/super.c

··· 224 224 struct erofs_device_info *dif, erofs_off_t *pos) 225 225 { 226 226 struct erofs_sb_info *sbi = EROFS_SB(sb); 227 + struct erofs_fscache *fscache; 227 228 struct erofs_deviceslot *dis; 228 229 struct block_device *bdev; 229 230 void *ptr; 230 - int ret; 231 231 232 232 ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP); 233 233 if (IS_ERR(ptr)) ··· 245 245 } 246 246 247 247 if (erofs_is_fscache_mode(sb)) { 248 - ret = erofs_fscache_register_cookie(sb, &dif->fscache, 249 - dif->path, false); 250 - if (ret) 251 - return ret; 248 + fscache = erofs_fscache_register_cookie(sb, dif->path, false); 249 + if (IS_ERR(fscache)) 250 + return PTR_ERR(fscache); 251 + dif->fscache = fscache; 252 252 } else { 253 253 bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, 254 254 sb->s_type); ··· 381 381 #endif 382 382 sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); 383 383 sbi->root_nid = le16_to_cpu(dsb->root_nid); 384 + #ifdef CONFIG_EROFS_FS_ZIP 385 + sbi->packed_inode = NULL; 386 + if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) { 387 + sbi->packed_inode = 388 + erofs_iget(sb, le64_to_cpu(dsb->packed_nid)); 389 + if (IS_ERR(sbi->packed_inode)) { 390 + ret = PTR_ERR(sbi->packed_inode); 391 + goto out; 392 + } 393 + } 394 + #endif 384 395 sbi->inos = le64_to_cpu(dsb->inos); 385 396 386 397 sbi->build_time = le64_to_cpu(dsb->build_time); ··· 422 411 erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!"); 423 412 if (erofs_is_fscache_mode(sb)) 424 413 erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!"); 414 + if (erofs_sb_has_fragments(sbi)) 415 + erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!"); 416 + if (erofs_sb_has_dedupe(sbi)) 417 + erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!"); 425 418 out: 426 419 erofs_put_metabuf(&buf); 427 420 return ret; ··· 455 440 Opt_dax_enum, 456 441 Opt_device, 457 442 Opt_fsid, 443 + Opt_domain_id, 458 444 Opt_err 459 445 }; 460 446 ··· 481 465 fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums), 482 466 fsparam_string("device", Opt_device), 483 467 fsparam_string("fsid", Opt_fsid), 468 + fsparam_string("domain_id", Opt_domain_id), 484 469 {} 485 470 }; 486 471 ··· 587 570 errorfc(fc, "fsid option not supported"); 588 571 #endif 589 572 break; 573 + case Opt_domain_id: 574 + #ifdef CONFIG_EROFS_FS_ONDEMAND 575 + kfree(ctx->opt.domain_id); 576 + ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL); 577 + if (!ctx->opt.domain_id) 578 + return -ENOMEM; 579 + #else 580 + errorfc(fc, "domain_id option not supported"); 581 + #endif 582 + break; 590 583 default: 591 584 return -ENOPARAM; 592 585 } ··· 668 641 static struct inode *erofs_nfs_get_inode(struct super_block *sb, 669 642 u64 ino, u32 generation) 670 643 { 671 - return erofs_iget(sb, ino, false); 644 + return erofs_iget(sb, ino); 672 645 } 673 646 674 647 static struct dentry *erofs_fh_to_dentry(struct super_block *sb, ··· 694 667 err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type); 695 668 if (err) 696 669 return ERR_PTR(err); 697 - return d_obtain_alias(erofs_iget(child->d_sb, nid, d_type == FT_DIR)); 670 + return d_obtain_alias(erofs_iget(child->d_sb, nid)); 698 671 } 699 672 700 673 static const struct export_operations erofs_export_ops = { ··· 702 675 .fh_to_parent = erofs_fh_to_parent, 703 676 .get_parent = erofs_get_parent, 704 677 }; 678 + 679 + static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc) 680 + { 681 + static const struct tree_descr empty_descr = {""}; 682 + 683 + return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr); 684 + } 705 685 706 686 static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) 707 687 { ··· 729 695 sb->s_fs_info = sbi; 730 696 sbi->opt = ctx->opt; 731 697 ctx->opt.fsid = NULL; 698 + ctx->opt.domain_id = NULL; 732 699 sbi->devs = ctx->devs; 733 700 ctx->devs = NULL; 734 701 ··· 738 703 sb->s_blocksize_bits = LOG_BLOCK_SIZE; 739 704 740 705 err = erofs_fscache_register_fs(sb); 741 - if (err) 742 - return err; 743 - 744 - err = erofs_fscache_register_cookie(sb, &sbi->s_fscache, 745 - sbi->opt.fsid, true); 746 706 if (err) 747 707 return err; 748 708 ··· 782 752 #endif 783 753 784 754 /* get the root inode */ 785 - inode = erofs_iget(sb, ROOT_NID(sbi), true); 755 + inode = erofs_iget(sb, ROOT_NID(sbi)); 786 756 if (IS_ERR(inode)) 787 757 return PTR_ERR(inode); 788 758 ··· 809 779 810 780 erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi)); 811 781 return 0; 782 + } 783 + 784 + static int erofs_fc_anon_get_tree(struct fs_context *fc) 785 + { 786 + return get_tree_nodev(fc, erofs_fc_fill_pseudo_super); 812 787 } 813 788 814 789 static int erofs_fc_get_tree(struct fs_context *fc) ··· 852 817 fs_put_dax(dif->dax_dev, NULL); 853 818 if (dif->bdev) 854 819 blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); 855 - erofs_fscache_unregister_cookie(&dif->fscache); 820 + erofs_fscache_unregister_cookie(dif->fscache); 821 + dif->fscache = NULL; 856 822 kfree(dif->path); 857 823 kfree(dif); 858 824 return 0; ··· 874 838 875 839 erofs_free_dev_context(ctx->devs); 876 840 kfree(ctx->opt.fsid); 841 + kfree(ctx->opt.domain_id); 877 842 kfree(ctx); 878 843 } 879 844 ··· 885 848 .free = erofs_fc_free, 886 849 }; 887 850 851 + static const struct fs_context_operations erofs_anon_context_ops = { 852 + .get_tree = erofs_fc_anon_get_tree, 853 + }; 854 + 888 855 static int erofs_init_fs_context(struct fs_context *fc) 889 856 { 890 - struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 857 + struct erofs_fs_context *ctx; 891 858 859 + /* pseudo mount for anon inodes */ 860 + if (fc->sb_flags & SB_KERNMOUNT) { 861 + fc->ops = &erofs_anon_context_ops; 862 + return 0; 863 + } 864 + 865 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 892 866 if (!ctx) 893 867 return -ENOMEM; 894 868 ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); ··· 926 878 927 879 WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); 928 880 881 + /* pseudo mount for anon inodes */ 882 + if (sb->s_flags & SB_KERNMOUNT) { 883 + kill_anon_super(sb); 884 + return; 885 + } 886 + 929 887 if (erofs_is_fscache_mode(sb)) 930 - generic_shutdown_super(sb); 888 + kill_anon_super(sb); 931 889 else 932 890 kill_block_super(sb); 933 891 ··· 943 889 944 890 erofs_free_dev_context(sbi->devs); 945 891 fs_put_dax(sbi->dax_dev, NULL); 946 - erofs_fscache_unregister_cookie(&sbi->s_fscache); 947 892 erofs_fscache_unregister_fs(sb); 948 893 kfree(sbi->opt.fsid); 894 + kfree(sbi->opt.domain_id); 949 895 kfree(sbi); 950 896 sb->s_fs_info = NULL; 951 897 } ··· 962 908 #ifdef CONFIG_EROFS_FS_ZIP 963 909 iput(sbi->managed_cache); 964 910 sbi->managed_cache = NULL; 911 + iput(sbi->packed_inode); 912 + sbi->packed_inode = NULL; 965 913 #endif 966 - erofs_fscache_unregister_cookie(&sbi->s_fscache); 914 + erofs_fscache_unregister_fs(sb); 967 915 } 968 916 969 - static struct file_system_type erofs_fs_type = { 917 + struct file_system_type erofs_fs_type = { 970 918 .owner = THIS_MODULE, 971 919 .name = "erofs", 972 920 .init_fs_context = erofs_init_fs_context, ··· 1100 1044 #ifdef CONFIG_EROFS_FS_ONDEMAND 1101 1045 if (opt->fsid) 1102 1046 seq_printf(seq, ",fsid=%s", opt->fsid); 1047 + if (opt->domain_id) 1048 + seq_printf(seq, ",domain_id=%s", opt->domain_id); 1103 1049 #endif 1104 1050 return 0; 1105 1051 }

+21 -2

fs/erofs/sysfs.c

··· 76 76 EROFS_ATTR_FEATURE(compr_head2); 77 77 EROFS_ATTR_FEATURE(sb_chksum); 78 78 EROFS_ATTR_FEATURE(ztailpacking); 79 + EROFS_ATTR_FEATURE(fragments); 80 + EROFS_ATTR_FEATURE(dedupe); 79 81 80 82 static struct attribute *erofs_feat_attrs[] = { 81 83 ATTR_LIST(zero_padding), ··· 88 86 ATTR_LIST(compr_head2), 89 87 ATTR_LIST(sb_chksum), 90 88 ATTR_LIST(ztailpacking), 89 + ATTR_LIST(fragments), 90 + ATTR_LIST(dedupe), 91 91 NULL, 92 92 }; 93 93 ATTRIBUTE_GROUPS(erofs_feat); ··· 205 201 int erofs_register_sysfs(struct super_block *sb) 206 202 { 207 203 struct erofs_sb_info *sbi = EROFS_SB(sb); 204 + char *name; 205 + char *str = NULL; 208 206 int err; 209 207 208 + if (erofs_is_fscache_mode(sb)) { 209 + if (sbi->opt.domain_id) { 210 + str = kasprintf(GFP_KERNEL, "%s,%s", sbi->opt.domain_id, 211 + sbi->opt.fsid); 212 + if (!str) 213 + return -ENOMEM; 214 + name = str; 215 + } else { 216 + name = sbi->opt.fsid; 217 + } 218 + } else { 219 + name = sb->s_id; 220 + } 210 221 sbi->s_kobj.kset = &erofs_root; 211 222 init_completion(&sbi->s_kobj_unregister); 212 - err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", 213 - erofs_is_fscache_mode(sb) ? sbi->opt.fsid : sb->s_id); 223 + err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", name); 224 + kfree(str); 214 225 if (err) 215 226 goto put_sb_kobj; 216 227 return 0;

-2

fs/erofs/xattr.h

··· 39 39 #ifdef CONFIG_EROFS_FS_XATTR 40 40 extern const struct xattr_handler erofs_xattr_user_handler; 41 41 extern const struct xattr_handler erofs_xattr_trusted_handler; 42 - #ifdef CONFIG_EROFS_FS_SECURITY 43 42 extern const struct xattr_handler erofs_xattr_security_handler; 44 - #endif 45 43 46 44 static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx) 47 45 {

+50 -1

fs/erofs/zdata.c

··· 650 650 la < fe->headoffset; 651 651 } 652 652 653 + static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, 654 + struct page *page, unsigned int pageofs, 655 + unsigned int len) 656 + { 657 + struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode; 658 + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 659 + u8 *src, *dst; 660 + unsigned int i, cnt; 661 + 662 + pos += EROFS_I(inode)->z_fragmentoff; 663 + for (i = 0; i < len; i += cnt) { 664 + cnt = min_t(unsigned int, len - i, 665 + EROFS_BLKSIZ - erofs_blkoff(pos)); 666 + src = erofs_bread(&buf, packed_inode, 667 + erofs_blknr(pos), EROFS_KMAP); 668 + if (IS_ERR(src)) { 669 + erofs_put_metabuf(&buf); 670 + return PTR_ERR(src); 671 + } 672 + 673 + dst = kmap_local_page(page); 674 + memcpy(dst + pageofs + i, src + erofs_blkoff(pos), cnt); 675 + kunmap_local(dst); 676 + pos += cnt; 677 + } 678 + erofs_put_metabuf(&buf); 679 + return 0; 680 + } 681 + 653 682 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, 654 683 struct page *page, struct page **pagepool) 655 684 { ··· 717 688 /* didn't get a valid pcluster previously (very rare) */ 718 689 } 719 690 720 - if (!(map->m_flags & EROFS_MAP_MAPPED)) 691 + if (!(map->m_flags & EROFS_MAP_MAPPED) || 692 + map->m_flags & EROFS_MAP_FRAGMENT) 721 693 goto hitted; 722 694 723 695 err = z_erofs_collector_begin(fe); ··· 765 735 zero_user_segment(page, cur, end); 766 736 goto next_part; 767 737 } 738 + if (map->m_flags & EROFS_MAP_FRAGMENT) { 739 + unsigned int pageofs, skip, len; 740 + 741 + if (offset > map->m_la) { 742 + pageofs = 0; 743 + skip = offset - map->m_la; 744 + } else { 745 + pageofs = map->m_la & ~PAGE_MASK; 746 + skip = 0; 747 + } 748 + len = min_t(unsigned int, map->m_llen - skip, end - cur); 749 + err = z_erofs_read_fragment(inode, skip, page, pageofs, len); 750 + if (err) 751 + goto out; 752 + ++spiltted; 753 + tight = false; 754 + goto next_part; 755 + } 768 756 769 757 exclusive = (!cur && (!spiltted || tight)); 770 758 if (cur) ··· 814 766 fe->pcl->multibases = true; 815 767 816 768 if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && 769 + !(map->m_flags & EROFS_MAP_PARTIAL_REF) && 817 770 fe->pcl->length == map->m_llen) 818 771 fe->pcl->partial = false; 819 772 if (fe->pcl->length < offset + end - map->m_la) {

+70 -34

fs/erofs/zmap.c

··· 17 17 struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); 18 18 19 19 if (!erofs_sb_has_big_pcluster(sbi) && 20 - !erofs_sb_has_ztailpacking(sbi) && 20 + !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) && 21 21 vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { 22 22 vi->z_advise = 0; 23 23 vi->z_algorithmtype[0] = 0; ··· 55 55 if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) 56 56 goto out_unlock; 57 57 58 - DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && 59 - !erofs_sb_has_ztailpacking(EROFS_SB(sb)) && 60 - vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); 61 - 62 58 pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + 63 59 vi->xattr_isize, 8); 64 60 kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), ··· 65 69 } 66 70 67 71 h = kaddr + erofs_blkoff(pos); 72 + /* 73 + * if the highest bit of the 8-byte map header is set, the whole file 74 + * is stored in the packed inode. The rest bits keeps z_fragmentoff. 75 + */ 76 + if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) { 77 + vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; 78 + vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63); 79 + vi->z_tailextent_headlcn = 0; 80 + goto unmap_done; 81 + } 68 82 vi->z_advise = le16_to_cpu(h->h_advise); 69 83 vi->z_algorithmtype[0] = h->h_algorithmtype & 15; 70 84 vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; ··· 129 123 if (err < 0) 130 124 goto out_unlock; 131 125 } 126 + 127 + if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && 128 + !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) { 129 + struct erofs_map_blocks map = { 130 + .buf = __EROFS_BUF_INITIALIZER 131 + }; 132 + 133 + vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); 134 + err = z_erofs_do_map_blocks(inode, &map, 135 + EROFS_GET_BLOCKS_FINDTAIL); 136 + erofs_put_metabuf(&map.buf); 137 + if (err < 0) 138 + goto out_unlock; 139 + } 132 140 /* paired with smp_mb() at the beginning of the function */ 133 141 smp_mb(); 134 142 set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); ··· 163 143 u16 delta[2]; 164 144 erofs_blk_t pblk, compressedblks; 165 145 erofs_off_t nextpackoff; 146 + bool partialref; 166 147 }; 167 - 168 - static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, 169 - erofs_blk_t eblk) 170 - { 171 - struct super_block *const sb = m->inode->i_sb; 172 - 173 - m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk, 174 - EROFS_KMAP_ATOMIC); 175 - if (IS_ERR(m->kaddr)) 176 - return PTR_ERR(m->kaddr); 177 - return 0; 178 - } 179 148 180 149 static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, 181 150 unsigned long lcn) ··· 178 169 lcn * sizeof(struct z_erofs_vle_decompressed_index); 179 170 struct z_erofs_vle_decompressed_index *di; 180 171 unsigned int advise, type; 181 - int err; 182 172 183 - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); 184 - if (err) 185 - return err; 173 + m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, 174 + erofs_blknr(pos), EROFS_KMAP_ATOMIC); 175 + if (IS_ERR(m->kaddr)) 176 + return PTR_ERR(m->kaddr); 186 177 187 178 m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index); 188 179 m->lcn = lcn; ··· 210 201 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: 211 202 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: 212 203 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: 204 + if (advise & Z_EROFS_VLE_DI_PARTIAL_REF) 205 + m->partialref = true; 213 206 m->clusterofs = le16_to_cpu(di->di_clusterofs); 214 207 m->pblk = le32_to_cpu(di->di_u.blkaddr); 215 208 break; ··· 381 370 unsigned int compacted_4b_initial, compacted_2b; 382 371 unsigned int amortizedshift; 383 372 erofs_off_t pos; 384 - int err; 385 373 386 374 if (lclusterbits != 12) 387 375 return -EOPNOTSUPP; ··· 417 407 amortizedshift = 2; 418 408 out: 419 409 pos += lcn * (1 << amortizedshift); 420 - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); 421 - if (err) 422 - return err; 410 + m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, 411 + erofs_blknr(pos), EROFS_KMAP_ATOMIC); 412 + if (IS_ERR(m->kaddr)) 413 + return PTR_ERR(m->kaddr); 423 414 return unpack_compacted_index(m, amortizedshift, pos, lookahead); 424 415 } 425 416 ··· 609 598 { 610 599 struct erofs_inode *const vi = EROFS_I(inode); 611 600 bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER; 601 + bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; 612 602 struct z_erofs_maprecorder m = { 613 603 .inode = inode, 614 604 .map = map, ··· 675 663 err = -EOPNOTSUPP; 676 664 goto unmap_out; 677 665 } 678 - 666 + if (m.partialref) 667 + map->m_flags |= EROFS_MAP_PARTIAL_REF; 679 668 map->m_llen = end - map->m_la; 680 669 681 - if (flags & EROFS_GET_BLOCKS_FINDTAIL) 670 + if (flags & EROFS_GET_BLOCKS_FINDTAIL) { 682 671 vi->z_tailextent_headlcn = m.lcn; 672 + /* for non-compact indexes, fragmentoff is 64 bits */ 673 + if (fragment && 674 + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) 675 + vi->z_fragmentoff |= (u64)m.pblk << 32; 676 + } 683 677 if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { 684 678 map->m_flags |= EROFS_MAP_META; 685 679 map->m_pa = vi->z_idataoff; 686 680 map->m_plen = vi->z_idata_size; 681 + } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { 682 + map->m_flags |= EROFS_MAP_FRAGMENT; 687 683 } else { 688 684 map->m_pa = blknr_to_addr(m.pblk); 689 685 err = z_erofs_get_extent_compressedlen(&m, initial_lcn); ··· 699 679 goto out; 700 680 } 701 681 702 - if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) 703 - map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; 704 - else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) 682 + if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) { 683 + if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) 684 + map->m_algorithmformat = 685 + Z_EROFS_COMPRESSION_INTERLACED; 686 + else 687 + map->m_algorithmformat = 688 + Z_EROFS_COMPRESSION_SHIFTED; 689 + } else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) { 705 690 map->m_algorithmformat = vi->z_algorithmtype[1]; 706 - else 691 + } else { 707 692 map->m_algorithmformat = vi->z_algorithmtype[0]; 693 + } 708 694 709 695 if ((flags & EROFS_GET_BLOCKS_FIEMAP) || 710 696 ((flags & EROFS_GET_BLOCKS_READMORE) && ··· 731 705 return err; 732 706 } 733 707 734 - int z_erofs_map_blocks_iter(struct inode *inode, 735 - struct erofs_map_blocks *map, 708 + int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, 736 709 int flags) 737 710 { 711 + struct erofs_inode *const vi = EROFS_I(inode); 738 712 int err = 0; 739 713 740 714 trace_z_erofs_map_blocks_iter_enter(inode, map, flags); ··· 750 724 err = z_erofs_fill_inode_lazy(inode); 751 725 if (err) 752 726 goto out; 727 + 728 + if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) && 729 + !vi->z_tailextent_headlcn) { 730 + map->m_la = 0; 731 + map->m_llen = inode->i_size; 732 + map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | 733 + EROFS_MAP_FRAGMENT; 734 + goto out; 735 + } 753 736 754 737 err = z_erofs_do_map_blocks(inode, map, flags); 755 738 out: ··· 786 751 iomap->length = map.m_llen; 787 752 if (map.m_flags & EROFS_MAP_MAPPED) { 788 753 iomap->type = IOMAP_MAPPED; 789 - iomap->addr = map.m_pa; 754 + iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ? 755 + IOMAP_NULL_ADDR : map.m_pa; 790 756 } else { 791 757 iomap->type = IOMAP_HOLE; 792 758 iomap->addr = IOMAP_NULL_ADDR;

+4 -7

include/trace/events/erofs.h

··· 53 53 ); 54 54 55 55 TRACE_EVENT(erofs_fill_inode, 56 - TP_PROTO(struct inode *inode, int isdir), 57 - TP_ARGS(inode, isdir), 56 + TP_PROTO(struct inode *inode), 57 + TP_ARGS(inode), 58 58 59 59 TP_STRUCT__entry( 60 60 __field(dev_t, dev ) 61 61 __field(erofs_nid_t, nid ) 62 62 __field(erofs_blk_t, blkaddr ) 63 63 __field(unsigned int, ofs ) 64 - __field(int, isdir ) 65 64 ), 66 65 67 66 TP_fast_assign( ··· 68 69 __entry->nid = EROFS_I(inode)->nid; 69 70 __entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid)); 70 71 __entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid)); 71 - __entry->isdir = isdir; 72 72 ), 73 73 74 - TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d", 74 + TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u", 75 75 show_dev_nid(__entry), 76 - __entry->blkaddr, __entry->ofs, 77 - __entry->isdir) 76 + __entry->blkaddr, __entry->ofs) 78 77 ); 79 78 80 79 TRACE_EVENT(erofs_readpage,

Configure Feed

Configure Feed