Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'erofs-for-6.13-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs fixes from Gao Xiang:
"The first one fixes a syzbot UAF report caused by a commit introduced
in this cycle, but it also addresses a longstanding memory leak. The
second one resolves a PSI memstall mis-accounting issue.

The remaining patches switch file-backed mounts to use buffered I/Os
by default instead of direct I/Os, since the page cache of underlay
files is typically valid and maybe even dirty. This change also aligns
with the default policy of loopback devices. A mount option has been
added to try to use direct I/Os explicitly.

Summary:

- Fix (pcluster) memory leak and (sbi) UAF after umounting

- Fix a case of PSI memstall mis-accounting

- Use buffered I/Os by default for file-backed mounts"

* tag 'erofs-for-6.13-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: use buffered I/O for file-backed mounts by default
erofs: reference `struct erofs_device_info` for erofs_map_dev
erofs: use `struct erofs_device_info` for the primary device
erofs: add erofs_sb_free() helper
MAINTAINERS: erofs: update Yue Hu's email address
erofs: fix PSI memstall accounting
erofs: fix rare pcluster memory leak after unmounting

+80 -83
+1 -1
MAINTAINERS
··· 8453 8453 EROFS FILE SYSTEM 8454 8454 M: Gao Xiang <xiang@kernel.org> 8455 8455 M: Chao Yu <chao@kernel.org> 8456 - R: Yue Hu <huyue2@coolpad.com> 8456 + R: Yue Hu <zbestahu@gmail.com> 8457 8457 R: Jeffle Xu <jefflexu@linux.alibaba.com> 8458 8458 R: Sandeep Dhavale <dhavale@google.com> 8459 8459 L: linux-erofs@lists.ozlabs.org
+13 -23
fs/erofs/data.c
··· 56 56 57 57 buf->file = NULL; 58 58 if (erofs_is_fileio_mode(sbi)) { 59 - buf->file = sbi->fdev; /* some fs like FUSE needs it */ 59 + buf->file = sbi->dif0.file; /* some fs like FUSE needs it */ 60 60 buf->mapping = buf->file->f_mapping; 61 61 } else if (erofs_is_fscache_mode(sb)) 62 - buf->mapping = sbi->s_fscache->inode->i_mapping; 62 + buf->mapping = sbi->dif0.fscache->inode->i_mapping; 63 63 else 64 64 buf->mapping = sb->s_bdev->bd_mapping; 65 65 } ··· 179 179 } 180 180 181 181 static void erofs_fill_from_devinfo(struct erofs_map_dev *map, 182 - struct erofs_device_info *dif) 182 + struct super_block *sb, struct erofs_device_info *dif) 183 183 { 184 + map->m_sb = sb; 185 + map->m_dif = dif; 184 186 map->m_bdev = NULL; 185 - map->m_fp = NULL; 186 - if (dif->file) { 187 - if (S_ISBLK(file_inode(dif->file)->i_mode)) 188 - map->m_bdev = file_bdev(dif->file); 189 - else 190 - map->m_fp = dif->file; 191 - } 192 - map->m_daxdev = dif->dax_dev; 193 - map->m_dax_part_off = dif->dax_part_off; 194 - map->m_fscache = dif->fscache; 187 + if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode)) 188 + map->m_bdev = file_bdev(dif->file); 195 189 } 196 190 197 191 int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) ··· 195 201 erofs_off_t startoff, length; 196 202 int id; 197 203 198 - map->m_bdev = sb->s_bdev; 199 - map->m_daxdev = EROFS_SB(sb)->dax_dev; 200 - map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; 201 - map->m_fscache = EROFS_SB(sb)->s_fscache; 202 - map->m_fp = EROFS_SB(sb)->fdev; 203 - 204 + erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0); 205 + map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */ 204 206 if (map->m_deviceid) { 205 207 down_read(&devs->rwsem); 206 208 dif = idr_find(&devs->tree, map->m_deviceid - 1); ··· 209 219 up_read(&devs->rwsem); 210 220 return 0; 211 221 } 212 - erofs_fill_from_devinfo(map, dif); 222 + erofs_fill_from_devinfo(map, sb, dif); 213 223 up_read(&devs->rwsem); 214 224 } else if (devs->extra_devices && !devs->flatdev) { 215 225 down_read(&devs->rwsem); ··· 222 232 if (map->m_pa >= startoff && 223 233 map->m_pa < startoff + length) { 224 234 map->m_pa -= startoff; 225 - erofs_fill_from_devinfo(map, dif); 235 + erofs_fill_from_devinfo(map, sb, dif); 226 236 break; 227 237 } 228 238 } ··· 292 302 293 303 iomap->offset = map.m_la; 294 304 if (flags & IOMAP_DAX) 295 - iomap->dax_dev = mdev.m_daxdev; 305 + iomap->dax_dev = mdev.m_dif->dax_dev; 296 306 else 297 307 iomap->bdev = mdev.m_bdev; 298 308 iomap->length = map.m_llen; ··· 321 331 iomap->type = IOMAP_MAPPED; 322 332 iomap->addr = mdev.m_pa; 323 333 if (flags & IOMAP_DAX) 324 - iomap->addr += mdev.m_dax_part_off; 334 + iomap->addr += mdev.m_dif->dax_part_off; 325 335 } 326 336 return 0; 327 337 }
+6 -3
fs/erofs/fileio.c
··· 9 9 struct bio_vec bvecs[BIO_MAX_VECS]; 10 10 struct bio bio; 11 11 struct kiocb iocb; 12 + struct super_block *sb; 12 13 }; 13 14 14 15 struct erofs_fileio { ··· 53 52 rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT; 54 53 rq->iocb.ki_ioprio = get_current_ioprio(); 55 54 rq->iocb.ki_complete = erofs_fileio_ki_complete; 56 - rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ? 57 - IOCB_DIRECT : 0; 55 + if (test_opt(&EROFS_SB(rq->sb)->opt, DIRECT_IO) && 56 + rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) 57 + rq->iocb.ki_flags = IOCB_DIRECT; 58 58 iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt, 59 59 rq->bio.bi_iter.bi_size); 60 60 ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); ··· 69 67 GFP_KERNEL | __GFP_NOFAIL); 70 68 71 69 bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ); 72 - rq->iocb.ki_filp = mdev->m_fp; 70 + rq->iocb.ki_filp = mdev->m_dif->file; 71 + rq->sb = mdev->m_sb; 73 72 return rq; 74 73 } 75 74
+5 -5
fs/erofs/fscache.c
··· 198 198 199 199 io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL); 200 200 bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ); 201 - io->io.private = mdev->m_fscache->cookie; 201 + io->io.private = mdev->m_dif->fscache->cookie; 202 202 io->io.end_io = erofs_fscache_bio_endio; 203 203 refcount_set(&io->io.ref, 1); 204 204 return &io->bio; ··· 316 316 if (!io) 317 317 return -ENOMEM; 318 318 iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count); 319 - ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie, 319 + ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie, 320 320 mdev.m_pa + (pos - map.m_la), io); 321 321 erofs_fscache_req_io_put(io); 322 322 ··· 657 657 if (IS_ERR(fscache)) 658 658 return PTR_ERR(fscache); 659 659 660 - sbi->s_fscache = fscache; 660 + sbi->dif0.fscache = fscache; 661 661 return 0; 662 662 } 663 663 ··· 665 665 { 666 666 struct erofs_sb_info *sbi = EROFS_SB(sb); 667 667 668 - erofs_fscache_unregister_cookie(sbi->s_fscache); 668 + erofs_fscache_unregister_cookie(sbi->dif0.fscache); 669 669 670 670 if (sbi->domain) 671 671 erofs_fscache_domain_put(sbi->domain); 672 672 else 673 673 fscache_relinquish_volume(sbi->volume, NULL, false); 674 674 675 - sbi->s_fscache = NULL; 675 + sbi->dif0.fscache = NULL; 676 676 sbi->volume = NULL; 677 677 sbi->domain = NULL; 678 678 }
+5 -10
fs/erofs/internal.h
··· 107 107 }; 108 108 109 109 struct erofs_sb_info { 110 + struct erofs_device_info dif0; 110 111 struct erofs_mount_opts opt; /* options */ 111 112 #ifdef CONFIG_EROFS_FS_ZIP 112 113 /* list for all registered superblocks, mainly for shrinker */ ··· 125 124 126 125 struct erofs_sb_lz4_info lz4; 127 126 #endif /* CONFIG_EROFS_FS_ZIP */ 128 - struct file *fdev; 129 127 struct inode *packed_inode; 130 128 struct erofs_dev_context *devs; 131 - struct dax_device *dax_dev; 132 - u64 dax_part_off; 133 129 u64 total_blocks; 134 - u32 primarydevice_blocks; 135 130 136 131 u32 meta_blkaddr; 137 132 #ifdef CONFIG_EROFS_FS_XATTR ··· 163 166 164 167 /* fscache support */ 165 168 struct fscache_volume *volume; 166 - struct erofs_fscache *s_fscache; 167 169 struct erofs_domain *domain; 168 170 char *fsid; 169 171 char *domain_id; ··· 176 180 #define EROFS_MOUNT_POSIX_ACL 0x00000020 177 181 #define EROFS_MOUNT_DAX_ALWAYS 0x00000040 178 182 #define EROFS_MOUNT_DAX_NEVER 0x00000080 183 + #define EROFS_MOUNT_DIRECT_IO 0x00000100 179 184 180 185 #define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option) 181 186 #define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option) ··· 184 187 185 188 static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi) 186 189 { 187 - return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev; 190 + return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file; 188 191 } 189 192 190 193 static inline bool erofs_is_fscache_mode(struct super_block *sb) ··· 354 357 }; 355 358 356 359 struct erofs_map_dev { 357 - struct erofs_fscache *m_fscache; 360 + struct super_block *m_sb; 361 + struct erofs_device_info *m_dif; 358 362 struct block_device *m_bdev; 359 - struct dax_device *m_daxdev; 360 - struct file *m_fp; 361 - u64 m_dax_part_off; 362 363 363 364 erofs_off_t m_pa; 364 365 unsigned int m_deviceid;
+44 -36
fs/erofs/super.c
··· 203 203 struct erofs_device_info *dif; 204 204 int id, err = 0; 205 205 206 - sbi->total_blocks = sbi->primarydevice_blocks; 206 + sbi->total_blocks = sbi->dif0.blocks; 207 207 if (!erofs_sb_has_device_table(sbi)) 208 208 ondisk_extradevs = 0; 209 209 else ··· 307 307 sbi->sb_size); 308 308 goto out; 309 309 } 310 - sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks); 310 + sbi->dif0.blocks = le32_to_cpu(dsb->blocks); 311 311 sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); 312 312 #ifdef CONFIG_EROFS_FS_XATTR 313 313 sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); ··· 364 364 } 365 365 366 366 enum { 367 - Opt_user_xattr, 368 - Opt_acl, 369 - Opt_cache_strategy, 370 - Opt_dax, 371 - Opt_dax_enum, 372 - Opt_device, 373 - Opt_fsid, 374 - Opt_domain_id, 367 + Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum, 368 + Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, 375 369 Opt_err 376 370 }; 377 371 ··· 392 398 fsparam_string("device", Opt_device), 393 399 fsparam_string("fsid", Opt_fsid), 394 400 fsparam_string("domain_id", Opt_domain_id), 401 + fsparam_flag_no("directio", Opt_directio), 395 402 {} 396 403 }; 397 404 ··· 506 511 errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); 507 512 break; 508 513 #endif 514 + case Opt_directio: 515 + #ifdef CONFIG_EROFS_FS_BACKED_BY_FILE 516 + if (result.boolean) 517 + set_opt(&sbi->opt, DIRECT_IO); 518 + else 519 + clear_opt(&sbi->opt, DIRECT_IO); 520 + #else 521 + errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); 522 + #endif 523 + break; 509 524 default: 510 525 return -ENOPARAM; 511 526 } ··· 607 602 return -EINVAL; 608 603 } 609 604 610 - sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, 611 - &sbi->dax_part_off, 612 - NULL, NULL); 605 + sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev, 606 + &sbi->dif0.dax_part_off, NULL, NULL); 613 607 } 614 608 615 609 err = erofs_read_superblock(sb); ··· 631 627 } 632 628 633 629 if (test_opt(&sbi->opt, DAX_ALWAYS)) { 634 - if (!sbi->dax_dev) { 630 + if (!sbi->dif0.dax_dev) { 635 631 errorfc(fc, "DAX unsupported by block device. Turning off DAX."); 636 632 clear_opt(&sbi->opt, DAX_ALWAYS); 637 633 } else if (sbi->blkszbits != PAGE_SHIFT) { ··· 707 703 GET_TREE_BDEV_QUIET_LOOKUP : 0); 708 704 #ifdef CONFIG_EROFS_FS_BACKED_BY_FILE 709 705 if (ret == -ENOTBLK) { 706 + struct file *file; 707 + 710 708 if (!fc->source) 711 709 return invalf(fc, "No source specified"); 712 - sbi->fdev = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0); 713 - if (IS_ERR(sbi->fdev)) 714 - return PTR_ERR(sbi->fdev); 710 + file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0); 711 + if (IS_ERR(file)) 712 + return PTR_ERR(file); 713 + sbi->dif0.file = file; 715 714 716 - if (S_ISREG(file_inode(sbi->fdev)->i_mode) && 717 - sbi->fdev->f_mapping->a_ops->read_folio) 715 + if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) && 716 + sbi->dif0.file->f_mapping->a_ops->read_folio) 718 717 return get_tree_nodev(fc, erofs_fc_fill_super); 719 - fput(sbi->fdev); 720 718 } 721 719 #endif 722 720 return ret; ··· 769 763 kfree(devs); 770 764 } 771 765 766 + static void erofs_sb_free(struct erofs_sb_info *sbi) 767 + { 768 + erofs_free_dev_context(sbi->devs); 769 + kfree(sbi->fsid); 770 + kfree(sbi->domain_id); 771 + if (sbi->dif0.file) 772 + fput(sbi->dif0.file); 773 + kfree(sbi); 774 + } 775 + 772 776 static void erofs_fc_free(struct fs_context *fc) 773 777 { 774 778 struct erofs_sb_info *sbi = fc->s_fs_info; 775 779 776 - if (!sbi) 777 - return; 778 - 779 - erofs_free_dev_context(sbi->devs); 780 - kfree(sbi->fsid); 781 - kfree(sbi->domain_id); 782 - kfree(sbi); 780 + if (sbi) /* free here if an error occurs before transferring to sb */ 781 + erofs_sb_free(sbi); 783 782 } 784 783 785 784 static const struct fs_context_operations erofs_context_ops = { ··· 820 809 { 821 810 struct erofs_sb_info *sbi = EROFS_SB(sb); 822 811 823 - if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || sbi->fdev) 812 + if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || 813 + sbi->dif0.file) 824 814 kill_anon_super(sb); 825 815 else 826 816 kill_block_super(sb); 827 - 828 - erofs_free_dev_context(sbi->devs); 829 - fs_put_dax(sbi->dax_dev, NULL); 817 + fs_put_dax(sbi->dif0.dax_dev, NULL); 830 818 erofs_fscache_unregister_fs(sb); 831 - kfree(sbi->fsid); 832 - kfree(sbi->domain_id); 833 - if (sbi->fdev) 834 - fput(sbi->fdev); 835 - kfree(sbi); 819 + erofs_sb_free(sbi); 836 820 sb->s_fs_info = NULL; 837 821 } 838 822 ··· 953 947 seq_puts(seq, ",dax=always"); 954 948 if (test_opt(opt, DAX_NEVER)) 955 949 seq_puts(seq, ",dax=never"); 950 + if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO)) 951 + seq_puts(seq, ",directio"); 956 952 #ifdef CONFIG_EROFS_FS_ONDEMAND 957 953 if (sbi->fsid) 958 954 seq_printf(seq, ",fsid=%s", sbi->fsid);
+2 -2
fs/erofs/zdata.c
··· 1792 1792 erofs_fscache_submit_bio(bio); 1793 1793 else 1794 1794 submit_bio(bio); 1795 - if (memstall) 1796 - psi_memstall_leave(&pflags); 1797 1795 } 1796 + if (memstall) 1797 + psi_memstall_leave(&pflags); 1798 1798 1799 1799 /* 1800 1800 * although background is preferred, no one is pending for submission.
+4 -3
fs/erofs/zutil.c
··· 230 230 struct erofs_sb_info *const sbi = EROFS_SB(sb); 231 231 232 232 mutex_lock(&sbi->umount_mutex); 233 - /* clean up all remaining pclusters in memory */ 234 - z_erofs_shrink_scan(sbi, ~0UL); 235 - 233 + while (!xa_empty(&sbi->managed_pslots)) { 234 + z_erofs_shrink_scan(sbi, ~0UL); 235 + cond_resched(); 236 + } 236 237 spin_lock(&erofs_sb_list_lock); 237 238 list_del(&sbi->list); 238 239 spin_unlock(&erofs_sb_list_lock);