Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.9-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
"This contains a few small fixes. This comes with some delay because I
wanted to wait on people running their reproducers and the Easter
Holidays meant that those replies came in a little later than usual:

- Fix handling of preventing writes to mounted block devices.

Since the last kernel release we allow preventing writes to mounted
block devices provided CONFIG_BLK_DEV_WRITE_MOUNTED isn't set and the
block device is opened with restricted writes. When we switched to
opening block devices as files we altered the mechanism by which we
recognize when a block device has been opened with write
restrictions.

The detection logic assumed that only read-write mounted
filesystems would apply write restrictions to their block devices
from other openers. That of course is not true since it also makes
sense to apply write restrictions for filesystems that are
read-only.

Fix the detection logic using an FMODE_* bit. We still have a few
left since we freed up a couple a while ago. I also picked up a
patch to free up four additional FMODE_* bits scheduled for the
next merge window.

- Fix counting the number of writers to a block device. This just
changes the logic to be consistent.

- Fix a bug in aio causing a NULL pointer dereference after we
implemented batched processing in aio.

- Finally, add the changes we discussed that allow block devices to
be yielded early even though file closing itself is deferred.

This also allows us to remove two holder operations to get and
release the holder to align lifetime of file and holder of the
block device"

* tag 'vfs-6.9-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
aio: Fix null ptr deref in aio_complete() wakeup
fs,block: yield devices early
block: count BLK_OPEN_RESTRICT_WRITES openers
block: handle BLK_OPEN_RESTRICT_WRITES correctly

+89 -66
+66 -18
block/bdev.c
··· 583 583 mutex_unlock(&bdev->bd_holder_lock); 584 584 bd_clear_claiming(whole, holder); 585 585 mutex_unlock(&bdev_lock); 586 - 587 - if (hops && hops->get_holder) 588 - hops->get_holder(holder); 589 586 } 590 587 591 588 /** ··· 605 608 static void bd_end_claim(struct block_device *bdev, void *holder) 606 609 { 607 610 struct block_device *whole = bdev_whole(bdev); 608 - const struct blk_holder_ops *hops = bdev->bd_holder_ops; 609 611 bool unblock = false; 610 612 611 613 /* ··· 626 630 if (!whole->bd_holders) 627 631 whole->bd_holder = NULL; 628 632 mutex_unlock(&bdev_lock); 629 - 630 - if (hops && hops->put_holder) 631 - hops->put_holder(holder); 632 633 633 634 /* 634 635 * If this was the last claim, remove holder link and unblock evpoll if ··· 769 776 770 777 static bool bdev_writes_blocked(struct block_device *bdev) 771 778 { 772 - return bdev->bd_writers == -1; 779 + return bdev->bd_writers < 0; 773 780 } 774 781 775 782 static void bdev_block_writes(struct block_device *bdev) 776 783 { 777 - bdev->bd_writers = -1; 784 + bdev->bd_writers--; 778 785 } 779 786 780 787 static void bdev_unblock_writes(struct block_device *bdev) 781 788 { 782 - bdev->bd_writers = 0; 789 + bdev->bd_writers++; 783 790 } 784 791 785 792 static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode) ··· 806 813 bdev->bd_writers++; 807 814 } 808 815 816 + static inline bool bdev_unclaimed(const struct file *bdev_file) 817 + { 818 + return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host); 819 + } 820 + 809 821 static void bdev_yield_write_access(struct file *bdev_file) 810 822 { 811 823 struct block_device *bdev; ··· 818 820 if (bdev_allow_write_mounted) 819 821 return; 820 822 823 + if (bdev_unclaimed(bdev_file)) 824 + return; 825 + 821 826 bdev = file_bdev(bdev_file); 822 - /* Yield exclusive or shared write access. 
*/ 823 - if (bdev_file->f_mode & FMODE_WRITE) { 824 - if (bdev_writes_blocked(bdev)) 825 - bdev_unblock_writes(bdev); 826 - else 827 - bdev->bd_writers--; 828 - } 827 + 828 + if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED) 829 + bdev_unblock_writes(bdev); 830 + else if (bdev_file->f_mode & FMODE_WRITE) 831 + bdev->bd_writers--; 829 832 } 830 833 831 834 /** ··· 906 907 bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; 907 908 if (bdev_nowait(bdev)) 908 909 bdev_file->f_mode |= FMODE_NOWAIT; 910 + if (mode & BLK_OPEN_RESTRICT_WRITES) 911 + bdev_file->f_mode |= FMODE_WRITE_RESTRICTED; 909 912 bdev_file->f_mapping = bdev->bd_inode->i_mapping; 910 913 bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); 911 914 bdev_file->private_data = holder; ··· 1013 1012 } 1014 1013 EXPORT_SYMBOL(bdev_file_open_by_path); 1015 1014 1015 + static inline void bd_yield_claim(struct file *bdev_file) 1016 + { 1017 + struct block_device *bdev = file_bdev(bdev_file); 1018 + void *holder = bdev_file->private_data; 1019 + 1020 + lockdep_assert_held(&bdev->bd_disk->open_mutex); 1021 + 1022 + if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder))) 1023 + return; 1024 + 1025 + if (!bdev_unclaimed(bdev_file)) 1026 + bd_end_claim(bdev, holder); 1027 + } 1028 + 1016 1029 void bdev_release(struct file *bdev_file) 1017 1030 { 1018 1031 struct block_device *bdev = file_bdev(bdev_file); ··· 1051 1036 bdev_yield_write_access(bdev_file); 1052 1037 1053 1038 if (holder) 1054 - bd_end_claim(bdev, holder); 1039 + bd_yield_claim(bdev_file); 1055 1040 1056 1041 /* 1057 1042 * Trigger event checking and tell drivers to flush MEDIA_CHANGE ··· 1070 1055 put_no_open: 1071 1056 blkdev_put_no_open(bdev); 1072 1057 } 1058 + 1059 + /** 1060 + * bdev_fput - yield claim to the block device and put the file 1061 + * @bdev_file: open block device 1062 + * 1063 + * Yield claim on the block device and put the file. 
Ensure that the 1064 + * block device can be reclaimed before the file is closed which is a 1065 + * deferred operation. 1066 + */ 1067 + void bdev_fput(struct file *bdev_file) 1068 + { 1069 + if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops)) 1070 + return; 1071 + 1072 + if (bdev_file->private_data) { 1073 + struct block_device *bdev = file_bdev(bdev_file); 1074 + struct gendisk *disk = bdev->bd_disk; 1075 + 1076 + mutex_lock(&disk->open_mutex); 1077 + bdev_yield_write_access(bdev_file); 1078 + bd_yield_claim(bdev_file); 1079 + /* 1080 + * Tell release we already gave up our hold on the 1081 + * device and if write restrictions are available that 1082 + * we already gave up write access to the device. 1083 + */ 1084 + bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host); 1085 + mutex_unlock(&disk->open_mutex); 1086 + } 1087 + 1088 + fput(bdev_file); 1089 + } 1090 + EXPORT_SYMBOL(bdev_fput); 1073 1091 1074 1092 /** 1075 1093 * lookup_bdev() - Look up a struct block_device by name.
+1 -1
drivers/mtd/devices/block2mtd.c
··· 209 209 210 210 if (dev->bdev_file) { 211 211 invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1); 212 - fput(dev->bdev_file); 212 + bdev_fput(dev->bdev_file); 213 213 } 214 214 215 215 kfree(dev);
+1 -1
fs/aio.c
··· 1202 1202 spin_lock_irqsave(&ctx->wait.lock, flags); 1203 1203 list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry) 1204 1204 if (avail >= curr->min_nr) { 1205 - list_del_init_careful(&curr->w.entry); 1206 1205 wake_up_process(curr->w.private); 1206 + list_del_init_careful(&curr->w.entry); 1207 1207 } 1208 1208 spin_unlock_irqrestore(&ctx->wait.lock, flags); 1209 1209 }
+1 -1
fs/bcachefs/super-io.c
··· 143 143 { 144 144 kfree(sb->bio); 145 145 if (!IS_ERR_OR_NULL(sb->s_bdev_file)) 146 - fput(sb->s_bdev_file); 146 + bdev_fput(sb->s_bdev_file); 147 147 kfree(sb->holder); 148 148 kfree(sb->sb_name); 149 149
+1 -1
fs/cramfs/inode.c
··· 495 495 sb->s_mtd = NULL; 496 496 } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { 497 497 sync_blockdev(sb->s_bdev); 498 - fput(sb->s_bdev_file); 498 + bdev_fput(sb->s_bdev_file); 499 499 } 500 500 kfree(sbi); 501 501 }
+4 -4
fs/ext4/super.c
··· 5668 5668 brelse(sbi->s_sbh); 5669 5669 if (sbi->s_journal_bdev_file) { 5670 5670 invalidate_bdev(file_bdev(sbi->s_journal_bdev_file)); 5671 - fput(sbi->s_journal_bdev_file); 5671 + bdev_fput(sbi->s_journal_bdev_file); 5672 5672 } 5673 5673 out_fail: 5674 5674 invalidate_bdev(sb->s_bdev); ··· 5913 5913 out_bh: 5914 5914 brelse(bh); 5915 5915 out_bdev: 5916 - fput(bdev_file); 5916 + bdev_fput(bdev_file); 5917 5917 return ERR_PTR(errno); 5918 5918 } 5919 5919 ··· 5952 5952 out_journal: 5953 5953 jbd2_journal_destroy(journal); 5954 5954 out_bdev: 5955 - fput(bdev_file); 5955 + bdev_fput(bdev_file); 5956 5956 return ERR_PTR(errno); 5957 5957 } 5958 5958 ··· 7327 7327 kill_block_super(sb); 7328 7328 7329 7329 if (bdev_file) 7330 - fput(bdev_file); 7330 + bdev_fput(bdev_file); 7331 7331 } 7332 7332 7333 7333 static struct file_system_type ext4_fs_type = {
+1 -1
fs/f2fs/super.c
··· 1558 1558 1559 1559 for (i = 0; i < sbi->s_ndevs; i++) { 1560 1560 if (i > 0) 1561 - fput(FDEV(i).bdev_file); 1561 + bdev_fput(FDEV(i).bdev_file); 1562 1562 #ifdef CONFIG_BLK_DEV_ZONED 1563 1563 kvfree(FDEV(i).blkz_seq); 1564 1564 #endif
+2 -2
fs/jfs/jfs_logmgr.c
··· 1141 1141 lbmLogShutdown(log); 1142 1142 1143 1143 close: /* close external log device */ 1144 - fput(bdev_file); 1144 + bdev_fput(bdev_file); 1145 1145 1146 1146 free: /* free log descriptor */ 1147 1147 mutex_unlock(&jfs_log_mutex); ··· 1485 1485 bdev_file = log->bdev_file; 1486 1486 rc = lmLogShutdown(log); 1487 1487 1488 - fput(bdev_file); 1488 + bdev_fput(bdev_file); 1489 1489 1490 1490 kfree(log); 1491 1491
+1 -1
fs/reiserfs/journal.c
··· 2589 2589 static void release_journal_dev(struct reiserfs_journal *journal) 2590 2590 { 2591 2591 if (journal->j_bdev_file) { 2592 - fput(journal->j_bdev_file); 2592 + bdev_fput(journal->j_bdev_file); 2593 2593 journal->j_bdev_file = NULL; 2594 2594 } 2595 2595 }
+1 -1
fs/romfs/super.c
··· 594 594 #ifdef CONFIG_ROMFS_ON_BLOCK 595 595 if (sb->s_bdev) { 596 596 sync_blockdev(sb->s_bdev); 597 - fput(sb->s_bdev_file); 597 + bdev_fput(sb->s_bdev_file); 598 598 } 599 599 #endif 600 600 }
+3 -21
fs/super.c
··· 1515 1515 return error; 1516 1516 } 1517 1517 1518 - static void fs_bdev_super_get(void *data) 1519 - { 1520 - struct super_block *sb = data; 1521 - 1522 - spin_lock(&sb_lock); 1523 - sb->s_count++; 1524 - spin_unlock(&sb_lock); 1525 - } 1526 - 1527 - static void fs_bdev_super_put(void *data) 1528 - { 1529 - struct super_block *sb = data; 1530 - 1531 - put_super(sb); 1532 - } 1533 - 1534 1518 const struct blk_holder_ops fs_holder_ops = { 1535 1519 .mark_dead = fs_bdev_mark_dead, 1536 1520 .sync = fs_bdev_sync, 1537 1521 .freeze = fs_bdev_freeze, 1538 1522 .thaw = fs_bdev_thaw, 1539 - .get_holder = fs_bdev_super_get, 1540 - .put_holder = fs_bdev_super_put, 1541 1523 }; 1542 1524 EXPORT_SYMBOL_GPL(fs_holder_ops); 1543 1525 ··· 1544 1562 * writable from userspace even for a read-only block device. 1545 1563 */ 1546 1564 if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { 1547 - fput(bdev_file); 1565 + bdev_fput(bdev_file); 1548 1566 return -EACCES; 1549 1567 } 1550 1568 ··· 1555 1573 if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { 1556 1574 if (fc) 1557 1575 warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); 1558 - fput(bdev_file); 1576 + bdev_fput(bdev_file); 1559 1577 return -EBUSY; 1560 1578 } 1561 1579 spin_lock(&sb_lock); ··· 1675 1693 generic_shutdown_super(sb); 1676 1694 if (bdev) { 1677 1695 sync_blockdev(bdev); 1678 - fput(sb->s_bdev_file); 1696 + bdev_fput(sb->s_bdev_file); 1679 1697 } 1680 1698 } 1681 1699
+1 -1
fs/xfs/xfs_buf.c
··· 2030 2030 fs_put_dax(btp->bt_daxdev, btp->bt_mount); 2031 2031 /* the main block device is closed by kill_block_super */ 2032 2032 if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev) 2033 - fput(btp->bt_bdev_file); 2033 + bdev_fput(btp->bt_bdev_file); 2034 2034 kfree(btp); 2035 2035 } 2036 2036
+3 -3
fs/xfs/xfs_super.c
··· 485 485 mp->m_logdev_targp = mp->m_ddev_targp; 486 486 /* Handle won't be used, drop it */ 487 487 if (logdev_file) 488 - fput(logdev_file); 488 + bdev_fput(logdev_file); 489 489 } 490 490 491 491 return 0; ··· 497 497 xfs_free_buftarg(mp->m_ddev_targp); 498 498 out_close_rtdev: 499 499 if (rtdev_file) 500 - fput(rtdev_file); 500 + bdev_fput(rtdev_file); 501 501 out_close_logdev: 502 502 if (logdev_file) 503 - fput(logdev_file); 503 + bdev_fput(logdev_file); 504 504 return error; 505 505 } 506 506
+1 -10
include/linux/blkdev.h
··· 1505 1505 * Thaw the file system mounted on the block device. 1506 1506 */ 1507 1507 int (*thaw)(struct block_device *bdev); 1508 - 1509 - /* 1510 - * If needed, get a reference to the holder. 1511 - */ 1512 - void (*get_holder)(void *holder); 1513 - 1514 - /* 1515 - * Release the holder. 1516 - */ 1517 - void (*put_holder)(void *holder); 1518 1508 }; 1519 1509 1520 1510 /* ··· 1575 1585 1576 1586 int bdev_freeze(struct block_device *bdev); 1577 1587 int bdev_thaw(struct block_device *bdev); 1588 + void bdev_fput(struct file *bdev_file); 1578 1589 1579 1590 struct io_comp_batch { 1580 1591 struct request *req_list;
+2
include/linux/fs.h
··· 121 121 #define FMODE_PWRITE ((__force fmode_t)0x10) 122 122 /* File is opened for execution with sys_execve / sys_uselib */ 123 123 #define FMODE_EXEC ((__force fmode_t)0x20) 124 + /* File writes are restricted (block device specific) */ 125 + #define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) 124 126 /* 32bit hashes as llseek() offset (for directories) */ 125 127 #define FMODE_32BITHASH ((__force fmode_t)0x200) 126 128 /* 64bit hashes as llseek() offset (for directories) */