Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.17-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc VFS updates from Christian Brauner:
"This contains the usual selection of misc updates for this cycle.

Features:

- Add ext4 IOCB_DONTCACHE support

This refactors the address_space_operations write_begin() and
write_end() callbacks to take const struct kiocb * as their first
argument, allowing IOCB flags such as IOCB_DONTCACHE to propagate
to the filesystem's buffered I/O path.

Ext4 is updated to implement handling of the IOCB_DONTCACHE flag
and advertises support via the FOP_DONTCACHE file operation flag.

Additionally, the i915 driver's shmem write paths are updated to
bypass the legacy write_begin/write_end interface in favor of
directly calling write_iter() with a constructed synchronous kiocb.
Another i915 change replaces a manual write loop with
kernel_write() during GEM shmem object creation.

Cleanups:

- don't duplicate vfs_open() in kernel_file_open()

- proc_fd_getattr(): don't bother with S_ISDIR() check

- fs/ecryptfs: replace snprintf with sysfs_emit in show function

- vfs: Remove unnecessary list_for_each_entry_safe() from
  evict_inodes()

- filelock: add new locks_wake_up_waiter() helper

- fs: Remove three arguments from block_write_end()

- VFS: change old_dir and new_dir in struct renamedata to dentrys

- netfs: Remove unused declaration netfs_queue_write_request()

Fixes:

- eventpoll: Fix semi-unbounded recursion

- eventpoll: fix sphinx documentation build warning

- fs/read_write: Fix spelling typo

- fs: annotate data race between poll_schedule_timeout() and
  pollwake()

- fs/pipe: set FMODE_NOWAIT in create_pipe_files()

- docs/vfs: update references to i_mutex to i_rwsem

- fs/buffer: remove comment about hard sectorsize

- fs/buffer: remove the min and max limit checks in __getblk_slow()

- fs/libfs: don't assume blocksize <= PAGE_SIZE in
  generic_check_addressable

- fs_context: fix parameter name in infofc() macro

- fs: Prevent file descriptor table allocations exceeding INT_MAX"

* tag 'vfs-6.17-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (24 commits)
netfs: Remove unused declaration netfs_queue_write_request()
eventpoll: fix sphinx documentation build warning
ext4: support uncached buffered I/O
mm/pagemap: add write_begin_get_folio() helper function
fs: change write_begin/write_end interface to take struct kiocb *
drm/i915: Refactor shmem_pwrite() to use kiocb and write_iter
drm/i915: Use kernel_write() in shmem object create
eventpoll: Fix semi-unbounded recursion
vfs: Remove unnecessary list_for_each_entry_safe() from evict_inodes()
fs/libfs: don't assume blocksize <= PAGE_SIZE in generic_check_addressable
fs/buffer: remove the min and max limit checks in __getblk_slow()
fs: Prevent file descriptor table allocations exceeding INT_MAX
fs: Remove three arguments from block_write_end()
fs/ecryptfs: replace snprintf with sysfs_emit in show function
fs: annotate suspected data race between poll_schedule_timeout() and pollwake()
docs/vfs: update references to i_mutex to i_rwsem
fs/buffer: remove comment about hard sectorsize
fs_context: fix parameter name in infofc() macro
VFS: change old_dir and new_dir in struct renamedata to dentrys
proc_fd_getattr(): don't bother with S_ISDIR() check
...

+517 -454
+2 -2
Documentation/filesystems/locking.rst
··· 253 253 int (*writepages)(struct address_space *, struct writeback_control *); 254 254 bool (*dirty_folio)(struct address_space *, struct folio *folio); 255 255 void (*readahead)(struct readahead_control *); 256 - int (*write_begin)(struct file *, struct address_space *mapping, 256 + int (*write_begin)(const struct kiocb *, struct address_space *mapping, 257 257 loff_t pos, unsigned len, 258 258 struct folio **foliop, void **fsdata); 259 - int (*write_end)(struct file *, struct address_space *mapping, 259 + int (*write_end)(const struct kiocb *, struct address_space *mapping, 260 260 loff_t pos, unsigned len, unsigned copied, 261 261 struct folio *folio, void *fsdata); 262 262 sector_t (*bmap)(struct address_space *, sector_t);
+6 -5
Documentation/filesystems/vfs.rst
··· 758 758 dirty_folio to write data into the address_space, and 759 759 writepages to writeback data to storage. 760 760 761 - Adding and removing pages to/from an address_space is protected by the 762 - inode's i_mutex. 761 + Removing pages from an address_space requires holding the inode's i_rwsem 762 + exclusively, while adding pages to the address_space requires holding the 763 + inode's i_mapping->invalidate_lock exclusively. 763 764 764 765 When data is written to a page, the PG_Dirty flag should be set. It 765 766 typically remains set until writepages asks for it to be written. This ··· 823 822 int (*writepages)(struct address_space *, struct writeback_control *); 824 823 bool (*dirty_folio)(struct address_space *, struct folio *); 825 824 void (*readahead)(struct readahead_control *); 826 - int (*write_begin)(struct file *, struct address_space *mapping, 825 + int (*write_begin)(const struct kiocb *, struct address_space *mapping, 827 826 loff_t pos, unsigned len, 828 - struct page **pagep, void **fsdata); 829 - int (*write_end)(struct file *, struct address_space *mapping, 827 + struct page **pagep, void **fsdata); 828 + int (*write_end)(const struct kiocb *, struct address_space *mapping, 830 829 loff_t pos, unsigned len, unsigned copied, 831 830 struct folio *folio, void *fsdata); 832 831 sector_t (*bmap)(struct address_space *, sector_t);
+9 -6
block/fops.c
··· 496 496 mpage_readahead(rac, blkdev_get_block); 497 497 } 498 498 499 - static int blkdev_write_begin(struct file *file, struct address_space *mapping, 500 - loff_t pos, unsigned len, struct folio **foliop, void **fsdata) 499 + static int blkdev_write_begin(const struct kiocb *iocb, 500 + struct address_space *mapping, loff_t pos, 501 + unsigned len, struct folio **foliop, 502 + void **fsdata) 501 503 { 502 504 return block_write_begin(mapping, pos, len, foliop, blkdev_get_block); 503 505 } 504 506 505 - static int blkdev_write_end(struct file *file, struct address_space *mapping, 506 - loff_t pos, unsigned len, unsigned copied, struct folio *folio, 507 - void *fsdata) 507 + static int blkdev_write_end(const struct kiocb *iocb, 508 + struct address_space *mapping, 509 + loff_t pos, unsigned len, unsigned copied, 510 + struct folio *folio, void *fsdata) 508 511 { 509 512 int ret; 510 - ret = block_write_end(file, mapping, pos, len, copied, folio, fsdata); 513 + ret = block_write_end(pos, len, copied, folio); 511 514 512 515 folio_unlock(folio); 513 516 folio_put(folio);
+28 -81
drivers/gpu/drm/i915/gem/i915_gem_shmem.c
··· 6 6 #include <linux/pagevec.h> 7 7 #include <linux/shmem_fs.h> 8 8 #include <linux/swap.h> 9 + #include <linux/uio.h> 9 10 10 11 #include <drm/drm_cache.h> 11 12 ··· 401 400 shmem_pwrite(struct drm_i915_gem_object *obj, 402 401 const struct drm_i915_gem_pwrite *arg) 403 402 { 404 - struct address_space *mapping = obj->base.filp->f_mapping; 405 - const struct address_space_operations *aops = mapping->a_ops; 406 403 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 407 - u64 remain; 408 - loff_t pos; 409 - unsigned int pg; 404 + struct file *file = obj->base.filp; 405 + struct kiocb kiocb; 406 + struct iov_iter iter; 407 + ssize_t written; 408 + u64 size = arg->size; 410 409 411 410 /* Caller already validated user args */ 412 411 GEM_BUG_ON(!access_ok(user_data, arg->size)); ··· 429 428 if (obj->mm.madv != I915_MADV_WILLNEED) 430 429 return -EFAULT; 431 430 432 - /* 433 - * Before the pages are instantiated the object is treated as being 434 - * in the CPU domain. The pages will be clflushed as required before 435 - * use, and we can freely write into the pages directly. If userspace 436 - * races pwrite with any other operation; corruption will ensue - 437 - * that is userspace's prerogative! 
438 - */ 431 + if (size > MAX_RW_COUNT) 432 + return -EFBIG; 439 433 440 - remain = arg->size; 441 - pos = arg->offset; 442 - pg = offset_in_page(pos); 434 + if (!file->f_op->write_iter) 435 + return -EINVAL; 443 436 444 - do { 445 - unsigned int len, unwritten; 446 - struct folio *folio; 447 - void *data, *vaddr; 448 - int err; 449 - char __maybe_unused c; 437 + init_sync_kiocb(&kiocb, file); 438 + kiocb.ki_pos = arg->offset; 439 + iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)user_data, size); 450 440 451 - len = PAGE_SIZE - pg; 452 - if (len > remain) 453 - len = remain; 441 + written = file->f_op->write_iter(&kiocb, &iter); 442 + BUG_ON(written == -EIOCBQUEUED); 454 443 455 - /* Prefault the user page to reduce potential recursion */ 456 - err = __get_user(c, user_data); 457 - if (err) 458 - return err; 444 + if (written != size) 445 + return -EIO; 459 446 460 - err = __get_user(c, user_data + len - 1); 461 - if (err) 462 - return err; 463 - 464 - err = aops->write_begin(obj->base.filp, mapping, pos, len, 465 - &folio, &data); 466 - if (err < 0) 467 - return err; 468 - 469 - vaddr = kmap_local_folio(folio, offset_in_folio(folio, pos)); 470 - pagefault_disable(); 471 - unwritten = __copy_from_user_inatomic(vaddr, user_data, len); 472 - pagefault_enable(); 473 - kunmap_local(vaddr); 474 - 475 - err = aops->write_end(obj->base.filp, mapping, pos, len, 476 - len - unwritten, folio, data); 477 - if (err < 0) 478 - return err; 479 - 480 - /* We don't handle -EFAULT, leave it to the caller to check */ 481 - if (unwritten) 482 - return -ENODEV; 483 - 484 - remain -= len; 485 - user_data += len; 486 - pos += len; 487 - pg = 0; 488 - } while (remain); 447 + if (written < 0) 448 + return written; 489 449 490 450 return 0; 491 451 } ··· 599 637 { 600 638 struct drm_i915_gem_object *obj; 601 639 struct file *file; 602 - const struct address_space_operations *aops; 603 - loff_t pos; 604 - int err; 640 + loff_t pos = 0; 641 + ssize_t err; 605 642 606 643 
GEM_WARN_ON(IS_DGFX(i915)); 607 644 obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE)); ··· 610 649 GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 611 650 612 651 file = obj->base.filp; 613 - aops = file->f_mapping->a_ops; 614 - pos = 0; 615 - do { 616 - unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 617 - struct folio *folio; 618 - void *fsdata; 652 + err = kernel_write(file, data, size, &pos); 619 653 620 - err = aops->write_begin(file, file->f_mapping, pos, len, 621 - &folio, &fsdata); 622 - if (err < 0) 623 - goto fail; 654 + if (err < 0) 655 + goto fail; 624 656 625 - memcpy_to_folio(folio, offset_in_folio(folio, pos), data, len); 626 - 627 - err = aops->write_end(file, file->f_mapping, pos, len, len, 628 - folio, fsdata); 629 - if (err < 0) 630 - goto fail; 631 - 632 - size -= len; 633 - data += len; 634 - pos += len; 635 - } while (size); 657 + if (err != size) { 658 + err = -EIO; 659 + goto fail; 660 + } 636 661 637 662 return obj; 638 663
+5 -4
fs/adfs/inode.c
··· 53 53 truncate_pagecache(inode, inode->i_size); 54 54 } 55 55 56 - static int adfs_write_begin(struct file *file, struct address_space *mapping, 57 - loff_t pos, unsigned len, 58 - struct folio **foliop, void **fsdata) 56 + static int adfs_write_begin(const struct kiocb *iocb, 57 + struct address_space *mapping, 58 + loff_t pos, unsigned len, 59 + struct folio **foliop, void **fsdata) 59 60 { 60 61 int ret; 61 62 62 - ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata, 63 + ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata, 63 64 adfs_get_block, 64 65 &ADFS_I(mapping->host)->mmu_private); 65 66 if (unlikely(ret))
+15 -11
fs/affs/file.c
··· 415 415 return ret; 416 416 } 417 417 418 - static int affs_write_begin(struct file *file, struct address_space *mapping, 419 - loff_t pos, unsigned len, 420 - struct folio **foliop, void **fsdata) 418 + static int affs_write_begin(const struct kiocb *iocb, 419 + struct address_space *mapping, 420 + loff_t pos, unsigned len, 421 + struct folio **foliop, void **fsdata) 421 422 { 422 423 int ret; 423 424 424 - ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata, 425 + ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata, 425 426 affs_get_block, 426 427 &AFFS_I(mapping->host)->mmu_private); 427 428 if (unlikely(ret)) ··· 431 430 return ret; 432 431 } 433 432 434 - static int affs_write_end(struct file *file, struct address_space *mapping, 435 - loff_t pos, unsigned int len, unsigned int copied, 433 + static int affs_write_end(const struct kiocb *iocb, 434 + struct address_space *mapping, loff_t pos, 435 + unsigned int len, unsigned int copied, 436 436 struct folio *folio, void *fsdata) 437 437 { 438 438 struct inode *inode = mapping->host; 439 439 int ret; 440 440 441 - ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); 441 + ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); 442 442 443 443 /* Clear Archived bit on file writes, as AmigaOS would do */ 444 444 if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) { ··· 647 645 return err; 648 646 } 649 647 650 - static int affs_write_begin_ofs(struct file *file, struct address_space *mapping, 648 + static int affs_write_begin_ofs(const struct kiocb *iocb, 649 + struct address_space *mapping, 651 650 loff_t pos, unsigned len, 652 651 struct folio **foliop, void **fsdata) 653 652 { ··· 687 684 return err; 688 685 } 689 686 690 - static int affs_write_end_ofs(struct file *file, struct address_space *mapping, 691 - loff_t pos, unsigned len, unsigned copied, 692 - struct folio *folio, void *fsdata) 687 + static int affs_write_end_ofs(const struct kiocb *iocb, 688 
+ struct address_space *mapping, 689 + loff_t pos, unsigned len, unsigned copied, 690 + struct folio *folio, void *fsdata) 693 691 { 694 692 struct inode *inode = mapping->host; 695 693 struct super_block *sb = inode->i_sb;
+5 -5
fs/attr.c
··· 230 230 * @inode: the inode to be truncated 231 231 * @offset: the new size to assign to the inode 232 232 * 233 - * inode_newsize_ok must be called with i_mutex held. 233 + * inode_newsize_ok must be called with i_rwsem held exclusively. 234 234 * 235 235 * inode_newsize_ok will check filesystem limits and ulimits to check that the 236 236 * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ ··· 318 318 * @inode: the inode to be updated 319 319 * @attr: the new attributes 320 320 * 321 - * setattr_copy must be called with i_mutex held. 321 + * setattr_copy must be called with i_rwsem held exclusively. 322 322 * 323 323 * setattr_copy updates the inode's metadata with that specified 324 324 * in attr on idmapped mounts. Necessary permission checks to determine ··· 403 403 * @attr: new attributes 404 404 * @delegated_inode: returns inode, if the inode is delegated 405 405 * 406 - * The caller must hold the i_mutex on the affected object. 406 + * The caller must hold the i_rwsem exclusively on the affected object. 407 407 * 408 408 * If notify_change discovers a delegation in need of breaking, 409 409 * it will return -EWOULDBLOCK and return a reference to the inode in 410 410 * delegated_inode. The caller should then break the delegation and 411 411 * retry. Because breaking a delegation may take a long time, the 412 - * caller should drop the i_mutex before doing so. 412 + * caller should drop the i_rwsem before doing so. 413 413 * 414 414 * Alternatively, a caller may pass NULL for delegated_inode. This may 415 415 * be appropriate for callers that expect the underlying filesystem not ··· 456 456 if (S_ISLNK(inode->i_mode)) 457 457 return -EOPNOTSUPP; 458 458 459 - /* Flag setting protected by i_mutex */ 459 + /* Flag setting protected by i_rwsem */ 460 460 if (is_sxid(attr->ia_mode)) 461 461 inode->i_flags &= ~S_NOSEC; 462 462 }
+2 -2
fs/bcachefs/fs-io-buffered.c
··· 674 674 675 675 /* buffered writes: */ 676 676 677 - int bch2_write_begin(struct file *file, struct address_space *mapping, 677 + int bch2_write_begin(const struct kiocb *iocb, struct address_space *mapping, 678 678 loff_t pos, unsigned len, 679 679 struct folio **foliop, void **fsdata) 680 680 { ··· 757 757 return bch2_err_class(ret); 758 758 } 759 759 760 - int bch2_write_end(struct file *file, struct address_space *mapping, 760 + int bch2_write_end(const struct kiocb *iocb, struct address_space *mapping, 761 761 loff_t pos, unsigned len, unsigned copied, 762 762 struct folio *folio, void *fsdata) 763 763 {
+2 -2
fs/bcachefs/fs-io-buffered.h
··· 10 10 int bch2_writepages(struct address_space *, struct writeback_control *); 11 11 void bch2_readahead(struct readahead_control *); 12 12 13 - int bch2_write_begin(struct file *, struct address_space *, loff_t pos, 13 + int bch2_write_begin(const struct kiocb *, struct address_space *, loff_t pos, 14 14 unsigned len, struct folio **, void **); 15 - int bch2_write_end(struct file *, struct address_space *, loff_t, 15 + int bch2_write_end(const struct kiocb *, struct address_space *, loff_t, 16 16 unsigned len, unsigned copied, struct folio *, void *); 17 17 18 18 ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
+4 -3
fs/bfs/file.c
··· 170 170 truncate_pagecache(inode, inode->i_size); 171 171 } 172 172 173 - static int bfs_write_begin(struct file *file, struct address_space *mapping, 174 - loff_t pos, unsigned len, 175 - struct folio **foliop, void **fsdata) 173 + static int bfs_write_begin(const struct kiocb *iocb, 174 + struct address_space *mapping, 175 + loff_t pos, unsigned len, 176 + struct folio **foliop, void **fsdata) 176 177 { 177 178 int ret; 178 179
+20 -27
fs/buffer.c
··· 1122 1122 { 1123 1123 bool blocking = gfpflags_allow_blocking(gfp); 1124 1124 1125 - /* Size must be multiple of hard sectorsize */ 1126 - if (unlikely(size & (bdev_logical_block_size(bdev)-1) || 1127 - (size < 512 || size > PAGE_SIZE))) { 1128 - printk(KERN_ERR "getblk(): invalid block size %d requested\n", 1129 - size); 1130 - printk(KERN_ERR "logical block size: %d\n", 1131 - bdev_logical_block_size(bdev)); 1132 - 1133 - dump_stack(); 1125 + if (WARN_ON_ONCE(!IS_ALIGNED(size, bdev_logical_block_size(bdev)))) { 1126 + printk(KERN_ERR "getblk(): block size %d not aligned to logical block size %d\n", 1127 + size, bdev_logical_block_size(bdev)); 1134 1128 return NULL; 1135 1129 } 1136 1130 ··· 2265 2271 } 2266 2272 EXPORT_SYMBOL(block_write_begin); 2267 2273 2268 - int block_write_end(struct file *file, struct address_space *mapping, 2269 - loff_t pos, unsigned len, unsigned copied, 2270 - struct folio *folio, void *fsdata) 2274 + int block_write_end(loff_t pos, unsigned len, unsigned copied, 2275 + struct folio *folio) 2271 2276 { 2272 2277 size_t start = pos - folio_pos(folio); 2273 2278 ··· 2297 2304 } 2298 2305 EXPORT_SYMBOL(block_write_end); 2299 2306 2300 - int generic_write_end(struct file *file, struct address_space *mapping, 2301 - loff_t pos, unsigned len, unsigned copied, 2302 - struct folio *folio, void *fsdata) 2307 + int generic_write_end(const struct kiocb *iocb, struct address_space *mapping, 2308 + loff_t pos, unsigned len, unsigned copied, 2309 + struct folio *folio, void *fsdata) 2303 2310 { 2304 2311 struct inode *inode = mapping->host; 2305 2312 loff_t old_size = inode->i_size; 2306 2313 bool i_size_changed = false; 2307 2314 2308 - copied = block_write_end(file, mapping, pos, len, copied, folio, fsdata); 2315 + copied = block_write_end(pos, len, copied, folio); 2309 2316 2310 2317 /* 2311 2318 * No need to use i_size_read() here, the i_size cannot change under us ··· 2494 2501 } 2495 2502 EXPORT_SYMBOL(generic_cont_expand_simple); 2496 2503 
2497 - static int cont_expand_zero(struct file *file, struct address_space *mapping, 2504 + static int cont_expand_zero(const struct kiocb *iocb, 2505 + struct address_space *mapping, 2498 2506 loff_t pos, loff_t *bytes) 2499 2507 { 2500 2508 struct inode *inode = mapping->host; ··· 2519 2525 } 2520 2526 len = PAGE_SIZE - zerofrom; 2521 2527 2522 - err = aops->write_begin(file, mapping, curpos, len, 2528 + err = aops->write_begin(iocb, mapping, curpos, len, 2523 2529 &folio, &fsdata); 2524 2530 if (err) 2525 2531 goto out; 2526 2532 folio_zero_range(folio, offset_in_folio(folio, curpos), len); 2527 - err = aops->write_end(file, mapping, curpos, len, len, 2533 + err = aops->write_end(iocb, mapping, curpos, len, len, 2528 2534 folio, fsdata); 2529 2535 if (err < 0) 2530 2536 goto out; ··· 2552 2558 } 2553 2559 len = offset - zerofrom; 2554 2560 2555 - err = aops->write_begin(file, mapping, curpos, len, 2561 + err = aops->write_begin(iocb, mapping, curpos, len, 2556 2562 &folio, &fsdata); 2557 2563 if (err) 2558 2564 goto out; 2559 2565 folio_zero_range(folio, offset_in_folio(folio, curpos), len); 2560 - err = aops->write_end(file, mapping, curpos, len, len, 2566 + err = aops->write_end(iocb, mapping, curpos, len, len, 2561 2567 folio, fsdata); 2562 2568 if (err < 0) 2563 2569 goto out; ··· 2572 2578 * For moronic filesystems that do not allow holes in file. 2573 2579 * We may have to extend the file. 
2574 2580 */ 2575 - int cont_write_begin(struct file *file, struct address_space *mapping, 2576 - loff_t pos, unsigned len, 2577 - struct folio **foliop, void **fsdata, 2578 - get_block_t *get_block, loff_t *bytes) 2581 + int cont_write_begin(const struct kiocb *iocb, struct address_space *mapping, 2582 + loff_t pos, unsigned len, struct folio **foliop, 2583 + void **fsdata, get_block_t *get_block, loff_t *bytes) 2579 2584 { 2580 2585 struct inode *inode = mapping->host; 2581 2586 unsigned int blocksize = i_blocksize(inode); 2582 2587 unsigned int zerofrom; 2583 2588 int err; 2584 2589 2585 - err = cont_expand_zero(file, mapping, pos, bytes); 2590 + err = cont_expand_zero(iocb, mapping, pos, bytes); 2586 2591 if (err) 2587 2592 return err; 2588 2593 ··· 2603 2610 * holes and correct delalloc and unwritten extent mapping on filesystems that 2604 2611 * support these features. 2605 2612 * 2606 - * We are not allowed to take the i_mutex here so we have to play games to 2613 + * We are not allowed to take the i_rwsem here so we have to play games to 2607 2614 * protect against truncate races as the page could now be beyond EOF. Because 2608 2615 * truncate writes the inode size before removing pages, once we have the 2609 2616 * page lock we can determine safely if the page is beyond EOF. If it is not
+2 -2
fs/cachefiles/namei.c
··· 388 388 } else { 389 389 struct renamedata rd = { 390 390 .old_mnt_idmap = &nop_mnt_idmap, 391 - .old_dir = d_inode(dir), 391 + .old_parent = dir, 392 392 .old_dentry = rep, 393 393 .new_mnt_idmap = &nop_mnt_idmap, 394 - .new_dir = d_inode(cache->graveyard), 394 + .new_parent = cache->graveyard, 395 395 .new_dentry = grave, 396 396 }; 397 397 trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
+7 -3
fs/ceph/addr.c
··· 1864 1864 * We are only allowed to write into/dirty the page if the page is 1865 1865 * clean, or already dirty within the same snap context. 1866 1866 */ 1867 - static int ceph_write_begin(struct file *file, struct address_space *mapping, 1867 + static int ceph_write_begin(const struct kiocb *iocb, 1868 + struct address_space *mapping, 1868 1869 loff_t pos, unsigned len, 1869 1870 struct folio **foliop, void **fsdata) 1870 1871 { 1872 + struct file *file = iocb->ki_filp; 1871 1873 struct inode *inode = file_inode(file); 1872 1874 struct ceph_inode_info *ci = ceph_inode(inode); 1873 1875 int r; ··· 1887 1885 * we don't do anything in here that simple_write_end doesn't do 1888 1886 * except adjust dirty page accounting 1889 1887 */ 1890 - static int ceph_write_end(struct file *file, struct address_space *mapping, 1891 - loff_t pos, unsigned len, unsigned copied, 1888 + static int ceph_write_end(const struct kiocb *iocb, 1889 + struct address_space *mapping, loff_t pos, 1890 + unsigned len, unsigned copied, 1892 1891 struct folio *folio, void *fsdata) 1893 1892 { 1893 + struct file *file = iocb->ki_filp; 1894 1894 struct inode *inode = file_inode(file); 1895 1895 struct ceph_client *cl = ceph_inode_to_client(inode); 1896 1896 bool check_cap = false;
+5 -5
fs/dcache.c
··· 2797 2797 * @target: new dentry 2798 2798 * @exchange: exchange the two dentries 2799 2799 * 2800 - * Update the dcache to reflect the move of a file name. Negative 2801 - * dcache entries should not be moved in this way. Caller must hold 2802 - * rename_lock, the i_mutex of the source and target directories, 2803 - * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). 2800 + * Update the dcache to reflect the move of a file name. Negative dcache 2801 + * entries should not be moved in this way. Caller must hold rename_lock, the 2802 + * i_rwsem of the source and target directories (exclusively), and the sb-> 2803 + * s_vfs_rename_mutex if they differ. See lock_rename(). 2804 2804 */ 2805 2805 static void __d_move(struct dentry *dentry, struct dentry *target, 2806 2806 bool exchange) ··· 2946 2946 * This helper attempts to cope with remotely renamed directories 2947 2947 * 2948 2948 * It assumes that the caller is already holding 2949 - * dentry->d_parent->d_inode->i_mutex, and rename_lock 2949 + * dentry->d_parent->d_inode->i_rwsem, and rename_lock 2950 2950 * 2951 2951 * Note: If ever the locking in lock_rename() changes, then please 2952 2952 * remember to update this too...
+4 -4
fs/direct-io.c
··· 1083 1083 * The locking rules are governed by the flags parameter: 1084 1084 * - if the flags value contains DIO_LOCKING we use a fancy locking 1085 1085 * scheme for dumb filesystems. 1086 - * For writes this function is called under i_mutex and returns with 1087 - * i_mutex held, for reads, i_mutex is not held on entry, but it is 1086 + * For writes this function is called under i_rwsem and returns with 1087 + * i_rwsem held, for reads, i_rwsem is not held on entry, but it is 1088 1088 * taken and dropped again before returning. 1089 1089 * - if the flags value does NOT contain DIO_LOCKING we don't use any 1090 1090 * internal locking but rather rely on the filesystem to synchronize ··· 1094 1094 * counter before starting direct I/O, and decrement it once we are done. 1095 1095 * Truncate can wait for it to reach zero to provide exclusion. It is 1096 1096 * expected that filesystem provide exclusion between new direct I/O 1097 - * and truncates. For DIO_LOCKING filesystems this is done by i_mutex, 1097 + * and truncates. For DIO_LOCKING filesystems this is done by i_rwsem, 1098 1098 * but other filesystems need to take care of this on their own. 1099 1099 * 1100 1100 * NOTE: if you pass "sdio" to anything by pointer make sure that function ··· 1279 1279 1280 1280 /* 1281 1281 * All block lookups have been performed. For READ requests 1282 - * we can let i_mutex go now that its achieved its purpose 1282 + * we can let i_rwsem go now that its achieved its purpose 1283 1283 * of protecting us from looking up uninitialized blocks. 1284 1284 */ 1285 1285 if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
+2 -2
fs/ecryptfs/inode.c
··· 635 635 } 636 636 637 637 rd.old_mnt_idmap = &nop_mnt_idmap; 638 - rd.old_dir = d_inode(lower_old_dir_dentry); 638 + rd.old_parent = lower_old_dir_dentry; 639 639 rd.old_dentry = lower_old_dentry; 640 640 rd.new_mnt_idmap = &nop_mnt_idmap; 641 - rd.new_dir = d_inode(lower_new_dir_dentry); 641 + rd.new_parent = lower_new_dir_dentry; 642 642 rd.new_dentry = lower_new_dentry; 643 643 rc = vfs_rename(&rd); 644 644 if (rc)
+2 -1
fs/ecryptfs/main.c
··· 20 20 #include <linux/fs_context.h> 21 21 #include <linux/fs_parser.h> 22 22 #include <linux/fs_stack.h> 23 + #include <linux/sysfs.h> 23 24 #include <linux/slab.h> 24 25 #include <linux/magic.h> 25 26 #include "ecryptfs_kernel.h" ··· 765 764 static ssize_t version_show(struct kobject *kobj, 766 765 struct kobj_attribute *attr, char *buff) 767 766 { 768 - return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK); 767 + return sysfs_emit(buff, "%d\n", ECRYPTFS_VERSIONING_MASK); 769 768 } 770 769 771 770 static struct kobj_attribute version_attr = __ATTR_RO(version);
+5 -5
fs/ecryptfs/mmap.c
··· 228 228 229 229 /** 230 230 * ecryptfs_write_begin 231 - * @file: The eCryptfs file 231 + * @iocb: I/O control block for the eCryptfs file 232 232 * @mapping: The eCryptfs object 233 233 * @pos: The file offset at which to start writing 234 234 * @len: Length of the write ··· 239 239 * 240 240 * Returns zero on success; non-zero otherwise 241 241 */ 242 - static int ecryptfs_write_begin(struct file *file, 242 + static int ecryptfs_write_begin(const struct kiocb *iocb, 243 243 struct address_space *mapping, 244 244 loff_t pos, unsigned len, 245 245 struct folio **foliop, void **fsdata) ··· 322 322 * Note, this will increase i_size. */ 323 323 if (index != 0) { 324 324 if (prev_page_end_size > i_size_read(mapping->host)) { 325 - rc = ecryptfs_truncate(file->f_path.dentry, 325 + rc = ecryptfs_truncate(iocb->ki_filp->f_path.dentry, 326 326 prev_page_end_size); 327 327 if (rc) { 328 328 printk(KERN_ERR "%s: Error on attempt to " ··· 429 429 430 430 /** 431 431 * ecryptfs_write_end 432 - * @file: The eCryptfs file object 432 + * @iocb: I/O control block for the eCryptfs file 433 433 * @mapping: The eCryptfs object 434 434 * @pos: The file position 435 435 * @len: The length of the data (unused) ··· 437 437 * @folio: The eCryptfs folio 438 438 * @fsdata: The fsdata (unused) 439 439 */ 440 - static int ecryptfs_write_end(struct file *file, 440 + static int ecryptfs_write_end(const struct kiocb *iocb, 441 441 struct address_space *mapping, 442 442 loff_t pos, unsigned len, unsigned copied, 443 443 struct folio *folio, void *fsdata)
+44 -14
fs/eventpoll.c
··· 218 218 /* used to optimize loop detection check */ 219 219 u64 gen; 220 220 struct hlist_head refs; 221 + u8 loop_check_depth; 221 222 222 223 /* 223 224 * usage count, used together with epitem->dying to ··· 2141 2140 } 2142 2141 2143 2142 /** 2144 - * ep_loop_check_proc - verify that adding an epoll file inside another 2145 - * epoll structure does not violate the constraints, in 2146 - * terms of closed loops, or too deep chains (which can 2147 - * result in excessive stack usage). 2143 + * ep_loop_check_proc - verify that adding an epoll file @ep inside another 2144 + * epoll file does not create closed loops, and 2145 + * determine the depth of the subtree starting at @ep 2148 2146 * 2149 2147 * @ep: the &struct eventpoll to be currently checked. 2150 2148 * @depth: Current depth of the path being checked. 2151 2149 * 2152 - * Return: %zero if adding the epoll @file inside current epoll 2153 - * structure @ep does not violate the constraints, or %-1 otherwise. 2150 + * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep. 
2154 2151 */ 2155 2152 static int ep_loop_check_proc(struct eventpoll *ep, int depth) 2156 2153 { 2157 - int error = 0; 2154 + int result = 0; 2158 2155 struct rb_node *rbp; 2159 2156 struct epitem *epi; 2157 + 2158 + if (ep->gen == loop_check_gen) 2159 + return ep->loop_check_depth; 2160 2160 2161 2161 mutex_lock_nested(&ep->mtx, depth + 1); 2162 2162 ep->gen = loop_check_gen; ··· 2166 2164 if (unlikely(is_file_epoll(epi->ffd.file))) { 2167 2165 struct eventpoll *ep_tovisit; 2168 2166 ep_tovisit = epi->ffd.file->private_data; 2169 - if (ep_tovisit->gen == loop_check_gen) 2170 - continue; 2171 2167 if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) 2172 - error = -1; 2168 + result = INT_MAX; 2173 2169 else 2174 - error = ep_loop_check_proc(ep_tovisit, depth + 1); 2175 - if (error != 0) 2170 + result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1); 2171 + if (result > EP_MAX_NESTS) 2176 2172 break; 2177 2173 } else { 2178 2174 /* ··· 2184 2184 list_file(epi->ffd.file); 2185 2185 } 2186 2186 } 2187 + ep->loop_check_depth = result; 2187 2188 mutex_unlock(&ep->mtx); 2188 2189 2189 - return error; 2190 + return result; 2191 + } 2192 + 2193 + /* ep_get_upwards_depth_proc - determine depth of @ep when traversed upwards */ 2194 + static int ep_get_upwards_depth_proc(struct eventpoll *ep, int depth) 2195 + { 2196 + int result = 0; 2197 + struct epitem *epi; 2198 + 2199 + if (ep->gen == loop_check_gen) 2200 + return ep->loop_check_depth; 2201 + hlist_for_each_entry_rcu(epi, &ep->refs, fllink) 2202 + result = max(result, ep_get_upwards_depth_proc(epi->ep, depth + 1) + 1); 2203 + ep->gen = loop_check_gen; 2204 + ep->loop_check_depth = result; 2205 + return result; 2190 2206 } 2191 2207 2192 2208 /** ··· 2218 2202 */ 2219 2203 static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to) 2220 2204 { 2205 + int depth, upwards_depth; 2206 + 2221 2207 inserting_into = ep; 2222 - return ep_loop_check_proc(to, 0); 2208 + /* 2209 + * Check how deep down we 
can get from @to, and whether it is possible 2210 + * to loop up to @ep. 2211 + */ 2212 + depth = ep_loop_check_proc(to, 0); 2213 + if (depth > EP_MAX_NESTS) 2214 + return -1; 2215 + /* Check how far up we can go from @ep. */ 2216 + rcu_read_lock(); 2217 + upwards_depth = ep_get_upwards_depth_proc(ep, 0); 2218 + rcu_read_unlock(); 2219 + 2220 + return (depth+1+upwards_depth > EP_MAX_NESTS) ? -1 : 0; 2223 2221 } 2224 2222 2225 2223 static void clear_tfile_check_list(void)
+5 -6
fs/exfat/file.c
··· 532 532 return blkdev_issue_flush(inode->i_sb->s_bdev); 533 533 } 534 534 535 - static int exfat_extend_valid_size(struct file *file, loff_t new_valid_size) 535 + static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size) 536 536 { 537 537 int err; 538 538 loff_t pos; 539 - struct inode *inode = file_inode(file); 540 539 struct exfat_inode_info *ei = EXFAT_I(inode); 541 540 struct address_space *mapping = inode->i_mapping; 542 541 const struct address_space_operations *ops = mapping->a_ops; ··· 550 551 if (pos + len > new_valid_size) 551 552 len = new_valid_size - pos; 552 553 553 - err = ops->write_begin(file, mapping, pos, len, &folio, NULL); 554 + err = ops->write_begin(NULL, mapping, pos, len, &folio, NULL); 554 555 if (err) 555 556 goto out; 556 557 557 558 off = offset_in_folio(folio, pos); 558 559 folio_zero_new_buffers(folio, off, off + len); 559 560 560 - err = ops->write_end(file, mapping, pos, len, len, folio, NULL); 561 + err = ops->write_end(NULL, mapping, pos, len, len, folio, NULL); 561 562 if (err < 0) 562 563 goto out; 563 564 pos += len; ··· 603 604 } 604 605 605 606 if (pos > valid_size) { 606 - ret = exfat_extend_valid_size(file, pos); 607 + ret = exfat_extend_valid_size(inode, pos); 607 608 if (ret < 0 && ret != -ENOSPC) { 608 609 exfat_err(inode->i_sb, 609 610 "write: fail to zero from %llu to %llu(%zd)", ··· 664 665 start + vma->vm_end - vma->vm_start); 665 666 666 667 if (ei->valid_size < end) { 667 - err = exfat_extend_valid_size(file, end); 668 + err = exfat_extend_valid_size(inode, end); 668 669 if (err < 0) { 669 670 inode_unlock(inode); 670 671 return vmf_fs_error(err);
+9 -7
fs/exfat/inode.c
··· 446 446 } 447 447 } 448 448 449 - static int exfat_write_begin(struct file *file, struct address_space *mapping, 450 - loff_t pos, unsigned int len, 451 - struct folio **foliop, void **fsdata) 449 + static int exfat_write_begin(const struct kiocb *iocb, 450 + struct address_space *mapping, 451 + loff_t pos, unsigned int len, 452 + struct folio **foliop, void **fsdata) 452 453 { 453 454 int ret; 454 455 ··· 464 463 return ret; 465 464 } 466 465 467 - static int exfat_write_end(struct file *file, struct address_space *mapping, 468 - loff_t pos, unsigned int len, unsigned int copied, 469 - struct folio *folio, void *fsdata) 466 + static int exfat_write_end(const struct kiocb *iocb, 467 + struct address_space *mapping, 468 + loff_t pos, unsigned int len, unsigned int copied, 469 + struct folio *folio, void *fsdata) 470 470 { 471 471 struct inode *inode = mapping->host; 472 472 struct exfat_inode_info *ei = EXFAT_I(inode); 473 473 int err; 474 474 475 - err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); 475 + err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); 476 476 if (err < len) 477 477 exfat_write_failed(mapping, pos+len); 478 478
+1 -1
fs/ext2/dir.c
··· 87 87 struct inode *dir = mapping->host; 88 88 89 89 inode_inc_iversion(dir); 90 - block_write_end(NULL, mapping, pos, len, len, folio, NULL); 90 + block_write_end(pos, len, len, folio); 91 91 92 92 if (pos+len > dir->i_size) { 93 93 i_size_write(dir, pos+len);
+6 -5
fs/ext2/inode.c
··· 915 915 } 916 916 917 917 static int 918 - ext2_write_begin(struct file *file, struct address_space *mapping, 918 + ext2_write_begin(const struct kiocb *iocb, struct address_space *mapping, 919 919 loff_t pos, unsigned len, struct folio **foliop, void **fsdata) 920 920 { 921 921 int ret; ··· 926 926 return ret; 927 927 } 928 928 929 - static int ext2_write_end(struct file *file, struct address_space *mapping, 930 - loff_t pos, unsigned len, unsigned copied, 931 - struct folio *folio, void *fsdata) 929 + static int ext2_write_end(const struct kiocb *iocb, 930 + struct address_space *mapping, 931 + loff_t pos, unsigned len, unsigned copied, 932 + struct folio *folio, void *fsdata) 932 933 { 933 934 int ret; 934 935 935 - ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); 936 + ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); 936 937 if (ret < len) 937 938 ext2_write_failed(mapping, pos + len); 938 939 return ret;
+2 -1
fs/ext4/file.c
··· 977 977 .splice_write = iter_file_splice_write, 978 978 .fallocate = ext4_fallocate, 979 979 .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | 980 - FOP_DIO_PARALLEL_WRITE, 980 + FOP_DIO_PARALLEL_WRITE | 981 + FOP_DONTCACHE, 981 982 }; 982 983 983 984 const struct inode_operations ext4_file_inode_operations = {
+15 -20
fs/ext4/inode.c
··· 1252 1252 * and the ext4_write_end(). So doing the jbd2_journal_start at the start of 1253 1253 * ext4_write_begin() is the right place. 1254 1254 */ 1255 - static int ext4_write_begin(struct file *file, struct address_space *mapping, 1255 + static int ext4_write_begin(const struct kiocb *iocb, 1256 + struct address_space *mapping, 1256 1257 loff_t pos, unsigned len, 1257 1258 struct folio **foliop, void **fsdata) 1258 1259 { ··· 1264 1263 struct folio *folio; 1265 1264 pgoff_t index; 1266 1265 unsigned from, to; 1267 - fgf_t fgp = FGP_WRITEBEGIN; 1268 1266 1269 1267 ret = ext4_emergency_state(inode->i_sb); 1270 1268 if (unlikely(ret)) ··· 1287 1287 } 1288 1288 1289 1289 /* 1290 - * __filemap_get_folio() can take a long time if the 1290 + * write_begin_get_folio() can take a long time if the 1291 1291 * system is thrashing due to memory pressure, or if the folio 1292 1292 * is being written back. So grab it first before we start 1293 1293 * the transaction handle. This also allows us to allocate 1294 1294 * the folio (if needed) without using GFP_NOFS. 1295 1295 */ 1296 1296 retry_grab: 1297 - fgp |= fgf_set_order(len); 1298 - folio = __filemap_get_folio(mapping, index, fgp, 1299 - mapping_gfp_mask(mapping)); 1297 + folio = write_begin_get_folio(iocb, mapping, index, len); 1300 1298 if (IS_ERR(folio)) 1301 1299 return PTR_ERR(folio); 1302 1300 ··· 1398 1400 1399 1401 /* 1400 1402 * We need to pick up the new inode size which generic_commit_write gave us 1401 - * `file' can be NULL - eg, when called from page_symlink(). 1403 + * `iocb` can be NULL - eg, when called from page_symlink(). 1402 1404 * 1403 1405 * ext4 never places buffers on inode->i_mapping->i_private_list. metadata 1404 1406 * buffers are managed internally. 
1405 1407 */ 1406 - static int ext4_write_end(struct file *file, 1408 + static int ext4_write_end(const struct kiocb *iocb, 1407 1409 struct address_space *mapping, 1408 1410 loff_t pos, unsigned len, unsigned copied, 1409 1411 struct folio *folio, void *fsdata) ··· 1422 1424 return ext4_write_inline_data_end(inode, pos, len, copied, 1423 1425 folio); 1424 1426 1425 - copied = block_write_end(file, mapping, pos, len, copied, folio, fsdata); 1427 + copied = block_write_end(pos, len, copied, folio); 1426 1428 /* 1427 1429 * it's important to update i_size while still holding folio lock: 1428 1430 * page writeout could otherwise come in and zero beyond i_size. ··· 1508 1510 } while (bh != head); 1509 1511 } 1510 1512 1511 - static int ext4_journalled_write_end(struct file *file, 1513 + static int ext4_journalled_write_end(const struct kiocb *iocb, 1512 1514 struct address_space *mapping, 1513 1515 loff_t pos, unsigned len, unsigned copied, 1514 1516 struct folio *folio, void *fsdata) ··· 3034 3036 return 0; 3035 3037 } 3036 3038 3037 - static int ext4_da_write_begin(struct file *file, struct address_space *mapping, 3039 + static int ext4_da_write_begin(const struct kiocb *iocb, 3040 + struct address_space *mapping, 3038 3041 loff_t pos, unsigned len, 3039 3042 struct folio **foliop, void **fsdata) 3040 3043 { ··· 3043 3044 struct folio *folio; 3044 3045 pgoff_t index; 3045 3046 struct inode *inode = mapping->host; 3046 - fgf_t fgp = FGP_WRITEBEGIN; 3047 3047 3048 3048 ret = ext4_emergency_state(inode->i_sb); 3049 3049 if (unlikely(ret)) ··· 3052 3054 3053 3055 if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) { 3054 3056 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; 3055 - return ext4_write_begin(file, mapping, pos, 3057 + return ext4_write_begin(iocb, mapping, pos, 3056 3058 len, foliop, fsdata); 3057 3059 } 3058 3060 *fsdata = (void *)0; ··· 3068 3070 } 3069 3071 3070 3072 retry: 3071 - fgp |= fgf_set_order(len); 3072 - folio = 
__filemap_get_folio(mapping, index, fgp, 3073 - mapping_gfp_mask(mapping)); 3073 + folio = write_begin_get_folio(iocb, mapping, index, len); 3074 3074 if (IS_ERR(folio)) 3075 3075 return PTR_ERR(folio); 3076 3076 ··· 3140 3144 * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES 3141 3145 * flag, which all that's needed to trigger page writeback. 3142 3146 */ 3143 - copied = block_write_end(NULL, mapping, pos, len, copied, 3144 - folio, NULL); 3147 + copied = block_write_end(pos, len, copied, folio); 3145 3148 new_i_size = pos + copied; 3146 3149 3147 3150 /* ··· 3191 3196 return copied; 3192 3197 } 3193 3198 3194 - static int ext4_da_write_end(struct file *file, 3199 + static int ext4_da_write_end(const struct kiocb *iocb, 3195 3200 struct address_space *mapping, 3196 3201 loff_t pos, unsigned len, unsigned copied, 3197 3202 struct folio *folio, void *fsdata) ··· 3200 3205 int write_mode = (int)(unsigned long)fsdata; 3201 3206 3202 3207 if (write_mode == FALL_BACK_TO_NONDELALLOC) 3203 - return ext4_write_end(file, mapping, pos, 3208 + return ext4_write_end(iocb, mapping, pos, 3204 3209 len, copied, folio, fsdata); 3205 3210 3206 3211 trace_ext4_da_write_end(inode, pos, len, copied);
+5 -3
fs/f2fs/data.c
··· 3519 3519 return 0; 3520 3520 } 3521 3521 3522 - static int f2fs_write_begin(struct file *file, struct address_space *mapping, 3523 - loff_t pos, unsigned len, struct folio **foliop, void **fsdata) 3522 + static int f2fs_write_begin(const struct kiocb *iocb, 3523 + struct address_space *mapping, 3524 + loff_t pos, unsigned len, struct folio **foliop, 3525 + void **fsdata) 3524 3526 { 3525 3527 struct inode *inode = mapping->host; 3526 3528 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); ··· 3658 3656 return err; 3659 3657 } 3660 3658 3661 - static int f2fs_write_end(struct file *file, 3659 + static int f2fs_write_end(const struct kiocb *iocb, 3662 3660 struct address_space *mapping, 3663 3661 loff_t pos, unsigned len, unsigned copied, 3664 3662 struct folio *folio, void *fsdata)
+10 -8
fs/fat/inode.c
··· 219 219 } 220 220 } 221 221 222 - static int fat_write_begin(struct file *file, struct address_space *mapping, 223 - loff_t pos, unsigned len, 224 - struct folio **foliop, void **fsdata) 222 + static int fat_write_begin(const struct kiocb *iocb, 223 + struct address_space *mapping, 224 + loff_t pos, unsigned len, 225 + struct folio **foliop, void **fsdata) 225 226 { 226 227 int err; 227 228 228 - err = cont_write_begin(file, mapping, pos, len, 229 + err = cont_write_begin(iocb, mapping, pos, len, 229 230 foliop, fsdata, fat_get_block, 230 231 &MSDOS_I(mapping->host)->mmu_private); 231 232 if (err < 0) ··· 234 233 return err; 235 234 } 236 235 237 - static int fat_write_end(struct file *file, struct address_space *mapping, 238 - loff_t pos, unsigned len, unsigned copied, 239 - struct folio *folio, void *fsdata) 236 + static int fat_write_end(const struct kiocb *iocb, 237 + struct address_space *mapping, 238 + loff_t pos, unsigned len, unsigned copied, 239 + struct folio *folio, void *fsdata) 240 240 { 241 241 struct inode *inode = mapping->host; 242 242 int err; 243 - err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); 243 + err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); 244 244 if (err < len) 245 245 fat_write_failed(mapping, pos + len); 246 246 if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
+15
fs/file.c
··· 197 197 return ERR_PTR(-EMFILE); 198 198 } 199 199 200 + /* 201 + * Check if the allocation size would exceed INT_MAX. kvmalloc_array() 202 + * and kvmalloc() will warn if the allocation size is greater than 203 + * INT_MAX, as filp_cachep objects are not __GFP_NOWARN. 204 + * 205 + * This can happen when sysctl_nr_open is set to a very high value and 206 + * a process tries to use a file descriptor near that limit. For example, 207 + * if sysctl_nr_open is set to 1073741816 (0x3ffffff8) - which is what 208 + * systemd typically sets it to - then trying to use a file descriptor 209 + * close to that value will require allocating a file descriptor table 210 + * that exceeds 8GB in size. 211 + */ 212 + if (unlikely(nr > INT_MAX / sizeof(struct file *))) 213 + return ERR_PTR(-EMFILE); 214 + 200 215 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT); 201 216 if (!fdt) 202 217 goto out;
+9 -5
fs/fuse/file.c
··· 2212 2212 * It's worthy to make sure that space is reserved on disk for the write, 2213 2213 * but how to implement it without killing performance need more thinking. 2214 2214 */ 2215 - static int fuse_write_begin(struct file *file, struct address_space *mapping, 2216 - loff_t pos, unsigned len, struct folio **foliop, void **fsdata) 2215 + static int fuse_write_begin(const struct kiocb *iocb, 2216 + struct address_space *mapping, 2217 + loff_t pos, unsigned len, struct folio **foliop, 2218 + void **fsdata) 2217 2219 { 2218 2220 pgoff_t index = pos >> PAGE_SHIFT; 2221 + struct file *file = iocb->ki_filp; 2219 2222 struct fuse_conn *fc = get_fuse_conn(file_inode(file)); 2220 2223 struct folio *folio; 2221 2224 loff_t fsize; ··· 2258 2255 return err; 2259 2256 } 2260 2257 2261 - static int fuse_write_end(struct file *file, struct address_space *mapping, 2262 - loff_t pos, unsigned len, unsigned copied, 2263 - struct folio *folio, void *fsdata) 2258 + static int fuse_write_end(const struct kiocb *iocb, 2259 + struct address_space *mapping, 2260 + loff_t pos, unsigned len, unsigned copied, 2261 + struct folio *folio, void *fsdata) 2264 2262 { 2265 2263 struct inode *inode = folio->mapping->host; 2266 2264
+1 -1
fs/hfs/hfs_fs.h
··· 201 201 extern const struct address_space_operations hfs_aops; 202 202 extern const struct address_space_operations hfs_btree_aops; 203 203 204 - int hfs_write_begin(struct file *file, struct address_space *mapping, 204 + int hfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, 205 205 loff_t pos, unsigned len, struct folio **foliop, void **fsdata); 206 206 extern struct inode *hfs_new_inode(struct inode *, const struct qstr *, umode_t); 207 207 extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
+2 -2
fs/hfs/inode.c
··· 44 44 } 45 45 } 46 46 47 - int hfs_write_begin(struct file *file, struct address_space *mapping, 47 + int hfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, 48 48 loff_t pos, unsigned len, struct folio **foliop, void **fsdata) 49 49 { 50 50 int ret; 51 51 52 - ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata, 52 + ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata, 53 53 hfs_get_block, 54 54 &HFS_I(mapping->host)->phys_size); 55 55 if (unlikely(ret))
+4 -2
fs/hfsplus/hfsplus_fs.h
··· 473 473 extern const struct address_space_operations hfsplus_btree_aops; 474 474 extern const struct dentry_operations hfsplus_dentry_operations; 475 475 476 - int hfsplus_write_begin(struct file *file, struct address_space *mapping, 477 - loff_t pos, unsigned len, struct folio **foliop, void **fsdata); 476 + int hfsplus_write_begin(const struct kiocb *iocb, 477 + struct address_space *mapping, 478 + loff_t pos, unsigned len, struct folio **foliop, 479 + void **fsdata); 478 480 struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir, 479 481 umode_t mode); 480 482 void hfsplus_delete_inode(struct inode *inode);
+5 -3
fs/hfsplus/inode.c
··· 38 38 } 39 39 } 40 40 41 - int hfsplus_write_begin(struct file *file, struct address_space *mapping, 42 - loff_t pos, unsigned len, struct folio **foliop, void **fsdata) 41 + int hfsplus_write_begin(const struct kiocb *iocb, 42 + struct address_space *mapping, loff_t pos, 43 + unsigned len, struct folio **foliop, 44 + void **fsdata) 43 45 { 44 46 int ret; 45 47 46 - ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata, 48 + ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata, 47 49 hfsplus_get_block, 48 50 &HFSPLUS_I(mapping->host)->phys_size); 49 51 if (unlikely(ret))
+5 -3
fs/hostfs/hostfs_kern.c
··· 445 445 return ret; 446 446 } 447 447 448 - static int hostfs_write_begin(struct file *file, struct address_space *mapping, 448 + static int hostfs_write_begin(const struct kiocb *iocb, 449 + struct address_space *mapping, 449 450 loff_t pos, unsigned len, 450 451 struct folio **foliop, void **fsdata) 451 452 { ··· 459 458 return 0; 460 459 } 461 460 462 - static int hostfs_write_end(struct file *file, struct address_space *mapping, 461 + static int hostfs_write_end(const struct kiocb *iocb, 462 + struct address_space *mapping, 463 463 loff_t pos, unsigned len, unsigned copied, 464 464 struct folio *folio, void *fsdata) 465 465 { ··· 470 468 int err; 471 469 472 470 buffer = kmap_local_folio(folio, from); 473 - err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer, copied); 471 + err = write_file(FILE_HOSTFS_I(iocb->ki_filp)->fd, &pos, buffer, copied); 474 472 kunmap_local(buffer); 475 473 476 474 if (!folio_test_uptodate(folio) && err == folio_size(folio))
+10 -8
fs/hpfs/file.c
··· 188 188 hpfs_unlock(inode->i_sb); 189 189 } 190 190 191 - static int hpfs_write_begin(struct file *file, struct address_space *mapping, 192 - loff_t pos, unsigned len, 193 - struct folio **foliop, void **fsdata) 191 + static int hpfs_write_begin(const struct kiocb *iocb, 192 + struct address_space *mapping, 193 + loff_t pos, unsigned len, 194 + struct folio **foliop, void **fsdata) 194 195 { 195 196 int ret; 196 197 197 - ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata, 198 + ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata, 198 199 hpfs_get_block, 199 200 &hpfs_i(mapping->host)->mmu_private); 200 201 if (unlikely(ret)) ··· 204 203 return ret; 205 204 } 206 205 207 - static int hpfs_write_end(struct file *file, struct address_space *mapping, 208 - loff_t pos, unsigned len, unsigned copied, 209 - struct folio *folio, void *fsdata) 206 + static int hpfs_write_end(const struct kiocb *iocb, 207 + struct address_space *mapping, 208 + loff_t pos, unsigned len, unsigned copied, 209 + struct folio *folio, void *fsdata) 210 210 { 211 211 struct inode *inode = mapping->host; 212 212 int err; 213 - err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); 213 + err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); 214 214 if (err < len) 215 215 hpfs_write_failed(mapping, pos + len); 216 216 if (!(err < 0)) {
+5 -4
fs/hugetlbfs/inode.c
··· 311 311 return retval; 312 312 } 313 313 314 - static int hugetlbfs_write_begin(struct file *file, 314 + static int hugetlbfs_write_begin(const struct kiocb *iocb, 315 315 struct address_space *mapping, 316 316 loff_t pos, unsigned len, 317 317 struct folio **foliop, void **fsdata) ··· 319 319 return -EINVAL; 320 320 } 321 321 322 - static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, 323 - loff_t pos, unsigned len, unsigned copied, 324 - struct folio *folio, void *fsdata) 322 + static int hugetlbfs_write_end(const struct kiocb *iocb, 323 + struct address_space *mapping, 324 + loff_t pos, unsigned len, unsigned copied, 325 + struct folio *folio, void *fsdata) 325 326 { 326 327 BUG(); 327 328 return -EINVAL;
+6 -7
fs/inode.c
··· 865 865 */ 866 866 void evict_inodes(struct super_block *sb) 867 867 { 868 - struct inode *inode, *next; 868 + struct inode *inode; 869 869 LIST_HEAD(dispose); 870 870 871 871 again: 872 872 spin_lock(&sb->s_inode_list_lock); 873 - list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 873 + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 874 874 if (atomic_read(&inode->i_count)) 875 875 continue; 876 876 ··· 1158 1158 /* Set new key only if filesystem hasn't already changed it */ 1159 1159 if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) { 1160 1160 /* 1161 - * ensure nobody is actually holding i_mutex 1161 + * ensure nobody is actually holding i_rwsem 1162 1162 */ 1163 - // mutex_destroy(&inode->i_mutex); 1164 1163 init_rwsem(&inode->i_rwsem); 1165 1164 lockdep_set_class(&inode->i_rwsem, 1166 1165 &type->i_mutex_dir_key); ··· 2614 2615 * proceed with a truncate or equivalent operation. 2615 2616 * 2616 2617 * Must be called under a lock that serializes taking new references 2617 - * to i_dio_count, usually by inode->i_mutex. 2618 + * to i_dio_count, usually by inode->i_rwsem. 2618 2619 */ 2619 2620 void inode_dio_wait(struct inode *inode) 2620 2621 { ··· 2632 2633 /* 2633 2634 * inode_set_flags - atomically set some inode flags 2634 2635 * 2635 - * Note: the caller should be holding i_mutex, or else be sure that 2636 + * Note: the caller should be holding i_rwsem exclusively, or else be sure that 2636 2637 * they have exclusive access to the inode structure (i.e., while the 2637 2638 * inode is being instantiated). The reason for the cmpxchg() loop 2638 2639 * --- which wouldn't be necessary if all code paths which modify ··· 2640 2641 * code path which doesn't today so we use cmpxchg() out of an abundance 2641 2642 * of caution. 
2642 2643 * 2643 - * In the long run, i_mutex is overkill, and we should probably look 2644 + * In the long run, i_rwsem is overkill, and we should probably look 2644 2645 * at using the i_lock spinlock to protect i_flags, and then make sure 2645 2646 * it is so documented in include/linux/fs.h and that all code follows 2646 2647 * the locking convention!!
+1 -2
fs/iomap/buffered-io.c
··· 926 926 if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { 927 927 size_t bh_written; 928 928 929 - bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, 930 - len, copied, folio, NULL); 929 + bh_written = block_write_end(pos, len, copied, folio); 931 930 WARN_ON_ONCE(bh_written != copied && bh_written != 0); 932 931 return bh_written == copied; 933 932 }
+16 -12
fs/jffs2/file.c
··· 21 21 #include <linux/jffs2.h> 22 22 #include "nodelist.h" 23 23 24 - static int jffs2_write_end(struct file *filp, struct address_space *mapping, 25 - loff_t pos, unsigned len, unsigned copied, 26 - struct folio *folio, void *fsdata); 27 - static int jffs2_write_begin(struct file *filp, struct address_space *mapping, 28 - loff_t pos, unsigned len, 29 - struct folio **foliop, void **fsdata); 24 + static int jffs2_write_end(const struct kiocb *iocb, 25 + struct address_space *mapping, 26 + loff_t pos, unsigned len, unsigned copied, 27 + struct folio *folio, void *fsdata); 28 + static int jffs2_write_begin(const struct kiocb *iocb, 29 + struct address_space *mapping, 30 + loff_t pos, unsigned len, 31 + struct folio **foliop, void **fsdata); 30 32 static int jffs2_read_folio(struct file *filp, struct folio *folio); 31 33 32 34 int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync) ··· 123 121 return ret; 124 122 } 125 123 126 - static int jffs2_write_begin(struct file *filp, struct address_space *mapping, 127 - loff_t pos, unsigned len, 128 - struct folio **foliop, void **fsdata) 124 + static int jffs2_write_begin(const struct kiocb *iocb, 125 + struct address_space *mapping, 126 + loff_t pos, unsigned len, 127 + struct folio **foliop, void **fsdata) 129 128 { 130 129 struct folio *folio; 131 130 struct inode *inode = mapping->host; ··· 238 235 return ret; 239 236 } 240 237 241 - static int jffs2_write_end(struct file *filp, struct address_space *mapping, 242 - loff_t pos, unsigned len, unsigned copied, 243 - struct folio *folio, void *fsdata) 238 + static int jffs2_write_end(const struct kiocb *iocb, 239 + struct address_space *mapping, 240 + loff_t pos, unsigned len, unsigned copied, 241 + struct folio *folio, void *fsdata) 244 242 { 245 243 /* Actually commit the write from the page cache page we're looking at. 246 244 * For now, we write the full page out each time. It sucks, but it's simple
+9 -7
fs/jfs/inode.c
··· 290 290 } 291 291 } 292 292 293 - static int jfs_write_begin(struct file *file, struct address_space *mapping, 294 - loff_t pos, unsigned len, 295 - struct folio **foliop, void **fsdata) 293 + static int jfs_write_begin(const struct kiocb *iocb, 294 + struct address_space *mapping, 295 + loff_t pos, unsigned len, 296 + struct folio **foliop, void **fsdata) 296 297 { 297 298 int ret; 298 299 ··· 304 303 return ret; 305 304 } 306 305 307 - static int jfs_write_end(struct file *file, struct address_space *mapping, 308 - loff_t pos, unsigned len, unsigned copied, struct folio *folio, 309 - void *fsdata) 306 + static int jfs_write_end(const struct kiocb *iocb, 307 + struct address_space *mapping, 308 + loff_t pos, unsigned len, unsigned copied, 309 + struct folio *folio, void *fsdata) 310 310 { 311 311 int ret; 312 312 313 - ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); 313 + ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); 314 314 if (ret < len) 315 315 jfs_write_failed(mapping, pos + len); 316 316 return ret;
+16 -10
fs/libfs.c
··· 921 921 return 0; 922 922 } 923 923 924 - int simple_write_begin(struct file *file, struct address_space *mapping, 924 + int simple_write_begin(const struct kiocb *iocb, struct address_space *mapping, 925 925 loff_t pos, unsigned len, 926 926 struct folio **foliop, void **fsdata) 927 927 { ··· 946 946 947 947 /** 948 948 * simple_write_end - .write_end helper for non-block-device FSes 949 - * @file: See .write_end of address_space_operations 949 + * @iocb: kernel I/O control block 950 950 * @mapping: " 951 951 * @pos: " 952 952 * @len: " ··· 957 957 * simple_write_end does the minimum needed for updating a folio after 958 958 * writing is done. It has the same API signature as the .write_end of 959 959 * address_space_operations vector. So it can just be set onto .write_end for 960 - * FSes that don't need any other processing. i_mutex is assumed to be held. 960 + * FSes that don't need any other processing. i_rwsem is assumed to be held 961 + * exclusively. 961 962 * Block based filesystems should use generic_write_end(). 962 963 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty 963 964 * is not called, so a filesystem that actually does store data in .write_inode ··· 967 966 * 968 967 * Use *ONLY* with simple_read_folio() 969 968 */ 970 - static int simple_write_end(struct file *file, struct address_space *mapping, 971 - loff_t pos, unsigned len, unsigned copied, 972 - struct folio *folio, void *fsdata) 969 + static int simple_write_end(const struct kiocb *iocb, 970 + struct address_space *mapping, 971 + loff_t pos, unsigned len, unsigned copied, 972 + struct folio *folio, void *fsdata) 973 973 { 974 974 struct inode *inode = folio->mapping->host; 975 975 loff_t last_pos = pos + copied; ··· 986 984 } 987 985 /* 988 986 * No need to use i_size_read() here, the i_size 989 - * cannot change under us because we hold the i_mutex. 987 + * cannot change under us because we hold the i_rwsem. 
990 988 */ 991 989 if (last_pos > inode->i_size) 992 990 i_size_write(inode, last_pos); ··· 1596 1594 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) 1597 1595 { 1598 1596 u64 last_fs_block = num_blocks - 1; 1599 - u64 last_fs_page = 1600 - last_fs_block >> (PAGE_SHIFT - blocksize_bits); 1597 + u64 last_fs_page, max_bytes; 1598 + 1599 + if (check_shl_overflow(num_blocks, blocksize_bits, &max_bytes)) 1600 + return -EFBIG; 1601 + 1602 + last_fs_page = (max_bytes >> PAGE_SHIFT) - 1; 1601 1603 1602 1604 if (unlikely(num_blocks == 0)) 1603 1605 return 0; 1604 1606 1605 - if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT)) 1607 + if (blocksize_bits < 9) 1606 1608 return -EINVAL; 1607 1609 1608 1610 if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
+2 -2
fs/locks.c
··· 712 712 fl->fl_lmops && fl->fl_lmops->lm_notify) 713 713 fl->fl_lmops->lm_notify(fl); 714 714 else 715 - locks_wake_up(fl); 715 + locks_wake_up_waiter(waiter); 716 716 717 717 /* 718 718 * The setting of flc_blocker to NULL marks the "done" ··· 1794 1794 1795 1795 /* 1796 1796 * In the delegation case we need mutual exclusion with 1797 - * a number of operations that take the i_mutex. We trylock 1797 + * a number of operations that take the i_rwsem. We trylock 1798 1798 * because delegations are an optional optimization, and if 1799 1799 * there's some chance of a conflict--we'd rather not 1800 1800 * bother, maybe that's a sign this just isn't a good file to
+1 -1
fs/minix/dir.c
··· 45 45 struct address_space *mapping = folio->mapping; 46 46 struct inode *dir = mapping->host; 47 47 48 - block_write_end(NULL, mapping, pos, len, len, folio, NULL); 48 + block_write_end(pos, len, len, folio); 49 49 50 50 if (pos+len > dir->i_size) { 51 51 i_size_write(dir, pos+len);
+4 -3
fs/minix/inode.c
··· 442 442 } 443 443 } 444 444 445 - static int minix_write_begin(struct file *file, struct address_space *mapping, 446 - loff_t pos, unsigned len, 447 - struct folio **foliop, void **fsdata) 445 + static int minix_write_begin(const struct kiocb *iocb, 446 + struct address_space *mapping, 447 + loff_t pos, unsigned len, 448 + struct folio **foliop, void **fsdata) 448 449 { 449 450 int ret; 450 451
+15 -14
fs/namei.c
··· 1469 1469 int ret = 0; 1470 1470 1471 1471 while (flags & DCACHE_MANAGED_DENTRY) { 1472 - /* Allow the filesystem to manage the transit without i_mutex 1472 + /* Allow the filesystem to manage the transit without i_rwsem 1473 1473 * being held. */ 1474 1474 if (flags & DCACHE_MANAGE_TRANSIT) { 1475 1475 ret = path->dentry->d_op->d_manage(path, false); ··· 2946 2946 * Note that this routine is purely a helper for filesystem usage and should 2947 2947 * not be called by generic code. It does no permission checking. 2948 2948 * 2949 - * The caller must hold base->i_mutex. 2949 + * The caller must hold base->i_rwsem. 2950 2950 */ 2951 2951 struct dentry *lookup_noperm(struct qstr *name, struct dentry *base) 2952 2952 { ··· 2972 2972 * 2973 2973 * This can be used for in-kernel filesystem clients such as file servers. 2974 2974 * 2975 - * The caller must hold base->i_mutex. 2975 + * The caller must hold base->i_rwsem. 2976 2976 */ 2977 2977 struct dentry *lookup_one(struct mnt_idmap *idmap, struct qstr *name, 2978 2978 struct dentry *base) ··· 4551 4551 * @dentry: victim 4552 4552 * @delegated_inode: returns victim inode, if the inode is delegated. 4553 4553 * 4554 - * The caller must hold dir->i_mutex. 4554 + * The caller must hold dir->i_rwsem exclusively. 4555 4555 * 4556 4556 * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and 4557 4557 * return a reference to the inode in delegated_inode. The caller 4558 4558 * should then break the delegation on that inode and retry. Because 4559 4559 * breaking a delegation may take a long time, the caller should drop 4560 - * dir->i_mutex before doing so. 4560 + * dir->i_rwsem before doing so. 4561 4561 * 4562 4562 * Alternatively, a caller may pass NULL for delegated_inode. This may 4563 4563 * be appropriate for callers that expect the underlying filesystem not ··· 4616 4616 4617 4617 /* 4618 4618 * Make sure that the actual truncation of the file will occur outside its 4619 - * directory's i_mutex. 
Truncate can take a long time if there is a lot of 4619 + * directory's i_rwsem. Truncate can take a long time if there is a lot of 4620 4620 * writeout happening, and we don't want to prevent access to the directory 4621 4621 * while waiting on the I/O. 4622 4622 */ ··· 4794 4794 * @new_dentry: where to create the new link 4795 4795 * @delegated_inode: returns inode needing a delegation break 4796 4796 * 4797 - * The caller must hold dir->i_mutex 4797 + * The caller must hold dir->i_rwsem exclusively. 4798 4798 * 4799 4799 * If vfs_link discovers a delegation on the to-be-linked file in need 4800 4800 * of breaking, it will return -EWOULDBLOCK and return a reference to the 4801 4801 * inode in delegated_inode. The caller should then break the delegation 4802 4802 * and retry. Because breaking a delegation may take a long time, the 4803 - * caller should drop the i_mutex before doing so. 4803 + * caller should drop the i_rwsem before doing so. 4804 4804 * 4805 4805 * Alternatively, a caller may pass NULL for delegated_inode. This may 4806 4806 * be appropriate for callers that expect the underlying filesystem not ··· 4996 4996 * c) we may have to lock up to _four_ objects - parents and victim (if it exists), 4997 4997 * and source (if it's a non-directory or a subdirectory that moves to 4998 4998 * different parent). 4999 - * And that - after we got ->i_mutex on parents (until then we don't know 4999 + * And that - after we got ->i_rwsem on parents (until then we don't know 5000 5000 * whether the target exists). Solution: try to be smart with locking 5001 5001 * order for inodes. We rely on the fact that tree topology may change 5002 5002 * only under ->s_vfs_rename_mutex _and_ that parent of the object we ··· 5008 5008 * has no more than 1 dentry. If "hybrid" objects will ever appear, 5009 5009 * we'd better make sure that there's no link(2) for them. 
5010 5010 * d) conversion from fhandle to dentry may come in the wrong moment - when 5011 - * we are removing the target. Solution: we will have to grab ->i_mutex 5011 + * we are removing the target. Solution: we will have to grab ->i_rwsem 5012 5012 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 5013 - * ->i_mutex on parents, which works but leads to some truly excessive 5013 + * ->i_rwsem on parents, which works but leads to some truly excessive 5014 5014 * locking]. 5015 5015 */ 5016 5016 int vfs_rename(struct renamedata *rd) 5017 5017 { 5018 5018 int error; 5019 - struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir; 5019 + struct inode *old_dir = d_inode(rd->old_parent); 5020 + struct inode *new_dir = d_inode(rd->new_parent); 5020 5021 struct dentry *old_dentry = rd->old_dentry; 5021 5022 struct dentry *new_dentry = rd->new_dentry; 5022 5023 struct inode **delegated_inode = rd->delegated_inode; ··· 5276 5275 if (error) 5277 5276 goto exit5; 5278 5277 5279 - rd.old_dir = old_path.dentry->d_inode; 5278 + rd.old_parent = old_path.dentry; 5280 5279 rd.old_dentry = old_dentry; 5281 5280 rd.old_mnt_idmap = mnt_idmap(old_path.mnt); 5282 - rd.new_dir = new_path.dentry->d_inode; 5281 + rd.new_parent = new_path.dentry; 5283 5282 rd.new_dentry = new_dentry; 5284 5283 rd.new_mnt_idmap = mnt_idmap(new_path.mnt); 5285 5284 rd.delegated_inode = &delegated_inode;
+1 -1
fs/namespace.c
··· 2022 2022 * detach_mounts allows lazily unmounting those mounts instead of 2023 2023 * leaking them. 2024 2024 * 2025 - * The caller may hold dentry->d_inode->i_mutex. 2025 + * The caller may hold dentry->d_inode->i_rwsem. 2026 2026 */ 2027 2027 void __detach_mounts(struct dentry *dentry) 2028 2028 {
+6 -2
fs/nfs/file.c
··· 342 342 * If the writer ends up delaying the write, the writer needs to 343 343 * increment the page use counts until he is done with the page. 344 344 */ 345 - static int nfs_write_begin(struct file *file, struct address_space *mapping, 345 + static int nfs_write_begin(const struct kiocb *iocb, 346 + struct address_space *mapping, 346 347 loff_t pos, unsigned len, struct folio **foliop, 347 348 void **fsdata) 348 349 { 349 350 fgf_t fgp = FGP_WRITEBEGIN; 350 351 struct folio *folio; 352 + struct file *file = iocb->ki_filp; 351 353 int once_thru = 0; 352 354 int ret; 353 355 ··· 379 377 return ret; 380 378 } 381 379 382 - static int nfs_write_end(struct file *file, struct address_space *mapping, 380 + static int nfs_write_end(const struct kiocb *iocb, 381 + struct address_space *mapping, 383 382 loff_t pos, unsigned len, unsigned copied, 384 383 struct folio *folio, void *fsdata) 385 384 { 385 + struct file *file = iocb->ki_filp; 386 386 struct nfs_open_context *ctx = nfs_file_open_context(file); 387 387 unsigned offset = offset_in_folio(folio, pos); 388 388 int status;
+2 -5
fs/nfsd/vfs.c
··· 1867 1867 struct svc_fh *tfhp, char *tname, int tlen) 1868 1868 { 1869 1869 struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; 1870 - struct inode *fdir, *tdir; 1871 1870 int type = S_IFDIR; 1872 1871 __be32 err; 1873 1872 int host_err; ··· 1882 1883 goto out; 1883 1884 1884 1885 fdentry = ffhp->fh_dentry; 1885 - fdir = d_inode(fdentry); 1886 1886 1887 1887 tdentry = tfhp->fh_dentry; 1888 - tdir = d_inode(tdentry); 1889 1888 1890 1889 err = nfserr_perm; 1891 1890 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) ··· 1944 1947 } else { 1945 1948 struct renamedata rd = { 1946 1949 .old_mnt_idmap = &nop_mnt_idmap, 1947 - .old_dir = fdir, 1950 + .old_parent = fdentry, 1948 1951 .old_dentry = odentry, 1949 1952 .new_mnt_idmap = &nop_mnt_idmap, 1950 - .new_dir = tdir, 1953 + .new_parent = tdentry, 1951 1954 .new_dentry = ndentry, 1952 1955 }; 1953 1956 int retries;
+1 -1
fs/nilfs2/dir.c
··· 96 96 int err; 97 97 98 98 nr_dirty = nilfs_page_count_clean_buffers(folio, from, to); 99 - copied = block_write_end(NULL, mapping, pos, len, len, folio, NULL); 99 + copied = block_write_end(pos, len, len, folio); 100 100 if (pos + copied > dir->i_size) 101 101 i_size_write(dir, pos + copied); 102 102 if (IS_DIRSYNC(dir))
+5 -3
fs/nilfs2/inode.c
··· 218 218 } 219 219 } 220 220 221 - static int nilfs_write_begin(struct file *file, struct address_space *mapping, 221 + static int nilfs_write_begin(const struct kiocb *iocb, 222 + struct address_space *mapping, 222 223 loff_t pos, unsigned len, 223 224 struct folio **foliop, void **fsdata) 224 225 ··· 238 237 return err; 239 238 } 240 239 241 - static int nilfs_write_end(struct file *file, struct address_space *mapping, 240 + static int nilfs_write_end(const struct kiocb *iocb, 241 + struct address_space *mapping, 242 242 loff_t pos, unsigned len, unsigned copied, 243 243 struct folio *folio, void *fsdata) 244 244 { ··· 250 248 251 249 nr_dirty = nilfs_page_count_clean_buffers(folio, start, 252 250 start + copied); 253 - copied = generic_write_end(file, mapping, pos, len, copied, folio, 251 + copied = generic_write_end(iocb, mapping, pos, len, copied, folio, 254 252 fsdata); 255 253 nilfs_set_file_dirty(inode, nr_dirty); 256 254 err = nilfs_transaction_commit(inode->i_sb);
+1 -2
fs/nilfs2/recovery.c
··· 560 560 if (unlikely(err)) 561 561 goto failed_folio; 562 562 563 - block_write_end(NULL, inode->i_mapping, pos, blocksize, 564 - blocksize, folio, NULL); 563 + block_write_end(pos, blocksize, blocksize, folio); 565 564 566 565 folio_unlock(folio); 567 566 folio_put(folio);
+2 -2
fs/ntfs3/file.c
··· 162 162 if (pos + len > new_valid) 163 163 len = new_valid - pos; 164 164 165 - err = ntfs_write_begin(file, mapping, pos, len, &folio, NULL); 165 + err = ntfs_write_begin(NULL, mapping, pos, len, &folio, NULL); 166 166 if (err) 167 167 goto out; 168 168 169 169 folio_zero_range(folio, zerofrom, folio_size(folio) - zerofrom); 170 170 171 - err = ntfs_write_end(file, mapping, pos, len, len, folio, NULL); 171 + err = ntfs_write_end(NULL, mapping, pos, len, len, folio, NULL); 172 172 if (err < 0) 173 173 goto out; 174 174 pos += len;
+4 -3
fs/ntfs3/inode.c
··· 920 920 bh_result, create, GET_BLOCK_WRITE_BEGIN); 921 921 } 922 922 923 - int ntfs_write_begin(struct file *file, struct address_space *mapping, 923 + int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, 924 924 loff_t pos, u32 len, struct folio **foliop, void **fsdata) 925 925 { 926 926 int err; ··· 969 969 /* 970 970 * ntfs_write_end - Address_space_operations::write_end. 971 971 */ 972 - int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, 972 + int ntfs_write_end(const struct kiocb *iocb, 973 + struct address_space *mapping, loff_t pos, 973 974 u32 len, u32 copied, struct folio *folio, void *fsdata) 974 975 { 975 976 struct inode *inode = mapping->host; ··· 1002 1001 folio_unlock(folio); 1003 1002 folio_put(folio); 1004 1003 } else { 1005 - err = generic_write_end(file, mapping, pos, len, copied, folio, 1004 + err = generic_write_end(iocb, mapping, pos, len, copied, folio, 1006 1005 fsdata); 1007 1006 } 1008 1007
+6 -4
fs/ntfs3/ntfs_fs.h
··· 708 708 int ntfs_set_size(struct inode *inode, u64 new_size); 709 709 int ntfs_get_block(struct inode *inode, sector_t vbn, 710 710 struct buffer_head *bh_result, int create); 711 - int ntfs_write_begin(struct file *file, struct address_space *mapping, 712 - loff_t pos, u32 len, struct folio **foliop, void **fsdata); 713 - int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, 714 - u32 len, u32 copied, struct folio *folio, void *fsdata); 711 + int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, 712 + loff_t pos, u32 len, struct folio **foliop, 713 + void **fsdata); 714 + int ntfs_write_end(const struct kiocb *iocb, struct address_space *mapping, 715 + loff_t pos, u32 len, u32 copied, struct folio *folio, 716 + void *fsdata); 715 717 int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc); 716 718 int ntfs_sync_inode(struct inode *inode); 717 719 int inode_read_data(struct inode *inode, void *data, size_t bytes);
+4 -2
fs/ocfs2/aops.c
··· 1856 1856 return ret; 1857 1857 } 1858 1858 1859 - static int ocfs2_write_begin(struct file *file, struct address_space *mapping, 1859 + static int ocfs2_write_begin(const struct kiocb *iocb, 1860 + struct address_space *mapping, 1860 1861 loff_t pos, unsigned len, 1861 1862 struct folio **foliop, void **fsdata) 1862 1863 { ··· 2048 2047 return copied; 2049 2048 } 2050 2049 2051 - static int ocfs2_write_end(struct file *file, struct address_space *mapping, 2050 + static int ocfs2_write_end(const struct kiocb *iocb, 2051 + struct address_space *mapping, 2052 2052 loff_t pos, unsigned len, unsigned copied, 2053 2053 struct folio *folio, void *fsdata) 2054 2054 {
+4 -3
fs/omfs/file.c
··· 310 310 } 311 311 } 312 312 313 - static int omfs_write_begin(struct file *file, struct address_space *mapping, 314 - loff_t pos, unsigned len, 315 - struct folio **foliop, void **fsdata) 313 + static int omfs_write_begin(const struct kiocb *iocb, 314 + struct address_space *mapping, 315 + loff_t pos, unsigned len, 316 + struct folio **foliop, void **fsdata) 316 317 { 317 318 int ret; 318 319
+1 -4
fs/open.c
··· 1204 1204 if (IS_ERR(f)) 1205 1205 return f; 1206 1206 1207 - f->f_path = *path; 1208 - error = do_dentry_open(f, NULL); 1207 + error = vfs_open(path, f); 1209 1208 if (error) { 1210 1209 fput(f); 1211 1210 return ERR_PTR(error); 1212 1211 } 1213 - 1214 - fsnotify_open(f); 1215 1212 return f; 1216 1213 } 1217 1214 EXPORT_SYMBOL_GPL(kernel_file_open);
+9 -7
fs/orangefs/inode.c
··· 285 285 return ret; 286 286 } 287 287 288 - static int orangefs_write_begin(struct file *file, 289 - struct address_space *mapping, loff_t pos, unsigned len, 290 - struct folio **foliop, void **fsdata) 288 + static int orangefs_write_begin(const struct kiocb *iocb, 289 + struct address_space *mapping, loff_t pos, 290 + unsigned len, struct folio **foliop, 291 + void **fsdata) 291 292 { 292 293 struct orangefs_write_range *wr; 293 294 struct folio *folio; ··· 341 340 return 0; 342 341 } 343 342 344 - static int orangefs_write_end(struct file *file, struct address_space *mapping, 345 - loff_t pos, unsigned len, unsigned copied, struct folio *folio, 346 - void *fsdata) 343 + static int orangefs_write_end(const struct kiocb *iocb, 344 + struct address_space *mapping, 345 + loff_t pos, unsigned len, unsigned copied, 346 + struct folio *folio, void *fsdata) 347 347 { 348 348 struct inode *inode = folio->mapping->host; 349 349 loff_t last_pos = pos + copied; ··· 374 372 folio_unlock(folio); 375 373 folio_put(folio); 376 374 377 - mark_inode_dirty_sync(file_inode(file)); 375 + mark_inode_dirty_sync(file_inode(iocb->ki_filp)); 378 376 return copied; 379 377 } 380 378
+3 -3
fs/overlayfs/copy_up.c
··· 563 563 if (IS_ERR(index)) { 564 564 err = PTR_ERR(index); 565 565 } else { 566 - err = ovl_do_rename(ofs, dir, temp, dir, index, 0); 566 + err = ovl_do_rename(ofs, indexdir, temp, indexdir, index, 0); 567 567 dput(index); 568 568 } 569 569 out: ··· 762 762 { 763 763 struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); 764 764 struct inode *inode; 765 - struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir); 765 + struct inode *wdir = d_inode(c->workdir); 766 766 struct path path = { .mnt = ovl_upper_mnt(ofs) }; 767 767 struct dentry *temp, *upper, *trap; 768 768 struct ovl_cu_creds cc; ··· 829 829 if (IS_ERR(upper)) 830 830 goto cleanup; 831 831 832 - err = ovl_do_rename(ofs, wdir, temp, udir, upper, 0); 832 + err = ovl_do_rename(ofs, c->workdir, temp, c->destdir, upper, 0); 833 833 dput(upper); 834 834 if (err) 835 835 goto cleanup;
+8 -8
fs/overlayfs/dir.c
··· 107 107 } 108 108 109 109 /* Caller must hold i_mutex on both workdir and dir */ 110 - int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, 110 + int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, 111 111 struct dentry *dentry) 112 112 { 113 113 struct inode *wdir = ofs->workdir->d_inode; ··· 123 123 if (d_is_dir(dentry)) 124 124 flags = RENAME_EXCHANGE; 125 125 126 - err = ovl_do_rename(ofs, wdir, whiteout, dir, dentry, flags); 126 + err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, dentry, flags); 127 127 if (err) 128 128 goto kill_whiteout; 129 129 if (flags) ··· 384 384 if (err) 385 385 goto out_cleanup; 386 386 387 - err = ovl_do_rename(ofs, wdir, opaquedir, udir, upper, RENAME_EXCHANGE); 387 + err = ovl_do_rename(ofs, workdir, opaquedir, upperdir, upper, RENAME_EXCHANGE); 388 388 if (err) 389 389 goto out_cleanup; 390 390 ··· 491 491 if (err) 492 492 goto out_cleanup; 493 493 494 - err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, 494 + err = ovl_do_rename(ofs, workdir, newdentry, upperdir, upper, 495 495 RENAME_EXCHANGE); 496 496 if (err) 497 497 goto out_cleanup; 498 498 499 499 ovl_cleanup(ofs, wdir, upper); 500 500 } else { 501 - err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, 0); 501 + err = ovl_do_rename(ofs, workdir, newdentry, upperdir, upper, 0); 502 502 if (err) 503 503 goto out_cleanup; 504 504 } ··· 774 774 goto out_dput_upper; 775 775 } 776 776 777 - err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper); 777 + err = ovl_cleanup_and_whiteout(ofs, upperdir, upper); 778 778 if (err) 779 779 goto out_d_drop; 780 780 ··· 1246 1246 if (err) 1247 1247 goto out_dput; 1248 1248 1249 - err = ovl_do_rename(ofs, old_upperdir->d_inode, olddentry, 1250 - new_upperdir->d_inode, newdentry, flags); 1249 + err = ovl_do_rename(ofs, old_upperdir, olddentry, 1250 + new_upperdir, newdentry, flags); 1251 1251 if (err) 1252 1252 goto out_dput; 1253 1253
+8 -8
fs/overlayfs/overlayfs.h
··· 355 355 return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name); 356 356 } 357 357 358 - static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir, 359 - struct dentry *olddentry, struct inode *newdir, 358 + static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir, 359 + struct dentry *olddentry, struct dentry *newdir, 360 360 struct dentry *newdentry, unsigned int flags) 361 361 { 362 362 int err; 363 363 struct renamedata rd = { 364 364 .old_mnt_idmap = ovl_upper_mnt_idmap(ofs), 365 - .old_dir = olddir, 366 - .old_dentry = olddentry, 365 + .old_parent = olddir, 366 + .old_dentry = olddentry, 367 367 .new_mnt_idmap = ovl_upper_mnt_idmap(ofs), 368 - .new_dir = newdir, 369 - .new_dentry = newdentry, 370 - .flags = flags, 368 + .new_parent = newdir, 369 + .new_dentry = newdentry, 370 + .flags = flags, 371 371 }; 372 372 373 373 pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags); ··· 828 828 829 829 /* dir.c */ 830 830 extern const struct inode_operations ovl_dir_inode_operations; 831 - int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, 831 + int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, 832 832 struct dentry *dentry); 833 833 struct ovl_cattr { 834 834 dev_t rdev;
+1 -1
fs/overlayfs/readdir.c
··· 1235 1235 * Whiteout orphan index to block future open by 1236 1236 * handle after overlay nlink dropped to zero. 1237 1237 */ 1238 - err = ovl_cleanup_and_whiteout(ofs, dir, index); 1238 + err = ovl_cleanup_and_whiteout(ofs, indexdir, index); 1239 1239 } else { 1240 1240 /* Cleanup orphan index entries */ 1241 1241 err = ovl_cleanup(ofs, dir, index);
+1 -1
fs/overlayfs/super.c
··· 580 580 581 581 /* Name is inline and stable - using snapshot as a copy helper */ 582 582 take_dentry_name_snapshot(&name, temp); 583 - err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT); 583 + err = ovl_do_rename(ofs, workdir, temp, workdir, dest, RENAME_WHITEOUT); 584 584 if (err) { 585 585 if (err == -EINVAL) 586 586 err = 0;
+1 -1
fs/overlayfs/util.c
··· 1115 1115 } else if (ovl_index_all(dentry->d_sb)) { 1116 1116 /* Whiteout orphan index to block future open by handle */ 1117 1117 err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb), 1118 - dir, index); 1118 + indexdir, index); 1119 1119 } else { 1120 1120 /* Cleanup orphan index entries */ 1121 1121 err = ovl_cleanup(ofs, dir, index);
+5 -3
fs/pipe.c
··· 963 963 res[1] = f; 964 964 stream_open(inode, res[0]); 965 965 stream_open(inode, res[1]); 966 + 967 + /* pipe groks IOCB_NOWAIT */ 968 + res[0]->f_mode |= FMODE_NOWAIT; 969 + res[1]->f_mode |= FMODE_NOWAIT; 970 + 966 971 /* 967 972 * Disable permission and pre-content events, but enable legacy 968 973 * inotify events for legacy users. ··· 1002 997 audit_fd_pair(fdr, fdw); 1003 998 fd[0] = fdr; 1004 999 fd[1] = fdw; 1005 - /* pipe groks IOCB_NOWAIT */ 1006 - files[0]->f_mode |= FMODE_NOWAIT; 1007 - files[1]->f_mode |= FMODE_NOWAIT; 1008 1000 return 0; 1009 1001 1010 1002 err_fdr:
+1 -10
fs/proc/fd.c
··· 352 352 u32 request_mask, unsigned int query_flags) 353 353 { 354 354 struct inode *inode = d_inode(path->dentry); 355 - int rv = 0; 356 355 357 356 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 358 - 359 - /* If it's a directory, put the number of open fds there */ 360 - if (S_ISDIR(inode->i_mode)) { 361 - rv = proc_readfd_count(inode, &stat->size); 362 - if (rv < 0) 363 - return rv; 364 - } 365 - 366 - return rv; 357 + return proc_readfd_count(inode, &stat->size); 367 358 } 368 359 369 360 const struct inode_operations proc_fd_inode_operations = {
+1 -1
fs/read_write.c
··· 237 237 * @offset: file offset to seek to 238 238 * @whence: type of seek 239 239 * 240 - * This is a generic implemenation of ->llseek useable for all normal local 240 + * This is a generic implementation of ->llseek useable for all normal local 241 241 * filesystems. It just updates the file offset to the value specified by 242 242 * @offset and @whence. 243 243 */
+2 -2
fs/select.c
··· 192 192 * and is paired with smp_store_mb() in poll_schedule_timeout. 193 193 */ 194 194 smp_wmb(); 195 - pwq->triggered = 1; 195 + WRITE_ONCE(pwq->triggered, 1); 196 196 197 197 /* 198 198 * Perform the default wake up operation using a dummy ··· 237 237 int rc = -EINTR; 238 238 239 239 set_current_state(state); 240 - if (!pwq->triggered) 240 + if (!READ_ONCE(pwq->triggered)) 241 241 rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); 242 242 __set_current_state(TASK_RUNNING); 243 243
+2 -2
fs/smb/server/vfs.c
··· 765 765 } 766 766 767 767 rd.old_mnt_idmap = mnt_idmap(old_path->mnt), 768 - rd.old_dir = d_inode(old_parent), 768 + rd.old_parent = old_parent, 769 769 rd.old_dentry = old_child, 770 770 rd.new_mnt_idmap = mnt_idmap(new_path.mnt), 771 - rd.new_dir = new_path.dentry->d_inode, 771 + rd.new_parent = new_path.dentry, 772 772 rd.new_dentry = new_dentry, 773 773 rd.flags = flags, 774 774 rd.delegated_inode = NULL,
+2 -2
fs/stack.c
··· 3 3 #include <linux/fs.h> 4 4 #include <linux/fs_stack.h> 5 5 6 - /* does _NOT_ require i_mutex to be held. 6 + /* does _NOT_ require i_rwsem to be held. 7 7 * 8 8 * This function cannot be inlined since i_size_{read,write} is rather 9 9 * heavy-weight on 32-bit systems ··· 41 41 * If CONFIG_SMP or CONFIG_PREEMPTION on 32-bit, it's vital for 42 42 * fsstack_copy_inode_size() to hold some lock around 43 43 * i_size_write(), otherwise i_size_read() may spin forever (see 44 - * include/linux/fs.h). We don't necessarily hold i_mutex when this 44 + * include/linux/fs.h). We don't necessarily hold i_rwsem when this 45 45 * is called, so take i_lock for that case. 46 46 * 47 47 * And if on 32-bit, continue our effort to keep the two halves of
+5 -3
fs/ubifs/file.c
··· 404 404 * there is a plenty of flash space and the budget will be acquired quickly, 405 405 * without forcing write-back. The slow path does not make this assumption. 406 406 */ 407 - static int ubifs_write_begin(struct file *file, struct address_space *mapping, 407 + static int ubifs_write_begin(const struct kiocb *iocb, 408 + struct address_space *mapping, 408 409 loff_t pos, unsigned len, 409 410 struct folio **foliop, void **fsdata) 410 411 { ··· 515 514 } 516 515 } 517 516 518 - static int ubifs_write_end(struct file *file, struct address_space *mapping, 519 - loff_t pos, unsigned len, unsigned copied, 517 + static int ubifs_write_end(const struct kiocb *iocb, 518 + struct address_space *mapping, loff_t pos, 519 + unsigned len, unsigned copied, 520 520 struct folio *folio, void *fsdata) 521 521 { 522 522 struct inode *inode = mapping->host;
+7 -4
fs/udf/inode.c
··· 244 244 mpage_readahead(rac, udf_get_block); 245 245 } 246 246 247 - static int udf_write_begin(struct file *file, struct address_space *mapping, 247 + static int udf_write_begin(const struct kiocb *iocb, 248 + struct address_space *mapping, 248 249 loff_t pos, unsigned len, 249 250 struct folio **foliop, void **fsdata) 250 251 { 252 + struct file *file = iocb->ki_filp; 251 253 struct udf_inode_info *iinfo = UDF_I(file_inode(file)); 252 254 struct folio *folio; 253 255 int ret; ··· 273 271 return 0; 274 272 } 275 273 276 - static int udf_write_end(struct file *file, struct address_space *mapping, 274 + static int udf_write_end(const struct kiocb *iocb, 275 + struct address_space *mapping, 277 276 loff_t pos, unsigned len, unsigned copied, 278 277 struct folio *folio, void *fsdata) 279 278 { 280 - struct inode *inode = file_inode(file); 279 + struct inode *inode = file_inode(iocb->ki_filp); 281 280 loff_t last_pos; 282 281 283 282 if (UDF_I(inode)->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) 284 - return generic_write_end(file, mapping, pos, len, copied, folio, 283 + return generic_write_end(iocb, mapping, pos, len, copied, folio, 285 284 fsdata); 286 285 last_pos = pos + copied; 287 286 if (last_pos > inode->i_size)
+1 -1
fs/ufs/dir.c
··· 48 48 struct inode *dir = mapping->host; 49 49 50 50 inode_inc_iversion(dir); 51 - block_write_end(NULL, mapping, pos, len, len, folio, NULL); 51 + block_write_end(pos, len, len, folio); 52 52 if (pos+len > dir->i_size) { 53 53 i_size_write(dir, pos+len); 54 54 mark_inode_dirty(dir);
+9 -7
fs/ufs/inode.c
··· 474 474 } 475 475 } 476 476 477 - static int ufs_write_begin(struct file *file, struct address_space *mapping, 478 - loff_t pos, unsigned len, 479 - struct folio **foliop, void **fsdata) 477 + static int ufs_write_begin(const struct kiocb *iocb, 478 + struct address_space *mapping, 479 + loff_t pos, unsigned len, 480 + struct folio **foliop, void **fsdata) 480 481 { 481 482 int ret; 482 483 ··· 488 487 return ret; 489 488 } 490 489 491 - static int ufs_write_end(struct file *file, struct address_space *mapping, 492 - loff_t pos, unsigned len, unsigned copied, 493 - struct folio *folio, void *fsdata) 490 + static int ufs_write_end(const struct kiocb *iocb, 491 + struct address_space *mapping, 492 + loff_t pos, unsigned len, unsigned copied, 493 + struct folio *folio, void *fsdata) 494 494 { 495 495 int ret; 496 496 497 - ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); 497 + ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata); 498 498 if (ret < len) 499 499 ufs_write_failed(mapping, pos + len); 500 500 return ret;
+3 -2
fs/vboxsf/file.c
··· 300 300 return error; 301 301 } 302 302 303 - static int vboxsf_write_end(struct file *file, struct address_space *mapping, 303 + static int vboxsf_write_end(const struct kiocb *iocb, 304 + struct address_space *mapping, 304 305 loff_t pos, unsigned int len, unsigned int copied, 305 306 struct folio *folio, void *fsdata) 306 307 { 307 308 struct inode *inode = mapping->host; 308 - struct vboxsf_handle *sf_handle = file->private_data; 309 + struct vboxsf_handle *sf_handle = iocb->ki_filp->private_data; 309 310 size_t from = offset_in_folio(folio, pos); 310 311 u32 nwritten = len; 311 312 u8 *buf;
+1 -1
fs/xattr.c
··· 215 215 * 216 216 * returns the result of the internal setxattr or setsecurity operations. 217 217 * 218 - * This function requires the caller to lock the inode's i_mutex before it 218 + * This function requires the caller to lock the inode's i_rwsem before it 219 219 * is executed. It also assumes that the caller will make the appropriate 220 220 * permission checks. 221 221 */
+3 -5
include/linux/buffer_head.h
··· 262 262 struct folio **foliop, get_block_t *get_block); 263 263 int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, 264 264 get_block_t *get_block); 265 - int block_write_end(struct file *, struct address_space *, 266 - loff_t, unsigned len, unsigned copied, 267 - struct folio *, void *); 268 - int generic_write_end(struct file *, struct address_space *, 265 + int block_write_end(loff_t pos, unsigned len, unsigned copied, struct folio *); 266 + int generic_write_end(const struct kiocb *, struct address_space *, 269 267 loff_t, unsigned len, unsigned copied, 270 268 struct folio *, void *); 271 269 void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); 272 - int cont_write_begin(struct file *, struct address_space *, loff_t, 270 + int cont_write_begin(const struct kiocb *, struct address_space *, loff_t, 273 271 unsigned, struct folio **, void **, 274 272 get_block_t *, loff_t *); 275 273 int generic_cont_expand_simple(struct inode *inode, loff_t size);
+2 -2
include/linux/exportfs.h
··· 230 230 * directory. The name should be stored in the @name (with the 231 231 * understanding that it is already pointing to a %NAME_MAX+1 sized 232 232 * buffer. get_name() should return %0 on success, a negative error code 233 - * or error. @get_name will be called without @parent->i_mutex held. 233 + * or error. @get_name will be called without @parent->i_rwsem held. 234 234 * 235 235 * get_parent: 236 236 * @get_parent should find the parent directory for the given @child which ··· 247 247 * @commit_metadata should commit metadata changes to stable storage. 248 248 * 249 249 * Locking rules: 250 - * get_parent is called with child->d_inode->i_mutex down 250 + * get_parent is called with child->d_inode->i_rwsem down 251 251 * get_name is not (which is possibly inconsistent) 252 252 */ 253 253
+6 -1
include/linux/filelock.h
··· 175 175 return fl->c.flc_type == F_WRLCK; 176 176 } 177 177 178 + static inline void locks_wake_up_waiter(struct file_lock_core *flc) 179 + { 180 + wake_up(&flc->flc_wait); 181 + } 182 + 178 183 static inline void locks_wake_up(struct file_lock *fl) 179 184 { 180 - wake_up(&fl->c.flc_wait); 185 + locks_wake_up_waiter(&fl->c); 181 186 } 182 187 183 188 static inline bool locks_can_async_lock(const struct file_operations *fops)
+13 -12
include/linux/fs.h
··· 446 446 447 447 void (*readahead)(struct readahead_control *); 448 448 449 - int (*write_begin)(struct file *, struct address_space *mapping, 449 + int (*write_begin)(const struct kiocb *, struct address_space *mapping, 450 450 loff_t pos, unsigned len, 451 451 struct folio **foliop, void **fsdata); 452 - int (*write_end)(struct file *, struct address_space *mapping, 452 + int (*write_end)(const struct kiocb *, struct address_space *mapping, 453 453 loff_t pos, unsigned len, unsigned copied, 454 454 struct folio *folio, void *fsdata); 455 455 ··· 839 839 } 840 840 841 841 /* 842 - * inode->i_mutex nesting subclasses for the lock validator: 842 + * inode->i_rwsem nesting subclasses for the lock validator: 843 843 * 844 844 * 0: the object of the current VFS operation 845 845 * 1: parent ··· 991 991 992 992 /* 993 993 * NOTE: unlike i_size_read(), i_size_write() does need locking around it 994 - * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount 994 + * (normally i_rwsem), otherwise on 32bit/SMP an update of i_size_seqcount 995 995 * can be lost, resulting in subsequent i_size_read() calls spinning forever. 996 996 */ 997 997 static inline void i_size_write(struct inode *inode, loff_t i_size) ··· 1924 1924 * freeze protection should be the outermost lock. In particular, we have: 1925 1925 * 1926 1926 * sb_start_write 1927 - * -> i_mutex (write path, truncate, directory ops, ...) 1927 + * -> i_rwsem (write path, truncate, directory ops, ...) 
1928 1928 * -> s_umount (freeze_super, thaw_super) 1929 1929 */ 1930 1930 static inline void sb_start_write(struct super_block *sb) ··· 2007 2007 /** 2008 2008 * struct renamedata - contains all information required for renaming 2009 2009 * @old_mnt_idmap: idmap of the old mount the inode was found from 2010 - * @old_dir: parent of source 2010 + * @old_parent: parent of source 2011 2011 * @old_dentry: source 2012 2012 * @new_mnt_idmap: idmap of the new mount the inode was found from 2013 - * @new_dir: parent of destination 2013 + * @new_parent: parent of destination 2014 2014 * @new_dentry: destination 2015 2015 * @delegated_inode: returns an inode needing a delegation break 2016 2016 * @flags: rename flags 2017 2017 */ 2018 2018 struct renamedata { 2019 2019 struct mnt_idmap *old_mnt_idmap; 2020 - struct inode *old_dir; 2020 + struct dentry *old_parent; 2021 2021 struct dentry *old_dentry; 2022 2022 struct mnt_idmap *new_mnt_idmap; 2023 - struct inode *new_dir; 2023 + struct dentry *new_parent; 2024 2024 struct dentry *new_dentry; 2025 2025 struct inode **delegated_inode; 2026 2026 unsigned int flags; ··· 3605 3605 extern int noop_fsync(struct file *, loff_t, loff_t, int); 3606 3606 extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); 3607 3607 extern int simple_empty(struct dentry *); 3608 - extern int simple_write_begin(struct file *file, struct address_space *mapping, 3609 - loff_t pos, unsigned len, 3610 - struct folio **foliop, void **fsdata); 3608 + extern int simple_write_begin(const struct kiocb *iocb, 3609 + struct address_space *mapping, 3610 + loff_t pos, unsigned len, 3611 + struct folio **foliop, void **fsdata); 3611 3612 extern const struct address_space_operations ram_aops; 3612 3613 extern int always_delete_dentry(const struct dentry *); 3613 3614 extern struct inode *alloc_anon_inode(struct super_block *);
+1 -1
include/linux/fs_context.h
··· 200 200 */ 201 201 #define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__) 202 202 #define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__) 203 - #define infofc(p, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) 203 + #define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) 204 204 205 205 /** 206 206 * warnf - Store supplementary warning message
+1 -1
include/linux/fs_stack.h
··· 3 3 #define _LINUX_FS_STACK_H 4 4 5 5 /* This file defines generic functions used primarily by stackable 6 - * filesystems; none of these functions require i_mutex to be held. 6 + * filesystems; none of these functions require i_rwsem to be held. 7 7 */ 8 8 9 9 #include <linux/fs.h>
-1
include/linux/netfs.h
··· 442 442 size_t max_size, size_t max_segs); 443 443 void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); 444 444 void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error); 445 - void netfs_queue_write_request(struct netfs_io_subrequest *subreq); 446 445 447 446 int netfs_start_io_read(struct inode *inode); 448 447 void netfs_end_io_read(struct inode *inode);
+27
include/linux/pagemap.h
··· 751 751 fgf_t fgp_flags, gfp_t gfp); 752 752 753 753 /** 754 + * write_begin_get_folio - Get folio for write_begin with flags. 755 + * @iocb: The kiocb passed from write_begin (may be NULL). 756 + * @mapping: The address space to search. 757 + * @index: The page cache index. 758 + * @len: Length of data being written. 759 + * 760 + * This is a helper for filesystem write_begin() implementations. 761 + * It wraps __filemap_get_folio(), setting appropriate flags in 762 + * the write begin context. 763 + * 764 + * Return: A folio or an ERR_PTR. 765 + */ 766 + static inline struct folio *write_begin_get_folio(const struct kiocb *iocb, 767 + struct address_space *mapping, pgoff_t index, size_t len) 768 + { 769 + fgf_t fgp_flags = FGP_WRITEBEGIN; 770 + 771 + fgp_flags |= fgf_set_order(len); 772 + 773 + if (iocb && iocb->ki_flags & IOCB_DONTCACHE) 774 + fgp_flags |= FGP_DONTCACHE; 775 + 776 + return __filemap_get_folio(mapping, index, fgp_flags, 777 + mapping_gfp_mask(mapping)); 778 + } 779 + 780 + /** 754 781 * filemap_get_folio - Find and get a folio. 755 782 * @mapping: The address_space to search. 756 783 * @index: The page index.
+1 -1
include/linux/quotaops.h
··· 19 19 return &sb->s_dquot; 20 20 } 21 21 22 - /* i_mutex must being held */ 22 + /* i_rwsem must being held */ 23 23 static inline bool is_quota_modification(struct mnt_idmap *idmap, 24 24 struct inode *inode, struct iattr *ia) 25 25 {
-2
io_uring/openclose.c
··· 416 416 ret = create_pipe_files(files, p->flags); 417 417 if (ret) 418 418 return ret; 419 - files[0]->f_mode |= FMODE_NOWAIT; 420 - files[1]->f_mode |= FMODE_NOWAIT; 421 419 422 420 if (!!p->file_slot) 423 421 ret = io_pipe_fixed(req, files, issue_flags);
+2 -2
mm/filemap.c
··· 4109 4109 break; 4110 4110 } 4111 4111 4112 - status = a_ops->write_begin(file, mapping, pos, bytes, 4112 + status = a_ops->write_begin(iocb, mapping, pos, bytes, 4113 4113 &folio, &fsdata); 4114 4114 if (unlikely(status < 0)) 4115 4115 break; ··· 4130 4130 copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); 4131 4131 flush_dcache_folio(folio); 4132 4132 4133 - status = a_ops->write_end(file, mapping, pos, bytes, copied, 4133 + status = a_ops->write_end(iocb, mapping, pos, bytes, copied, 4134 4134 folio, fsdata); 4135 4135 if (unlikely(status != copied)) { 4136 4136 iov_iter_revert(i, copied - max(status, 0L));
+6 -6
mm/shmem.c
··· 3270 3270 static const struct inode_operations shmem_short_symlink_operations; 3271 3271 3272 3272 static int 3273 - shmem_write_begin(struct file *file, struct address_space *mapping, 3274 - loff_t pos, unsigned len, 3275 - struct folio **foliop, void **fsdata) 3273 + shmem_write_begin(const struct kiocb *iocb, struct address_space *mapping, 3274 + loff_t pos, unsigned len, 3275 + struct folio **foliop, void **fsdata) 3276 3276 { 3277 3277 struct inode *inode = mapping->host; 3278 3278 struct shmem_inode_info *info = SHMEM_I(inode); ··· 3304 3304 } 3305 3305 3306 3306 static int 3307 - shmem_write_end(struct file *file, struct address_space *mapping, 3308 - loff_t pos, unsigned len, unsigned copied, 3309 - struct folio *folio, void *fsdata) 3307 + shmem_write_end(const struct kiocb *iocb, struct address_space *mapping, 3308 + loff_t pos, unsigned len, unsigned copied, 3309 + struct folio *folio, void *fsdata) 3310 3310 { 3311 3311 struct inode *inode = mapping->host; 3312 3312