Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.15-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
"Features:

- Add CONFIG_DEBUG_VFS infrastructure:
- Catch invalid modes in open
- Use the new debug macros in inode_set_cached_link()
- Use debug-only asserts around fd allocation and install

- Place f_ref to 3rd cache line in struct file to resolve false
sharing

Cleanups:

- Start using anon_inode_getfile_fmode() helper in various places

- Don't take f_lock during SEEK_CUR if exclusion is guaranteed by
f_pos_lock

- Add unlikely() to kcmp()

- Remove legacy ->remount_fs method from ecryptfs after port to the
new mount api

- Remove invalidate_inodes() in favour of evict_inodes()

- Simplify ep_busy_loop() by removing unused argument

- Avoid mmap sem relocks when coredumping with many missing pages

- Inline getname()

- Inline new_inode_pseudo() and de-staticize alloc_inode()

- Dodge an atomic in putname if ref == 1

- Consistently deref the files table with rcu_dereference_raw()

- Dedup handling of struct filename init and refcounts bumps

- Use wq_has_sleeper() in end_dir_add()

- Drop the lock trip around I_NEW wake up in evict()

- Load the ->i_sb pointer once in inode_sb_list_{add,del}

- Predict not reaching the limit in alloc_empty_file()

- Tidy up do_sys_openat2() with likely/unlikely

- Call inode_sb_list_add() outside of inode hash lock

- Sort out fd allocation vs dup2 race commentary

- Turn page_offset() into a wrapper around folio_pos()

- Remove locking in exportfs around ->get_parent() call

- try_lookup_one_len() does not need any locks in autofs

- Fix return type of several functions from long to int in open

- Fix return type of several functions from long to int in ioctls

Fixes:

- Fix watch queue accounting mismatch"

* tag 'vfs-6.15-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (30 commits)
fs: sort out fd allocation vs dup2 race commentary, take 2
fs: call inode_sb_list_add() outside of inode hash lock
fs: tidy up do_sys_openat2() with likely/unlikely
fs: predict not reaching the limit in alloc_empty_file()
fs: load the ->i_sb pointer once in inode_sb_list_{add,del}
fs: drop the lock trip around I_NEW wake up in evict()
fs: use wq_has_sleeper() in end_dir_add()
VFS/autofs: try_lookup_one_len() does not need any locks
fs: dedup handling of struct filename init and refcounts bumps
fs: consistently deref the files table with rcu_dereference_raw()
exportfs: remove locking around ->get_parent() call.
fs: use debug-only asserts around fd allocation and install
fs: dodge an atomic in putname if ref == 1
vfs: Remove invalidate_inodes()
ecryptfs: remove NULL remount_fs from super_operations
watch_queue: fix pipe accounting mismatch
fs: place f_ref to 3rd cache line in struct file to resolve false sharing
epoll: simplify ep_busy_loop by removing always 0 argument
fs: Turn page_offset() into a wrapper around folio_pos()
kcmp: improve performance adding an unlikely hint to task comparisons
...

+339 -258
+5
Documentation/filesystems/porting.rst
··· 1157 1157 NOTE: if you need something like full path from the root of filesystem, 1158 1158 you are still on your own - this assists with simple cases, but it's not 1159 1159 magic. 1160 + 1161 + --- 1162 + 1163 + **mandatory** 1164 + invalidate_inodes() is gone; use evict_inodes() instead.
+2 -1
arch/arm64/kernel/elfcore.c
··· 27 27 int ret = 1; 28 28 unsigned long addr; 29 29 void *tags = NULL; 30 + int locked = 0; 30 31 31 32 for (addr = start; addr < start + len; addr += PAGE_SIZE) { 32 - struct page *page = get_dump_page(addr); 33 + struct page *page = get_dump_page(addr, &locked); 33 34 34 35 /* 35 36 * get_dump_page() returns NULL when encountering an empty
+3 -4
arch/powerpc/platforms/pseries/papr-vpd.c
··· 482 482 goto free_blob; 483 483 } 484 484 485 - file = anon_inode_getfile("[papr-vpd]", &papr_vpd_handle_ops, 486 - (void *)blob, O_RDONLY); 485 + file = anon_inode_getfile_fmode("[papr-vpd]", &papr_vpd_handle_ops, 486 + (void *)blob, O_RDONLY, 487 + FMODE_LSEEK | FMODE_PREAD); 487 488 if (IS_ERR(file)) { 488 489 err = PTR_ERR(file); 489 490 goto put_fd; 490 491 } 491 - 492 - file->f_mode |= FMODE_LSEEK | FMODE_PREAD; 493 492 fd_install(fd, file); 494 493 return fd; 495 494 put_fd:
+2 -14
drivers/vfio/group.c
··· 266 266 if (ret) 267 267 goto err_free; 268 268 269 - /* 270 - * We can't use anon_inode_getfd() because we need to modify 271 - * the f_mode flags directly to allow more than just ioctls 272 - */ 273 - filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, 274 - df, O_RDWR); 269 + filep = anon_inode_getfile_fmode("[vfio-device]", &vfio_device_fops, 270 + df, O_RDWR, FMODE_PREAD | FMODE_PWRITE); 275 271 if (IS_ERR(filep)) { 276 272 ret = PTR_ERR(filep); 277 273 goto err_close_device; 278 274 } 279 - 280 - /* 281 - * TODO: add an anon_inode interface to do this. 282 - * Appears to be missing by lack of need rather than 283 - * explicitly prevented. Now there's need. 284 - */ 285 - filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); 286 - 287 275 /* 288 276 * Use the pseudo fs inode on the device to link all mmaps 289 277 * to the same address space, allowing us to unmap all vmas
-3
fs/autofs/dev-ioctl.c
··· 442 442 sbi->exp_timeout = timeout * HZ; 443 443 } else { 444 444 struct dentry *base = fp->f_path.dentry; 445 - struct inode *inode = base->d_inode; 446 445 int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1; 447 446 struct dentry *dentry; 448 447 struct autofs_info *ino; ··· 459 460 "the parent autofs mount timeout which could " 460 461 "prevent shutdown\n"); 461 462 462 - inode_lock_shared(inode); 463 463 dentry = try_lookup_one_len(param->path, base, path_len); 464 - inode_unlock_shared(inode); 465 464 if (IS_ERR_OR_NULL(dentry)) 466 465 return dentry ? PTR_ERR(dentry) : -ENOENT; 467 466 ino = autofs_dentry_ino(dentry);
+3 -4
fs/cachefiles/ondemand.c
··· 317 317 goto err_free_id; 318 318 } 319 319 320 - anon_file->file = anon_inode_getfile("[cachefiles]", 321 - &cachefiles_ondemand_fd_fops, object, O_WRONLY); 320 + anon_file->file = anon_inode_getfile_fmode("[cachefiles]", 321 + &cachefiles_ondemand_fd_fops, object, 322 + O_WRONLY, FMODE_PWRITE | FMODE_LSEEK); 322 323 if (IS_ERR(anon_file->file)) { 323 324 ret = PTR_ERR(anon_file->file); 324 325 goto err_put_fd; ··· 333 332 ret = -EEXIST; 334 333 goto err_put_file; 335 334 } 336 - 337 - anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; 338 335 339 336 load = (void *)req->msg.data; 340 337 load->fd = anon_file->fd;
+32 -6
fs/coredump.c
··· 926 926 { 927 927 unsigned long addr; 928 928 struct page *dump_page; 929 + int locked, ret; 929 930 930 931 dump_page = dump_page_alloc(); 931 932 if (!dump_page) 932 933 return 0; 933 934 935 + ret = 0; 936 + locked = 0; 934 937 for (addr = start; addr < start + len; addr += PAGE_SIZE) { 935 938 struct page *page; 939 + 940 + if (!locked) { 941 + if (mmap_read_lock_killable(current->mm)) 942 + goto out; 943 + locked = 1; 944 + } 936 945 937 946 /* 938 947 * To avoid having to allocate page tables for virtual address ··· 950 941 * NULL when encountering an empty page table entry that would 951 942 * otherwise have been filled with the zero page. 952 943 */ 953 - page = get_dump_page(addr); 944 + page = get_dump_page(addr, &locked); 954 945 if (page) { 946 + if (locked) { 947 + mmap_read_unlock(current->mm); 948 + locked = 0; 949 + } 955 950 int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page)); 956 951 put_page(page); 957 - if (stop) { 958 - dump_page_free(dump_page); 959 - return 0; 960 - } 952 + if (stop) 953 + goto out; 961 954 } else { 962 955 dump_skip(cprm, PAGE_SIZE); 963 956 } 957 + 958 + if (dump_interrupted()) 959 + goto out; 960 + 961 + if (!need_resched()) 962 + continue; 963 + if (locked) { 964 + mmap_read_unlock(current->mm); 965 + locked = 0; 966 + } 964 967 cond_resched(); 965 968 } 969 + ret = 1; 970 + out: 971 + if (locked) 972 + mmap_read_unlock(current->mm); 973 + 966 974 dump_page_free(dump_page); 967 - return 1; 975 + return ret; 968 976 } 969 977 #endif 970 978
+2 -1
fs/dcache.c
··· 2480 2480 { 2481 2481 smp_store_release(&dir->i_dir_seq, n + 2); 2482 2482 preempt_enable_nested(); 2483 - wake_up_all(d_wait); 2483 + if (wq_has_sleeper(d_wait)) 2484 + wake_up_all(d_wait); 2484 2485 } 2485 2486 2486 2487 static void d_wait_lookup(struct dentry *dentry)
-1
fs/ecryptfs/super.c
··· 172 172 .destroy_inode = ecryptfs_destroy_inode, 173 173 .free_inode = ecryptfs_free_inode, 174 174 .statfs = ecryptfs_statfs, 175 - .remount_fs = NULL, 176 175 .evict_inode = ecryptfs_evict_inode, 177 176 .show_options = ecryptfs_show_options 178 177 };
+2 -3
fs/eventfd.c
··· 406 406 if (fd < 0) 407 407 goto err; 408 408 409 - file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); 409 + file = anon_inode_getfile_fmode("[eventfd]", &eventfd_fops, 410 + ctx, flags, FMODE_NOWAIT); 410 411 if (IS_ERR(file)) { 411 412 put_unused_fd(fd); 412 413 fd = PTR_ERR(file); 413 414 goto err; 414 415 } 415 - 416 - file->f_mode |= FMODE_NOWAIT; 417 416 fd_install(fd, file); 418 417 return fd; 419 418 err:
+4 -4
fs/eventpoll.c
··· 438 438 * 439 439 * we must do our busy polling with irqs enabled 440 440 */ 441 - static bool ep_busy_loop(struct eventpoll *ep, int nonblock) 441 + static bool ep_busy_loop(struct eventpoll *ep) 442 442 { 443 443 unsigned int napi_id = READ_ONCE(ep->napi_id); 444 444 u16 budget = READ_ONCE(ep->busy_poll_budget); ··· 448 448 budget = BUSY_POLL_BUDGET; 449 449 450 450 if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) { 451 - napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, 451 + napi_busy_loop(napi_id, ep_busy_loop_end, 452 452 ep, prefer_busy_poll, budget); 453 453 if (ep_events_available(ep)) 454 454 return true; ··· 560 560 561 561 #else 562 562 563 - static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock) 563 + static inline bool ep_busy_loop(struct eventpoll *ep) 564 564 { 565 565 return false; 566 566 } ··· 2047 2047 if (timed_out) 2048 2048 return 0; 2049 2049 2050 - eavail = ep_busy_loop(ep, timed_out); 2050 + eavail = ep_busy_loop(ep); 2051 2051 if (eavail) 2052 2052 continue; 2053 2053
-2
fs/exportfs/expfs.c
··· 126 126 int err; 127 127 128 128 parent = ERR_PTR(-EACCES); 129 - inode_lock(dentry->d_inode); 130 129 if (mnt->mnt_sb->s_export_op->get_parent) 131 130 parent = mnt->mnt_sb->s_export_op->get_parent(dentry); 132 - inode_unlock(dentry->d_inode); 133 131 134 132 if (IS_ERR(parent)) { 135 133 dprintk("get_parent of %lu failed, err %ld\n",
+56 -25
fs/file.c
··· 418 418 old_fds = old_fdt->fd; 419 419 new_fds = new_fdt->fd; 420 420 421 + /* 422 + * We may be racing against fd allocation from other threads using this 423 + * files_struct, despite holding ->file_lock. 424 + * 425 + * alloc_fd() might have already claimed a slot, while fd_install() 426 + * did not populate it yet. Note the latter operates locklessly, so 427 + * the file can show up as we are walking the array below. 428 + * 429 + * At the same time we know no files will disappear as all other 430 + * operations take the lock. 431 + * 432 + * Instead of trying to placate userspace racing with itself, we 433 + * ref the file if we see it and mark the fd slot as unused otherwise. 434 + */ 421 435 for (i = open_files; i != 0; i--) { 422 - struct file *f = *old_fds++; 436 + struct file *f = rcu_dereference_raw(*old_fds++); 423 437 if (f) { 424 438 get_file(f); 425 439 } else { 426 - /* 427 - * The fd may be claimed in the fd bitmap but not yet 428 - * instantiated in the files array if a sibling thread 429 - * is partway through open(). So make sure that this 430 - * fd is available to the new process. 431 - */ 432 440 __clear_open_fd(open_files - i, new_fdt); 433 441 } 434 442 rcu_assign_pointer(*new_fds++, f); ··· 585 577 586 578 __set_open_fd(fd, fdt, flags & O_CLOEXEC); 587 579 error = fd; 580 + VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); 588 581 589 582 out: 590 583 spin_unlock(&files->file_lock); ··· 621 612 622 613 EXPORT_SYMBOL(put_unused_fd); 623 614 624 - /* 625 - * Install a file pointer in the fd array. 626 - * 627 - * The VFS is full of places where we drop the files lock between 628 - * setting the open_fds bitmap and installing the file in the file 629 - * array. At any such point, we are vulnerable to a dup2() race 630 - * installing a file in the array before us. We need to detect this and 631 - * fput() the struct file we are about to overwrite in this case. 
632 - * 633 - * It should never happen - if we allow dup2() do it, _really_ bad things 634 - * will follow. 615 + /** 616 + * fd_install - install a file pointer in the fd array 617 + * @fd: file descriptor to install the file in 618 + * @file: the file to install 635 619 * 636 620 * This consumes the "file" refcount, so callers should treat it 637 621 * as if they had called fput(file). 638 622 */ 639 - 640 623 void fd_install(unsigned int fd, struct file *file) 641 624 { 642 625 struct files_struct *files = current->files; ··· 643 642 rcu_read_unlock_sched(); 644 643 spin_lock(&files->file_lock); 645 644 fdt = files_fdtable(files); 646 - WARN_ON(fdt->fd[fd] != NULL); 645 + VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); 647 646 rcu_assign_pointer(fdt->fd[fd], file); 648 647 spin_unlock(&files->file_lock); 649 648 return; ··· 651 650 /* coupled with smp_wmb() in expand_fdtable() */ 652 651 smp_rmb(); 653 652 fdt = rcu_dereference_sched(files->fdt); 654 - BUG_ON(fdt->fd[fd] != NULL); 653 + VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); 655 654 rcu_assign_pointer(fdt->fd[fd], file); 656 655 rcu_read_unlock_sched(); 657 656 } ··· 680 679 return NULL; 681 680 682 681 fd = array_index_nospec(fd, fdt->max_fds); 683 - file = fdt->fd[fd]; 682 + file = rcu_dereference_raw(fdt->fd[fd]); 684 683 if (file) { 685 684 rcu_assign_pointer(fdt->fd[fd], NULL); 686 685 __put_unused_fd(files, fd); ··· 1183 1182 (file_count(file) > 1 || file->f_op->iterate_shared); 1184 1183 } 1185 1184 1185 + bool file_seek_cur_needs_f_lock(struct file *file) 1186 + { 1187 + if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared) 1188 + return false; 1189 + 1190 + VFS_WARN_ON_ONCE((file_count(file) > 1) && 1191 + !mutex_is_locked(&file->f_pos_lock)); 1192 + return true; 1193 + } 1194 + 1186 1195 struct fd fdget_pos(unsigned int fd) 1187 1196 { 1188 1197 struct fd f = fdget(fd); ··· 1241 1230 struct fdtable *fdt; 1242 1231 1243 1232 /* 1244 - * We need to detect attempts to 
do dup2() over allocated but still 1245 - * not finished descriptor. 1233 + * dup2() is expected to close the file installed in the target fd slot 1234 + * (if any). However, userspace hand-picking a fd may be racing against 1235 + * its own threads which happened to allocate it in open() et al but did 1236 + * not populate it yet. 1237 + * 1238 + * Broadly speaking we may be racing against the following: 1239 + * fd = get_unused_fd_flags(); // fd slot reserved, ->fd[fd] == NULL 1240 + * file = hard_work_goes_here(); 1241 + * fd_install(fd, file); // only now ->fd[fd] == file 1242 + * 1243 + * It is an invariant that a successfully allocated fd has a NULL entry 1244 + * in the array until the matching fd_install(). 1245 + * 1246 + * If we fit the window, we have the fd to populate, yet no target file 1247 + * to close. Trying to ignore it and install our new file would violate 1248 + * the invariant and make fd_install() overwrite our file. 1249 + * 1250 + * Things can be done(tm) to handle this. However, the issue does not 1251 + * concern legitimate programs and we only need to make sure the kernel 1252 + * does not trip over it. 1253 + * 1254 + * The simplest way out is to return an error if we find ourselves here. 1246 1255 * 1247 1256 * POSIX is silent on the issue, we return -EBUSY. 1248 1257 */ 1249 1258 fdt = files_fdtable(files); 1250 1259 fd = array_index_nospec(fd, fdt->max_fds); 1251 - tofree = fdt->fd[fd]; 1260 + tofree = rcu_dereference_raw(fdt->fd[fd]); 1252 1261 if (!tofree && fd_is_open(fd, fdt)) 1253 1262 goto Ebusy; 1254 1263 get_file(file);
+2 -1
fs/file_table.c
··· 221 221 /* 222 222 * Privileged users can go above max_files 223 223 */ 224 - if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { 224 + if (unlikely(get_nr_files() >= files_stat.max_files) && 225 + !capable(CAP_SYS_ADMIN)) { 225 226 /* 226 227 * percpu_counters are inaccurate. Do an expensive check before 227 228 * we go and fail.
+47 -80
fs/inode.c
··· 327 327 free_inode_nonrcu(inode); 328 328 } 329 329 330 - static struct inode *alloc_inode(struct super_block *sb) 330 + /** 331 + * alloc_inode - obtain an inode 332 + * @sb: superblock 333 + * 334 + * Allocates a new inode for given superblock. 335 + * Inode wont be chained in superblock s_inodes list 336 + * This means : 337 + * - fs can't be unmount 338 + * - quotas, fsnotify, writeback can't work 339 + */ 340 + struct inode *alloc_inode(struct super_block *sb) 331 341 { 332 342 const struct super_operations *ops = sb->s_op; 333 343 struct inode *inode; ··· 623 613 */ 624 614 void inode_sb_list_add(struct inode *inode) 625 615 { 626 - spin_lock(&inode->i_sb->s_inode_list_lock); 627 - list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); 628 - spin_unlock(&inode->i_sb->s_inode_list_lock); 616 + struct super_block *sb = inode->i_sb; 617 + 618 + spin_lock(&sb->s_inode_list_lock); 619 + list_add(&inode->i_sb_list, &sb->s_inodes); 620 + spin_unlock(&sb->s_inode_list_lock); 629 621 } 630 622 EXPORT_SYMBOL_GPL(inode_sb_list_add); 631 623 632 624 static inline void inode_sb_list_del(struct inode *inode) 633 625 { 626 + struct super_block *sb = inode->i_sb; 627 + 634 628 if (!list_empty(&inode->i_sb_list)) { 635 - spin_lock(&inode->i_sb->s_inode_list_lock); 629 + spin_lock(&sb->s_inode_list_lock); 636 630 list_del_init(&inode->i_sb_list); 637 - spin_unlock(&inode->i_sb->s_inode_list_lock); 631 + spin_unlock(&sb->s_inode_list_lock); 638 632 } 639 633 } 640 634 ··· 820 806 /* 821 807 * Wake up waiters in __wait_on_freeing_inode(). 822 808 * 823 - * Lockless hash lookup may end up finding the inode before we removed 824 - * it above, but only lock it *after* we are done with the wakeup below. 825 - * In this case the potential waiter cannot safely block. 
809 + * It is an invariant that any thread we need to wake up is already 810 + * accounted for before remove_inode_hash() acquires ->i_lock -- both 811 + * sides take the lock and sleep is aborted if the inode is found 812 + * unhashed. Thus either the sleeper wins and goes off CPU, or removal 813 + * wins and the sleeper aborts after testing with the lock. 826 814 * 827 - * The inode being unhashed after the call to remove_inode_hash() is 828 - * used as an indicator whether blocking on it is safe. 815 + * This also means we don't need any fences for the call below. 829 816 */ 830 - spin_lock(&inode->i_lock); 831 - /* 832 - * Pairs with the barrier in prepare_to_wait_event() to make sure 833 - * ___wait_var_event() either sees the bit cleared or 834 - * waitqueue_active() check in wake_up_var() sees the waiter. 835 - */ 836 - smp_mb__after_spinlock(); 837 817 inode_wake_up_bit(inode, __I_NEW); 838 818 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); 839 - spin_unlock(&inode->i_lock); 840 819 841 820 destroy_inode(inode); 842 821 } ··· 906 899 dispose_list(&dispose); 907 900 } 908 901 EXPORT_SYMBOL_GPL(evict_inodes); 909 - 910 - /** 911 - * invalidate_inodes - attempt to free all inodes on a superblock 912 - * @sb: superblock to operate on 913 - * 914 - * Attempts to free all inodes (including dirty inodes) for a given superblock. 
915 - */ 916 - void invalidate_inodes(struct super_block *sb) 917 - { 918 - struct inode *inode, *next; 919 - LIST_HEAD(dispose); 920 - 921 - again: 922 - spin_lock(&sb->s_inode_list_lock); 923 - list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 924 - spin_lock(&inode->i_lock); 925 - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 926 - spin_unlock(&inode->i_lock); 927 - continue; 928 - } 929 - if (atomic_read(&inode->i_count)) { 930 - spin_unlock(&inode->i_lock); 931 - continue; 932 - } 933 - 934 - inode->i_state |= I_FREEING; 935 - inode_lru_list_del(inode); 936 - spin_unlock(&inode->i_lock); 937 - list_add(&inode->i_lru, &dispose); 938 - if (need_resched()) { 939 - spin_unlock(&sb->s_inode_list_lock); 940 - cond_resched(); 941 - dispose_list(&dispose); 942 - goto again; 943 - } 944 - } 945 - spin_unlock(&sb->s_inode_list_lock); 946 - 947 - dispose_list(&dispose); 948 - } 949 902 950 903 /* 951 904 * Isolate the inode from the LRU in preparation for freeing it. ··· 1127 1160 EXPORT_SYMBOL(get_next_ino); 1128 1161 1129 1162 /** 1130 - * new_inode_pseudo - obtain an inode 1131 - * @sb: superblock 1132 - * 1133 - * Allocates a new inode for given superblock. 
1134 - * Inode wont be chained in superblock s_inodes list 1135 - * This means : 1136 - * - fs can't be unmount 1137 - * - quotas, fsnotify, writeback can't work 1138 - */ 1139 - struct inode *new_inode_pseudo(struct super_block *sb) 1140 - { 1141 - return alloc_inode(sb); 1142 - } 1143 - 1144 - /** 1145 1163 * new_inode - obtain an inode 1146 1164 * @sb: superblock 1147 1165 * ··· 1142 1190 { 1143 1191 struct inode *inode; 1144 1192 1145 - inode = new_inode_pseudo(sb); 1193 + inode = alloc_inode(sb); 1146 1194 if (inode) 1147 1195 inode_sb_list_add(inode); 1148 1196 return inode; ··· 1300 1348 } 1301 1349 1302 1350 if (set && unlikely(set(inode, data))) { 1303 - inode = NULL; 1304 - goto unlock; 1351 + spin_unlock(&inode_hash_lock); 1352 + return NULL; 1305 1353 } 1306 1354 1307 1355 /* ··· 1313 1361 hlist_add_head_rcu(&inode->i_hash, head); 1314 1362 spin_unlock(&inode->i_lock); 1315 1363 1364 + spin_unlock(&inode_hash_lock); 1365 + 1316 1366 /* 1317 1367 * Add inode to the sb list if it's not already. It has I_NEW at this 1318 1368 * point, so it should be safe to test i_sb_list locklessly. 1319 1369 */ 1320 1370 if (list_empty(&inode->i_sb_list)) 1321 1371 inode_sb_list_add(inode); 1322 - unlock: 1323 - spin_unlock(&inode_hash_lock); 1324 1372 1325 1373 return inode; 1326 1374 } ··· 1449 1497 inode->i_state = I_NEW; 1450 1498 hlist_add_head_rcu(&inode->i_hash, head); 1451 1499 spin_unlock(&inode->i_lock); 1452 - inode_sb_list_add(inode); 1453 1500 spin_unlock(&inode_hash_lock); 1501 + inode_sb_list_add(inode); 1454 1502 1455 1503 /* Return the locked inode with I_NEW set, the 1456 1504 * caller is responsible for filling in the contents ··· 2905 2953 return mode & ~S_ISGID; 2906 2954 } 2907 2955 EXPORT_SYMBOL(mode_strip_sgid); 2956 + 2957 + #ifdef CONFIG_DEBUG_VFS 2958 + /* 2959 + * Dump an inode. 2960 + * 2961 + * TODO: add a proper inode dumping routine, this is a stub to get debug off the 2962 + * ground. 
2963 + */ 2964 + void dump_inode(struct inode *inode, const char *reason) 2965 + { 2966 + pr_warn("%s encountered for inode %px", reason, inode); 2967 + } 2968 + 2969 + EXPORT_SYMBOL(dump_inode); 2970 + #endif
+3 -3
fs/internal.h
··· 187 187 extern int build_open_flags(const struct open_how *how, struct open_flags *op); 188 188 struct file *file_close_fd_locked(struct files_struct *files, unsigned fd); 189 189 190 - long do_ftruncate(struct file *file, loff_t length, int small); 191 - long do_sys_ftruncate(unsigned int fd, loff_t length, int small); 190 + int do_ftruncate(struct file *file, loff_t length, int small); 191 + int do_sys_ftruncate(unsigned int fd, loff_t length, int small); 192 192 int chmod_common(const struct path *path, umode_t mode); 193 193 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, 194 194 int flag); ··· 207 207 * fs-writeback.c 208 208 */ 209 209 extern long get_nr_dirty_inodes(void); 210 - void invalidate_inodes(struct super_block *sb); 211 210 212 211 /* 213 212 * dcache.c ··· 337 338 return path->mnt->mnt_root == path->dentry; 338 339 } 339 340 void file_f_owner_release(struct file *file); 341 + bool file_seek_cur_needs_f_lock(struct file *file);
+5 -5
fs/ioctl.c
··· 41 41 * 42 42 * Returns 0 on success, -errno on error. 43 43 */ 44 - long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 44 + int vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 45 45 { 46 46 int error = -ENOTTY; 47 47 ··· 228 228 return error; 229 229 } 230 230 231 - static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, 232 - u64 off, u64 olen, u64 destoff) 231 + static int ioctl_file_clone(struct file *dst_file, unsigned long srcfd, 232 + u64 off, u64 olen, u64 destoff) 233 233 { 234 234 CLASS(fd, src_file)(srcfd); 235 235 loff_t cloned; ··· 248 248 return ret; 249 249 } 250 250 251 - static long ioctl_file_clone_range(struct file *file, 252 - struct file_clone_range __user *argp) 251 + static int ioctl_file_clone_range(struct file *file, 252 + struct file_clone_range __user *argp) 253 253 { 254 254 struct file_clone_range args; 255 255
+22 -20
fs/namei.c
··· 125 125 126 126 #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) 127 127 128 + static inline void initname(struct filename *name) 129 + { 130 + name->uptr = NULL; 131 + name->aname = NULL; 132 + atomic_set(&name->refcnt, 1); 133 + } 134 + 128 135 struct filename * 129 136 getname_flags(const char __user *filename, int flags) 130 137 { ··· 210 203 return ERR_PTR(-ENAMETOOLONG); 211 204 } 212 205 } 213 - 214 - atomic_set(&result->refcnt, 1); 215 - result->uptr = filename; 216 - result->aname = NULL; 206 + initname(result); 217 207 audit_getname(result); 218 208 return result; 219 209 } ··· 220 216 int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; 221 217 222 218 return getname_flags(filename, flags); 223 - } 224 - 225 - struct filename *getname(const char __user * filename) 226 - { 227 - return getname_flags(filename, 0); 228 219 } 229 220 230 221 struct filename *__getname_maybe_null(const char __user *pathname) ··· 268 269 return ERR_PTR(-ENAMETOOLONG); 269 270 } 270 271 memcpy((char *)result->name, filename, len); 271 - result->uptr = NULL; 272 - result->aname = NULL; 273 - atomic_set(&result->refcnt, 1); 272 + initname(result); 274 273 audit_getname(result); 275 - 276 274 return result; 277 275 } 278 276 EXPORT_SYMBOL(getname_kernel); 279 277 280 278 void putname(struct filename *name) 281 279 { 280 + int refcnt; 281 + 282 282 if (IS_ERR_OR_NULL(name)) 283 283 return; 284 284 285 - if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) 286 - return; 285 + refcnt = atomic_read(&name->refcnt); 286 + if (refcnt != 1) { 287 + if (WARN_ON_ONCE(!refcnt)) 288 + return; 287 289 288 - if (!atomic_dec_and_test(&name->refcnt)) 289 - return; 290 + if (!atomic_dec_and_test(&name->refcnt)) 291 + return; 292 + } 290 293 291 294 if (name->name != name->iname) { 292 295 __putname(name->name); ··· 2864 2863 * Note that this routine is purely a helper for filesystem usage and should 2865 2864 * not be called by generic code. 
2866 2865 * 2867 - * The caller must hold base->i_mutex. 2866 + * No locks need be held - only a counted reference to @base is needed. 2867 + * 2868 2868 */ 2869 2869 struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len) 2870 2870 { 2871 2871 struct qstr this; 2872 2872 int err; 2873 - 2874 - WARN_ON_ONCE(!inode_is_locked(base->d_inode)); 2875 2873 2876 2874 err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); 2877 2875 if (err) ··· 3415 3415 if ((acc_mode & MAY_EXEC) && path_noexec(path)) 3416 3416 return -EACCES; 3417 3417 break; 3418 + default: 3419 + VFS_BUG_ON_INODE(1, inode); 3418 3420 } 3419 3421 3420 3422 error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
+15 -14
fs/open.c
··· 67 67 return ret; 68 68 } 69 69 70 - long vfs_truncate(const struct path *path, loff_t length) 70 + int vfs_truncate(const struct path *path, loff_t length) 71 71 { 72 72 struct mnt_idmap *idmap; 73 73 struct inode *inode; 74 - long error; 74 + int error; 75 75 76 76 inode = path->dentry->d_inode; 77 77 ··· 123 123 } 124 124 EXPORT_SYMBOL_GPL(vfs_truncate); 125 125 126 - long do_sys_truncate(const char __user *pathname, loff_t length) 126 + int do_sys_truncate(const char __user *pathname, loff_t length) 127 127 { 128 128 unsigned int lookup_flags = LOOKUP_FOLLOW; 129 129 struct path path; ··· 157 157 } 158 158 #endif 159 159 160 - long do_ftruncate(struct file *file, loff_t length, int small) 160 + int do_ftruncate(struct file *file, loff_t length, int small) 161 161 { 162 162 struct inode *inode; 163 163 struct dentry *dentry; ··· 196 196 return error; 197 197 } 198 198 199 - long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 199 + int do_sys_ftruncate(unsigned int fd, loff_t length, int small) 200 200 { 201 201 if (length < 0) 202 202 return -EINVAL; ··· 251 251 int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 252 252 { 253 253 struct inode *inode = file_inode(file); 254 - long ret; 254 + int ret; 255 255 loff_t sum; 256 256 257 257 if (offset < 0 || len <= 0) ··· 460 460 return override_creds(override_cred); 461 461 } 462 462 463 - static long do_faccessat(int dfd, const char __user *filename, int mode, int flags) 463 + static int do_faccessat(int dfd, const char __user *filename, int mode, int flags) 464 464 { 465 465 struct path path; 466 466 struct inode *inode; ··· 1409 1409 } 1410 1410 EXPORT_SYMBOL(file_open_root); 1411 1411 1412 - static long do_sys_openat2(int dfd, const char __user *filename, 1413 - struct open_how *how) 1412 + static int do_sys_openat2(int dfd, const char __user *filename, 1413 + struct open_how *how) 1414 1414 { 1415 1415 struct open_flags op; 1416 - int fd = build_open_flags(how, &op); 1417 
1416 struct filename *tmp; 1417 + int err, fd; 1418 1418 1419 - if (fd) 1420 - return fd; 1419 + err = build_open_flags(how, &op); 1420 + if (unlikely(err)) 1421 + return err; 1421 1422 1422 1423 tmp = getname(filename); 1423 1424 if (IS_ERR(tmp)) 1424 1425 return PTR_ERR(tmp); 1425 1426 1426 1427 fd = get_unused_fd_flags(how->flags); 1427 - if (fd >= 0) { 1428 + if (likely(fd >= 0)) { 1428 1429 struct file *f = do_filp_open(dfd, tmp, &op); 1429 1430 if (IS_ERR(f)) { 1430 1431 put_unused_fd(fd); ··· 1438 1437 return fd; 1439 1438 } 1440 1439 1441 - long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) 1440 + int do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) 1442 1441 { 1443 1442 struct open_how how = build_open_how(flags, mode); 1444 1443 return do_sys_openat2(dfd, filename, &how);
+9 -4
fs/read_write.c
··· 169 169 170 170 if (whence == SEEK_CUR) { 171 171 /* 172 - * f_lock protects against read/modify/write race with 173 - * other SEEK_CURs. Note that parallel writes and reads 174 - * behave like SEEK_SET. 172 + * If the file requires locking via f_pos_lock we know 173 + * that mutual exclusion for SEEK_CUR on the same file 174 + * is guaranteed. If the file isn't locked, we take 175 + * f_lock to protect against f_pos races with other 176 + * SEEK_CURs. 175 177 */ 176 - guard(spinlock)(&file->f_lock); 178 + if (file_seek_cur_needs_f_lock(file)) { 179 + guard(spinlock)(&file->f_lock); 180 + return vfs_setpos(file, file->f_pos + offset, maxsize); 181 + } 177 182 return vfs_setpos(file, file->f_pos + offset, maxsize); 178 183 } 179 184
+3 -4
fs/signalfd.c
··· 277 277 return ufd; 278 278 } 279 279 280 - file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx, 281 - O_RDWR | (flags & O_NONBLOCK)); 280 + file = anon_inode_getfile_fmode("[signalfd]", &signalfd_fops, 281 + ctx, O_RDWR | (flags & O_NONBLOCK), 282 + FMODE_NOWAIT); 282 283 if (IS_ERR(file)) { 283 284 put_unused_fd(ufd); 284 285 kfree(ctx); 285 286 return PTR_ERR(file); 286 287 } 287 - file->f_mode |= FMODE_NOWAIT; 288 - 289 288 fd_install(ufd, file); 290 289 } else { 291 290 CLASS(fd, f)(ufd);
+1 -1
fs/smb/client/file.c
··· 388 388 spin_unlock(&tcon->tc_lock); 389 389 390 390 /* 391 - * BB Add call to invalidate_inodes(sb) for all superblocks mounted 391 + * BB Add call to evict_inodes(sb) for all superblocks mounted 392 392 * to this tcon. 393 393 */ 394 394 }
+1 -1
fs/super.c
··· 1417 1417 if (!surprise) 1418 1418 sync_filesystem(sb); 1419 1419 shrink_dcache_sb(sb); 1420 - invalidate_inodes(sb); 1420 + evict_inodes(sb); 1421 1421 if (sb->s_op->shutdown) 1422 1422 sb->s_op->shutdown(sb); 1423 1423
+3 -3
fs/timerfd.c
··· 439 439 return ufd; 440 440 } 441 441 442 - file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx, 443 - O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 442 + file = anon_inode_getfile_fmode("[timerfd]", &timerfd_fops, ctx, 443 + O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS), 444 + FMODE_NOWAIT); 444 445 if (IS_ERR(file)) { 445 446 put_unused_fd(ufd); 446 447 kfree(ctx); 447 448 return PTR_ERR(file); 448 449 } 449 450 450 - file->f_mode |= FMODE_NOWAIT; 451 451 fd_install(ufd, file); 452 452 return ufd; 453 453 }
+26 -23
include/linux/fs.h
··· 2 2 #ifndef _LINUX_FS_H 3 3 #define _LINUX_FS_H 4 4 5 + #include <linux/vfsdebug.h> 5 6 #include <linux/linkage.h> 6 7 #include <linux/wait_bit.h> 7 8 #include <linux/kdev_t.h> ··· 791 790 792 791 static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) 793 792 { 794 - int testlen; 795 - 796 - /* 797 - * TODO: patch it into a debug-only check if relevant macros show up. 798 - * In the meantime, since we are suffering strlen even on production kernels 799 - * to find the right length, do a fixup if the wrong value got passed. 800 - */ 801 - testlen = strlen(link); 802 - if (testlen != linklen) { 803 - WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)", 804 - link, linklen, testlen); 805 - linklen = testlen; 806 - } 793 + VFS_WARN_ON_INODE(strlen(link) != linklen, inode); 794 + VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode); 807 795 inode->i_link = link; 808 796 inode->i_linklen = linklen; 809 797 inode->i_opflags |= IOP_CACHED_LINK; ··· 1057 1067 1058 1068 /** 1059 1069 * struct file - Represents a file 1060 - * @f_ref: reference count 1061 1070 * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. 
1062 1071 * @f_mode: FMODE_* flags often used in hotpaths 1063 1072 * @f_op: file operations ··· 1066 1077 * @f_flags: file flags 1067 1078 * @f_iocb_flags: iocb flags 1068 1079 * @f_cred: stashed credentials of creator/opener 1080 + * @f_owner: file owner 1069 1081 * @f_path: path of the file 1070 1082 * @f_pos_lock: lock protecting file position 1071 1083 * @f_pipe: specific to pipes 1072 1084 * @f_pos: file position 1073 1085 * @f_security: LSM security context of this file 1074 - * @f_owner: file owner 1075 1086 * @f_wb_err: writeback error 1076 1087 * @f_sb_err: per sb writeback errors 1077 1088 * @f_ep: link of all epoll hooks for this file ··· 1079 1090 * @f_llist: work queue entrypoint 1080 1091 * @f_ra: file's readahead state 1081 1092 * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) 1093 + * @f_ref: reference count 1082 1094 */ 1083 1095 struct file { 1084 - file_ref_t f_ref; 1085 1096 spinlock_t f_lock; 1086 1097 fmode_t f_mode; 1087 1098 const struct file_operations *f_op; ··· 1091 1102 unsigned int f_flags; 1092 1103 unsigned int f_iocb_flags; 1093 1104 const struct cred *f_cred; 1105 + struct fown_struct *f_owner; 1094 1106 /* --- cacheline 1 boundary (64 bytes) --- */ 1095 1107 struct path f_path; 1096 1108 union { ··· 1105 1115 void *f_security; 1106 1116 #endif 1107 1117 /* --- cacheline 2 boundary (128 bytes) --- */ 1108 - struct fown_struct *f_owner; 1109 1118 errseq_t f_wb_err; 1110 1119 errseq_t f_sb_err; 1111 1120 #ifdef CONFIG_EPOLL ··· 1116 1127 struct file_ra_state f_ra; 1117 1128 freeptr_t f_freeptr; 1118 1129 }; 1130 + file_ref_t f_ref; 1119 1131 /* --- cacheline 3 boundary (192 bytes) --- */ 1120 1132 } __randomize_layout 1121 1133 __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ ··· 2029 2039 int vfs_fchmod(struct file *file, umode_t mode); 2030 2040 int vfs_utimes(const struct path *path, struct timespec64 *times); 2031 2041 2032 - extern long vfs_ioctl(struct file *file, unsigned 
int cmd, unsigned long arg); 2042 + int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 2033 2043 2034 2044 #ifdef CONFIG_COMPAT 2035 2045 extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, ··· 2781 2791 return mnt_idmap(mnt) != &nop_mnt_idmap; 2782 2792 } 2783 2793 2784 - extern long vfs_truncate(const struct path *, loff_t); 2794 + int vfs_truncate(const struct path *, loff_t); 2785 2795 int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start, 2786 2796 unsigned int time_attrs, struct file *filp); 2787 2797 extern int vfs_fallocate(struct file *file, int mode, loff_t offset, 2788 2798 loff_t len); 2789 - extern long do_sys_open(int dfd, const char __user *filename, int flags, 2790 - umode_t mode); 2799 + int do_sys_open(int dfd, const char __user *filename, int flags, 2800 + umode_t mode); 2791 2801 extern struct file *file_open_name(struct filename *, int, umode_t); 2792 2802 extern struct file *filp_open(const char *, int, umode_t); 2793 2803 extern struct file *file_open_root(const struct path *, ··· 2838 2848 2839 2849 extern struct filename *getname_flags(const char __user *, int); 2840 2850 extern struct filename *getname_uflags(const char __user *, int); 2841 - extern struct filename *getname(const char __user *); 2851 + static inline struct filename *getname(const char __user *name) 2852 + { 2853 + return getname_flags(name, 0); 2854 + } 2842 2855 extern struct filename *getname_kernel(const char *); 2843 2856 extern struct filename *__getname_maybe_null(const char __user *); 2844 2857 static inline struct filename *getname_maybe_null(const char __user *name, int flags) ··· 2854 2861 return __getname_maybe_null(name); 2855 2862 } 2856 2863 extern void putname(struct filename *name); 2864 + 2865 + static inline struct filename *refname(struct filename *name) 2866 + { 2867 + atomic_inc(&name->refcnt); 2868 + return name; 2869 + } 2857 2870 2858 2871 extern int finish_open(struct file *file, struct dentry *dentry, 
2859 2872 int (*open)(struct inode *, struct file *)); ··· 3293 3294 extern void iget_failed(struct inode *); 3294 3295 extern void clear_inode(struct inode *); 3295 3296 extern void __destroy_inode(struct inode *); 3296 - extern struct inode *new_inode_pseudo(struct super_block *sb); 3297 + struct inode *alloc_inode(struct super_block *sb); 3298 + static inline struct inode *new_inode_pseudo(struct super_block *sb) 3299 + { 3300 + return alloc_inode(sb); 3301 + } 3297 3302 extern struct inode *new_inode(struct super_block *sb); 3298 3303 extern void free_inode_nonrcu(struct inode *inode); 3299 3304 extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
+1 -1
include/linux/mm.h
··· 2555 2555 struct task_struct *task, bool bypass_rlim); 2556 2556 2557 2557 struct kvec; 2558 - struct page *get_dump_page(unsigned long addr); 2558 + struct page *get_dump_page(unsigned long addr, int *locked); 2559 2559 2560 2560 bool folio_mark_dirty(struct folio *folio); 2561 2561 bool folio_mark_dirty_lock(struct folio *folio);
+11 -9
include/linux/pagemap.h
··· 1044 1044 return folio->index + folio_page_idx(folio, page); 1045 1045 } 1046 1046 1047 + /** 1048 + * folio_pos - Returns the byte position of this folio in its file. 1049 + * @folio: The folio. 1050 + */ 1051 + static inline loff_t folio_pos(const struct folio *folio) 1052 + { 1053 + return ((loff_t)folio->index) * PAGE_SIZE; 1054 + } 1055 + 1047 1056 /* 1048 1057 * Return byte-offset into filesystem object for page. 1049 1058 */ 1050 1059 static inline loff_t page_offset(struct page *page) 1051 1060 { 1052 - return ((loff_t)page->index) << PAGE_SHIFT; 1053 - } 1061 + struct folio *folio = page_folio(page); 1054 1062 1055 - /** 1056 - * folio_pos - Returns the byte position of this folio in its file. 1057 - * @folio: The folio. 1058 - */ 1059 - static inline loff_t folio_pos(struct folio *folio) 1060 - { 1061 - return page_offset(&folio->page); 1063 + return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE; 1062 1064 } 1063 1065 1064 1066 /*
+2 -2
include/linux/syscalls.h
··· 1266 1266 AT_SYMLINK_NOFOLLOW); 1267 1267 } 1268 1268 1269 - extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); 1269 + int do_sys_ftruncate(unsigned int fd, loff_t length, int small); 1270 1270 1271 1271 static inline long ksys_ftruncate(unsigned int fd, loff_t length) 1272 1272 { 1273 1273 return do_sys_ftruncate(fd, length, 1); 1274 1274 } 1275 1275 1276 - extern long do_sys_truncate(const char __user *pathname, loff_t length); 1276 + int do_sys_truncate(const char __user *pathname, loff_t length); 1277 1277 1278 1278 static inline long ksys_truncate(const char __user *pathname, loff_t length) 1279 1279 {
+45
include/linux/vfsdebug.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef LINUX_VFS_DEBUG_H 3 + #define LINUX_VFS_DEBUG_H 1 4 + 5 + #include <linux/bug.h> 6 + 7 + struct inode; 8 + 9 + #ifdef CONFIG_DEBUG_VFS 10 + void dump_inode(struct inode *inode, const char *reason); 11 + 12 + #define VFS_BUG_ON(cond) BUG_ON(cond) 13 + #define VFS_WARN_ON(cond) (void)WARN_ON(cond) 14 + #define VFS_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) 15 + #define VFS_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) 16 + #define VFS_WARN(cond, format...) (void)WARN(cond, format) 17 + 18 + #define VFS_BUG_ON_INODE(cond, inode) ({ \ 19 + if (unlikely(!!(cond))) { \ 20 + dump_inode(inode, "VFS_BUG_ON_INODE(" #cond")");\ 21 + BUG_ON(1); \ 22 + } \ 23 + }) 24 + 25 + #define VFS_WARN_ON_INODE(cond, inode) ({ \ 26 + int __ret_warn = !!(cond); \ 27 + \ 28 + if (unlikely(__ret_warn)) { \ 29 + dump_inode(inode, "VFS_WARN_ON_INODE(" #cond")");\ 30 + WARN_ON(1); \ 31 + } \ 32 + unlikely(__ret_warn); \ 33 + }) 34 + #else 35 + #define VFS_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) 36 + #define VFS_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) 37 + #define VFS_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) 38 + #define VFS_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) 39 + #define VFS_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) 40 + 41 + #define VFS_BUG_ON_INODE(cond, inode) VFS_BUG_ON(cond) 42 + #define VFS_WARN_ON_INODE(cond, inode) BUILD_BUG_ON_INVALID(cond) 43 + #endif /* CONFIG_DEBUG_VFS */ 44 + 45 + #endif
+5 -7
kernel/auditsc.c
··· 2207 2207 list_for_each_entry(n, &context->names_list, list) { 2208 2208 if (!n->name) 2209 2209 continue; 2210 - if (n->name->uptr == uptr) { 2211 - atomic_inc(&n->name->refcnt); 2212 - return n->name; 2213 - } 2210 + if (n->name->uptr == uptr) 2211 + return refname(n->name); 2214 2212 } 2215 2213 return NULL; 2216 2214 } ··· 2235 2237 n->name = name; 2236 2238 n->name_len = AUDIT_NAME_FULL; 2237 2239 name->aname = n; 2238 - atomic_inc(&name->refcnt); 2240 + refname(name); 2239 2241 } 2240 2242 2241 2243 static inline int audit_copy_fcaps(struct audit_names *name, ··· 2367 2369 return; 2368 2370 if (name) { 2369 2371 n->name = name; 2370 - atomic_inc(&name->refcnt); 2372 + refname(name); 2371 2373 } 2372 2374 2373 2375 out: ··· 2494 2496 if (found_parent) { 2495 2497 found_child->name = found_parent->name; 2496 2498 found_child->name_len = AUDIT_NAME_FULL; 2497 - atomic_inc(&found_child->name->refcnt); 2499 + refname(found_child->name); 2498 2500 } 2499 2501 } 2500 2502
+1 -1
kernel/kcmp.c
··· 145 145 */ 146 146 task1 = find_task_by_vpid(pid1); 147 147 task2 = find_task_by_vpid(pid2); 148 - if (!task1 || !task2) 148 + if (unlikely(!task1 || !task2)) 149 149 goto err_no_task; 150 150 151 151 get_task_struct(task1);
+9
kernel/watch_queue.c
··· 269 269 if (ret < 0) 270 270 goto error; 271 271 272 + /* 273 + * pipe_resize_ring() does not update nr_accounted for watch_queue 274 + * pipes, because the above vastly overprovisions. Set nr_accounted 275 + * and max_usage on this pipe to the number that was actually charged to 276 + * the user above via account_pipe_buffers. 277 + */ 278 + pipe->max_usage = nr_pages; 279 + pipe->nr_accounted = nr_pages; 280 + 272 281 ret = -ENOMEM; 273 282 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 274 283 if (!pages)
+9
lib/Kconfig.debug
··· 808 808 An architecture should select this when it can successfully 809 809 build and run DEBUG_VM_PGTABLE. 810 810 811 + config DEBUG_VFS 812 + bool "Debug VFS" 813 + depends on DEBUG_KERNEL 814 + help 815 + Enable this to turn on extended checks in the VFS layer that may impact 816 + performance. 817 + 818 + If unsure, say N. 819 + 811 820 config DEBUG_VM_IRQSOFF 812 821 def_bool DEBUG_VM && !PREEMPT_RT 813 822
+3 -3
mm/gup.c
··· 2254 2254 /** 2255 2255 * get_dump_page() - pin user page in memory while writing it to core dump 2256 2256 * @addr: user address 2257 + * @locked: a pointer to an int denoting whether the mmap sem is held 2257 2258 * 2258 2259 * Returns struct page pointer of user page pinned for dump, 2259 2260 * to be freed afterwards by put_page(). ··· 2267 2266 * Called without mmap_lock (takes and releases the mmap_lock by itself). 2268 2267 */ 2269 2268 #ifdef CONFIG_ELF_CORE 2270 - struct page *get_dump_page(unsigned long addr) 2269 + struct page *get_dump_page(unsigned long addr, int *locked) 2271 2270 { 2272 2271 struct page *page; 2273 - int locked = 0; 2274 2272 int ret; 2275 2273 2276 - ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked, 2274 + ret = __get_user_pages_locked(current->mm, addr, 1, &page, locked, 2277 2275 FOLL_FORCE | FOLL_DUMP | FOLL_GET); 2278 2276 return (ret == 1) ? page : NULL; 2279 2277 }
+1 -1
security/landlock/fs.c
··· 1216 1216 /* 1217 1217 * Release the inodes used in a security policy. 1218 1218 * 1219 - * Cf. fsnotify_unmount_inodes() and invalidate_inodes() 1219 + * Cf. fsnotify_unmount_inodes() and evict_inodes() 1220 1220 */ 1221 1221 static void hook_sb_delete(struct super_block *const sb) 1222 1222 {
+4 -7
virt/kvm/kvm_main.c
··· 4224 4224 if (fd < 0) 4225 4225 return fd; 4226 4226 4227 - file = anon_inode_getfile(name, &kvm_vcpu_stats_fops, vcpu, O_RDONLY); 4227 + file = anon_inode_getfile_fmode(name, &kvm_vcpu_stats_fops, vcpu, 4228 + O_RDONLY, FMODE_PREAD); 4228 4229 if (IS_ERR(file)) { 4229 4230 put_unused_fd(fd); 4230 4231 return PTR_ERR(file); 4231 4232 } 4232 4233 4233 4234 kvm_get_kvm(vcpu->kvm); 4234 - 4235 - file->f_mode |= FMODE_PREAD; 4236 4235 fd_install(fd, file); 4237 4236 4238 4237 return fd; ··· 5019 5020 if (fd < 0) 5020 5021 return fd; 5021 5022 5022 - file = anon_inode_getfile("kvm-vm-stats", 5023 - &kvm_vm_stats_fops, kvm, O_RDONLY); 5023 + file = anon_inode_getfile_fmode("kvm-vm-stats", 5024 + &kvm_vm_stats_fops, kvm, O_RDONLY, FMODE_PREAD); 5024 5025 if (IS_ERR(file)) { 5025 5026 put_unused_fd(fd); 5026 5027 return PTR_ERR(file); 5027 5028 } 5028 5029 5029 5030 kvm_get_kvm(kvm); 5030 - 5031 - file->f_mode |= FMODE_PREAD; 5032 5031 fd_install(fd, file); 5033 5032 5034 5033 return fd;