Merge tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Pull misc vfs updates from Christian Brauner:
"This contains the usual miscellaneous features, cleanups, and fixes
for vfs and individual fses.

Features:

- Free up FMODE_* bits. I've freed up bits 6, 7, 8, and 24. That
means we now have six free FMODE_* bits in total (but bit #6
already got used for FMODE_WRITE_RESTRICTED)

- Add FOP_HUGE_PAGES flag (follow-up to FMODE_* cleanup)

- Add fd_raw cleanup class so we can make use of automatic cleanup
provided by CLASS(fd_raw, f)(fd) for O_PATH fds as well

- Optimize seq_puts()

- Simplify __seq_puts()

- Add new anon_inode_getfile_fmode() api to allow specifying f_mode
instead of open-coding it in multiple places

- Annotate struct file_handle with __counted_by() and use
struct_size()

- Warn in get_file() when f_count resurrection from zero is
attempted (epoll/drm discussion)

- Folio-sophize aio

- Export the subvolume id in statx() for both btrfs and bcachefs

- Relax linkat(AT_EMPTY_PATH) requirements

- Add F_DUPFD_QUERY fcntl(), which allows comparing two file descriptors
for dup*() equality, as a replacement for kcmp()

Cleanups:

- Compile out swapfile inode checks when swap isn't enabled

- Use (1 << n) notation for FMODE_* bitshifts for clarity

- Remove redundant variable assignment in fs/direct-io

- Clean up uses of strncpy in orangefs

- Speed up and clean up writeback

- Move fsparam_string_empty() helper into header since it's currently
open-coded in multiple places

- Add kernel-doc comments to proc_create_net_data_write()

- Don't needlessly read dentry->d_flags twice

Fixes:

- Fix out-of-range warning in nilfs2

- Fix ecryptfs overflow due to wrong encryption packet size
calculation

- Fix overly long line in xfs file_operations (follow-up to FMODE_*
cleanup)

- Don't raise FOP_BUFFER_{R,W}ASYNC for directories in xfs (follow-up
to FMODE_* cleanup)

- Don't call xfs_file_open from xfs_dir_open (follow-up to FMODE_*
cleanup)

- Fix stable offset api to prevent endless loops

- Fix afs file server rotations

- Prevent xattr node from overflowing the eraseblock in jffs2

- Move fdinfo PTRACE_MODE_READ procfs check into the .permission()
operation instead of the .open() operation, since performing the check
in .open() caused userspace regressions"

* tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (39 commits)
afs: Fix fileserver rotation getting stuck
selftests: add F_DUPDFD_QUERY selftests
fcntl: add F_DUPFD_QUERY fcntl()
file: add fd_raw cleanup class
fs: WARN when f_count resurrection is attempted
seq_file: Simplify __seq_puts()
seq_file: Optimize seq_puts()
proc: Move fdinfo PTRACE_MODE_READ check into the inode .permission operation
fs: Create anon_inode_getfile_fmode()
xfs: don't call xfs_file_open from xfs_dir_open
xfs: drop fop_flags for directories
xfs: fix overly long line in the file_operations
shmem: Fix shmem_rename2()
libfs: Add simple_offset_rename() API
libfs: Fix simple_offset_rename_exchange()
jffs2: prevent xattr node from overflowing the eraseblock
vfs, swap: compile out IS_SWAPFILE() on swapless configs
vfs: relax linkat() AT_EMPTY_PATH - aka flink() - requirements
fs/direct-io: remove redundant assignment to variable retval
fs/dcache: Re-use value stored to dentry->d_flags instead of re-reading
...

Linus Torvalds 2 years ago 1b0aabcc c117a437

+439 -250

50 changed files

expand all

block

bdev.c

fops.c

drivers

dax

device.c

afs

rotate.c

aio.c

anon_inodes.c

bcachefs

fs.c

btrfs

file.c

inode.c

dcache.c

direct-io.c

ecryptfs

keystore.c

ext4

file.c

super.c

f2fs

file.c

fcntl.c

fhandle.c

fs-writeback.c

hugetlbfs

inode.c

jffs2

xattr.c

libfs.c

namei.c

nilfs2

ioctl.c

orangefs

dcache.c

namei.c

super.c

overlayfs

params.c

proc

fd.c

proc_net.c

read_write.c

seq_file.c

stat.c

xfs

xfs_file.c

include

linux

anon_inodes.h

file.h

fs.h

fs_parser.h

hugetlb.h

namei.h

seq_file.h

shm.h

stat.h

uapi

linux

fcntl.h

stat.h

io_uring

io_uring.c

rw.c

ipc

shm.c

mmap.c

shmem.c

tools

testing

selftests

core

close_range_test.c

+1 -1

block/bdev.c

··· 912 912 disk_unblock_events(disk); 913 913 914 914 bdev_file->f_flags |= O_LARGEFILE; 915 - bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; 915 + bdev_file->f_mode |= FMODE_CAN_ODIRECT; 916 916 if (bdev_nowait(bdev)) 917 917 bdev_file->f_mode |= FMODE_NOWAIT; 918 918 if (mode & BLK_OPEN_RESTRICT_WRITES)

block/fops.c

··· 863 863 .splice_read = filemap_splice_read, 864 864 .splice_write = iter_file_splice_write, 865 865 .fallocate = blkdev_fallocate, 866 + .fop_flags = FOP_BUFFER_RASYNC, 866 867 }; 867 868 868 869 static __init int blkdev_init(void)

+1 -1

drivers/dax/device.c

··· 377 377 .release = dax_release, 378 378 .get_unmapped_area = dax_get_unmapped_area, 379 379 .mmap = dax_mmap, 380 - .mmap_supported_flags = MAP_SYNC, 380 + .fop_flags = FOP_MMAP_SYNC, 381 381 }; 382 382 383 383 static void dev_dax_cdev_del(void *cdev)

+6 -2

fs/afs/rotate.c

··· 541 541 test_bit(AFS_SE_EXCLUDED, &se->flags) || 542 542 !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags)) 543 543 continue; 544 - es = op->server_states->endpoint_state; 544 + es = op->server_states[i].endpoint_state; 545 545 sal = es->addresses; 546 546 547 547 afs_get_address_preferences_rcu(op->net, sal); 548 548 for (j = 0; j < sal->nr_addrs; j++) { 549 + if (es->failed_set & (1 << j)) 550 + continue; 549 551 if (!sal->addrs[j].peer) 550 552 continue; 551 553 if (sal->addrs[j].prio > best_prio) { ··· 607 605 best_prio = -1; 608 606 addr_index = 0; 609 607 for (i = 0; i < alist->nr_addrs; i++) { 608 + if (!(set & (1 << i))) 609 + continue; 610 610 if (alist->addrs[i].prio > best_prio) { 611 611 addr_index = i; 612 612 best_prio = alist->addrs[i].prio; ··· 678 674 for (i = 0; i < op->server_list->nr_servers; i++) { 679 675 struct afs_endpoint_state *estate; 680 676 681 - estate = op->server_states->endpoint_state; 677 + estate = op->server_states[i].endpoint_state; 682 678 error = READ_ONCE(estate->error); 683 679 if (error < 0) 684 680 afs_op_accumulate_error(op, error, estate->abort_code);

+48 -45

fs/aio.c

··· 122 122 unsigned long mmap_base; 123 123 unsigned long mmap_size; 124 124 125 - struct page **ring_pages; 125 + struct folio **ring_folios; 126 126 long nr_pages; 127 127 128 128 struct rcu_work free_rwork; /* see free_ioctx() */ ··· 160 160 spinlock_t completion_lock; 161 161 } ____cacheline_aligned_in_smp; 162 162 163 - struct page *internal_pages[AIO_RING_PAGES]; 163 + struct folio *internal_folios[AIO_RING_PAGES]; 164 164 struct file *aio_ring_file; 165 165 166 166 unsigned id; ··· 334 334 put_aio_ring_file(ctx); 335 335 336 336 for (i = 0; i < ctx->nr_pages; i++) { 337 - struct page *page; 338 - pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, 339 - page_count(ctx->ring_pages[i])); 340 - page = ctx->ring_pages[i]; 341 - if (!page) 337 + struct folio *folio = ctx->ring_folios[i]; 338 + 339 + if (!folio) 342 340 continue; 343 - ctx->ring_pages[i] = NULL; 344 - put_page(page); 341 + 342 + pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i, 343 + folio_ref_count(folio)); 344 + ctx->ring_folios[i] = NULL; 345 + folio_put(folio); 345 346 } 346 347 347 - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { 348 - kfree(ctx->ring_pages); 349 - ctx->ring_pages = NULL; 348 + if (ctx->ring_folios && ctx->ring_folios != ctx->internal_folios) { 349 + kfree(ctx->ring_folios); 350 + ctx->ring_folios = NULL; 350 351 } 351 352 } 352 353 ··· 442 441 idx = src->index; 443 442 if (idx < (pgoff_t)ctx->nr_pages) { 444 443 /* Make sure the old folio hasn't already been changed */ 445 - if (ctx->ring_pages[idx] != &src->page) 444 + if (ctx->ring_folios[idx] != src) 446 445 rc = -EAGAIN; 447 446 } else 448 447 rc = -EINVAL; ··· 466 465 */ 467 466 spin_lock_irqsave(&ctx->completion_lock, flags); 468 467 folio_migrate_copy(dst, src); 469 - BUG_ON(ctx->ring_pages[idx] != &src->page); 470 - ctx->ring_pages[idx] = &dst->page; 468 + BUG_ON(ctx->ring_folios[idx] != src); 469 + ctx->ring_folios[idx] = dst; 471 470 spin_unlock_irqrestore(&ctx->completion_lock, 
flags); 472 471 473 472 /* The old folio is no longer accessible. */ ··· 517 516 nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) 518 517 / sizeof(struct io_event); 519 518 520 - ctx->ring_pages = ctx->internal_pages; 519 + ctx->ring_folios = ctx->internal_folios; 521 520 if (nr_pages > AIO_RING_PAGES) { 522 - ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), 523 - GFP_KERNEL); 524 - if (!ctx->ring_pages) { 521 + ctx->ring_folios = kcalloc(nr_pages, sizeof(struct folio *), 522 + GFP_KERNEL); 523 + if (!ctx->ring_folios) { 525 524 put_aio_ring_file(ctx); 526 525 return -ENOMEM; 527 526 } 528 527 } 529 528 530 529 for (i = 0; i < nr_pages; i++) { 531 - struct page *page; 532 - page = find_or_create_page(file->f_mapping, 533 - i, GFP_USER | __GFP_ZERO); 534 - if (!page) 535 - break; 536 - pr_debug("pid(%d) page[%d]->count=%d\n", 537 - current->pid, i, page_count(page)); 538 - SetPageUptodate(page); 539 - unlock_page(page); 530 + struct folio *folio; 540 531 541 - ctx->ring_pages[i] = page; 532 + folio = __filemap_get_folio(file->f_mapping, i, 533 + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, 534 + GFP_USER | __GFP_ZERO); 535 + if (IS_ERR(folio)) 536 + break; 537 + 538 + pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i, 539 + folio_ref_count(folio)); 540 + folio_end_read(folio, true); 541 + 542 + ctx->ring_folios[i] = folio; 542 543 } 543 544 ctx->nr_pages = i; 544 545 ··· 573 570 ctx->user_id = ctx->mmap_base; 574 571 ctx->nr_events = nr_events; /* trusted copy */ 575 572 576 - ring = page_address(ctx->ring_pages[0]); 573 + ring = folio_address(ctx->ring_folios[0]); 577 574 ring->nr = nr_events; /* user copy */ 578 575 ring->id = ~0U; 579 576 ring->head = ring->tail = 0; ··· 581 578 ring->compat_features = AIO_RING_COMPAT_FEATURES; 582 579 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; 583 580 ring->header_length = sizeof(struct aio_ring); 584 - flush_dcache_page(ctx->ring_pages[0]); 581 + flush_dcache_folio(ctx->ring_folios[0]); 585 582 
586 583 return 0; 587 584 } ··· 692 689 693 690 /* While kioctx setup is in progress, 694 691 * we are protected from page migration 695 - * changes ring_pages by ->ring_lock. 692 + * changes ring_folios by ->ring_lock. 696 693 */ 697 - ring = page_address(ctx->ring_pages[0]); 694 + ring = folio_address(ctx->ring_folios[0]); 698 695 ring->id = ctx->id; 699 696 return 0; 700 697 } ··· 1036 1033 * against ctx->completed_events below will make sure we do the 1037 1034 * safe/right thing. 1038 1035 */ 1039 - ring = page_address(ctx->ring_pages[0]); 1036 + ring = folio_address(ctx->ring_folios[0]); 1040 1037 head = ring->head; 1041 1038 1042 1039 refill_reqs_available(ctx, head, ctx->tail); ··· 1148 1145 if (++tail >= ctx->nr_events) 1149 1146 tail = 0; 1150 1147 1151 - ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1148 + ev_page = folio_address(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]); 1152 1149 event = ev_page + pos % AIO_EVENTS_PER_PAGE; 1153 1150 1154 1151 *event = iocb->ki_res; 1155 1152 1156 - flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1153 + flush_dcache_folio(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]); 1157 1154 1158 1155 pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb, 1159 1156 (void __user *)(unsigned long)iocb->ki_res.obj, ··· 1166 1163 1167 1164 ctx->tail = tail; 1168 1165 1169 - ring = page_address(ctx->ring_pages[0]); 1166 + ring = folio_address(ctx->ring_folios[0]); 1170 1167 head = ring->head; 1171 1168 ring->tail = tail; 1172 - flush_dcache_page(ctx->ring_pages[0]); 1169 + flush_dcache_folio(ctx->ring_folios[0]); 1173 1170 1174 1171 ctx->completed_events++; 1175 1172 if (ctx->completed_events > 1) ··· 1241 1238 sched_annotate_sleep(); 1242 1239 mutex_lock(&ctx->ring_lock); 1243 1240 1244 - /* Access to ->ring_pages here is protected by ctx->ring_lock. */ 1245 - ring = page_address(ctx->ring_pages[0]); 1241 + /* Access to ->ring_folios here is protected by ctx->ring_lock. 
*/ 1242 + ring = folio_address(ctx->ring_folios[0]); 1246 1243 head = ring->head; 1247 1244 tail = ring->tail; 1248 1245 ··· 1263 1260 while (ret < nr) { 1264 1261 long avail; 1265 1262 struct io_event *ev; 1266 - struct page *page; 1263 + struct folio *folio; 1267 1264 1268 1265 avail = (head <= tail ? tail : ctx->nr_events) - head; 1269 1266 if (head == tail) 1270 1267 break; 1271 1268 1272 1269 pos = head + AIO_EVENTS_OFFSET; 1273 - page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; 1270 + folio = ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]; 1274 1271 pos %= AIO_EVENTS_PER_PAGE; 1275 1272 1276 1273 avail = min(avail, nr - ret); 1277 1274 avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos); 1278 1275 1279 - ev = page_address(page); 1276 + ev = folio_address(folio); 1280 1277 copy_ret = copy_to_user(event + ret, ev + pos, 1281 1278 sizeof(*ev) * avail); 1282 1279 ··· 1290 1287 head %= ctx->nr_events; 1291 1288 } 1292 1289 1293 - ring = page_address(ctx->ring_pages[0]); 1290 + ring = folio_address(ctx->ring_folios[0]); 1294 1291 ring->head = head; 1295 - flush_dcache_page(ctx->ring_pages[0]); 1292 + flush_dcache_folio(ctx->ring_folios[0]); 1296 1293 1297 1294 pr_debug("%li h%u t%u\n", ret, head, tail); 1298 1295 out:

+33

fs/anon_inodes.c

··· 149 149 EXPORT_SYMBOL_GPL(anon_inode_getfile); 150 150 151 151 /** 152 + * anon_inode_getfile_fmode - creates a new file instance by hooking it up to an 153 + * anonymous inode, and a dentry that describe the "class" 154 + * of the file 155 + * 156 + * @name: [in] name of the "class" of the new file 157 + * @fops: [in] file operations for the new file 158 + * @priv: [in] private data for the new file (will be file's private_data) 159 + * @flags: [in] flags 160 + * @f_mode: [in] fmode 161 + * 162 + * Creates a new file by hooking it on a single inode. This is useful for files 163 + * that do not need to have a full-fledged inode in order to operate correctly. 164 + * All the files created with anon_inode_getfile() will share a single inode, 165 + * hence saving memory and avoiding code duplication for the file/inode/dentry 166 + * setup. Allows setting the fmode. Returns the newly created file* or an error 167 + * pointer. 168 + */ 169 + struct file *anon_inode_getfile_fmode(const char *name, 170 + const struct file_operations *fops, 171 + void *priv, int flags, fmode_t f_mode) 172 + { 173 + struct file *file; 174 + 175 + file = __anon_inode_getfile(name, fops, priv, flags, NULL, false); 176 + if (!IS_ERR(file)) 177 + file->f_mode |= f_mode; 178 + 179 + return file; 180 + } 181 + EXPORT_SYMBOL_GPL(anon_inode_getfile_fmode); 182 + 183 + /** 152 184 * anon_inode_create_getfile - Like anon_inode_getfile(), but creates a new 153 185 * !S_PRIVATE anon inode rather than reuse the 154 186 * singleton anon inode and calls the ··· 302 270 { 303 271 return __anon_inode_getfd(name, fops, priv, flags, context_inode, true); 304 272 } 273 + 305 274 306 275 static int __init anon_inode_init(void) 307 276 {

fs/bcachefs/fs.c

··· 844 844 stat->blksize = block_bytes(c); 845 845 stat->blocks = inode->v.i_blocks; 846 846 847 + stat->subvol = inode->ei_subvol; 848 + stat->result_mask |= STATX_SUBVOL; 849 + 847 850 if (request_mask & STATX_BTIME) { 848 851 stat->result_mask |= STATX_BTIME; 849 852 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);

+2 -2

fs/btrfs/file.c

··· 3719 3719 { 3720 3720 int ret; 3721 3721 3722 - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | 3723 - FMODE_CAN_ODIRECT; 3722 + filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; 3724 3723 3725 3724 ret = fsverity_file_open(inode, filp); 3726 3725 if (ret) ··· 3849 3850 .compat_ioctl = btrfs_compat_ioctl, 3850 3851 #endif 3851 3852 .remap_file_range = btrfs_remap_file_range, 3853 + .fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC, 3852 3854 }; 3853 3855 3854 3856 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)

fs/btrfs/inode.c

··· 8789 8789 generic_fillattr(idmap, request_mask, inode, stat); 8790 8790 stat->dev = BTRFS_I(inode)->root->anon_dev; 8791 8791 8792 + stat->subvol = BTRFS_I(inode)->root->root_key.objectid; 8793 + stat->result_mask |= STATX_SUBVOL; 8794 + 8792 8795 spin_lock(&BTRFS_I(inode)->lock); 8793 8796 delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes; 8794 8797 inode_bytes = inode_get_bytes(inode);

+1 -1

fs/dcache.c

··· 355 355 flags &= ~DCACHE_ENTRY_TYPE; 356 356 WRITE_ONCE(dentry->d_flags, flags); 357 357 dentry->d_inode = NULL; 358 - if (dentry->d_flags & DCACHE_LRU_LIST) 358 + if (flags & DCACHE_LRU_LIST) 359 359 this_cpu_inc(nr_dentry_negative); 360 360 } 361 361

-1

fs/direct-io.c

··· 1217 1217 */ 1218 1218 inode_dio_begin(inode); 1219 1219 1220 - retval = 0; 1221 1220 sdio.blkbits = blkbits; 1222 1221 sdio.blkfactor = i_blkbits - blkbits; 1223 1222 sdio.block_in_file = offset >> blkbits;

+3 -1

fs/ecryptfs/keystore.c

··· 300 300 * | Key Identifier Size | 1 or 2 bytes | 301 301 * | Key Identifier | arbitrary | 302 302 * | File Encryption Key Size | 1 or 2 bytes | 303 + * | Cipher Code | 1 byte | 303 304 * | File Encryption Key | arbitrary | 305 + * | Checksum | 2 bytes | 304 306 */ 305 - data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); 307 + data_len = (8 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); 306 308 *packet = kmalloc(data_len, GFP_KERNEL); 307 309 message = *packet; 308 310 if (!message) {

+3 -3

fs/ext4/file.c

··· 885 885 return ret; 886 886 } 887 887 888 - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | 889 - FMODE_DIO_PARALLEL_WRITE; 888 + filp->f_mode |= FMODE_NOWAIT; 890 889 return dquot_file_open(inode, filp); 891 890 } 892 891 ··· 937 938 .compat_ioctl = ext4_compat_ioctl, 938 939 #endif 939 940 .mmap = ext4_file_mmap, 940 - .mmap_supported_flags = MAP_SYNC, 941 941 .open = ext4_file_open, 942 942 .release = ext4_release_file, 943 943 .fsync = ext4_sync_file, ··· 944 946 .splice_read = ext4_file_splice_read, 945 947 .splice_write = iter_file_splice_write, 946 948 .fallocate = ext4_fallocate, 949 + .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | 950 + FOP_DIO_PARALLEL_WRITE, 947 951 }; 948 952 949 953 const struct inode_operations ext4_file_inode_operations = {

-4

fs/ext4/super.c

··· 1723 1723 {} 1724 1724 }; 1725 1725 1726 - /* String parameter that allows empty argument */ 1727 - #define fsparam_string_empty(NAME, OPT) \ 1728 - __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) 1729 - 1730 1726 /* 1731 1727 * Mount option specification 1732 1728 * We don't use fsparam_flag_no because of the way we set the

+2 -1

fs/f2fs/file.c

··· 569 569 if (err) 570 570 return err; 571 571 572 - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; 572 + filp->f_mode |= FMODE_NOWAIT; 573 573 filp->f_mode |= FMODE_CAN_ODIRECT; 574 574 575 575 return dquot_file_open(inode, filp); ··· 5045 5045 .splice_read = f2fs_file_splice_read, 5046 5046 .splice_write = iter_file_splice_write, 5047 5047 .fadvise = f2fs_file_fadvise, 5048 + .fop_flags = FOP_BUFFER_RASYNC, 5048 5049 };

+20

fs/fcntl.c

··· 327 327 return 0; 328 328 } 329 329 330 + /* Is the file descriptor a dup of the file? */ 331 + static long f_dupfd_query(int fd, struct file *filp) 332 + { 333 + CLASS(fd_raw, f)(fd); 334 + 335 + /* 336 + * We can do the 'fdput()' immediately, as the only thing that 337 + * matters is the pointer value which isn't changed by the fdput. 338 + * 339 + * Technically we didn't need a ref at all, and 'fdget()' was 340 + * overkill, but given our lockless file pointer lookup, the 341 + * alternatives are complicated. 342 + */ 343 + return f.file == filp; 344 + } 345 + 330 346 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 331 347 struct file *filp) 332 348 { ··· 357 341 break; 358 342 case F_DUPFD_CLOEXEC: 359 343 err = f_dupfd(argi, filp, O_CLOEXEC); 344 + break; 345 + case F_DUPFD_QUERY: 346 + err = f_dupfd_query(argi, filp); 360 347 break; 361 348 case F_GETFD: 362 349 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; ··· 465 446 switch (cmd) { 466 447 case F_DUPFD: 467 448 case F_DUPFD_CLOEXEC: 449 + case F_DUPFD_QUERY: 468 450 case F_GETFD: 469 451 case F_SETFD: 470 452 case F_GETFL:

+3 -3

fs/fhandle.c

··· 36 36 if (f_handle.handle_bytes > MAX_HANDLE_SZ) 37 37 return -EINVAL; 38 38 39 - handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, 39 + handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes), 40 40 GFP_KERNEL); 41 41 if (!handle) 42 42 return -ENOMEM; ··· 71 71 /* copy the mount id */ 72 72 if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || 73 73 copy_to_user(ufh, handle, 74 - sizeof(struct file_handle) + handle_bytes)) 74 + struct_size(handle, f_handle, handle_bytes))) 75 75 retval = -EFAULT; 76 76 kfree(handle); 77 77 return retval; ··· 192 192 retval = -EINVAL; 193 193 goto out_err; 194 194 } 195 - handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, 195 + handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), 196 196 GFP_KERNEL); 197 197 if (!handle) { 198 198 retval = -ENOMEM;

+33 -24

fs/fs-writeback.c

··· 166 166 spin_unlock_irq(&wb->work_lock); 167 167 } 168 168 169 - static void finish_writeback_work(struct bdi_writeback *wb, 170 - struct wb_writeback_work *work) 169 + static void finish_writeback_work(struct wb_writeback_work *work) 171 170 { 172 171 struct wb_completion *done = work->done; 173 172 ··· 195 196 list_add_tail(&work->list, &wb->work_list); 196 197 mod_delayed_work(bdi_wq, &wb->dwork, 0); 197 198 } else 198 - finish_writeback_work(wb, work); 199 + finish_writeback_work(work); 199 200 200 201 spin_unlock_irq(&wb->work_lock); 201 202 } ··· 1560 1561 * thread's back can have unexpected consequences. 1561 1562 */ 1562 1563 static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, 1563 - struct writeback_control *wbc) 1564 + struct writeback_control *wbc, 1565 + unsigned long dirtied_before) 1564 1566 { 1565 1567 if (inode->i_state & I_FREEING) 1566 1568 return; ··· 1594 1594 * We didn't write back all the pages. nfs_writepages() 1595 1595 * sometimes bales out without doing anything. 1596 1596 */ 1597 - if (wbc->nr_to_write <= 0) { 1597 + if (wbc->nr_to_write <= 0 && 1598 + !inode_dirtied_after(inode, dirtied_before)) { 1598 1599 /* Slice used up. Queue for next turn. 
*/ 1599 1600 requeue_io(inode, wb); 1600 1601 } else { ··· 1863 1862 unsigned long start_time = jiffies; 1864 1863 long write_chunk; 1865 1864 long total_wrote = 0; /* count both pages and inodes */ 1865 + unsigned long dirtied_before = jiffies; 1866 + 1867 + if (work->for_kupdate) 1868 + dirtied_before = jiffies - 1869 + msecs_to_jiffies(dirty_expire_interval * 10); 1866 1870 1867 1871 while (!list_empty(&wb->b_io)) { 1868 1872 struct inode *inode = wb_inode(wb->b_io.prev); ··· 1973 1967 spin_lock(&inode->i_lock); 1974 1968 if (!(inode->i_state & I_DIRTY_ALL)) 1975 1969 total_wrote++; 1976 - requeue_inode(inode, tmp_wb, &wbc); 1970 + requeue_inode(inode, tmp_wb, &wbc, dirtied_before); 1977 1971 inode_sync_complete(inode); 1978 1972 spin_unlock(&inode->i_lock); 1979 1973 ··· 2075 2069 struct inode *inode; 2076 2070 long progress; 2077 2071 struct blk_plug plug; 2072 + bool queued = false; 2078 2073 2079 2074 blk_start_plug(&plug); 2080 2075 for (;;) { ··· 2105 2098 2106 2099 spin_lock(&wb->list_lock); 2107 2100 2108 - /* 2109 - * Kupdate and background works are special and we want to 2110 - * include all inodes that need writing. Livelock avoidance is 2111 - * handled by these works yielding to any other work so we are 2112 - * safe. 2113 - */ 2114 - if (work->for_kupdate) { 2115 - dirtied_before = jiffies - 2116 - msecs_to_jiffies(dirty_expire_interval * 10); 2117 - } else if (work->for_background) 2118 - dirtied_before = jiffies; 2119 - 2120 2101 trace_writeback_start(wb, work); 2121 - if (list_empty(&wb->b_io)) 2102 + if (list_empty(&wb->b_io)) { 2103 + /* 2104 + * Kupdate and background works are special and we want 2105 + * to include all inodes that need writing. Livelock 2106 + * avoidance is handled by these works yielding to any 2107 + * other work so we are safe. 
2108 + */ 2109 + if (work->for_kupdate) { 2110 + dirtied_before = jiffies - 2111 + msecs_to_jiffies(dirty_expire_interval * 2112 + 10); 2113 + } else if (work->for_background) 2114 + dirtied_before = jiffies; 2115 + 2122 2116 queue_io(wb, work, dirtied_before); 2117 + queued = true; 2118 + } 2123 2119 if (work->sb) 2124 2120 progress = writeback_sb_inodes(work->sb, wb, work); 2125 2121 else ··· 2137 2127 * mean the overall work is done. So we keep looping as long 2138 2128 * as made some progress on cleaning pages or inodes. 2139 2129 */ 2140 - if (progress) { 2130 + if (progress || !queued) { 2141 2131 spin_unlock(&wb->list_lock); 2142 2132 continue; 2143 2133 } ··· 2272 2262 while ((work = get_next_work_item(wb)) != NULL) { 2273 2263 trace_writeback_exec(wb, work); 2274 2264 wrote += wb_writeback(wb, work); 2275 - finish_writeback_work(wb, work); 2265 + finish_writeback_work(work); 2276 2266 } 2277 2267 2278 2268 /* ··· 2332 2322 } 2333 2323 2334 2324 /* 2335 - * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero, 2336 - * write back the whole world. 2325 + * Start writeback of all dirty pages on this bdi. 2337 2326 */ 2338 2327 static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, 2339 2328 enum wb_reason reason) ··· 2735 2726 */ 2736 2727 void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) 2737 2728 { 2738 - return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); 2729 + writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); 2739 2730 } 2740 2731 EXPORT_SYMBOL(writeback_inodes_sb); 2741 2732

+3 -2

fs/hugetlbfs/inode.c

··· 40 40 #include <linux/sched/mm.h> 41 41 42 42 static const struct address_space_operations hugetlbfs_aops; 43 - const struct file_operations hugetlbfs_file_operations; 43 + static const struct file_operations hugetlbfs_file_operations; 44 44 static const struct inode_operations hugetlbfs_dir_inode_operations; 45 45 static const struct inode_operations hugetlbfs_inode_operations; 46 46 ··· 1301 1301 inode_init_once(&ei->vfs_inode); 1302 1302 } 1303 1303 1304 - const struct file_operations hugetlbfs_file_operations = { 1304 + static const struct file_operations hugetlbfs_file_operations = { 1305 1305 .read_iter = hugetlbfs_read_iter, 1306 1306 .mmap = hugetlbfs_file_mmap, 1307 1307 .fsync = noop_fsync, 1308 1308 .get_unmapped_area = hugetlb_get_unmapped_area, 1309 1309 .llseek = default_llseek, 1310 1310 .fallocate = hugetlbfs_fallocate, 1311 + .fop_flags = FOP_HUGE_PAGES, 1311 1312 }; 1312 1313 1313 1314 static const struct inode_operations hugetlbfs_dir_inode_operations = {

fs/jffs2/xattr.c

··· 1110 1110 return rc; 1111 1111 1112 1112 request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size); 1113 + if (request > c->sector_size - c->cleanmarker_size) 1114 + return -ERANGE; 1115 + 1113 1116 rc = jffs2_reserve_space(c, request, &length, 1114 1117 ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE); 1115 1118 if (rc) {

+49 -6

fs/libfs.c

··· 295 295 return 0; 296 296 } 297 297 298 + static int simple_offset_replace(struct offset_ctx *octx, struct dentry *dentry, 299 + long offset) 300 + { 301 + int ret; 302 + 303 + ret = mtree_store(&octx->mt, offset, dentry, GFP_KERNEL); 304 + if (ret) 305 + return ret; 306 + offset_set(dentry, offset); 307 + return 0; 308 + } 309 + 298 310 /** 299 311 * simple_offset_remove - Remove an entry to a directory's offset map 300 312 * @octx: directory offset ctx to be updated ··· 358 346 } 359 347 360 348 /** 349 + * simple_offset_rename - handle directory offsets for rename 350 + * @old_dir: parent directory of source entry 351 + * @old_dentry: dentry of source entry 352 + * @new_dir: parent_directory of destination entry 353 + * @new_dentry: dentry of destination 354 + * 355 + * Caller provides appropriate serialization. 356 + * 357 + * User space expects the directory offset value of the replaced 358 + * (new) directory entry to be unchanged after a rename. 359 + * 360 + * Returns zero on success, a negative errno value on failure. 361 + */ 362 + int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, 363 + struct inode *new_dir, struct dentry *new_dentry) 364 + { 365 + struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); 366 + struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); 367 + long new_offset = dentry2offset(new_dentry); 368 + 369 + simple_offset_remove(old_ctx, old_dentry); 370 + 371 + if (new_offset) { 372 + offset_set(new_dentry, 0); 373 + return simple_offset_replace(new_ctx, old_dentry, new_offset); 374 + } 375 + return simple_offset_add(new_ctx, old_dentry); 376 + } 377 + 378 + /** 361 379 * simple_offset_rename_exchange - exchange rename with directory offsets 362 380 * @old_dir: parent of dentry being moved 363 381 * @old_dentry: dentry being moved 364 382 * @new_dir: destination parent 365 383 * @new_dentry: destination dentry 384 + * 385 + * This API preserves the directory offset values. 
Caller provides 386 + * appropriate serialization. 366 387 * 367 388 * Returns zero on success. Otherwise a negative errno is returned and the 368 389 * rename is rolled back. ··· 414 369 simple_offset_remove(old_ctx, old_dentry); 415 370 simple_offset_remove(new_ctx, new_dentry); 416 371 417 - ret = simple_offset_add(new_ctx, old_dentry); 372 + ret = simple_offset_replace(new_ctx, old_dentry, new_index); 418 373 if (ret) 419 374 goto out_restore; 420 375 421 - ret = simple_offset_add(old_ctx, new_dentry); 376 + ret = simple_offset_replace(old_ctx, new_dentry, old_index); 422 377 if (ret) { 423 378 simple_offset_remove(new_ctx, old_dentry); 424 379 goto out_restore; ··· 433 388 return 0; 434 389 435 390 out_restore: 436 - offset_set(old_dentry, old_index); 437 - mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL); 438 - offset_set(new_dentry, new_index); 439 - mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL); 391 + (void)simple_offset_replace(old_ctx, old_dentry, old_index); 392 + (void)simple_offset_replace(new_ctx, new_dentry, new_index); 440 393 return ret; 441 394 } 442 395

+13 -6

fs/namei.c

··· 2422 2422 if (!f.file) 2423 2423 return ERR_PTR(-EBADF); 2424 2424 2425 + if (flags & LOOKUP_LINKAT_EMPTY) { 2426 + if (f.file->f_cred != current_cred() && 2427 + !ns_capable(f.file->f_cred->user_ns, CAP_DAC_READ_SEARCH)) { 2428 + fdput(f); 2429 + return ERR_PTR(-ENOENT); 2430 + } 2431 + } 2432 + 2425 2433 dentry = f.file->f_path.dentry; 2426 2434 2427 2435 if (*s && unlikely(!d_can_lookup(dentry))) { ··· 4649 4641 goto out_putnames; 4650 4642 } 4651 4643 /* 4652 - * To use null names we require CAP_DAC_READ_SEARCH 4644 + * To use null names we require CAP_DAC_READ_SEARCH or 4645 + * that the open-time creds of the dfd matches current. 4653 4646 * This ensures that not everyone will be able to create 4654 - * handlink using the passed filedescriptor. 4647 + * a hardlink using the passed file descriptor. 4655 4648 */ 4656 - if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) { 4657 - error = -ENOENT; 4658 - goto out_putnames; 4659 - } 4649 + if (flags & AT_EMPTY_PATH) 4650 + how |= LOOKUP_LINKAT_EMPTY; 4660 4651 4661 4652 if (flags & AT_SYMLINK_FOLLOW) 4662 4653 how |= LOOKUP_FOLLOW;

+1 -1

fs/nilfs2/ioctl.c

··· 60 60 if (argv->v_nmembs == 0) 61 61 return 0; 62 62 63 - if (argv->v_size > PAGE_SIZE) 63 + if ((size_t)argv->v_size > PAGE_SIZE) 64 64 return -EINVAL; 65 65 66 66 /*

+1 -3

fs/orangefs/dcache.c

··· 33 33 34 34 new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; 35 35 new_op->upcall.req.lookup.parent_refn = parent->refn; 36 - strncpy(new_op->upcall.req.lookup.d_name, 37 - dentry->d_name.name, 38 - ORANGEFS_NAME_MAX - 1); 36 + strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name); 39 37 40 38 gossip_debug(GOSSIP_DCACHE_DEBUG, 41 39 "%s:%s:%d interrupt flag [%d]\n",

+8 -18

fs/orangefs/namei.c

··· 41 41 fill_default_sys_attrs(new_op->upcall.req.create.attributes, 42 42 ORANGEFS_TYPE_METAFILE, mode); 43 43 44 - strncpy(new_op->upcall.req.create.d_name, 45 - dentry->d_name.name, ORANGEFS_NAME_MAX - 1); 44 + strscpy(new_op->upcall.req.create.d_name, dentry->d_name.name); 46 45 47 46 ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); 48 47 ··· 136 137 &parent->refn.khandle); 137 138 new_op->upcall.req.lookup.parent_refn = parent->refn; 138 139 139 - strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, 140 - ORANGEFS_NAME_MAX - 1); 140 + strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name); 141 141 142 142 gossip_debug(GOSSIP_NAME_DEBUG, 143 143 "%s: doing lookup on %s under %pU,%d\n", ··· 190 192 return -ENOMEM; 191 193 192 194 new_op->upcall.req.remove.parent_refn = parent->refn; 193 - strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name, 194 - ORANGEFS_NAME_MAX - 1); 195 + strscpy(new_op->upcall.req.remove.d_name, dentry->d_name.name); 195 196 196 197 ret = service_operation(new_op, "orangefs_unlink", 197 198 get_interruptible_flag(inode)); ··· 244 247 ORANGEFS_TYPE_SYMLINK, 245 248 mode); 246 249 247 - strncpy(new_op->upcall.req.sym.entry_name, 248 - dentry->d_name.name, 249 - ORANGEFS_NAME_MAX - 1); 250 - strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX - 1); 250 + strscpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name); 251 + strscpy(new_op->upcall.req.sym.target, symname); 251 252 252 253 ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); 253 254 ··· 319 324 fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes, 320 325 ORANGEFS_TYPE_DIRECTORY, mode); 321 326 322 - strncpy(new_op->upcall.req.mkdir.d_name, 323 - dentry->d_name.name, ORANGEFS_NAME_MAX - 1); 327 + strscpy(new_op->upcall.req.mkdir.d_name, dentry->d_name.name); 324 328 325 329 ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); 326 330 ··· 399 405 
new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn; 400 406 new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn; 401 407 402 - strncpy(new_op->upcall.req.rename.d_old_name, 403 - old_dentry->d_name.name, 404 - ORANGEFS_NAME_MAX - 1); 405 - strncpy(new_op->upcall.req.rename.d_new_name, 406 - new_dentry->d_name.name, 407 - ORANGEFS_NAME_MAX - 1); 408 + strscpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name); 409 + strscpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name); 408 410 409 411 ret = service_operation(new_op, 410 412 "orangefs_rename",

+6 -11

fs/orangefs/super.c

··· 253 253 new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT); 254 254 if (!new_op) 255 255 return -ENOMEM; 256 - strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, 257 - orangefs_sb->devname, 258 - ORANGEFS_MAX_SERVER_ADDR_LEN); 256 + strscpy(new_op->upcall.req.fs_mount.orangefs_config_server, 257 + orangefs_sb->devname); 259 258 260 259 gossip_debug(GOSSIP_SUPER_DEBUG, 261 260 "Attempting ORANGEFS Remount via host %s\n", ··· 399 400 return -ENOMEM; 400 401 op->upcall.req.fs_umount.id = id; 401 402 op->upcall.req.fs_umount.fs_id = fs_id; 402 - strncpy(op->upcall.req.fs_umount.orangefs_config_server, 403 - devname, ORANGEFS_MAX_SERVER_ADDR_LEN - 1); 403 + strscpy(op->upcall.req.fs_umount.orangefs_config_server, devname); 404 404 r = service_operation(op, "orangefs_fs_umount", 0); 405 405 /* Not much to do about an error here. */ 406 406 if (r) ··· 492 494 if (!new_op) 493 495 return ERR_PTR(-ENOMEM); 494 496 495 - strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, 496 - devname, 497 - ORANGEFS_MAX_SERVER_ADDR_LEN - 1); 497 + strscpy(new_op->upcall.req.fs_mount.orangefs_config_server, devname); 498 498 499 499 gossip_debug(GOSSIP_SUPER_DEBUG, 500 500 "Attempting ORANGEFS Mount via host %s\n", ··· 539 543 * on successful mount, store the devname and data 540 544 * used 541 545 */ 542 - strncpy(ORANGEFS_SB(sb)->devname, 543 - devname, 544 - ORANGEFS_MAX_SERVER_ADDR_LEN - 1); 546 + strscpy(ORANGEFS_SB(sb)->devname, devname); 547 + 545 548 546 549 /* mount_pending must be cleared */ 547 550 ORANGEFS_SB(sb)->mount_pending = 0;

-4

fs/overlayfs/params.c

··· 139 139 return OVL_VERITY_OFF; 140 140 } 141 141 142 - #define fsparam_string_empty(NAME, OPT) \ 143 - __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) 144 - 145 - 146 142 const struct fs_parameter_spec ovl_parameter_spec[] = { 147 143 fsparam_string_empty("lowerdir", Opt_lowerdir), 148 144 fsparam_string("lowerdir+", Opt_lowerdir_add),

+20 -22

fs/proc/fd.c

··· 74 74 return 0; 75 75 } 76 76 77 - static int proc_fdinfo_access_allowed(struct inode *inode) 77 + static int seq_fdinfo_open(struct inode *inode, struct file *file) 78 + { 79 + return single_open(file, seq_show, inode); 80 + } 81 + 82 + /** 83 + * Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure 84 + * that the current task has PTRACE_MODE_READ in addition to the normal 85 + * POSIX-like checks. 86 + */ 87 + static int proc_fdinfo_permission(struct mnt_idmap *idmap, struct inode *inode, 88 + int mask) 78 89 { 79 90 bool allowed = false; 80 91 struct task_struct *task = get_proc_task(inode); ··· 99 88 if (!allowed) 100 89 return -EACCES; 101 90 102 - return 0; 91 + return generic_permission(idmap, inode, mask); 103 92 } 104 93 105 - static int seq_fdinfo_open(struct inode *inode, struct file *file) 106 - { 107 - int ret = proc_fdinfo_access_allowed(inode); 108 - 109 - if (ret) 110 - return ret; 111 - 112 - return single_open(file, seq_show, inode); 113 - } 94 + static const struct inode_operations proc_fdinfo_file_inode_operations = { 95 + .permission = proc_fdinfo_permission, 96 + .setattr = proc_setattr, 97 + }; 114 98 115 99 static const struct file_operations proc_fdinfo_file_operations = { 116 100 .open = seq_fdinfo_open, ··· 394 388 ei = PROC_I(inode); 395 389 ei->fd = data->fd; 396 390 391 + inode->i_op = &proc_fdinfo_file_inode_operations; 392 + 397 393 inode->i_fop = &proc_fdinfo_file_operations; 398 394 tid_fd_update_inode(task, inode, 0); 399 395 ··· 415 407 proc_fdinfo_instantiate); 416 408 } 417 409 418 - static int proc_open_fdinfo(struct inode *inode, struct file *file) 419 - { 420 - int ret = proc_fdinfo_access_allowed(inode); 421 - 422 - if (ret) 423 - return ret; 424 - 425 - return 0; 426 - } 427 - 428 410 const struct inode_operations proc_fdinfo_inode_operations = { 429 411 .lookup = proc_lookupfdinfo, 412 + .permission = proc_fdinfo_permission, 430 413 .setattr = proc_setattr, 431 414 }; 432 415 433 416 const 
struct file_operations proc_fdinfo_operations = { 434 - .open = proc_open_fdinfo, 435 417 .read = generic_read_dir, 436 418 .iterate_shared = proc_readfdinfo, 437 419 .llseek = generic_file_llseek,

fs/proc/proc_net.c

··· 135 135 * @parent: The parent directory in which to create. 136 136 * @ops: The seq_file ops with which to read the file. 137 137 * @write: The write method with which to 'modify' the file. 138 + * @state_size: The size of the per-file private state to allocate. 138 139 * @data: Data for retrieval by pde_data(). 139 140 * 140 141 * Create a network namespaced proc file in the @parent directory with the

+1 -1

fs/read_write.c

··· 1685 1685 1686 1686 if ((iocb->ki_flags & IOCB_NOWAIT) && 1687 1687 !((iocb->ki_flags & IOCB_DIRECT) || 1688 - (file->f_mode & FMODE_BUF_WASYNC))) 1688 + (file->f_op->fop_flags & FOP_BUFFER_WASYNC))) 1689 1689 return -EINVAL; 1690 1690 1691 1691 return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);

+3 -10

fs/seq_file.c

··· 669 669 } 670 670 EXPORT_SYMBOL(seq_putc); 671 671 672 - void seq_puts(struct seq_file *m, const char *s) 672 + void __seq_puts(struct seq_file *m, const char *s) 673 673 { 674 - int len = strlen(s); 675 - 676 - if (m->count + len >= m->size) { 677 - seq_set_overflow(m); 678 - return; 679 - } 680 - memcpy(m->buf + m->count, s, len); 681 - m->count += len; 674 + seq_write(m, s, strlen(s)); 682 675 } 683 - EXPORT_SYMBOL(seq_puts); 676 + EXPORT_SYMBOL(__seq_puts); 684 677 685 678 /** 686 679 * seq_put_decimal_ull_width - A helper routine for putting decimal numbers

fs/stat.c

··· 658 658 tmp.stx_mnt_id = stat->mnt_id; 659 659 tmp.stx_dio_mem_align = stat->dio_mem_align; 660 660 tmp.stx_dio_offset_align = stat->dio_offset_align; 661 + tmp.stx_subvol = stat->subvol; 661 662 662 663 return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; 663 664 }

+6 -4

fs/xfs/xfs_file.c

··· 1230 1230 { 1231 1231 if (xfs_is_shutdown(XFS_M(inode->i_sb))) 1232 1232 return -EIO; 1233 - file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | 1234 - FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT; 1233 + file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; 1235 1234 return generic_file_open(inode, file); 1236 1235 } 1237 1236 ··· 1243 1244 unsigned int mode; 1244 1245 int error; 1245 1246 1246 - error = xfs_file_open(inode, file); 1247 + if (xfs_is_shutdown(ip->i_mount)) 1248 + return -EIO; 1249 + error = generic_file_open(inode, file); 1247 1250 if (error) 1248 1251 return error; 1249 1252 ··· 1491 1490 .compat_ioctl = xfs_file_compat_ioctl, 1492 1491 #endif 1493 1492 .mmap = xfs_file_mmap, 1494 - .mmap_supported_flags = MAP_SYNC, 1495 1493 .open = xfs_file_open, 1496 1494 .release = xfs_file_release, 1497 1495 .fsync = xfs_file_fsync, ··· 1498 1498 .fallocate = xfs_file_fallocate, 1499 1499 .fadvise = xfs_file_fadvise, 1500 1500 .remap_file_range = xfs_file_remap_range, 1501 + .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | 1502 + FOP_BUFFER_WASYNC | FOP_DIO_PARALLEL_WRITE, 1501 1503 }; 1502 1504 1503 1505 const struct file_operations xfs_dir_file_operations = {

include/linux/anon_inodes.h

··· 9 9 #ifndef _LINUX_ANON_INODES_H 10 10 #define _LINUX_ANON_INODES_H 11 11 12 + #include <linux/types.h> 13 + 12 14 struct file_operations; 13 15 struct inode; 14 16 15 17 struct file *anon_inode_getfile(const char *name, 16 18 const struct file_operations *fops, 17 19 void *priv, int flags); 20 + struct file *anon_inode_getfile_fmode(const char *name, 21 + const struct file_operations *fops, 22 + void *priv, int flags, fmode_t f_mode); 18 23 struct file *anon_inode_create_getfile(const char *name, 19 24 const struct file_operations *fops, 20 25 void *priv, int flags,

include/linux/file.h

··· 84 84 } 85 85 86 86 DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) 87 + DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd) 87 88 88 89 extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); 89 90 extern int replace_fd(unsigned fd, struct file *file, unsigned flags);

+56 -38

include/linux/fs.h

··· 110 110 */ 111 111 112 112 /* file is open for reading */ 113 - #define FMODE_READ ((__force fmode_t)0x1) 113 + #define FMODE_READ ((__force fmode_t)(1 << 0)) 114 114 /* file is open for writing */ 115 - #define FMODE_WRITE ((__force fmode_t)0x2) 115 + #define FMODE_WRITE ((__force fmode_t)(1 << 1)) 116 116 /* file is seekable */ 117 - #define FMODE_LSEEK ((__force fmode_t)0x4) 117 + #define FMODE_LSEEK ((__force fmode_t)(1 << 2)) 118 118 /* file can be accessed using pread */ 119 - #define FMODE_PREAD ((__force fmode_t)0x8) 119 + #define FMODE_PREAD ((__force fmode_t)(1 << 3)) 120 120 /* file can be accessed using pwrite */ 121 - #define FMODE_PWRITE ((__force fmode_t)0x10) 121 + #define FMODE_PWRITE ((__force fmode_t)(1 << 4)) 122 122 /* File is opened for execution with sys_execve / sys_uselib */ 123 - #define FMODE_EXEC ((__force fmode_t)0x20) 123 + #define FMODE_EXEC ((__force fmode_t)(1 << 5)) 124 124 /* File writes are restricted (block device specific) */ 125 - #define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) 125 + #define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6)) 126 + 127 + /* FMODE_* bits 7 to 8 */ 128 + 126 129 /* 32bit hashes as llseek() offset (for directories) */ 127 - #define FMODE_32BITHASH ((__force fmode_t)0x200) 130 + #define FMODE_32BITHASH ((__force fmode_t)(1 << 9)) 128 131 /* 64bit hashes as llseek() offset (for directories) */ 129 - #define FMODE_64BITHASH ((__force fmode_t)0x400) 132 + #define FMODE_64BITHASH ((__force fmode_t)(1 << 10)) 130 133 131 134 /* 132 135 * Don't update ctime and mtime. ··· 137 134 * Currently a special hack for the XFS open_by_handle ioctl, but we'll 138 135 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. 
139 136 */ 140 - #define FMODE_NOCMTIME ((__force fmode_t)0x800) 137 + #define FMODE_NOCMTIME ((__force fmode_t)(1 << 11)) 141 138 142 139 /* Expect random access pattern */ 143 - #define FMODE_RANDOM ((__force fmode_t)0x1000) 140 + #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) 144 141 145 142 /* File is huge (eg. /dev/mem): treat loff_t as unsigned */ 146 - #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) 143 + #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13)) 147 144 148 145 /* File is opened with O_PATH; almost nothing can be done with it */ 149 - #define FMODE_PATH ((__force fmode_t)0x4000) 146 + #define FMODE_PATH ((__force fmode_t)(1 << 14)) 150 147 151 148 /* File needs atomic accesses to f_pos */ 152 - #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) 149 + #define FMODE_ATOMIC_POS ((__force fmode_t)(1 << 15)) 153 150 /* Write access to underlying fs */ 154 - #define FMODE_WRITER ((__force fmode_t)0x10000) 151 + #define FMODE_WRITER ((__force fmode_t)(1 << 16)) 155 152 /* Has read method(s) */ 156 - #define FMODE_CAN_READ ((__force fmode_t)0x20000) 153 + #define FMODE_CAN_READ ((__force fmode_t)(1 << 17)) 157 154 /* Has write method(s) */ 158 - #define FMODE_CAN_WRITE ((__force fmode_t)0x40000) 155 + #define FMODE_CAN_WRITE ((__force fmode_t)(1 << 18)) 159 156 160 - #define FMODE_OPENED ((__force fmode_t)0x80000) 161 - #define FMODE_CREATED ((__force fmode_t)0x100000) 157 + #define FMODE_OPENED ((__force fmode_t)(1 << 19)) 158 + #define FMODE_CREATED ((__force fmode_t)(1 << 20)) 162 159 163 160 /* File is stream-like */ 164 - #define FMODE_STREAM ((__force fmode_t)0x200000) 161 + #define FMODE_STREAM ((__force fmode_t)(1 << 21)) 165 162 166 163 /* File supports DIRECT IO */ 167 - #define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) 164 + #define FMODE_CAN_ODIRECT ((__force fmode_t)(1 << 22)) 168 165 169 - #define FMODE_NOREUSE ((__force fmode_t)0x800000) 166 + #define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) 170 167 171 - /* File 
supports non-exclusive O_DIRECT writes from multiple threads */ 172 - #define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000) 168 + /* FMODE_* bit 24 */ 173 169 174 170 /* File is embedded in backing_file object */ 175 - #define FMODE_BACKING ((__force fmode_t)0x2000000) 171 + #define FMODE_BACKING ((__force fmode_t)(1 << 25)) 176 172 177 173 /* File was opened by fanotify and shouldn't generate fanotify events */ 178 - #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) 174 + #define FMODE_NONOTIFY ((__force fmode_t)(1 << 26)) 179 175 180 176 /* File is capable of returning -EAGAIN if I/O will block */ 181 - #define FMODE_NOWAIT ((__force fmode_t)0x8000000) 177 + #define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) 182 178 183 179 /* File represents mount that needs unmounting */ 184 - #define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) 180 + #define FMODE_NEED_UNMOUNT ((__force fmode_t)(1 << 28)) 185 181 186 182 /* File does not contribute to nr_files count */ 187 - #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) 188 - 189 - /* File supports async buffered reads */ 190 - #define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) 191 - 192 - /* File supports async nowait buffered writes */ 193 - #define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000) 183 + #define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) 194 184 195 185 /* 196 186 * Attribute flags. 
These should be or-ed together to figure out what ··· 1031 1035 __u32 handle_bytes; 1032 1036 int handle_type; 1033 1037 /* file identifier */ 1034 - unsigned char f_handle[]; 1038 + unsigned char f_handle[] __counted_by(handle_bytes); 1035 1039 }; 1036 1040 1037 1041 static inline struct file *get_file(struct file *f) 1038 1042 { 1039 - atomic_long_inc(&f->f_count); 1043 + long prior = atomic_long_fetch_inc_relaxed(&f->f_count); 1044 + WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n"); 1040 1045 return f; 1041 1046 } 1042 1047 ··· 2000 2003 struct io_uring_cmd; 2001 2004 struct offset_ctx; 2002 2005 2006 + typedef unsigned int __bitwise fop_flags_t; 2007 + 2003 2008 struct file_operations { 2004 2009 struct module *owner; 2010 + fop_flags_t fop_flags; 2005 2011 loff_t (*llseek) (struct file *, loff_t, int); 2006 2012 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); 2007 2013 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ··· 2017 2017 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 2018 2018 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 2019 2019 int (*mmap) (struct file *, struct vm_area_struct *); 2020 - unsigned long mmap_supported_flags; 2021 2020 int (*open) (struct inode *, struct file *); 2022 2021 int (*flush) (struct file *, fl_owner_t id); 2023 2022 int (*release) (struct inode *, struct file *); ··· 2046 2047 int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, 2047 2048 unsigned int poll_flags); 2048 2049 } __randomize_layout; 2050 + 2051 + /* Supports async buffered reads */ 2052 + #define FOP_BUFFER_RASYNC ((__force fop_flags_t)(1 << 0)) 2053 + /* Supports async buffered writes */ 2054 + #define FOP_BUFFER_WASYNC ((__force fop_flags_t)(1 << 1)) 2055 + /* Supports synchronous page faults for mappings */ 2056 + #define FOP_MMAP_SYNC ((__force fop_flags_t)(1 << 2)) 2057 + /* Supports non-exclusive 
O_DIRECT writes from multiple threads */ 2058 + #define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) 2059 + /* Contains huge pages */ 2060 + #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) 2049 2061 2050 2062 /* Wrap a directory iterator that needs exclusive inode access */ 2051 2063 int wrap_directory_iterator(struct file *, struct dir_context *, ··· 2263 2253 2264 2254 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) 2265 2255 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) 2256 + 2257 + #ifdef CONFIG_SWAP 2266 2258 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) 2259 + #else 2260 + #define IS_SWAPFILE(inode) ((void)(inode), 0U) 2261 + #endif 2262 + 2267 2263 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) 2268 2264 #define IS_IMA(inode) ((inode)->i_flags & S_IMA) 2269 2265 #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) ··· 3356 3340 int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); 3357 3341 void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); 3358 3342 int simple_offset_empty(struct dentry *dentry); 3343 + int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, 3344 + struct inode *new_dir, struct dentry *new_dentry); 3359 3345 int simple_offset_rename_exchange(struct inode *old_dir, 3360 3346 struct dentry *old_dentry, 3361 3347 struct inode *new_dir,

include/linux/fs_parser.h

··· 132 132 #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) 133 133 #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) 134 134 135 + /* String parameter that allows empty argument */ 136 + #define fsparam_string_empty(NAME, OPT) \ 137 + __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) 138 + 135 139 #endif /* _LINUX_FS_PARSER_H */

+2 -6

include/linux/hugetlb.h

··· 554 554 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); 555 555 } 556 556 557 - extern const struct file_operations hugetlbfs_file_operations; 558 557 extern const struct vm_operations_struct hugetlb_vm_ops; 559 558 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, 560 559 int creat_flags, int page_size_log); 561 560 562 - static inline bool is_file_hugepages(struct file *file) 561 + static inline bool is_file_hugepages(const struct file *file) 563 562 { 564 - if (file->f_op == &hugetlbfs_file_operations) 565 - return true; 566 - 567 - return is_file_shm_hugepages(file); 563 + return file->f_op->fop_flags & FOP_HUGE_PAGES; 568 564 } 569 565 570 566 static inline struct hstate *hstate_inode(struct inode *i)

include/linux/namei.h

··· 44 44 #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ 45 45 #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ 46 46 #define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ 47 + #define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */ 47 48 /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ 48 49 #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) 49 50

+12 -1

include/linux/seq_file.h

··· 118 118 __printf(2, 3) 119 119 void seq_printf(struct seq_file *m, const char *fmt, ...); 120 120 void seq_putc(struct seq_file *m, char c); 121 - void seq_puts(struct seq_file *m, const char *s); 121 + void __seq_puts(struct seq_file *m, const char *s); 122 + 123 + static __always_inline void seq_puts(struct seq_file *m, const char *s) 124 + { 125 + if (!__builtin_constant_p(*s)) 126 + __seq_puts(m, s); 127 + else if (s[0] && !s[1]) 128 + seq_putc(m, s[0]); 129 + else 130 + seq_write(m, s, __builtin_strlen(s)); 131 + } 132 + 122 133 void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, 123 134 unsigned long long num, unsigned int width); 124 135 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,

-5

include/linux/shm.h

··· 16 16 17 17 long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, 18 18 unsigned long shmlba); 19 - bool is_file_shm_hugepages(struct file *file); 20 19 void exit_shm(struct task_struct *task); 21 20 #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) 22 21 #else ··· 28 29 unsigned long shmlba) 29 30 { 30 31 return -ENOSYS; 31 - } 32 - static inline bool is_file_shm_hugepages(struct file *file) 33 - { 34 - return false; 35 32 } 36 33 static inline void exit_shm(struct task_struct *task) 37 34 {

include/linux/stat.h

··· 53 53 u32 dio_mem_align; 54 54 u32 dio_offset_align; 55 55 u64 change_cookie; 56 + u64 subvol; 56 57 }; 57 58 58 59 /* These definitions are internal to the kernel for now. Mainly used by nfsd. */

+8 -6

include/uapi/linux/fcntl.h

··· 9 9 #define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) 10 10 11 11 /* 12 + * Request nofications on a directory. 13 + * See below for events that may be notified. 14 + */ 15 + #define F_NOTIFY (F_LINUX_SPECIFIC_BASE + 2) 16 + 17 + #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) 18 + 19 + /* 12 20 * Cancel a blocking posix lock; internal use only until we expose an 13 21 * asynchronous lock api to userspace: 14 22 */ ··· 24 16 25 17 /* Create a file descriptor with FD_CLOEXEC set. */ 26 18 #define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6) 27 - 28 - /* 29 - * Request nofications on a directory. 30 - * See below for events that may be notified. 31 - */ 32 - #define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2) 33 19 34 20 /* 35 21 * Set and get of pipe page size array

+3 -1

include/uapi/linux/stat.h

··· 126 126 __u64 stx_mnt_id; 127 127 __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ 128 128 __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ 129 + __u64 stx_subvol; /* Subvolume identifier */ 129 130 /* 0xa0 */ 130 - __u64 __spare3[12]; /* Spare space for future expansion */ 131 + __u64 __spare3[11]; /* Spare space for future expansion */ 131 132 /* 0x100 */ 132 133 }; 133 134 ··· 156 155 #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ 157 156 #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ 158 157 #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ 158 + #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ 159 159 160 160 #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ 161 161

+1 -1

io_uring/io_uring.c

··· 471 471 472 472 /* don't serialize this request if the fs doesn't need it */ 473 473 if (should_hash && (req->file->f_flags & O_DIRECT) && 474 - (req->file->f_mode & FMODE_DIO_PARALLEL_WRITE)) 474 + (req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE)) 475 475 should_hash = false; 476 476 if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL)) 477 477 io_wq_hash_work(&req->work, file_inode(req->file));

+5 -4

io_uring/rw.c

··· 683 683 * just use poll if we can, and don't attempt if the fs doesn't 684 684 * support callback based unlocks 685 685 */ 686 - if (io_file_can_poll(req) || !(req->file->f_mode & FMODE_BUF_RASYNC)) 686 + if (io_file_can_poll(req) || 687 + !(req->file->f_op->fop_flags & FOP_BUFFER_RASYNC)) 687 688 return false; 688 689 689 690 wait->wait.func = io_async_buf_func; ··· 1030 1029 if (unlikely(!io_file_supports_nowait(req))) 1031 1030 goto copy_iov; 1032 1031 1033 - /* File path supports NOWAIT for non-direct_IO only for block devices. */ 1032 + /* Check if we can support NOWAIT. */ 1034 1033 if (!(kiocb->ki_flags & IOCB_DIRECT) && 1035 - !(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) && 1036 - (req->flags & REQ_F_ISREG)) 1034 + !(req->file->f_op->fop_flags & FOP_BUFFER_WASYNC) && 1035 + (req->flags & REQ_F_ISREG)) 1037 1036 goto copy_iov; 1038 1037 1039 1038 kiocb->ki_flags |= IOCB_NOWAIT;

+3 -7

ipc/shm.c

··· 662 662 }; 663 663 664 664 /* 665 - * shm_file_operations_huge is now identical to shm_file_operations, 666 - * but we keep it distinct for the sake of is_file_shm_hugepages(). 665 + * shm_file_operations_huge is now identical to shm_file_operations 666 + * except for fop_flags 667 667 */ 668 668 static const struct file_operations shm_file_operations_huge = { 669 669 .mmap = shm_mmap, ··· 672 672 .get_unmapped_area = shm_get_unmapped_area, 673 673 .llseek = noop_llseek, 674 674 .fallocate = shm_fallocate, 675 + .fop_flags = FOP_HUGE_PAGES, 675 676 }; 676 - 677 - bool is_file_shm_hugepages(struct file *file) 678 - { 679 - return file->f_op == &shm_file_operations_huge; 680 - } 681 677 682 678 static const struct vm_operations_struct shm_vm_ops = { 683 679 .open = shm_open, /* callback for a new vm-area open */

+3 -1

mm/mmap.c

··· 1294 1294 if (!file_mmap_ok(file, inode, pgoff, len)) 1295 1295 return -EOVERFLOW; 1296 1296 1297 - flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; 1297 + flags_mask = LEGACY_MAP_MASK; 1298 + if (file->f_op->fop_flags & FOP_MMAP_SYNC) 1299 + flags_mask |= MAP_SYNC; 1298 1300 1299 1301 switch (flags & MAP_TYPE) { 1300 1302 case MAP_SHARED:

+1 -2

mm/shmem.c

··· 3467 3467 return error; 3468 3468 } 3469 3469 3470 - simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry); 3471 - error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry); 3470 + error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); 3472 3471 if (error) 3473 3472 return error; 3474 3473

+54 -1

tools/testing/selftests/core/close_range_test.c

··· 17 17 #include "../kselftest_harness.h" 18 18 #include "../clone3/clone3_selftests.h" 19 19 20 + 21 + #ifndef F_LINUX_SPECIFIC_BASE 22 + #define F_LINUX_SPECIFIC_BASE 1024 23 + #endif 24 + 25 + #ifndef F_DUPFD_QUERY 26 + #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) 27 + #endif 28 + 20 29 static inline int sys_close_range(unsigned int fd, unsigned int max_fd, 21 30 unsigned int flags) 22 31 { ··· 52 43 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { 53 44 if (errno == ENOSYS) 54 45 SKIP(return, "close_range() syscall not supported"); 46 + } 47 + 48 + for (i = 0; i < 100; i++) { 49 + ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]); 50 + if (ret < 0) { 51 + EXPECT_EQ(errno, EINVAL); 52 + } else { 53 + EXPECT_EQ(ret, 0); 54 + } 55 55 } 56 56 57 57 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); ··· 376 358 */ 377 359 TEST(close_range_cloexec_syzbot) 378 360 { 379 - int fd1, fd2, fd3, flags, ret, status; 361 + int fd1, fd2, fd3, fd4, flags, ret, status; 380 362 pid_t pid; 381 363 struct __clone_args args = { 382 364 .flags = CLONE_FILES, ··· 389 371 390 372 fd2 = dup2(fd1, 1000); 391 373 EXPECT_GT(fd2, 0); 374 + 375 + flags = fcntl(fd1, F_DUPFD_QUERY, fd2); 376 + if (flags < 0) { 377 + EXPECT_EQ(errno, EINVAL); 378 + } else { 379 + EXPECT_EQ(flags, 1); 380 + } 392 381 393 382 pid = sys_clone3(&args, sizeof(args)); 394 383 ASSERT_GE(pid, 0); ··· 420 395 421 396 fd3 = dup2(fd1, 42); 422 397 EXPECT_GT(fd3, 0); 398 + 399 + flags = fcntl(fd1, F_DUPFD_QUERY, fd3); 400 + if (flags < 0) { 401 + EXPECT_EQ(errno, EINVAL); 402 + } else { 403 + EXPECT_EQ(flags, 1); 404 + } 405 + 406 + 423 407 424 408 /* 425 409 * Duplicating the file descriptor must remove the ··· 460 426 fd3 = dup2(fd1, 42); 461 427 EXPECT_GT(fd3, 0); 462 428 429 + flags = fcntl(fd1, F_DUPFD_QUERY, fd3); 430 + if (flags < 0) { 431 + EXPECT_EQ(errno, EINVAL); 432 + } else { 433 + EXPECT_EQ(flags, 1); 434 + } 435 + 436 + fd4 = open("/dev/null", O_RDWR); 437 + 
EXPECT_GT(fd4, 0); 438 + 439 + /* Same inode, different file pointers. */ 440 + flags = fcntl(fd1, F_DUPFD_QUERY, fd4); 441 + if (flags < 0) { 442 + EXPECT_EQ(errno, EINVAL); 443 + } else { 444 + EXPECT_EQ(flags, 0); 445 + } 446 + 463 447 flags = fcntl(fd3, F_GETFD); 464 448 EXPECT_GT(flags, -1); 465 449 EXPECT_EQ(flags & FD_CLOEXEC, 0); ··· 485 433 EXPECT_EQ(close(fd1), 0); 486 434 EXPECT_EQ(close(fd2), 0); 487 435 EXPECT_EQ(close(fd3), 0); 436 + EXPECT_EQ(close(fd4), 0); 488 437 } 489 438 490 439 /*

Configure Feed

Configure Feed