Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'pull-fixes.pathwalk-rcu-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull RCU pathwalk fixes from Al Viro:
"We still have some races in filesystem methods when exposed to RCU
pathwalk. This series is the result of a code audit (the second round
of it) and it should deal with most of that stuff.

Still pending: ntfs3 ->d_hash()/->d_compare() and ceph_d_revalidate().
Those are up to the maintainers (a note for NTFS folks - when the
documentation says that a method may not block, it *does* imply that
blocking allocations are to be avoided. Really)"

[ More explanations for people who aren't familiar with the vagaries of
RCU path walking: most of it is hidden from filesystems, but if a
filesystem actively participates in the low-level path walking it
needs to make sure the fields involved in that walk are RCU-safe.

That "actively participate in low-level path walking" includes things
like having its own ->d_hash()/->d_compare() routines, or having
its own directory permission function that doesn't just use the common
helpers. A filesystem with a ->d_revalidate() function also has this issue.

Note that instead of making everything RCU safe you can also choose to
abort the RCU pathwalk if your operation cannot be done safely under
RCU, but that obviously comes with a performance penalty. One common
pattern is to allow the simple cases under RCU, and abort only if you
need to do something more complicated.

So not everything needs to be RCU-safe, and things like the inode etc.
that the VFS itself maintains obviously already are. But these fixes
tend to be about properly RCU-delaying things like ->s_fs_info that
are maintained by the filesystem and that could potentially be released
too early. - Linus ]

* tag 'pull-fixes.pathwalk-rcu-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
ext4_get_link(): fix breakage in RCU mode
cifs_get_link(): bail out in unsafe case
fuse: fix UAF in rcu pathwalks
procfs: make freeing proc_fs_info rcu-delayed
procfs: move dropping pde and pid from ->evict_inode() to ->free_inode()
nfs: fix UAF on pathwalk running into umount
nfs: make nfs_set_verifier() safe for use in RCU pathwalk
afs: fix __afs_break_callback() / afs_drop_open_mmap() race
hfsplus: switch to rcu-delayed unloading of nls and freeing ->s_fs_info
exfat: move freeing sbi, upcase table and dropping nls into rcu-delayed helper
affs: free affs_sb_info with kfree_rcu()
rcu pathwalk: prevent bogus hard errors from may_lookup()
fs/super.c: don't drop ->s_user_ns until we free struct super_block itself

+88 -63
+1
fs/affs/affs.h
··· 105 105 int work_queued; /* non-zero delayed work is queued */ 106 106 struct delayed_work sb_work; /* superblock flush delayed work */ 107 107 spinlock_t work_lock; /* protects sb_work and work_queued */ 108 + struct rcu_head rcu; 108 109 }; 109 110 110 111 #define AFFS_MOUNT_SF_INTL 0x0001 /* International filesystem. */
+1 -1
fs/affs/super.c
··· 640 640 affs_brelse(sbi->s_root_bh); 641 641 kfree(sbi->s_prefix); 642 642 mutex_destroy(&sbi->s_bmlock); 643 - kfree(sbi); 643 + kfree_rcu(sbi, rcu); 644 644 } 645 645 } 646 646
+6 -2
fs/afs/file.c
··· 417 417 418 418 static void afs_drop_open_mmap(struct afs_vnode *vnode) 419 419 { 420 - if (!atomic_dec_and_test(&vnode->cb_nr_mmap)) 420 + if (atomic_add_unless(&vnode->cb_nr_mmap, -1, 1)) 421 421 return; 422 422 423 423 down_write(&vnode->volume->open_mmaps_lock); 424 424 425 - if (atomic_read(&vnode->cb_nr_mmap) == 0) 425 + read_seqlock_excl(&vnode->cb_lock); 426 + // the only place where ->cb_nr_mmap may hit 0 427 + // see __afs_break_callback() for the other side... 428 + if (atomic_dec_and_test(&vnode->cb_nr_mmap)) 426 429 list_del_init(&vnode->cb_mmap_link); 430 + read_sequnlock_excl(&vnode->cb_lock); 427 431 428 432 up_write(&vnode->volume->open_mmaps_lock); 429 433 flush_work(&vnode->cb_work);
+1
fs/exfat/exfat_fs.h
··· 275 275 276 276 spinlock_t inode_hash_lock; 277 277 struct hlist_head inode_hashtable[EXFAT_HASH_SIZE]; 278 + struct rcu_head rcu; 278 279 }; 279 280 280 281 #define EXFAT_CACHE_VALID 0
+4 -10
fs/exfat/nls.c
··· 655 655 unsigned int sect_size = sb->s_blocksize; 656 656 unsigned int i, index = 0; 657 657 u32 chksum = 0; 658 - int ret; 659 658 unsigned char skip = false; 660 659 unsigned short *upcase_table; 661 660 ··· 672 673 if (!bh) { 673 674 exfat_err(sb, "failed to read sector(0x%llx)", 674 675 (unsigned long long)sector); 675 - ret = -EIO; 676 - goto free_table; 676 + return -EIO; 677 677 } 678 678 sector++; 679 679 for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) { ··· 699 701 700 702 exfat_err(sb, "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)", 701 703 index, chksum, utbl_checksum); 702 - ret = -EINVAL; 703 - free_table: 704 - exfat_free_upcase_table(sbi); 705 - return ret; 704 + return -EINVAL; 706 705 } 707 706 708 707 static int exfat_load_default_upcase_table(struct super_block *sb) 709 708 { 710 - int i, ret = -EIO; 709 + int i; 711 710 struct exfat_sb_info *sbi = EXFAT_SB(sb); 712 711 unsigned char skip = false; 713 712 unsigned short uni = 0, *upcase_table; ··· 735 740 return 0; 736 741 737 742 /* FATAL error: default upcase table has error */ 738 - exfat_free_upcase_table(sbi); 739 - return ret; 743 + return -EIO; 740 744 } 741 745 742 746 int exfat_create_upcase_table(struct super_block *sb)
+11 -9
fs/exfat/super.c
··· 39 39 exfat_free_bitmap(sbi); 40 40 brelse(sbi->boot_bh); 41 41 mutex_unlock(&sbi->s_lock); 42 - 43 - unload_nls(sbi->nls_io); 44 - exfat_free_upcase_table(sbi); 45 42 } 46 43 47 44 static int exfat_sync_fs(struct super_block *sb, int wait) ··· 597 600 ret = exfat_load_bitmap(sb); 598 601 if (ret) { 599 602 exfat_err(sb, "failed to load alloc-bitmap"); 600 - goto free_upcase_table; 603 + goto free_bh; 601 604 } 602 605 603 606 ret = exfat_count_used_clusters(sb, &sbi->used_clusters); ··· 610 613 611 614 free_alloc_bitmap: 612 615 exfat_free_bitmap(sbi); 613 - free_upcase_table: 614 - exfat_free_upcase_table(sbi); 615 616 free_bh: 616 617 brelse(sbi->boot_bh); 617 618 return ret; ··· 696 701 sb->s_root = NULL; 697 702 698 703 free_table: 699 - exfat_free_upcase_table(sbi); 700 704 exfat_free_bitmap(sbi); 701 705 brelse(sbi->boot_bh); 702 706 703 707 check_nls_io: 704 - unload_nls(sbi->nls_io); 705 708 return err; 706 709 } 707 710 ··· 764 771 return 0; 765 772 } 766 773 774 + static void delayed_free(struct rcu_head *p) 775 + { 776 + struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu); 777 + 778 + unload_nls(sbi->nls_io); 779 + exfat_free_upcase_table(sbi); 780 + exfat_free_sbi(sbi); 781 + } 782 + 767 783 static void exfat_kill_sb(struct super_block *sb) 768 784 { 769 785 struct exfat_sb_info *sbi = sb->s_fs_info; 770 786 771 787 kill_block_super(sb); 772 788 if (sbi) 773 - exfat_free_sbi(sbi); 789 + call_rcu(&sbi->rcu, delayed_free); 774 790 } 775 791 776 792 static struct file_system_type exfat_fs_type = {
+5 -3
fs/ext4/symlink.c
··· 92 92 93 93 if (!dentry) { 94 94 bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT); 95 - if (IS_ERR(bh)) 96 - return ERR_CAST(bh); 97 - if (!bh || !ext4_buffer_uptodate(bh)) 95 + if (IS_ERR(bh) || !bh) 98 96 return ERR_PTR(-ECHILD); 97 + if (!ext4_buffer_uptodate(bh)) { 98 + brelse(bh); 99 + return ERR_PTR(-ECHILD); 100 + } 99 101 } else { 100 102 bh = ext4_bread(NULL, inode, 0, 0); 101 103 if (IS_ERR(bh))
+1 -2
fs/fuse/cuse.c
··· 474 474 475 475 static void cuse_fc_release(struct fuse_conn *fc) 476 476 { 477 - struct cuse_conn *cc = fc_to_cc(fc); 478 - kfree_rcu(cc, fc.rcu); 477 + kfree(fc_to_cc(fc)); 479 478 } 480 479 481 480 /**
+1
fs/fuse/fuse_i.h
··· 888 888 889 889 /* Entry on fc->mounts */ 890 890 struct list_head fc_entry; 891 + struct rcu_head rcu; 891 892 }; 892 893 893 894 static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
+11 -4
fs/fuse/inode.c
··· 930 930 } 931 931 EXPORT_SYMBOL_GPL(fuse_conn_init); 932 932 933 + static void delayed_release(struct rcu_head *p) 934 + { 935 + struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); 936 + 937 + put_user_ns(fc->user_ns); 938 + fc->release(fc); 939 + } 940 + 933 941 void fuse_conn_put(struct fuse_conn *fc) 934 942 { 935 943 if (refcount_dec_and_test(&fc->count)) { ··· 949 941 if (fiq->ops->release) 950 942 fiq->ops->release(fiq); 951 943 put_pid_ns(fc->pid_ns); 952 - put_user_ns(fc->user_ns); 953 944 bucket = rcu_dereference_protected(fc->curr_bucket, 1); 954 945 if (bucket) { 955 946 WARN_ON(atomic_read(&bucket->count) != 1); 956 947 kfree(bucket); 957 948 } 958 - fc->release(fc); 949 + call_rcu(&fc->rcu, delayed_release); 959 950 } 960 951 } 961 952 EXPORT_SYMBOL_GPL(fuse_conn_put); ··· 1373 1366 void fuse_free_conn(struct fuse_conn *fc) 1374 1367 { 1375 1368 WARN_ON(!list_empty(&fc->devices)); 1376 - kfree_rcu(fc, rcu); 1369 + kfree(fc); 1377 1370 } 1378 1371 EXPORT_SYMBOL_GPL(fuse_free_conn); 1379 1372 ··· 1909 1902 void fuse_mount_destroy(struct fuse_mount *fm) 1910 1903 { 1911 1904 fuse_conn_put(fm->fc); 1912 - kfree(fm); 1905 + kfree_rcu(fm, rcu); 1913 1906 } 1914 1907 EXPORT_SYMBOL(fuse_mount_destroy); 1915 1908
+1
fs/hfsplus/hfsplus_fs.h
··· 190 190 int work_queued; /* non-zero delayed work is queued */ 191 191 struct delayed_work sync_work; /* FS sync delayed work */ 192 192 spinlock_t work_lock; /* protects sync_work and work_queued */ 193 + struct rcu_head rcu; 193 194 }; 194 195 195 196 #define HFSPLUS_SB_WRITEBACKUP 0
+9 -3
fs/hfsplus/super.c
··· 277 277 spin_unlock(&sbi->work_lock); 278 278 } 279 279 280 + static void delayed_free(struct rcu_head *p) 281 + { 282 + struct hfsplus_sb_info *sbi = container_of(p, struct hfsplus_sb_info, rcu); 283 + 284 + unload_nls(sbi->nls); 285 + kfree(sbi); 286 + } 287 + 280 288 static void hfsplus_put_super(struct super_block *sb) 281 289 { 282 290 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); ··· 310 302 hfs_btree_close(sbi->ext_tree); 311 303 kfree(sbi->s_vhdr_buf); 312 304 kfree(sbi->s_backup_vhdr_buf); 313 - unload_nls(sbi->nls); 314 - kfree(sb->s_fs_info); 315 - sb->s_fs_info = NULL; 305 + call_rcu(&sbi->rcu, delayed_free); 316 306 } 317 307 318 308 static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
+5 -1
fs/namei.c
··· 1717 1717 { 1718 1718 if (nd->flags & LOOKUP_RCU) { 1719 1719 int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); 1720 - if (err != -ECHILD || !try_to_unlazy(nd)) 1720 + if (!err) // success, keep going 1721 + return 0; 1722 + if (!try_to_unlazy(nd)) 1723 + return -ECHILD; // redo it all non-lazy 1724 + if (err != -ECHILD) // hard error 1721 1725 return err; 1722 1726 } 1723 1727 return inode_permission(idmap, nd->inode, MAY_EXEC);
+10 -3
fs/nfs/client.c
··· 246 246 put_nfs_version(clp->cl_nfs_mod); 247 247 kfree(clp->cl_hostname); 248 248 kfree(clp->cl_acceptor); 249 - kfree(clp); 249 + kfree_rcu(clp, rcu); 250 250 } 251 251 EXPORT_SYMBOL_GPL(nfs_free_client); 252 252 ··· 1006 1006 } 1007 1007 EXPORT_SYMBOL_GPL(nfs_alloc_server); 1008 1008 1009 + static void delayed_free(struct rcu_head *p) 1010 + { 1011 + struct nfs_server *server = container_of(p, struct nfs_server, rcu); 1012 + 1013 + nfs_free_iostats(server->io_stats); 1014 + kfree(server); 1015 + } 1016 + 1009 1017 /* 1010 1018 * Free up a server record 1011 1019 */ ··· 1039 1031 1040 1032 ida_destroy(&server->lockowner_id); 1041 1033 ida_destroy(&server->openowner_id); 1042 - nfs_free_iostats(server->io_stats); 1043 1034 put_cred(server->cred); 1044 - kfree(server); 1045 1035 nfs_release_automount_timer(); 1036 + call_rcu(&server->rcu, delayed_free); 1046 1037 } 1047 1038 EXPORT_SYMBOL_GPL(nfs_free_server); 1048 1039
+2 -2
fs/nfs/dir.c
··· 1431 1431 static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) 1432 1432 { 1433 1433 struct inode *inode = d_inode(dentry); 1434 - struct inode *dir = d_inode(dentry->d_parent); 1434 + struct inode *dir = d_inode_rcu(dentry->d_parent); 1435 1435 1436 - if (!nfs_verify_change_attribute(dir, verf)) 1436 + if (!dir || !nfs_verify_change_attribute(dir, verf)) 1437 1437 return; 1438 1438 if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) 1439 1439 nfs_set_verifier_delegated(&verf);
-2
fs/proc/base.c
··· 1878 1878 hlist_del_init_rcu(&ei->sibling_inodes); 1879 1879 spin_unlock(&pid->lock); 1880 1880 } 1881 - 1882 - put_pid(pid); 1883 1881 } 1884 1882 1885 1883 struct inode *proc_pid_make_inode(struct super_block *sb,
+8 -11
fs/proc/inode.c
··· 30 30 31 31 static void proc_evict_inode(struct inode *inode) 32 32 { 33 - struct proc_dir_entry *de; 34 33 struct ctl_table_header *head; 35 34 struct proc_inode *ei = PROC_I(inode); 36 35 ··· 37 38 clear_inode(inode); 38 39 39 40 /* Stop tracking associated processes */ 40 - if (ei->pid) { 41 + if (ei->pid) 41 42 proc_pid_evict_inode(ei); 42 - ei->pid = NULL; 43 - } 44 - 45 - /* Let go of any associated proc directory entry */ 46 - de = ei->pde; 47 - if (de) { 48 - pde_put(de); 49 - ei->pde = NULL; 50 - } 51 43 52 44 head = ei->sysctl; 53 45 if (head) { ··· 70 80 71 81 static void proc_free_inode(struct inode *inode) 72 82 { 83 + struct proc_inode *ei = PROC_I(inode); 84 + 85 + if (ei->pid) 86 + put_pid(ei->pid); 87 + /* Let go of any associated proc directory entry */ 88 + if (ei->pde) 89 + pde_put(ei->pde); 73 90 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 74 91 } 75 92
+1 -1
fs/proc/root.c
··· 271 271 272 272 kill_anon_super(sb); 273 273 put_pid_ns(fs_info->pid_ns); 274 - kfree(fs_info); 274 + kfree_rcu(fs_info, rcu); 275 275 } 276 276 277 277 static struct file_system_type proc_fs_type = {
+3
fs/smb/client/cifsfs.c
··· 1172 1172 { 1173 1173 char *target_path; 1174 1174 1175 + if (!dentry) 1176 + return ERR_PTR(-ECHILD); 1177 + 1175 1178 target_path = kmalloc(PATH_MAX, GFP_KERNEL); 1176 1179 if (!target_path) 1177 1180 return ERR_PTR(-ENOMEM);
+4 -9
fs/super.c
··· 274 274 { 275 275 struct super_block *s = container_of(work, struct super_block, 276 276 destroy_work); 277 - int i; 278 - 279 - for (i = 0; i < SB_FREEZE_LEVELS; i++) 277 + security_sb_free(s); 278 + put_user_ns(s->s_user_ns); 279 + kfree(s->s_subtype); 280 + for (int i = 0; i < SB_FREEZE_LEVELS; i++) 280 281 percpu_free_rwsem(&s->s_writers.rw_sem[i]); 281 282 kfree(s); 282 283 } ··· 297 296 super_unlock_excl(s); 298 297 list_lru_destroy(&s->s_dentry_lru); 299 298 list_lru_destroy(&s->s_inode_lru); 300 - security_sb_free(s); 301 - put_user_ns(s->s_user_ns); 302 - kfree(s->s_subtype); 303 299 shrinker_free(s->s_shrink); 304 300 /* no delays needed */ 305 301 destroy_super_work(&s->destroy_work); ··· 407 409 WARN_ON(s->s_dentry_lru.node); 408 410 WARN_ON(s->s_inode_lru.node); 409 411 WARN_ON(!list_empty(&s->s_mounts)); 410 - security_sb_free(s); 411 - put_user_ns(s->s_user_ns); 412 - kfree(s->s_subtype); 413 412 call_rcu(&s->rcu, destroy_super_rcu); 414 413 } 415 414 }
+2
include/linux/nfs_fs_sb.h
··· 124 124 char cl_ipaddr[48]; 125 125 struct net *cl_net; 126 126 struct list_head pending_cb_stateids; 127 + struct rcu_head rcu; 127 128 }; 128 129 129 130 /* ··· 266 265 const struct cred *cred; 267 266 bool has_sec_mnt_opts; 268 267 struct kobject kobj; 268 + struct rcu_head rcu; 269 269 }; 270 270 271 271 /* Server capabilities */
+1
include/linux/proc_fs.h
··· 65 65 kgid_t pid_gid; 66 66 enum proc_hidepid hide_pid; 67 67 enum proc_pidonly pidonly; 68 + struct rcu_head rcu; 68 69 }; 69 70 70 71 static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)