Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

kernel/acct.c: saner struct file treatment

Instead of switching ->f_path.mnt of an opened file to internal
clone, get a struct path with ->mnt set to internal clone of that
->f_path.mnt, then dentry_open() that to get the file with right ->f_path.mnt
from the very beginning.

The only subtle part here is that on failure exits we need to
close the file with __fput_sync() and make sure we do that *before*
dropping the original mount.

With that done, only fs/{file_table,open,namei}.c ever store
anything to file->f_path and only prior to file->f_mode & FMODE_OPENED
becoming true. Analysis of mount write count handling also becomes
less brittle and convoluted...

[AV: folded a fix for a bug spotted by Jan Kara - we do need a full-blown
open of the original file, not just user_path_at() or we end up skipping
permission checks]

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Al Viro cdc59a62 b320789d

+45 -69
+45 -69
kernel/acct.c
··· 44 44 * a struct file opened for write. Fixed. 2/6/2000, AV. 45 45 */ 46 46 47 - #include <linux/mm.h> 48 47 #include <linux/slab.h> 49 48 #include <linux/acct.h> 50 49 #include <linux/capability.h> 51 - #include <linux/file.h> 52 50 #include <linux/tty.h> 53 - #include <linux/security.h> 54 - #include <linux/vfs.h> 51 + #include <linux/statfs.h> 55 52 #include <linux/jiffies.h> 56 - #include <linux/times.h> 57 53 #include <linux/syscalls.h> 58 - #include <linux/mount.h> 59 - #include <linux/uaccess.h> 54 + #include <linux/namei.h> 60 55 #include <linux/sched/cputime.h> 61 56 62 57 #include <asm/div64.h> ··· 212 217 complete(&acct->done); 213 218 } 214 219 215 - static int acct_on(struct filename *pathname) 220 + DEFINE_FREE(fput_sync, struct file *, if (!IS_ERR_OR_NULL(_T)) __fput_sync(_T)) 221 + static int acct_on(const char __user *name) 216 222 { 217 - struct file *file; 218 - struct vfsmount *mnt, *internal; 223 + /* Difference from BSD - they don't do O_APPEND */ 224 + const int open_flags = O_WRONLY|O_APPEND|O_LARGEFILE; 219 225 struct pid_namespace *ns = task_active_pid_ns(current); 226 + struct filename *pathname __free(putname) = getname(name); 227 + struct file *original_file __free(fput) = NULL; // in that order 228 + struct path internal __free(path_put) = {}; // in that order 229 + struct file *file __free(fput_sync) = NULL; // in that order 220 230 struct bsd_acct_struct *acct; 231 + struct vfsmount *mnt; 221 232 struct fs_pin *old; 222 - int err; 233 + 234 + if (IS_ERR(pathname)) 235 + return PTR_ERR(pathname); 236 + original_file = file_open_name(pathname, open_flags, 0); 237 + if (IS_ERR(original_file)) 238 + return PTR_ERR(original_file); 239 + 240 + mnt = mnt_clone_internal(&original_file->f_path); 241 + if (IS_ERR(mnt)) 242 + return PTR_ERR(mnt); 243 + 244 + internal.mnt = mnt; 245 + internal.dentry = dget(mnt->mnt_root); 246 + 247 + file = dentry_open(&internal, open_flags, current_cred()); 248 + if (IS_ERR(file)) 249 + return PTR_ERR(file); 250 + 251 + if (!S_ISREG(file_inode(file)->i_mode)) 252 + return -EACCES; 253 + 254 + /* Exclude kernel kernel internal filesystems. */ 255 + if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) 256 + return -EINVAL; 257 + 258 + /* Exclude procfs and sysfs. */ 259 + if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) 260 + return -EINVAL; 261 + 262 + if (!(file->f_mode & FMODE_CAN_WRITE)) 263 + return -EIO; 223 264 224 265 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); 225 266 if (!acct) 226 267 return -ENOMEM; 227 268 228 - /* Difference from BSD - they don't do O_APPEND */ 229 - file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); 230 - if (IS_ERR(file)) { 231 - kfree(acct); 232 - return PTR_ERR(file); 233 - } 234 - 235 - if (!S_ISREG(file_inode(file)->i_mode)) { 236 - kfree(acct); 237 - filp_close(file, NULL); 238 - return -EACCES; 239 - } 240 - 241 - /* Exclude kernel kernel internal filesystems. */ 242 - if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) { 243 - kfree(acct); 244 - filp_close(file, NULL); 245 - return -EINVAL; 246 - } 247 - 248 - /* Exclude procfs and sysfs. */ 249 - if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) { 250 - kfree(acct); 251 - filp_close(file, NULL); 252 - return -EINVAL; 253 - } 254 - 255 - if (!(file->f_mode & FMODE_CAN_WRITE)) { 256 - kfree(acct); 257 - filp_close(file, NULL); 258 - return -EIO; 259 - } 260 - internal = mnt_clone_internal(&file->f_path); 261 - if (IS_ERR(internal)) { 262 - kfree(acct); 263 - filp_close(file, NULL); 264 - return PTR_ERR(internal); 265 - } 266 - err = mnt_get_write_access(internal); 267 - if (err) { 268 - mntput(internal); 269 - kfree(acct); 270 - filp_close(file, NULL); 271 - return err; 272 - } 273 - mnt = file->f_path.mnt; 274 - file->f_path.mnt = internal; 275 - 276 269 atomic_long_set(&acct->count, 1); 277 270 init_fs_pin(&acct->pin, acct_pin_kill); 278 - acct->file = file; 271 + acct->file = no_free_ptr(file); 279 272 acct->needcheck = jiffies; 280 273 acct->ns = ns; 281 274 mutex_init(&acct->lock); 282 275 INIT_WORK(&acct->work, close_work); 283 276 init_completion(&acct->done); 284 277 mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ 285 - pin_insert(&acct->pin, mnt); 278 + pin_insert(&acct->pin, original_file->f_path.mnt); 286 279 287 280 rcu_read_lock(); 288 281 old = xchg(&ns->bacct, &acct->pin); 289 282 mutex_unlock(&acct->lock); 290 283 pin_kill(old); 291 - mnt_put_write_access(mnt); 292 - mntput(mnt); 293 284 return 0; 294 285 } 295 286 ··· 300 319 return -EPERM; 301 320 302 321 if (name) { 303 - struct filename *tmp = getname(name); 304 - 305 - if (IS_ERR(tmp)) 306 - return PTR_ERR(tmp); 307 322 mutex_lock(&acct_on_mutex); 308 - error = acct_on(tmp); 323 + error = acct_on(name); 309 324 mutex_unlock(&acct_on_mutex); 310 - putname(tmp); 311 325 } else { 312 326 rcu_read_lock(); 313 327 pin_kill(task_active_pid_ns(current)->bacct);