Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fs: try an opportunistic lookup for O_CREAT opens too

Today, when opening a file we'll typically do a fast lookup, but if
O_CREAT is set, the kernel always takes the exclusive inode lock. I
assume this was done with the expectation that O_CREAT means that we
always expect to do the create, but that's often not the case. Many
programs set O_CREAT even in scenarios where the file already exists.

This patch rearranges the pathwalk-for-open code to also attempt a
lookup_fast() in certain O_CREAT cases. If a positive dentry is found, the
inode_lock can be avoided altogether, and if auditing isn't enabled, the
walk can stay in rcuwalk mode for the last step_into.

One notable exception that is hopefully temporary: if we're doing an
rcuwalk and auditing is enabled, skip the lookup_fast. Legitimizing the
dentry in that case is more expensive than taking the i_rwsem for now.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20240807-openfast-v3-1-040d132d2559@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>

authored by

Jeff Layton and committed by
Christian Brauner
e747e151 b9ca079d

+64 -10
+64 -10
fs/namei.c
··· 3605 3605 return ERR_PTR(error); 3606 3606 } 3607 3607 3608 + static inline bool trailing_slashes(struct nameidata *nd) 3609 + { 3610 + return (bool)nd->last.name[nd->last.len]; 3611 + } 3612 + 3613 + static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag) 3614 + { 3615 + struct dentry *dentry; 3616 + 3617 + if (open_flag & O_CREAT) { 3618 + /* Don't bother on an O_EXCL create */ 3619 + if (open_flag & O_EXCL) 3620 + return NULL; 3621 + 3622 + /* 3623 + * FIXME: If auditing is enabled, then we'll have to unlazy to 3624 + * use the dentry. For now, don't do this, since it shifts 3625 + * contention from parent's i_rwsem to its d_lockref spinlock. 3626 + * Reconsider this once dentry refcounting handles heavy 3627 + * contention better. 3628 + */ 3629 + if ((nd->flags & LOOKUP_RCU) && !audit_dummy_context()) 3630 + return NULL; 3631 + } 3632 + 3633 + if (trailing_slashes(nd)) 3634 + nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 3635 + 3636 + dentry = lookup_fast(nd); 3637 + if (IS_ERR_OR_NULL(dentry)) 3638 + return dentry; 3639 + 3640 + if (open_flag & O_CREAT) { 3641 + /* Discard negative dentries. 
Need inode_lock to do the create */ 3642 + if (!dentry->d_inode) { 3643 + if (!(nd->flags & LOOKUP_RCU)) 3644 + dput(dentry); 3645 + dentry = NULL; 3646 + } 3647 + } 3648 + return dentry; 3649 + } 3650 + 3608 3651 static const char *open_last_lookups(struct nameidata *nd, 3609 3652 struct file *file, const struct open_flags *op) 3610 3653 { ··· 3665 3622 return handle_dots(nd, nd->last_type); 3666 3623 } 3667 3624 3625 + /* We _can_ be in RCU mode here */ 3626 + dentry = lookup_fast_for_open(nd, open_flag); 3627 + if (IS_ERR(dentry)) 3628 + return ERR_CAST(dentry); 3629 + 3668 3630 if (!(open_flag & O_CREAT)) { 3669 - if (nd->last.name[nd->last.len]) 3670 - nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 3671 - /* we _can_ be in RCU mode here */ 3672 - dentry = lookup_fast(nd); 3673 - if (IS_ERR(dentry)) 3674 - return ERR_CAST(dentry); 3675 3631 if (likely(dentry)) 3676 3632 goto finish_lookup; 3677 3633 3678 3634 if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) 3679 3635 return ERR_PTR(-ECHILD); 3680 3636 } else { 3681 - /* create side of things */ 3682 3637 if (nd->flags & LOOKUP_RCU) { 3683 - if (!try_to_unlazy(nd)) 3638 + bool unlazied; 3639 + 3640 + /* can stay in rcuwalk if not auditing */ 3641 + if (dentry && audit_dummy_context()) { 3642 + if (trailing_slashes(nd)) 3643 + return ERR_PTR(-EISDIR); 3644 + goto finish_lookup; 3645 + } 3646 + unlazied = dentry ? try_to_unlazy_next(nd, dentry) : 3647 + try_to_unlazy(nd); 3648 + if (!unlazied) 3684 3649 return ERR_PTR(-ECHILD); 3685 3650 } 3686 3651 audit_inode(nd->name, dir, AUDIT_INODE_PARENT); 3687 - /* trailing slashes? */ 3688 - if (unlikely(nd->last.name[nd->last.len])) 3652 + if (trailing_slashes(nd)) { 3653 + dput(dentry); 3689 3654 return ERR_PTR(-EISDIR); 3655 + } 3656 + if (dentry) 3657 + goto finish_lookup; 3690 3658 } 3691 3659 3692 3660 if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {