Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

struct mount: relocate MNT_WRITE_HOLD bit

... from ->mnt_flags to LSB of ->mnt_pprev_for_sb.

This is safe - we always set and clear it within the same mount_lock
scope, so we won't interfere with list operations - traversals are
always forward, so they don't even look at ->mnt_pprev_for_sb and
both insertions and removals are in mount_lock scopes of their own,
so that bit will be clear in *all* mount instances during those.

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Al Viro 3371fa2f 09a1b33c

+42 -20
+24 -1
fs/mount.h
··· 66 66 struct list_head mnt_child; /* and going through their mnt_child */ 67 67 struct mount *mnt_next_for_sb; /* the next two fields are hlist_node, */ 68 68 struct mount * __aligned(1) *mnt_pprev_for_sb; 69 - /* except that LSB of pprev will be stolen */ 69 + /* except that LSB of pprev is stolen */ 70 + #define WRITE_HOLD 1 /* ... for use by mnt_hold_writers() */ 70 71 const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ 71 72 struct list_head mnt_list; 72 73 struct list_head mnt_expire; /* link in fs-specific expiry list */ ··· 243 242 while (m->overmount) 244 243 m = m->overmount; 245 244 return m; 245 + } 246 + 247 + static inline bool __test_write_hold(struct mount * __aligned(1) *val) 248 + { 249 + return (unsigned long)val & WRITE_HOLD; 250 + } 251 + 252 + static inline bool test_write_hold(const struct mount *m) 253 + { 254 + return __test_write_hold(m->mnt_pprev_for_sb); 255 + } 256 + 257 + static inline void set_write_hold(struct mount *m) 258 + { 259 + m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb 260 + | WRITE_HOLD); 261 + } 262 + 263 + static inline void clear_write_hold(struct mount *m) 264 + { 265 + m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb 266 + & ~WRITE_HOLD); 246 267 } 247 268 248 269 struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
+17 -17
fs/namespace.c
··· 509 509 mnt_inc_writers(mnt); 510 510 /* 511 511 * The store to mnt_inc_writers must be visible before we pass 512 - * MNT_WRITE_HOLD loop below, so that the slowpath can see our 513 - * incremented count after it has set MNT_WRITE_HOLD. 512 + * WRITE_HOLD loop below, so that the slowpath can see our 513 + * incremented count after it has set WRITE_HOLD. 514 514 */ 515 515 smp_mb(); 516 516 might_lock(&mount_lock.lock); 517 - while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { 517 + while (__test_write_hold(READ_ONCE(mnt->mnt_pprev_for_sb))) { 518 518 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 519 519 cpu_relax(); 520 520 } else { 521 521 /* 522 522 * This prevents priority inversion, if the task 523 - * setting MNT_WRITE_HOLD got preempted on a remote 523 + * setting WRITE_HOLD got preempted on a remote 524 524 * CPU, and it prevents life lock if the task setting 525 - * MNT_WRITE_HOLD has a lower priority and is bound to 525 + * WRITE_HOLD has a lower priority and is bound to 526 526 * the same CPU as the task that is spinning here. 527 527 */ 528 528 preempt_enable(); ··· 533 533 } 534 534 /* 535 535 * The barrier pairs with the barrier sb_start_ro_state_change() making 536 - * sure that if we see MNT_WRITE_HOLD cleared, we will also see 536 + * sure that if we see WRITE_HOLD cleared, we will also see 537 537 * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in 538 538 * mnt_is_readonly() and bail in case we are racing with remount 539 539 * read-only. ··· 672 672 * @mnt. 673 673 * 674 674 * Context: This function expects lock_mount_hash() to be held serializing 675 - * setting MNT_WRITE_HOLD. 675 + * setting WRITE_HOLD. 676 676 * Return: On success 0 is returned. 677 677 * On error, -EBUSY is returned. 678 678 */ 679 679 static inline int mnt_hold_writers(struct mount *mnt) 680 680 { 681 - mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; 681 + set_write_hold(mnt); 682 682 /* 683 - * After storing MNT_WRITE_HOLD, we'll read the counters. 
This store 683 + * After storing WRITE_HOLD, we'll read the counters. This store 684 684 * should be visible before we do. 685 685 */ 686 686 smp_mb(); ··· 696 696 * sum up each counter, if we read a counter before it is incremented, 697 697 * but then read another CPU's count which it has been subsequently 698 698 * decremented from -- we would see more decrements than we should. 699 - * MNT_WRITE_HOLD protects against this scenario, because 699 + * WRITE_HOLD protects against this scenario, because 700 700 * mnt_want_write first increments count, then smp_mb, then spins on 701 - * MNT_WRITE_HOLD, so it can't be decremented by another CPU while 701 + * WRITE_HOLD, so it can't be decremented by another CPU while 702 702 * we're counting up here. 703 703 */ 704 704 if (mnt_get_writers(mnt) > 0) ··· 720 720 */ 721 721 static inline void mnt_unhold_writers(struct mount *mnt) 722 722 { 723 - if (!(mnt->mnt_flags & MNT_WRITE_HOLD)) 723 + if (!test_write_hold(mnt)) 724 724 return; 725 725 /* 726 - * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers 726 + * MNT_READONLY must become visible before ~WRITE_HOLD, so writers 727 727 * that become unheld will see MNT_READONLY. 728 728 */ 729 729 smp_wmb(); 730 - mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; 730 + clear_write_hold(mnt); 731 731 } 732 732 733 733 static inline void mnt_del_instance(struct mount *m) ··· 766 766 { 767 767 int err = 0; 768 768 769 - /* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */ 769 + /* Racy optimization. Recheck the counter under WRITE_HOLD */ 770 770 if (atomic_long_read(&sb->s_remove_count)) 771 771 return -EBUSY; 772 772 ··· 784 784 if (!err) 785 785 sb_start_ro_state_change(sb); 786 786 for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) { 787 - if (m->mnt.mnt_flags & MNT_WRITE_HOLD) 788 - m->mnt.mnt_flags &= ~MNT_WRITE_HOLD; 787 + if (test_write_hold(m)) 788 + clear_write_hold(m); 789 789 } 790 790 unlock_mount_hash(); 791 791
+1 -2
include/linux/mount.h
··· 33 33 MNT_NOSYMFOLLOW = 0x80, 34 34 35 35 MNT_SHRINKABLE = 0x100, 36 - MNT_WRITE_HOLD = 0x200, 37 36 38 37 MNT_INTERNAL = 0x4000, 39 38 ··· 51 52 | MNT_READONLY | MNT_NOSYMFOLLOW, 52 53 MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, 53 54 54 - MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | 55 + MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED | 55 56 MNT_SYNC_UMOUNT | MNT_LOCKED 56 57 }; 57 58