Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'trace-v6.9-rc6-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing and tracefs fixes from Steven Rostedt:

- Fix RCU callback of freeing an eventfs_inode.

The freeing of the eventfs_inode from the kref going to zero freed
the contents of the eventfs_inode and then used kfree_rcu() to free
the inode itself. But the contents should also be protected by RCU.
Switch to a call_rcu() that calls a function to free all of the
eventfs_inode after the RCU grace period.

- The tracing subsystem maps its own descriptor to a file represented
by eventfs. The freeing of this descriptor needs to know when the
last reference of an eventfs_inode is released, but currently there
is no interface for that.

Add a "release" callback to the eventfs_inode entry array that allows
for freeing of data that can be referenced by the eventfs_inode being
opened. Then increment the ref counter for this descriptor when the
eventfs_inode file is created, and decrement/free it when the last
reference to the eventfs_inode is released and the file is removed.
This prevents races between freeing the descriptor and the opening of
the eventfs file.

- Fix the permission processing of eventfs.

The change to make the permissions of eventfs default to the mount
point but keep track of when changes were made had a side effect that
could cause security concerns. When tracefs is remounted with a
given gid or uid, all the files within it should inherit that gid or
uid. But if the admin had changed the permission of some file within
the tracefs file system, that file would not get updated by the remount.

This caused the kselftest of file permissions to fail the second time
it was run. The first time, all changes would look fine, but the
second time, because the changes were "saved", the remount did not
reset them.

Create a linked list of all existing tracefs inodes, and clear the
saved flags on them on a remount if the remount changes the
corresponding gid or uid fields.

This also simplifies the code by removing the distinction between the
toplevel eventfs and an instance eventfs. They should both act the
same. They were different because of a misconception due to the
remount not resetting the flags. Now that remount resets all the
files and directories to default to the root node if a uid/gid is
specified, it makes the logic simpler to implement.

* tag 'trace-v6.9-rc6-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
eventfs: Have "events" directory get permissions from its parent
eventfs: Do not treat events directory different than other directories
eventfs: Do not differentiate the toplevel events directory
tracefs: Still use mount point as default permissions for instances
tracefs: Reset permissions on remount if permissions are options
eventfs: Free all of the eventfs_inode after RCU
eventfs/tracing: Add callback for release of an eventfs_inode

+210 -59
+97 -51
fs/tracefs/event_inode.c
··· 37 37 38 38 struct eventfs_root_inode { 39 39 struct eventfs_inode ei; 40 + struct inode *parent_inode; 40 41 struct dentry *events_dir; 41 42 }; 42 43 ··· 69 68 EVENTFS_SAVE_MODE = BIT(16), 70 69 EVENTFS_SAVE_UID = BIT(17), 71 70 EVENTFS_SAVE_GID = BIT(18), 72 - EVENTFS_TOPLEVEL = BIT(19), 73 71 }; 74 72 75 73 #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) 74 + 75 + static void free_ei_rcu(struct rcu_head *rcu) 76 + { 77 + struct eventfs_inode *ei = container_of(rcu, struct eventfs_inode, rcu); 78 + struct eventfs_root_inode *rei; 79 + 80 + kfree(ei->entry_attrs); 81 + kfree_const(ei->name); 82 + if (ei->is_events) { 83 + rei = get_root_inode(ei); 84 + kfree(rei); 85 + } else { 86 + kfree(ei); 87 + } 88 + } 76 89 77 90 /* 78 91 * eventfs_inode reference count management. ··· 99 84 static void release_ei(struct kref *ref) 100 85 { 101 86 struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); 102 - struct eventfs_root_inode *rei; 87 + const struct eventfs_entry *entry; 103 88 104 89 WARN_ON_ONCE(!ei->is_freed); 105 90 106 - kfree(ei->entry_attrs); 107 - kfree_const(ei->name); 108 - if (ei->is_events) { 109 - rei = get_root_inode(ei); 110 - kfree_rcu(rei, ei.rcu); 111 - } else { 112 - kfree_rcu(ei, rcu); 91 + for (int i = 0; i < ei->nr_entries; i++) { 92 + entry = &ei->entries[i]; 93 + if (entry->release) 94 + entry->release(entry->name, ei->data); 113 95 } 96 + 97 + call_rcu(&ei->rcu, free_ei_rcu); 114 98 } 115 99 116 100 static inline void put_ei(struct eventfs_inode *ei) ··· 123 109 if (ei) { 124 110 ei->is_freed = 1; 125 111 put_ei(ei); 112 + } 113 + } 114 + 115 + /* 116 + * Called when creation of an ei fails, do not call release() functions. 117 + */ 118 + static inline void cleanup_ei(struct eventfs_inode *ei) 119 + { 120 + if (ei) { 121 + /* Set nr_entries to 0 to prevent release() function being called */ 122 + ei->nr_entries = 0; 123 + free_ei(ei); 126 124 } 127 125 } 128 126 ··· 207 181 * determined by the parent directory. 
208 182 */ 209 183 if (dentry->d_inode->i_mode & S_IFDIR) { 210 - /* 211 - * The events directory dentry is never freed, unless its 212 - * part of an instance that is deleted. It's attr is the 213 - * default for its child files and directories. 214 - * Do not update it. It's not used for its own mode or ownership. 215 - */ 216 - if (ei->is_events) { 217 - /* But it still needs to know if it was modified */ 218 - if (iattr->ia_valid & ATTR_UID) 219 - ei->attr.mode |= EVENTFS_SAVE_UID; 220 - if (iattr->ia_valid & ATTR_GID) 221 - ei->attr.mode |= EVENTFS_SAVE_GID; 222 - } else { 223 - update_attr(&ei->attr, iattr); 224 - } 184 + update_attr(&ei->attr, iattr); 225 185 226 186 } else { 227 187 name = dentry->d_name.name; ··· 225 213 return ret; 226 214 } 227 215 228 - static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb) 216 + static void update_events_attr(struct eventfs_inode *ei, struct super_block *sb) 229 217 { 230 - struct inode *root; 218 + struct eventfs_root_inode *rei; 219 + struct inode *parent; 231 220 232 - /* Only update if the "events" was on the top level */ 233 - if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) 234 - return; 221 + rei = get_root_inode(ei); 235 222 236 - /* Get the tracefs root inode. 
*/ 237 - root = d_inode(sb->s_root); 238 - ei->attr.uid = root->i_uid; 239 - ei->attr.gid = root->i_gid; 223 + /* Use the parent inode permissions unless root set its permissions */ 224 + parent = rei->parent_inode; 225 + 226 + if (rei->ei.attr.mode & EVENTFS_SAVE_UID) 227 + ei->attr.uid = rei->ei.attr.uid; 228 + else 229 + ei->attr.uid = parent->i_uid; 230 + 231 + if (rei->ei.attr.mode & EVENTFS_SAVE_GID) 232 + ei->attr.gid = rei->ei.attr.gid; 233 + else 234 + ei->attr.gid = parent->i_gid; 240 235 } 241 236 242 237 static void set_top_events_ownership(struct inode *inode) ··· 252 233 struct eventfs_inode *ei = ti->private; 253 234 254 235 /* The top events directory doesn't get automatically updated */ 255 - if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) 236 + if (!ei || !ei->is_events) 256 237 return; 257 238 258 - update_top_events_attr(ei, inode->i_sb); 239 + update_events_attr(ei, inode->i_sb); 259 240 260 241 if (!(ei->attr.mode & EVENTFS_SAVE_UID)) 261 242 inode->i_uid = ei->attr.uid; ··· 284 265 return generic_permission(idmap, inode, mask); 285 266 } 286 267 287 - static const struct inode_operations eventfs_root_dir_inode_operations = { 268 + static const struct inode_operations eventfs_dir_inode_operations = { 288 269 .lookup = eventfs_root_lookup, 289 270 .setattr = eventfs_set_attr, 290 271 .getattr = eventfs_get_attr, ··· 300 281 .iterate_shared = eventfs_iterate, 301 282 .llseek = generic_file_llseek, 302 283 }; 284 + 285 + /* 286 + * On a remount of tracefs, if UID or GID options are set, then 287 + * the mount point inode permissions should be used. 288 + * Reset the saved permission flags appropriately. 
289 + */ 290 + void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) 291 + { 292 + struct eventfs_inode *ei = ti->private; 293 + 294 + if (!ei) 295 + return; 296 + 297 + if (update_uid) 298 + ei->attr.mode &= ~EVENTFS_SAVE_UID; 299 + 300 + if (update_gid) 301 + ei->attr.mode &= ~EVENTFS_SAVE_GID; 302 + 303 + if (!ei->entry_attrs) 304 + return; 305 + 306 + for (int i = 0; i < ei->nr_entries; i++) { 307 + if (update_uid) 308 + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; 309 + if (update_gid) 310 + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; 311 + } 312 + } 303 313 304 314 /* Return the evenfs_inode of the "events" directory */ 305 315 static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) ··· 352 304 // Walk upwards until you find the events inode 353 305 } while (!ei->is_events); 354 306 355 - update_top_events_attr(ei, dentry->d_sb); 307 + update_events_attr(ei, dentry->d_sb); 356 308 357 309 return ei; 358 310 } ··· 458 410 update_inode_attr(dentry, inode, &ei->attr, 459 411 S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); 460 412 461 - inode->i_op = &eventfs_root_dir_inode_operations; 413 + inode->i_op = &eventfs_dir_inode_operations; 462 414 inode->i_fop = &eventfs_file_operations; 463 415 464 416 /* All directories will have the same inode number */ ··· 782 734 783 735 /* Was the parent freed? */ 784 736 if (list_empty(&ei->list)) { 785 - free_ei(ei); 737 + cleanup_ei(ei); 786 738 ei = NULL; 787 739 } 788 740 return ei; ··· 829 781 // Note: we have a ref to the dentry from tracefs_start_creating() 830 782 rei = get_root_inode(ei); 831 783 rei->events_dir = dentry; 784 + rei->parent_inode = d_inode(dentry->d_sb->s_root); 832 785 833 786 ei->entries = entries; 834 787 ei->nr_entries = size; ··· 839 790 uid = d_inode(dentry->d_parent)->i_uid; 840 791 gid = d_inode(dentry->d_parent)->i_gid; 841 792 842 - /* 843 - * If the events directory is of the top instance, then parent 844 - * is NULL. 
Set the attr.mode to reflect this and its permissions will 845 - * default to the tracefs root dentry. 846 - */ 847 - if (!parent) 848 - ei->attr.mode = EVENTFS_TOPLEVEL; 849 - 850 - /* This is used as the default ownership of the files and directories */ 851 793 ei->attr.uid = uid; 852 794 ei->attr.gid = gid; 795 + 796 + /* 797 + * When the "events" directory is created, it takes on the 798 + * permissions of its parent. But can be reset on remount. 799 + */ 800 + ei->attr.mode |= EVENTFS_SAVE_UID | EVENTFS_SAVE_GID; 853 801 854 802 INIT_LIST_HEAD(&ei->children); 855 803 INIT_LIST_HEAD(&ei->list); 856 804 857 805 ti = get_tracefs(inode); 858 - ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; 806 + ti->flags |= TRACEFS_EVENT_INODE; 859 807 ti->private = ei; 860 808 861 809 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 862 810 inode->i_uid = uid; 863 811 inode->i_gid = gid; 864 - inode->i_op = &eventfs_root_dir_inode_operations; 812 + inode->i_op = &eventfs_dir_inode_operations; 865 813 inode->i_fop = &eventfs_file_operations; 866 814 867 815 dentry->d_fsdata = get_ei(ei); ··· 881 835 return ei; 882 836 883 837 fail: 884 - free_ei(ei); 838 + cleanup_ei(ei); 885 839 tracefs_failed_creating(dentry); 886 840 return ERR_PTR(-ENOMEM); 887 841 }
+89 -3
fs/tracefs/inode.c
··· 30 30 static int tracefs_mount_count; 31 31 static bool tracefs_registered; 32 32 33 + /* 34 + * Keep track of all tracefs_inodes in order to update their 35 + * flags if necessary on a remount. 36 + */ 37 + static DEFINE_SPINLOCK(tracefs_inode_lock); 38 + static LIST_HEAD(tracefs_inodes); 39 + 33 40 static struct inode *tracefs_alloc_inode(struct super_block *sb) 34 41 { 35 42 struct tracefs_inode *ti; 43 + unsigned long flags; 36 44 37 45 ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); 38 46 if (!ti) 39 47 return NULL; 40 48 49 + spin_lock_irqsave(&tracefs_inode_lock, flags); 50 + list_add_rcu(&ti->list, &tracefs_inodes); 51 + spin_unlock_irqrestore(&tracefs_inode_lock, flags); 52 + 41 53 return &ti->vfs_inode; 54 + } 55 + 56 + static void tracefs_free_inode_rcu(struct rcu_head *rcu) 57 + { 58 + struct tracefs_inode *ti; 59 + 60 + ti = container_of(rcu, struct tracefs_inode, rcu); 61 + kmem_cache_free(tracefs_inode_cachep, ti); 42 62 } 43 63 44 64 static void tracefs_free_inode(struct inode *inode) 45 65 { 46 - kmem_cache_free(tracefs_inode_cachep, get_tracefs(inode)); 66 + struct tracefs_inode *ti = get_tracefs(inode); 67 + unsigned long flags; 68 + 69 + spin_lock_irqsave(&tracefs_inode_lock, flags); 70 + list_del_rcu(&ti->list); 71 + spin_unlock_irqrestore(&tracefs_inode_lock, flags); 72 + 73 + call_rcu(&ti->rcu, tracefs_free_inode_rcu); 47 74 } 48 75 49 76 static ssize_t default_read_file(struct file *file, char __user *buf, ··· 180 153 { 181 154 struct tracefs_inode *ti = get_tracefs(inode); 182 155 struct inode *root_inode = ti->private; 156 + kuid_t uid; 157 + kgid_t gid; 158 + 159 + uid = root_inode->i_uid; 160 + gid = root_inode->i_gid; 161 + 162 + /* 163 + * If the root is not the mount point, then check the root's 164 + * permissions. If it was never set, then default to the 165 + * mount point. 
166 + */ 167 + if (root_inode != d_inode(root_inode->i_sb->s_root)) { 168 + struct tracefs_inode *rti; 169 + 170 + rti = get_tracefs(root_inode); 171 + root_inode = d_inode(root_inode->i_sb->s_root); 172 + 173 + if (!(rti->flags & TRACEFS_UID_PERM_SET)) 174 + uid = root_inode->i_uid; 175 + 176 + if (!(rti->flags & TRACEFS_GID_PERM_SET)) 177 + gid = root_inode->i_gid; 178 + } 183 179 184 180 /* 185 181 * If this inode has never been referenced, then update 186 182 * the permissions to the superblock. 187 183 */ 188 184 if (!(ti->flags & TRACEFS_UID_PERM_SET)) 189 - inode->i_uid = root_inode->i_uid; 185 + inode->i_uid = uid; 190 186 191 187 if (!(ti->flags & TRACEFS_GID_PERM_SET)) 192 - inode->i_gid = root_inode->i_gid; 188 + inode->i_gid = gid; 193 189 } 194 190 195 191 static int tracefs_permission(struct mnt_idmap *idmap, ··· 363 313 struct tracefs_fs_info *fsi = sb->s_fs_info; 364 314 struct inode *inode = d_inode(sb->s_root); 365 315 struct tracefs_mount_opts *opts = &fsi->mount_opts; 316 + struct tracefs_inode *ti; 317 + bool update_uid, update_gid; 366 318 umode_t tmp_mode; 367 319 368 320 /* ··· 383 331 384 332 if (!remount || opts->opts & BIT(Opt_gid)) 385 333 inode->i_gid = opts->gid; 334 + 335 + if (remount && (opts->opts & BIT(Opt_uid) || opts->opts & BIT(Opt_gid))) { 336 + 337 + update_uid = opts->opts & BIT(Opt_uid); 338 + update_gid = opts->opts & BIT(Opt_gid); 339 + 340 + rcu_read_lock(); 341 + list_for_each_entry_rcu(ti, &tracefs_inodes, list) { 342 + if (update_uid) 343 + ti->flags &= ~TRACEFS_UID_PERM_SET; 344 + 345 + if (update_gid) 346 + ti->flags &= ~TRACEFS_GID_PERM_SET; 347 + 348 + if (ti->flags & TRACEFS_EVENT_INODE) 349 + eventfs_remount(ti, update_uid, update_gid); 350 + } 351 + rcu_read_unlock(); 352 + } 386 353 387 354 return 0; 388 355 } ··· 469 398 return !(ei && ei->is_freed); 470 399 } 471 400 401 + static void tracefs_d_iput(struct dentry *dentry, struct inode *inode) 402 + { 403 + struct tracefs_inode *ti = get_tracefs(inode); 404 + 
405 + /* 406 + * This inode is being freed and cannot be used for 407 + * eventfs. Clear the flag so that it doesn't call into 408 + * eventfs during the remount flag updates. The eventfs_inode 409 + * gets freed after an RCU cycle, so the content will still 410 + * be safe if the iteration is going on now. 411 + */ 412 + ti->flags &= ~TRACEFS_EVENT_INODE; 413 + } 414 + 472 415 static const struct dentry_operations tracefs_dentry_operations = { 416 + .d_iput = tracefs_d_iput, 473 417 .d_revalidate = tracefs_d_revalidate, 474 418 .d_release = tracefs_d_release, 475 419 };
+9 -5
fs/tracefs/internal.h
··· 4 4 5 5 enum { 6 6 TRACEFS_EVENT_INODE = BIT(1), 7 - TRACEFS_EVENT_TOP_INODE = BIT(2), 8 - TRACEFS_GID_PERM_SET = BIT(3), 9 - TRACEFS_UID_PERM_SET = BIT(4), 10 - TRACEFS_INSTANCE_INODE = BIT(5), 7 + TRACEFS_GID_PERM_SET = BIT(2), 8 + TRACEFS_UID_PERM_SET = BIT(3), 9 + TRACEFS_INSTANCE_INODE = BIT(4), 11 10 }; 12 11 13 12 struct tracefs_inode { 14 - struct inode vfs_inode; 13 + union { 14 + struct inode vfs_inode; 15 + struct rcu_head rcu; 16 + }; 15 17 /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ 18 + struct list_head list; 16 19 unsigned long flags; 17 20 void *private; 18 21 }; ··· 76 73 struct dentry *tracefs_failed_creating(struct dentry *dentry); 77 74 struct inode *tracefs_get_inode(struct super_block *sb); 78 75 76 + void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid); 79 77 void eventfs_d_release(struct dentry *dentry); 80 78 81 79 #endif /* _TRACEFS_INTERNAL_H */
+3
include/linux/tracefs.h
··· 62 62 typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, 63 63 const struct file_operations **fops); 64 64 65 + typedef void (*eventfs_release)(const char *name, void *data); 66 + 65 67 /** 66 68 * struct eventfs_entry - dynamically created eventfs file call back handler 67 69 * @name: Then name of the dynamic file in an eventfs directory ··· 74 72 struct eventfs_entry { 75 73 const char *name; 76 74 eventfs_callback callback; 75 + eventfs_release release; 77 76 }; 78 77 79 78 struct eventfs_inode;
+12
kernel/trace/trace_events.c
··· 2552 2552 return 0; 2553 2553 } 2554 2554 2555 + /* The file is incremented on creation and freeing the enable file decrements it */ 2556 + static void event_release(const char *name, void *data) 2557 + { 2558 + struct trace_event_file *file = data; 2559 + 2560 + event_file_put(file); 2561 + } 2562 + 2555 2563 static int 2556 2564 event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) 2557 2565 { ··· 2574 2566 { 2575 2567 .name = "enable", 2576 2568 .callback = event_callback, 2569 + .release = event_release, 2577 2570 }, 2578 2571 { 2579 2572 .name = "filter", ··· 2642 2633 pr_warn("Could not initialize trace point events/%s\n", name); 2643 2634 return ret; 2644 2635 } 2636 + 2637 + /* Gets decremented on freeing of the "enable" file */ 2638 + event_file_get(file); 2645 2639 2646 2640 return 0; 2647 2641 }