Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'trace-v6.7-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fixes from Steven Rostedt:

- Fix readers that are blocked on the ring buffer when buffer_percent
is 100%. They are supposed to wake up when the buffer is full, but
because the sub-buffer that the writer is on is never considered
"dirty" in the calculation, dirty pages will never equal nr_pages.
Add +1 to the dirty count in order to account for the sub-buffer that
the writer is on.

- When a reader is blocked on the "snapshot_raw" file, it is to be
woken up when a snapshot is done and be able to read the snapshot
buffer. But because the snapshot swaps the buffers (the main one with
the snapshot one), and the snapshot reader is waiting on the old
snapshot buffer, it was not woken up (because it is now on the main
buffer after the swap). Worse yet, when it reads the buffer after a
snapshot, it's not reading the snapshot buffer, it's reading the live
active main buffer.

Fix this by forcing a wakeup of all readers on the snapshot buffer
when a new snapshot happens, and then update the buffer that the
reader is reading to be back on the snapshot buffer.

- Fix the modification of the direct_function hash. There was a race
when new functions were added to the direct_function hash: while it
moved function entries from the old hash to the new one, a direct
function trace could be hit and not see its entry.

This is fixed by allocating the new hash, copying all the old entries
onto it as well as the new entries, and then using rcu_assign_pointer()
to update the new direct_function hash with it.

This also fixes a memory leak in that code.

- Fix eventfs ownership

* tag 'trace-v6.7-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
ftrace: Fix modification of direct_function hash while in use
tracing: Fix blocked reader of snapshot buffer
ring-buffer: Fix wake ups when buffer_percent is set to 100
eventfs: Fix file and directory uid and gid ownership

+176 -69
+95 -10
fs/tracefs/event_inode.c
··· 113 113 * determined by the parent directory. 114 114 */ 115 115 if (dentry->d_inode->i_mode & S_IFDIR) { 116 - update_attr(&ei->attr, iattr); 116 + /* 117 + * The events directory dentry is never freed, unless its 118 + * part of an instance that is deleted. It's attr is the 119 + * default for its child files and directories. 120 + * Do not update it. It's not used for its own mode or ownership 121 + */ 122 + if (!ei->is_events) 123 + update_attr(&ei->attr, iattr); 117 124 118 125 } else { 119 126 name = dentry->d_name.name; ··· 155 148 .release = eventfs_release, 156 149 }; 157 150 151 + /* Return the evenfs_inode of the "events" directory */ 152 + static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) 153 + { 154 + struct eventfs_inode *ei; 155 + 156 + mutex_lock(&eventfs_mutex); 157 + do { 158 + /* The parent always has an ei, except for events itself */ 159 + ei = dentry->d_parent->d_fsdata; 160 + 161 + /* 162 + * If the ei is being freed, the ownership of the children 163 + * doesn't matter. 
164 + */ 165 + if (ei->is_freed) { 166 + ei = NULL; 167 + break; 168 + } 169 + 170 + dentry = ei->dentry; 171 + } while (!ei->is_events); 172 + mutex_unlock(&eventfs_mutex); 173 + 174 + return ei; 175 + } 176 + 158 177 static void update_inode_attr(struct dentry *dentry, struct inode *inode, 159 178 struct eventfs_attr *attr, umode_t mode) 160 179 { 161 - if (!attr) { 162 - inode->i_mode = mode; 180 + struct eventfs_inode *events_ei = eventfs_find_events(dentry); 181 + 182 + if (!events_ei) 163 183 return; 164 - } 184 + 185 + inode->i_mode = mode; 186 + inode->i_uid = events_ei->attr.uid; 187 + inode->i_gid = events_ei->attr.gid; 188 + 189 + if (!attr) 190 + return; 165 191 166 192 if (attr->mode & EVENTFS_SAVE_MODE) 167 193 inode->i_mode = attr->mode & EVENTFS_MODE_MASK; 168 - else 169 - inode->i_mode = mode; 170 194 171 195 if (attr->mode & EVENTFS_SAVE_UID) 172 196 inode->i_uid = attr->uid; 173 - else 174 - inode->i_uid = d_inode(dentry->d_parent)->i_uid; 175 197 176 198 if (attr->mode & EVENTFS_SAVE_GID) 177 199 inode->i_gid = attr->gid; 178 - else 179 - inode->i_gid = d_inode(dentry->d_parent)->i_gid; 200 + } 201 + 202 + static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level) 203 + { 204 + struct eventfs_inode *ei_child; 205 + 206 + /* at most we have events/system/event */ 207 + if (WARN_ON_ONCE(level > 3)) 208 + return; 209 + 210 + ei->attr.gid = gid; 211 + 212 + if (ei->entry_attrs) { 213 + for (int i = 0; i < ei->nr_entries; i++) { 214 + ei->entry_attrs[i].gid = gid; 215 + } 216 + } 217 + 218 + /* 219 + * Only eventfs_inode with dentries are updated, make sure 220 + * all eventfs_inodes are updated. If one of the children 221 + * do not have a dentry, this function must traverse it. 
222 + */ 223 + list_for_each_entry_srcu(ei_child, &ei->children, list, 224 + srcu_read_lock_held(&eventfs_srcu)) { 225 + if (!ei_child->dentry) 226 + update_gid(ei_child, gid, level + 1); 227 + } 228 + } 229 + 230 + void eventfs_update_gid(struct dentry *dentry, kgid_t gid) 231 + { 232 + struct eventfs_inode *ei = dentry->d_fsdata; 233 + int idx; 234 + 235 + idx = srcu_read_lock(&eventfs_srcu); 236 + update_gid(ei, gid, 0); 237 + srcu_read_unlock(&eventfs_srcu, idx); 180 238 } 181 239 182 240 /** ··· 932 860 struct eventfs_inode *ei; 933 861 struct tracefs_inode *ti; 934 862 struct inode *inode; 863 + kuid_t uid; 864 + kgid_t gid; 935 865 936 866 if (security_locked_down(LOCKDOWN_TRACEFS)) 937 867 return NULL; ··· 958 884 ei->dentry = dentry; 959 885 ei->entries = entries; 960 886 ei->nr_entries = size; 887 + ei->is_events = 1; 961 888 ei->data = data; 962 889 ei->name = kstrdup_const(name, GFP_KERNEL); 963 890 if (!ei->name) 964 891 goto fail; 892 + 893 + /* Save the ownership of this directory */ 894 + uid = d_inode(dentry->d_parent)->i_uid; 895 + gid = d_inode(dentry->d_parent)->i_gid; 896 + 897 + /* This is used as the default ownership of the files and directories */ 898 + ei->attr.uid = uid; 899 + ei->attr.gid = gid; 965 900 966 901 INIT_LIST_HEAD(&ei->children); 967 902 INIT_LIST_HEAD(&ei->list); ··· 980 897 ti->private = ei; 981 898 982 899 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 900 + inode->i_uid = uid; 901 + inode->i_gid = gid; 983 902 inode->i_op = &eventfs_root_dir_inode_operations; 984 903 inode->i_fop = &eventfs_file_operations; 985 904
+6
fs/tracefs/inode.c
··· 210 210 next = this_parent->d_subdirs.next; 211 211 resume: 212 212 while (next != &this_parent->d_subdirs) { 213 + struct tracefs_inode *ti; 213 214 struct list_head *tmp = next; 214 215 struct dentry *dentry = list_entry(tmp, struct dentry, d_child); 215 216 next = tmp->next; ··· 218 217 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 219 218 220 219 change_gid(dentry, gid); 220 + 221 + /* If this is the events directory, update that too */ 222 + ti = get_tracefs(dentry->d_inode); 223 + if (ti && (ti->flags & TRACEFS_EVENT_INODE)) 224 + eventfs_update_gid(dentry, gid); 221 225 222 226 if (!list_empty(&dentry->d_subdirs)) { 223 227 spin_unlock(&this_parent->d_lock);
+2
fs/tracefs/internal.h
··· 62 62 struct rcu_head rcu; 63 63 }; 64 64 unsigned int is_freed:1; 65 + unsigned int is_events:1; 65 66 unsigned int nr_entries:31; 66 67 }; 67 68 ··· 78 77 struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); 79 78 struct dentry *eventfs_failed_creating(struct dentry *dentry); 80 79 struct dentry *eventfs_end_creating(struct dentry *dentry); 80 + void eventfs_update_gid(struct dentry *dentry, kgid_t gid); 81 81 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); 82 82 83 83 #endif /* _TRACEFS_INTERNAL_H */
+47 -53
kernel/trace/ftrace.c
··· 1183 1183 hash->count++; 1184 1184 } 1185 1185 1186 - static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) 1186 + static struct ftrace_func_entry * 1187 + add_hash_entry(struct ftrace_hash *hash, unsigned long ip) 1187 1188 { 1188 1189 struct ftrace_func_entry *entry; 1189 1190 1190 1191 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 1191 1192 if (!entry) 1192 - return -ENOMEM; 1193 + return NULL; 1193 1194 1194 1195 entry->ip = ip; 1195 1196 __add_hash_entry(hash, entry); 1196 1197 1197 - return 0; 1198 + return entry; 1198 1199 } 1199 1200 1200 1201 static void ··· 1350 1349 struct ftrace_func_entry *entry; 1351 1350 struct ftrace_hash *new_hash; 1352 1351 int size; 1353 - int ret; 1354 1352 int i; 1355 1353 1356 1354 new_hash = alloc_ftrace_hash(size_bits); ··· 1366 1366 size = 1 << hash->size_bits; 1367 1367 for (i = 0; i < size; i++) { 1368 1368 hlist_for_each_entry(entry, &hash->buckets[i], hlist) { 1369 - ret = add_hash_entry(new_hash, entry->ip); 1370 - if (ret < 0) 1369 + if (add_hash_entry(new_hash, entry->ip) == NULL) 1371 1370 goto free_hash; 1372 1371 } 1373 1372 } ··· 2535 2536 2536 2537 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS 2537 2538 /* Protected by rcu_tasks for reading, and direct_mutex for writing */ 2538 - static struct ftrace_hash *direct_functions = EMPTY_HASH; 2539 + static struct ftrace_hash __rcu *direct_functions = EMPTY_HASH; 2539 2540 static DEFINE_MUTEX(direct_mutex); 2540 2541 int ftrace_direct_func_count; 2541 2542 ··· 2552 2553 return 0; 2553 2554 2554 2555 return entry->direct; 2555 - } 2556 - 2557 - static struct ftrace_func_entry* 2558 - ftrace_add_rec_direct(unsigned long ip, unsigned long addr, 2559 - struct ftrace_hash **free_hash) 2560 - { 2561 - struct ftrace_func_entry *entry; 2562 - 2563 - if (ftrace_hash_empty(direct_functions) || 2564 - direct_functions->count > 2 * (1 << direct_functions->size_bits)) { 2565 - struct ftrace_hash *new_hash; 2566 - int size = ftrace_hash_empty(direct_functions) ? 
0 : 2567 - direct_functions->count + 1; 2568 - 2569 - if (size < 32) 2570 - size = 32; 2571 - 2572 - new_hash = dup_hash(direct_functions, size); 2573 - if (!new_hash) 2574 - return NULL; 2575 - 2576 - *free_hash = direct_functions; 2577 - direct_functions = new_hash; 2578 - } 2579 - 2580 - entry = kmalloc(sizeof(*entry), GFP_KERNEL); 2581 - if (!entry) 2582 - return NULL; 2583 - 2584 - entry->ip = ip; 2585 - entry->direct = addr; 2586 - __add_hash_entry(direct_functions, entry); 2587 - return entry; 2588 2556 } 2589 2557 2590 2558 static void call_direct_funcs(unsigned long ip, unsigned long pip, ··· 4189 4223 /* Do nothing if it exists */ 4190 4224 if (entry) 4191 4225 return 0; 4192 - 4193 - ret = add_hash_entry(hash, rec->ip); 4226 + if (add_hash_entry(hash, rec->ip) == NULL) 4227 + ret = -ENOMEM; 4194 4228 } 4195 4229 return ret; 4196 4230 } ··· 5232 5266 return 0; 5233 5267 } 5234 5268 5235 - return add_hash_entry(hash, ip); 5269 + entry = add_hash_entry(hash, ip); 5270 + return entry ? 0 : -ENOMEM; 5236 5271 } 5237 5272 5238 5273 static int ··· 5377 5410 */ 5378 5411 int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) 5379 5412 { 5380 - struct ftrace_hash *hash, *free_hash = NULL; 5413 + struct ftrace_hash *hash, *new_hash = NULL, *free_hash = NULL; 5381 5414 struct ftrace_func_entry *entry, *new; 5382 5415 int err = -EBUSY, size, i; 5383 5416 ··· 5403 5436 } 5404 5437 } 5405 5438 5406 - /* ... and insert them to direct_functions hash. 
*/ 5407 5439 err = -ENOMEM; 5440 + 5441 + /* Make a copy hash to place the new and the old entries in */ 5442 + size = hash->count + direct_functions->count; 5443 + if (size > 32) 5444 + size = 32; 5445 + new_hash = alloc_ftrace_hash(fls(size)); 5446 + if (!new_hash) 5447 + goto out_unlock; 5448 + 5449 + /* Now copy over the existing direct entries */ 5450 + size = 1 << direct_functions->size_bits; 5451 + for (i = 0; i < size; i++) { 5452 + hlist_for_each_entry(entry, &direct_functions->buckets[i], hlist) { 5453 + new = add_hash_entry(new_hash, entry->ip); 5454 + if (!new) 5455 + goto out_unlock; 5456 + new->direct = entry->direct; 5457 + } 5458 + } 5459 + 5460 + /* ... and add the new entries */ 5461 + size = 1 << hash->size_bits; 5408 5462 for (i = 0; i < size; i++) { 5409 5463 hlist_for_each_entry(entry, &hash->buckets[i], hlist) { 5410 - new = ftrace_add_rec_direct(entry->ip, addr, &free_hash); 5464 + new = add_hash_entry(new_hash, entry->ip); 5411 5465 if (!new) 5412 - goto out_remove; 5466 + goto out_unlock; 5467 + /* Update both the copy and the hash entry */ 5468 + new->direct = addr; 5413 5469 entry->direct = addr; 5414 5470 } 5415 5471 } 5472 + 5473 + free_hash = direct_functions; 5474 + rcu_assign_pointer(direct_functions, new_hash); 5475 + new_hash = NULL; 5416 5476 5417 5477 ops->func = call_direct_funcs; 5418 5478 ops->flags = MULTI_FLAGS; ··· 5448 5454 5449 5455 err = register_ftrace_function_nolock(ops); 5450 5456 5451 - out_remove: 5452 - if (err) 5453 - remove_direct_functions_hash(hash, addr); 5454 - 5455 5457 out_unlock: 5456 5458 mutex_unlock(&direct_mutex); 5457 5459 5458 - if (free_hash) { 5460 + if (free_hash && free_hash != EMPTY_HASH) { 5459 5461 synchronize_rcu_tasks(); 5460 5462 free_ftrace_hash(free_hash); 5461 5463 } 5464 + 5465 + if (new_hash) 5466 + free_ftrace_hash(new_hash); 5467 + 5462 5468 return err; 5463 5469 } 5464 5470 EXPORT_SYMBOL_GPL(register_ftrace_direct); ··· 6303 6309 6304 6310 if (entry) 6305 6311 continue; 6306 - if 
(add_hash_entry(hash, rec->ip) < 0) 6312 + if (add_hash_entry(hash, rec->ip) == NULL) 6307 6313 goto out; 6308 6314 } else { 6309 6315 if (entry) {
+9 -3
kernel/trace/ring_buffer.c
··· 881 881 if (!nr_pages || !full) 882 882 return true; 883 883 884 - dirty = ring_buffer_nr_dirty_pages(buffer, cpu); 884 + /* 885 + * Add one as dirty will never equal nr_pages, as the sub-buffer 886 + * that the writer is on is not counted as dirty. 887 + * This is needed if "buffer_percent" is set to 100. 888 + */ 889 + dirty = ring_buffer_nr_dirty_pages(buffer, cpu) + 1; 885 890 886 - return (dirty * 100) > (full * nr_pages); 891 + return (dirty * 100) >= (full * nr_pages); 887 892 } 888 893 889 894 /* ··· 949 944 /* make sure the waiters see the new index */ 950 945 smp_wmb(); 951 946 952 - rb_wake_up_waiters(&rbwork->work); 947 + /* This can be called in any context */ 948 + irq_work_queue(&rbwork->work); 953 949 } 954 950 955 951 /**
+17 -3
kernel/trace/trace.c
··· 1894 1894 __update_max_tr(tr, tsk, cpu); 1895 1895 1896 1896 arch_spin_unlock(&tr->max_lock); 1897 + 1898 + /* Any waiters on the old snapshot buffer need to wake up */ 1899 + ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); 1897 1900 } 1898 1901 1899 1902 /** ··· 1948 1945 1949 1946 static int wait_on_pipe(struct trace_iterator *iter, int full) 1950 1947 { 1948 + int ret; 1949 + 1951 1950 /* Iterators are static, they should be filled or empty */ 1952 1951 if (trace_buffer_iter(iter, iter->cpu_file)) 1953 1952 return 0; 1954 1953 1955 - return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, 1956 - full); 1954 + ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full); 1955 + 1956 + #ifdef CONFIG_TRACER_MAX_TRACE 1957 + /* 1958 + * Make sure this is still the snapshot buffer, as if a snapshot were 1959 + * to happen, this would now be the main buffer. 1960 + */ 1961 + if (iter->snapshot) 1962 + iter->array_buffer = &iter->tr->max_buffer; 1963 + #endif 1964 + return ret; 1957 1965 } 1958 1966 1959 1967 #ifdef CONFIG_FTRACE_STARTUP_TEST ··· 8531 8517 8532 8518 wait_index = READ_ONCE(iter->wait_index); 8533 8519 8534 - ret = wait_on_pipe(iter, iter->tr->buffer_percent); 8520 + ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); 8535 8521 if (ret) 8536 8522 goto out; 8537 8523