Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'trace-v6.16-3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull more tracing fixes from Steven Rostedt:

- Fix regression of waiting a long time on updating trace event filters

When the faultable tracepoints were added, they needed task trace RCU
synchronization.

This was added to the tracepoint_synchronize_unregister() function.
The filter logic always called this function whenever it updated the
trace event filters before freeing the old filters. This increased
the run time of "trace-cmd record" from 13 seconds to over
2 minutes.

Move the freeing of the filters to call_rcu*() logic, which brings
the time back down to 13 seconds.

- Fix ring_buffer_subbuf_order_set() error path lock protection

The error path of ring_buffer_subbuf_order_set() released the
mutex too early, which allowed subsequent attempts to set the
subbuffer size to corrupt the data and cause a bug.

Moving the mutex unlock to the end of the error path prevents
reentrant access to the critical data and also allows the
function to convert the taking of the mutex over to the guard()
logic.

- Remove unused power management clock events

The clock events were added in 2010 for power management. In 2011 arm
started using them. In 2013 the code that used them was removed. These
events have been wasting memory since then.

- Fix sparse warnings

There were a few places in trace_events_filter.c that sparse warned
about, where file->filter was referenced directly even though it is
annotated with an __rcu tag. Use the helper functions and fix them up
to use rcu_dereference() properly.

* tag 'trace-v6.16-3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
tracing: Add rcu annotation around file->filter accesses
tracing: PM: Remove unused clock events
ring-buffer: Fix buffer locking in ring_buffer_subbuf_order_set()
tracing: Fix regression of filter waiting a long time on RCU synchronization

+143 -100
-47
include/trace/events/power.h
··· 338 338 ); 339 339 340 340 /* 341 - * The clock events are used for clock enable/disable and for 342 - * clock rate change 343 - */ 344 - DECLARE_EVENT_CLASS(clock, 345 - 346 - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), 347 - 348 - TP_ARGS(name, state, cpu_id), 349 - 350 - TP_STRUCT__entry( 351 - __string( name, name ) 352 - __field( u64, state ) 353 - __field( u64, cpu_id ) 354 - ), 355 - 356 - TP_fast_assign( 357 - __assign_str(name); 358 - __entry->state = state; 359 - __entry->cpu_id = cpu_id; 360 - ), 361 - 362 - TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), 363 - (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) 364 - ); 365 - 366 - DEFINE_EVENT(clock, clock_enable, 367 - 368 - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), 369 - 370 - TP_ARGS(name, state, cpu_id) 371 - ); 372 - 373 - DEFINE_EVENT(clock, clock_disable, 374 - 375 - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), 376 - 377 - TP_ARGS(name, state, cpu_id) 378 - ); 379 - 380 - DEFINE_EVENT(clock, clock_set_rate, 381 - 382 - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), 383 - 384 - TP_ARGS(name, state, cpu_id) 385 - ); 386 - 387 - /* 388 341 * The power domain events are used for power domains transitions 389 342 */ 390 343 DECLARE_EVENT_CLASS(power_domain,
+1 -3
kernel/trace/ring_buffer.c
··· 6795 6795 old_size = buffer->subbuf_size; 6796 6796 6797 6797 /* prevent another thread from changing buffer sizes */ 6798 - mutex_lock(&buffer->mutex); 6798 + guard(mutex)(&buffer->mutex); 6799 6799 atomic_inc(&buffer->record_disabled); 6800 6800 6801 6801 /* Make sure all commits have finished */ ··· 6900 6900 } 6901 6901 6902 6902 atomic_dec(&buffer->record_disabled); 6903 - mutex_unlock(&buffer->mutex); 6904 6903 6905 6904 return 0; 6906 6905 ··· 6908 6909 buffer->subbuf_size = old_size; 6909 6910 6910 6911 atomic_dec(&buffer->record_disabled); 6911 - mutex_unlock(&buffer->mutex); 6912 6912 6913 6913 for_each_buffer_cpu(buffer, cpu) { 6914 6914 cpu_buffer = buffer->buffers[cpu];
+142 -50
kernel/trace/trace_events_filter.c
··· 1250 1250 1251 1251 static inline struct event_filter *event_filter(struct trace_event_file *file) 1252 1252 { 1253 - return file->filter; 1253 + return rcu_dereference_protected(file->filter, 1254 + lockdep_is_held(&event_mutex)); 1255 + 1254 1256 } 1255 1257 1256 1258 /* caller must hold event_mutex */ ··· 1322 1320 static inline void __remove_filter(struct trace_event_file *file) 1323 1321 { 1324 1322 filter_disable(file); 1325 - remove_filter_string(file->filter); 1323 + remove_filter_string(event_filter(file)); 1326 1324 } 1327 1325 1328 1326 static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir, ··· 1337 1335 } 1338 1336 } 1339 1337 1338 + struct filter_list { 1339 + struct list_head list; 1340 + struct event_filter *filter; 1341 + }; 1342 + 1343 + struct filter_head { 1344 + struct list_head list; 1345 + struct rcu_head rcu; 1346 + }; 1347 + 1348 + 1349 + static void free_filter_list(struct rcu_head *rhp) 1350 + { 1351 + struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu); 1352 + struct filter_list *filter_item, *tmp; 1353 + 1354 + list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) { 1355 + __free_filter(filter_item->filter); 1356 + list_del(&filter_item->list); 1357 + kfree(filter_item); 1358 + } 1359 + kfree(filter_list); 1360 + } 1361 + 1362 + static void free_filter_list_tasks(struct rcu_head *rhp) 1363 + { 1364 + call_rcu(rhp, free_filter_list); 1365 + } 1366 + 1367 + /* 1368 + * The tracepoint_synchronize_unregister() is a double rcu call. 1369 + * It calls synchronize_rcu_tasks_trace() followed by synchronize_rcu(). 1370 + * Instead of waiting for it, simply call these via the call_rcu*() 1371 + * variants. 
1372 + */ 1373 + static void delay_free_filter(struct filter_head *head) 1374 + { 1375 + call_rcu_tasks_trace(&head->rcu, free_filter_list_tasks); 1376 + } 1377 + 1378 + static void try_delay_free_filter(struct event_filter *filter) 1379 + { 1380 + struct filter_head *head; 1381 + struct filter_list *item; 1382 + 1383 + head = kmalloc(sizeof(*head), GFP_KERNEL); 1384 + if (!head) 1385 + goto free_now; 1386 + 1387 + INIT_LIST_HEAD(&head->list); 1388 + 1389 + item = kmalloc(sizeof(*item), GFP_KERNEL); 1390 + if (!item) { 1391 + kfree(head); 1392 + goto free_now; 1393 + } 1394 + 1395 + item->filter = filter; 1396 + list_add_tail(&item->list, &head->list); 1397 + delay_free_filter(head); 1398 + return; 1399 + 1400 + free_now: 1401 + /* Make sure the filter is not being used */ 1402 + tracepoint_synchronize_unregister(); 1403 + __free_filter(filter); 1404 + } 1405 + 1340 1406 static inline void __free_subsystem_filter(struct trace_event_file *file) 1341 1407 { 1342 - __free_filter(file->filter); 1408 + __free_filter(event_filter(file)); 1343 1409 file->filter = NULL; 1344 1410 } 1345 1411 1412 + static inline void event_set_filter(struct trace_event_file *file, 1413 + struct event_filter *filter) 1414 + { 1415 + rcu_assign_pointer(file->filter, filter); 1416 + } 1417 + 1418 + static inline void event_clear_filter(struct trace_event_file *file) 1419 + { 1420 + RCU_INIT_POINTER(file->filter, NULL); 1421 + } 1422 + 1346 1423 static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, 1347 - struct trace_array *tr) 1424 + struct trace_array *tr, 1425 + struct event_filter *filter) 1348 1426 { 1349 1427 struct trace_event_file *file; 1428 + struct filter_head *head; 1429 + struct filter_list *item; 1430 + 1431 + head = kmalloc(sizeof(*head), GFP_KERNEL); 1432 + if (!head) 1433 + goto free_now; 1434 + 1435 + INIT_LIST_HEAD(&head->list); 1436 + 1437 + item = kmalloc(sizeof(*item), GFP_KERNEL); 1438 + if (!item) { 1439 + kfree(head); 1440 + goto free_now; 1441 + 
} 1442 + 1443 + item->filter = filter; 1444 + list_add_tail(&item->list, &head->list); 1350 1445 1351 1446 list_for_each_entry(file, &tr->events, list) { 1352 1447 if (file->system != dir) 1353 1448 continue; 1449 + item = kmalloc(sizeof(*item), GFP_KERNEL); 1450 + if (!item) 1451 + goto free_now; 1452 + item->filter = event_filter(file); 1453 + list_add_tail(&item->list, &head->list); 1454 + event_clear_filter(file); 1455 + } 1456 + 1457 + delay_free_filter(head); 1458 + return; 1459 + free_now: 1460 + tracepoint_synchronize_unregister(); 1461 + 1462 + if (head) 1463 + free_filter_list(&head->rcu); 1464 + 1465 + list_for_each_entry(file, &tr->events, list) { 1466 + if (file->system != dir || !file->filter) 1467 + continue; 1354 1468 __free_subsystem_filter(file); 1355 1469 } 1470 + __free_filter(filter); 1356 1471 } 1357 1472 1358 1473 int filter_assign_type(const char *type) ··· 2239 2120 trace_buffered_event_enable(); 2240 2121 } 2241 2122 2242 - static inline void event_set_filter(struct trace_event_file *file, 2243 - struct event_filter *filter) 2244 - { 2245 - rcu_assign_pointer(file->filter, filter); 2246 - } 2247 - 2248 - static inline void event_clear_filter(struct trace_event_file *file) 2249 - { 2250 - RCU_INIT_POINTER(file->filter, NULL); 2251 - } 2252 - 2253 - struct filter_list { 2254 - struct list_head list; 2255 - struct event_filter *filter; 2256 - }; 2257 - 2258 2123 static int process_system_preds(struct trace_subsystem_dir *dir, 2259 2124 struct trace_array *tr, 2260 2125 struct filter_parse_error *pe, ··· 2247 2144 struct trace_event_file *file; 2248 2145 struct filter_list *filter_item; 2249 2146 struct event_filter *filter = NULL; 2250 - struct filter_list *tmp; 2251 - LIST_HEAD(filter_list); 2147 + struct filter_head *filter_list; 2252 2148 bool fail = true; 2253 2149 int err; 2150 + 2151 + filter_list = kmalloc(sizeof(*filter_list), GFP_KERNEL); 2152 + if (!filter_list) 2153 + return -ENOMEM; 2154 + 2155 + 
INIT_LIST_HEAD(&filter_list->list); 2254 2156 2255 2157 list_for_each_entry(file, &tr->events, list) { 2256 2158 ··· 2283 2175 if (!filter_item) 2284 2176 goto fail_mem; 2285 2177 2286 - list_add_tail(&filter_item->list, &filter_list); 2178 + list_add_tail(&filter_item->list, &filter_list->list); 2287 2179 /* 2288 2180 * Regardless of if this returned an error, we still 2289 2181 * replace the filter for the call. ··· 2303 2195 * Do a synchronize_rcu() and to ensure all calls are 2304 2196 * done with them before we free them. 2305 2197 */ 2306 - tracepoint_synchronize_unregister(); 2307 - list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { 2308 - __free_filter(filter_item->filter); 2309 - list_del(&filter_item->list); 2310 - kfree(filter_item); 2311 - } 2198 + delay_free_filter(filter_list); 2312 2199 return 0; 2313 2200 fail: 2314 2201 /* No call succeeded */ 2315 - list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { 2316 - list_del(&filter_item->list); 2317 - kfree(filter_item); 2318 - } 2202 + free_filter_list(&filter_list->rcu); 2319 2203 parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); 2320 2204 return -EINVAL; 2321 2205 fail_mem: 2322 2206 __free_filter(filter); 2207 + 2323 2208 /* If any call succeeded, we still need to sync */ 2324 2209 if (!fail) 2325 - tracepoint_synchronize_unregister(); 2326 - list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { 2327 - __free_filter(filter_item->filter); 2328 - list_del(&filter_item->list); 2329 - kfree(filter_item); 2330 - } 2210 + delay_free_filter(filter_list); 2211 + else 2212 + free_filter_list(&filter_list->rcu); 2213 + 2331 2214 return -ENOMEM; 2332 2215 } 2333 2216 ··· 2460 2361 2461 2362 event_clear_filter(file); 2462 2363 2463 - /* Make sure the filter is not being used */ 2464 - tracepoint_synchronize_unregister(); 2465 - __free_filter(filter); 2364 + try_delay_free_filter(filter); 2466 2365 2467 2366 return 0; 2468 2367 } ··· 2484 2387 2485 2388 event_set_filter(file, 
filter); 2486 2389 2487 - if (tmp) { 2488 - /* Make sure the call is done with the filter */ 2489 - tracepoint_synchronize_unregister(); 2490 - __free_filter(tmp); 2491 - } 2390 + if (tmp) 2391 + try_delay_free_filter(tmp); 2492 2392 } 2493 2393 2494 2394 return err; ··· 2511 2417 filter = system->filter; 2512 2418 system->filter = NULL; 2513 2419 /* Ensure all filters are no longer used */ 2514 - tracepoint_synchronize_unregister(); 2515 - filter_free_subsystem_filters(dir, tr); 2516 - __free_filter(filter); 2420 + filter_free_subsystem_filters(dir, tr, filter); 2517 2421 return 0; 2518 2422 } 2519 2423