Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tracing: Add non-consuming read to trace remotes

Allow reading the trace file for trace remotes. This performs a
non-consuming read of the trace buffer.

Link: https://patch.msgid.link/20260309162516.2623589-8-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

Authored by Vincent Donnefort and committed by Steven Rostedt (Google)
330b0cce 9af4ab0e

+326 -16
+1 -1
kernel/trace/trace.c
··· 3856 3856 * Should be used after trace_array_get(), trace_types_lock 3857 3857 * ensures that i_cdev was already initialized. 3858 3858 */ 3859 - static inline int tracing_get_cpu(struct inode *inode) 3859 + int tracing_get_cpu(struct inode *inode) 3860 3860 { 3861 3861 if (inode->i_cdev) /* See trace_create_cpu_file() */ 3862 3862 return (long)inode->i_cdev - 1;
+1
kernel/trace/trace.h
··· 695 695 void *data, 696 696 long cpu, 697 697 const struct file_operations *fops); 698 + int tracing_get_cpu(struct inode *inode); 698 699 699 700 700 701 /**
+324 -15
kernel/trace/trace_remote.c
··· 18 18 #define TRACEFS_MODE_WRITE 0640 19 19 #define TRACEFS_MODE_READ 0440 20 20 21 + enum tri_type { 22 + TRI_CONSUMING, 23 + TRI_NONCONSUMING, 24 + }; 25 + 21 26 struct trace_remote_iterator { 22 27 struct trace_remote *remote; 23 28 struct trace_seq seq; 24 29 struct delayed_work poll_work; 25 30 unsigned long lost_events; 26 31 u64 ts; 32 + struct ring_buffer_iter *rb_iter; 33 + struct ring_buffer_iter **rb_iters; 27 34 int cpu; 28 35 int evt_cpu; 36 + loff_t pos; 37 + enum tri_type type; 29 38 }; 30 39 31 40 struct trace_remote { ··· 45 36 unsigned long trace_buffer_size; 46 37 struct ring_buffer_remote rb_remote; 47 38 struct mutex lock; 39 + struct rw_semaphore reader_lock; 40 + struct rw_semaphore *pcpu_reader_locks; 48 41 unsigned int nr_readers; 49 42 unsigned int poll_ms; 50 43 bool tracing_on; ··· 241 230 if (ret) 242 231 return ret; 243 232 233 + if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) { 234 + int lock_cpu; 235 + 236 + remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks), 237 + GFP_KERNEL); 238 + if (!remote->pcpu_reader_locks) { 239 + trace_remote_try_unload(remote); 240 + return -ENOMEM; 241 + } 242 + 243 + for_each_possible_cpu(lock_cpu) 244 + init_rwsem(&remote->pcpu_reader_locks[lock_cpu]); 245 + } 246 + 244 247 remote->nr_readers++; 245 248 246 249 return 0; ··· 268 243 remote->nr_readers--; 269 244 if (remote->nr_readers) 270 245 return; 246 + 247 + kfree(remote->pcpu_reader_locks); 248 + remote->pcpu_reader_locks = NULL; 271 249 272 250 trace_remote_try_unload(remote); 273 251 } ··· 286 258 msecs_to_jiffies(iter->remote->poll_ms)); 287 259 } 288 260 289 - static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu) 261 + static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu) 262 + { 263 + if (cpu != RING_BUFFER_ALL_CPUS) { 264 + ring_buffer_read_finish(iter->rb_iter); 265 + return; 266 + } 267 + 268 + for_each_possible_cpu(cpu) { 
269 + if (iter->rb_iters[cpu]) 270 + ring_buffer_read_finish(iter->rb_iters[cpu]); 271 + } 272 + 273 + kfree(iter->rb_iters); 274 + } 275 + 276 + static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu) 277 + { 278 + if (cpu != RING_BUFFER_ALL_CPUS) { 279 + iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL); 280 + 281 + return iter->rb_iter ? 0 : -ENOMEM; 282 + } 283 + 284 + iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL); 285 + if (!iter->rb_iters) 286 + return -ENOMEM; 287 + 288 + for_each_possible_cpu(cpu) { 289 + iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu, 290 + GFP_KERNEL); 291 + if (!iter->rb_iters[cpu]) { 292 + __free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS); 293 + return -ENOMEM; 294 + } 295 + } 296 + 297 + return 0; 298 + } 299 + 300 + static struct trace_remote_iterator 301 + *trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type) 290 302 { 291 303 struct trace_remote_iterator *iter = NULL; 292 304 int ret; 293 305 294 306 lockdep_assert_held(&remote->lock); 295 307 308 + if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote)) 309 + return NULL; 296 310 297 311 ret = trace_remote_get(remote, cpu); 298 312 if (ret) ··· 349 279 if (iter) { 350 280 iter->remote = remote; 351 281 iter->cpu = cpu; 282 + iter->type = type; 352 283 trace_seq_init(&iter->seq); 353 - INIT_DELAYED_WORK(&iter->poll_work, __poll_remote); 354 - schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms)); 284 + 285 + switch (type) { 286 + case TRI_CONSUMING: 287 + INIT_DELAYED_WORK(&iter->poll_work, __poll_remote); 288 + schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms)); 289 + break; 290 + case TRI_NONCONSUMING: 291 + ret = __alloc_ring_buffer_iter(iter, cpu); 292 + break; 293 + } 294 + 295 + if (ret) 296 + goto err; 355 297 356 298 return iter; 357 299 } ··· 387 305 388 306 
lockdep_assert_held(&remote->lock); 389 307 308 + switch (iter->type) { 309 + case TRI_CONSUMING: 310 + cancel_delayed_work_sync(&iter->poll_work); 311 + break; 312 + case TRI_NONCONSUMING: 313 + __free_ring_buffer_iter(iter, iter->cpu); 314 + break; 315 + } 316 + 390 317 kfree(iter); 391 318 trace_remote_put(remote); 319 + } 320 + 321 + static void trace_remote_iter_read_start(struct trace_remote_iterator *iter) 322 + { 323 + struct trace_remote *remote = iter->remote; 324 + int cpu = iter->cpu; 325 + 326 + /* Acquire global reader lock */ 327 + if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING) 328 + down_write(&remote->reader_lock); 329 + else 330 + down_read(&remote->reader_lock); 331 + 332 + if (cpu == RING_BUFFER_ALL_CPUS) 333 + return; 334 + 335 + /* 336 + * No need for the remote lock here, iter holds a reference on 337 + * remote->nr_readers 338 + */ 339 + 340 + /* Get the per-CPU one */ 341 + if (WARN_ON_ONCE(!remote->pcpu_reader_locks)) 342 + return; 343 + 344 + if (iter->type == TRI_CONSUMING) 345 + down_write(&remote->pcpu_reader_locks[cpu]); 346 + else 347 + down_read(&remote->pcpu_reader_locks[cpu]); 348 + } 349 + 350 + static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter) 351 + { 352 + struct trace_remote *remote = iter->remote; 353 + int cpu = iter->cpu; 354 + 355 + /* Release per-CPU reader lock */ 356 + if (cpu != RING_BUFFER_ALL_CPUS) { 357 + /* 358 + * No need for the remote lock here, iter holds a reference on 359 + * remote->nr_readers 360 + */ 361 + if (iter->type == TRI_CONSUMING) 362 + up_write(&remote->pcpu_reader_locks[cpu]); 363 + else 364 + up_read(&remote->pcpu_reader_locks[cpu]); 365 + } 366 + 367 + /* Release global reader lock */ 368 + if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING) 369 + up_write(&remote->reader_lock); 370 + else 371 + up_read(&remote->reader_lock); 372 + } 373 + 374 + static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu) 
375 + { 376 + return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu]; 377 + } 378 + 379 + static struct ring_buffer_event * 380 + __peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events) 381 + { 382 + struct ring_buffer_event *rb_evt; 383 + struct ring_buffer_iter *rb_iter; 384 + 385 + switch (iter->type) { 386 + case TRI_CONSUMING: 387 + return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events); 388 + case TRI_NONCONSUMING: 389 + rb_iter = __get_rb_iter(iter, cpu); 390 + rb_evt = ring_buffer_iter_peek(rb_iter, ts); 391 + if (!rb_evt) 392 + return NULL; 393 + 394 + *lost_events = ring_buffer_iter_dropped(rb_iter); 395 + 396 + return rb_evt; 397 + } 398 + 399 + return NULL; 392 400 } 393 401 394 402 static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter) ··· 490 318 if (ring_buffer_empty_cpu(trace_buffer, cpu)) 491 319 return false; 492 320 493 - if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events)) 321 + if (!__peek_event(iter, cpu, &iter->ts, &iter->lost_events)) 494 322 return false; 495 323 496 324 iter->evt_cpu = cpu; ··· 505 333 if (ring_buffer_empty_cpu(trace_buffer, cpu)) 506 334 continue; 507 335 508 - if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events)) 336 + if (!__peek_event(iter, cpu, &ts, &lost_events)) 509 337 continue; 510 338 511 339 if (ts >= iter->ts) ··· 517 345 } 518 346 519 347 return iter->ts != U64_MAX; 348 + } 349 + 350 + static void trace_remote_iter_move(struct trace_remote_iterator *iter) 351 + { 352 + struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 353 + 354 + switch (iter->type) { 355 + case TRI_CONSUMING: 356 + ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL); 357 + break; 358 + case TRI_NONCONSUMING: 359 + ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu)); 360 + break; 361 + } 520 362 } 521 363 522 364 static int trace_remote_iter_print_event(struct trace_remote_iterator 
*iter) ··· 555 369 { 556 370 struct trace_remote *remote = inode->i_private; 557 371 struct trace_remote_iterator *iter; 558 - int cpu = RING_BUFFER_ALL_CPUS; 559 - 560 - if (inode->i_cdev) 561 - cpu = (long)inode->i_cdev - 1; 372 + int cpu = tracing_get_cpu(inode); 562 373 563 374 guard(mutex)(&remote->lock); 564 - iter = trace_remote_iter(remote, cpu); 375 + 376 + iter = trace_remote_iter(remote, cpu, TRI_CONSUMING); 377 + if (IS_ERR(iter)) 378 + return PTR_ERR(iter); 379 + 565 380 filp->private_data = iter; 566 381 567 382 return IS_ERR(iter) ? PTR_ERR(iter) : 0; ··· 597 410 if (ret < 0) 598 411 return ret; 599 412 413 + trace_remote_iter_read_start(iter); 414 + 600 415 while (trace_remote_iter_read_event(iter)) { 601 416 int prev_len = iter->seq.seq.len; 602 417 ··· 607 418 break; 608 419 } 609 420 610 - ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL); 421 + trace_remote_iter_move(iter); 611 422 } 423 + 424 + trace_remote_iter_read_finished(iter); 612 425 613 426 goto copy_to_user; 614 427 } ··· 621 430 .release = trace_pipe_release, 622 431 }; 623 432 433 + static void *trace_next(struct seq_file *m, void *v, loff_t *pos) 434 + { 435 + struct trace_remote_iterator *iter = m->private; 436 + 437 + ++*pos; 438 + 439 + if (!iter || !trace_remote_iter_read_event(iter)) 440 + return NULL; 441 + 442 + trace_remote_iter_move(iter); 443 + iter->pos++; 444 + 445 + return iter; 446 + } 447 + 448 + static void *trace_start(struct seq_file *m, loff_t *pos) 449 + { 450 + struct trace_remote_iterator *iter = m->private; 451 + loff_t i; 452 + 453 + if (!iter) 454 + return NULL; 455 + 456 + trace_remote_iter_read_start(iter); 457 + 458 + if (!*pos) { 459 + iter->pos = -1; 460 + return trace_next(m, NULL, &i); 461 + } 462 + 463 + i = iter->pos; 464 + while (i < *pos) { 465 + iter = trace_next(m, NULL, &i); 466 + if (!iter) 467 + return NULL; 468 + } 469 + 470 + return iter; 471 + } 472 + 473 + static int trace_show(struct seq_file *m, void *v) 474 + { 475 + struct 
trace_remote_iterator *iter = v; 476 + 477 + trace_seq_init(&iter->seq); 478 + 479 + if (trace_remote_iter_print_event(iter)) { 480 + seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id); 481 + return 0; 482 + } 483 + 484 + return trace_print_seq(m, &iter->seq); 485 + } 486 + 487 + static void trace_stop(struct seq_file *m, void *v) 488 + { 489 + struct trace_remote_iterator *iter = m->private; 490 + 491 + if (iter) 492 + trace_remote_iter_read_finished(iter); 493 + } 494 + 495 + static const struct seq_operations trace_sops = { 496 + .start = trace_start, 497 + .next = trace_next, 498 + .show = trace_show, 499 + .stop = trace_stop, 500 + }; 501 + 502 + static int trace_open(struct inode *inode, struct file *filp) 503 + { 504 + struct trace_remote *remote = inode->i_private; 505 + struct trace_remote_iterator *iter = NULL; 506 + int cpu = tracing_get_cpu(inode); 507 + int ret; 508 + 509 + if (!(filp->f_mode & FMODE_READ)) 510 + return 0; 511 + 512 + guard(mutex)(&remote->lock); 513 + 514 + iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING); 515 + if (IS_ERR(iter)) 516 + return PTR_ERR(iter); 517 + 518 + ret = seq_open(filp, &trace_sops); 519 + if (ret) { 520 + trace_remote_iter_free(iter); 521 + return ret; 522 + } 523 + 524 + ((struct seq_file *)filp->private_data)->private = (void *)iter; 525 + 526 + return 0; 527 + } 528 + 529 + static int trace_release(struct inode *inode, struct file *filp) 530 + { 531 + struct trace_remote_iterator *iter; 532 + 533 + if (!(filp->f_mode & FMODE_READ)) 534 + return 0; 535 + 536 + iter = ((struct seq_file *)filp->private_data)->private; 537 + seq_release(inode, filp); 538 + 539 + if (!iter) 540 + return 0; 541 + 542 + guard(mutex)(&iter->remote->lock); 543 + 544 + trace_remote_iter_free(iter); 545 + 546 + return 0; 547 + } 548 + 624 549 static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 625 550 { 626 551 struct inode *inode = file_inode(filp); 627 552 struct trace_remote 
*remote = inode->i_private; 628 - int cpu = RING_BUFFER_ALL_CPUS; 629 - 630 - if (inode->i_cdev) 631 - cpu = (long)inode->i_cdev - 1; 553 + int cpu = tracing_get_cpu(inode); 632 554 633 555 guard(mutex)(&remote->lock); 634 556 ··· 751 447 } 752 448 753 449 static const struct file_operations trace_fops = { 450 + .open = trace_open, 754 451 .write = trace_write, 452 + .read = seq_read, 453 + .read_iter = seq_read_iter, 454 + .release = trace_release, 755 455 }; 756 456 757 457 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote) ··· 873 565 remote->trace_buffer_size = 7 << 10; 874 566 remote->poll_ms = 100; 875 567 mutex_init(&remote->lock); 568 + init_rwsem(&remote->reader_lock); 876 569 877 570 if (trace_remote_init_tracefs(name, remote)) { 878 571 kfree(remote);