Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git


ring-buffer: Introduce ring-buffer remotes

Add ring-buffer remotes to support entities outside of the kernel (such
as firmware or a hypervisor) that write events into a ring-buffer using
the tracefs format.

Require a description of the ring-buffer pages (struct
trace_buffer_desc) and callbacks (swap_reader_page and reset) to set up
the ring-buffer on the kernel side.

Expect the remote entity to maintain and update the meta-page.

Link: https://patch.msgid.link/20260309162516.2623589-4-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
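To make the commit's flow concrete, here is a minimal, hypothetical sketch of the kernel-side glue a backend could provide. Only struct ring_buffer_remote, struct trace_buffer_desc and ring_buffer_alloc_remote() below come from this patch; every my_* name is invented for illustration, and the mechanism by which the remote shares its pages (hypercall, shared memory, ...) is left out entirely.

/*
 * Hypothetical backend glue; not part of this patch.
 */
#include <linux/ring_buffer.h>

static int my_swap_reader_page(unsigned int cpu, void *priv)
{
	/* Ask the remote writer to publish a new reader page for @cpu */
	return 0;
}

static int my_reset(unsigned int cpu, void *priv)
{
	/* Ask the remote writer to reset its ring-buffer for @cpu */
	return 0;
}

/* @desc is assumed to already carry the remote's page and meta-page addresses */
static struct trace_buffer *my_attach(struct trace_buffer_desc *desc)
{
	static struct ring_buffer_remote remote = {
		.swap_reader_page	= my_swap_reader_page,
		.reset			= my_reset,
	};

	remote.desc = desc;

	/* Kernel-side, read-only view over the remote's pages */
	return ring_buffer_alloc_remote(&remote);
}

The resulting trace_buffer is read-only from the kernel's perspective: swap_reader_page() is the only way the kernel asks the remote to hand over a new reader page, and reset() is only invoked when the buffer is cleared.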

Authored by Vincent Donnefort, committed by Steven Rostedt (Google)
2e67fabd e682207b

+283 -8

include/linux/ring_buffer.h (+58)
···
 void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
 int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
 int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
+
+struct ring_buffer_desc {
+	int		cpu;
+	unsigned int	nr_page_va; /* excludes the meta page */
+	unsigned long	meta_va;
+	unsigned long	page_va[] __counted_by(nr_page_va);
+};
+
+struct trace_buffer_desc {
+	int	nr_cpus;
+	size_t	struct_len;
+	char	__data[]; /* list of ring_buffer_desc */
+};
+
+static inline struct ring_buffer_desc *__next_ring_buffer_desc(struct ring_buffer_desc *desc)
+{
+	size_t len = struct_size(desc, page_va, desc->nr_page_va);
+
+	return (struct ring_buffer_desc *)((void *)desc + len);
+}
+
+static inline struct ring_buffer_desc *__first_ring_buffer_desc(struct trace_buffer_desc *desc)
+{
+	return (struct ring_buffer_desc *)(&desc->__data[0]);
+}
+
+static inline size_t trace_buffer_desc_size(size_t buffer_size, unsigned int nr_cpus)
+{
+	unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
+	struct ring_buffer_desc *rbdesc;
+
+	return size_add(offsetof(struct trace_buffer_desc, __data),
+			size_mul(nr_cpus, struct_size(rbdesc, page_va, nr_pages)));
+}
+
+#define for_each_ring_buffer_desc(__pdesc, __cpu, __trace_pdesc)		\
+	for (__pdesc = __first_ring_buffer_desc(__trace_pdesc), __cpu = 0;	\
+	     (__cpu) < (__trace_pdesc)->nr_cpus;				\
+	     (__cpu)++, __pdesc = __next_ring_buffer_desc(__pdesc))
+
+struct ring_buffer_remote {
+	struct trace_buffer_desc	*desc;
+	int (*swap_reader_page)(unsigned int cpu, void *priv);
+	int (*reset)(unsigned int cpu, void *priv);
+	void				*priv;
+};
+
+int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu);
+
+struct trace_buffer *
+__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
+			   struct lock_class_key *key);
+
+#define ring_buffer_alloc_remote(remote)			\
+({								\
+	static struct lock_class_key __key;			\
+	__ring_buffer_alloc_remote(remote, &__key);		\
+})
 #endif /* _LINUX_RING_BUFFER_H */
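As a usage sketch for the helpers above: a backend might size the descriptor with trace_buffer_desc_size() and later walk it with for_each_ring_buffer_desc(). The function below is illustrative; filling in each per-CPU ring_buffer_desc with real page addresses is the backend's job and is elided.

/* Illustrative only; not part of this patch */
#include <linux/ring_buffer.h>
#include <linux/slab.h>

static struct trace_buffer_desc *my_build_desc(size_t buffer_size)
{
	size_t len = trace_buffer_desc_size(buffer_size, num_possible_cpus());
	struct trace_buffer_desc *tdesc = kzalloc(len, GFP_KERNEL);
	struct ring_buffer_desc *rbdesc;
	int cpu;

	if (!tdesc)
		return NULL;

	tdesc->nr_cpus = num_possible_cpus();
	tdesc->struct_len = len;

	/* ... fill each ring_buffer_desc (cpu, nr_page_va, meta_va, page_va[]) ... */

	/* Once nr_page_va is set, the list can be walked */
	for_each_ring_buffer_desc(rbdesc, cpu, tdesc)
		pr_info("cpu %d: %u pages\n", rbdesc->cpu, rbdesc->nr_page_va);

	return tdesc;
}

Note that the descriptors are variable-length (page_va[] is a flexible array), which is why the header provides __next_ring_buffer_desc() rather than plain array indexing.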
kernel/trace/ring_buffer.c (+225 -8)
···
 	struct trace_buffer_meta	*meta_page;
 	struct ring_buffer_cpu_meta	*ring_meta;
 
+	struct ring_buffer_remote	*remote;
+
 	/* ring buffer pages to update, > 0 to add, < 0 to remove */
 	long				nr_pages_to_update;
 	struct list_head		new_pages; /* new pages to add */
···
 	struct mutex			mutex;
 
 	struct ring_buffer_per_cpu	**buffers;
+
+	struct ring_buffer_remote	*remote;
 
 	struct hlist_node		node;
 	u64				(*clock)(void);
···
 	}
 }
 
+static struct ring_buffer_desc *ring_buffer_desc(struct trace_buffer_desc *trace_desc, int cpu)
+{
+	struct ring_buffer_desc *desc, *end;
+	size_t len;
+	int i;
+
+	if (!trace_desc)
+		return NULL;
+
+	if (cpu >= trace_desc->nr_cpus)
+		return NULL;
+
+	end = (struct ring_buffer_desc *)((void *)trace_desc + trace_desc->struct_len);
+	desc = __first_ring_buffer_desc(trace_desc);
+	len = struct_size(desc, page_va, desc->nr_page_va);
+	desc = (struct ring_buffer_desc *)((void *)desc + (len * cpu));
+
+	if (desc < end && desc->cpu == cpu)
+		return desc;
+
+	/* Missing CPUs, need to linear search */
+	for_each_ring_buffer_desc(desc, i, trace_desc) {
+		if (desc->cpu == cpu)
+			return desc;
+	}
+
+	return NULL;
+}
+
+static void *ring_buffer_desc_page(struct ring_buffer_desc *desc, int page_id)
+{
+	return page_id > desc->nr_page_va ? NULL : (void *)desc->page_va[page_id];
+}
+
 static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 			       long nr_pages, struct list_head *pages)
 {
···
 	struct ring_buffer_cpu_meta *meta = NULL;
 	struct buffer_page *bpage, *tmp;
 	bool user_thread = current->mm != NULL;
+	struct ring_buffer_desc *desc = NULL;
 	long i;
 
 	/*
···
 	if (buffer->range_addr_start)
 		meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu);
 
+	if (buffer->remote) {
+		desc = ring_buffer_desc(buffer->remote->desc, cpu_buffer->cpu);
+		if (!desc || WARN_ON(desc->nr_page_va != (nr_pages + 1)))
+			return -EINVAL;
+	}
+
 	for (i = 0; i < nr_pages; i++) {
 
 		bpage = alloc_cpu_page(cpu_buffer->cpu);
···
 			rb_meta_buffer_update(cpu_buffer, bpage);
 			bpage->range = 1;
 			bpage->id = i + 1;
+		} else if (desc) {
+			void *p = ring_buffer_desc_page(desc, i + 1);
+
+			if (WARN_ON(!p))
+				goto free_pages;
+
+			bpage->page = p;
+			bpage->range = 1; /* bpage->page can't be freed */
+			bpage->id = i + 1;
+			cpu_buffer->subbuf_ids[i + 1] = bpage;
 		} else {
 			int order = cpu_buffer->buffer->subbuf_order;
 			bpage->page = alloc_cpu_data(cpu_buffer->cpu, order);
···
 		if (cpu_buffer->ring_meta->head_buffer)
 			rb_meta_buffer_update(cpu_buffer, bpage);
 		bpage->range = 1;
+	} else if (buffer->remote) {
+		struct ring_buffer_desc *desc = ring_buffer_desc(buffer->remote->desc, cpu);
+
+		if (!desc)
+			goto fail_free_reader;
+
+		cpu_buffer->remote = buffer->remote;
+		cpu_buffer->meta_page = (struct trace_buffer_meta *)(void *)desc->meta_va;
+		cpu_buffer->nr_pages = nr_pages;
+		cpu_buffer->subbuf_ids = kcalloc(cpu_buffer->nr_pages + 1,
+						 sizeof(*cpu_buffer->subbuf_ids),
+						 GFP_KERNEL);
+		if (!cpu_buffer->subbuf_ids)
+			goto fail_free_reader;
+
+		/* Remote buffers are read-only and immutable */
+		atomic_inc(&cpu_buffer->record_disabled);
+		atomic_inc(&cpu_buffer->resize_disabled);
+
+		bpage->page = ring_buffer_desc_page(desc, cpu_buffer->meta_page->reader.id);
+		if (!bpage->page)
+			goto fail_free_reader;
+
+		bpage->range = 1;
+		cpu_buffer->subbuf_ids[0] = bpage;
 	} else {
 		int order = cpu_buffer->buffer->subbuf_order;
 		bpage->page = alloc_cpu_data(cpu, order);
···
 
 	irq_work_sync(&cpu_buffer->irq_work.work);
 
+	if (cpu_buffer->remote)
+		kfree(cpu_buffer->subbuf_ids);
+
 	free_buffer_page(cpu_buffer->reader_page);
 
 	if (head) {
···
 			 int order, unsigned long start,
 			 unsigned long end,
 			 unsigned long scratch_size,
-			 struct lock_class_key *key)
+			 struct lock_class_key *key,
+			 struct ring_buffer_remote *remote)
 {
 	struct trace_buffer *buffer __free(kfree) = NULL;
 	long nr_pages;
···
 				  GFP_KERNEL);
 	if (!buffer->buffers)
 		goto fail_free_cpumask;
+
+	cpu = raw_smp_processor_id();
 
 	/* If start/end are specified, then that overrides size */
 	if (start && end) {
···
 		buffer->range_addr_end = end;
 
 		rb_range_meta_init(buffer, nr_pages, scratch_size);
+	} else if (remote) {
+		struct ring_buffer_desc *desc = ring_buffer_desc(remote->desc, cpu);
+
+		buffer->remote = remote;
+		/* The writer is remote. This ring-buffer is read-only */
+		atomic_inc(&buffer->record_disabled);
+		nr_pages = desc->nr_page_va - 1;
+		if (nr_pages < 2)
+			goto fail_free_buffers;
 	} else {
 
 		/* need at least two pages */
···
 			nr_pages = 2;
 	}
 
-	cpu = raw_smp_processor_id();
 	cpumask_set_cpu(cpu, buffer->cpumask);
 	buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
 	if (!buffer->buffers[cpu])
···
 			 struct lock_class_key *key)
 {
 	/* Default buffer page size - one system page */
-	return alloc_buffer(size, flags, 0, 0, 0, 0, key);
+	return alloc_buffer(size, flags, 0, 0, 0, 0, key, NULL);
 
 }
 EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
···
 			 struct lock_class_key *key)
 {
 	return alloc_buffer(size, flags, order, start, start + range_size,
-			    scratch_size, key);
+			    scratch_size, key, NULL);
+}
+
+/**
+ * __ring_buffer_alloc_remote - allocate a new ring_buffer from a remote
+ * @remote: Contains a description of the ring-buffer pages and remote callbacks.
+ * @key: ring buffer reader_lock_key.
+ */
+struct trace_buffer *__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
+						struct lock_class_key *key)
+{
+	return alloc_buffer(0, 0, 0, 0, 0, 0, key, remote);
 }
 
 void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size)
···
 }
 EXPORT_SYMBOL_GPL(ring_buffer_overruns);
 
+static bool rb_read_remote_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	local_set(&cpu_buffer->entries, READ_ONCE(cpu_buffer->meta_page->entries));
+	local_set(&cpu_buffer->overrun, READ_ONCE(cpu_buffer->meta_page->overrun));
+	local_set(&cpu_buffer->pages_touched, READ_ONCE(cpu_buffer->meta_page->pages_touched));
+	local_set(&cpu_buffer->pages_lost, READ_ONCE(cpu_buffer->meta_page->pages_lost));
+
+	return rb_num_of_entries(cpu_buffer);
+}
+
 static void rb_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
···
 }
 
 static struct buffer_page *
-rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+__rb_get_reader_page_from_remote(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct buffer_page *new_reader, *prev_reader;
+
+	if (!rb_read_remote_meta_page(cpu_buffer))
+		return NULL;
+
+	/* More to read on the reader page */
+	if (cpu_buffer->reader_page->read < rb_page_size(cpu_buffer->reader_page)) {
+		if (!cpu_buffer->reader_page->read)
+			cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
+		return cpu_buffer->reader_page;
+	}
+
+	prev_reader = cpu_buffer->subbuf_ids[cpu_buffer->meta_page->reader.id];
+
+	WARN_ON_ONCE(cpu_buffer->remote->swap_reader_page(cpu_buffer->cpu,
+							  cpu_buffer->remote->priv));
+	/* nr_pages doesn't include the reader page */
+	if (WARN_ON_ONCE(cpu_buffer->meta_page->reader.id > cpu_buffer->nr_pages))
+		return NULL;
+
+	new_reader = cpu_buffer->subbuf_ids[cpu_buffer->meta_page->reader.id];
+
+	WARN_ON_ONCE(prev_reader == new_reader);
+
+	cpu_buffer->reader_page->page = new_reader->page;
+	cpu_buffer->reader_page->id = new_reader->id;
+	cpu_buffer->reader_page->read = 0;
+	cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
+	cpu_buffer->lost_events = cpu_buffer->meta_page->reader.lost_events;
+
+	return rb_page_size(cpu_buffer->reader_page) ? cpu_buffer->reader_page : NULL;
+}
+
+static struct buffer_page *
+__rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
 	unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size);
···
 
 	return reader;
+}
+
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->remote ? __rb_get_reader_page_from_remote(cpu_buffer) :
+				    __rb_get_reader_page(cpu_buffer);
 }
 
 static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
···
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
 
-	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask) || buffer->remote)
 		return NULL;
 
 	iter = kzalloc_obj(*iter, flags);
···
 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *page;
+
+	if (cpu_buffer->remote) {
+		if (!cpu_buffer->remote->reset)
+			return;
+
+		cpu_buffer->remote->reset(cpu_buffer->cpu, cpu_buffer->remote->priv);
+		rb_read_remote_meta_page(cpu_buffer);
+
+		/* Read related values, not covered by the meta-page */
+		local_set(&cpu_buffer->pages_read, 0);
+		cpu_buffer->read = 0;
+		cpu_buffer->read_bytes = 0;
+		cpu_buffer->last_overrun = 0;
+		cpu_buffer->reader_page->read = 0;
+
+		return;
+	}
 
 	rb_head_page_deactivate(cpu_buffer);
···
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
+
+int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (cpu != RING_BUFFER_ALL_CPUS) {
+		if (!cpumask_test_cpu(cpu, buffer->cpumask))
+			return -EINVAL;
+
+		cpu_buffer = buffer->buffers[cpu];
+
+		guard(raw_spinlock)(&cpu_buffer->reader_lock);
+		if (rb_read_remote_meta_page(cpu_buffer))
+			rb_wakeups(buffer, cpu_buffer);
+
+		return 0;
+	}
+
+	guard(cpus_read_lock)();
+
+	/*
+	 * Make sure all the ring buffers are up to date before we start reading
+	 * them.
+	 */
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		guard(raw_spinlock)(&cpu_buffer->reader_lock);
+		rb_read_remote_meta_page(cpu_buffer);
+	}
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		if (rb_num_of_entries(cpu_buffer))
+			rb_wakeups(buffer, cpu_buffer);
+	}
+
+	return 0;
+}
 
 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
 /**
···
 	unsigned int commit;
 	unsigned int read;
 	u64 save_timestamp;
+	bool force_memcpy;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return -1;
···
 	/* Check if any events were dropped */
 	missed_events = cpu_buffer->lost_events;
 
+	force_memcpy = cpu_buffer->mapped || cpu_buffer->remote;
+
 	/*
 	 * If this page has been partially read or
 	 * if len is not big enough to read the rest of the page or
···
 	 */
 	if (read || (len < (commit - read)) ||
 	    cpu_buffer->reader_page == cpu_buffer->commit_page ||
-	    cpu_buffer->mapped) {
+	    force_memcpy) {
 		struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
 		unsigned int rpos = read;
 		unsigned int pos = 0;
···
 	unsigned long flags;
 	int err;
 
-	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask) || buffer->remote)
 		return -EINVAL;
 
 	cpu_buffer = buffer->buffers[cpu];
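Because the writer lives outside the kernel, nothing on the kernel side is woken when the remote produces events; a backend presumably has to poll. A hypothetical delayed-work loop around the new ring_buffer_poll_remote() might look like the following (the names and the 100 ms period are invented):

/* Hypothetical polling scaffolding; not part of this patch */
#include <linux/ring_buffer.h>
#include <linux/workqueue.h>

static struct trace_buffer *my_buffer;	/* from ring_buffer_alloc_remote() */
static struct delayed_work my_poll_work;

static void my_poll(struct work_struct *work)
{
	/* Re-read every per-CPU meta-page and wake up waiting readers */
	ring_buffer_poll_remote(my_buffer, RING_BUFFER_ALL_CPUS);

	schedule_delayed_work(&my_poll_work, msecs_to_jiffies(100));
}

static void my_start_polling(void)
{
	INIT_DELAYED_WORK(&my_poll_work, my_poll);
	schedule_delayed_work(&my_poll_work, msecs_to_jiffies(100));
}

This matches the function's structure: for a single CPU it takes the reader lock and re-reads that CPU's meta-page, while RING_BUFFER_ALL_CPUS first refreshes every per-CPU meta-page and only then issues the wakeups.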