Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

perf: Fix __perf_event_overflow() vs perf_remove_from_context() race

Make sure that __perf_event_overflow() runs with IRQs disabled for all
possible callchains. Specifically, software events can end up running
it with only preemption disabled.

Running with only preemption disabled opens up a race against
perf_event_exit_event() and friends, which can free things the overflow
path expects to still be present, such as the BPF program.

Fixes: 592903cdcbf6 ("perf_counter: add an event_list")
Reported-by: Simond Hu <cmdhh1767@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Simond Hu <cmdhh1767@gmail.com>
Link: https://patch.msgid.link/20260224122909.GV1395416@noisy.programming.kicks-ass.net
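
For context, the core of the fix is the pattern sketched below: the software-event
path disables interrupts for the rest of the scope and re-checks the event state
before touching anything the teardown path might free. This is a minimal
illustrative sketch, not the patch itself; handle_sw_overflow() is a made-up name,
and the sketch assumes it sits in kernel/events/core.c where __perf_event_overflow()
is visible.

#include <linux/perf_event.h>
#include <linux/cleanup.h>
#include <linux/irqflags.h>
#include <linux/lockdep.h>

/* Illustrative sketch only; not a kernel symbol. */
static int handle_sw_overflow(struct perf_event *event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	/* Software callers only guarantee preemption is disabled. */
	lockdep_assert_preemption_disabled();

	/*
	 * Hold off event_function_call() IPIs (e.g. the one sent by
	 * perf_remove_from_context()) until this scope exits; the guard
	 * restores the saved IRQ state automatically.
	 */
	guard(irqsave)();

	/* The event may already have been removed from the context. */
	if (event->state != PERF_EVENT_STATE_ACTIVE)
		return 0;

	return __perf_event_overflow(event, 1, data, regs);
}

Hardware PMI callers, by contrast, already run with interrupts disabled, which is
why the patch only asserts that with lockdep instead of disabling them again.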

+41 -1
kernel/events/core.c
···
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
+	/*
+	 * Entry point from hardware PMI, interrupts should be disabled here.
+	 * This serializes us against perf_event_remove_from_context() in
+	 * things like perf_event_release_kernel().
+	 */
+	lockdep_assert_irqs_disabled();
+
 	return __perf_event_overflow(event, 1, data, regs);
 }
···
 {
 	struct hw_perf_event *hwc = &event->hw;
 
+	/*
+	 * This is:
+	 *  - software         preempt
+	 *  - tracepoint       preempt
+	 *  - tp_target_task   irq (ctx->lock)
+	 *  - uprobes          preempt/irq
+	 *  - kprobes          preempt/irq
+	 *  - hw_breakpoint    irq
+	 *
+	 * Any of these are sufficient to hold off RCU and thus ensure @event
+	 * exists.
+	 */
+	lockdep_assert_preemption_disabled();
 	local64_add(nr, &event->count);
 
 	if (!regs)
 		return;
 
 	if (!is_sampling_event(event))
+		return;
+
+	/*
+	 * Serialize against event_function_call() IPIs like normal overflow
+	 * event handling. Specifically, must not allow
+	 * perf_event_release_kernel() -> perf_remove_from_context() to make
+	 * progress and 'release' the event from under us.
+	 */
+	guard(irqsave)();
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
 		return;
 
 	if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
···
 	struct perf_sample_data data;
 	struct perf_event *event;
 
+	/*
+	 * Per being a tracepoint, this runs with preemption disabled.
+	 */
+	lockdep_assert_preemption_disabled();
+
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = entry_size,
···
 {
 	struct perf_sample_data sample;
 	struct pt_regs *regs = data;
+
+	/*
+	 * Exception context, will have interrupts disabled.
+	 */
+	lockdep_assert_irqs_disabled();
 
 	perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
···
 
 	if (regs && !perf_exclude_event(event, regs)) {
 		if (!(event->attr.exclude_idle && is_idle_task(current)))
-			if (__perf_event_overflow(event, 1, &data, regs))
+			if (perf_event_overflow(event, &data, regs))
 				ret = HRTIMER_NORESTART;
 	}
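
The guard(irqsave)() used above is the scope-based form of the classic
local_irq_save()/local_irq_restore() pair, built on the kernel's cleanup.h guard
infrastructure: it saves and disables interrupts when declared and restores them
automatically when the enclosing scope exits. A rough open-coded equivalent
(illustrative sketch only, not how the patch is written):

	unsigned long flags;

	local_irq_save(flags);
	if (event->state != PERF_EVENT_STATE_ACTIVE) {
		local_irq_restore(flags);
		return;
	}
	/* ... rest of the sampling path runs with IRQs disabled ... */
	local_irq_restore(flags);

The scope-based form avoids duplicating the restore on every early return, which
is why the patch can simply add the state check right after the guard.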