Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-urgent-2025-04-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc perf events fixes from Ingo Molnar:

- Fix __free_event() corner case splat

- Fix false-positive uprobes-related lockdep splat on
CONFIG_PREEMPT_RT=y kernels

- Fix a complicated perf sigtrap race that may result in hangs

* tag 'perf-urgent-2025-04-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf: Fix hang while freeing sigtrap event
uprobes: Avoid false-positive lockdep splat on CONFIG_PREEMPT_RT=y in the ri_timer() uprobe timer callback, use raw_write_seqcount_*()
perf/core: Fix WARN_ON(!ctx) in __free_event() for partial init

+34 -52
-1
include/linux/perf_event.h
··· 823 823 struct irq_work pending_disable_irq; 824 824 struct callback_head pending_task; 825 825 unsigned int pending_work; 826 - struct rcuwait pending_work_wait; 827 826 828 827 atomic_t event_limit; 829 828
+21 -49
kernel/events/core.c
··· 5518 5518 5519 5519 static void perf_free_addr_filters(struct perf_event *event); 5520 5520 5521 - static void perf_pending_task_sync(struct perf_event *event) 5522 - { 5523 - struct callback_head *head = &event->pending_task; 5524 - 5525 - if (!event->pending_work) 5526 - return; 5527 - /* 5528 - * If the task is queued to the current task's queue, we 5529 - * obviously can't wait for it to complete. Simply cancel it. 5530 - */ 5531 - if (task_work_cancel(current, head)) { 5532 - event->pending_work = 0; 5533 - local_dec(&event->ctx->nr_no_switch_fast); 5534 - return; 5535 - } 5536 - 5537 - /* 5538 - * All accesses related to the event are within the same RCU section in 5539 - * perf_pending_task(). The RCU grace period before the event is freed 5540 - * will make sure all those accesses are complete by then. 5541 - */ 5542 - rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE); 5543 - } 5544 - 5545 5521 /* vs perf_event_alloc() error */ 5546 5522 static void __free_event(struct perf_event *event) 5547 5523 { ··· 5575 5599 { 5576 5600 irq_work_sync(&event->pending_irq); 5577 5601 irq_work_sync(&event->pending_disable_irq); 5578 - perf_pending_task_sync(event); 5579 5602 5580 5603 unaccount_event(event); 5581 5604 ··· 5667 5692 5668 5693 static void put_event(struct perf_event *event) 5669 5694 { 5695 + struct perf_event *parent; 5696 + 5670 5697 if (!atomic_long_dec_and_test(&event->refcount)) 5671 5698 return; 5672 5699 5700 + parent = event->parent; 5673 5701 _free_event(event); 5702 + 5703 + /* Matches the refcount bump in inherit_event() */ 5704 + if (parent) 5705 + put_event(parent); 5674 5706 } 5675 5707 5676 5708 /* ··· 5761 5779 if (tmp == child) { 5762 5780 perf_remove_from_context(child, DETACH_GROUP); 5763 5781 list_move(&child->child_list, &free_list); 5764 - /* 5765 - * This matches the refcount bump in inherit_event(); 5766 - * this can't be the last reference. 
5767 - */ 5768 - put_event(event); 5769 5782 } else { 5770 5783 var = &ctx->refcount; 5771 5784 } ··· 5786 5809 void *var = &child->ctx->refcount; 5787 5810 5788 5811 list_del(&child->child_list); 5789 - free_event(child); 5812 + /* Last reference unless ->pending_task work is pending */ 5813 + put_event(child); 5790 5814 5791 5815 /* 5792 5816 * Wake any perf_event_free_task() waiting for this event to be ··· 5798 5820 } 5799 5821 5800 5822 no_ctx: 5801 - put_event(event); /* Must be the 'last' reference */ 5823 + /* 5824 + * Last reference unless ->pending_task work is pending on this event 5825 + * or any of its children. 5826 + */ 5827 + put_event(event); 5802 5828 return 0; 5803 5829 } 5804 5830 EXPORT_SYMBOL_GPL(perf_event_release_kernel); ··· 7218 7236 int rctx; 7219 7237 7220 7238 /* 7221 - * All accesses to the event must belong to the same implicit RCU read-side 7222 - * critical section as the ->pending_work reset. See comment in 7223 - * perf_pending_task_sync(). 7224 - */ 7225 - rcu_read_lock(); 7226 - /* 7227 7239 * If we 'fail' here, that's OK, it means recursion is already disabled 7228 7240 * and we won't recurse 'further'. 
7229 7241 */ ··· 7227 7251 event->pending_work = 0; 7228 7252 perf_sigtrap(event); 7229 7253 local_dec(&event->ctx->nr_no_switch_fast); 7230 - rcuwait_wake_up(&event->pending_work_wait); 7231 7254 } 7232 - rcu_read_unlock(); 7255 + put_event(event); 7233 7256 7234 7257 if (rctx >= 0) 7235 7258 perf_swevent_put_recursion_context(rctx); ··· 10223 10248 !task_work_add(current, &event->pending_task, notify_mode)) { 10224 10249 event->pending_work = pending_id; 10225 10250 local_inc(&event->ctx->nr_no_switch_fast); 10251 + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); 10226 10252 10227 10253 event->pending_addr = 0; 10228 10254 if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) ··· 12586 12610 init_irq_work(&event->pending_irq, perf_pending_irq); 12587 12611 event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable); 12588 12612 init_task_work(&event->pending_task, perf_pending_task); 12589 - rcuwait_init(&event->pending_work_wait); 12590 12613 12591 12614 mutex_init(&event->mmap_mutex); 12592 12615 raw_spin_lock_init(&event->addr_filters.lock); ··· 13722 13747 * Kick perf_poll() for is_event_hup(); 13723 13748 */ 13724 13749 perf_event_wakeup(parent_event); 13725 - free_event(event); 13726 - put_event(parent_event); 13750 + put_event(event); 13727 13751 return; 13728 13752 } 13729 13753 ··· 13846 13872 list_del_init(&event->child_list); 13847 13873 mutex_unlock(&parent->child_mutex); 13848 13874 13849 - put_event(parent); 13850 - 13851 13875 raw_spin_lock_irq(&ctx->lock); 13852 13876 perf_group_detach(event); 13853 13877 list_del_event(event, ctx); 13854 13878 raw_spin_unlock_irq(&ctx->lock); 13855 - free_event(event); 13879 + put_event(event); 13856 13880 } 13857 13881 13858 13882 /* ··· 13988 14016 if (IS_ERR(child_event)) 13989 14017 return child_event; 13990 14018 14019 + get_ctx(child_ctx); 14020 + child_event->ctx = child_ctx; 14021 + 13991 14022 pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); 13992 
14023 if (IS_ERR(pmu_ctx)) { 13993 14024 free_event(child_event); ··· 14012 14037 return NULL; 14013 14038 } 14014 14039 14015 - get_ctx(child_ctx); 14016 - 14017 14040 /* 14018 14041 * Make the child state follow the state of the parent event, 14019 14042 * not its attr.disabled bit. We hold the parent's mutex, ··· 14032 14059 local64_set(&hwc->period_left, sample_period); 14033 14060 } 14034 14061 14035 - child_event->ctx = child_ctx; 14036 14062 child_event->overflow_handler = parent_event->overflow_handler; 14037 14063 child_event->overflow_handler_context 14038 14064 = parent_event->overflow_handler_context;
+13 -2
kernel/events/uprobes.c
··· 1956 1956 * to-be-reused return instances for future uretprobes. If ri_timer() 1957 1957 * happens to be running right now, though, we fallback to safety and 1958 1958 * just perform RCU-delated freeing of ri. 1959 + * Admittedly, this is a rather simple use of seqcount, but it nicely 1960 + * abstracts away all the necessary memory barriers, so we use 1961 + * a well-supported kernel primitive here. 1959 1962 */ 1960 1963 if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) { 1961 1964 /* immediate reuse of ri without RCU GP is OK */ ··· 2019 2016 /* RCU protects return_instance from freeing. */ 2020 2017 guard(rcu)(); 2021 2018 2022 - write_seqcount_begin(&utask->ri_seqcount); 2019 + /* 2020 + * See free_ret_instance() for notes on seqcount use. 2021 + * We also employ raw API variants to avoid lockdep false-positive 2022 + * warning complaining about enabled preemption. The timer can only be 2023 + * invoked once for a uprobe_task. Therefore there can only be one 2024 + * writer. The reader does not require an even sequence count to make 2025 + * progress, so it is OK to remain preemptible on PREEMPT_RT. 2026 + */ 2027 + raw_write_seqcount_begin(&utask->ri_seqcount); 2023 2028 2024 2029 for_each_ret_instance_rcu(ri, utask->return_instances) 2025 2030 hprobe_expire(&ri->hprobe, false); 2026 2031 2027 - write_seqcount_end(&utask->ri_seqcount); 2032 + raw_write_seqcount_end(&utask->ri_seqcount); 2028 2033 } 2029 2034 2030 2035 static struct uprobe_task *alloc_utask(void)