Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events fixes from Ingo Molnar:

- Fix deadlock in the perf_mmap() failure path (Peter Zijlstra)

- Intel ACR (Auto Counter Reload) fixes (Dapeng Mi):
   - Fix validation and configuration of ACR masks
   - Fix ACR rescheduling bug causing stale masks
   - Disable the PMI on ACR-enabled hardware
   - Enable ACR on Panther Cover uarch too

* tag 'perf-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Enable auto counter reload for DMR
  perf/x86/intel: Disable PMI for self-reloaded ACR events
  perf/x86/intel: Always reprogram ACR events to prevent stale masks
  perf/x86/intel: Improve validation and configuration of ACR masks
  perf/core: Fix deadlock in perf_mmap() failure path

+115 -31
+8 -5
arch/x86/events/core.c
··· 1294 1294 return event->hw.event_base_rdpmc; 1295 1295 } 1296 1296 1297 - static inline int match_prev_assignment(struct hw_perf_event *hwc, 1297 + static inline int match_prev_assignment(struct perf_event *event, 1298 1298 struct cpu_hw_events *cpuc, 1299 1299 int i) 1300 1300 { 1301 + struct hw_perf_event *hwc = &event->hw; 1302 + 1301 1303 return hwc->idx == cpuc->assign[i] && 1302 - hwc->last_cpu == smp_processor_id() && 1303 - hwc->last_tag == cpuc->tags[i]; 1304 + hwc->last_cpu == smp_processor_id() && 1305 + hwc->last_tag == cpuc->tags[i] && 1306 + !is_acr_event_group(event); 1304 1307 } 1305 1308 1306 1309 static void x86_pmu_start(struct perf_event *event, int flags); ··· 1349 1346 * - no other event has used the counter since 1350 1347 */ 1351 1348 if (hwc->idx == -1 || 1352 - match_prev_assignment(hwc, cpuc, i)) 1349 + match_prev_assignment(event, cpuc, i)) 1353 1350 continue; 1354 1351 1355 1352 /* ··· 1370 1367 event = cpuc->event_list[i]; 1371 1368 hwc = &event->hw; 1372 1369 1373 - if (!match_prev_assignment(hwc, cpuc, i)) 1370 + if (!match_prev_assignment(event, cpuc, i)) 1374 1371 x86_assign_hw_event(event, cpuc, i); 1375 1372 else if (i < n_running) 1376 1373 continue;
+39 -11
arch/x86/events/intel/core.c
··· 3118 3118 intel_set_masks(event, idx); 3119 3119 3120 3120 /* 3121 - * Enable IRQ generation (0x8), if not PEBS, 3122 - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) 3123 - * if requested: 3121 + * Enable IRQ generation (0x8), if not PEBS or self-reloaded 3122 + * ACR event, and enable ring-3 counting (0x2) and ring-0 3123 + * counting (0x1) if requested: 3124 3124 */ 3125 - if (!event->attr.precise_ip) 3125 + if (!event->attr.precise_ip && !is_acr_self_reload_event(event)) 3126 3126 bits |= INTEL_FIXED_0_ENABLE_PMI; 3127 3127 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) 3128 3128 bits |= INTEL_FIXED_0_USER; ··· 3306 3306 intel_set_masks(event, idx); 3307 3307 static_call_cond(intel_pmu_enable_acr_event)(event); 3308 3308 static_call_cond(intel_pmu_enable_event_ext)(event); 3309 + /* 3310 + * For self-reloaded ACR event, don't enable PMI since 3311 + * HW won't set overflow bit in GLOBAL_STATUS. Otherwise, 3312 + * the PMI would be recognized as a suspicious NMI. 3313 + */ 3314 + if (is_acr_self_reload_event(event)) 3315 + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; 3316 + else if (!event->attr.precise_ip) 3317 + hwc->config |= ARCH_PERFMON_EVENTSEL_INT; 3309 3318 __x86_pmu_enable_event(hwc, enable_mask); 3310 3319 break; 3311 3320 case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: ··· 3341 3332 static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) 3342 3333 { 3343 3334 struct perf_event *event, *leader; 3344 - int i, j, idx; 3335 + int i, j, k, bit, idx; 3345 3336 3337 + /* 3338 + * FIXME: ACR mask parsing relies on cpuc->event_list[] (active events only). 3339 + * Disabling an ACR event causes bit-shifting errors in the acr_mask of 3340 + * remaining group members. As ACR sampling requires all events to be active, 3341 + * this limitation is acceptable for now. Revisit if independent event toggling 3342 + * is required. 
3343 + */ 3346 3344 for (i = 0; i < cpuc->n_events; i++) { 3347 3345 leader = cpuc->event_list[i]; 3348 3346 if (!is_acr_event_group(leader)) 3349 3347 continue; 3350 3348 3351 - /* The ACR events must be contiguous. */ 3349 + /* Find the last event of the ACR group. */ 3352 3350 for (j = i; j < cpuc->n_events; j++) { 3353 3351 event = cpuc->event_list[j]; 3354 3352 if (event->group_leader != leader->group_leader) 3355 3353 break; 3356 - for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { 3357 - if (i + idx >= cpuc->n_events || 3358 - !is_acr_event_group(cpuc->event_list[i + idx])) 3359 - return; 3360 - __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); 3354 + } 3355 + 3356 + /* 3357 + * Translate the user-space ACR mask (attr.config2) into the physical 3358 + * counter bitmask (hw.config1) for each ACR event in the group. 3359 + * NOTE: ACR event contiguity is guaranteed by intel_pmu_hw_config(). 3360 + */ 3361 + for (k = i; k < j; k++) { 3362 + event = cpuc->event_list[k]; 3363 + event->hw.config1 = 0; 3364 + for_each_set_bit(bit, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { 3365 + idx = i + bit; 3366 + /* Event index of ACR group must locate in [i, j). */ 3367 + if (idx >= j || !is_acr_event_group(cpuc->event_list[idx])) 3368 + continue; 3369 + __set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1); 3361 3370 } 3362 3371 } 3363 3372 i = j - 1; ··· 7531 7504 hybrid(pmu, event_constraints) = intel_pnc_event_constraints; 7532 7505 hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints; 7533 7506 hybrid(pmu, extra_regs) = intel_pnc_extra_regs; 7507 + static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); 7534 7508 } 7535 7509 7536 7510 static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
+10
arch/x86/events/perf_event.h
··· 137 137 return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR); 138 138 } 139 139 140 + static inline bool is_acr_self_reload_event(struct perf_event *event) 141 + { 142 + struct hw_perf_event *hwc = &event->hw; 143 + 144 + if (hwc->idx < 0) 145 + return false; 146 + 147 + return test_bit(hwc->idx, (unsigned long *)&hwc->config1); 148 + } 149 + 140 150 struct amd_nb { 141 151 int nb_id; /* NorthBridge id */ 142 152 int refcnt; /* reference count */
+55 -15
kernel/events/core.c
··· 7006 7006 } 7007 7007 7008 7008 static void perf_pmu_output_stop(struct perf_event *event); 7009 + static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb); 7009 7010 7010 7011 /* 7011 7012 * A buffer can be mmap()ed multiple times; either directly through the same ··· 7022 7021 mapped_f unmapped = get_mapped(event, event_unmapped); 7023 7022 struct perf_buffer *rb = ring_buffer_get(event); 7024 7023 struct user_struct *mmap_user = rb->mmap_user; 7025 - int mmap_locked = rb->mmap_locked; 7026 - unsigned long size = perf_data_size(rb); 7027 7024 bool detach_rest = false; 7028 7025 7029 7026 /* FIXIES vs perf_pmu_unregister() */ ··· 7116 7117 * Aside from that, this buffer is 'fully' detached and unmapped, 7117 7118 * undo the VM accounting. 7118 7119 */ 7119 - 7120 - atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked, 7121 - &mmap_user->locked_vm); 7122 - atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm); 7123 - free_uid(mmap_user); 7120 + perf_mmap_unaccount(vma, rb); 7124 7121 7125 7122 out_put: 7126 7123 ring_buffer_put(rb); /* could be last */ ··· 7256 7261 atomic64_add(extra, &vma->vm_mm->pinned_vm); 7257 7262 } 7258 7263 7264 + static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb) 7265 + { 7266 + struct user_struct *user = rb->mmap_user; 7267 + 7268 + atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked, 7269 + &user->locked_vm); 7270 + atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm); 7271 + } 7272 + 7259 7273 static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, 7260 7274 unsigned long nr_pages) 7261 7275 { ··· 7327 7323 if (!rb) 7328 7324 return -ENOMEM; 7329 7325 7330 - refcount_set(&rb->mmap_count, 1); 7331 - rb->mmap_user = get_current_user(); 7332 7326 rb->mmap_locked = extra; 7333 7327 7334 7328 ring_buffer_attach(event, rb); ··· 7476 7474 mapped(event, vma->vm_mm); 7477 7475 7478 7476 /* 7479 - * Try to map it into the page table. 
On fail, invoke 7480 - * perf_mmap_close() to undo the above, as the callsite expects 7481 - * full cleanup in this case and therefore does not invoke 7482 - * vmops::close(). 7477 + * Try to map it into the page table. On fail undo the above, 7478 + * as the callsite expects full cleanup in this case and 7479 + * therefore does not invoke vmops::close(). 7483 7480 */ 7484 7481 ret = map_range(event->rb, vma); 7485 - if (ret) 7486 - perf_mmap_close(vma); 7482 + if (likely(!ret)) 7483 + return 0; 7484 + 7485 + /* Error path */ 7486 + 7487 + /* 7488 + * If this is the first mmap(), then event->mmap_count should 7489 + * be stable at 1. It is only modified by: 7490 + * perf_mmap_{open,close}() and perf_mmap(). 7491 + * 7492 + * The former are not possible because this mmap() hasn't been 7493 + * successful yet, and the latter is serialized by 7494 + * event->mmap_mutex which we still hold (note that mmap_lock 7495 + * is not strictly sufficient here, because the event fd can 7496 + * be passed to another process through trivial means like 7497 + * fork(), leading to concurrent mmap() from different mm). 7498 + * 7499 + * Make sure to remove event->rb before releasing 7500 + * event->mmap_mutex, such that any concurrent mmap() will not 7501 + * attempt use this failed buffer. 7502 + */ 7503 + if (refcount_read(&event->mmap_count) == 1) { 7504 + /* 7505 + * Minimal perf_mmap_close(); there can't be AUX or 7506 + * other events on account of this being the first. 7507 + */ 7508 + mapped = get_mapped(event, event_unmapped); 7509 + if (mapped) 7510 + mapped(event, vma->vm_mm); 7511 + perf_mmap_unaccount(vma, event->rb); 7512 + ring_buffer_attach(event, NULL); /* drops last rb->refcount */ 7513 + refcount_set(&event->mmap_count, 0); 7514 + return ret; 7515 + } 7516 + 7517 + /* 7518 + * Otherwise this is an already existing buffer, and there is 7519 + * no race vs first exposure, so fall-through and call 7520 + * perf_mmap_close(). 
7521 + */ 7487 7522 } 7488 7523 7524 + perf_mmap_close(vma); 7489 7525 return ret; 7490 7526 } 7491 7527
+1
kernel/events/internal.h
··· 67 67 struct perf_buffer *rb; 68 68 69 69 rb = container_of(rcu_head, struct perf_buffer, rcu_head); 70 + free_uid(rb->mmap_user); 70 71 rb_free(rb); 71 72 } 72 73
+2
kernel/events/ring_buffer.c
··· 340 340 rb->paused = 1; 341 341 342 342 mutex_init(&rb->aux_mutex); 343 + rb->mmap_user = get_current_user(); 344 + refcount_set(&rb->mmap_count, 1); 343 345 } 344 346 345 347 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)