Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf_urgent_for_v6.4_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Borislav Petkov:

- Make sure the PEBS buffer is flushed before reprogramming the
hardware so that the correct record sizes are used

- Update the sample size for AMD BRS events

- Fix confusion caused by reusing the same on-stack struct for different
events in the event processing path

* tag 'perf_urgent_for_v6.4_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel/ds: Flush PEBS DS when changing PEBS_DATA_CFG
perf/x86: Fix missing sample size update on AMD BRS
perf/core: Fix perf_sample_data not properly initialized for different swevents in perf_tp_event()

+50 -29
+2 -4
arch/x86/events/core.c
··· 1703 1703 1704 1704 perf_sample_data_init(&data, 0, event->hw.last_period); 1705 1705 1706 - if (has_branch_stack(event)) { 1707 - data.br_stack = &cpuc->lbr_stack; 1708 - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; 1709 - } 1706 + if (has_branch_stack(event)) 1707 + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); 1710 1708 1711 1709 if (perf_event_overflow(event, &data, regs)) 1712 1710 x86_pmu_stop(event, 0);
+32 -24
arch/x86/events/intel/ds.c
··· 1229 1229 struct perf_event *event, bool add) 1230 1230 { 1231 1231 struct pmu *pmu = event->pmu; 1232 + 1232 1233 /* 1233 1234 * Make sure we get updated with the first PEBS 1234 1235 * event. It will trigger also during removal, but 1235 1236 * that does not hurt: 1236 1237 */ 1237 - bool update = cpuc->n_pebs == 1; 1238 + if (cpuc->n_pebs == 1) 1239 + cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; 1238 1240 1239 1241 if (needed_cb != pebs_needs_sched_cb(cpuc)) { 1240 1242 if (!needed_cb) ··· 1244 1242 else 1245 1243 perf_sched_cb_dec(pmu); 1246 1244 1247 - update = true; 1245 + cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW; 1248 1246 } 1249 1247 1250 1248 /* ··· 1254 1252 if (x86_pmu.intel_cap.pebs_baseline && add) { 1255 1253 u64 pebs_data_cfg; 1256 1254 1257 - /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */ 1258 - if (cpuc->n_pebs == 1) { 1259 - cpuc->pebs_data_cfg = 0; 1260 - cpuc->pebs_record_size = sizeof(struct pebs_basic); 1261 - } 1262 - 1263 1255 pebs_data_cfg = pebs_update_adaptive_cfg(event); 1264 - 1265 - /* Update pebs_record_size if new event requires more data. */ 1266 - if (pebs_data_cfg & ~cpuc->pebs_data_cfg) { 1267 - cpuc->pebs_data_cfg |= pebs_data_cfg; 1268 - adaptive_pebs_record_size_update(); 1269 - update = true; 1270 - } 1256 + /* 1257 + * Be sure to update the thresholds when we change the record. 
1258 + */ 1259 + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) 1260 + cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW; 1271 1261 } 1272 - 1273 - if (update) 1274 - pebs_update_threshold(cpuc); 1275 1262 } 1276 1263 1277 1264 void intel_pmu_pebs_add(struct perf_event *event) ··· 1317 1326 wrmsrl(base + idx, value); 1318 1327 } 1319 1328 1329 + static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc) 1330 + { 1331 + if (cpuc->n_pebs == cpuc->n_large_pebs && 1332 + cpuc->n_pebs != cpuc->n_pebs_via_pt) 1333 + intel_pmu_drain_pebs_buffer(); 1334 + } 1335 + 1320 1336 void intel_pmu_pebs_enable(struct perf_event *event) 1321 1337 { 1322 1338 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1339 + u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW; 1323 1340 struct hw_perf_event *hwc = &event->hw; 1324 1341 struct debug_store *ds = cpuc->ds; 1325 1342 unsigned int idx = hwc->idx; ··· 1343 1344 1344 1345 if (x86_pmu.intel_cap.pebs_baseline) { 1345 1346 hwc->config |= ICL_EVENTSEL_ADAPTIVE; 1346 - if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) { 1347 - wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg); 1348 - cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg; 1347 + if (pebs_data_cfg != cpuc->active_pebs_data_cfg) { 1348 + /* 1349 + * drain_pebs() assumes uniform record size; 1350 + * hence we need to drain when changing said 1351 + * size. 
1352 + */ 1353 + intel_pmu_drain_large_pebs(cpuc); 1354 + adaptive_pebs_record_size_update(); 1355 + wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg); 1356 + cpuc->active_pebs_data_cfg = pebs_data_cfg; 1349 1357 } 1358 + } 1359 + if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) { 1360 + cpuc->pebs_data_cfg = pebs_data_cfg; 1361 + pebs_update_threshold(cpuc); 1350 1362 } 1351 1363 1352 1364 if (idx >= INTEL_PMC_IDX_FIXED) { ··· 1401 1391 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1402 1392 struct hw_perf_event *hwc = &event->hw; 1403 1393 1404 - if (cpuc->n_pebs == cpuc->n_large_pebs && 1405 - cpuc->n_pebs != cpuc->n_pebs_via_pt) 1406 - intel_pmu_drain_pebs_buffer(); 1394 + intel_pmu_drain_large_pebs(cpuc); 1407 1395 1408 1396 cpuc->pebs_enabled &= ~(1ULL << hwc->idx); 1409 1397
+3
arch/x86/include/asm/perf_event.h
··· 121 121 #define PEBS_DATACFG_LBRS BIT_ULL(3) 122 122 #define PEBS_DATACFG_LBR_SHIFT 24 123 123 124 + /* Steal the highest bit of pebs_data_cfg for SW usage */ 125 + #define PEBS_UPDATE_DS_SW BIT_ULL(63) 126 + 124 127 /* 125 128 * Intel "Architectural Performance Monitoring" CPUID 126 129 * detection/enumeration details:
+13 -1
kernel/events/core.c
··· 10150 10150 perf_trace_buf_update(record, event_type); 10151 10151 10152 10152 hlist_for_each_entry_rcu(event, head, hlist_entry) { 10153 - if (perf_tp_event_match(event, &data, regs)) 10153 + if (perf_tp_event_match(event, &data, regs)) { 10154 10154 perf_swevent_event(event, count, &data, regs); 10155 + 10156 + /* 10157 + * Here use the same on-stack perf_sample_data, 10158 + * some members in data are event-specific and 10159 + * need to be re-computed for different swevents. 10160 + * Re-initialize data->sample_flags safely to avoid 10161 + * the problem that next event skips preparing data 10162 + * because data->sample_flags is set. 10163 + */ 10164 + perf_sample_data_init(&data, 0, 0); 10165 + perf_sample_save_raw_data(&data, &raw); 10166 + } 10155 10167 } 10156 10168 10157 10169 /*