Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-tools-fixes-for-v5.16-2021-12-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

- Prevent out-of-bounds access to per-sample registers.

- Fix NULL vs IS_ERR_OR_NULL() checking on the python binding.

- Intel PT fixes, half of which are one-liners:
  - Fix some PGE (packet generation enable/control flow packets) usage.
  - Fix sync state when a PSB (synchronization) packet is found.
  - Fix intel_pt_fup_event() assumptions about setting state type.
  - Fix state setting when receiving overflow (OVF) packet.
  - Fix next 'err' value, walking trace.
  - Fix missing 'instruction' events with 'q' option.
  - Fix error timestamp setting on the decoder error path.

* tag 'perf-tools-fixes-for-v5.16-2021-12-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
perf python: Fix NULL vs IS_ERR_OR_NULL() checking
perf intel-pt: Fix error timestamp setting on the decoder error path
perf intel-pt: Fix missing 'instruction' events with 'q' option
perf intel-pt: Fix next 'err' value, walking trace
perf intel-pt: Fix state setting when receiving overflow (OVF) packet
perf intel-pt: Fix intel_pt_fup_event() assumptions about setting state type
perf intel-pt: Fix sync state when a PSB (synchronization) packet is found
perf intel-pt: Fix some PGE (packet generation enable/control flow packets) usage
perf tools: Prevent out-of-bounds access to registers

+64 -32
+4 -1
tools/perf/util/event.h
··· 44 44 /* perf sample has 16 bits size limit */ 45 45 #define PERF_SAMPLE_MAX_SIZE (1 << 16) 46 46 47 + /* number of register is bound by the number of bits in regs_dump::mask (64) */ 48 + #define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) 49 + 47 50 struct regs_dump { 48 51 u64 abi; 49 52 u64 mask; 50 53 u64 *regs; 51 54 52 55 /* Cached values/mask filled by first register access. */ 53 - u64 cache_regs[PERF_REGS_MAX]; 56 + u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; 54 57 u64 cache_mask; 55 58 }; 56 59
+55 -30
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
··· 1205 1205 1206 1206 static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) 1207 1207 { 1208 + enum intel_pt_sample_type type = decoder->state.type; 1208 1209 bool ret = false; 1210 + 1211 + decoder->state.type &= ~INTEL_PT_BRANCH; 1209 1212 1210 1213 if (decoder->set_fup_tx_flags) { 1211 1214 decoder->set_fup_tx_flags = false; 1212 1215 decoder->tx_flags = decoder->fup_tx_flags; 1213 - decoder->state.type = INTEL_PT_TRANSACTION; 1216 + decoder->state.type |= INTEL_PT_TRANSACTION; 1214 1217 if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX) 1215 1218 decoder->state.type |= INTEL_PT_BRANCH; 1216 - decoder->state.from_ip = decoder->ip; 1217 - decoder->state.to_ip = 0; 1218 1219 decoder->state.flags = decoder->fup_tx_flags; 1219 - return true; 1220 + ret = true; 1220 1221 } 1221 1222 if (decoder->set_fup_ptw) { 1222 1223 decoder->set_fup_ptw = false; 1223 - decoder->state.type = INTEL_PT_PTW; 1224 + decoder->state.type |= INTEL_PT_PTW; 1224 1225 decoder->state.flags |= INTEL_PT_FUP_IP; 1225 - decoder->state.from_ip = decoder->ip; 1226 - decoder->state.to_ip = 0; 1227 1226 decoder->state.ptw_payload = decoder->fup_ptw_payload; 1228 - return true; 1227 + ret = true; 1229 1228 } 1230 1229 if (decoder->set_fup_mwait) { 1231 1230 decoder->set_fup_mwait = false; 1232 - decoder->state.type = INTEL_PT_MWAIT_OP; 1233 - decoder->state.from_ip = decoder->ip; 1234 - decoder->state.to_ip = 0; 1231 + decoder->state.type |= INTEL_PT_MWAIT_OP; 1235 1232 decoder->state.mwait_payload = decoder->fup_mwait_payload; 1236 1233 ret = true; 1237 1234 } 1238 1235 if (decoder->set_fup_pwre) { 1239 1236 decoder->set_fup_pwre = false; 1240 1237 decoder->state.type |= INTEL_PT_PWR_ENTRY; 1241 - decoder->state.type &= ~INTEL_PT_BRANCH; 1242 - decoder->state.from_ip = decoder->ip; 1243 - decoder->state.to_ip = 0; 1244 1238 decoder->state.pwre_payload = decoder->fup_pwre_payload; 1245 1239 ret = true; 1246 1240 } 1247 1241 if (decoder->set_fup_exstop) { 1248 1242 decoder->set_fup_exstop = 
false; 1249 1243 decoder->state.type |= INTEL_PT_EX_STOP; 1250 - decoder->state.type &= ~INTEL_PT_BRANCH; 1251 1244 decoder->state.flags |= INTEL_PT_FUP_IP; 1252 - decoder->state.from_ip = decoder->ip; 1253 - decoder->state.to_ip = 0; 1254 1245 ret = true; 1255 1246 } 1256 1247 if (decoder->set_fup_bep) { 1257 1248 decoder->set_fup_bep = false; 1258 1249 decoder->state.type |= INTEL_PT_BLK_ITEMS; 1259 - decoder->state.type &= ~INTEL_PT_BRANCH; 1250 + ret = true; 1251 + } 1252 + if (decoder->overflow) { 1253 + decoder->overflow = false; 1254 + if (!ret && !decoder->pge) { 1255 + if (decoder->hop) { 1256 + decoder->state.type = 0; 1257 + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; 1258 + } 1259 + decoder->pge = true; 1260 + decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN; 1261 + decoder->state.from_ip = 0; 1262 + decoder->state.to_ip = decoder->ip; 1263 + return true; 1264 + } 1265 + } 1266 + if (ret) { 1260 1267 decoder->state.from_ip = decoder->ip; 1261 1268 decoder->state.to_ip = 0; 1262 - ret = true; 1269 + } else { 1270 + decoder->state.type = type; 1263 1271 } 1264 1272 return ret; 1265 1273 } ··· 1616 1608 intel_pt_clear_tx_flags(decoder); 1617 1609 intel_pt_set_nr(decoder); 1618 1610 decoder->timestamp_insn_cnt = 0; 1619 - decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; 1611 + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; 1612 + decoder->state.from_ip = decoder->ip; 1613 + decoder->ip = 0; 1614 + decoder->pge = false; 1615 + decoder->set_fup_tx_flags = false; 1616 + decoder->set_fup_ptw = false; 1617 + decoder->set_fup_mwait = false; 1618 + decoder->set_fup_pwre = false; 1619 + decoder->set_fup_exstop = false; 1620 + decoder->set_fup_bep = false; 1620 1621 decoder->overflow = true; 1621 1622 return -EOVERFLOW; 1622 1623 } ··· 2683 2666 /* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */ 2684 2667 static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err) 2685 2668 { 2669 + *err = 0; 2670 + 
2686 2671 /* Leap from PSB to PSB, getting ip from FUP within PSB+ */ 2687 2672 if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) { 2688 2673 *err = intel_pt_scan_for_psb(decoder); ··· 2697 2678 return HOP_IGNORE; 2698 2679 2699 2680 case INTEL_PT_TIP_PGD: 2681 + decoder->pge = false; 2700 2682 if (!decoder->packet.count) { 2701 2683 intel_pt_set_nr(decoder); 2702 2684 return HOP_IGNORE; ··· 2725 2705 if (!decoder->packet.count) 2726 2706 return HOP_IGNORE; 2727 2707 intel_pt_set_ip(decoder); 2728 - if (intel_pt_fup_event(decoder)) 2729 - return HOP_RETURN; 2730 - if (!decoder->branch_enable) 2708 + if (decoder->set_fup_mwait || decoder->set_fup_pwre) 2709 + *no_tip = true; 2710 + if (!decoder->branch_enable || !decoder->pge) 2731 2711 *no_tip = true; 2732 2712 if (*no_tip) { 2733 2713 decoder->state.type = INTEL_PT_INSTRUCTION; 2734 2714 decoder->state.from_ip = decoder->ip; 2735 2715 decoder->state.to_ip = 0; 2716 + intel_pt_fup_event(decoder); 2736 2717 return HOP_RETURN; 2737 2718 } 2719 + intel_pt_fup_event(decoder); 2720 + decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH; 2738 2721 *err = intel_pt_walk_fup_tip(decoder); 2739 - if (!*err) 2722 + if (!*err && decoder->state.to_ip) 2740 2723 decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; 2741 2724 return HOP_RETURN; 2742 2725 ··· 2920 2897 { 2921 2898 struct intel_pt_psb_info data = { .fup = false }; 2922 2899 2923 - if (!decoder->branch_enable || !decoder->pge) 2900 + if (!decoder->branch_enable) 2924 2901 return false; 2925 2902 2926 2903 intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data); ··· 2947 2924 if (err) 2948 2925 return err; 2949 2926 next: 2927 + err = 0; 2950 2928 if (decoder->cyc_threshold) { 2951 2929 if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC) 2952 2930 decoder->sample_cyc = false; ··· 2986 2962 2987 2963 case INTEL_PT_TIP_PGE: { 2988 2964 decoder->pge = true; 2965 + decoder->overflow = false; 2989 2966 
intel_pt_mtc_cyc_cnt_pge(decoder); 2990 2967 intel_pt_set_nr(decoder); 2991 2968 if (decoder->packet.count == 0) { ··· 3024 2999 break; 3025 3000 } 3026 3001 intel_pt_set_last_ip(decoder); 3027 - if (!decoder->branch_enable) { 3002 + if (!decoder->branch_enable || !decoder->pge) { 3028 3003 decoder->ip = decoder->last_ip; 3029 3004 if (intel_pt_fup_event(decoder)) 3030 3005 return 0; ··· 3492 3467 decoder->set_fup_pwre = false; 3493 3468 decoder->set_fup_exstop = false; 3494 3469 decoder->set_fup_bep = false; 3470 + decoder->overflow = false; 3495 3471 3496 3472 if (!decoder->branch_enable) { 3497 3473 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; 3498 - decoder->overflow = false; 3499 3474 decoder->state.type = 0; /* Do not have a sample */ 3500 3475 return 0; 3501 3476 } ··· 3510 3485 decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; 3511 3486 else 3512 3487 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; 3513 - decoder->overflow = false; 3514 3488 3515 3489 decoder->state.from_ip = 0; 3516 3490 decoder->state.to_ip = decoder->ip; ··· 3631 3607 } 3632 3608 3633 3609 decoder->have_last_ip = true; 3634 - decoder->pkt_state = INTEL_PT_STATE_NO_IP; 3610 + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; 3635 3611 3636 3612 err = intel_pt_walk_psb(decoder); 3637 3613 if (err) ··· 3728 3704 3729 3705 if (err) { 3730 3706 decoder->state.err = intel_pt_ext_err(err); 3731 - decoder->state.from_ip = decoder->ip; 3707 + if (err != -EOVERFLOW) 3708 + decoder->state.from_ip = decoder->ip; 3732 3709 intel_pt_update_sample_time(decoder); 3733 3710 decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; 3734 3711 intel_pt_set_nr(decoder);
+1
tools/perf/util/intel-pt.c
··· 2565 2565 ptq->sync_switch = false; 2566 2566 intel_pt_next_tid(pt, ptq); 2567 2567 } 2568 + ptq->timestamp = state->est_timestamp; 2568 2569 if (pt->synth_opts.errors) { 2569 2570 err = intel_ptq_synth_error(ptq, state); 2570 2571 if (err)
+3
tools/perf/util/perf_regs.c
··· 25 25 int i, idx = 0; 26 26 u64 mask = regs->mask; 27 27 28 + if ((u64)id >= PERF_SAMPLE_REGS_CACHE_SIZE) 29 + return -EINVAL; 30 + 28 31 if (regs->cache_mask & (1ULL << id)) 29 32 goto out; 30 33
+1 -1
tools/perf/util/python.c
··· 461 461 struct tep_event *tp_format; 462 462 463 463 tp_format = trace_event__tp_format_id(evsel->core.attr.config); 464 - if (!tp_format) 464 + if (IS_ERR_OR_NULL(tp_format)) 465 465 return NULL; 466 466 467 467 evsel->tp_format = tp_format;