Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

+8

arch/powerpc/kernel/perf_counter.c

··· 518 518 struct cpu_hw_counters *cpuhw; 519 519 unsigned long flags; 520 520 521 + if (!ppmu) 522 + return; 521 523 local_irq_save(flags); 522 524 cpuhw = &__get_cpu_var(cpu_hw_counters); 523 525 ··· 574 572 int n_lim; 575 573 int idx; 576 574 575 + if (!ppmu) 576 + return; 577 577 local_irq_save(flags); 578 578 cpuhw = &__get_cpu_var(cpu_hw_counters); 579 579 if (!cpuhw->disabled) { ··· 741 737 long i, n, n0; 742 738 struct perf_counter *sub; 743 739 740 + if (!ppmu) 741 + return 0; 744 742 cpuhw = &__get_cpu_var(cpu_hw_counters); 745 743 n0 = cpuhw->n_counters; 746 744 n = collect_events(group_leader, ppmu->n_counter - n0, ··· 1287 1281 { 1288 1282 struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); 1289 1283 1284 + if (!ppmu) 1285 + return; 1290 1286 memset(cpuhw, 0, sizeof(*cpuhw)); 1291 1287 cpuhw->mmcr[0] = MMCR0_FC; 1292 1288 }

+7 -5

include/linux/perf_counter.h

··· 121 121 PERF_SAMPLE_CPU = 1U << 7, 122 122 PERF_SAMPLE_PERIOD = 1U << 8, 123 123 PERF_SAMPLE_STREAM_ID = 1U << 9, 124 - PERF_SAMPLE_TP_RECORD = 1U << 10, 124 + PERF_SAMPLE_RAW = 1U << 10, 125 125 126 126 PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ 127 127 }; ··· 369 369 * 370 370 * { u64 nr, 371 371 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN 372 + * { u32 size; 373 + * char data[size];}&& PERF_SAMPLE_RAW 372 374 * }; 373 375 */ 374 376 PERF_EVENT_SAMPLE = 9, ··· 416 414 __u64 ip[PERF_MAX_STACK_DEPTH]; 417 415 }; 418 416 419 - struct perf_tracepoint_record { 420 - int size; 421 - char *record; 417 + struct perf_raw_record { 418 + u32 size; 419 + void *data; 422 420 }; 423 421 424 422 struct task_struct; ··· 689 687 struct pt_regs *regs; 690 688 u64 addr; 691 689 u64 period; 692 - void *private; 690 + struct perf_raw_record *raw; 693 691 }; 694 692 695 693 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,

+13 -2

include/trace/ftrace.h

··· 637 637 * pc = preempt_count(); 638 638 * 639 639 * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args); 640 - * __entry_size = __data_size + sizeof(*entry); 640 + * 641 + * // Below we want to get the aligned size by taking into account 642 + * // the u32 field that will later store the buffer size 643 + * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), 644 + * sizeof(u64)); 645 + * __entry_size -= sizeof(u32); 641 646 * 642 647 * do { 643 648 * char raw_data[__entry_size]; <- allocate our sample in the stack 644 649 * struct trace_entry *ent; 645 650 * 651 + * zero dead bytes from alignment to avoid stack leak to userspace: 652 + * 653 + * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; 646 654 * entry = (struct ftrace_raw_<call> *)raw_data; 647 655 * ent = &entry->ent; 648 656 * tracing_generic_entry_update(ent, irq_flags, pc); ··· 693 685 pc = preempt_count(); \ 694 686 \ 695 687 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ 696 - __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\ 688 + __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ 689 + sizeof(u64)); \ 690 + __entry_size -= sizeof(u32); \ 697 691 \ 698 692 do { \ 699 693 char raw_data[__entry_size]; \ 700 694 struct trace_entry *ent; \ 701 695 \ 696 + *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ 702 697 entry = (struct ftrace_raw_##call *)raw_data; \ 703 698 ent = &entry->ent; \ 704 699 tracing_generic_entry_update(ent, irq_flags, pc); \

+156 -105

kernel/perf_counter.c

··· 2646 2646 u64 counter; 2647 2647 } group_entry; 2648 2648 struct perf_callchain_entry *callchain = NULL; 2649 - struct perf_tracepoint_record *tp; 2650 2649 int callchain_size = 0; 2651 2650 u64 time; 2652 2651 struct { ··· 2714 2715 header.size += sizeof(u64); 2715 2716 } 2716 2717 2717 - if (sample_type & PERF_SAMPLE_TP_RECORD) { 2718 - tp = data->private; 2719 - header.size += tp->size; 2718 + if (sample_type & PERF_SAMPLE_RAW) { 2719 + int size = sizeof(u32); 2720 + 2721 + if (data->raw) 2722 + size += data->raw->size; 2723 + else 2724 + size += sizeof(u32); 2725 + 2726 + WARN_ON_ONCE(size & (sizeof(u64)-1)); 2727 + header.size += size; 2720 2728 } 2721 2729 2722 2730 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); ··· 2789 2783 } 2790 2784 } 2791 2785 2792 - if (sample_type & PERF_SAMPLE_TP_RECORD) 2793 - perf_output_copy(&handle, tp->record, tp->size); 2786 + if (sample_type & PERF_SAMPLE_RAW) { 2787 + if (data->raw) { 2788 + perf_output_put(&handle, data->raw->size); 2789 + perf_output_copy(&handle, data->raw->data, data->raw->size); 2790 + } else { 2791 + struct { 2792 + u32 size; 2793 + u32 data; 2794 + } raw = { 2795 + .size = sizeof(u32), 2796 + .data = 0, 2797 + }; 2798 + perf_output_put(&handle, raw); 2799 + } 2800 + } 2794 2801 2795 2802 perf_output_end(&handle); 2796 2803 } ··· 2868 2849 */ 2869 2850 2870 2851 struct perf_task_event { 2871 - struct task_struct *task; 2852 + struct task_struct *task; 2853 + struct perf_counter_context *task_ctx; 2872 2854 2873 2855 struct { 2874 2856 struct perf_event_header header; ··· 2929 2909 static void perf_counter_task_event(struct perf_task_event *task_event) 2930 2910 { 2931 2911 struct perf_cpu_context *cpuctx; 2932 - struct perf_counter_context *ctx; 2912 + struct perf_counter_context *ctx = task_event->task_ctx; 2933 2913 2934 2914 cpuctx = &get_cpu_var(perf_cpu_context); 2935 2915 perf_counter_task_ctx(&cpuctx->ctx, task_event); 2936 2916 put_cpu_var(perf_cpu_context); 2937 2917 2938 
2918 rcu_read_lock(); 2939 - /* 2940 - * doesn't really matter which of the child contexts the 2941 - * events ends up in. 2942 - */ 2943 - ctx = rcu_dereference(current->perf_counter_ctxp); 2919 + if (!ctx) 2920 + ctx = rcu_dereference(task_event->task->perf_counter_ctxp); 2944 2921 if (ctx) 2945 2922 perf_counter_task_ctx(ctx, task_event); 2946 2923 rcu_read_unlock(); 2947 2924 } 2948 2925 2949 - static void perf_counter_task(struct task_struct *task, int new) 2926 + static void perf_counter_task(struct task_struct *task, 2927 + struct perf_counter_context *task_ctx, 2928 + int new) 2950 2929 { 2951 2930 struct perf_task_event task_event; 2952 2931 ··· 2955 2936 return; 2956 2937 2957 2938 task_event = (struct perf_task_event){ 2958 - .task = task, 2959 - .event = { 2939 + .task = task, 2940 + .task_ctx = task_ctx, 2941 + .event = { 2960 2942 .header = { 2961 2943 .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, 2962 2944 .misc = 0, ··· 2975 2955 2976 2956 void perf_counter_fork(struct task_struct *task) 2977 2957 { 2978 - perf_counter_task(task, 1); 2958 + perf_counter_task(task, NULL, 1); 2979 2959 } 2980 2960 2981 2961 /* ··· 3364 3344 * Generic software counter infrastructure 3365 3345 */ 3366 3346 3367 - static void perf_swcounter_update(struct perf_counter *counter) 3347 + /* 3348 + * We directly increment counter->count and keep a second value in 3349 + * counter->hw.period_left to count intervals. This period counter 3350 + * is kept in the range [-sample_period, 0] so that we can use the 3351 + * sign as trigger. 
3352 + */ 3353 + 3354 + static u64 perf_swcounter_set_period(struct perf_counter *counter) 3368 3355 { 3369 3356 struct hw_perf_counter *hwc = &counter->hw; 3370 - u64 prev, now; 3371 - s64 delta; 3357 + u64 period = hwc->last_period; 3358 + u64 nr, offset; 3359 + s64 old, val; 3360 + 3361 + hwc->last_period = hwc->sample_period; 3372 3362 3373 3363 again: 3374 - prev = atomic64_read(&hwc->prev_count); 3375 - now = atomic64_read(&hwc->count); 3376 - if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) 3364 + old = val = atomic64_read(&hwc->period_left); 3365 + if (val < 0) 3366 + return 0; 3367 + 3368 + nr = div64_u64(period + val, period); 3369 + offset = nr * period; 3370 + val -= offset; 3371 + if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) 3377 3372 goto again; 3378 3373 3379 - delta = now - prev; 3380 - 3381 - atomic64_add(delta, &counter->count); 3382 - atomic64_sub(delta, &hwc->period_left); 3383 - } 3384 - 3385 - static void perf_swcounter_set_period(struct perf_counter *counter) 3386 - { 3387 - struct hw_perf_counter *hwc = &counter->hw; 3388 - s64 left = atomic64_read(&hwc->period_left); 3389 - s64 period = hwc->sample_period; 3390 - 3391 - if (unlikely(left <= -period)) { 3392 - left = period; 3393 - atomic64_set(&hwc->period_left, left); 3394 - hwc->last_period = period; 3395 - } 3396 - 3397 - if (unlikely(left <= 0)) { 3398 - left += period; 3399 - atomic64_add(period, &hwc->period_left); 3400 - hwc->last_period = period; 3401 - } 3402 - 3403 - atomic64_set(&hwc->prev_count, -left); 3404 - atomic64_set(&hwc->count, -left); 3405 - } 3406 - 3407 - static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) 3408 - { 3409 - enum hrtimer_restart ret = HRTIMER_RESTART; 3410 - struct perf_sample_data data; 3411 - struct perf_counter *counter; 3412 - u64 period; 3413 - 3414 - counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); 3415 - counter->pmu->read(counter); 3416 - 3417 - data.addr = 0; 3418 - data.regs = 
get_irq_regs(); 3419 - /* 3420 - * In case we exclude kernel IPs or are somehow not in interrupt 3421 - * context, provide the next best thing, the user IP. 3422 - */ 3423 - if ((counter->attr.exclude_kernel || !data.regs) && 3424 - !counter->attr.exclude_user) 3425 - data.regs = task_pt_regs(current); 3426 - 3427 - if (data.regs) { 3428 - if (perf_counter_overflow(counter, 0, &data)) 3429 - ret = HRTIMER_NORESTART; 3430 - } 3431 - 3432 - period = max_t(u64, 10000, counter->hw.sample_period); 3433 - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); 3434 - 3435 - return ret; 3374 + return nr; 3436 3375 } 3437 3376 3438 3377 static void perf_swcounter_overflow(struct perf_counter *counter, 3439 3378 int nmi, struct perf_sample_data *data) 3440 3379 { 3441 - data->period = counter->hw.last_period; 3380 + struct hw_perf_counter *hwc = &counter->hw; 3381 + u64 overflow; 3442 3382 3443 - perf_swcounter_update(counter); 3444 - perf_swcounter_set_period(counter); 3445 - if (perf_counter_overflow(counter, nmi, data)) 3446 - /* soft-disable the counter */ 3447 - ; 3383 + data->period = counter->hw.last_period; 3384 + overflow = perf_swcounter_set_period(counter); 3385 + 3386 + if (hwc->interrupts == MAX_INTERRUPTS) 3387 + return; 3388 + 3389 + for (; overflow; overflow--) { 3390 + if (perf_counter_overflow(counter, nmi, data)) { 3391 + /* 3392 + * We inhibit the overflow from happening when 3393 + * hwc->interrupts == MAX_INTERRUPTS. 3394 + */ 3395 + break; 3396 + } 3397 + } 3398 + } 3399 + 3400 + static void perf_swcounter_unthrottle(struct perf_counter *counter) 3401 + { 3402 + /* 3403 + * Nothing to do, we already reset hwc->interrupts. 
3404 + */ 3405 + } 3406 + 3407 + static void perf_swcounter_add(struct perf_counter *counter, u64 nr, 3408 + int nmi, struct perf_sample_data *data) 3409 + { 3410 + struct hw_perf_counter *hwc = &counter->hw; 3411 + 3412 + atomic64_add(nr, &counter->count); 3413 + 3414 + if (!hwc->sample_period) 3415 + return; 3416 + 3417 + if (!data->regs) 3418 + return; 3419 + 3420 + if (!atomic64_add_negative(nr, &hwc->period_left)) 3421 + perf_swcounter_overflow(counter, nmi, data); 3448 3422 } 3449 3423 3450 3424 static int perf_swcounter_is_counting(struct perf_counter *counter) ··· 3500 3486 } 3501 3487 3502 3488 return 1; 3503 - } 3504 - 3505 - static void perf_swcounter_add(struct perf_counter *counter, u64 nr, 3506 - int nmi, struct perf_sample_data *data) 3507 - { 3508 - int neg = atomic64_add_negative(nr, &counter->hw.count); 3509 - 3510 - if (counter->hw.sample_period && !neg && data->regs) 3511 - perf_swcounter_overflow(counter, nmi, data); 3512 3489 } 3513 3490 3514 3491 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, ··· 3580 3575 3581 3576 static void perf_swcounter_read(struct perf_counter *counter) 3582 3577 { 3583 - perf_swcounter_update(counter); 3584 3578 } 3585 3579 3586 3580 static int perf_swcounter_enable(struct perf_counter *counter) 3587 3581 { 3588 - perf_swcounter_set_period(counter); 3582 + struct hw_perf_counter *hwc = &counter->hw; 3583 + 3584 + if (hwc->sample_period) { 3585 + hwc->last_period = hwc->sample_period; 3586 + perf_swcounter_set_period(counter); 3587 + } 3589 3588 return 0; 3590 3589 } 3591 3590 3592 3591 static void perf_swcounter_disable(struct perf_counter *counter) 3593 3592 { 3594 - perf_swcounter_update(counter); 3595 3593 } 3596 3594 3597 3595 static const struct pmu perf_ops_generic = { 3598 3596 .enable = perf_swcounter_enable, 3599 3597 .disable = perf_swcounter_disable, 3600 3598 .read = perf_swcounter_read, 3599 + .unthrottle = perf_swcounter_unthrottle, 3601 3600 }; 3601 + 3602 + /* 3603 + * hrtimer 
based swcounter callback 3604 + */ 3605 + 3606 + static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) 3607 + { 3608 + enum hrtimer_restart ret = HRTIMER_RESTART; 3609 + struct perf_sample_data data; 3610 + struct perf_counter *counter; 3611 + u64 period; 3612 + 3613 + counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); 3614 + counter->pmu->read(counter); 3615 + 3616 + data.addr = 0; 3617 + data.regs = get_irq_regs(); 3618 + /* 3619 + * In case we exclude kernel IPs or are somehow not in interrupt 3620 + * context, provide the next best thing, the user IP. 3621 + */ 3622 + if ((counter->attr.exclude_kernel || !data.regs) && 3623 + !counter->attr.exclude_user) 3624 + data.regs = task_pt_regs(current); 3625 + 3626 + if (data.regs) { 3627 + if (perf_counter_overflow(counter, 0, &data)) 3628 + ret = HRTIMER_NORESTART; 3629 + } 3630 + 3631 + period = max_t(u64, 10000, counter->hw.sample_period); 3632 + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); 3633 + 3634 + return ret; 3635 + } 3602 3636 3603 3637 /* 3604 3638 * Software counter: cpu wall time clock ··· 3759 3715 void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, 3760 3716 int entry_size) 3761 3717 { 3762 - struct perf_tracepoint_record tp = { 3718 + struct perf_raw_record raw = { 3763 3719 .size = entry_size, 3764 - .record = record, 3720 + .data = record, 3765 3721 }; 3766 3722 3767 3723 struct perf_sample_data data = { 3768 3724 .regs = get_irq_regs(), 3769 3725 .addr = addr, 3770 - .private = &tp, 3726 + .raw = &raw, 3771 3727 }; 3772 3728 3773 3729 if (!data.regs) ··· 3787 3743 3788 3744 static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) 3789 3745 { 3746 + /* 3747 + * Raw tracepoint data is a severe data leak, only allow root to 3748 + * have these. 
3749 + */ 3750 + if ((counter->attr.sample_type & PERF_SAMPLE_RAW) && 3751 + !capable(CAP_SYS_ADMIN)) 3752 + return ERR_PTR(-EPERM); 3753 + 3790 3754 if (ftrace_profile_enable(counter->attr.config)) 3791 3755 return NULL; 3792 3756 ··· 4337 4285 unsigned long flags; 4338 4286 4339 4287 if (likely(!child->perf_counter_ctxp)) { 4340 - perf_counter_task(child, 0); 4288 + perf_counter_task(child, NULL, 0); 4341 4289 return; 4342 4290 } 4343 4291 ··· 4357 4305 * incremented the context's refcount before we do put_ctx below. 4358 4306 */ 4359 4307 spin_lock(&child_ctx->lock); 4308 + child->perf_counter_ctxp = NULL; 4360 4309 /* 4361 4310 * If this context is a clone; unclone it so it can't get 4362 4311 * swapped to another process while we're removing all ··· 4371 4318 * won't get any samples after PERF_EVENT_EXIT. We can however still 4372 4319 * get a few PERF_EVENT_READ events. 4373 4320 */ 4374 - perf_counter_task(child, 0); 4375 - 4376 - child->perf_counter_ctxp = NULL; 4321 + perf_counter_task(child, child_ctx, 0); 4377 4322 4378 4323 /* 4379 4324 * We can recurse on the same lock type through:

+225

tools/perf/Documentation/perf-examples.txt

··· 1 + 2 + ------------------------------ 3 + ****** perf by examples ****** 4 + ------------------------------ 5 + 6 + [ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] 7 + 8 + 9 + First, discovery/enumeration of available counters can be done via 10 + 'perf list': 11 + 12 + titan:~> perf list 13 + [...] 14 + kmem:kmalloc [Tracepoint event] 15 + kmem:kmem_cache_alloc [Tracepoint event] 16 + kmem:kmalloc_node [Tracepoint event] 17 + kmem:kmem_cache_alloc_node [Tracepoint event] 18 + kmem:kfree [Tracepoint event] 19 + kmem:kmem_cache_free [Tracepoint event] 20 + kmem:mm_page_free_direct [Tracepoint event] 21 + kmem:mm_pagevec_free [Tracepoint event] 22 + kmem:mm_page_alloc [Tracepoint event] 23 + kmem:mm_page_alloc_zone_locked [Tracepoint event] 24 + kmem:mm_page_pcpu_drain [Tracepoint event] 25 + kmem:mm_page_alloc_extfrag [Tracepoint event] 26 + 27 + Then any (or all) of the above event sources can be activated and 28 + measured. For example the page alloc/free properties of a 'hackbench 29 + run' are: 30 + 31 + titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc 32 + -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 33 + Time: 0.575 34 + 35 + Performance counter stats for './hackbench 10': 36 + 37 + 13857 kmem:mm_page_pcpu_drain 38 + 27576 kmem:mm_page_alloc 39 + 6025 kmem:mm_pagevec_free 40 + 20934 kmem:mm_page_free_direct 41 + 42 + 0.613972165 seconds time elapsed 43 + 44 + You can observe the statistical properties as well, by using the 45 + 'repeat the workload N times' feature of perf stat: 46 + 47 + titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e 48 + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e 49 + kmem:mm_page_free_direct ./hackbench 10 50 + Time: 0.627 51 + Time: 0.644 52 + Time: 0.564 53 + Time: 0.559 54 + Time: 0.626 55 + 56 + Performance counter stats for './hackbench 10' (5 runs): 57 + 58 + 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) 59 + 25035 kmem:mm_page_alloc ( +- 3.783% ) 60 + 
6104 kmem:mm_pagevec_free ( +- 0.934% ) 61 + 18376 kmem:mm_page_free_direct ( +- 4.941% ) 62 + 63 + 0.643954516 seconds time elapsed ( +- 2.363% ) 64 + 65 + Furthermore, these tracepoints can be used to sample the workload as 66 + well. For example the page allocations done by a 'git gc' can be 67 + captured the following way: 68 + 69 + titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc 70 + Counting objects: 1148, done. 71 + Delta compression using up to 2 threads. 72 + Compressing objects: 100% (450/450), done. 73 + Writing objects: 100% (1148/1148), done. 74 + Total 1148 (delta 690), reused 1148 (delta 690) 75 + [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] 76 + 77 + To check which functions generated page allocations: 78 + 79 + titan:~/git> perf report 80 + # Samples: 10646 81 + # 82 + # Overhead Command Shared Object 83 + # ........ ............... .......................... 84 + # 85 + 23.57% git-repack /lib64/libc-2.5.so 86 + 21.81% git /lib64/libc-2.5.so 87 + 14.59% git ./git 88 + 11.79% git-repack ./git 89 + 7.12% git /lib64/ld-2.5.so 90 + 3.16% git-repack /lib64/libpthread-2.5.so 91 + 2.09% git-repack /bin/bash 92 + 1.97% rm /lib64/libc-2.5.so 93 + 1.39% mv /lib64/ld-2.5.so 94 + 1.37% mv /lib64/libc-2.5.so 95 + 1.12% git-repack /lib64/ld-2.5.so 96 + 0.95% rm /lib64/ld-2.5.so 97 + 0.90% git-update-serv /lib64/libc-2.5.so 98 + 0.73% git-update-serv /lib64/ld-2.5.so 99 + 0.68% perf /lib64/libpthread-2.5.so 100 + 0.64% git-repack /usr/lib64/libz.so.1.2.3 101 + 102 + Or to see it on a more finegrained level: 103 + 104 + titan:~/git> perf report --sort comm,dso,symbol 105 + # Samples: 10646 106 + # 107 + # Overhead Command Shared Object Symbol 108 + # ........ ............... .......................... ...... 109 + # 110 + 9.35% git-repack ./git [.] insert_obj_hash 111 + 9.12% git ./git [.] insert_obj_hash 112 + 7.31% git /lib64/libc-2.5.so [.] memcpy 113 + 6.34% git-repack /lib64/libc-2.5.so [.] 
_int_malloc 114 + 6.24% git-repack /lib64/libc-2.5.so [.] memcpy 115 + 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork 116 + 5.47% git /lib64/libc-2.5.so [.] _int_malloc 117 + 2.99% git /lib64/libc-2.5.so [.] memset 118 + 119 + Furthermore, call-graph sampling can be done too, of page 120 + allocations - to see precisely what kind of page allocations there 121 + are: 122 + 123 + titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc 124 + Counting objects: 1148, done. 125 + Delta compression using up to 2 threads. 126 + Compressing objects: 100% (450/450), done. 127 + Writing objects: 100% (1148/1148), done. 128 + Total 1148 (delta 690), reused 1148 (delta 690) 129 + [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] 130 + 131 + titan:~/git> perf report -g 132 + # Samples: 10686 133 + # 134 + # Overhead Command Shared Object 135 + # ........ ............... .......................... 136 + # 137 + 23.25% git-repack /lib64/libc-2.5.so 138 + | 139 + |--50.00%-- _int_free 140 + | 141 + |--37.50%-- __GI___fork 142 + | make_child 143 + | 144 + |--12.50%-- ptmalloc_unlock_all2 145 + | make_child 146 + | 147 + --6.25%-- __GI_strcpy 148 + 21.61% git /lib64/libc-2.5.so 149 + | 150 + |--30.00%-- __GI_read 151 + | | 152 + | --83.33%-- git_config_from_file 153 + | git_config 154 + | | 155 + [...] 
156 + 157 + Or you can observe the whole system's page allocations for 10 158 + seconds: 159 + 160 + titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e 161 + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e 162 + kmem:mm_page_free_direct sleep 10 163 + 164 + Performance counter stats for 'sleep 10': 165 + 166 + 171585 kmem:mm_page_pcpu_drain 167 + 322114 kmem:mm_page_alloc 168 + 73623 kmem:mm_pagevec_free 169 + 254115 kmem:mm_page_free_direct 170 + 171 + 10.000591410 seconds time elapsed 172 + 173 + Or observe how fluctuating the page allocations are, via statistical 174 + analysis done over ten 1-second intervals: 175 + 176 + titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e 177 + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e 178 + kmem:mm_page_free_direct sleep 1 179 + 180 + Performance counter stats for 'sleep 1' (10 runs): 181 + 182 + 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) 183 + 34394 kmem:mm_page_alloc ( +- 4.617% ) 184 + 7509 kmem:mm_pagevec_free ( +- 4.820% ) 185 + 25653 kmem:mm_page_free_direct ( +- 3.672% ) 186 + 187 + 1.058135029 seconds time elapsed ( +- 3.089% ) 188 + 189 + Or you can annotate the recorded 'git gc' run on a per symbol basis 190 + and check which instructions/source-code generated page allocations: 191 + 192 + titan:~/git> perf annotate __GI___fork 193 + ------------------------------------------------ 194 + Percent | Source code & Disassembly of libc-2.5.so 195 + ------------------------------------------------ 196 + : 197 + : 198 + : Disassembly of section .plt: 199 + : Disassembly of section .text: 200 + : 201 + : 00000031a2e95560 <__fork>: 202 + [...] 
203 + 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax 204 + 0.00 : 31a2e95607: 0f 05 syscall 205 + 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 206 + 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> 207 + 0.00 : 31a2e95615: 85 c0 test %eax,%eax 208 + 209 + ( this shows that 83.42% of __GI___fork's page allocations come from 210 + the 0x38 system call it performs. ) 211 + 212 + etc. etc. - a lot more is possible. I could list a dozen of 213 + other different usecases straight away - neither of which is 214 + possible via /proc/vmstat. 215 + 216 + /proc/vmstat is not in the same league really, in terms of 217 + expressive power of system analysis and performance 218 + analysis. 219 + 220 + All that the above results needed were those new tracepoints 221 + in include/tracing/events/kmem.h. 222 + 223 + Ingo 224 + 225 +

+1 -1

tools/perf/Documentation/perf-stat.txt

··· 40 40 -a:: 41 41 system-wide collection 42 42 43 - -S:: 43 + -c:: 44 44 scale counter values 45 45 46 46 EXAMPLES

+99 -13

tools/perf/Documentation/perf-top.txt

··· 3 3 4 4 NAME 5 5 ---- 6 - perf-top - Run a command and profile it 6 + perf-top - System profiling tool. 7 7 8 8 SYNOPSIS 9 9 -------- 10 10 [verse] 11 - 'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> 11 + 'perf top' [-e <EVENT> | --event=EVENT] [<options>] 12 12 13 13 DESCRIPTION 14 14 ----------- 15 - This command runs a command and gathers a performance counter profile 16 - from it. 15 + This command generates and displays a performance counter profile in realtime. 16 17 17 18 18 19 OPTIONS 19 20 ------- 21 - <command>...:: 22 - Any command you can specify in a shell. 20 + -a:: 21 + --all-cpus:: 22 + System-wide collection. (default) 23 23 24 - -e:: 25 - --event=:: 24 + -c <count>:: 25 + --count=<count>:: 26 + Event period to sample. 27 + 28 + -C <cpu>:: 29 + --CPU=<cpu>:: 30 + CPU to profile. 31 + 32 + -d <seconds>:: 33 + --delay=<seconds>:: 34 + Number of seconds to delay between refreshes. 35 + 36 + -e <event>:: 37 + --event=<event>:: 26 38 Select the PMU event. Selection can be a symbolic event name 27 39 (use 'perf list' to list all events) or a raw PMU 28 40 event (eventsel+umask) in the form of rNNN where NNN is a 29 - hexadecimal event descriptor. 41 + hexadecimal event descriptor. 30 42 31 - -a:: 32 - system-wide collection 43 + -E <entries>:: 44 + --entries=<entries>:: 45 + Display this many functions. 33 46 34 - -l:: 35 - scale counter values 47 + -f <count>:: 48 + --count-filter=<count>:: 49 + Only display functions with more events than this. 50 + 51 + -F <freq>:: 52 + --freq=<freq>:: 53 + Profile at this frequency. 54 + 55 + -i:: 56 + --inherit:: 57 + Child tasks inherit counters, only makes sense with -p option. 58 + 59 + -k <path>:: 60 + --vmlinux=<path>:: 61 + Path to vmlinux. Required for annotation functionality. 62 + 63 + -m <pages>:: 64 + --mmap-pages=<pages>:: 65 + Number of mmapped data pages. 66 + 67 + -p <pid>:: 68 + --pid=<pid>:: 69 + Profile events on existing pid. 
70 + 71 + -r <priority>:: 72 + --realtime=<priority>:: 73 + Collect data with this RT SCHED_FIFO priority. 74 + 75 + -s <symbol>:: 76 + --sym-annotate=<symbol>:: 77 + Annotate this symbol. Requires -k option. 78 + 79 + -v:: 80 + --verbose:: 81 + Be more verbose (show counter open errors, etc). 82 + 83 + -z:: 84 + --zero:: 85 + Zero history across display updates. 86 + 87 + INTERACTIVE PROMPTING KEYS 88 + -------------------------- 89 + 90 + [d]:: 91 + Display refresh delay. 92 + 93 + [e]:: 94 + Number of entries to display. 95 + 96 + [E]:: 97 + Event to display when multiple counters are active. 98 + 99 + [f]:: 100 + Profile display filter (>= hit count). 101 + 102 + [F]:: 103 + Annotation display filter (>= % of total). 104 + 105 + [s]:: 106 + Annotate symbol. 107 + 108 + [S]:: 109 + Stop annotation, return to full profile display. 110 + 111 + [w]:: 112 + Toggle between weighted sum and individual count[E]r profile. 113 + 114 + [z]:: 115 + Toggle event count zeroing across display updates. 116 + 117 + [qQ]:: 118 + Quit. 119 + 120 + Pressing any unmapped key displays a menu, and prompts for input. 121 + 36 122 37 123 SEE ALSO 38 124 --------

+4

tools/perf/Makefile

··· 387 387 388 388 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") 389 389 390 + has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") 391 + 390 392 ifeq ($(has_bfd),y) 391 393 EXTLIBS += -lbfd 392 394 else ifeq ($(has_bfd_iberty),y) 393 395 EXTLIBS += -lbfd -liberty 396 + else ifeq ($(has_bfd_iberty_z),y) 397 + EXTLIBS += -lbfd -liberty -lz 394 398 else 395 399 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) 396 400 BASIC_CFLAGS += -DNO_DEMANGLE

+8 -4

tools/perf/builtin-record.c

··· 525 525 signal(SIGCHLD, sig_handler); 526 526 signal(SIGINT, sig_handler); 527 527 528 - if (!stat(output_name, &st) && !force && !append_file) { 529 - fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", 530 - output_name); 531 - exit(-1); 528 + if (!stat(output_name, &st) && st.st_size) { 529 + if (!force && !append_file) { 530 + fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", 531 + output_name); 532 + exit(-1); 533 + } 534 + } else { 535 + append_file = 0; 532 536 } 533 537 534 538 flags = O_CREAT|O_RDWR;

+89 -10

tools/perf/builtin-report.c

··· 68 68 69 69 static 70 70 struct callchain_param callchain_param = { 71 - .mode = CHAIN_GRAPH_ABS, 71 + .mode = CHAIN_GRAPH_REL, 72 72 .min_percent = 0.5 73 73 }; 74 74 ··· 112 112 struct perf_event_header header; 113 113 u32 pid,tid; 114 114 u64 value; 115 - u64 format[3]; 115 + u64 time_enabled; 116 + u64 time_running; 117 + u64 id; 116 118 }; 117 119 118 120 typedef union event_union { ··· 700 698 size_t ret = 0; 701 699 702 700 if (verbose) 703 - ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); 701 + ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, 702 + dso__symtab_origin(self->dso)); 704 703 705 704 ret += repsep_fprintf(fp, "[%c] ", self->level); 706 705 if (self->sym) { ··· 891 888 return ret; 892 889 } 893 890 891 + static struct symbol *rem_sq_bracket; 892 + static struct callchain_list rem_hits; 893 + 894 + static void init_rem_hits(void) 895 + { 896 + rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); 897 + if (!rem_sq_bracket) { 898 + fprintf(stderr, "Not enough memory to display remaining hits\n"); 899 + return; 900 + } 901 + 902 + strcpy(rem_sq_bracket->name, "[...]"); 903 + rem_hits.sym = rem_sq_bracket; 904 + } 905 + 894 906 static size_t 895 907 callchain__fprintf_graph(FILE *fp, struct callchain_node *self, 896 908 u64 total_samples, int depth, int depth_mask) ··· 915 897 struct callchain_list *chain; 916 898 int new_depth_mask = depth_mask; 917 899 u64 new_total; 900 + u64 remaining; 918 901 size_t ret = 0; 919 902 int i; 920 903 921 904 if (callchain_param.mode == CHAIN_GRAPH_REL) 922 - new_total = self->cumul_hit; 905 + new_total = self->children_hit; 923 906 else 924 907 new_total = total_samples; 925 908 909 + remaining = new_total; 910 + 926 911 node = rb_first(&self->rb_root); 927 912 while (node) { 913 + u64 cumul; 914 + 928 915 child = rb_entry(node, struct callchain_node, rb_node); 916 + cumul = cumul_hits(child); 917 + remaining -= cumul; 929 918 930 919 /* 931 920 * The depth mask manages the output of pipes that 
show 932 921 * the depth. We don't want to keep the pipes of the current 933 - * level for the last child of this depth 922 + * level for the last child of this depth. 923 + * Except if we have remaining filtered hits. They will 924 + * supersede the last child 934 925 */ 935 926 next = rb_next(node); 936 - if (!next) 927 + if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) 937 928 new_depth_mask &= ~(1 << (depth - 1)); 938 929 939 930 /* ··· 957 930 ret += ipchain__fprintf_graph(fp, chain, depth, 958 931 new_depth_mask, i++, 959 932 new_total, 960 - child->cumul_hit); 933 + cumul); 961 934 } 962 935 ret += callchain__fprintf_graph(fp, child, new_total, 963 936 depth + 1, 964 937 new_depth_mask | (1 << depth)); 965 938 node = next; 939 + } 940 + 941 + if (callchain_param.mode == CHAIN_GRAPH_REL && 942 + remaining && remaining != new_total) { 943 + 944 + if (!rem_sq_bracket) 945 + return ret; 946 + 947 + new_depth_mask &= ~(1 << (depth - 1)); 948 + 949 + ret += ipchain__fprintf_graph(fp, &rem_hits, depth, 950 + new_depth_mask, 0, new_total, 951 + remaining); 966 952 } 967 953 968 954 return ret; ··· 1398 1358 unsigned int width; 1399 1359 char *col_width = col_width_list_str; 1400 1360 1361 + init_rem_hits(); 1362 + 1401 1363 fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); 1402 1364 fprintf(fp, "#\n"); 1403 1365 ··· 1470 1428 fprintf(fp, "#\n"); 1471 1429 } 1472 1430 fprintf(fp, "\n"); 1431 + 1432 + free(rem_sq_bracket); 1473 1433 1474 1434 return ret; 1475 1435 } ··· 1734 1690 dprintf(".\n"); 1735 1691 } 1736 1692 1693 + static struct perf_header *header; 1694 + 1695 + static struct perf_counter_attr *perf_header__find_attr(u64 id) 1696 + { 1697 + int i; 1698 + 1699 + for (i = 0; i < header->attrs; i++) { 1700 + struct perf_header_attr *attr = header->attr[i]; 1701 + int j; 1702 + 1703 + for (j = 0; j < attr->ids; j++) { 1704 + if (attr->id[j] == id) 1705 + return &attr->attr; 1706 + } 1707 + } 1708 + 1709 + return NULL; 1710 + } 1711 + 1737 
1712 static int 1738 1713 process_read_event(event_t *event, unsigned long offset, unsigned long head) 1739 1714 { 1740 - dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", 1715 + struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); 1716 + 1717 + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", 1741 1718 (void *)(offset + head), 1742 1719 (void *)(long)(event->header.size), 1743 1720 event->read.pid, 1744 1721 event->read.tid, 1722 + attr ? __event_name(attr->type, attr->config) 1723 + : "FAIL", 1745 1724 event->read.value); 1746 1725 1747 1726 return 0; ··· 1809 1742 1810 1743 return 0; 1811 1744 } 1812 - 1813 - static struct perf_header *header; 1814 1745 1815 1746 static u64 perf_header__sample_type(void) 1816 1747 { ··· 1877 1812 " -g?\n"); 1878 1813 exit(-1); 1879 1814 } 1815 + } else if (callchain_param.mode != CHAIN_NONE && !callchain) { 1816 + callchain = 1; 1817 + if (register_callchain_param(&callchain_param) < 0) { 1818 + fprintf(stderr, "Can't register callchain" 1819 + " params\n"); 1820 + exit(-1); 1821 + } 1880 1822 } 1881 1823 1882 1824 if (load_kernel() < 0) { ··· 2021 1949 2022 1950 else if (!strncmp(tok, "fractal", strlen(arg))) 2023 1951 callchain_param.mode = CHAIN_GRAPH_REL; 1952 + 1953 + else if (!strncmp(tok, "none", strlen(arg))) { 1954 + callchain_param.mode = CHAIN_NONE; 1955 + callchain = 0; 1956 + 1957 + return 0; 1958 + } 2024 1959 2025 1960 else 2026 1961 return -1;

+1 -1

tools/perf/builtin-stat.c

··· 496 496 "stat events on existing pid"), 497 497 OPT_BOOLEAN('a', "all-cpus", &system_wide, 498 498 "system-wide collection from all CPUs"), 499 - OPT_BOOLEAN('S', "scale", &scale, 499 + OPT_BOOLEAN('c', "scale", &scale, 500 500 "scale/normalize counters"), 501 501 OPT_BOOLEAN('v', "verbose", &verbose, 502 502 "be more verbose (show counter open errors, etc)"),

+508 -44

tools/perf/builtin-top.c

··· 31 31 #include <fcntl.h> 32 32 33 33 #include <stdio.h> 34 + #include <termios.h> 35 + #include <unistd.h> 34 36 35 37 #include <errno.h> 36 38 #include <time.h> ··· 56 54 57 55 static int default_interval = 100000; 58 56 59 - static u64 count_filter = 5; 57 + static int count_filter = 5; 60 58 static int print_entries = 15; 61 59 62 60 static int target_pid = -1; ··· 71 69 static int verbose = 0; 72 70 static char *vmlinux = NULL; 73 71 74 - static char *sym_filter; 75 - static unsigned long filter_start; 76 - static unsigned long filter_end; 77 - 78 72 static int delay_secs = 2; 79 73 static int zero; 80 74 static int dump_symtab; 75 + 76 + /* 77 + * Source 78 + */ 79 + 80 + struct source_line { 81 + u64 eip; 82 + unsigned long count[MAX_COUNTERS]; 83 + char *line; 84 + struct source_line *next; 85 + }; 86 + 87 + static char *sym_filter = NULL; 88 + struct sym_entry *sym_filter_entry = NULL; 89 + static int sym_pcnt_filter = 5; 90 + static int sym_counter = 0; 91 + static int display_weighted = -1; 81 92 82 93 /* 83 94 * Symbols ··· 106 91 unsigned long snap_count; 107 92 double weight; 108 93 int skip; 94 + struct source_line *source; 95 + struct source_line *lines; 96 + struct source_line **lines_tail; 97 + pthread_mutex_t source_lock; 109 98 }; 110 99 111 - struct sym_entry *sym_filter_entry; 100 + /* 101 + * Source functions 102 + */ 103 + 104 + static void parse_source(struct sym_entry *syme) 105 + { 106 + struct symbol *sym; 107 + struct module *module; 108 + struct section *section = NULL; 109 + FILE *file; 110 + char command[PATH_MAX*2], *path = vmlinux; 111 + u64 start, end, len; 112 + 113 + if (!syme) 114 + return; 115 + 116 + if (syme->lines) { 117 + pthread_mutex_lock(&syme->source_lock); 118 + goto out_assign; 119 + } 120 + 121 + sym = (struct symbol *)(syme + 1); 122 + module = sym->module; 123 + 124 + if (module) 125 + path = module->path; 126 + if (!path) 127 + return; 128 + 129 + start = sym->obj_start; 130 + if (!start) 131 + start = 
sym->start; 132 + 133 + if (module) { 134 + section = module->sections->find_section(module->sections, ".text"); 135 + if (section) 136 + start -= section->vma; 137 + } 138 + 139 + end = start + sym->end - sym->start + 1; 140 + len = sym->end - sym->start; 141 + 142 + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); 143 + 144 + file = popen(command, "r"); 145 + if (!file) 146 + return; 147 + 148 + pthread_mutex_lock(&syme->source_lock); 149 + syme->lines_tail = &syme->lines; 150 + while (!feof(file)) { 151 + struct source_line *src; 152 + size_t dummy = 0; 153 + char *c; 154 + 155 + src = malloc(sizeof(struct source_line)); 156 + assert(src != NULL); 157 + memset(src, 0, sizeof(struct source_line)); 158 + 159 + if (getline(&src->line, &dummy, file) < 0) 160 + break; 161 + if (!src->line) 162 + break; 163 + 164 + c = strchr(src->line, '\n'); 165 + if (c) 166 + *c = 0; 167 + 168 + src->next = NULL; 169 + *syme->lines_tail = src; 170 + syme->lines_tail = &src->next; 171 + 172 + if (strlen(src->line)>8 && src->line[8] == ':') { 173 + src->eip = strtoull(src->line, NULL, 16); 174 + if (section) 175 + src->eip += section->vma; 176 + } 177 + if (strlen(src->line)>8 && src->line[16] == ':') { 178 + src->eip = strtoull(src->line, NULL, 16); 179 + if (section) 180 + src->eip += section->vma; 181 + } 182 + } 183 + pclose(file); 184 + out_assign: 185 + sym_filter_entry = syme; 186 + pthread_mutex_unlock(&syme->source_lock); 187 + } 188 + 189 + static void __zero_source_counters(struct sym_entry *syme) 190 + { 191 + int i; 192 + struct source_line *line; 193 + 194 + line = syme->lines; 195 + while (line) { 196 + for (i = 0; i < nr_counters; i++) 197 + line->count[i] = 0; 198 + line = line->next; 199 + } 200 + } 201 + 202 + static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 203 + { 204 + struct source_line *line; 205 + 206 + if (syme != sym_filter_entry) 207 + return; 208 + 209 + if 
(pthread_mutex_trylock(&syme->source_lock)) 210 + return; 211 + 212 + if (!syme->source) 213 + goto out_unlock; 214 + 215 + for (line = syme->lines; line; line = line->next) { 216 + if (line->eip == ip) { 217 + line->count[counter]++; 218 + break; 219 + } 220 + if (line->eip > ip) 221 + break; 222 + } 223 + out_unlock: 224 + pthread_mutex_unlock(&syme->source_lock); 225 + } 226 + 227 + static void lookup_sym_source(struct sym_entry *syme) 228 + { 229 + struct symbol *symbol = (struct symbol *)(syme + 1); 230 + struct source_line *line; 231 + char pattern[PATH_MAX]; 232 + char *idx; 233 + 234 + sprintf(pattern, "<%s>:", symbol->name); 235 + 236 + if (symbol->module) { 237 + idx = strstr(pattern, "\t"); 238 + if (idx) 239 + *idx = 0; 240 + } 241 + 242 + pthread_mutex_lock(&syme->source_lock); 243 + for (line = syme->lines; line; line = line->next) { 244 + if (strstr(line->line, pattern)) { 245 + syme->source = line; 246 + break; 247 + } 248 + } 249 + pthread_mutex_unlock(&syme->source_lock); 250 + } 251 + 252 + static void show_lines(struct source_line *queue, int count, int total) 253 + { 254 + int i; 255 + struct source_line *line; 256 + 257 + line = queue; 258 + for (i = 0; i < count; i++) { 259 + float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; 260 + 261 + printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 262 + line = line->next; 263 + } 264 + } 265 + 266 + #define TRACE_COUNT 3 267 + 268 + static void show_details(struct sym_entry *syme) 269 + { 270 + struct symbol *symbol; 271 + struct source_line *line; 272 + struct source_line *line_queue = NULL; 273 + int displayed = 0; 274 + int line_queue_count = 0, total = 0, more = 0; 275 + 276 + if (!syme) 277 + return; 278 + 279 + if (!syme->source) 280 + lookup_sym_source(syme); 281 + 282 + if (!syme->source) 283 + return; 284 + 285 + symbol = (struct symbol *)(syme + 1); 286 + printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 287 + printf(" Events Pcnt 
(>=%d%%)\n", sym_pcnt_filter); 288 + 289 + pthread_mutex_lock(&syme->source_lock); 290 + line = syme->source; 291 + while (line) { 292 + total += line->count[sym_counter]; 293 + line = line->next; 294 + } 295 + 296 + line = syme->source; 297 + while (line) { 298 + float pcnt = 0.0; 299 + 300 + if (!line_queue_count) 301 + line_queue = line; 302 + line_queue_count++; 303 + 304 + if (line->count[sym_counter]) 305 + pcnt = 100.0 * line->count[sym_counter] / (float)total; 306 + if (pcnt >= (float)sym_pcnt_filter) { 307 + if (displayed <= print_entries) 308 + show_lines(line_queue, line_queue_count, total); 309 + else more++; 310 + displayed += line_queue_count; 311 + line_queue_count = 0; 312 + line_queue = NULL; 313 + } else if (line_queue_count > TRACE_COUNT) { 314 + line_queue = line_queue->next; 315 + line_queue_count--; 316 + } 317 + 318 + line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; 319 + line = line->next; 320 + } 321 + pthread_mutex_unlock(&syme->source_lock); 322 + if (more) 323 + printf("%d lines not displayed, maybe increase display entries [e]\n", more); 324 + } 112 325 113 326 struct dso *kernel_dso; 114 327 ··· 354 111 { 355 112 double weight = sym->snap_count; 356 113 int counter; 114 + 115 + if (!display_weighted) 116 + return weight; 357 117 358 118 for (counter = 1; counter < nr_counters-1; counter++) 359 119 weight *= sym->count[counter]; ··· 405 159 static void print_sym_table(void) 406 160 { 407 161 int printed = 0, j; 408 - int counter; 162 + int counter, snap = !display_weighted ? 
sym_counter : 0; 409 163 float samples_per_sec = samples/delay_secs; 410 164 float ksamples_per_sec = (samples-userspace_samples)/delay_secs; 411 165 float sum_ksamples = 0.0; ··· 421 175 pthread_mutex_unlock(&active_symbols_lock); 422 176 423 177 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 424 - syme->snap_count = syme->count[0]; 178 + syme->snap_count = syme->count[snap]; 425 179 if (syme->snap_count != 0) { 426 180 syme->weight = sym_weight(syme); 427 181 rb_insert_active_sym(&tmp, syme); ··· 441 195 samples_per_sec, 442 196 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 443 197 444 - if (nr_counters == 1) { 198 + if (nr_counters == 1 || !display_weighted) { 445 199 printf("%Ld", (u64)attrs[0].sample_period); 446 200 if (freq) 447 201 printf("Hz "); ··· 449 203 printf(" "); 450 204 } 451 205 452 - for (counter = 0; counter < nr_counters; counter++) { 206 + if (!display_weighted) 207 + printf("%s", event_name(sym_counter)); 208 + else for (counter = 0; counter < nr_counters; counter++) { 453 209 if (counter) 454 210 printf("/"); 455 211 ··· 476 228 477 229 printf("------------------------------------------------------------------------------\n\n"); 478 230 231 + if (sym_filter_entry) { 232 + show_details(sym_filter_entry); 233 + return; 234 + } 235 + 479 236 if (nr_counters == 1) 480 237 printf(" samples pcnt"); 481 238 else ··· 495 242 struct symbol *sym = (struct symbol *)(syme + 1); 496 243 double pcnt; 497 244 498 - if (++printed > print_entries || syme->snap_count < count_filter) 245 + if (++printed > print_entries || (int)syme->snap_count < count_filter) 499 246 continue; 500 247 501 248 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 502 249 sum_ksamples)); 503 250 504 - if (nr_counters == 1) 251 + if (nr_counters == 1 || !display_weighted) 505 252 printf("%20.2f - ", syme->weight); 506 253 else 507 254 printf("%9.1f %10ld - ", syme->weight, syme->snap_count); ··· 514 261 } 515 262 } 516 263 264 + 
static void prompt_integer(int *target, const char *msg) 265 + { 266 + char *buf = malloc(0), *p; 267 + size_t dummy = 0; 268 + int tmp; 269 + 270 + fprintf(stdout, "\n%s: ", msg); 271 + if (getline(&buf, &dummy, stdin) < 0) 272 + return; 273 + 274 + p = strchr(buf, '\n'); 275 + if (p) 276 + *p = 0; 277 + 278 + p = buf; 279 + while(*p) { 280 + if (!isdigit(*p)) 281 + goto out_free; 282 + p++; 283 + } 284 + tmp = strtoul(buf, NULL, 10); 285 + *target = tmp; 286 + out_free: 287 + free(buf); 288 + } 289 + 290 + static void prompt_percent(int *target, const char *msg) 291 + { 292 + int tmp = 0; 293 + 294 + prompt_integer(&tmp, msg); 295 + if (tmp >= 0 && tmp <= 100) 296 + *target = tmp; 297 + } 298 + 299 + static void prompt_symbol(struct sym_entry **target, const char *msg) 300 + { 301 + char *buf = malloc(0), *p; 302 + struct sym_entry *syme = *target, *n, *found = NULL; 303 + size_t dummy = 0; 304 + 305 + /* zero counters of active symbol */ 306 + if (syme) { 307 + pthread_mutex_lock(&syme->source_lock); 308 + __zero_source_counters(syme); 309 + *target = NULL; 310 + pthread_mutex_unlock(&syme->source_lock); 311 + } 312 + 313 + fprintf(stdout, "\n%s: ", msg); 314 + if (getline(&buf, &dummy, stdin) < 0) 315 + goto out_free; 316 + 317 + p = strchr(buf, '\n'); 318 + if (p) 319 + *p = 0; 320 + 321 + pthread_mutex_lock(&active_symbols_lock); 322 + syme = list_entry(active_symbols.next, struct sym_entry, node); 323 + pthread_mutex_unlock(&active_symbols_lock); 324 + 325 + list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 326 + struct symbol *sym = (struct symbol *)(syme + 1); 327 + 328 + if (!strcmp(buf, sym->name)) { 329 + found = syme; 330 + break; 331 + } 332 + } 333 + 334 + if (!found) { 335 + fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); 336 + sleep(1); 337 + return; 338 + } else 339 + parse_source(found); 340 + 341 + out_free: 342 + free(buf); 343 + } 344 + 345 + static void print_mapped_keys(void) 346 + { 347 + char *name = NULL; 348 + 
349 + if (sym_filter_entry) { 350 + struct symbol *sym = (struct symbol *)(sym_filter_entry+1); 351 + name = sym->name; 352 + } 353 + 354 + fprintf(stdout, "\nMapped keys:\n"); 355 + fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 356 + fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 357 + 358 + if (nr_counters > 1) 359 + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); 360 + 361 + fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 362 + 363 + if (vmlinux) { 364 + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 365 + fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 366 + fprintf(stdout, "\t[S] stop annotation.\n"); 367 + } 368 + 369 + if (nr_counters > 1) 370 + fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 371 + 372 + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 373 + fprintf(stdout, "\t[qQ] quit.\n"); 374 + } 375 + 376 + static int key_mapped(int c) 377 + { 378 + switch (c) { 379 + case 'd': 380 + case 'e': 381 + case 'f': 382 + case 'z': 383 + case 'q': 384 + case 'Q': 385 + return 1; 386 + case 'E': 387 + case 'w': 388 + return nr_counters > 1 ? 1 : 0; 389 + case 'F': 390 + case 's': 391 + case 'S': 392 + return vmlinux ? 
1 : 0; 393 + } 394 + 395 + return 0; 396 + } 397 + 398 + static void handle_keypress(int c) 399 + { 400 + if (!key_mapped(c)) { 401 + struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 402 + struct termios tc, save; 403 + 404 + print_mapped_keys(); 405 + fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 406 + fflush(stdout); 407 + 408 + tcgetattr(0, &save); 409 + tc = save; 410 + tc.c_lflag &= ~(ICANON | ECHO); 411 + tc.c_cc[VMIN] = 0; 412 + tc.c_cc[VTIME] = 0; 413 + tcsetattr(0, TCSANOW, &tc); 414 + 415 + poll(&stdin_poll, 1, -1); 416 + c = getc(stdin); 417 + 418 + tcsetattr(0, TCSAFLUSH, &save); 419 + if (!key_mapped(c)) 420 + return; 421 + } 422 + 423 + switch (c) { 424 + case 'd': 425 + prompt_integer(&delay_secs, "Enter display delay"); 426 + break; 427 + case 'e': 428 + prompt_integer(&print_entries, "Enter display entries (lines)"); 429 + break; 430 + case 'E': 431 + if (nr_counters > 1) { 432 + int i; 433 + 434 + fprintf(stderr, "\nAvailable events:"); 435 + for (i = 0; i < nr_counters; i++) 436 + fprintf(stderr, "\n\t%d %s", i, event_name(i)); 437 + 438 + prompt_integer(&sym_counter, "Enter details event counter"); 439 + 440 + if (sym_counter >= nr_counters) { 441 + fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); 442 + sym_counter = 0; 443 + sleep(1); 444 + } 445 + } else sym_counter = 0; 446 + break; 447 + case 'f': 448 + prompt_integer(&count_filter, "Enter display event count filter"); 449 + break; 450 + case 'F': 451 + prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 452 + break; 453 + case 'q': 454 + case 'Q': 455 + printf("exiting.\n"); 456 + exit(0); 457 + case 's': 458 + prompt_symbol(&sym_filter_entry, "Enter details symbol"); 459 + break; 460 + case 'S': 461 + if (!sym_filter_entry) 462 + break; 463 + else { 464 + struct sym_entry *syme = sym_filter_entry; 465 + 466 + pthread_mutex_lock(&syme->source_lock); 467 + sym_filter_entry = NULL; 468 + 
__zero_source_counters(syme); 469 + pthread_mutex_unlock(&syme->source_lock); 470 + } 471 + break; 472 + case 'w': 473 + display_weighted = ~display_weighted; 474 + break; 475 + case 'z': 476 + zero = ~zero; 477 + break; 478 + } 479 + } 480 + 517 481 static void *display_thread(void *arg __used) 518 482 { 519 483 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 520 - int delay_msecs = delay_secs * 1000; 484 + struct termios tc, save; 485 + int delay_msecs, c; 521 486 522 - printf("PerfTop refresh period: %d seconds\n", delay_secs); 487 + tcgetattr(0, &save); 488 + tc = save; 489 + tc.c_lflag &= ~(ICANON | ECHO); 490 + tc.c_cc[VMIN] = 0; 491 + tc.c_cc[VTIME] = 0; 492 + 493 + repeat: 494 + delay_msecs = delay_secs * 1000; 495 + tcsetattr(0, TCSANOW, &tc); 496 + /* trash return*/ 497 + getc(stdin); 523 498 524 499 do { 525 500 print_sym_table(); 526 501 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 527 502 528 - printf("key pressed - exiting.\n"); 529 - exit(0); 503 + c = getc(stdin); 504 + tcsetattr(0, TCSAFLUSH, &save); 505 + 506 + handle_keypress(c); 507 + goto repeat; 530 508 531 509 return NULL; 532 510 } ··· 777 293 778 294 static int symbol_filter(struct dso *self, struct symbol *sym) 779 295 { 780 - static int filter_match; 781 296 struct sym_entry *syme; 782 297 const char *name = sym->name; 783 298 int i; ··· 798 315 return 1; 799 316 800 317 syme = dso__sym_priv(self, sym); 318 + pthread_mutex_init(&syme->source_lock, NULL); 319 + if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) 320 + sym_filter_entry = syme; 321 + 801 322 for (i = 0; skip_symbols[i]; i++) { 802 323 if (!strcmp(skip_symbols[i], name)) { 803 324 syme->skip = 1; 804 325 break; 805 326 } 806 327 } 807 - 808 - if (filter_match == 1) { 809 - filter_end = sym->start; 810 - filter_match = -1; 811 - if (filter_end - filter_start > 10000) { 812 - fprintf(stderr, 813 - "hm, too large filter symbol <%s> - skipping.\n", 814 - sym_filter); 815 - fprintf(stderr, "symbol 
filter start: %016lx\n", 816 - filter_start); 817 - fprintf(stderr, " end: %016lx\n", 818 - filter_end); 819 - filter_end = filter_start = 0; 820 - sym_filter = NULL; 821 - sleep(1); 822 - } 823 - } 824 - 825 - if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) { 826 - filter_match = 1; 827 - filter_start = sym->start; 828 - } 829 - 830 328 831 329 return 0; 832 330 } ··· 844 380 return -1; 845 381 } 846 382 847 - #define TRACE_COUNT 3 848 - 849 383 /* 850 384 * Binary search in the histogram table and record the hit: 851 385 */ ··· 856 394 857 395 if (!syme->skip) { 858 396 syme->count[counter]++; 397 + record_precise_ip(syme, counter, ip); 859 398 pthread_mutex_lock(&active_symbols_lock); 860 399 if (list_empty(&syme->node) || !syme->node.next) 861 400 __list_insert_active_sym(syme); ··· 1153 690 "put the counters into a counter group"), 1154 691 OPT_BOOLEAN('i', "inherit", &inherit, 1155 692 "child tasks inherit counters"), 1156 - OPT_STRING('s', "sym-filter", &sym_filter, "pattern", 1157 - "only display symbols matchig this pattern"), 693 + OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 694 + "symbol to annotate - requires -k option"), 1158 695 OPT_BOOLEAN('z', "zero", &zero, 1159 696 "zero history across updates"), 1160 697 OPT_INTEGER('F', "freq", &freq, ··· 1197 734 delay_secs = 1; 1198 735 1199 736 parse_symbols(); 737 + parse_source(sym_filter_entry); 1200 738 1201 739 /* 1202 740 * Fill in the ones not specifically initialized via -c:

+20 -12

tools/perf/util/callchain.c

··· 13 13 #include <stdio.h> 14 14 #include <stdbool.h> 15 15 #include <errno.h> 16 + #include <math.h> 16 17 17 18 #include "callchain.h" 18 19 ··· 27 26 struct rb_node **p = &root->rb_node; 28 27 struct rb_node *parent = NULL; 29 28 struct callchain_node *rnode; 29 + u64 chain_cumul = cumul_hits(chain); 30 30 31 31 while (*p) { 32 + u64 rnode_cumul; 33 + 32 34 parent = *p; 33 35 rnode = rb_entry(parent, struct callchain_node, rb_node); 36 + rnode_cumul = cumul_hits(rnode); 34 37 35 38 switch (mode) { 36 39 case CHAIN_FLAT: ··· 45 40 break; 46 41 case CHAIN_GRAPH_ABS: /* Falldown */ 47 42 case CHAIN_GRAPH_REL: 48 - if (rnode->cumul_hit < chain->cumul_hit) 43 + if (rnode_cumul < chain_cumul) 49 44 p = &(*p)->rb_left; 50 45 else 51 46 p = &(*p)->rb_right; ··· 92 87 93 88 chain_for_each_child(child, node) { 94 89 __sort_chain_graph_abs(child, min_hit); 95 - if (child->cumul_hit >= min_hit) 90 + if (cumul_hits(child) >= min_hit) 96 91 rb_insert_callchain(&node->rb_root, child, 97 92 CHAIN_GRAPH_ABS); 98 93 } ··· 113 108 u64 min_hit; 114 109 115 110 node->rb_root = RB_ROOT; 116 - min_hit = node->cumul_hit * min_percent / 100.0; 111 + min_hit = ceil(node->children_hit * min_percent); 117 112 118 113 chain_for_each_child(child, node) { 119 114 __sort_chain_graph_rel(child, min_percent); 120 - if (child->cumul_hit >= min_hit) 115 + if (cumul_hits(child) >= min_hit) 121 116 rb_insert_callchain(&node->rb_root, child, 122 117 CHAIN_GRAPH_REL); 123 118 } ··· 127 122 sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, 128 123 u64 min_hit __used, struct callchain_param *param) 129 124 { 130 - __sort_chain_graph_rel(chain_root, param->min_percent); 125 + __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); 131 126 rb_root->rb_node = chain_root->rb_root.rb_node; 132 127 } 133 128 ··· 216 211 new = create_child(parent, false); 217 212 fill_node(new, chain, start, syms); 218 213 219 - new->cumul_hit = new->hit = 1; 214 + new->children_hit = 
0; 215 + new->hit = 1; 220 216 } 221 217 222 218 /* ··· 247 241 248 242 /* split the hits */ 249 243 new->hit = parent->hit; 250 - new->cumul_hit = parent->cumul_hit; 244 + new->children_hit = parent->children_hit; 245 + parent->children_hit = cumul_hits(new); 251 246 new->val_nr = parent->val_nr - idx_local; 252 247 parent->val_nr = idx_local; 253 248 ··· 256 249 if (idx_total < chain->nr) { 257 250 parent->hit = 0; 258 251 add_child(parent, chain, idx_total, syms); 252 + parent->children_hit++; 259 253 } else { 260 254 parent->hit = 1; 261 255 } ··· 277 269 unsigned int ret = __append_chain(rnode, chain, start, syms); 278 270 279 271 if (!ret) 280 - goto cumul; 272 + goto inc_children_hit; 281 273 } 282 274 /* nothing in children, add to the current node */ 283 275 add_child(root, chain, start, syms); 284 276 285 - cumul: 286 - root->cumul_hit++; 277 + inc_children_hit: 278 + root->children_hit++; 287 279 } 288 280 289 281 static int ··· 325 317 /* we match 100% of the path, increment the hit */ 326 318 if (i - start == root->val_nr && i == chain->nr) { 327 319 root->hit++; 328 - root->cumul_hit++; 329 - 330 320 return 0; 331 321 } 332 322 ··· 337 331 void append_chain(struct callchain_node *root, struct ip_callchain *chain, 338 332 struct symbol **syms) 339 333 { 334 + if (!chain->nr) 335 + return; 340 336 __append_chain_children(root, chain, syms, 0); 341 337 }

+7 -1

tools/perf/util/callchain.h

··· 7 7 #include "symbol.h" 8 8 9 9 enum chain_mode { 10 + CHAIN_NONE, 10 11 CHAIN_FLAT, 11 12 CHAIN_GRAPH_ABS, 12 13 CHAIN_GRAPH_REL ··· 22 21 struct rb_root rb_root; /* sorted tree of children */ 23 22 unsigned int val_nr; 24 23 u64 hit; 25 - u64 cumul_hit; /* hit + hits of children */ 24 + u64 children_hit; 26 25 }; 27 26 28 27 struct callchain_param; ··· 47 46 INIT_LIST_HEAD(&node->brothers); 48 47 INIT_LIST_HEAD(&node->children); 49 48 INIT_LIST_HEAD(&node->val); 49 + } 50 + 51 + static inline u64 cumul_hits(struct callchain_node *node) 52 + { 53 + return node->hit + node->children_hit; 50 54 } 51 55 52 56 int register_callchain_param(struct callchain_param *param);

+4 -1

tools/perf/util/header.c

··· 185 185 186 186 if (ret < 0) 187 187 die("failed to read"); 188 + if (ret == 0) 189 + die("failed to read: missing data"); 188 190 189 191 size -= ret; 190 192 buf += ret; ··· 215 213 216 214 for (i = 0; i < nr_attrs; i++) { 217 215 struct perf_header_attr *attr; 218 - off_t tmp = lseek(fd, 0, SEEK_CUR); 216 + off_t tmp; 219 217 220 218 do_read(fd, &f_attr, sizeof(f_attr)); 219 + tmp = lseek(fd, 0, SEEK_CUR); 221 220 222 221 attr = perf_header_attr__new(&f_attr.attr); 223 222

+24 -2

tools/perf/util/parse-events.c

··· 121 121 (strcmp(sys_dirent.d_name, ".")) && \ 122 122 (strcmp(sys_dirent.d_name, ".."))) 123 123 124 + static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) 125 + { 126 + char evt_path[MAXPATHLEN]; 127 + int fd; 128 + 129 + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, 130 + sys_dir->d_name, evt_dir->d_name); 131 + fd = open(evt_path, O_RDONLY); 132 + if (fd < 0) 133 + return -EINVAL; 134 + close(fd); 135 + 136 + return 0; 137 + } 138 + 124 139 #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ 125 140 while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ 126 141 if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ 127 142 sys_dirent.d_name, evt_dirent.d_name) && \ 128 143 (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ 129 144 (strcmp(evt_dirent.d_name, ".")) && \ 130 - (strcmp(evt_dirent.d_name, ".."))) 145 + (strcmp(evt_dirent.d_name, "..")) && \ 146 + (!tp_event_has_id(&sys_dirent, &evt_dirent))) 131 147 132 148 #define MAX_EVENT_LENGTH 30 133 149 ··· 239 223 { 240 224 u64 config = attrs[counter].config; 241 225 int type = attrs[counter].type; 226 + 227 + return __event_name(type, config); 228 + } 229 + 230 + char *__event_name(int type, u64 config) 231 + { 242 232 static char buf[32]; 243 233 244 - if (attrs[counter].type == PERF_TYPE_RAW) { 234 + if (type == PERF_TYPE_RAW) { 245 235 sprintf(buf, "raw 0x%llx", config); 246 236 return buf; 247 237 }

+1

tools/perf/util/parse-events.h

··· 10 10 extern struct perf_counter_attr attrs[MAX_COUNTERS]; 11 11 12 12 extern char *event_name(int ctr); 13 + extern char *__event_name(int type, u64 config); 13 14 14 15 extern int parse_events(const struct option *opt, const char *str, int unset); 15 16

+46 -11

tools/perf/util/symbol.c

··· 24 24 #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ 25 25 #endif 26 26 27 + enum dso_origin { 28 + DSO__ORIG_KERNEL = 0, 29 + DSO__ORIG_JAVA_JIT, 30 + DSO__ORIG_FEDORA, 31 + DSO__ORIG_UBUNTU, 32 + DSO__ORIG_BUILDID, 33 + DSO__ORIG_DSO, 34 + DSO__ORIG_NOT_FOUND, 35 + }; 36 + 27 37 static struct symbol *symbol__new(u64 start, u64 len, 28 38 const char *name, unsigned int priv_size, 29 39 u64 obj_start, int verbose) ··· 91 81 self->sym_priv_size = sym_priv_size; 92 82 self->find_symbol = dso__find_symbol; 93 83 self->slen_calculated = 0; 84 + self->origin = DSO__ORIG_NOT_FOUND; 94 85 } 95 86 96 87 return self; ··· 721 710 ++raw; 722 711 bid += 2; 723 712 } 724 - if (verbose) 713 + if (verbose >= 2) 725 714 printf("%s(%s): %s\n", __func__, self->name, build_id); 726 715 out_elf_end: 727 716 elf_end(elf); ··· 731 720 return build_id; 732 721 } 733 722 723 + char dso__symtab_origin(const struct dso *self) 724 + { 725 + static const char origin[] = { 726 + [DSO__ORIG_KERNEL] = 'k', 727 + [DSO__ORIG_JAVA_JIT] = 'j', 728 + [DSO__ORIG_FEDORA] = 'f', 729 + [DSO__ORIG_UBUNTU] = 'u', 730 + [DSO__ORIG_BUILDID] = 'b', 731 + [DSO__ORIG_DSO] = 'd', 732 + }; 733 + 734 + if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) 735 + return '!'; 736 + return origin[self->origin]; 737 + } 738 + 734 739 int dso__load(struct dso *self, symbol_filter_t filter, int verbose) 735 740 { 736 741 int size = PATH_MAX; 737 742 char *name = malloc(size), *build_id = NULL; 738 - int variant = 0; 739 743 int ret = -1; 740 744 int fd; 741 745 ··· 759 733 760 734 self->adjust_symbols = 0; 761 735 762 - if (strncmp(self->name, "/tmp/perf-", 10) == 0) 763 - return dso__load_perf_map(self, filter, verbose); 736 + if (strncmp(self->name, "/tmp/perf-", 10) == 0) { 737 + ret = dso__load_perf_map(self, filter, verbose); 738 + self->origin = ret > 0 ? 
DSO__ORIG_JAVA_JIT : 739 + DSO__ORIG_NOT_FOUND; 740 + return ret; 741 + } 742 + 743 + self->origin = DSO__ORIG_FEDORA - 1; 764 744 765 745 more: 766 746 do { 767 - switch (variant) { 768 - case 0: /* Fedora */ 747 + self->origin++; 748 + switch (self->origin) { 749 + case DSO__ORIG_FEDORA: 769 750 snprintf(name, size, "/usr/lib/debug%s.debug", self->name); 770 751 break; 771 - case 1: /* Ubuntu */ 752 + case DSO__ORIG_UBUNTU: 772 753 snprintf(name, size, "/usr/lib/debug%s", self->name); 773 754 break; 774 - case 2: 755 + case DSO__ORIG_BUILDID: 775 756 build_id = dso__read_build_id(self, verbose); 776 757 if (build_id != NULL) { 777 758 snprintf(name, size, ··· 787 754 free(build_id); 788 755 break; 789 756 } 790 - variant++; 757 + self->origin++; 791 758 /* Fall thru */ 792 - case 3: /* Sane people */ 759 + case DSO__ORIG_DSO: 793 760 snprintf(name, size, "%s", self->name); 794 761 break; 795 762 796 763 default: 797 764 goto out; 798 765 } 799 - variant++; 800 766 801 767 fd = open(name, O_RDONLY); 802 768 } while (fd < 0); ··· 930 898 931 899 if (err <= 0) 932 900 err = dso__load_kallsyms(self, filter, verbose); 901 + 902 + if (err > 0) 903 + self->origin = DSO__ORIG_KERNEL; 933 904 934 905 return err; 935 906 }

+2

tools/perf/util/symbol.h

··· 26 26 unsigned int sym_priv_size; 27 27 unsigned char adjust_symbols; 28 28 unsigned char slen_calculated; 29 + unsigned char origin; 29 30 char name[0]; 30 31 }; 31 32 ··· 50 49 int dso__load(struct dso *self, symbol_filter_t filter, int verbose); 51 50 52 51 size_t dso__fprintf(struct dso *self, FILE *fp); 52 + char dso__symtab_origin(const struct dso *self); 53 53 54 54 void symbol__init(void); 55 55 #endif /* _PERF_SYMBOL_ */

Configure Feed

Configure Feed