Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'perf-counters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-perf

* 'perf-counters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-perf: (31 commits)
perf_counter tools: Give perf top inherit option
perf_counter tools: Fix vmlinux symbol generation breakage
perf_counter: Detect debugfs location
perf_counter: Add tracepoint support to perf list, perf stat
perf symbol: C++ demangling
perf: avoid structure size confusion by using a fixed size
perf_counter: Fix throttle/unthrottle event logging
perf_counter: Improve perf stat and perf record option parsing
perf_counter: PERF_SAMPLE_ID and inherited counters
perf_counter: Plug more stack leaks
perf: Fix stack data leak
perf_counter: Remove unused variables
perf_counter: Make call graph option consistent
perf_counter: Add perf record option to log addresses
perf_counter: Log vfork as a fork event
perf_counter: Synthesize VDSO mmap event
perf_counter: Make sure we dont leak kernel memory to userspace
perf_counter tools: Fix index boundary check
perf_counter: Fix the tracepoint channel to perfcounters
perf_counter, x86: Extend perf_counter Pentium M support
...

+949 -300
+233 -20
arch/x86/kernel/cpu/perf_counter.c
··· 66 66 }; 67 67 68 68 /* 69 + * Not sure about some of these 70 + */ 71 + static const u64 p6_perfmon_event_map[] = 72 + { 73 + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, 74 + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 75 + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, 76 + [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, 77 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 78 + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 79 + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, 80 + }; 81 + 82 + static u64 p6_pmu_event_map(int event) 83 + { 84 + return p6_perfmon_event_map[event]; 85 + } 86 + 87 + /* 88 + * Counter setting that is specified not to count anything. 89 + * We use this to effectively disable a counter. 90 + * 91 + * L2_RQSTS with 0 MESI unit mask. 92 + */ 93 + #define P6_NOP_COUNTER 0x0000002EULL 94 + 95 + static u64 p6_pmu_raw_event(u64 event) 96 + { 97 + #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL 98 + #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL 99 + #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL 100 + #define P6_EVNTSEL_INV_MASK 0x00800000ULL 101 + #define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL 102 + 103 + #define P6_EVNTSEL_MASK \ 104 + (P6_EVNTSEL_EVENT_MASK | \ 105 + P6_EVNTSEL_UNIT_MASK | \ 106 + P6_EVNTSEL_EDGE_MASK | \ 107 + P6_EVNTSEL_INV_MASK | \ 108 + P6_EVNTSEL_COUNTER_MASK) 109 + 110 + return event & P6_EVNTSEL_MASK; 111 + } 112 + 113 + 114 + /* 69 115 * Intel PerfMon v3. Used on Core2 and later. 
70 116 */ 71 117 static const u64 intel_perfmon_event_map[] = ··· 712 666 { 713 667 struct perf_counter_attr *attr = &counter->attr; 714 668 struct hw_perf_counter *hwc = &counter->hw; 669 + u64 config; 715 670 int err; 716 671 717 672 if (!x86_pmu_initialized()) ··· 765 718 766 719 if (attr->config >= x86_pmu.max_events) 767 720 return -EINVAL; 721 + 768 722 /* 769 723 * The generic map: 770 724 */ 771 - hwc->config |= x86_pmu.event_map(attr->config); 725 + config = x86_pmu.event_map(attr->config); 726 + 727 + if (config == 0) 728 + return -ENOENT; 729 + 730 + if (config == -1LL) 731 + return -EINVAL; 732 + 733 + hwc->config |= config; 772 734 773 735 return 0; 736 + } 737 + 738 + static void p6_pmu_disable_all(void) 739 + { 740 + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 741 + u64 val; 742 + 743 + if (!cpuc->enabled) 744 + return; 745 + 746 + cpuc->enabled = 0; 747 + barrier(); 748 + 749 + /* p6 only has one enable register */ 750 + rdmsrl(MSR_P6_EVNTSEL0, val); 751 + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 752 + wrmsrl(MSR_P6_EVNTSEL0, val); 774 753 } 775 754 776 755 static void intel_pmu_disable_all(void) ··· 840 767 return x86_pmu.disable_all(); 841 768 } 842 769 770 + static void p6_pmu_enable_all(void) 771 + { 772 + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 773 + unsigned long val; 774 + 775 + if (cpuc->enabled) 776 + return; 777 + 778 + cpuc->enabled = 1; 779 + barrier(); 780 + 781 + /* p6 only has one enable register */ 782 + rdmsrl(MSR_P6_EVNTSEL0, val); 783 + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 784 + wrmsrl(MSR_P6_EVNTSEL0, val); 785 + } 786 + 843 787 static void intel_pmu_enable_all(void) 844 788 { 845 789 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); ··· 874 784 barrier(); 875 785 876 786 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 787 + struct perf_counter *counter = cpuc->counters[idx]; 877 788 u64 val; 878 789 879 790 if (!test_bit(idx, cpuc->active_mask)) 880 791 continue; 881 - 
rdmsrl(MSR_K7_EVNTSEL0 + idx, val); 882 - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) 883 - continue; 792 + 793 + val = counter->hw.config; 884 794 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 885 795 wrmsrl(MSR_K7_EVNTSEL0 + idx, val); 886 796 } ··· 909 819 910 820 static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 911 821 { 912 - int err; 913 - err = checking_wrmsrl(hwc->config_base + idx, 822 + (void)checking_wrmsrl(hwc->config_base + idx, 914 823 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); 915 824 } 916 825 917 826 static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) 918 827 { 919 - int err; 920 - err = checking_wrmsrl(hwc->config_base + idx, 921 - hwc->config); 828 + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 922 829 } 923 830 924 831 static inline void ··· 923 836 { 924 837 int idx = __idx - X86_PMC_IDX_FIXED; 925 838 u64 ctrl_val, mask; 926 - int err; 927 839 928 840 mask = 0xfULL << (idx * 4); 929 841 930 842 rdmsrl(hwc->config_base, ctrl_val); 931 843 ctrl_val &= ~mask; 932 - err = checking_wrmsrl(hwc->config_base, ctrl_val); 844 + (void)checking_wrmsrl(hwc->config_base, ctrl_val); 845 + } 846 + 847 + static inline void 848 + p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) 849 + { 850 + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 851 + u64 val = P6_NOP_COUNTER; 852 + 853 + if (cpuc->enabled) 854 + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 855 + 856 + (void)checking_wrmsrl(hwc->config_base + idx, val); 933 857 } 934 858 935 859 static inline void ··· 1041 943 err = checking_wrmsrl(hwc->config_base, ctrl_val); 1042 944 } 1043 945 946 + static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 947 + { 948 + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 949 + u64 val; 950 + 951 + val = hwc->config; 952 + if (cpuc->enabled) 953 + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 954 + 955 + (void)checking_wrmsrl(hwc->config_base + idx, val); 956 + 
} 957 + 958 + 1044 959 static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 1045 960 { 1046 961 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { ··· 1070 959 1071 960 if (cpuc->enabled) 1072 961 x86_pmu_enable_counter(hwc, idx); 1073 - else 1074 - x86_pmu_disable_counter(hwc, idx); 1075 962 } 1076 963 1077 964 static int ··· 1285 1176 local_irq_restore(flags); 1286 1177 } 1287 1178 1179 + static int p6_pmu_handle_irq(struct pt_regs *regs) 1180 + { 1181 + struct perf_sample_data data; 1182 + struct cpu_hw_counters *cpuc; 1183 + struct perf_counter *counter; 1184 + struct hw_perf_counter *hwc; 1185 + int idx, handled = 0; 1186 + u64 val; 1187 + 1188 + data.regs = regs; 1189 + data.addr = 0; 1190 + 1191 + cpuc = &__get_cpu_var(cpu_hw_counters); 1192 + 1193 + for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1194 + if (!test_bit(idx, cpuc->active_mask)) 1195 + continue; 1196 + 1197 + counter = cpuc->counters[idx]; 1198 + hwc = &counter->hw; 1199 + 1200 + val = x86_perf_counter_update(counter, hwc, idx); 1201 + if (val & (1ULL << (x86_pmu.counter_bits - 1))) 1202 + continue; 1203 + 1204 + /* 1205 + * counter overflow 1206 + */ 1207 + handled = 1; 1208 + data.period = counter->hw.last_period; 1209 + 1210 + if (!x86_perf_counter_set_period(counter, hwc, idx)) 1211 + continue; 1212 + 1213 + if (perf_counter_overflow(counter, 1, &data)) 1214 + p6_pmu_disable_counter(hwc, idx); 1215 + } 1216 + 1217 + if (handled) 1218 + inc_irq_stat(apic_perf_irqs); 1219 + 1220 + return handled; 1221 + } 1288 1222 1289 1223 /* 1290 1224 * This handler is triggered by the local APIC, so the APIC IRQ handling ··· 1337 1185 { 1338 1186 struct perf_sample_data data; 1339 1187 struct cpu_hw_counters *cpuc; 1340 - int bit, cpu, loops; 1188 + int bit, loops; 1341 1189 u64 ack, status; 1342 1190 1343 1191 data.regs = regs; 1344 1192 data.addr = 0; 1345 1193 1346 - cpu = smp_processor_id(); 1347 - cpuc = &per_cpu(cpu_hw_counters, cpu); 1194 + cpuc = 
&__get_cpu_var(cpu_hw_counters); 1348 1195 1349 1196 perf_disable(); 1350 1197 status = intel_pmu_get_status(); ··· 1400 1249 struct cpu_hw_counters *cpuc; 1401 1250 struct perf_counter *counter; 1402 1251 struct hw_perf_counter *hwc; 1403 - int cpu, idx, handled = 0; 1252 + int idx, handled = 0; 1404 1253 u64 val; 1405 1254 1406 1255 data.regs = regs; 1407 1256 data.addr = 0; 1408 1257 1409 - cpu = smp_processor_id(); 1410 - cpuc = &per_cpu(cpu_hw_counters, cpu); 1258 + cpuc = &__get_cpu_var(cpu_hw_counters); 1411 1259 1412 1260 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1413 1261 if (!test_bit(idx, cpuc->active_mask)) ··· 1503 1353 .priority = 1 1504 1354 }; 1505 1355 1356 + static struct x86_pmu p6_pmu = { 1357 + .name = "p6", 1358 + .handle_irq = p6_pmu_handle_irq, 1359 + .disable_all = p6_pmu_disable_all, 1360 + .enable_all = p6_pmu_enable_all, 1361 + .enable = p6_pmu_enable_counter, 1362 + .disable = p6_pmu_disable_counter, 1363 + .eventsel = MSR_P6_EVNTSEL0, 1364 + .perfctr = MSR_P6_PERFCTR0, 1365 + .event_map = p6_pmu_event_map, 1366 + .raw_event = p6_pmu_raw_event, 1367 + .max_events = ARRAY_SIZE(p6_perfmon_event_map), 1368 + .max_period = (1ULL << 31) - 1, 1369 + .version = 0, 1370 + .num_counters = 2, 1371 + /* 1372 + * Counters have 40 bits implemented. However they are designed such 1373 + * that bits [32-39] are sign extensions of bit 31. As such the 1374 + * effective width of a counter for P6-like PMU is 32 bits only. 
1375 + * 1376 + * See IA-32 Intel Architecture Software developer manual Vol 3B 1377 + */ 1378 + .counter_bits = 32, 1379 + .counter_mask = (1ULL << 32) - 1, 1380 + }; 1381 + 1506 1382 static struct x86_pmu intel_pmu = { 1507 1383 .name = "Intel", 1508 1384 .handle_irq = intel_pmu_handle_irq, ··· 1568 1392 .max_period = (1ULL << 47) - 1, 1569 1393 }; 1570 1394 1395 + static int p6_pmu_init(void) 1396 + { 1397 + switch (boot_cpu_data.x86_model) { 1398 + case 1: 1399 + case 3: /* Pentium Pro */ 1400 + case 5: 1401 + case 6: /* Pentium II */ 1402 + case 7: 1403 + case 8: 1404 + case 11: /* Pentium III */ 1405 + break; 1406 + case 9: 1407 + case 13: 1408 + /* Pentium M */ 1409 + break; 1410 + default: 1411 + pr_cont("unsupported p6 CPU model %d ", 1412 + boot_cpu_data.x86_model); 1413 + return -ENODEV; 1414 + } 1415 + 1416 + if (!cpu_has_apic) { 1417 + pr_info("no Local APIC, try rebooting with lapic"); 1418 + return -ENODEV; 1419 + } 1420 + 1421 + x86_pmu = p6_pmu; 1422 + 1423 + return 0; 1424 + } 1425 + 1571 1426 static int intel_pmu_init(void) 1572 1427 { 1573 1428 union cpuid10_edx edx; ··· 1607 1400 unsigned int ebx; 1608 1401 int version; 1609 1402 1610 - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 1403 + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 1404 + /* check for P6 processor family */ 1405 + if (boot_cpu_data.x86 == 6) { 1406 + return p6_pmu_init(); 1407 + } else { 1611 1408 return -ENODEV; 1409 + } 1410 + } 1612 1411 1613 1412 /* 1614 1413 * Check whether the Architectural PerfMon supports
+4 -11
include/linux/perf_counter.h
··· 120 120 PERF_SAMPLE_ID = 1U << 6, 121 121 PERF_SAMPLE_CPU = 1U << 7, 122 122 PERF_SAMPLE_PERIOD = 1U << 8, 123 + PERF_SAMPLE_STREAM_ID = 1U << 9, 123 124 124 - PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */ 125 + PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ 125 126 }; 126 127 127 128 /* ··· 313 312 * struct perf_event_header header; 314 313 * u64 time; 315 314 * u64 id; 316 - * u64 sample_period; 317 - * }; 318 - */ 319 - PERF_EVENT_PERIOD = 4, 320 - 321 - /* 322 - * struct { 323 - * struct perf_event_header header; 324 - * u64 time; 325 - * u64 id; 315 + * u64 stream_id; 326 316 * }; 327 317 */ 328 318 PERF_EVENT_THROTTLE = 5, ··· 348 356 * { u64 time; } && PERF_SAMPLE_TIME 349 357 * { u64 addr; } && PERF_SAMPLE_ADDR 350 358 * { u64 id; } && PERF_SAMPLE_ID 359 + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID 351 360 * { u32 cpu, res; } && PERF_SAMPLE_CPU 352 361 * { u64 period; } && PERF_SAMPLE_PERIOD 353 362 *
+1 -1
init/Kconfig
··· 962 962 963 963 config EVENT_PROFILE 964 964 bool "Tracepoint profile sources" 965 - depends on PERF_COUNTERS && EVENT_TRACER 965 + depends on PERF_COUNTERS && EVENT_TRACING 966 966 default y 967 967 968 968 endmenu
+3 -6
kernel/fork.c
··· 1407 1407 if (clone_flags & CLONE_VFORK) { 1408 1408 p->vfork_done = &vfork; 1409 1409 init_completion(&vfork); 1410 - } else if (!(clone_flags & CLONE_VM)) { 1411 - /* 1412 - * vfork will do an exec which will call 1413 - * set_task_comm() 1414 - */ 1415 - perf_counter_fork(p); 1416 1410 } 1411 + 1412 + if (!(clone_flags & CLONE_THREAD)) 1413 + perf_counter_fork(p); 1417 1414 1418 1415 audit_finish_fork(p); 1419 1416 tracehook_report_clone(regs, clone_flags, nr, p);
+91 -94
kernel/perf_counter.c
··· 146 146 } 147 147 } 148 148 149 + static void unclone_ctx(struct perf_counter_context *ctx) 150 + { 151 + if (ctx->parent_ctx) { 152 + put_ctx(ctx->parent_ctx); 153 + ctx->parent_ctx = NULL; 154 + } 155 + } 156 + 157 + /* 158 + * If we inherit counters we want to return the parent counter id 159 + * to userspace. 160 + */ 161 + static u64 primary_counter_id(struct perf_counter *counter) 162 + { 163 + u64 id = counter->id; 164 + 165 + if (counter->parent) 166 + id = counter->parent->id; 167 + 168 + return id; 169 + } 170 + 149 171 /* 150 172 * Get the perf_counter_context for a task and lock it. 151 173 * This has to cope with with the fact that until it is locked, ··· 1310 1288 #define MAX_INTERRUPTS (~0ULL) 1311 1289 1312 1290 static void perf_log_throttle(struct perf_counter *counter, int enable); 1313 - static void perf_log_period(struct perf_counter *counter, u64 period); 1314 1291 1315 1292 static void perf_adjust_period(struct perf_counter *counter, u64 events) 1316 1293 { ··· 1327 1306 1328 1307 if (!sample_period) 1329 1308 sample_period = 1; 1330 - 1331 - perf_log_period(counter, sample_period); 1332 1309 1333 1310 hwc->sample_period = sample_period; 1334 1311 } ··· 1482 1463 /* 1483 1464 * Unclone this context if we enabled any counter. 
1484 1465 */ 1485 - if (enabled && ctx->parent_ctx) { 1486 - put_ctx(ctx->parent_ctx); 1487 - ctx->parent_ctx = NULL; 1488 - } 1466 + if (enabled) 1467 + unclone_ctx(ctx); 1489 1468 1490 1469 spin_unlock(&ctx->lock); 1491 1470 ··· 1543 1526 1544 1527 static struct perf_counter_context *find_get_context(pid_t pid, int cpu) 1545 1528 { 1546 - struct perf_counter_context *parent_ctx; 1547 1529 struct perf_counter_context *ctx; 1548 1530 struct perf_cpu_context *cpuctx; 1549 1531 struct task_struct *task; ··· 1602 1586 retry: 1603 1587 ctx = perf_lock_task_context(task, &flags); 1604 1588 if (ctx) { 1605 - parent_ctx = ctx->parent_ctx; 1606 - if (parent_ctx) { 1607 - put_ctx(parent_ctx); 1608 - ctx->parent_ctx = NULL; /* no longer a clone */ 1609 - } 1589 + unclone_ctx(ctx); 1610 1590 spin_unlock_irqrestore(&ctx->lock, flags); 1611 1591 } 1612 1592 ··· 1716 1704 values[n++] = counter->total_time_running + 1717 1705 atomic64_read(&counter->child_total_time_running); 1718 1706 if (counter->attr.read_format & PERF_FORMAT_ID) 1719 - values[n++] = counter->id; 1707 + values[n++] = primary_counter_id(counter); 1720 1708 mutex_unlock(&counter->child_mutex); 1721 1709 1722 1710 if (count < n * sizeof(u64)) ··· 1823 1811 1824 1812 counter->attr.sample_freq = value; 1825 1813 } else { 1826 - perf_log_period(counter, value); 1827 - 1828 1814 counter->attr.sample_period = value; 1829 1815 counter->hw.sample_period = value; 1830 1816 } ··· 2671 2661 if (sample_type & PERF_SAMPLE_ID) 2672 2662 header.size += sizeof(u64); 2673 2663 2664 + if (sample_type & PERF_SAMPLE_STREAM_ID) 2665 + header.size += sizeof(u64); 2666 + 2674 2667 if (sample_type & PERF_SAMPLE_CPU) { 2675 2668 header.size += sizeof(cpu_entry); 2676 2669 ··· 2717 2704 if (sample_type & PERF_SAMPLE_ADDR) 2718 2705 perf_output_put(&handle, data->addr); 2719 2706 2720 - if (sample_type & PERF_SAMPLE_ID) 2707 + if (sample_type & PERF_SAMPLE_ID) { 2708 + u64 id = primary_counter_id(counter); 2709 + 2710 + 
perf_output_put(&handle, id); 2711 + } 2712 + 2713 + if (sample_type & PERF_SAMPLE_STREAM_ID) 2721 2714 perf_output_put(&handle, counter->id); 2722 2715 2723 2716 if (sample_type & PERF_SAMPLE_CPU) ··· 2746 2727 if (sub != counter) 2747 2728 sub->pmu->read(sub); 2748 2729 2749 - group_entry.id = sub->id; 2730 + group_entry.id = primary_counter_id(sub); 2750 2731 group_entry.counter = atomic64_read(&sub->count); 2751 2732 2752 2733 perf_output_put(&handle, group_entry); ··· 2806 2787 } 2807 2788 2808 2789 if (counter->attr.read_format & PERF_FORMAT_ID) { 2809 - u64 id; 2810 - 2811 2790 event.header.size += sizeof(u64); 2812 - if (counter->parent) 2813 - id = counter->parent->id; 2814 - else 2815 - id = counter->id; 2816 - 2817 - event.format[i++] = id; 2791 + event.format[i++] = primary_counter_id(counter); 2818 2792 } 2819 2793 2820 2794 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); ··· 2908 2896 .event = { 2909 2897 .header = { 2910 2898 .type = PERF_EVENT_FORK, 2899 + .misc = 0, 2911 2900 .size = sizeof(fork_event.event), 2912 2901 }, 2902 + /* .pid */ 2903 + /* .ppid */ 2913 2904 }, 2914 2905 }; 2915 2906 ··· 2984 2969 struct perf_cpu_context *cpuctx; 2985 2970 struct perf_counter_context *ctx; 2986 2971 unsigned int size; 2987 - char *comm = comm_event->task->comm; 2972 + char comm[TASK_COMM_LEN]; 2988 2973 2974 + memset(comm, 0, sizeof(comm)); 2975 + strncpy(comm, comm_event->task->comm, sizeof(comm)); 2989 2976 size = ALIGN(strlen(comm)+1, sizeof(u64)); 2990 2977 2991 2978 comm_event->comm = comm; ··· 3022 3005 3023 3006 comm_event = (struct perf_comm_event){ 3024 3007 .task = task, 3008 + /* .comm */ 3009 + /* .comm_size */ 3025 3010 .event = { 3026 - .header = { .type = PERF_EVENT_COMM, }, 3011 + .header = { 3012 + .type = PERF_EVENT_COMM, 3013 + .misc = 0, 3014 + /* .size */ 3015 + }, 3016 + /* .pid */ 3017 + /* .tid */ 3027 3018 }, 3028 3019 }; 3029 3020 ··· 3114 3089 char *buf = NULL; 3115 3090 const char *name; 3116 3091 3092 + 
memset(tmp, 0, sizeof(tmp)); 3093 + 3117 3094 if (file) { 3118 - buf = kzalloc(PATH_MAX, GFP_KERNEL); 3095 + /* 3096 + * d_path works from the end of the buffer backwards, so we 3097 + * need to add enough zero bytes after the string to handle 3098 + * the 64bit alignment we do later. 3099 + */ 3100 + buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL); 3119 3101 if (!buf) { 3120 3102 name = strncpy(tmp, "//enomem", sizeof(tmp)); 3121 3103 goto got_name; ··· 3133 3101 goto got_name; 3134 3102 } 3135 3103 } else { 3136 - name = arch_vma_name(mmap_event->vma); 3137 - if (name) 3104 + if (arch_vma_name(mmap_event->vma)) { 3105 + name = strncpy(tmp, arch_vma_name(mmap_event->vma), 3106 + sizeof(tmp)); 3138 3107 goto got_name; 3108 + } 3139 3109 3140 3110 if (!vma->vm_mm) { 3141 3111 name = strncpy(tmp, "[vdso]", sizeof(tmp)); ··· 3182 3148 3183 3149 mmap_event = (struct perf_mmap_event){ 3184 3150 .vma = vma, 3151 + /* .file_name */ 3152 + /* .file_size */ 3185 3153 .event = { 3186 - .header = { .type = PERF_EVENT_MMAP, }, 3154 + .header = { 3155 + .type = PERF_EVENT_MMAP, 3156 + .misc = 0, 3157 + /* .size */ 3158 + }, 3159 + /* .pid */ 3160 + /* .tid */ 3187 3161 .start = vma->vm_start, 3188 3162 .len = vma->vm_end - vma->vm_start, 3189 3163 .pgoff = vma->vm_pgoff, ··· 3199 3157 }; 3200 3158 3201 3159 perf_counter_mmap_event(&mmap_event); 3202 - } 3203 - 3204 - /* 3205 - * Log sample_period changes so that analyzing tools can re-normalize the 3206 - * event flow. 
3207 - */ 3208 - 3209 - struct freq_event { 3210 - struct perf_event_header header; 3211 - u64 time; 3212 - u64 id; 3213 - u64 period; 3214 - }; 3215 - 3216 - static void perf_log_period(struct perf_counter *counter, u64 period) 3217 - { 3218 - struct perf_output_handle handle; 3219 - struct freq_event event; 3220 - int ret; 3221 - 3222 - if (counter->hw.sample_period == period) 3223 - return; 3224 - 3225 - if (counter->attr.sample_type & PERF_SAMPLE_PERIOD) 3226 - return; 3227 - 3228 - event = (struct freq_event) { 3229 - .header = { 3230 - .type = PERF_EVENT_PERIOD, 3231 - .misc = 0, 3232 - .size = sizeof(event), 3233 - }, 3234 - .time = sched_clock(), 3235 - .id = counter->id, 3236 - .period = period, 3237 - }; 3238 - 3239 - ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0); 3240 - if (ret) 3241 - return; 3242 - 3243 - perf_output_put(&handle, event); 3244 - perf_output_end(&handle); 3245 3160 } 3246 3161 3247 3162 /* ··· 3214 3215 struct perf_event_header header; 3215 3216 u64 time; 3216 3217 u64 id; 3218 + u64 stream_id; 3217 3219 } throttle_event = { 3218 3220 .header = { 3219 - .type = PERF_EVENT_THROTTLE + 1, 3221 + .type = PERF_EVENT_THROTTLE, 3220 3222 .misc = 0, 3221 3223 .size = sizeof(throttle_event), 3222 3224 }, 3223 - .time = sched_clock(), 3224 - .id = counter->id, 3225 + .time = sched_clock(), 3226 + .id = primary_counter_id(counter), 3227 + .stream_id = counter->id, 3225 3228 }; 3229 + 3230 + if (enable) 3231 + throttle_event.header.type = PERF_EVENT_UNTHROTTLE; 3226 3232 3227 3233 ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); 3228 3234 if (ret) ··· 3676 3672 void perf_tpcounter_event(int event_id) 3677 3673 { 3678 3674 struct perf_sample_data data = { 3679 - .regs = get_irq_regs(); 3675 + .regs = get_irq_regs(), 3680 3676 .addr = 0, 3681 3677 }; 3682 3678 ··· 3692 3688 3693 3689 static void tp_perf_counter_destroy(struct perf_counter *counter) 3694 3690 { 3695 - 
ftrace_profile_disable(perf_event_id(&counter->attr)); 3691 + ftrace_profile_disable(counter->attr.config); 3696 3692 } 3697 3693 3698 3694 static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) 3699 3695 { 3700 - int event_id = perf_event_id(&counter->attr); 3701 - int ret; 3702 - 3703 - ret = ftrace_profile_enable(event_id); 3704 - if (ret) 3696 + if (ftrace_profile_enable(counter->attr.config)) 3705 3697 return NULL; 3706 3698 3707 3699 counter->destroy = tp_perf_counter_destroy; ··· 4256 4256 */ 4257 4257 spin_lock(&child_ctx->lock); 4258 4258 child->perf_counter_ctxp = NULL; 4259 - if (child_ctx->parent_ctx) { 4260 - /* 4261 - * This context is a clone; unclone it so it can't get 4262 - * swapped to another process while we're removing all 4263 - * the counters from it. 4264 - */ 4265 - put_ctx(child_ctx->parent_ctx); 4266 - child_ctx->parent_ctx = NULL; 4267 - } 4259 + /* 4260 + * If this context is a clone; unclone it so it can't get 4261 + * swapped to another process while we're removing all 4262 + * the counters from it. 4263 + */ 4264 + unclone_ctx(child_ctx); 4268 4265 spin_unlock(&child_ctx->lock); 4269 4266 local_irq_restore(flags); 4270 4267
+15
tools/perf/Documentation/perf-report.txt
··· 24 24 --dsos=:: 25 25 Only consider symbols in these dsos. CSV that understands 26 26 file://filename entries. 27 + -n 28 + --show-nr-samples 29 + Show the number of samples for each symbol 27 30 -C:: 28 31 --comms=:: 29 32 Only consider symbols in these comms. CSV that understands ··· 35 32 --symbols=:: 36 33 Only consider these symbols. CSV that understands 37 34 file://filename entries. 35 + 36 + -w:: 37 + --field-width=:: 38 + Force each column width to the provided list, for large terminal 39 + readability. 40 + 41 + -t:: 42 + --field-separator=:: 43 + 44 + Use a special separator character and don't pad with spaces, replacing 45 + all occurrences of this separator in symbol names (and other output) 46 + with a '.' character, which is thus the only invalid separator. 38 47 39 48 SEE ALSO 40 49 --------
+1 -1
tools/perf/Makefile
··· 345 345 BUILTIN_OBJS += builtin-top.o 346 346 347 347 PERFLIBS = $(LIB_FILE) 348 - EXTLIBS = 348 + EXTLIBS = -lbfd 349 349 350 350 # 351 351 # Platform specific tweaks
-24
tools/perf/builtin-annotate.c
··· 74 74 u32 pid, ppid; 75 75 }; 76 76 77 - struct period_event { 78 - struct perf_event_header header; 79 - u64 time; 80 - u64 id; 81 - u64 sample_period; 82 - }; 83 - 84 77 typedef union event_union { 85 78 struct perf_event_header header; 86 79 struct ip_event ip; 87 80 struct mmap_event mmap; 88 81 struct comm_event comm; 89 82 struct fork_event fork; 90 - struct period_event period; 91 83 } event_t; 92 84 93 85 ··· 990 998 } 991 999 992 1000 static int 993 - process_period_event(event_t *event, unsigned long offset, unsigned long head) 994 - { 995 - dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", 996 - (void *)(offset + head), 997 - (void *)(long)(event->header.size), 998 - event->period.time, 999 - event->period.id, 1000 - event->period.sample_period); 1001 - 1002 - return 0; 1003 - } 1004 - 1005 - static int 1006 1001 process_event(event_t *event, unsigned long offset, unsigned long head) 1007 1002 { 1008 1003 switch (event->header.type) { ··· 1004 1025 1005 1026 case PERF_EVENT_FORK: 1006 1027 return process_fork_event(event, offset, head); 1007 - 1008 - case PERF_EVENT_PERIOD: 1009 - return process_period_event(event, offset, head); 1010 1028 /* 1011 1029 * We dont process them right now but they are fine: 1012 1030 */
+12 -1
tools/perf/builtin-record.c
··· 43 43 static int verbose = 0; 44 44 static int inherit_stat = 0; 45 45 static int no_samples = 0; 46 + static int sample_address = 0; 46 47 47 48 static long samples; 48 49 static struct timeval last_read; ··· 314 313 if (*pbf == 'x') { /* vm_exec */ 315 314 char *execname = strchr(bf, '/'); 316 315 316 + /* Catch VDSO */ 317 + if (execname == NULL) 318 + execname = strstr(bf, "[vdso]"); 319 + 317 320 if (execname == NULL) 318 321 continue; 319 322 ··· 405 400 406 401 if (inherit_stat) 407 402 attr->inherit_stat = 1; 403 + 404 + if (sample_address) 405 + attr->sample_type |= PERF_SAMPLE_ADDR; 408 406 409 407 if (call_graph) 410 408 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; ··· 653 645 "be more verbose (show counter open errors, etc)"), 654 646 OPT_BOOLEAN('s', "stat", &inherit_stat, 655 647 "per thread counts"), 648 + OPT_BOOLEAN('d', "data", &sample_address, 649 + "Sample addresses"), 656 650 OPT_BOOLEAN('n', "no-samples", &no_samples, 657 651 "don't sample"), 658 652 OPT_END() ··· 664 654 { 665 655 int counter; 666 656 667 - argc = parse_options(argc, argv, options, record_usage, 0); 657 + argc = parse_options(argc, argv, options, record_usage, 658 + PARSE_OPT_STOP_AT_NON_OPTION); 668 659 if (!argc && target_pid == -1 && !system_wide) 669 660 usage_with_options(record_usage, options); 670 661
+182 -73
tools/perf/builtin-report.c
··· 33 33 34 34 static char default_sort_order[] = "comm,dso"; 35 35 static char *sort_order = default_sort_order; 36 - static char *dso_list_str, *comm_list_str, *sym_list_str; 36 + static char *dso_list_str, *comm_list_str, *sym_list_str, 37 + *col_width_list_str; 37 38 static struct strlist *dso_list, *comm_list, *sym_list; 39 + static char *field_sep; 38 40 39 41 static int input; 40 42 static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; ··· 51 49 static int modules; 52 50 53 51 static int full_paths; 52 + static int show_nr_samples; 54 53 55 54 static unsigned long page_size; 56 55 static unsigned long mmap_window = 32; ··· 101 98 u32 pid, ppid; 102 99 }; 103 100 104 - struct period_event { 105 - struct perf_event_header header; 106 - u64 time; 107 - u64 id; 108 - u64 sample_period; 109 - }; 110 - 111 101 struct lost_event { 112 102 struct perf_event_header header; 113 103 u64 id; ··· 120 124 struct mmap_event mmap; 121 125 struct comm_event comm; 122 126 struct fork_event fork; 123 - struct period_event period; 124 127 struct lost_event lost; 125 128 struct read_event read; 126 129 } event_t; 130 + 131 + static int repsep_fprintf(FILE *fp, const char *fmt, ...) 
132 + { 133 + int n; 134 + va_list ap; 135 + 136 + va_start(ap, fmt); 137 + if (!field_sep) 138 + n = vfprintf(fp, fmt, ap); 139 + else { 140 + char *bf = NULL; 141 + n = vasprintf(&bf, fmt, ap); 142 + if (n > 0) { 143 + char *sep = bf; 144 + while (1) { 145 + sep = strchr(sep, *field_sep); 146 + if (sep == NULL) 147 + break; 148 + *sep = '.'; 149 + } 150 + } 151 + fputs(bf, fp); 152 + free(bf); 153 + } 154 + va_end(ap); 155 + return n; 156 + } 127 157 128 158 static LIST_HEAD(dsos); 129 159 static struct dso *kernel_dso; ··· 382 360 return self; 383 361 } 384 362 363 + static unsigned int dsos__col_width, 364 + comms__col_width, 365 + threads__col_width; 366 + 385 367 static int thread__set_comm(struct thread *self, const char *comm) 386 368 { 387 369 if (self->comm) 388 370 free(self->comm); 389 371 self->comm = strdup(comm); 390 - return self->comm ? 0 : -ENOMEM; 372 + if (!self->comm) 373 + return -ENOMEM; 374 + 375 + if (!col_width_list_str && !field_sep && 376 + (!comm_list || strlist__has_entry(comm_list, comm))) { 377 + unsigned int slen = strlen(comm); 378 + if (slen > comms__col_width) { 379 + comms__col_width = slen; 380 + threads__col_width = slen + 6; 381 + } 382 + } 383 + 384 + return 0; 391 385 } 392 386 393 387 static size_t thread__fprintf(struct thread *self, FILE *fp) ··· 574 536 575 537 int64_t (*cmp)(struct hist_entry *, struct hist_entry *); 576 538 int64_t (*collapse)(struct hist_entry *, struct hist_entry *); 577 - size_t (*print)(FILE *fp, struct hist_entry *); 539 + size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); 540 + unsigned int *width; 541 + bool elide; 578 542 }; 579 543 580 544 static int64_t cmp_null(void *l, void *r) ··· 598 558 } 599 559 600 560 static size_t 601 - sort__thread_print(FILE *fp, struct hist_entry *self) 561 + sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) 602 562 { 603 - return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); 563 + return 
repsep_fprintf(fp, "%*s:%5d", width - 6, 564 + self->thread->comm ?: "", self->thread->pid); 604 565 } 605 566 606 567 static struct sort_entry sort_thread = { 607 - .header = " Command: Pid", 568 + .header = "Command: Pid", 608 569 .cmp = sort__thread_cmp, 609 570 .print = sort__thread_print, 571 + .width = &threads__col_width, 610 572 }; 611 573 612 574 /* --sort comm */ ··· 632 590 } 633 591 634 592 static size_t 635 - sort__comm_print(FILE *fp, struct hist_entry *self) 593 + sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) 636 594 { 637 - return fprintf(fp, "%16s", self->thread->comm); 595 + return repsep_fprintf(fp, "%*s", width, self->thread->comm); 638 596 } 639 597 640 598 static struct sort_entry sort_comm = { 641 - .header = " Command", 599 + .header = "Command", 642 600 .cmp = sort__comm_cmp, 643 601 .collapse = sort__comm_collapse, 644 602 .print = sort__comm_print, 603 + .width = &comms__col_width, 645 604 }; 646 605 647 606 /* --sort dso */ ··· 660 617 } 661 618 662 619 static size_t 663 - sort__dso_print(FILE *fp, struct hist_entry *self) 620 + sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) 664 621 { 665 622 if (self->dso) 666 - return fprintf(fp, "%-25s", self->dso->name); 623 + return repsep_fprintf(fp, "%-*s", width, self->dso->name); 667 624 668 - return fprintf(fp, "%016llx ", (u64)self->ip); 625 + return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); 669 626 } 670 627 671 628 static struct sort_entry sort_dso = { 672 - .header = "Shared Object ", 629 + .header = "Shared Object", 673 630 .cmp = sort__dso_cmp, 674 631 .print = sort__dso_print, 632 + .width = &dsos__col_width, 675 633 }; 676 634 677 635 /* --sort symbol */ ··· 692 648 } 693 649 694 650 static size_t 695 - sort__sym_print(FILE *fp, struct hist_entry *self) 651 + sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) 696 652 { 697 653 size_t ret = 0; 698 654 699 655 if (verbose) 700 - ret += fprintf(fp, 
"%#018llx ", (u64)self->ip); 656 + ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); 701 657 658 + ret += repsep_fprintf(fp, "[%c] ", self->level); 702 659 if (self->sym) { 703 - ret += fprintf(fp, "[%c] %s", 704 - self->dso == kernel_dso ? 'k' : 705 - self->dso == hypervisor_dso ? 'h' : '.', self->sym->name); 660 + ret += repsep_fprintf(fp, "%s", self->sym->name); 706 661 707 662 if (self->sym->module) 708 - ret += fprintf(fp, "\t[%s]", self->sym->module->name); 663 + ret += repsep_fprintf(fp, "\t[%s]", 664 + self->sym->module->name); 709 665 } else { 710 - ret += fprintf(fp, "%#016llx", (u64)self->ip); 666 + ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); 711 667 } 712 668 713 669 return ret; ··· 734 690 } 735 691 736 692 static size_t 737 - sort__parent_print(FILE *fp, struct hist_entry *self) 693 + sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) 738 694 { 739 - size_t ret = 0; 740 - 741 - ret += fprintf(fp, "%-20s", self->parent ? self->parent->name : "[other]"); 742 - 743 - return ret; 695 + return repsep_fprintf(fp, "%-*s", width, 696 + self->parent ? self->parent->name : "[other]"); 744 697 } 745 698 699 + static unsigned int parent_symbol__col_width; 700 + 746 701 static struct sort_entry sort_parent = { 747 - .header = "Parent symbol ", 702 + .header = "Parent symbol", 748 703 .cmp = sort__parent_cmp, 749 704 .print = sort__parent_print, 705 + .width = &parent_symbol__col_width, 750 706 }; 751 707 752 708 static int sort__need_collapse = 0; ··· 1011 967 return 0; 1012 968 1013 969 if (total_samples) 1014 - ret = percent_color_fprintf(fp, " %6.2f%%", 1015 - (self->count * 100.0) / total_samples); 970 + ret = percent_color_fprintf(fp, 971 + field_sep ? "%.2f" : " %6.2f%%", 972 + (self->count * 100.0) / total_samples); 1016 973 else 1017 - ret = fprintf(fp, "%12Ld ", self->count); 974 + ret = fprintf(fp, field_sep ? 
"%lld" : "%12lld ", self->count); 975 + 976 + if (show_nr_samples) { 977 + if (field_sep) 978 + fprintf(fp, "%c%lld", *field_sep, self->count); 979 + else 980 + fprintf(fp, "%11lld", self->count); 981 + } 1018 982 1019 983 list_for_each_entry(se, &hist_entry__sort_list, list) { 1020 - if (exclude_other && (se == &sort_parent)) 984 + if (se->elide) 1021 985 continue; 1022 986 1023 - fprintf(fp, " "); 1024 - ret += se->print(fp, self); 987 + fprintf(fp, "%s", field_sep ?: " "); 988 + ret += se->print(fp, self, se->width ? *se->width : 0); 1025 989 } 1026 990 1027 991 ret += fprintf(fp, "\n"); ··· 1043 991 /* 1044 992 * 1045 993 */ 994 + 995 + static void dso__calc_col_width(struct dso *self) 996 + { 997 + if (!col_width_list_str && !field_sep && 998 + (!dso_list || strlist__has_entry(dso_list, self->name))) { 999 + unsigned int slen = strlen(self->name); 1000 + if (slen > dsos__col_width) 1001 + dsos__col_width = slen; 1002 + } 1003 + 1004 + self->slen_calculated = 1; 1005 + } 1046 1006 1047 1007 static struct symbol * 1048 1008 resolve_symbol(struct thread *thread, struct map **mapp, ··· 1075 1011 1076 1012 map = thread__find_map(thread, ip); 1077 1013 if (map != NULL) { 1014 + /* 1015 + * We have to do this here as we may have a dso 1016 + * with no symbol hit that has a name longer than 1017 + * the ones with symbols sampled. 
1018 + */ 1019 + if (!sort_dso.elide && !map->dso->slen_calculated) 1020 + dso__calc_col_width(map->dso); 1021 + 1078 1022 if (mapp) 1079 1023 *mapp = map; 1080 1024 got_map: ··· 1354 1282 struct sort_entry *se; 1355 1283 struct rb_node *nd; 1356 1284 size_t ret = 0; 1285 + unsigned int width; 1286 + char *col_width = col_width_list_str; 1357 1287 1358 - fprintf(fp, "\n"); 1359 - fprintf(fp, "#\n"); 1360 - fprintf(fp, "# (%Ld samples)\n", (u64)total_samples); 1288 + fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); 1361 1289 fprintf(fp, "#\n"); 1362 1290 1363 1291 fprintf(fp, "# Overhead"); 1292 + if (show_nr_samples) { 1293 + if (field_sep) 1294 + fprintf(fp, "%cSamples", *field_sep); 1295 + else 1296 + fputs(" Samples ", fp); 1297 + } 1364 1298 list_for_each_entry(se, &hist_entry__sort_list, list) { 1365 - if (exclude_other && (se == &sort_parent)) 1299 + if (se->elide) 1366 1300 continue; 1367 - fprintf(fp, " %s", se->header); 1301 + if (field_sep) { 1302 + fprintf(fp, "%c%s", *field_sep, se->header); 1303 + continue; 1304 + } 1305 + width = strlen(se->header); 1306 + if (se->width) { 1307 + if (col_width_list_str) { 1308 + if (col_width) { 1309 + *se->width = atoi(col_width); 1310 + col_width = strchr(col_width, ','); 1311 + if (col_width) 1312 + ++col_width; 1313 + } 1314 + } 1315 + width = *se->width = max(*se->width, width); 1316 + } 1317 + fprintf(fp, " %*s", width, se->header); 1368 1318 } 1369 1319 fprintf(fp, "\n"); 1370 1320 1321 + if (field_sep) 1322 + goto print_entries; 1323 + 1371 1324 fprintf(fp, "# ........"); 1325 + if (show_nr_samples) 1326 + fprintf(fp, " .........."); 1372 1327 list_for_each_entry(se, &hist_entry__sort_list, list) { 1373 1328 unsigned int i; 1374 1329 1375 - if (exclude_other && (se == &sort_parent)) 1330 + if (se->elide) 1376 1331 continue; 1377 1332 1378 1333 fprintf(fp, " "); 1379 - for (i = 0; i < strlen(se->header); i++) 1334 + if (se->width) 1335 + width = *se->width; 1336 + else 1337 + width = strlen(se->header); 
1338 + for (i = 0; i < width; i++) 1380 1339 fprintf(fp, "."); 1381 1340 } 1382 1341 fprintf(fp, "\n"); 1383 1342 1384 1343 fprintf(fp, "#\n"); 1385 1344 1345 + print_entries: 1386 1346 for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { 1387 1347 pos = rb_entry(nd, struct hist_entry, rb_node); 1388 1348 ret += hist_entry__fprintf(fp, pos, total_samples); ··· 1628 1524 } 1629 1525 1630 1526 static int 1631 - process_period_event(event_t *event, unsigned long offset, unsigned long head) 1632 - { 1633 - dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", 1634 - (void *)(offset + head), 1635 - (void *)(long)(event->header.size), 1636 - event->period.time, 1637 - event->period.id, 1638 - event->period.sample_period); 1639 - 1640 - return 0; 1641 - } 1642 - 1643 - static int 1644 1527 process_lost_event(event_t *event, unsigned long offset, unsigned long head) 1645 1528 { 1646 1529 dprintf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", ··· 1707 1616 1708 1617 case PERF_EVENT_FORK: 1709 1618 return process_fork_event(event, offset, head); 1710 - 1711 - case PERF_EVENT_PERIOD: 1712 - return process_period_event(event, offset, head); 1713 1619 1714 1620 case PERF_EVENT_LOST: 1715 1621 return process_lost_event(event, offset, head); ··· 1971 1883 OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), 1972 1884 OPT_BOOLEAN('m', "modules", &modules, 1973 1885 "load module symbols - WARNING: use only with -k and LIVE kernel"), 1886 + OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, 1887 + "Show a column with the number of samples"), 1974 1888 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 1975 1889 "sort by key(s): pid, comm, dso, symbol, parent"), 1976 1890 OPT_BOOLEAN('P', "full-paths", &full_paths, ··· 1981 1891 "regex filter to identify parent, see: '--sort parent'"), 1982 1892 OPT_BOOLEAN('x', "exclude-other", &exclude_other, 1983 1893 "Only display entries with parent-match"), 1984 - OPT_CALLBACK_DEFAULT('c', "callchain", 
NULL, "output_type,min_percent", 1894 + OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent", 1985 1895 "Display callchains using output_type and min percent threshold. " 1986 - "Default: flat,0", &parse_callchain_opt, callchain_default_opt), 1896 + "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt), 1987 1897 OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", 1988 1898 "only consider symbols in these dsos"), 1989 1899 OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", 1990 1900 "only consider symbols in these comms"), 1991 1901 OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", 1992 1902 "only consider these symbols"), 1903 + OPT_STRING('w', "column-widths", &col_width_list_str, 1904 + "width[,width...]", 1905 + "don't try to adjust column width, use these fixed values"), 1906 + OPT_STRING('t', "field-separator", &field_sep, "separator", 1907 + "separator for columns, no spaces will be added between " 1908 + "columns '.' is reserved."), 1993 1909 OPT_END() 1994 1910 }; 1995 1911 ··· 2015 1919 } 2016 1920 2017 1921 static void setup_list(struct strlist **list, const char *list_str, 2018 - const char *list_name) 1922 + struct sort_entry *se, const char *list_name, 1923 + FILE *fp) 2019 1924 { 2020 1925 if (list_str) { 2021 1926 *list = strlist__new(true, list_str); ··· 2024 1927 fprintf(stderr, "problems parsing %s list\n", 2025 1928 list_name); 2026 1929 exit(129); 1930 + } 1931 + if (strlist__nr_entries(*list) == 1) { 1932 + fprintf(fp, "# %s: %s\n", list_name, 1933 + strlist__entry(*list, 0)->s); 1934 + se->elide = true; 2027 1935 } 2028 1936 } 2029 1937 } ··· 2043 1941 2044 1942 setup_sorting(); 2045 1943 2046 - if (parent_pattern != default_parent_pattern) 1944 + if (parent_pattern != default_parent_pattern) { 2047 1945 sort_dimension__add("parent"); 2048 - else 1946 + sort_parent.elide = 1; 1947 + } else 2049 1948 exclude_other = 0; 2050 1949 2051 1950 /* ··· 2055 1952 if (argc) 2056 1953 
usage_with_options(report_usage, options); 2057 1954 2058 - setup_list(&dso_list, dso_list_str, "dso"); 2059 - setup_list(&comm_list, comm_list_str, "comm"); 2060 - setup_list(&sym_list, sym_list_str, "symbol"); 2061 - 2062 1955 setup_pager(); 1956 + 1957 + setup_list(&dso_list, dso_list_str, &sort_dso, "dso", stdout); 1958 + setup_list(&comm_list, comm_list_str, &sort_comm, "comm", stdout); 1959 + setup_list(&sym_list, sym_list_str, &sort_sym, "symbol", stdout); 1960 + 1961 + if (field_sep && *field_sep == '.') { 1962 + fputs("'.' is the only non valid --field-separator argument\n", 1963 + stderr); 1964 + exit(129); 1965 + } 2063 1966 2064 1967 return __cmd_report(); 2065 1968 }
+2 -1
tools/perf/builtin-stat.c
··· 511 511 { 512 512 int status; 513 513 514 - argc = parse_options(argc, argv, options, stat_usage, 0); 514 + argc = parse_options(argc, argv, options, stat_usage, 515 + PARSE_OPT_STOP_AT_NON_OPTION); 515 516 if (!argc) 516 517 usage_with_options(stat_usage, options); 517 518 if (run_count <= 0 || run_count > MAX_RUN)
+5 -1
tools/perf/builtin-top.c
··· 58 58 static int print_entries = 15; 59 59 60 60 static int target_pid = -1; 61 + static int inherit = 0; 61 62 static int profile_cpu = -1; 62 63 static int nr_cpus = 0; 63 64 static unsigned int realtime_prio = 0; ··· 550 549 static void start_counter(int i, int counter) 551 550 { 552 551 struct perf_counter_attr *attr; 553 - unsigned int cpu; 552 + int cpu; 554 553 555 554 cpu = profile_cpu; 556 555 if (target_pid == -1 && profile_cpu == -1) ··· 560 559 561 560 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 562 561 attr->freq = freq; 562 + attr->inherit = (cpu < 0) && inherit; 563 563 564 564 try_again: 565 565 fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0); ··· 687 685 "only display functions with more events than this"), 688 686 OPT_BOOLEAN('g', "group", &group, 689 687 "put the counters into a counter group"), 688 + OPT_BOOLEAN('i', "inherit", &inherit, 689 + "child tasks inherit counters"), 690 690 OPT_STRING('s', "sym-filter", &sym_filter, "pattern", 691 691 "only display symbols matchig this pattern"), 692 692 OPT_BOOLEAN('z', "zero", &zero,
+76 -1
tools/perf/perf.c
··· 12 12 #include "util/cache.h" 13 13 #include "util/quote.h" 14 14 #include "util/run-command.h" 15 + #include "util/parse-events.h" 16 + #include "util/string.h" 15 17 16 18 const char perf_usage_string[] = 17 19 "perf [--version] [--help] COMMAND [ARGS]"; ··· 26 24 const char *cmd; 27 25 int val; 28 26 }; 27 + 28 + static char debugfs_mntpt[MAXPATHLEN]; 29 29 30 30 static int pager_command_config(const char *var, const char *value, void *data) 31 31 { ··· 58 54 default: 59 55 break; 60 56 } 57 + } 58 + 59 + static void set_debugfs_path(void) 60 + { 61 + char *path; 62 + 63 + path = getenv(PERF_DEBUGFS_ENVIRONMENT); 64 + snprintf(debugfs_path, MAXPATHLEN, "%s/%s", path ?: debugfs_mntpt, 65 + "tracing/events"); 61 66 } 62 67 63 68 static int handle_options(const char*** argv, int* argc, int* envchanged) ··· 133 120 (*argc)--; 134 121 } else if (!prefixcmp(cmd, "--work-tree=")) { 135 122 setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); 123 + if (envchanged) 124 + *envchanged = 1; 125 + } else if (!strcmp(cmd, "--debugfs-dir")) { 126 + if (*argc < 2) { 127 + fprintf(stderr, "No directory given for --debugfs-dir.\n"); 128 + usage(perf_usage_string); 129 + } 130 + strncpy(debugfs_mntpt, (*argv)[1], MAXPATHLEN); 131 + debugfs_mntpt[MAXPATHLEN - 1] = '\0'; 132 + if (envchanged) 133 + *envchanged = 1; 134 + (*argv)++; 135 + (*argc)--; 136 + } else if (!prefixcmp(cmd, "--debugfs-dir=")) { 137 + strncpy(debugfs_mntpt, cmd + 14, MAXPATHLEN); 138 + debugfs_mntpt[MAXPATHLEN - 1] = '\0'; 136 139 if (envchanged) 137 140 *envchanged = 1; 138 141 } else { ··· 257 228 if (use_pager == -1 && p->option & USE_PAGER) 258 229 use_pager = 1; 259 230 commit_pager_choice(); 231 + set_debugfs_path(); 260 232 261 233 status = p->fn(argc, argv, prefix); 262 234 if (status) ··· 376 346 return done_alias; 377 347 } 378 348 349 + /* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ 350 + static void get_debugfs_mntpt(void) 351 + { 352 + FILE *file; 353 + char 
fs_type[100]; 354 + char debugfs[MAXPATHLEN]; 355 + 356 + /* 357 + * try the standard location 358 + */ 359 + if (valid_debugfs_mount("/sys/kernel/debug/") == 0) { 360 + strcpy(debugfs_mntpt, "/sys/kernel/debug/"); 361 + return; 362 + } 363 + 364 + /* 365 + * try the sane location 366 + */ 367 + if (valid_debugfs_mount("/debug/") == 0) { 368 + strcpy(debugfs_mntpt, "/debug/"); 369 + return; 370 + } 371 + 372 + /* 373 + * give up and parse /proc/mounts 374 + */ 375 + file = fopen("/proc/mounts", "r"); 376 + if (file == NULL) 377 + return; 378 + 379 + while (fscanf(file, "%*s %" 380 + STR(MAXPATHLEN) 381 + "s %99s %*s %*d %*d\n", 382 + debugfs, fs_type) == 2) { 383 + if (strcmp(fs_type, "debugfs") == 0) 384 + break; 385 + } 386 + fclose(file); 387 + if (strcmp(fs_type, "debugfs") == 0) { 388 + strncpy(debugfs_mntpt, debugfs, MAXPATHLEN); 389 + debugfs_mntpt[MAXPATHLEN - 1] = '\0'; 390 + } 391 + } 379 392 380 393 int main(int argc, const char **argv) 381 394 { ··· 427 354 cmd = perf_extract_argv0_path(argv[0]); 428 355 if (!cmd) 429 356 cmd = "perf-help"; 430 - 357 + /* get debugfs mount point from /proc/mounts */ 358 + get_debugfs_mntpt(); 431 359 /* 432 360 * "perf-xxxx" is the same as "perf xxxx", but we obviously: 433 361 * ··· 451 377 argc--; 452 378 handle_options(&argv, &argc, NULL); 453 379 commit_pager_choice(); 380 + set_debugfs_path(); 454 381 if (argc > 0) { 455 382 if (!prefixcmp(argv[0], "--")) 456 383 argv[0] += 2;
+7 -1
tools/perf/perf.h
··· 1 1 #ifndef _PERF_PERF_H 2 2 #define _PERF_PERF_H 3 3 4 - #if defined(__x86_64__) || defined(__i386__) 4 + #if defined(__i386__) 5 + #include "../../arch/x86/include/asm/unistd.h" 6 + #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") 7 + #define cpu_relax() asm volatile("rep; nop" ::: "memory"); 8 + #endif 9 + 10 + #if defined(__x86_64__) 5 11 #include "../../arch/x86/include/asm/unistd.h" 6 12 #define rmb() asm volatile("lfence" ::: "memory") 7 13 #define cpu_relax() asm volatile("rep; nop" ::: "memory");
+1
tools/perf/util/cache.h
··· 18 18 #define PERFATTRIBUTES_FILE ".perfattributes" 19 19 #define INFOATTRIBUTES_FILE "info/attributes" 20 20 #define ATTRIBUTE_MACRO_PREFIX "[attr]" 21 + #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" 21 22 22 23 typedef int (*config_fn_t)(const char *, const char *, void *); 23 24 extern int perf_default_config(const char *, const char *, void *);
+1 -1
tools/perf/util/header.h
··· 16 16 int frozen; 17 17 int attrs, size; 18 18 struct perf_header_attr **attr; 19 - off_t attr_offset; 19 + s64 attr_offset; 20 20 u64 data_offset; 21 21 u64 data_size; 22 22 };
+8
tools/perf/util/include/linux/kernel.h
··· 18 18 (type *)((char *)__mptr - offsetof(type, member)); }) 19 19 #endif 20 20 21 + #ifndef max 22 + #define max(x, y) ({ \ 23 + typeof(x) _max1 = (x); \ 24 + typeof(y) _max2 = (y); \ 25 + (void) (&_max1 == &_max2); \ 26 + _max1 > _max2 ? _max1 : _max2; }) 27 + #endif 28 + 21 29 #endif
+178 -4
tools/perf/util/parse-events.c
··· 5 5 #include "parse-events.h" 6 6 #include "exec_cmd.h" 7 7 #include "string.h" 8 + #include "cache.h" 8 9 9 10 extern char *strcasestr(const char *haystack, const char *needle); 10 11 ··· 19 18 char *symbol; 20 19 char *alias; 21 20 }; 21 + 22 + char debugfs_path[MAXPATHLEN]; 22 23 23 24 #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x 24 25 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x ··· 74 71 #define MAX_ALIASES 8 75 72 76 73 static char *hw_cache[][MAX_ALIASES] = { 77 - { "L1-d$", "l1-d", "l1d", "L1-data", }, 78 - { "L1-i$", "l1-i", "l1i", "L1-instruction", }, 74 + { "L1-dcache", "l1-d", "l1d", "L1-data", }, 75 + { "L1-icache", "l1-i", "l1i", "L1-instruction", }, 79 76 { "LLC", "L2" }, 80 77 { "dTLB", "d-tlb", "Data-TLB", }, 81 78 { "iTLB", "i-tlb", "Instruction-TLB", }, ··· 112 109 [C(ITLB)] = (CACHE_READ), 113 110 [C(BPU)] = (CACHE_READ), 114 111 }; 112 + 113 + #define for_each_subsystem(sys_dir, sys_dirent, sys_next, file, st) \ 114 + while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ 115 + if (snprintf(file, MAXPATHLEN, "%s/%s", debugfs_path, \ 116 + sys_dirent.d_name) && \ 117 + (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ 118 + (strcmp(sys_dirent.d_name, ".")) && \ 119 + (strcmp(sys_dirent.d_name, ".."))) 120 + 121 + #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ 122 + while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ 123 + if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ 124 + sys_dirent.d_name, evt_dirent.d_name) && \ 125 + (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ 126 + (strcmp(evt_dirent.d_name, ".")) && \ 127 + (strcmp(evt_dirent.d_name, ".."))) 128 + 129 + #define MAX_EVENT_LENGTH 30 130 + 131 + int valid_debugfs_mount(const char *debugfs) 132 + { 133 + struct statfs st_fs; 134 + 135 + if (statfs(debugfs, &st_fs) < 0) 136 + return -ENOENT; 137 + else if (st_fs.f_type != (long) DEBUGFS_MAGIC) 138 + return -ENOENT; 
139 + return 0; 140 + } 141 + 142 + static char *tracepoint_id_to_name(u64 config) 143 + { 144 + static char tracepoint_name[2 * MAX_EVENT_LENGTH]; 145 + DIR *sys_dir, *evt_dir; 146 + struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; 147 + struct stat st; 148 + char id_buf[4]; 149 + int fd; 150 + u64 id; 151 + char evt_path[MAXPATHLEN]; 152 + 153 + if (valid_debugfs_mount(debugfs_path)) 154 + return "unkown"; 155 + 156 + sys_dir = opendir(debugfs_path); 157 + if (!sys_dir) 158 + goto cleanup; 159 + 160 + for_each_subsystem(sys_dir, sys_dirent, sys_next, evt_path, st) { 161 + evt_dir = opendir(evt_path); 162 + if (!evt_dir) 163 + goto cleanup; 164 + for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, 165 + evt_path, st) { 166 + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", 167 + debugfs_path, sys_dirent.d_name, 168 + evt_dirent.d_name); 169 + fd = open(evt_path, O_RDONLY); 170 + if (fd < 0) 171 + continue; 172 + if (read(fd, id_buf, sizeof(id_buf)) < 0) { 173 + close(fd); 174 + continue; 175 + } 176 + close(fd); 177 + id = atoll(id_buf); 178 + if (id == config) { 179 + closedir(evt_dir); 180 + closedir(sys_dir); 181 + snprintf(tracepoint_name, 2 * MAX_EVENT_LENGTH, 182 + "%s:%s", sys_dirent.d_name, 183 + evt_dirent.d_name); 184 + return tracepoint_name; 185 + } 186 + } 187 + closedir(evt_dir); 188 + } 189 + 190 + cleanup: 191 + closedir(sys_dir); 192 + return "unkown"; 193 + } 115 194 116 195 static int is_cache_op_valid(u8 cache_type, u8 cache_op) 117 196 { ··· 261 176 if (config < PERF_COUNT_SW_MAX) 262 177 return sw_event_names[config]; 263 178 return "unknown-software"; 179 + 180 + case PERF_TYPE_TRACEPOINT: 181 + return tracepoint_id_to_name(config); 264 182 265 183 default: 266 184 break; ··· 350 262 attr->type = PERF_TYPE_HW_CACHE; 351 263 352 264 *str = s; 265 + return 1; 266 + } 267 + 268 + static int parse_tracepoint_event(const char **strp, 269 + struct perf_counter_attr *attr) 270 + { 271 + const char *evt_name; 272 + char 
sys_name[MAX_EVENT_LENGTH]; 273 + char id_buf[4]; 274 + int fd; 275 + unsigned int sys_length, evt_length; 276 + u64 id; 277 + char evt_path[MAXPATHLEN]; 278 + 279 + if (valid_debugfs_mount(debugfs_path)) 280 + return 0; 281 + 282 + evt_name = strchr(*strp, ':'); 283 + if (!evt_name) 284 + return 0; 285 + 286 + sys_length = evt_name - *strp; 287 + if (sys_length >= MAX_EVENT_LENGTH) 288 + return 0; 289 + 290 + strncpy(sys_name, *strp, sys_length); 291 + sys_name[sys_length] = '\0'; 292 + evt_name = evt_name + 1; 293 + evt_length = strlen(evt_name); 294 + if (evt_length >= MAX_EVENT_LENGTH) 295 + return 0; 296 + 297 + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, 298 + sys_name, evt_name); 299 + fd = open(evt_path, O_RDONLY); 300 + if (fd < 0) 301 + return 0; 302 + 303 + if (read(fd, id_buf, sizeof(id_buf)) < 0) { 304 + close(fd); 305 + return 0; 306 + } 307 + close(fd); 308 + id = atoll(id_buf); 309 + attr->config = id; 310 + attr->type = PERF_TYPE_TRACEPOINT; 311 + *strp = evt_name + evt_length; 353 312 return 1; 354 313 } 355 314 ··· 509 374 */ 510 375 static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) 511 376 { 512 - if (!(parse_raw_event(str, attr) || 377 + if (!(parse_tracepoint_event(str, attr) || 378 + parse_raw_event(str, attr) || 513 379 parse_numeric_event(str, attr) || 514 380 parse_symbolic_event(str, attr) || 515 381 parse_generic_hw_event(str, attr))) ··· 559 423 }; 560 424 561 425 /* 426 + * Print the events from <debugfs_mount_point>/tracing/events 427 + */ 428 + 429 + static void print_tracepoint_events(void) 430 + { 431 + DIR *sys_dir, *evt_dir; 432 + struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; 433 + struct stat st; 434 + char evt_path[MAXPATHLEN]; 435 + 436 + if (valid_debugfs_mount(debugfs_path)) 437 + return; 438 + 439 + sys_dir = opendir(debugfs_path); 440 + if (!sys_dir) 441 + goto cleanup; 442 + 443 + for_each_subsystem(sys_dir, sys_dirent, sys_next, evt_path, st) { 444 + evt_dir = 
opendir(evt_path); 445 + if (!evt_dir) 446 + goto cleanup; 447 + for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, 448 + evt_path, st) { 449 + snprintf(evt_path, MAXPATHLEN, "%s:%s", 450 + sys_dirent.d_name, evt_dirent.d_name); 451 + fprintf(stderr, " %-40s [%s]\n", evt_path, 452 + event_type_descriptors[PERF_TYPE_TRACEPOINT+1]); 453 + } 454 + closedir(evt_dir); 455 + } 456 + 457 + cleanup: 458 + closedir(sys_dir); 459 + } 460 + 461 + /* 562 462 * Print the help text for the event symbols: 563 463 */ 564 464 void print_events(void) ··· 608 436 609 437 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { 610 438 type = syms->type + 1; 611 - if (type > ARRAY_SIZE(event_type_descriptors)) 439 + if (type >= ARRAY_SIZE(event_type_descriptors)) 612 440 type = 0; 613 441 614 442 if (type != prev_type) ··· 643 471 fprintf(stderr, " %-40s [raw hardware event descriptor]\n", 644 472 "rNNN"); 645 473 fprintf(stderr, "\n"); 474 + 475 + print_tracepoint_events(); 646 476 647 477 exit(129); 648 478 }
+5
tools/perf/util/parse-events.h
··· 3 3 * Parse symbolic events/counts passed in as options: 4 4 */ 5 5 6 + struct option; 7 + 6 8 extern int nr_counters; 7 9 8 10 extern struct perf_counter_attr attrs[MAX_COUNTERS]; ··· 16 14 #define EVENTS_HELP_MAX (128*1024) 17 15 18 16 extern void print_events(void); 17 + 18 + extern char debugfs_path[]; 19 + extern int valid_debugfs_mount(const char *debugfs); 19 20
+3
tools/perf/util/string.h
··· 5 5 6 6 int hex2u64(const char *ptr, u64 *val); 7 7 8 + #define _STR(x) #x 9 + #define STR(x) _STR(x) 10 + 8 11 #endif
+18 -2
tools/perf/util/strlist.c
··· 64 64 65 65 rb_link_node(&sn->rb_node, parent, p); 66 66 rb_insert_color(&sn->rb_node, &self->entries); 67 + ++self->nr_entries; 67 68 68 69 return 0; 69 70 } ··· 156 155 struct strlist *self = malloc(sizeof(*self)); 157 156 158 157 if (self != NULL) { 159 - self->entries = RB_ROOT; 160 - self->dupstr = dupstr; 158 + self->entries = RB_ROOT; 159 + self->dupstr = dupstr; 160 + self->nr_entries = 0; 161 161 if (slist && strlist__parse_list(self, slist) != 0) 162 162 goto out_error; 163 163 } ··· 183 181 self->entries = RB_ROOT; 184 182 free(self); 185 183 } 184 + } 185 + 186 + struct str_node *strlist__entry(const struct strlist *self, unsigned int idx) 187 + { 188 + struct rb_node *nd; 189 + 190 + for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { 191 + struct str_node *pos = rb_entry(nd, struct str_node, rb_node); 192 + 193 + if (!idx--) 194 + return pos; 195 + } 196 + 197 + return NULL; 186 198 }
+9 -2
tools/perf/util/strlist.h
··· 11 11 12 12 struct strlist { 13 13 struct rb_root entries; 14 - bool dupstr; 14 + unsigned int nr_entries; 15 + bool dupstr; 15 16 }; 16 17 17 18 struct strlist *strlist__new(bool dupstr, const char *slist); ··· 22 21 int strlist__load(struct strlist *self, const char *filename); 23 22 int strlist__add(struct strlist *self, const char *str); 24 23 24 + struct str_node *strlist__entry(const struct strlist *self, unsigned int idx); 25 25 bool strlist__has_entry(struct strlist *self, const char *entry); 26 26 27 27 static inline bool strlist__empty(const struct strlist *self) 28 28 { 29 - return rb_first(&self->entries) == NULL; 29 + return self->nr_entries == 0; 30 + } 31 + 32 + static inline unsigned int strlist__nr_entries(const struct strlist *self) 33 + { 34 + return self->nr_entries; 30 35 } 31 36 32 37 int strlist__parse_list(struct strlist *self, const char *s);
+91 -56
tools/perf/util/symbol.c
··· 6 6 #include <libelf.h> 7 7 #include <gelf.h> 8 8 #include <elf.h> 9 + #include <bfd.h> 9 10 10 11 const char *sym_hist_filter; 12 + 13 + #ifndef DMGL_PARAMS 14 + #define DMGL_PARAMS (1 << 0) /* Include function args */ 15 + #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ 16 + #endif 11 17 12 18 static struct symbol *symbol__new(u64 start, u64 len, 13 19 const char *name, unsigned int priv_size, ··· 71 65 self->syms = RB_ROOT; 72 66 self->sym_priv_size = sym_priv_size; 73 67 self->find_symbol = dso__find_symbol; 68 + self->slen_calculated = 0; 74 69 } 75 70 76 71 return self; ··· 380 373 idx < nr_entries; \ 381 374 ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) 382 375 383 - static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, 384 - GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym, 385 - GElf_Shdr *shdr_dynsym, 386 - size_t dynsym_idx, int verbose) 376 + /* 377 + * We need to check if we have a .dynsym, so that we can handle the 378 + * .plt, synthesizing its symbols, that aren't on the symtabs (be it 379 + * .dynsym or .symtab). 380 + * And always look at the original dso, not at debuginfo packages, that 381 + * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). 
382 + */ 383 + static int dso__synthesize_plt_symbols(struct dso *self, int verbose) 387 384 { 388 385 uint32_t nr_rel_entries, idx; 389 386 GElf_Sym sym; 390 387 u64 plt_offset; 391 388 GElf_Shdr shdr_plt; 392 389 struct symbol *f; 393 - GElf_Shdr shdr_rel_plt; 390 + GElf_Shdr shdr_rel_plt, shdr_dynsym; 394 391 Elf_Data *reldata, *syms, *symstrs; 395 - Elf_Scn *scn_plt_rel, *scn_symstrs; 392 + Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; 393 + size_t dynsym_idx; 394 + GElf_Ehdr ehdr; 396 395 char sympltname[1024]; 397 - int nr = 0, symidx; 396 + Elf *elf; 397 + int nr = 0, symidx, fd, err = 0; 398 398 399 - scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt, 399 + fd = open(self->name, O_RDONLY); 400 + if (fd < 0) 401 + goto out; 402 + 403 + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); 404 + if (elf == NULL) 405 + goto out_close; 406 + 407 + if (gelf_getehdr(elf, &ehdr) == NULL) 408 + goto out_elf_end; 409 + 410 + scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym, 411 + ".dynsym", &dynsym_idx); 412 + if (scn_dynsym == NULL) 413 + goto out_elf_end; 414 + 415 + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, 400 416 ".rela.plt", NULL); 401 417 if (scn_plt_rel == NULL) { 402 - scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt, 418 + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, 403 419 ".rel.plt", NULL); 404 420 if (scn_plt_rel == NULL) 405 - return 0; 421 + goto out_elf_end; 406 422 } 407 423 408 - if (shdr_rel_plt.sh_link != dynsym_idx) 409 - return 0; 424 + err = -1; 410 425 411 - if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL) 412 - return 0; 426 + if (shdr_rel_plt.sh_link != dynsym_idx) 427 + goto out_elf_end; 428 + 429 + if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) 430 + goto out_elf_end; 413 431 414 432 /* 415 433 * Fetch the relocation section to find the indexes to the GOT ··· 442 410 */ 443 411 reldata = elf_getdata(scn_plt_rel, NULL); 444 412 if (reldata 
== NULL) 445 - return -1; 413 + goto out_elf_end; 446 414 447 415 syms = elf_getdata(scn_dynsym, NULL); 448 416 if (syms == NULL) 449 - return -1; 417 + goto out_elf_end; 450 418 451 - scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link); 419 + scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link); 452 420 if (scn_symstrs == NULL) 453 - return -1; 421 + goto out_elf_end; 454 422 455 423 symstrs = elf_getdata(scn_symstrs, NULL); 456 424 if (symstrs == NULL) 457 - return -1; 425 + goto out_elf_end; 458 426 459 427 nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; 460 428 plt_offset = shdr_plt.sh_offset; ··· 473 441 f = symbol__new(plt_offset, shdr_plt.sh_entsize, 474 442 sympltname, self->sym_priv_size, 0, verbose); 475 443 if (!f) 476 - return -1; 444 + goto out_elf_end; 477 445 478 446 dso__insert_symbol(self, f); 479 447 ++nr; ··· 491 459 f = symbol__new(plt_offset, shdr_plt.sh_entsize, 492 460 sympltname, self->sym_priv_size, 0, verbose); 493 461 if (!f) 494 - return -1; 462 + goto out_elf_end; 495 463 496 464 dso__insert_symbol(self, f); 497 465 ++nr; 498 466 } 499 - } else { 500 - /* 501 - * TODO: There are still one more shdr_rel_plt.sh_type 502 - * I have to investigate, but probably should be ignored. 
503 - */ 504 467 } 505 468 506 - return nr; 469 + err = 0; 470 + out_elf_end: 471 + elf_end(elf); 472 + out_close: 473 + close(fd); 474 + 475 + if (err == 0) 476 + return nr; 477 + out: 478 + fprintf(stderr, "%s: problems reading %s PLT info.\n", 479 + __func__, self->name); 480 + return 0; 507 481 } 508 482 509 483 static int dso__load_sym(struct dso *self, int fd, const char *name, ··· 523 485 GElf_Shdr shdr; 524 486 Elf_Data *syms; 525 487 GElf_Sym sym; 526 - Elf_Scn *sec, *sec_dynsym, *sec_strndx; 488 + Elf_Scn *sec, *sec_strndx; 527 489 Elf *elf; 528 - size_t dynsym_idx; 529 - int nr = 0; 490 + int nr = 0, kernel = !strcmp("[kernel]", self->name); 530 491 531 492 elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); 532 493 if (elf == NULL) { ··· 541 504 goto out_elf_end; 542 505 } 543 506 544 - /* 545 - * We need to check if we have a .dynsym, so that we can handle the 546 - * .plt, synthesizing its symbols, that aren't on the symtabs (be it 547 - * .dynsym or .symtab) 548 - */ 549 - sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr, 550 - ".dynsym", &dynsym_idx); 551 - if (sec_dynsym != NULL) { 552 - nr = dso__synthesize_plt_symbols(self, elf, &ehdr, 553 - sec_dynsym, &shdr, 554 - dynsym_idx, verbose); 555 - if (nr < 0) 556 - goto out_elf_end; 557 - } 558 - 559 - /* 560 - * But if we have a full .symtab (that is a superset of .dynsym) we 561 - * should add the symbols not in the .dynsyn 562 - */ 563 507 sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); 564 508 if (sec == NULL) { 565 - if (sec_dynsym == NULL) 509 + sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL); 510 + if (sec == NULL) 566 511 goto out_elf_end; 567 - 568 - sec = sec_dynsym; 569 - gelf_getshdr(sec, &shdr); 570 512 } 571 513 572 514 syms = elf_getdata(sec, NULL); ··· 571 555 nr_syms = shdr.sh_size / shdr.sh_entsize; 572 556 573 557 memset(&sym, 0, sizeof(sym)); 574 - self->adjust_symbols = (ehdr.e_type == ET_EXEC || 558 + if (!kernel) { 559 + self->adjust_symbols = 
(ehdr.e_type == ET_EXEC || 575 560 elf_section_by_name(elf, &ehdr, &shdr, 576 561 ".gnu.prelink_undo", 577 562 NULL) != NULL); 563 + } else self->adjust_symbols = 0; 564 + 578 565 elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { 579 566 struct symbol *f; 567 + const char *name; 568 + char *demangled; 580 569 u64 obj_start; 581 570 struct section *section = NULL; 582 571 int is_label = elf_sym__is_label(&sym); ··· 620 599 goto out_elf_end; 621 600 } 622 601 } 602 + /* 603 + * We need to figure out if the object was created from C++ sources 604 + * DWARF DW_compile_unit has this, but we don't always have access 605 + * to it... 606 + */ 607 + name = elf_sym__name(&sym, symstrs); 608 + demangled = bfd_demangle(NULL, name, DMGL_PARAMS | DMGL_ANSI); 609 + if (demangled != NULL) 610 + name = demangled; 623 611 624 - f = symbol__new(sym.st_value, sym.st_size, 625 - elf_sym__name(&sym, symstrs), 612 + f = symbol__new(sym.st_value, sym.st_size, name, 626 613 self->sym_priv_size, obj_start, verbose); 614 + free(demangled); 627 615 if (!f) 628 616 goto out_elf_end; 629 617 ··· 698 668 if (!ret) 699 669 goto more; 700 670 671 + if (ret > 0) { 672 + int nr_plt = dso__synthesize_plt_symbols(self, verbose); 673 + if (nr_plt > 0) 674 + ret += nr_plt; 675 + } 701 676 out: 702 677 free(name); 703 678 return ret;
+1
tools/perf/util/symbol.h
··· 25 25 struct symbol *(*find_symbol)(struct dso *, u64 ip); 26 26 unsigned int sym_priv_size; 27 27 unsigned char adjust_symbols; 28 + unsigned char slen_calculated; 28 29 char name[0]; 29 30 }; 30 31
+2
tools/perf/util/util.h
··· 50 50 #include <unistd.h> 51 51 #include <stdio.h> 52 52 #include <sys/stat.h> 53 + #include <sys/statfs.h> 53 54 #include <fcntl.h> 54 55 #include <stddef.h> 55 56 #include <stdlib.h> ··· 81 80 #include <netdb.h> 82 81 #include <pwd.h> 83 82 #include <inttypes.h> 83 + #include "../../../include/linux/magic.h" 84 84 85 85 #ifndef NO_ICONV 86 86 #include <iconv.h>