Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
"The main changes in this cycle on the kernel side were:

- CPU PMU and uncore driver updates to Intel Snow Ridge, IceLake,
KabyLake, AmberLake and WhiskeyLake CPUs.

- Rework the MSR probing infrastructure to make it more robust, make
it work better on virtualized systems and to better expose it on
sysfs.

- Rework PMU attributes group support based on the feedback from
Greg. The core sysfs patch that adds sysfs_update_groups() was
acked by Greg.

There are a lot of perf tooling changes as well, all over the place:

- vendor updates to Intel, cs-etm (ARM), ARM64, s390,

- various enhancements to Intel PT tooling support:
- Improve CBR (Core to Bus Ratio) packets support.
- Export power and ptwrite events to sqlite and postgresql.
- Add support for decoding PEBS via PT packets.
- Add support for samples to contain IPC ratio, collecting cycles
information from CYC packets, showing the IPC info periodically
- Allow using time ranges

- lots of updates to perf pmu, perf stat, perf trace, eBPF support,
perf record, perf diff, etc. - please see the shortlog and Git log
for details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (252 commits)
tools arch x86: Sync asm/cpufeatures.h with the kernel
tools build: Check if gettid() is available before providing helper
perf jvmti: Address gcc string overflow warning for strncpy()
perf python: Remove -fstack-protector-strong if clang doesn't have it
perf annotate TUI browser: Do not use member from variable within its own initialization
perf tests: Fix record+probe_libc_inet_pton.sh for powerpc64
perf evsel: Do not rely on errno values for precise_ip fallback
perf thread: Allow references to thread objects after machine__exit()
perf header: Assign proper ff->ph in perf_event__synthesize_features()
tools arch kvm: Sync kvm headers with the kernel sources
perf script: Allow specifying the files to process guest samples
perf tools metric: Don't include duration_time in group
perf list: Avoid extra : for --raw metrics
perf vendor events intel: Metric fixes for SKX/CLX
perf tools: Fix typos / broken sentences
perf jevents: Add support for Hisi hip08 L3C PMU aliasing
perf jevents: Add support for Hisi hip08 HHA PMU aliasing
perf jevents: Add support for Hisi hip08 DDRC PMU aliasing
perf pmu: Support more complex PMU event aliasing
perf diff: Documentation -c cycles option
...

+8991 -2121
+1 -1
arch/x86/events/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 - obj-y += core.o 2 + obj-y += core.o probe.o 3 3 obj-y += amd/ 4 4 obj-$(CONFIG_X86_LOCAL_APIC) += msr.o 5 5 obj-$(CONFIG_CPU_SUP_INTEL) += intel/
+16 -90
arch/x86/events/core.c
··· 1618 1618 .attrs = NULL, 1619 1619 }; 1620 1620 1621 - /* 1622 - * Remove all undefined events (x86_pmu.event_map(id) == 0) 1623 - * out of events_attr attributes. 1624 - */ 1625 - static void __init filter_events(struct attribute **attrs) 1626 - { 1627 - struct device_attribute *d; 1628 - struct perf_pmu_events_attr *pmu_attr; 1629 - int offset = 0; 1630 - int i, j; 1631 - 1632 - for (i = 0; attrs[i]; i++) { 1633 - d = (struct device_attribute *)attrs[i]; 1634 - pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); 1635 - /* str trumps id */ 1636 - if (pmu_attr->event_str) 1637 - continue; 1638 - if (x86_pmu.event_map(i + offset)) 1639 - continue; 1640 - 1641 - for (j = i; attrs[j]; j++) 1642 - attrs[j] = attrs[j + 1]; 1643 - 1644 - /* Check the shifted attr. */ 1645 - i--; 1646 - 1647 - /* 1648 - * event_map() is index based, the attrs array is organized 1649 - * by increasing event index. If we shift the events, then 1650 - * we need to compensate for the event_map(), otherwise 1651 - * we are looking up the wrong event in the map 1652 - */ 1653 - offset++; 1654 - } 1655 - } 1656 - 1657 - /* Merge two pointer arrays */ 1658 - __init struct attribute **merge_attr(struct attribute **a, struct attribute **b) 1659 - { 1660 - struct attribute **new; 1661 - int j, i; 1662 - 1663 - for (j = 0; a && a[j]; j++) 1664 - ; 1665 - for (i = 0; b && b[i]; i++) 1666 - j++; 1667 - j++; 1668 - 1669 - new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL); 1670 - if (!new) 1671 - return NULL; 1672 - 1673 - j = 0; 1674 - for (i = 0; a && a[i]; i++) 1675 - new[j++] = a[i]; 1676 - for (i = 0; b && b[i]; i++) 1677 - new[j++] = b[i]; 1678 - new[j] = NULL; 1679 - 1680 - return new; 1681 - } 1682 - 1683 1621 ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) 1684 1622 { 1685 1623 struct perf_pmu_events_attr *pmu_attr = \ ··· 1682 1744 NULL, 1683 1745 }; 1684 1746 1747 + /* 1748 + * Remove all undefined events 
(x86_pmu.event_map(id) == 0) 1749 + * out of events_attr attributes. 1750 + */ 1751 + static umode_t 1752 + is_visible(struct kobject *kobj, struct attribute *attr, int idx) 1753 + { 1754 + struct perf_pmu_events_attr *pmu_attr; 1755 + 1756 + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); 1757 + /* str trumps id */ 1758 + return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0; 1759 + } 1760 + 1685 1761 static struct attribute_group x86_pmu_events_group __ro_after_init = { 1686 1762 .name = "events", 1687 1763 .attrs = events_attr, 1764 + .is_visible = is_visible, 1688 1765 }; 1689 1766 1690 1767 ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) ··· 1795 1842 1796 1843 x86_pmu_format_group.attrs = x86_pmu.format_attrs; 1797 1844 1798 - if (x86_pmu.caps_attrs) { 1799 - struct attribute **tmp; 1800 - 1801 - tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs); 1802 - if (!WARN_ON(!tmp)) 1803 - x86_pmu_caps_group.attrs = tmp; 1804 - } 1805 - 1806 - if (x86_pmu.event_attrs) 1807 - x86_pmu_events_group.attrs = x86_pmu.event_attrs; 1808 - 1809 1845 if (!x86_pmu.events_sysfs_show) 1810 1846 x86_pmu_events_group.attrs = &empty_attrs; 1811 - else 1812 - filter_events(x86_pmu_events_group.attrs); 1813 1847 1814 - if (x86_pmu.cpu_events) { 1815 - struct attribute **tmp; 1816 - 1817 - tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); 1818 - if (!WARN_ON(!tmp)) 1819 - x86_pmu_events_group.attrs = tmp; 1820 - } 1821 - 1822 - if (x86_pmu.attrs) { 1823 - struct attribute **tmp; 1824 - 1825 - tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs); 1826 - if (!WARN_ON(!tmp)) 1827 - x86_pmu_attr_group.attrs = tmp; 1828 - } 1848 + pmu.attr_update = x86_pmu.attr_update; 1829 1849 1830 1850 pr_info("... version: %d\n", x86_pmu.version); 1831 1851 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
+127 -64
arch/x86/events/intel/core.c
··· 20 20 #include <asm/intel-family.h> 21 21 #include <asm/apic.h> 22 22 #include <asm/cpu_device_id.h> 23 + #include <asm/hypervisor.h> 23 24 24 25 #include "../perf_event.h" 25 26 ··· 3898 3897 .check_period = intel_pmu_check_period, 3899 3898 }; 3900 3899 3901 - static struct attribute *intel_pmu_attrs[]; 3902 - 3903 3900 static __initconst const struct x86_pmu intel_pmu = { 3904 3901 .name = "Intel", 3905 3902 .handle_irq = intel_pmu_handle_irq, ··· 3928 3929 3929 3930 .format_attrs = intel_arch3_formats_attr, 3930 3931 .events_sysfs_show = intel_event_sysfs_show, 3931 - 3932 - .attrs = intel_pmu_attrs, 3933 3932 3934 3933 .cpu_prepare = intel_pmu_cpu_prepare, 3935 3934 .cpu_starting = intel_pmu_cpu_starting, ··· 4050 4053 static bool check_msr(unsigned long msr, u64 mask) 4051 4054 { 4052 4055 u64 val_old, val_new, val_tmp; 4056 + 4057 + /* 4058 + * Disable the check for real HW, so we don't 4059 + * mess with potentionaly enabled registers: 4060 + */ 4061 + if (hypervisor_is_type(X86_HYPER_NATIVE)) 4062 + return true; 4053 4063 4054 4064 /* 4055 4065 * Read the current value, change it and read it back to see if it ··· 4278 4274 NULL, 4279 4275 }; 4280 4276 4281 - static __init struct attribute **get_icl_events_attrs(void) 4282 - { 4283 - return boot_cpu_has(X86_FEATURE_RTM) ? 
4284 - merge_attr(icl_events_attrs, icl_tsx_events_attrs) : 4285 - icl_events_attrs; 4286 - } 4287 - 4288 4277 static ssize_t freeze_on_smi_show(struct device *cdev, 4289 4278 struct device_attribute *attr, 4290 4279 char *buf) ··· 4399 4402 4400 4403 static struct attribute *intel_pmu_attrs[] = { 4401 4404 &dev_attr_freeze_on_smi.attr, 4402 - NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */ 4405 + &dev_attr_allow_tsx_force_abort.attr, 4403 4406 NULL, 4404 4407 }; 4405 4408 4406 - static __init struct attribute ** 4407 - get_events_attrs(struct attribute **base, 4408 - struct attribute **mem, 4409 - struct attribute **tsx) 4409 + static umode_t 4410 + tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) 4410 4411 { 4411 - struct attribute **attrs = base; 4412 - struct attribute **old; 4413 - 4414 - if (mem && x86_pmu.pebs) 4415 - attrs = merge_attr(attrs, mem); 4416 - 4417 - if (tsx && boot_cpu_has(X86_FEATURE_RTM)) { 4418 - old = attrs; 4419 - attrs = merge_attr(attrs, tsx); 4420 - if (old != base) 4421 - kfree(old); 4422 - } 4423 - 4424 - return attrs; 4412 + return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0; 4425 4413 } 4414 + 4415 + static umode_t 4416 + pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) 4417 + { 4418 + return x86_pmu.pebs ? attr->mode : 0; 4419 + } 4420 + 4421 + static umode_t 4422 + lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) 4423 + { 4424 + return x86_pmu.lbr_nr ? attr->mode : 0; 4425 + } 4426 + 4427 + static umode_t 4428 + exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) 4429 + { 4430 + return x86_pmu.version >= 2 ? attr->mode : 0; 4431 + } 4432 + 4433 + static umode_t 4434 + default_is_visible(struct kobject *kobj, struct attribute *attr, int i) 4435 + { 4436 + if (attr == &dev_attr_allow_tsx_force_abort.attr) 4437 + return x86_pmu.flags & PMU_FL_TFA ? 
attr->mode : 0; 4438 + 4439 + return attr->mode; 4440 + } 4441 + 4442 + static struct attribute_group group_events_td = { 4443 + .name = "events", 4444 + }; 4445 + 4446 + static struct attribute_group group_events_mem = { 4447 + .name = "events", 4448 + .is_visible = pebs_is_visible, 4449 + }; 4450 + 4451 + static struct attribute_group group_events_tsx = { 4452 + .name = "events", 4453 + .is_visible = tsx_is_visible, 4454 + }; 4455 + 4456 + static struct attribute_group group_caps_gen = { 4457 + .name = "caps", 4458 + .attrs = intel_pmu_caps_attrs, 4459 + }; 4460 + 4461 + static struct attribute_group group_caps_lbr = { 4462 + .name = "caps", 4463 + .attrs = lbr_attrs, 4464 + .is_visible = lbr_is_visible, 4465 + }; 4466 + 4467 + static struct attribute_group group_format_extra = { 4468 + .name = "format", 4469 + .is_visible = exra_is_visible, 4470 + }; 4471 + 4472 + static struct attribute_group group_format_extra_skl = { 4473 + .name = "format", 4474 + .is_visible = exra_is_visible, 4475 + }; 4476 + 4477 + static struct attribute_group group_default = { 4478 + .attrs = intel_pmu_attrs, 4479 + .is_visible = default_is_visible, 4480 + }; 4481 + 4482 + static const struct attribute_group *attr_update[] = { 4483 + &group_events_td, 4484 + &group_events_mem, 4485 + &group_events_tsx, 4486 + &group_caps_gen, 4487 + &group_caps_lbr, 4488 + &group_format_extra, 4489 + &group_format_extra_skl, 4490 + &group_default, 4491 + NULL, 4492 + }; 4493 + 4494 + static struct attribute *empty_attrs; 4426 4495 4427 4496 __init int intel_pmu_init(void) 4428 4497 { 4429 - struct attribute **extra_attr = NULL; 4430 - struct attribute **mem_attr = NULL; 4431 - struct attribute **tsx_attr = NULL; 4432 - struct attribute **to_free = NULL; 4498 + struct attribute **extra_skl_attr = &empty_attrs; 4499 + struct attribute **extra_attr = &empty_attrs; 4500 + struct attribute **td_attr = &empty_attrs; 4501 + struct attribute **mem_attr = &empty_attrs; 4502 + struct attribute **tsx_attr = 
&empty_attrs; 4433 4503 union cpuid10_edx edx; 4434 4504 union cpuid10_eax eax; 4435 4505 union cpuid10_ebx ebx; 4436 4506 struct event_constraint *c; 4437 4507 unsigned int unused; 4438 4508 struct extra_reg *er; 4509 + bool pmem = false; 4439 4510 int version, i; 4440 4511 char *name; 4441 4512 ··· 4661 4596 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; 4662 4597 x86_pmu.extra_regs = intel_slm_extra_regs; 4663 4598 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 4664 - x86_pmu.cpu_events = slm_events_attrs; 4599 + td_attr = slm_events_attrs; 4665 4600 extra_attr = slm_format_attr; 4666 4601 pr_cont("Silvermont events, "); 4667 4602 name = "silvermont"; ··· 4689 4624 x86_pmu.pebs_prec_dist = true; 4690 4625 x86_pmu.lbr_pt_coexist = true; 4691 4626 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 4692 - x86_pmu.cpu_events = glm_events_attrs; 4627 + td_attr = glm_events_attrs; 4693 4628 extra_attr = slm_format_attr; 4694 4629 pr_cont("Goldmont events, "); 4695 4630 name = "goldmont"; ··· 4716 4651 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 4717 4652 x86_pmu.flags |= PMU_FL_PEBS_ALL; 4718 4653 x86_pmu.get_event_constraints = glp_get_event_constraints; 4719 - x86_pmu.cpu_events = glm_events_attrs; 4654 + td_attr = glm_events_attrs; 4720 4655 /* Goldmont Plus has 4-wide pipeline */ 4721 4656 event_attr_td_total_slots_scale_glm.event_str = "4"; 4722 4657 extra_attr = slm_format_attr; ··· 4805 4740 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 4806 4741 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 4807 4742 4808 - x86_pmu.cpu_events = snb_events_attrs; 4743 + td_attr = snb_events_attrs; 4809 4744 mem_attr = snb_mem_events_attrs; 4810 4745 4811 4746 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ ··· 4846 4781 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 4847 4782 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 4848 4783 4849 - x86_pmu.cpu_events = snb_events_attrs; 4784 + td_attr = snb_events_attrs; 4850 4785 mem_attr = snb_mem_events_attrs; 4851 4786 4852 4787 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ ··· 4883 
4818 4884 4819 x86_pmu.hw_config = hsw_hw_config; 4885 4820 x86_pmu.get_event_constraints = hsw_get_event_constraints; 4886 - x86_pmu.cpu_events = hsw_events_attrs; 4887 4821 x86_pmu.lbr_double_abort = true; 4888 4822 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 4889 4823 hsw_format_attr : nhm_format_attr; 4824 + td_attr = hsw_events_attrs; 4890 4825 mem_attr = hsw_mem_events_attrs; 4891 4826 tsx_attr = hsw_tsx_events_attrs; 4892 4827 pr_cont("Haswell events, "); ··· 4925 4860 4926 4861 x86_pmu.hw_config = hsw_hw_config; 4927 4862 x86_pmu.get_event_constraints = hsw_get_event_constraints; 4928 - x86_pmu.cpu_events = hsw_events_attrs; 4929 4863 x86_pmu.limit_period = bdw_limit_period; 4930 4864 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 4931 4865 hsw_format_attr : nhm_format_attr; 4866 + td_attr = hsw_events_attrs; 4932 4867 mem_attr = hsw_mem_events_attrs; 4933 4868 tsx_attr = hsw_tsx_events_attrs; 4934 4869 pr_cont("Broadwell events, "); ··· 4955 4890 name = "knights-landing"; 4956 4891 break; 4957 4892 4893 + case INTEL_FAM6_SKYLAKE_X: 4894 + pmem = true; 4958 4895 case INTEL_FAM6_SKYLAKE_MOBILE: 4959 4896 case INTEL_FAM6_SKYLAKE_DESKTOP: 4960 - case INTEL_FAM6_SKYLAKE_X: 4961 4897 case INTEL_FAM6_KABYLAKE_MOBILE: 4962 4898 case INTEL_FAM6_KABYLAKE_DESKTOP: 4963 4899 x86_add_quirk(intel_pebs_isolation_quirk); ··· 4986 4920 x86_pmu.get_event_constraints = hsw_get_event_constraints; 4987 4921 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
4988 4922 hsw_format_attr : nhm_format_attr; 4989 - extra_attr = merge_attr(extra_attr, skl_format_attr); 4990 - to_free = extra_attr; 4991 - x86_pmu.cpu_events = hsw_events_attrs; 4923 + extra_skl_attr = skl_format_attr; 4924 + td_attr = hsw_events_attrs; 4992 4925 mem_attr = hsw_mem_events_attrs; 4993 4926 tsx_attr = hsw_tsx_events_attrs; 4994 - intel_pmu_pebs_data_source_skl( 4995 - boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); 4927 + intel_pmu_pebs_data_source_skl(pmem); 4996 4928 4997 4929 if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { 4998 4930 x86_pmu.flags |= PMU_FL_TFA; 4999 4931 x86_pmu.get_event_constraints = tfa_get_event_constraints; 5000 4932 x86_pmu.enable_all = intel_tfa_pmu_enable_all; 5001 4933 x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; 5002 - intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr; 5003 4934 } 5004 4935 5005 4936 pr_cont("Skylake events, "); 5006 4937 name = "skylake"; 5007 4938 break; 5008 4939 4940 + case INTEL_FAM6_ICELAKE_X: 4941 + case INTEL_FAM6_ICELAKE_XEON_D: 4942 + pmem = true; 5009 4943 case INTEL_FAM6_ICELAKE_MOBILE: 4944 + case INTEL_FAM6_ICELAKE_DESKTOP: 5010 4945 x86_pmu.late_ack = true; 5011 4946 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 5012 4947 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); ··· 5026 4959 x86_pmu.get_event_constraints = icl_get_event_constraints; 5027 4960 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
5028 4961 hsw_format_attr : nhm_format_attr; 5029 - extra_attr = merge_attr(extra_attr, skl_format_attr); 5030 - x86_pmu.cpu_events = get_icl_events_attrs(); 4962 + extra_skl_attr = skl_format_attr; 4963 + mem_attr = icl_events_attrs; 4964 + tsx_attr = icl_tsx_events_attrs; 5031 4965 x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); 5032 4966 x86_pmu.lbr_pt_coexist = true; 5033 - intel_pmu_pebs_data_source_skl(false); 4967 + intel_pmu_pebs_data_source_skl(pmem); 5034 4968 pr_cont("Icelake events, "); 5035 4969 name = "icelake"; 5036 4970 break; ··· 5056 4988 5057 4989 snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name); 5058 4990 5059 - if (version >= 2 && extra_attr) { 5060 - x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, 5061 - extra_attr); 5062 - WARN_ON(!x86_pmu.format_attrs); 5063 - } 5064 4991 5065 - x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events, 5066 - mem_attr, tsx_attr); 4992 + group_events_td.attrs = td_attr; 4993 + group_events_mem.attrs = mem_attr; 4994 + group_events_tsx.attrs = tsx_attr; 4995 + group_format_extra.attrs = extra_attr; 4996 + group_format_extra_skl.attrs = extra_skl_attr; 4997 + 4998 + x86_pmu.attr_update = attr_update; 5067 4999 5068 5000 if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { 5069 5001 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", ··· 5111 5043 x86_pmu.lbr_nr = 0; 5112 5044 } 5113 5045 5114 - x86_pmu.caps_attrs = intel_pmu_caps_attrs; 5115 - 5116 - if (x86_pmu.lbr_nr) { 5117 - x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs); 5046 + if (x86_pmu.lbr_nr) 5118 5047 pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); 5119 - } 5120 5048 5121 5049 /* 5122 5050 * Access extra MSR may cause #GP under certain circumstances. ··· 5142 5078 if (x86_pmu.counter_freezing) 5143 5079 x86_pmu.handle_irq = intel_pmu_handle_irq_v4; 5144 5080 5145 - kfree(to_free); 5146 5081 return 0; 5147 5082 } 5148 5083
+86 -63
arch/x86/events/intel/cstate.c
··· 96 96 #include <asm/cpu_device_id.h> 97 97 #include <asm/intel-family.h> 98 98 #include "../perf_event.h" 99 + #include "../probe.h" 99 100 100 101 MODULE_LICENSE("GPL"); 101 102 ··· 145 144 PERF_CSTATE_CORE_EVENT_MAX, 146 145 }; 147 146 148 - PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00"); 149 - PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01"); 150 - PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02"); 151 - PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03"); 147 + PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00"); 148 + PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01"); 149 + PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02"); 150 + PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03"); 152 151 153 - static struct perf_cstate_msr core_msr[] = { 154 - [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 }, 155 - [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 }, 156 - [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 }, 157 - [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 }, 152 + static unsigned long core_msr_mask; 153 + 154 + PMU_EVENT_GROUP(events, cstate_core_c1); 155 + PMU_EVENT_GROUP(events, cstate_core_c3); 156 + PMU_EVENT_GROUP(events, cstate_core_c6); 157 + PMU_EVENT_GROUP(events, cstate_core_c7); 158 + 159 + static bool test_msr(int idx, void *data) 160 + { 161 + return test_bit(idx, (unsigned long *) data); 162 + } 163 + 164 + static struct perf_msr core_msr[] = { 165 + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &group_cstate_core_c1, test_msr }, 166 + [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &group_cstate_core_c3, test_msr }, 167 + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &group_cstate_core_c6, test_msr }, 168 + [PERF_CSTATE_CORE_C7_RES] = { 
MSR_CORE_C7_RESIDENCY, &group_cstate_core_c7, test_msr }, 158 169 }; 159 170 160 - static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = { 171 + static struct attribute *attrs_empty[] = { 161 172 NULL, 162 173 }; 163 174 175 + /* 176 + * There are no default events, but we need to create 177 + * "events" group (with empty attrs) before updating 178 + * it with detected events. 179 + */ 164 180 static struct attribute_group core_events_attr_group = { 165 181 .name = "events", 166 - .attrs = core_events_attrs, 182 + .attrs = attrs_empty, 167 183 }; 168 184 169 185 DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"); ··· 229 211 PERF_CSTATE_PKG_EVENT_MAX, 230 212 }; 231 213 232 - PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00"); 233 - PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01"); 234 - PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02"); 235 - PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03"); 236 - PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04"); 237 - PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05"); 238 - PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06"); 214 + PMU_EVENT_ATTR_STRING(c2-residency, attr_cstate_pkg_c2, "event=0x00"); 215 + PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_pkg_c3, "event=0x01"); 216 + PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_pkg_c6, "event=0x02"); 217 + PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_pkg_c7, "event=0x03"); 218 + PMU_EVENT_ATTR_STRING(c8-residency, attr_cstate_pkg_c8, "event=0x04"); 219 + PMU_EVENT_ATTR_STRING(c9-residency, attr_cstate_pkg_c9, "event=0x05"); 220 + PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06"); 239 221 240 - static struct perf_cstate_msr pkg_msr[] = { 241 - [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 }, 242 - [PERF_CSTATE_PKG_C3_RES] = { 
MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 }, 243 - [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 }, 244 - [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 }, 245 - [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 }, 246 - [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 }, 247 - [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 }, 248 - }; 222 + static unsigned long pkg_msr_mask; 249 223 250 - static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = { 251 - NULL, 224 + PMU_EVENT_GROUP(events, cstate_pkg_c2); 225 + PMU_EVENT_GROUP(events, cstate_pkg_c3); 226 + PMU_EVENT_GROUP(events, cstate_pkg_c6); 227 + PMU_EVENT_GROUP(events, cstate_pkg_c7); 228 + PMU_EVENT_GROUP(events, cstate_pkg_c8); 229 + PMU_EVENT_GROUP(events, cstate_pkg_c9); 230 + PMU_EVENT_GROUP(events, cstate_pkg_c10); 231 + 232 + static struct perf_msr pkg_msr[] = { 233 + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &group_cstate_pkg_c2, test_msr }, 234 + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &group_cstate_pkg_c3, test_msr }, 235 + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &group_cstate_pkg_c6, test_msr }, 236 + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &group_cstate_pkg_c7, test_msr }, 237 + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &group_cstate_pkg_c8, test_msr }, 238 + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &group_cstate_pkg_c9, test_msr }, 239 + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, 252 240 }; 253 241 254 242 static struct attribute_group pkg_events_attr_group = { 255 243 .name = "events", 256 - .attrs = pkg_events_attrs, 244 + .attrs = attrs_empty, 257 245 }; 258 246 259 247 DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63"); ··· 313 289 if (event->pmu == &cstate_core_pmu) { 314 290 if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) 315 291 return 
-EINVAL; 316 - if (!core_msr[cfg].attr) 292 + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX); 293 + if (!(core_msr_mask & (1 << cfg))) 317 294 return -EINVAL; 318 295 event->hw.event_base = core_msr[cfg].msr; 319 296 cpu = cpumask_any_and(&cstate_core_cpu_mask, ··· 323 298 if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) 324 299 return -EINVAL; 325 300 cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); 326 - if (!pkg_msr[cfg].attr) 301 + if (!(pkg_msr_mask & (1 << cfg))) 327 302 return -EINVAL; 328 303 event->hw.event_base = pkg_msr[cfg].msr; 329 304 cpu = cpumask_any_and(&cstate_pkg_cpu_mask, ··· 446 421 return 0; 447 422 } 448 423 424 + const struct attribute_group *core_attr_update[] = { 425 + &group_cstate_core_c1, 426 + &group_cstate_core_c3, 427 + &group_cstate_core_c6, 428 + &group_cstate_core_c7, 429 + NULL, 430 + }; 431 + 432 + const struct attribute_group *pkg_attr_update[] = { 433 + &group_cstate_pkg_c2, 434 + &group_cstate_pkg_c3, 435 + &group_cstate_pkg_c6, 436 + &group_cstate_pkg_c7, 437 + &group_cstate_pkg_c8, 438 + &group_cstate_pkg_c9, 439 + &group_cstate_pkg_c10, 440 + NULL, 441 + }; 442 + 449 443 static struct pmu cstate_core_pmu = { 450 444 .attr_groups = core_attr_groups, 445 + .attr_update = core_attr_update, 451 446 .name = "cstate_core", 452 447 .task_ctx_nr = perf_invalid_context, 453 448 .event_init = cstate_pmu_event_init, ··· 482 437 483 438 static struct pmu cstate_pkg_pmu = { 484 439 .attr_groups = pkg_attr_groups, 440 + .attr_update = pkg_attr_update, 485 441 .name = "cstate_pkg", 486 442 .task_ctx_nr = perf_invalid_context, 487 443 .event_init = cstate_pmu_event_init, ··· 626 580 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), 627 581 628 582 X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), 583 + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates), 629 584 { }, 630 585 }; 631 586 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); 632 - 633 - /* 634 - * Probe the 
cstate events and insert the available one into sysfs attrs 635 - * Return false if there are no available events. 636 - */ 637 - static bool __init cstate_probe_msr(const unsigned long evmsk, int max, 638 - struct perf_cstate_msr *msr, 639 - struct attribute **attrs) 640 - { 641 - bool found = false; 642 - unsigned int bit; 643 - u64 val; 644 - 645 - for (bit = 0; bit < max; bit++) { 646 - if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) { 647 - *attrs++ = &msr[bit].attr->attr.attr; 648 - found = true; 649 - } else { 650 - msr[bit].attr = NULL; 651 - } 652 - } 653 - *attrs = NULL; 654 - 655 - return found; 656 - } 657 587 658 588 static int __init cstate_probe(const struct cstate_model *cm) 659 589 { ··· 642 620 pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; 643 621 644 622 645 - has_cstate_core = cstate_probe_msr(cm->core_events, 646 - PERF_CSTATE_CORE_EVENT_MAX, 647 - core_msr, core_events_attrs); 623 + core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX, 624 + true, (void *) &cm->core_events); 648 625 649 - has_cstate_pkg = cstate_probe_msr(cm->pkg_events, 650 - PERF_CSTATE_PKG_EVENT_MAX, 651 - pkg_msr, pkg_events_attrs); 626 + pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX, 627 + true, (void *) &cm->pkg_events); 628 + 629 + has_cstate_core = !!core_msr_mask; 630 + has_cstate_pkg = !!pkg_msr_mask; 652 631 653 632 return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; 654 633 }
+181 -220
arch/x86/events/intel/rapl.c
··· 55 55 #include <linux/module.h> 56 56 #include <linux/slab.h> 57 57 #include <linux/perf_event.h> 58 + #include <linux/nospec.h> 58 59 #include <asm/cpu_device_id.h> 59 60 #include <asm/intel-family.h> 60 61 #include "../perf_event.h" 62 + #include "../probe.h" 61 63 62 64 MODULE_LICENSE("GPL"); 63 65 64 66 /* 65 67 * RAPL energy status counters 66 68 */ 67 - #define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ 68 - #define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ 69 - #define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ 70 - #define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ 71 - #define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ 72 - #define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ 73 - #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ 74 - #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ 75 - #define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */ 76 - #define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */ 69 + enum perf_rapl_events { 70 + PERF_RAPL_PP0 = 0, /* all cores */ 71 + PERF_RAPL_PKG, /* entire package */ 72 + PERF_RAPL_RAM, /* DRAM */ 73 + PERF_RAPL_PP1, /* gpu */ 74 + PERF_RAPL_PSYS, /* psys */ 77 75 78 - #define NR_RAPL_DOMAINS 0x5 76 + PERF_RAPL_MAX, 77 + NR_RAPL_DOMAINS = PERF_RAPL_MAX, 78 + }; 79 + 79 80 static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { 80 81 "pp0-core", 81 82 "package", ··· 84 83 "pp1-gpu", 85 84 "psys", 86 85 }; 87 - 88 - /* Clients have PP0, PKG */ 89 - #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ 90 - 1<<RAPL_IDX_PKG_NRG_STAT|\ 91 - 1<<RAPL_IDX_PP1_NRG_STAT) 92 - 93 - /* Servers have PP0, PKG, RAM */ 94 - #define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\ 95 - 1<<RAPL_IDX_PKG_NRG_STAT|\ 96 - 1<<RAPL_IDX_RAM_NRG_STAT) 97 - 98 - /* Servers have PP0, PKG, RAM, PP1 */ 99 - #define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\ 100 - 1<<RAPL_IDX_PKG_NRG_STAT|\ 101 - 1<<RAPL_IDX_RAM_NRG_STAT|\ 102 - 1<<RAPL_IDX_PP1_NRG_STAT) 103 - 104 - /* SKL clients have PP0, PKG, RAM, PP1, PSYS */ 105 - #define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ 106 
- 1<<RAPL_IDX_PKG_NRG_STAT|\ 107 - 1<<RAPL_IDX_RAM_NRG_STAT|\ 108 - 1<<RAPL_IDX_PP1_NRG_STAT|\ 109 - 1<<RAPL_IDX_PSYS_NRG_STAT) 110 - 111 - /* Knights Landing has PKG, RAM */ 112 - #define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\ 113 - 1<<RAPL_IDX_RAM_NRG_STAT) 114 86 115 87 /* 116 88 * event code: LSB 8 bits, passed in attr->config ··· 127 153 struct rapl_pmu *pmus[]; 128 154 }; 129 155 156 + struct rapl_model { 157 + unsigned long events; 158 + bool apply_quirk; 159 + }; 160 + 130 161 /* 1/2^hw_unit Joule */ 131 162 static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; 132 163 static struct rapl_pmus *rapl_pmus; 133 164 static cpumask_t rapl_cpu_mask; 134 165 static unsigned int rapl_cntr_mask; 135 166 static u64 rapl_timer_ms; 167 + static struct perf_msr rapl_msrs[]; 136 168 137 169 static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) 138 170 { ··· 330 350 static int rapl_pmu_event_init(struct perf_event *event) 331 351 { 332 352 u64 cfg = event->attr.config & RAPL_EVENT_MASK; 333 - int bit, msr, ret = 0; 353 + int bit, ret = 0; 334 354 struct rapl_pmu *pmu; 335 355 336 356 /* only look at RAPL events */ ··· 346 366 347 367 event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; 348 368 349 - /* 350 - * check event is known (determines counter) 351 - */ 352 - switch (cfg) { 353 - case INTEL_RAPL_PP0: 354 - bit = RAPL_IDX_PP0_NRG_STAT; 355 - msr = MSR_PP0_ENERGY_STATUS; 356 - break; 357 - case INTEL_RAPL_PKG: 358 - bit = RAPL_IDX_PKG_NRG_STAT; 359 - msr = MSR_PKG_ENERGY_STATUS; 360 - break; 361 - case INTEL_RAPL_RAM: 362 - bit = RAPL_IDX_RAM_NRG_STAT; 363 - msr = MSR_DRAM_ENERGY_STATUS; 364 - break; 365 - case INTEL_RAPL_PP1: 366 - bit = RAPL_IDX_PP1_NRG_STAT; 367 - msr = MSR_PP1_ENERGY_STATUS; 368 - break; 369 - case INTEL_RAPL_PSYS: 370 - bit = RAPL_IDX_PSYS_NRG_STAT; 371 - msr = MSR_PLATFORM_ENERGY_STATUS; 372 - break; 373 - default: 369 + if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) 374 370 return -EINVAL; 375 - } 371 + 372 + cfg = 
array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); 373 + bit = cfg - 1; 374 + 376 375 /* check event supported */ 377 376 if (!(rapl_cntr_mask & (1 << bit))) 378 377 return -EINVAL; ··· 366 407 return -EINVAL; 367 408 event->cpu = pmu->cpu; 368 409 event->pmu_private = pmu; 369 - event->hw.event_base = msr; 410 + event->hw.event_base = rapl_msrs[bit].msr; 370 411 event->hw.config = cfg; 371 412 event->hw.idx = bit; 372 413 ··· 416 457 RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); 417 458 RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); 418 459 419 - static struct attribute *rapl_events_srv_attr[] = { 420 - EVENT_PTR(rapl_cores), 421 - EVENT_PTR(rapl_pkg), 422 - EVENT_PTR(rapl_ram), 423 - 424 - EVENT_PTR(rapl_cores_unit), 425 - EVENT_PTR(rapl_pkg_unit), 426 - EVENT_PTR(rapl_ram_unit), 427 - 428 - EVENT_PTR(rapl_cores_scale), 429 - EVENT_PTR(rapl_pkg_scale), 430 - EVENT_PTR(rapl_ram_scale), 431 - NULL, 432 - }; 433 - 434 - static struct attribute *rapl_events_cln_attr[] = { 435 - EVENT_PTR(rapl_cores), 436 - EVENT_PTR(rapl_pkg), 437 - EVENT_PTR(rapl_gpu), 438 - 439 - EVENT_PTR(rapl_cores_unit), 440 - EVENT_PTR(rapl_pkg_unit), 441 - EVENT_PTR(rapl_gpu_unit), 442 - 443 - EVENT_PTR(rapl_cores_scale), 444 - EVENT_PTR(rapl_pkg_scale), 445 - EVENT_PTR(rapl_gpu_scale), 446 - NULL, 447 - }; 448 - 449 - static struct attribute *rapl_events_hsw_attr[] = { 450 - EVENT_PTR(rapl_cores), 451 - EVENT_PTR(rapl_pkg), 452 - EVENT_PTR(rapl_gpu), 453 - EVENT_PTR(rapl_ram), 454 - 455 - EVENT_PTR(rapl_cores_unit), 456 - EVENT_PTR(rapl_pkg_unit), 457 - EVENT_PTR(rapl_gpu_unit), 458 - EVENT_PTR(rapl_ram_unit), 459 - 460 - EVENT_PTR(rapl_cores_scale), 461 - EVENT_PTR(rapl_pkg_scale), 462 - EVENT_PTR(rapl_gpu_scale), 463 - EVENT_PTR(rapl_ram_scale), 464 - NULL, 465 - }; 466 - 467 - static struct attribute *rapl_events_skl_attr[] = { 468 - EVENT_PTR(rapl_cores), 469 - EVENT_PTR(rapl_pkg), 470 - 
EVENT_PTR(rapl_gpu), 471 - EVENT_PTR(rapl_ram), 472 - EVENT_PTR(rapl_psys), 473 - 474 - EVENT_PTR(rapl_cores_unit), 475 - EVENT_PTR(rapl_pkg_unit), 476 - EVENT_PTR(rapl_gpu_unit), 477 - EVENT_PTR(rapl_ram_unit), 478 - EVENT_PTR(rapl_psys_unit), 479 - 480 - EVENT_PTR(rapl_cores_scale), 481 - EVENT_PTR(rapl_pkg_scale), 482 - EVENT_PTR(rapl_gpu_scale), 483 - EVENT_PTR(rapl_ram_scale), 484 - EVENT_PTR(rapl_psys_scale), 485 - NULL, 486 - }; 487 - 488 - static struct attribute *rapl_events_knl_attr[] = { 489 - EVENT_PTR(rapl_pkg), 490 - EVENT_PTR(rapl_ram), 491 - 492 - EVENT_PTR(rapl_pkg_unit), 493 - EVENT_PTR(rapl_ram_unit), 494 - 495 - EVENT_PTR(rapl_pkg_scale), 496 - EVENT_PTR(rapl_ram_scale), 460 + /* 461 + * There are no default events, but we need to create 462 + * "events" group (with empty attrs) before updating 463 + * it with detected events. 464 + */ 465 + static struct attribute *attrs_empty[] = { 497 466 NULL, 498 467 }; 499 468 500 469 static struct attribute_group rapl_pmu_events_group = { 501 470 .name = "events", 502 - .attrs = NULL, /* patched at runtime */ 471 + .attrs = attrs_empty, 503 472 }; 504 473 505 474 DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); ··· 446 559 &rapl_pmu_format_group, 447 560 &rapl_pmu_events_group, 448 561 NULL, 562 + }; 563 + 564 + static struct attribute *rapl_events_cores[] = { 565 + EVENT_PTR(rapl_cores), 566 + EVENT_PTR(rapl_cores_unit), 567 + EVENT_PTR(rapl_cores_scale), 568 + NULL, 569 + }; 570 + 571 + static struct attribute_group rapl_events_cores_group = { 572 + .name = "events", 573 + .attrs = rapl_events_cores, 574 + }; 575 + 576 + static struct attribute *rapl_events_pkg[] = { 577 + EVENT_PTR(rapl_pkg), 578 + EVENT_PTR(rapl_pkg_unit), 579 + EVENT_PTR(rapl_pkg_scale), 580 + NULL, 581 + }; 582 + 583 + static struct attribute_group rapl_events_pkg_group = { 584 + .name = "events", 585 + .attrs = rapl_events_pkg, 586 + }; 587 + 588 + static struct attribute *rapl_events_ram[] = { 589 + EVENT_PTR(rapl_ram), 590 + 
EVENT_PTR(rapl_ram_unit), 591 + EVENT_PTR(rapl_ram_scale), 592 + NULL, 593 + }; 594 + 595 + static struct attribute_group rapl_events_ram_group = { 596 + .name = "events", 597 + .attrs = rapl_events_ram, 598 + }; 599 + 600 + static struct attribute *rapl_events_gpu[] = { 601 + EVENT_PTR(rapl_gpu), 602 + EVENT_PTR(rapl_gpu_unit), 603 + EVENT_PTR(rapl_gpu_scale), 604 + NULL, 605 + }; 606 + 607 + static struct attribute_group rapl_events_gpu_group = { 608 + .name = "events", 609 + .attrs = rapl_events_gpu, 610 + }; 611 + 612 + static struct attribute *rapl_events_psys[] = { 613 + EVENT_PTR(rapl_psys), 614 + EVENT_PTR(rapl_psys_unit), 615 + EVENT_PTR(rapl_psys_scale), 616 + NULL, 617 + }; 618 + 619 + static struct attribute_group rapl_events_psys_group = { 620 + .name = "events", 621 + .attrs = rapl_events_psys, 622 + }; 623 + 624 + static bool test_msr(int idx, void *data) 625 + { 626 + return test_bit(idx, (unsigned long *) data); 627 + } 628 + 629 + static struct perf_msr rapl_msrs[] = { 630 + [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr }, 631 + [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, 632 + [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr }, 633 + [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr }, 634 + [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr }, 449 635 }; 450 636 451 637 static int rapl_cpu_offline(unsigned int cpu) ··· 593 633 * of 2. 
Datasheet, September 2014, Reference Number: 330784-001 " 594 634 */ 595 635 if (apply_quirk) 596 - rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; 636 + rapl_hw_unit[PERF_RAPL_RAM] = 16; 597 637 598 638 /* 599 639 * Calculate the timer rate: ··· 634 674 kfree(rapl_pmus); 635 675 } 636 676 677 + /* One "events" attribute group per RAPL domain (cores, pkg, ram, gpu, psys); installed as pmu.attr_update in init_rapl_pmus(). */ const struct attribute_group *rapl_attr_update[] = { 678 + &rapl_events_cores_group, 679 + &rapl_events_pkg_group, 680 + &rapl_events_ram_group, 681 + &rapl_events_gpu_group, 682 + &rapl_events_psys_group, 683 + NULL, 684 + }; 685 + 637 686 static int __init init_rapl_pmus(void) 638 687 { 639 688 int maxdie = topology_max_packages() * topology_max_die_per_package(); ··· 655 686 656 687 rapl_pmus->maxdie = maxdie; 657 688 rapl_pmus->pmu.attr_groups = rapl_attr_groups; 689 + rapl_pmus->pmu.attr_update = rapl_attr_update; 658 690 rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; 659 691 rapl_pmus->pmu.event_init = rapl_pmu_event_init; 660 692 rapl_pmus->pmu.add = rapl_pmu_event_add; ··· 671 701 #define X86_RAPL_MODEL_MATCH(model, init) \ 672 702 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } 673 703 674 - struct intel_rapl_init_fun { 675 - bool apply_quirk; 676 - int cntr_mask; 677 - struct attribute **attrs; 704 + static struct rapl_model model_snb = { 705 + .events = BIT(PERF_RAPL_PP0) | 706 + BIT(PERF_RAPL_PKG) | 707 + BIT(PERF_RAPL_PP1), 708 + .apply_quirk = false, 678 709 }; 679 710 680 - static const struct intel_rapl_init_fun snb_rapl_init __initconst = { 681 - .apply_quirk = false, 682 - .cntr_mask = RAPL_IDX_CLN, 683 - .attrs = rapl_events_cln_attr, 711 + static struct rapl_model model_snbep = { 712 + .events = BIT(PERF_RAPL_PP0) | 713 + BIT(PERF_RAPL_PKG) | 714 + BIT(PERF_RAPL_RAM), 715 + .apply_quirk = false, 684 716 }; 685 717 686 - static const struct intel_rapl_init_fun hsx_rapl_init __initconst = { 687 - .apply_quirk = true, 688 - .cntr_mask = RAPL_IDX_SRV, 689 - .attrs = rapl_events_srv_attr, 718 + static struct rapl_model model_hsw = { 719 + 
.events = BIT(PERF_RAPL_PP0) | 720 + BIT(PERF_RAPL_PKG) | 721 + BIT(PERF_RAPL_RAM) | 722 + BIT(PERF_RAPL_PP1), 723 + .apply_quirk = false, 690 724 }; 691 725 692 - static const struct intel_rapl_init_fun hsw_rapl_init __initconst = { 693 - .apply_quirk = false, 694 - .cntr_mask = RAPL_IDX_HSW, 695 - .attrs = rapl_events_hsw_attr, 726 + static struct rapl_model model_hsx = { 727 + .events = BIT(PERF_RAPL_PP0) | 728 + BIT(PERF_RAPL_PKG) | 729 + BIT(PERF_RAPL_RAM), 730 + .apply_quirk = true, 696 731 }; 697 732 698 - static const struct intel_rapl_init_fun snbep_rapl_init __initconst = { 699 - .apply_quirk = false, 700 - .cntr_mask = RAPL_IDX_SRV, 701 - .attrs = rapl_events_srv_attr, 733 + static struct rapl_model model_knl = { 734 + .events = BIT(PERF_RAPL_PKG) | 735 + BIT(PERF_RAPL_RAM), 736 + .apply_quirk = true, 702 737 }; 703 738 704 - static const struct intel_rapl_init_fun knl_rapl_init __initconst = { 705 - .apply_quirk = true, 706 - .cntr_mask = RAPL_IDX_KNL, 707 - .attrs = rapl_events_knl_attr, 739 + static struct rapl_model model_skl = { 740 + .events = BIT(PERF_RAPL_PP0) | 741 + BIT(PERF_RAPL_PKG) | 742 + BIT(PERF_RAPL_RAM) | 743 + BIT(PERF_RAPL_PP1) | 744 + BIT(PERF_RAPL_PSYS), 745 + .apply_quirk = false, 708 746 }; 709 747 710 - static const struct intel_rapl_init_fun skl_rapl_init __initconst = { 711 - .apply_quirk = false, 712 - .cntr_mask = RAPL_IDX_SKL_CLN, 713 - .attrs = rapl_events_skl_attr, 714 - }; 715 - 716 - static const struct x86_cpu_id rapl_cpu_match[] __initconst = { 717 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), 718 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), 719 - 720 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), 721 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), 722 - 723 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), 724 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init), 725 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), 
726 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), 727 - 728 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), 729 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), 730 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), 731 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init), 732 - 733 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), 734 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), 735 - 736 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), 737 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), 738 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), 739 - 740 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init), 741 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), 742 - 743 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init), 744 - 745 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), 746 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), 747 - 748 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), 749 - 750 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init), 748 + static const struct x86_cpu_id rapl_model_match[] __initconst = { 749 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb), 750 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep), 751 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb), 752 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep), 753 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, model_hsw), 754 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx), 755 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, model_hsw), 756 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, model_hsw), 757 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, model_hsw), 758 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, model_hsw), 759 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, 
model_hsx), 760 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, model_hsx), 761 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl), 762 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl), 763 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, model_skl), 764 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, model_skl), 765 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx), 766 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, model_skl), 767 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, model_skl), 768 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, model_skl), 769 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw), 770 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, model_hsw), 771 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), 772 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, model_skl), 773 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, model_skl), 751 774 {}, 752 775 }; 753 776 754 - MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match); 777 + MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); 755 778 756 779 static int __init rapl_pmu_init(void) 757 780 { 758 781 const struct x86_cpu_id *id; 759 - struct intel_rapl_init_fun *rapl_init; 760 - bool apply_quirk; 782 + struct rapl_model *rm; 761 783 int ret; 762 784 763 - id = x86_match_cpu(rapl_cpu_match); 785 + id = x86_match_cpu(rapl_model_match); 764 786 if (!id) 765 787 return -ENODEV; 766 788 767 - rapl_init = (struct intel_rapl_init_fun *)id->driver_data; 768 - apply_quirk = rapl_init->apply_quirk; 769 - rapl_cntr_mask = rapl_init->cntr_mask; 770 - rapl_pmu_events_group.attrs = rapl_init->attrs; 789 + rm = (struct rapl_model *) id->driver_data; 790 + rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, 791 + false, (void *) &rm->events); 771 792 772 - ret = rapl_check_hw_unit(apply_quirk); 793 + ret = rapl_check_hw_unit(rm->apply_quirk); 773 794 if (ret) 774 795 return ret; 775 796
+100 -22
arch/x86/events/intel/uncore.c
··· 8 8 static struct intel_uncore_type *empty_uncore[] = { NULL, }; 9 9 struct intel_uncore_type **uncore_msr_uncores = empty_uncore; 10 10 struct intel_uncore_type **uncore_pci_uncores = empty_uncore; 11 + struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; 11 12 12 13 static bool pcidrv_registered; 13 14 struct pci_driver *uncore_pci_driver; ··· 29 28 30 29 MODULE_LICENSE("GPL"); 31 30 32 - static int uncore_pcibus_to_physid(struct pci_bus *bus) 31 + int uncore_pcibus_to_physid(struct pci_bus *bus) 33 32 { 34 33 struct pci2phy_map *map; 35 34 int phys_id = -1; ··· 118 117 rdmsrl(event->hw.event_base, count); 119 118 120 119 return count; 120 + } 121 + 122 + void uncore_mmio_exit_box(struct intel_uncore_box *box) 123 + { 124 + if (box->io_addr) 125 + iounmap(box->io_addr); 126 + } 127 + 128 + u64 uncore_mmio_read_counter(struct intel_uncore_box *box, 129 + struct perf_event *event) 130 + { 131 + if (!box->io_addr) 132 + return 0; 133 + 134 + return readq(box->io_addr + event->hw.event_base); 121 135 } 122 136 123 137 /* ··· 1159 1143 uncore_change_type_ctx(*uncores, old_cpu, new_cpu); 1160 1144 } 1161 1145 1162 - static int uncore_event_cpu_offline(unsigned int cpu) 1146 + static void uncore_box_unref(struct intel_uncore_type **types, int id) 1163 1147 { 1164 - struct intel_uncore_type *type, **types = uncore_msr_uncores; 1148 + struct intel_uncore_type *type; 1165 1149 struct intel_uncore_pmu *pmu; 1166 1150 struct intel_uncore_box *box; 1167 - int i, die, target; 1151 + int i; 1152 + 1153 + for (; *types; types++) { 1154 + type = *types; 1155 + pmu = type->pmus; 1156 + for (i = 0; i < type->num_boxes; i++, pmu++) { 1157 + box = pmu->boxes[id]; 1158 + if (box && atomic_dec_return(&box->refcnt) == 0) 1159 + uncore_box_exit(box); 1160 + } 1161 + } 1162 + } 1163 + 1164 + static int uncore_event_cpu_offline(unsigned int cpu) 1165 + { 1166 + int die, target; 1168 1167 1169 1168 /* Check if exiting cpu is used for collecting uncore events */ 1170 1169 if 
(!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) ··· 1194 1163 target = -1; 1195 1164 1196 1165 uncore_change_context(uncore_msr_uncores, cpu, target); 1166 + uncore_change_context(uncore_mmio_uncores, cpu, target); 1197 1167 uncore_change_context(uncore_pci_uncores, cpu, target); 1198 1168 1199 1169 unref: 1200 1170 /* Clear the references */ 1201 1171 die = topology_logical_die_id(cpu); 1202 - for (; *types; types++) { 1203 - type = *types; 1204 - pmu = type->pmus; 1205 - for (i = 0; i < type->num_boxes; i++, pmu++) { 1206 - box = pmu->boxes[die]; 1207 - if (box && atomic_dec_return(&box->refcnt) == 0) 1208 - uncore_box_exit(box); 1209 - } 1210 - } 1172 + uncore_box_unref(uncore_msr_uncores, die); 1173 + uncore_box_unref(uncore_mmio_uncores, die); 1211 1174 return 0; 1212 1175 } 1213 1176 ··· 1244 1219 return -ENOMEM; 1245 1220 } 1246 1221 1247 - static int uncore_event_cpu_online(unsigned int cpu) 1222 + static int uncore_box_ref(struct intel_uncore_type **types, 1223 + int id, unsigned int cpu) 1248 1224 { 1249 - struct intel_uncore_type *type, **types = uncore_msr_uncores; 1225 + struct intel_uncore_type *type; 1250 1226 struct intel_uncore_pmu *pmu; 1251 1227 struct intel_uncore_box *box; 1252 - int i, ret, die, target; 1228 + int i, ret; 1253 1229 1254 - die = topology_logical_die_id(cpu); 1255 - ret = allocate_boxes(types, die, cpu); 1230 + ret = allocate_boxes(types, id, cpu); 1256 1231 if (ret) 1257 1232 return ret; 1258 1233 ··· 1260 1235 type = *types; 1261 1236 pmu = type->pmus; 1262 1237 for (i = 0; i < type->num_boxes; i++, pmu++) { 1263 - box = pmu->boxes[die]; 1238 + box = pmu->boxes[id]; 1264 1239 if (box && atomic_inc_return(&box->refcnt) == 1) 1265 1240 uncore_box_init(box); 1266 1241 } 1267 1242 } 1243 + return 0; 1244 + } 1245 + 1246 + static int uncore_event_cpu_online(unsigned int cpu) 1247 + { 1248 + int die, target, msr_ret, mmio_ret; 1249 + 1250 + die = topology_logical_die_id(cpu); 1251 + msr_ret = uncore_box_ref(uncore_msr_uncores, 
die, cpu); 1252 + mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); 1253 + if (msr_ret && mmio_ret) 1254 + return -ENOMEM; 1268 1255 1269 1256 /* 1270 1257 * Check if there is an online cpu in the package ··· 1288 1251 1289 1252 cpumask_set_cpu(cpu, &uncore_cpu_mask); 1290 1253 1291 - uncore_change_context(uncore_msr_uncores, -1, cpu); 1254 + if (!msr_ret) 1255 + uncore_change_context(uncore_msr_uncores, -1, cpu); 1256 + if (!mmio_ret) 1257 + uncore_change_context(uncore_mmio_uncores, -1, cpu); 1292 1258 uncore_change_context(uncore_pci_uncores, -1, cpu); 1293 1259 return 0; 1294 1260 } ··· 1339 1299 return ret; 1340 1300 } 1341 1301 1302 + static int __init uncore_mmio_init(void) 1303 + { 1304 + struct intel_uncore_type **types = uncore_mmio_uncores; 1305 + int ret; 1306 + 1307 + ret = uncore_types_init(types, true); 1308 + if (ret) 1309 + goto err; 1310 + 1311 + for (; *types; types++) { 1312 + ret = type_pmu_register(*types); 1313 + if (ret) 1314 + goto err; 1315 + } 1316 + return 0; 1317 + err: 1318 + uncore_types_exit(uncore_mmio_uncores); 1319 + uncore_mmio_uncores = empty_uncore; 1320 + return ret; 1321 + } 1322 + 1323 + 1342 1324 #define X86_UNCORE_MODEL_MATCH(model, init) \ 1343 1325 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } 1344 1326 1345 1327 struct intel_uncore_init_fun { 1346 1328 void (*cpu_init)(void); 1347 1329 int (*pci_init)(void); 1330 + void (*mmio_init)(void); 1348 1331 }; 1349 1332 1350 1333 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { ··· 1438 1375 .pci_init = skl_uncore_pci_init, 1439 1376 }; 1440 1377 1378 + static const struct intel_uncore_init_fun snr_uncore_init __initconst = { 1379 + .cpu_init = snr_uncore_cpu_init, 1380 + .pci_init = snr_uncore_pci_init, 1381 + .mmio_init = snr_uncore_mmio_init, 1382 + }; 1383 + 1441 1384 static const struct x86_cpu_id intel_uncore_match[] __initconst = { 1442 1385 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), 1443 1386 
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), ··· 1472 1403 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), 1473 1404 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), 1474 1405 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), 1406 + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, icl_uncore_init), 1407 + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_X, snr_uncore_init), 1475 1408 {}, 1476 1409 }; 1477 1410 ··· 1483 1412 { 1484 1413 const struct x86_cpu_id *id; 1485 1414 struct intel_uncore_init_fun *uncore_init; 1486 - int pret = 0, cret = 0, ret; 1415 + int pret = 0, cret = 0, mret = 0, ret; 1487 1416 1488 1417 id = x86_match_cpu(intel_uncore_match); 1489 1418 if (!id) ··· 1506 1435 cret = uncore_cpu_init(); 1507 1436 } 1508 1437 1509 - if (cret && pret) 1438 + if (uncore_init->mmio_init) { 1439 + uncore_init->mmio_init(); 1440 + mret = uncore_mmio_init(); 1441 + } 1442 + 1443 + if (cret && pret && mret) 1510 1444 return -ENODEV; 1511 1445 1512 1446 /* Install hotplug callbacks to setup the targets for each package */ ··· 1525 1449 1526 1450 err: 1527 1451 uncore_types_exit(uncore_msr_uncores); 1452 + uncore_types_exit(uncore_mmio_uncores); 1528 1453 uncore_pci_exit(); 1529 1454 return ret; 1530 1455 } ··· 1535 1458 { 1536 1459 cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); 1537 1460 uncore_types_exit(uncore_msr_uncores); 1461 + uncore_types_exit(uncore_mmio_uncores); 1538 1462 uncore_pci_exit(); 1539 1463 } 1540 1464 module_exit(intel_uncore_exit);
+35 -6
arch/x86/events/intel/uncore.h
··· 2 2 #include <linux/slab.h> 3 3 #include <linux/pci.h> 4 4 #include <asm/apicdef.h> 5 + #include <linux/io-64-nonatomic-lo-hi.h> 5 6 6 7 #include <linux/perf_event.h> 7 8 #include "../perf_event.h" ··· 57 56 unsigned fixed_ctr; 58 57 unsigned fixed_ctl; 59 58 unsigned box_ctl; 60 - unsigned msr_offset; 59 + union { 60 + unsigned msr_offset; 61 + unsigned mmio_offset; 62 + }; 61 63 unsigned num_shared_regs:8; 62 64 unsigned single_fixed:1; 63 65 unsigned pair_ctr_ctl:1; ··· 129 125 struct hrtimer hrtimer; 130 126 struct list_head list; 131 127 struct list_head active_list; 132 - void *io_addr; 128 + void __iomem *io_addr; 133 129 struct intel_uncore_extra_reg shared_regs[0]; 134 130 }; 135 131 ··· 163 159 }; 164 160 165 161 struct pci2phy_map *__find_pci2phy_map(int segment); 162 + int uncore_pcibus_to_physid(struct pci_bus *bus); 166 163 167 164 ssize_t uncore_event_show(struct kobject *kobj, 168 165 struct kobj_attribute *attr, char *buf); ··· 193 188 static inline bool uncore_pmc_freerunning(int idx) 194 189 { 195 190 return idx == UNCORE_PMC_IDX_FREERUNNING; 191 + } 192 + 193 + static inline 194 + unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box) 195 + { 196 + return box->pmu->type->box_ctl + 197 + box->pmu->type->mmio_offset * box->pmu->pmu_idx; 196 198 } 197 199 198 200 static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) ··· 342 330 static inline 343 331 unsigned uncore_fixed_ctl(struct intel_uncore_box *box) 344 332 { 345 - if (box->pci_dev) 333 + if (box->pci_dev || box->io_addr) 346 334 return uncore_pci_fixed_ctl(box); 347 335 else 348 336 return uncore_msr_fixed_ctl(box); ··· 351 339 static inline 352 340 unsigned uncore_fixed_ctr(struct intel_uncore_box *box) 353 341 { 354 - if (box->pci_dev) 342 + if (box->pci_dev || box->io_addr) 355 343 return uncore_pci_fixed_ctr(box); 356 344 else 357 345 return uncore_msr_fixed_ctr(box); ··· 360 348 static inline 361 349 unsigned uncore_event_ctl(struct intel_uncore_box *box, 
int idx) 362 350 { 363 - if (box->pci_dev) 351 + if (box->pci_dev || box->io_addr) 364 352 return uncore_pci_event_ctl(box, idx); 365 353 else 366 354 return uncore_msr_event_ctl(box, idx); ··· 369 357 static inline 370 358 unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) 371 359 { 372 - if (box->pci_dev) 360 + if (box->pci_dev || box->io_addr) 373 361 return uncore_pci_perf_ctr(box, idx); 374 362 else 375 363 return uncore_msr_perf_ctr(box, idx); ··· 429 417 430 418 return ((cfg & UNCORE_FIXED_EVENT) == UNCORE_FIXED_EVENT) && 431 419 (((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START); 420 + } 421 + 422 + /* Check and reject invalid config */ 423 + static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, 424 + struct perf_event *event) 425 + { 426 + if (is_freerunning_event(event)) 427 + return 0; 428 + 429 + return -EINVAL; 432 430 } 433 431 434 432 static inline void uncore_disable_box(struct intel_uncore_box *box) ··· 504 482 505 483 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); 506 484 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); 485 + void uncore_mmio_exit_box(struct intel_uncore_box *box); 486 + u64 uncore_mmio_read_counter(struct intel_uncore_box *box, 487 + struct perf_event *event); 507 488 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); 508 489 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); 509 490 void uncore_pmu_event_start(struct perf_event *event, int flags); ··· 522 497 523 498 extern struct intel_uncore_type **uncore_msr_uncores; 524 499 extern struct intel_uncore_type **uncore_pci_uncores; 500 + extern struct intel_uncore_type **uncore_mmio_uncores; 525 501 extern struct pci_driver *uncore_pci_driver; 526 502 extern raw_spinlock_t pci2phy_map_lock; 527 503 extern struct list_head pci2phy_map_head; ··· 554 528 void knl_uncore_cpu_init(void); 555 529 int skx_uncore_pci_init(void); 556 530 void 
skx_uncore_cpu_init(void); 531 + int snr_uncore_pci_init(void); 532 + void snr_uncore_cpu_init(void); 533 + void snr_uncore_mmio_init(void); 557 534 558 535 /* uncore_nhmex.c */ 559 536 void nhmex_uncore_cpu_init(void);
+66 -35
arch/x86/events/intel/uncore_snb.c
··· 3 3 #include "uncore.h" 4 4 5 5 /* Uncore IMC PCI IDs */ 6 - #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 7 - #define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 8 - #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 9 - #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 10 - #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 11 - #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 12 - #define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 13 - #define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c 14 - #define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 15 - #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 16 - #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f 17 - #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f 18 - #define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c 19 - #define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 20 - #define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 21 - #define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f 22 - #define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f 23 - #define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc 24 - #define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 25 - #define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 26 - #define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 6 + #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 7 + #define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 8 + #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 9 + #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 10 + #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 11 + #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 12 + #define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 13 + #define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c 14 + #define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 15 + #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 16 + #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f 17 + #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f 18 + #define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c 19 + #define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 20 + #define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 21 + #define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f 22 + #define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f 23 + #define 
PCI_DEVICE_ID_INTEL_KBL_HQ_IMC 0x5910 24 + #define PCI_DEVICE_ID_INTEL_KBL_WQ_IMC 0x5918 25 + #define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc 26 + #define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 27 + #define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 28 + #define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 27 29 #define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f 28 30 #define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f 29 31 #define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 ··· 36 34 #define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 37 35 #define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca 38 36 #define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 37 + #define PCI_DEVICE_ID_INTEL_AML_YD_IMC 0x590c 38 + #define PCI_DEVICE_ID_INTEL_AML_YQ_IMC 0x590d 39 + #define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0 40 + #define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34 41 + #define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 39 42 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 40 43 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 44 + 41 45 42 46 /* SNB event control */ 43 47 #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff ··· 428 420 box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; 429 421 } 430 422 431 - static void snb_uncore_imc_exit_box(struct intel_uncore_box *box) 432 - { 433 - iounmap(box->io_addr); 434 - } 435 - 436 423 static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) 437 424 {} 438 425 ··· 439 436 440 437 static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) 441 438 {} 442 - 443 - static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) 444 - { 445 - struct hw_perf_event *hwc = &event->hw; 446 - 447 - return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); 448 - } 449 439 450 440 /* 451 441 * Keep the custom event_init() function compatible with old event ··· 566 570 567 571 static struct intel_uncore_ops snb_uncore_imc_ops = { 568 572 .init_box = snb_uncore_imc_init_box, 569 - .exit_box = 
snb_uncore_imc_exit_box, 573 + .exit_box = uncore_mmio_exit_box, 570 574 .enable_box = snb_uncore_imc_enable_box, 571 575 .disable_box = snb_uncore_imc_disable_box, 572 576 .disable_event = snb_uncore_imc_disable_event, 573 577 .enable_event = snb_uncore_imc_enable_event, 574 578 .hw_config = snb_uncore_imc_hw_config, 575 - .read_counter = snb_uncore_imc_read_counter, 579 + .read_counter = uncore_mmio_read_counter, 576 580 }; 577 581 578 582 static struct intel_uncore_type snb_uncore_imc = { ··· 678 682 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 679 683 }, 680 684 { /* IMC */ 685 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC), 686 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 687 + }, 688 + { /* IMC */ 689 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC), 690 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 691 + }, 692 + { /* IMC */ 681 693 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), 682 694 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 683 695 }, ··· 739 735 }, 740 736 { /* IMC */ 741 737 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), 738 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 739 + }, 740 + { /* IMC */ 741 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC), 742 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 743 + }, 744 + { /* IMC */ 745 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC), 746 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 747 + }, 748 + { /* IMC */ 749 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC), 750 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 751 + }, 752 + { /* IMC */ 753 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC), 754 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 755 + }, 756 + { /* IMC */ 757 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 
PCI_DEVICE_ID_INTEL_WHL_UD_IMC), 742 758 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 743 759 }, 744 760 { /* end: all zeroes */ }, ··· 831 807 IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ 832 808 IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ 833 809 IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ 810 + IMC_DEV(KBL_HQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core H Quad Core */ 811 + IMC_DEV(KBL_WQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S 4 cores Work Station */ 834 812 IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ 835 813 IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ 836 814 IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ ··· 847 821 IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ 848 822 IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ 849 823 IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ 824 + IMC_DEV(AML_YD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Dual Core */ 825 + IMC_DEV(AML_YQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Quad Core */ 826 + IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ 827 + IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ 828 + IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */ 850 829 IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ 851 830 IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ 852 831 { /* end marker */ }
+601
arch/x86/events/intel/uncore_snbep.c
··· 324 324 #define SKX_M2M_PCI_PMON_CTR0 0x200 325 325 #define SKX_M2M_PCI_PMON_BOX_CTL 0x258 326 326 327 + /* SNR Ubox */ 328 + #define SNR_U_MSR_PMON_CTR0 0x1f98 329 + #define SNR_U_MSR_PMON_CTL0 0x1f91 330 + #define SNR_U_MSR_PMON_UCLK_FIXED_CTL 0x1f93 331 + #define SNR_U_MSR_PMON_UCLK_FIXED_CTR 0x1f94 332 + 333 + /* SNR CHA */ 334 + #define SNR_CHA_RAW_EVENT_MASK_EXT 0x3ffffff 335 + #define SNR_CHA_MSR_PMON_CTL0 0x1c01 336 + #define SNR_CHA_MSR_PMON_CTR0 0x1c08 337 + #define SNR_CHA_MSR_PMON_BOX_CTL 0x1c00 338 + #define SNR_C0_MSR_PMON_BOX_FILTER0 0x1c05 339 + 340 + 341 + /* SNR IIO */ 342 + #define SNR_IIO_MSR_PMON_CTL0 0x1e08 343 + #define SNR_IIO_MSR_PMON_CTR0 0x1e01 344 + #define SNR_IIO_MSR_PMON_BOX_CTL 0x1e00 345 + #define SNR_IIO_MSR_OFFSET 0x10 346 + #define SNR_IIO_PMON_RAW_EVENT_MASK_EXT 0x7ffff 347 + 348 + /* SNR IRP */ 349 + #define SNR_IRP0_MSR_PMON_CTL0 0x1ea8 350 + #define SNR_IRP0_MSR_PMON_CTR0 0x1ea1 351 + #define SNR_IRP0_MSR_PMON_BOX_CTL 0x1ea0 352 + #define SNR_IRP_MSR_OFFSET 0x10 353 + 354 + /* SNR M2PCIE */ 355 + #define SNR_M2PCIE_MSR_PMON_CTL0 0x1e58 356 + #define SNR_M2PCIE_MSR_PMON_CTR0 0x1e51 357 + #define SNR_M2PCIE_MSR_PMON_BOX_CTL 0x1e50 358 + #define SNR_M2PCIE_MSR_OFFSET 0x10 359 + 360 + /* SNR PCU */ 361 + #define SNR_PCU_MSR_PMON_CTL0 0x1ef1 362 + #define SNR_PCU_MSR_PMON_CTR0 0x1ef8 363 + #define SNR_PCU_MSR_PMON_BOX_CTL 0x1ef0 364 + #define SNR_PCU_MSR_PMON_BOX_FILTER 0x1efc 365 + 366 + /* SNR M2M */ 367 + #define SNR_M2M_PCI_PMON_CTL0 0x468 368 + #define SNR_M2M_PCI_PMON_CTR0 0x440 369 + #define SNR_M2M_PCI_PMON_BOX_CTL 0x438 370 + #define SNR_M2M_PCI_PMON_UMASK_EXT 0xff 371 + 372 + /* SNR PCIE3 */ 373 + #define SNR_PCIE3_PCI_PMON_CTL0 0x508 374 + #define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 375 + #define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4 376 + 377 + /* SNR IMC */ 378 + #define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 379 + #define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 380 + #define SNR_IMC_MMIO_PMON_CTL0 0x40 381 + #define 
SNR_IMC_MMIO_PMON_CTR0 0x8 382 + #define SNR_IMC_MMIO_PMON_BOX_CTL 0x22800 383 + #define SNR_IMC_MMIO_OFFSET 0x4000 384 + #define SNR_IMC_MMIO_SIZE 0x4000 385 + #define SNR_IMC_MMIO_BASE_OFFSET 0xd0 386 + #define SNR_IMC_MMIO_BASE_MASK 0x1FFFFFFF 387 + #define SNR_IMC_MMIO_MEM0_OFFSET 0xd8 388 + #define SNR_IMC_MMIO_MEM0_MASK 0x7FF 389 + 327 390 DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); 328 391 DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); 329 392 DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); 330 393 DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); 331 394 DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); 332 395 DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); 396 + DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); 397 + DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); 333 398 DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); 334 399 DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); 335 400 DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); ··· 408 343 DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); 409 344 DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31"); 410 345 DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43"); 346 + DEFINE_UNCORE_FORMAT_ATTR(ch_mask2, ch_mask, "config:36-47"); 411 347 DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46"); 348 + DEFINE_UNCORE_FORMAT_ATTR(fc_mask2, fc_mask, "config:48-50"); 412 349 DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); 413 350 DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); 414 351 DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); 415 352 DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8"); 353 + DEFINE_UNCORE_FORMAT_ATTR(filter_tid5, filter_tid, "config1:0-9"); 416 354 DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); 417 355 DEFINE_UNCORE_FORMAT_ATTR(filter_link, 
filter_link, "config1:5-8"); 418 356 DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); ··· 3653 3585 3654 3586 static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = { 3655 3587 .read_counter = uncore_msr_read_counter, 3588 + .hw_config = uncore_freerunning_hw_config, 3656 3589 }; 3657 3590 3658 3591 static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = { ··· 4036 3967 } 4037 3968 4038 3969 /* end of SKX uncore support */ 3970 + 3971 + /* SNR uncore support */ 3972 + 3973 + static struct intel_uncore_type snr_uncore_ubox = { 3974 + .name = "ubox", 3975 + .num_counters = 2, 3976 + .num_boxes = 1, 3977 + .perf_ctr_bits = 48, 3978 + .fixed_ctr_bits = 48, 3979 + .perf_ctr = SNR_U_MSR_PMON_CTR0, 3980 + .event_ctl = SNR_U_MSR_PMON_CTL0, 3981 + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, 3982 + .fixed_ctr = SNR_U_MSR_PMON_UCLK_FIXED_CTR, 3983 + .fixed_ctl = SNR_U_MSR_PMON_UCLK_FIXED_CTL, 3984 + .ops = &ivbep_uncore_msr_ops, 3985 + .format_group = &ivbep_uncore_format_group, 3986 + }; 3987 + 3988 + static struct attribute *snr_uncore_cha_formats_attr[] = { 3989 + &format_attr_event.attr, 3990 + &format_attr_umask_ext2.attr, 3991 + &format_attr_edge.attr, 3992 + &format_attr_tid_en.attr, 3993 + &format_attr_inv.attr, 3994 + &format_attr_thresh8.attr, 3995 + &format_attr_filter_tid5.attr, 3996 + NULL, 3997 + }; 3998 + static const struct attribute_group snr_uncore_chabox_format_group = { 3999 + .name = "format", 4000 + .attrs = snr_uncore_cha_formats_attr, 4001 + }; 4002 + 4003 + static int snr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) 4004 + { 4005 + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; 4006 + 4007 + reg1->reg = SNR_C0_MSR_PMON_BOX_FILTER0 + 4008 + box->pmu->type->msr_offset * box->pmu->pmu_idx; 4009 + reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID; 4010 + reg1->idx = 0; 4011 + 4012 + return 0; 4013 + } 4014 + 4015 + static void snr_cha_enable_event(struct 
intel_uncore_box *box, 4016 + struct perf_event *event) 4017 + { 4018 + struct hw_perf_event *hwc = &event->hw; 4019 + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; 4020 + 4021 + if (reg1->idx != EXTRA_REG_NONE) 4022 + wrmsrl(reg1->reg, reg1->config); 4023 + 4024 + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); 4025 + } 4026 + 4027 + static struct intel_uncore_ops snr_uncore_chabox_ops = { 4028 + .init_box = ivbep_uncore_msr_init_box, 4029 + .disable_box = snbep_uncore_msr_disable_box, 4030 + .enable_box = snbep_uncore_msr_enable_box, 4031 + .disable_event = snbep_uncore_msr_disable_event, 4032 + .enable_event = snr_cha_enable_event, 4033 + .read_counter = uncore_msr_read_counter, 4034 + .hw_config = snr_cha_hw_config, 4035 + }; 4036 + 4037 + static struct intel_uncore_type snr_uncore_chabox = { 4038 + .name = "cha", 4039 + .num_counters = 4, 4040 + .num_boxes = 6, 4041 + .perf_ctr_bits = 48, 4042 + .event_ctl = SNR_CHA_MSR_PMON_CTL0, 4043 + .perf_ctr = SNR_CHA_MSR_PMON_CTR0, 4044 + .box_ctl = SNR_CHA_MSR_PMON_BOX_CTL, 4045 + .msr_offset = HSWEP_CBO_MSR_OFFSET, 4046 + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, 4047 + .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT, 4048 + .ops = &snr_uncore_chabox_ops, 4049 + .format_group = &snr_uncore_chabox_format_group, 4050 + }; 4051 + 4052 + static struct attribute *snr_uncore_iio_formats_attr[] = { 4053 + &format_attr_event.attr, 4054 + &format_attr_umask.attr, 4055 + &format_attr_edge.attr, 4056 + &format_attr_inv.attr, 4057 + &format_attr_thresh9.attr, 4058 + &format_attr_ch_mask2.attr, 4059 + &format_attr_fc_mask2.attr, 4060 + NULL, 4061 + }; 4062 + 4063 + static const struct attribute_group snr_uncore_iio_format_group = { 4064 + .name = "format", 4065 + .attrs = snr_uncore_iio_formats_attr, 4066 + }; 4067 + 4068 + static struct intel_uncore_type snr_uncore_iio = { 4069 + .name = "iio", 4070 + .num_counters = 4, 4071 + .num_boxes = 5, 4072 + .perf_ctr_bits = 48, 4073 + .event_ctl = 
SNR_IIO_MSR_PMON_CTL0, 4074 + .perf_ctr = SNR_IIO_MSR_PMON_CTR0, 4075 + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, 4076 + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, 4077 + .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, 4078 + .msr_offset = SNR_IIO_MSR_OFFSET, 4079 + .ops = &ivbep_uncore_msr_ops, 4080 + .format_group = &snr_uncore_iio_format_group, 4081 + }; 4082 + 4083 + static struct intel_uncore_type snr_uncore_irp = { 4084 + .name = "irp", 4085 + .num_counters = 2, 4086 + .num_boxes = 5, 4087 + .perf_ctr_bits = 48, 4088 + .event_ctl = SNR_IRP0_MSR_PMON_CTL0, 4089 + .perf_ctr = SNR_IRP0_MSR_PMON_CTR0, 4090 + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, 4091 + .box_ctl = SNR_IRP0_MSR_PMON_BOX_CTL, 4092 + .msr_offset = SNR_IRP_MSR_OFFSET, 4093 + .ops = &ivbep_uncore_msr_ops, 4094 + .format_group = &ivbep_uncore_format_group, 4095 + }; 4096 + 4097 + static struct intel_uncore_type snr_uncore_m2pcie = { 4098 + .name = "m2pcie", 4099 + .num_counters = 4, 4100 + .num_boxes = 5, 4101 + .perf_ctr_bits = 48, 4102 + .event_ctl = SNR_M2PCIE_MSR_PMON_CTL0, 4103 + .perf_ctr = SNR_M2PCIE_MSR_PMON_CTR0, 4104 + .box_ctl = SNR_M2PCIE_MSR_PMON_BOX_CTL, 4105 + .msr_offset = SNR_M2PCIE_MSR_OFFSET, 4106 + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, 4107 + .ops = &ivbep_uncore_msr_ops, 4108 + .format_group = &ivbep_uncore_format_group, 4109 + }; 4110 + 4111 + static int snr_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) 4112 + { 4113 + struct hw_perf_event *hwc = &event->hw; 4114 + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; 4115 + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; 4116 + 4117 + if (ev_sel >= 0xb && ev_sel <= 0xe) { 4118 + reg1->reg = SNR_PCU_MSR_PMON_BOX_FILTER; 4119 + reg1->idx = ev_sel - 0xb; 4120 + reg1->config = event->attr.config1 & (0xff << reg1->idx); 4121 + } 4122 + return 0; 4123 + } 4124 + 4125 + static struct intel_uncore_ops snr_uncore_pcu_ops = { 4126 + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), 4127 + .hw_config = snr_pcu_hw_config, 
4128 + .get_constraint = snbep_pcu_get_constraint, 4129 + .put_constraint = snbep_pcu_put_constraint, 4130 + }; 4131 + 4132 + static struct intel_uncore_type snr_uncore_pcu = { 4133 + .name = "pcu", 4134 + .num_counters = 4, 4135 + .num_boxes = 1, 4136 + .perf_ctr_bits = 48, 4137 + .perf_ctr = SNR_PCU_MSR_PMON_CTR0, 4138 + .event_ctl = SNR_PCU_MSR_PMON_CTL0, 4139 + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, 4140 + .box_ctl = SNR_PCU_MSR_PMON_BOX_CTL, 4141 + .num_shared_regs = 1, 4142 + .ops = &snr_uncore_pcu_ops, 4143 + .format_group = &skx_uncore_pcu_format_group, 4144 + }; 4145 + 4146 + enum perf_uncore_snr_iio_freerunning_type_id { 4147 + SNR_IIO_MSR_IOCLK, 4148 + SNR_IIO_MSR_BW_IN, 4149 + 4150 + SNR_IIO_FREERUNNING_TYPE_MAX, 4151 + }; 4152 + 4153 + static struct freerunning_counters snr_iio_freerunning[] = { 4154 + [SNR_IIO_MSR_IOCLK] = { 0x1eac, 0x1, 0x10, 1, 48 }, 4155 + [SNR_IIO_MSR_BW_IN] = { 0x1f00, 0x1, 0x10, 8, 48 }, 4156 + }; 4157 + 4158 + static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { 4159 + /* Free-Running IIO CLOCKS Counter */ 4160 + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), 4161 + /* Free-Running IIO BANDWIDTH IN Counters */ 4162 + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), 4163 + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), 4164 + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), 4165 + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), 4166 + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), 4167 + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), 4168 + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), 4169 + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), 4170 + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), 4171 + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), 4172 + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), 4173 + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), 4174 + 
INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), 4175 + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), 4176 + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), 4177 + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), 4178 + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), 4179 + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), 4180 + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), 4181 + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), 4182 + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), 4183 + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), 4184 + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), 4185 + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), 4186 + { /* end: all zeroes */ }, 4187 + }; 4188 + 4189 + static struct intel_uncore_type snr_uncore_iio_free_running = { 4190 + .name = "iio_free_running", 4191 + .num_counters = 9, 4192 + .num_boxes = 5, 4193 + .num_freerunning_types = SNR_IIO_FREERUNNING_TYPE_MAX, 4194 + .freerunning = snr_iio_freerunning, 4195 + .ops = &skx_uncore_iio_freerunning_ops, 4196 + .event_descs = snr_uncore_iio_freerunning_events, 4197 + .format_group = &skx_uncore_iio_freerunning_format_group, 4198 + }; 4199 + 4200 + static struct intel_uncore_type *snr_msr_uncores[] = { 4201 + &snr_uncore_ubox, 4202 + &snr_uncore_chabox, 4203 + &snr_uncore_iio, 4204 + &snr_uncore_irp, 4205 + &snr_uncore_m2pcie, 4206 + &snr_uncore_pcu, 4207 + &snr_uncore_iio_free_running, 4208 + NULL, 4209 + }; 4210 + 4211 + void snr_uncore_cpu_init(void) 4212 + { 4213 + uncore_msr_uncores = snr_msr_uncores; 4214 + } 4215 + 4216 + static void snr_m2m_uncore_pci_init_box(struct intel_uncore_box *box) 4217 + { 4218 + struct pci_dev *pdev = box->pci_dev; 4219 + int box_ctl = uncore_pci_box_ctl(box); 4220 + 4221 + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); 4222 + pci_write_config_dword(pdev, box_ctl, IVBEP_PMON_BOX_CTL_INT); 4223 + } 4224 + 4225 + 
static struct intel_uncore_ops snr_m2m_uncore_pci_ops = { 4226 + .init_box = snr_m2m_uncore_pci_init_box, 4227 + .disable_box = snbep_uncore_pci_disable_box, 4228 + .enable_box = snbep_uncore_pci_enable_box, 4229 + .disable_event = snbep_uncore_pci_disable_event, 4230 + .enable_event = snbep_uncore_pci_enable_event, 4231 + .read_counter = snbep_uncore_pci_read_counter, 4232 + }; 4233 + 4234 + static struct attribute *snr_m2m_uncore_formats_attr[] = { 4235 + &format_attr_event.attr, 4236 + &format_attr_umask_ext3.attr, 4237 + &format_attr_edge.attr, 4238 + &format_attr_inv.attr, 4239 + &format_attr_thresh8.attr, 4240 + NULL, 4241 + }; 4242 + 4243 + static const struct attribute_group snr_m2m_uncore_format_group = { 4244 + .name = "format", 4245 + .attrs = snr_m2m_uncore_formats_attr, 4246 + }; 4247 + 4248 + static struct intel_uncore_type snr_uncore_m2m = { 4249 + .name = "m2m", 4250 + .num_counters = 4, 4251 + .num_boxes = 1, 4252 + .perf_ctr_bits = 48, 4253 + .perf_ctr = SNR_M2M_PCI_PMON_CTR0, 4254 + .event_ctl = SNR_M2M_PCI_PMON_CTL0, 4255 + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, 4256 + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, 4257 + .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, 4258 + .ops = &snr_m2m_uncore_pci_ops, 4259 + .format_group = &snr_m2m_uncore_format_group, 4260 + }; 4261 + 4262 + static struct intel_uncore_type snr_uncore_pcie3 = { 4263 + .name = "pcie3", 4264 + .num_counters = 4, 4265 + .num_boxes = 1, 4266 + .perf_ctr_bits = 48, 4267 + .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, 4268 + .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, 4269 + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, 4270 + .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, 4271 + .ops = &ivbep_uncore_pci_ops, 4272 + .format_group = &ivbep_uncore_format_group, 4273 + }; 4274 + 4275 + enum { 4276 + SNR_PCI_UNCORE_M2M, 4277 + SNR_PCI_UNCORE_PCIE3, 4278 + }; 4279 + 4280 + static struct intel_uncore_type *snr_pci_uncores[] = { 4281 + [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, 4282 + [SNR_PCI_UNCORE_PCIE3] = 
&snr_uncore_pcie3, 4283 + NULL, 4284 + }; 4285 + 4286 + static const struct pci_device_id snr_uncore_pci_ids[] = { 4287 + { /* M2M */ 4288 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), 4289 + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0), 4290 + }, 4291 + { /* PCIe3 */ 4292 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), 4293 + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), 4294 + }, 4295 + { /* end: all zeroes */ } 4296 + }; 4297 + 4298 + static struct pci_driver snr_uncore_pci_driver = { 4299 + .name = "snr_uncore", 4300 + .id_table = snr_uncore_pci_ids, 4301 + }; 4302 + 4303 + int snr_uncore_pci_init(void) 4304 + { 4305 + /* SNR UBOX DID */ 4306 + int ret = snbep_pci2phy_map_init(0x3460, SKX_CPUNODEID, 4307 + SKX_GIDNIDMAP, true); 4308 + 4309 + if (ret) 4310 + return ret; 4311 + 4312 + uncore_pci_uncores = snr_pci_uncores; 4313 + uncore_pci_driver = &snr_uncore_pci_driver; 4314 + return 0; 4315 + } 4316 + 4317 + static struct pci_dev *snr_uncore_get_mc_dev(int id) 4318 + { 4319 + struct pci_dev *mc_dev = NULL; 4320 + int phys_id, pkg; 4321 + 4322 + while (1) { 4323 + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev); 4324 + if (!mc_dev) 4325 + break; 4326 + phys_id = uncore_pcibus_to_physid(mc_dev->bus); 4327 + if (phys_id < 0) 4328 + continue; 4329 + pkg = topology_phys_to_logical_pkg(phys_id); 4330 + if (pkg < 0) 4331 + continue; 4332 + else if (pkg == id) 4333 + break; 4334 + } 4335 + return mc_dev; 4336 + } 4337 + 4338 + static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) 4339 + { 4340 + struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); 4341 + unsigned int box_ctl = uncore_mmio_box_ctl(box); 4342 + resource_size_t addr; 4343 + u32 pci_dword; 4344 + 4345 + if (!pdev) 4346 + return; 4347 + 4348 + pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); 4349 + addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; 4350 + 4351 + pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, 
&pci_dword); 4352 + addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; 4353 + 4354 + addr += box_ctl; 4355 + 4356 + box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE); 4357 + if (!box->io_addr) 4358 + return; 4359 + 4360 + writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); 4361 + } 4362 + 4363 + static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) 4364 + { 4365 + u32 config; 4366 + 4367 + if (!box->io_addr) 4368 + return; 4369 + 4370 + config = readl(box->io_addr); 4371 + config |= SNBEP_PMON_BOX_CTL_FRZ; 4372 + writel(config, box->io_addr); 4373 + } 4374 + 4375 + static void snr_uncore_mmio_enable_box(struct intel_uncore_box *box) 4376 + { 4377 + u32 config; 4378 + 4379 + if (!box->io_addr) 4380 + return; 4381 + 4382 + config = readl(box->io_addr); 4383 + config &= ~SNBEP_PMON_BOX_CTL_FRZ; 4384 + writel(config, box->io_addr); 4385 + } 4386 + 4387 + static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box, 4388 + struct perf_event *event) 4389 + { 4390 + struct hw_perf_event *hwc = &event->hw; 4391 + 4392 + if (!box->io_addr) 4393 + return; 4394 + 4395 + writel(hwc->config | SNBEP_PMON_CTL_EN, 4396 + box->io_addr + hwc->config_base); 4397 + } 4398 + 4399 + static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box, 4400 + struct perf_event *event) 4401 + { 4402 + struct hw_perf_event *hwc = &event->hw; 4403 + 4404 + if (!box->io_addr) 4405 + return; 4406 + 4407 + writel(hwc->config, box->io_addr + hwc->config_base); 4408 + } 4409 + 4410 + static struct intel_uncore_ops snr_uncore_mmio_ops = { 4411 + .init_box = snr_uncore_mmio_init_box, 4412 + .exit_box = uncore_mmio_exit_box, 4413 + .disable_box = snr_uncore_mmio_disable_box, 4414 + .enable_box = snr_uncore_mmio_enable_box, 4415 + .disable_event = snr_uncore_mmio_disable_event, 4416 + .enable_event = snr_uncore_mmio_enable_event, 4417 + .read_counter = uncore_mmio_read_counter, 4418 + }; 4419 + 4420 + static struct uncore_event_desc snr_uncore_imc_events[] = { 4421 + 
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"), 4422 + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x0f"), 4423 + INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"), 4424 + INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"), 4425 + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x30"), 4426 + INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"), 4427 + INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"), 4428 + { /* end: all zeroes */ }, 4429 + }; 4430 + 4431 + static struct intel_uncore_type snr_uncore_imc = { 4432 + .name = "imc", 4433 + .num_counters = 4, 4434 + .num_boxes = 2, 4435 + .perf_ctr_bits = 48, 4436 + .fixed_ctr_bits = 48, 4437 + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, 4438 + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, 4439 + .event_descs = snr_uncore_imc_events, 4440 + .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, 4441 + .event_ctl = SNR_IMC_MMIO_PMON_CTL0, 4442 + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, 4443 + .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, 4444 + .mmio_offset = SNR_IMC_MMIO_OFFSET, 4445 + .ops = &snr_uncore_mmio_ops, 4446 + .format_group = &skx_uncore_format_group, 4447 + }; 4448 + 4449 + enum perf_uncore_snr_imc_freerunning_type_id { 4450 + SNR_IMC_DCLK, 4451 + SNR_IMC_DDR, 4452 + 4453 + SNR_IMC_FREERUNNING_TYPE_MAX, 4454 + }; 4455 + 4456 + static struct freerunning_counters snr_imc_freerunning[] = { 4457 + [SNR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, 4458 + [SNR_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 }, 4459 + }; 4460 + 4461 + static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { 4462 + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), 4463 + 4464 + INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), 4465 + INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), 4466 + INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), 4467 + INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), 4468 + INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), 4469 + 
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), 4470 + }; 4471 + 4472 + static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = { 4473 + .init_box = snr_uncore_mmio_init_box, 4474 + .exit_box = uncore_mmio_exit_box, 4475 + .read_counter = uncore_mmio_read_counter, 4476 + .hw_config = uncore_freerunning_hw_config, 4477 + }; 4478 + 4479 + static struct intel_uncore_type snr_uncore_imc_free_running = { 4480 + .name = "imc_free_running", 4481 + .num_counters = 3, 4482 + .num_boxes = 1, 4483 + .num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX, 4484 + .freerunning = snr_imc_freerunning, 4485 + .ops = &snr_uncore_imc_freerunning_ops, 4486 + .event_descs = snr_uncore_imc_freerunning_events, 4487 + .format_group = &skx_uncore_iio_freerunning_format_group, 4488 + }; 4489 + 4490 + static struct intel_uncore_type *snr_mmio_uncores[] = { 4491 + &snr_uncore_imc, 4492 + &snr_uncore_imc_free_running, 4493 + NULL, 4494 + }; 4495 + 4496 + void snr_uncore_mmio_init(void) 4497 + { 4498 + uncore_mmio_uncores = snr_mmio_uncores; 4499 + } 4500 + 4501 + /* end of SNR uncore support */
+60 -50
arch/x86/events/msr.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/perf_event.h> 3 + #include <linux/sysfs.h> 3 4 #include <linux/nospec.h> 4 5 #include <asm/intel-family.h> 6 + #include "probe.h" 5 7 6 8 enum perf_msr_id { 7 9 PERF_MSR_TSC = 0, ··· 14 12 PERF_MSR_PTSC = 5, 15 13 PERF_MSR_IRPERF = 6, 16 14 PERF_MSR_THERM = 7, 17 - PERF_MSR_THERM_SNAP = 8, 18 - PERF_MSR_THERM_UNIT = 9, 19 15 PERF_MSR_EVENT_MAX, 20 16 }; 21 17 22 - static bool test_aperfmperf(int idx) 18 + static bool test_aperfmperf(int idx, void *data) 23 19 { 24 20 return boot_cpu_has(X86_FEATURE_APERFMPERF); 25 21 } 26 22 27 - static bool test_ptsc(int idx) 23 + static bool test_ptsc(int idx, void *data) 28 24 { 29 25 return boot_cpu_has(X86_FEATURE_PTSC); 30 26 } 31 27 32 - static bool test_irperf(int idx) 28 + static bool test_irperf(int idx, void *data) 33 29 { 34 30 return boot_cpu_has(X86_FEATURE_IRPERF); 35 31 } 36 32 37 - static bool test_therm_status(int idx) 33 + static bool test_therm_status(int idx, void *data) 38 34 { 39 35 return boot_cpu_has(X86_FEATURE_DTHERM); 40 36 } 41 37 42 - static bool test_intel(int idx) 38 + static bool test_intel(int idx, void *data) 43 39 { 44 40 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 45 41 boot_cpu_data.x86 != 6) ··· 98 98 return false; 99 99 } 100 100 101 - struct perf_msr { 102 - u64 msr; 103 - struct perf_pmu_events_attr *attr; 104 - bool (*test)(int idx); 101 + PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" ); 102 + PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" ); 103 + PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" ); 104 + PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" ); 105 + PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" ); 106 + PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" ); 107 + PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" ); 108 + PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" ); 109 + PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" ); 110 + 
PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" ); 111 + 112 + static unsigned long msr_mask; 113 + 114 + PMU_EVENT_GROUP(events, aperf); 115 + PMU_EVENT_GROUP(events, mperf); 116 + PMU_EVENT_GROUP(events, pperf); 117 + PMU_EVENT_GROUP(events, smi); 118 + PMU_EVENT_GROUP(events, ptsc); 119 + PMU_EVENT_GROUP(events, irperf); 120 + 121 + static struct attribute *attrs_therm[] = { 122 + &attr_therm.attr.attr, 123 + &attr_therm_snap.attr.attr, 124 + &attr_therm_unit.attr.attr, 125 + NULL, 105 126 }; 106 127 107 - PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" ); 108 - PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" ); 109 - PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" ); 110 - PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" ); 111 - PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" ); 112 - PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" ); 113 - PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" ); 114 - PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" ); 115 - PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" ); 116 - PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" ); 128 + static struct attribute_group group_therm = { 129 + .name = "events", 130 + .attrs = attrs_therm, 131 + }; 117 132 118 133 static struct perf_msr msr[] = { 119 - [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, 120 - [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, 121 - [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, 122 - [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, 123 - [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, 124 - [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, 125 - [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, 126 - [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, }, 127 - [PERF_MSR_THERM_SNAP] = { 
MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, }, 128 - [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, }, 134 + [PERF_MSR_TSC] = { .no_check = true, }, 135 + [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, }, 136 + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &group_mperf, test_aperfmperf, }, 137 + [PERF_MSR_PPERF] = { MSR_PPERF, &group_pperf, test_intel, }, 138 + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &group_smi, test_intel, }, 139 + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &group_ptsc, test_ptsc, }, 140 + [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &group_irperf, test_irperf, }, 141 + [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &group_therm, test_therm_status, }, 129 142 }; 130 143 131 - static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { 144 + static struct attribute *events_attrs[] = { 145 + &attr_tsc.attr.attr, 132 146 NULL, 133 147 }; 134 148 ··· 167 153 NULL, 168 154 }; 169 155 156 + const struct attribute_group *attr_update[] = { 157 + &group_aperf, 158 + &group_mperf, 159 + &group_pperf, 160 + &group_smi, 161 + &group_ptsc, 162 + &group_irperf, 163 + &group_therm, 164 + NULL, 165 + }; 166 + 170 167 static int msr_event_init(struct perf_event *event) 171 168 { 172 169 u64 cfg = event->attr.config; ··· 194 169 195 170 cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); 196 171 197 - if (!msr[cfg].attr) 172 + if (!(msr_mask & (1 << cfg))) 198 173 return -EINVAL; 199 174 200 175 event->hw.idx = -1; ··· 277 252 .stop = msr_event_stop, 278 253 .read = msr_event_update, 279 254 .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, 255 + .attr_update = attr_update, 280 256 }; 281 257 282 258 static int __init msr_init(void) 283 259 { 284 - int i, j = 0; 285 - 286 260 if (!boot_cpu_has(X86_FEATURE_TSC)) { 287 261 pr_cont("no MSR PMU driver.\n"); 288 262 return 0; 289 263 } 290 264 291 - /* Probe the MSRs. 
*/ 292 - for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) { 293 - u64 val; 294 - 295 - /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ 296 - if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) 297 - msr[i].attr = NULL; 298 - } 299 - 300 - /* List remaining MSRs in the sysfs attrs. */ 301 - for (i = 0; i < PERF_MSR_EVENT_MAX; i++) { 302 - if (msr[i].attr) 303 - events_attrs[j++] = &msr[i].attr->attr.attr; 304 - } 305 - events_attrs[j] = NULL; 265 + msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL); 306 266 307 267 perf_pmu_register(&pmu_msr, "msr", -1); 308 268
+1 -6
arch/x86/events/perf_event.h
··· 613 613 int attr_rdpmc_broken; 614 614 int attr_rdpmc; 615 615 struct attribute **format_attrs; 616 - struct attribute **event_attrs; 617 - struct attribute **caps_attrs; 618 616 619 617 ssize_t (*events_sysfs_show)(char *page, u64 config); 620 - struct attribute **cpu_events; 618 + const struct attribute_group **attr_update; 621 619 622 620 unsigned long attr_freeze_on_smi; 623 - struct attribute **attrs; 624 621 625 622 /* 626 623 * CPU Hotplug hooks ··· 882 885 883 886 ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); 884 887 ssize_t intel_event_sysfs_show(char *page, u64 config); 885 - 886 - struct attribute **merge_attr(struct attribute **a, struct attribute **b); 887 888 888 889 ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, 889 890 char *page);
+45
arch/x86/events/probe.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/export.h> 3 + #include <linux/types.h> 4 + #include <linux/bits.h> 5 + #include "probe.h" 6 + 7 + static umode_t 8 + not_visible(struct kobject *kobj, struct attribute *attr, int i) 9 + { 10 + return 0; 11 + } 12 + 13 + unsigned long 14 + perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) 15 + { 16 + unsigned long avail = 0; 17 + unsigned int bit; 18 + u64 val; 19 + 20 + if (cnt >= BITS_PER_LONG) 21 + return 0; 22 + 23 + for (bit = 0; bit < cnt; bit++) { 24 + if (!msr[bit].no_check) { 25 + struct attribute_group *grp = msr[bit].grp; 26 + 27 + grp->is_visible = not_visible; 28 + 29 + if (msr[bit].test && !msr[bit].test(bit, data)) 30 + continue; 31 + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ 32 + if (rdmsrl_safe(msr[bit].msr, &val)) 33 + continue; 34 + /* Disable zero counters if requested. */ 35 + if (!zero && !val) 36 + continue; 37 + 38 + grp->is_visible = NULL; 39 + } 40 + avail |= BIT(bit); 41 + } 42 + 43 + return avail; 44 + } 45 + EXPORT_SYMBOL_GPL(perf_msr_probe);
+29
arch/x86/events/probe.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __ARCH_X86_EVENTS_PROBE_H__ 3 + #define __ARCH_X86_EVENTS_PROBE_H__ 4 + #include <linux/sysfs.h> 5 + 6 + struct perf_msr { 7 + u64 msr; 8 + struct attribute_group *grp; 9 + bool (*test)(int idx, void *data); 10 + bool no_check; 11 + }; 12 + 13 + unsigned long 14 + perf_msr_probe(struct perf_msr *msr, int cnt, bool no_zero, void *data); 15 + 16 + #define __PMU_EVENT_GROUP(_name) \ 17 + static struct attribute *attrs_##_name[] = { \ 18 + &attr_##_name.attr.attr, \ 19 + NULL, \ 20 + } 21 + 22 + #define PMU_EVENT_GROUP(_grp, _name) \ 23 + __PMU_EVENT_GROUP(_name); \ 24 + static struct attribute_group group_##_name = { \ 25 + .name = #_grp, \ 26 + .attrs = attrs_##_name, \ 27 + } 28 + 29 + #endif /* __ARCH_X86_EVENTS_PROBE_H__ */
+39 -15
fs/sysfs/group.c
··· 175 175 } 176 176 EXPORT_SYMBOL_GPL(sysfs_create_group); 177 177 178 + static int internal_create_groups(struct kobject *kobj, int update, 179 + const struct attribute_group **groups) 180 + { 181 + int error = 0; 182 + int i; 183 + 184 + if (!groups) 185 + return 0; 186 + 187 + for (i = 0; groups[i]; i++) { 188 + error = internal_create_group(kobj, update, groups[i]); 189 + if (error) { 190 + while (--i >= 0) 191 + sysfs_remove_group(kobj, groups[i]); 192 + break; 193 + } 194 + } 195 + return error; 196 + } 197 + 178 198 /** 179 199 * sysfs_create_groups - given a directory kobject, create a bunch of attribute groups 180 200 * @kobj: The kobject to create the group on ··· 211 191 int sysfs_create_groups(struct kobject *kobj, 212 192 const struct attribute_group **groups) 213 193 { 214 - int error = 0; 215 - int i; 216 - 217 - if (!groups) 218 - return 0; 219 - 220 - for (i = 0; groups[i]; i++) { 221 - error = sysfs_create_group(kobj, groups[i]); 222 - if (error) { 223 - while (--i >= 0) 224 - sysfs_remove_group(kobj, groups[i]); 225 - break; 226 - } 227 - } 228 - return error; 194 + return internal_create_groups(kobj, 0, groups); 229 195 } 230 196 EXPORT_SYMBOL_GPL(sysfs_create_groups); 197 + 198 + /** 199 + * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups 200 + * @kobj: The kobject to update the group on 201 + * @groups: The attribute groups to update, NULL terminated 202 + * 203 + * This function update a bunch of attribute groups. If an error occurs when 204 + * updating a group, all previously updated groups will be removed together 205 + * with already existing (not updated) attributes. 206 + * 207 + * Returns 0 on success or error code from sysfs_update_group on failure. 
208 + */ 209 + int sysfs_update_groups(struct kobject *kobj, 210 + const struct attribute_group **groups) 211 + { 212 + return internal_create_groups(kobj, 1, groups); 213 + } 214 + EXPORT_SYMBOL_GPL(sysfs_update_groups); 231 215 232 216 /** 233 217 * sysfs_update_group - given a directory kobject, update an attribute group
+6
include/linux/perf_event.h
··· 256 256 struct module *module; 257 257 struct device *dev; 258 258 const struct attribute_group **attr_groups; 259 + const struct attribute_group **attr_update; 259 260 const char *name; 260 261 int type; 261 262 ··· 750 749 int nr_stat; 751 750 int nr_freq; 752 751 int rotate_disable; 752 + /* 753 + * Set when nr_events != nr_active, except tolerant to events not 754 + * necessary to be active due to scheduling constraints, such as cgroups. 755 + */ 756 + int rotate_necessary; 753 757 refcount_t refcount; 754 758 struct task_struct *task; 755 759
+8
include/linux/sysfs.h
··· 268 268 const struct attribute_group *grp); 269 269 int __must_check sysfs_create_groups(struct kobject *kobj, 270 270 const struct attribute_group **groups); 271 + int __must_check sysfs_update_groups(struct kobject *kobj, 272 + const struct attribute_group **groups); 271 273 int sysfs_update_group(struct kobject *kobj, 272 274 const struct attribute_group *grp); 273 275 void sysfs_remove_group(struct kobject *kobj, ··· 430 428 } 431 429 432 430 static inline int sysfs_create_groups(struct kobject *kobj, 431 + const struct attribute_group **groups) 432 + { 433 + return 0; 434 + } 435 + 436 + static inline int sysfs_update_groups(struct kobject *kobj, 433 437 const struct attribute_group **groups) 434 438 { 435 439 return 0;
+30 -22
kernel/events/core.c
··· 2952 2952 if (!ctx->nr_active || !(is_active & EVENT_ALL)) 2953 2953 return; 2954 2954 2955 + /* 2956 + * If we had been multiplexing, no rotations are necessary, now no events 2957 + * are active. 2958 + */ 2959 + ctx->rotate_necessary = 0; 2960 + 2955 2961 perf_pmu_disable(ctx->pmu); 2956 2962 if (is_active & EVENT_PINNED) { 2957 2963 list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list) ··· 3325 3319 return 0; 3326 3320 3327 3321 if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) { 3328 - if (!group_sched_in(event, sid->cpuctx, sid->ctx)) 3329 - list_add_tail(&event->active_list, &sid->ctx->flexible_active); 3330 - else 3322 + int ret = group_sched_in(event, sid->cpuctx, sid->ctx); 3323 + if (ret) { 3331 3324 sid->can_add_hw = 0; 3325 + sid->ctx->rotate_necessary = 1; 3326 + return 0; 3327 + } 3328 + list_add_tail(&event->active_list, &sid->ctx->flexible_active); 3332 3329 } 3333 3330 3334 3331 return 0; ··· 3699 3690 static bool perf_rotate_context(struct perf_cpu_context *cpuctx) 3700 3691 { 3701 3692 struct perf_event *cpu_event = NULL, *task_event = NULL; 3702 - bool cpu_rotate = false, task_rotate = false; 3703 - struct perf_event_context *ctx = NULL; 3693 + struct perf_event_context *task_ctx = NULL; 3694 + int cpu_rotate, task_rotate; 3704 3695 3705 3696 /* 3706 3697 * Since we run this from IRQ context, nobody can install new 3707 3698 * events, thus the event count values are stable. 3708 3699 */ 3709 3700 3710 - if (cpuctx->ctx.nr_events) { 3711 - if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) 3712 - cpu_rotate = true; 3713 - } 3714 - 3715 - ctx = cpuctx->task_ctx; 3716 - if (ctx && ctx->nr_events) { 3717 - if (ctx->nr_events != ctx->nr_active) 3718 - task_rotate = true; 3719 - } 3701 + cpu_rotate = cpuctx->ctx.rotate_necessary; 3702 + task_ctx = cpuctx->task_ctx; 3703 + task_rotate = task_ctx ? 
task_ctx->rotate_necessary : 0; 3720 3704 3721 3705 if (!(cpu_rotate || task_rotate)) 3722 3706 return false; ··· 3718 3716 perf_pmu_disable(cpuctx->ctx.pmu); 3719 3717 3720 3718 if (task_rotate) 3721 - task_event = ctx_first_active(ctx); 3719 + task_event = ctx_first_active(task_ctx); 3722 3720 if (cpu_rotate) 3723 3721 cpu_event = ctx_first_active(&cpuctx->ctx); 3724 3722 ··· 3726 3724 * As per the order given at ctx_resched() first 'pop' task flexible 3727 3725 * and then, if needed CPU flexible. 3728 3726 */ 3729 - if (task_event || (ctx && cpu_event)) 3730 - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); 3727 + if (task_event || (task_ctx && cpu_event)) 3728 + ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE); 3731 3729 if (cpu_event) 3732 3730 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 3733 3731 3734 3732 if (task_event) 3735 - rotate_ctx(ctx, task_event); 3733 + rotate_ctx(task_ctx, task_event); 3736 3734 if (cpu_event) 3737 3735 rotate_ctx(&cpuctx->ctx, cpu_event); 3738 3736 3739 - perf_event_sched_in(cpuctx, ctx, current); 3737 + perf_event_sched_in(cpuctx, task_ctx, current); 3740 3738 3741 3739 perf_pmu_enable(cpuctx->ctx.pmu); 3742 3740 perf_ctx_unlock(cpuctx, cpuctx->task_ctx); ··· 8537 8535 if (event->hw.state & PERF_HES_STOPPED) 8538 8536 return 0; 8539 8537 /* 8540 - * All tracepoints are from kernel-space. 8538 + * If exclude_kernel, only trace user-space tracepoints (uprobes) 8541 8539 */ 8542 - if (event->attr.exclude_kernel) 8540 + if (event->attr.exclude_kernel && !user_mode(regs)) 8543 8541 return 0; 8544 8542 8545 8543 if (!perf_tp_filter_match(event, data)) ··· 9875 9873 /* For PMUs with address filters, throw in an extra attribute: */ 9876 9874 if (pmu->nr_addr_filters) 9877 9875 ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters); 9876 + 9877 + if (ret) 9878 + goto del_dev; 9879 + 9880 + if (pmu->attr_update) 9881 + ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update); 9878 9882 9879 9883 if (ret) 9880 9884 goto del_dev;
+1 -1
kernel/trace/trace_uprobe.c
··· 1336 1336 call->event.funcs = &uprobe_funcs; 1337 1337 call->class->define_fields = uprobe_event_define_fields; 1338 1338 1339 - call->flags = TRACE_EVENT_FL_UPROBE; 1339 + call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; 1340 1340 call->class->reg = trace_uprobe_register; 1341 1341 call->data = tu; 1342 1342 }
+7
tools/arch/arm64/include/uapi/asm/kvm.h
··· 260 260 KVM_REG_SIZE_U256 | \ 261 261 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) 262 262 263 + /* 264 + * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and 265 + * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness- 266 + * invariant layout which differs from the layout used for the FPSIMD 267 + * V-registers on big-endian systems: see sigcontext.h for more explanation. 268 + */ 269 + 263 270 #define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN 264 271 #define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX 265 272
+15 -6
tools/arch/x86/include/asm/cpufeatures.h
··· 239 239 #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ 240 240 #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ 241 241 #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ 242 + #define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ 242 243 #define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ 243 244 #define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ 244 245 #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ 245 246 #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ 246 247 #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ 247 248 #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ 249 + #define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ 248 250 #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ 249 251 #define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ 250 252 #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ ··· 271 269 #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ 272 270 #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ 273 271 274 - /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ 275 - #define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ 272 + /* 273 + * Extended auxiliary flags: Linux defined - for features scattered in various 274 + * CPUID levels like 0xf, etc. 275 + * 276 + * Reuse free bits when adding new feature flags! 
277 + */ 278 + #define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ 279 + #define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ 280 + #define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ 281 + #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ 276 282 277 - /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ 278 - #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ 279 - #define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ 280 - #define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ 283 + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ 284 + #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ 281 285 282 286 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ 283 287 #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ ··· 330 322 #define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ 331 323 #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ 332 324 #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ 325 + #define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ 333 326 #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ 334 327 #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ 335 328 #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
+21 -10
tools/arch/x86/include/uapi/asm/kvm.h
··· 383 383 #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) 384 384 #define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) 385 385 386 + #define KVM_STATE_NESTED_FORMAT_VMX 0 387 + #define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ 388 + 386 389 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 387 390 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 388 391 #define KVM_STATE_NESTED_EVMCS 0x00000004 ··· 393 390 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 394 391 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 395 392 396 - struct kvm_vmx_nested_state { 393 + #define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 394 + 395 + struct kvm_vmx_nested_state_data { 396 + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; 397 + __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; 398 + }; 399 + 400 + struct kvm_vmx_nested_state_hdr { 397 401 __u64 vmxon_pa; 398 402 __u64 vmcs12_pa; 399 403 ··· 411 401 412 402 /* for KVM_CAP_NESTED_STATE */ 413 403 struct kvm_nested_state { 414 - /* KVM_STATE_* flags */ 415 404 __u16 flags; 416 - 417 - /* 0 for VMX, 1 for SVM. */ 418 405 __u16 format; 419 - 420 - /* 128 for SVM, 128 + VMCS size for VMX. */ 421 406 __u32 size; 422 407 423 408 union { 424 - /* VMXON, VMCS */ 425 - struct kvm_vmx_nested_state vmx; 409 + struct kvm_vmx_nested_state_hdr vmx; 426 410 427 411 /* Pad the header to 128 bytes. */ 428 412 __u8 pad[120]; 429 - }; 413 + } hdr; 430 414 431 - __u8 data[0]; 415 + /* 416 + * Define data region as 0 bytes to preserve backwards-compatability 417 + * to old definition of kvm_nested_state in order to avoid changing 418 + * KVM_{GET,PUT}_NESTED_STATE ioctl values. 419 + */ 420 + union { 421 + struct kvm_vmx_nested_state_data vmx[0]; 422 + } data; 432 423 }; 433 424 434 425 #endif /* _ASM_X86_KVM_H */
+2 -1
tools/build/Makefile.feature
··· 36 36 fortify-source \ 37 37 sync-compare-and-swap \ 38 38 get_current_dir_name \ 39 + gettid \ 39 40 glibc \ 40 41 gtk2 \ 41 42 gtk2-infobar \ ··· 53 52 libpython \ 54 53 libpython-version \ 55 54 libslang \ 55 + libslang-include-subdir \ 56 56 libcrypto \ 57 57 libunwind \ 58 58 pthread-attr-setaffinity-np \ ··· 115 113 numa_num_possible_cpus \ 116 114 libperl \ 117 115 libpython \ 118 - libslang \ 119 116 libcrypto \ 120 117 libunwind \ 121 118 libdw-dwarf-unwind \
+9 -1
tools/build/feature/Makefile
··· 31 31 test-libpython.bin \ 32 32 test-libpython-version.bin \ 33 33 test-libslang.bin \ 34 + test-libslang-include-subdir.bin \ 34 35 test-libcrypto.bin \ 35 36 test-libunwind.bin \ 36 37 test-libunwind-debug-frame.bin \ ··· 55 54 test-get_cpuid.bin \ 56 55 test-sdt.bin \ 57 56 test-cxx.bin \ 57 + test-gettid.bin \ 58 58 test-jvmti.bin \ 59 59 test-jvmti-cmlr.bin \ 60 60 test-sched_getcpu.bin \ ··· 183 181 $(BUILD) -laudit 184 182 185 183 $(OUTPUT)test-libslang.bin: 186 - $(BUILD) -I/usr/include/slang -lslang 184 + $(BUILD) -lslang 185 + 186 + $(OUTPUT)test-libslang-include-subdir.bin: 187 + $(BUILD) -lslang 187 188 188 189 $(OUTPUT)test-libcrypto.bin: 189 190 $(BUILD) -lcrypto ··· 271 266 272 267 $(OUTPUT)test-cxx.bin: 273 268 $(BUILDXX) -std=gnu++11 269 + 270 + $(OUTPUT)test-gettid.bin: 271 + $(BUILD) 274 272 275 273 $(OUTPUT)test-jvmti.bin: 276 274 $(BUILD)
+6 -1
tools/build/feature/test-all.c
··· 38 38 # include "test-get_current_dir_name.c" 39 39 #undef main 40 40 41 + #define main main_test_gettid 42 + # include "test-gettid.c" 43 + #undef main 44 + 41 45 #define main main_test_glibc 42 46 # include "test-glibc.c" 43 47 #undef main ··· 186 182 # include "test-disassembler-four-args.c" 187 183 #undef main 188 184 189 - #define main main_test_zstd 185 + #define main main_test_libzstd 190 186 # include "test-libzstd.c" 191 187 #undef main 192 188 ··· 199 195 main_test_libelf(); 200 196 main_test_libelf_mmap(); 201 197 main_test_get_current_dir_name(); 198 + main_test_gettid(); 202 199 main_test_glibc(); 203 200 main_test_dwarf(); 204 201 main_test_dwarf_getlocations();
+1
tools/build/feature/test-fortify-source.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 #include <stdio.h> 2 3 3 4 int main(void)
+11
tools/build/feature/test-gettid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 + #define _GNU_SOURCE 4 + #include <unistd.h> 5 + 6 + int main(void) 7 + { 8 + return gettid(); 9 + } 10 + 11 + #undef _GNU_SOURCE
+1
tools/build/feature/test-hello.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 #include <stdio.h> 2 3 3 4 int main(void)
+7
tools/build/feature/test-libslang-include-subdir.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <slang/slang.h> 3 + 4 + int main(void) 5 + { 6 + return SLsmg_init_smg(); 7 + }
+1
tools/build/feature/test-setns.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 #define _GNU_SOURCE 2 3 #include <sched.h> 3 4
+75
tools/include/linux/ctype.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_CTYPE_H 3 + #define _LINUX_CTYPE_H 4 + 5 + /* 6 + * NOTE! This ctype does not handle EOF like the standard C 7 + * library is required to. 8 + */ 9 + 10 + #define _U 0x01 /* upper */ 11 + #define _L 0x02 /* lower */ 12 + #define _D 0x04 /* digit */ 13 + #define _C 0x08 /* cntrl */ 14 + #define _P 0x10 /* punct */ 15 + #define _S 0x20 /* white space (space/lf/tab) */ 16 + #define _X 0x40 /* hex digit */ 17 + #define _SP 0x80 /* hard space (0x20) */ 18 + 19 + extern const unsigned char _ctype[]; 20 + 21 + #define __ismask(x) (_ctype[(int)(unsigned char)(x)]) 22 + 23 + #define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) 24 + #define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) 25 + #define iscntrl(c) ((__ismask(c)&(_C)) != 0) 26 + static inline int __isdigit(int c) 27 + { 28 + return '0' <= c && c <= '9'; 29 + } 30 + #define isdigit(c) __isdigit(c) 31 + #define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) 32 + #define islower(c) ((__ismask(c)&(_L)) != 0) 33 + #define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) 34 + #define ispunct(c) ((__ismask(c)&(_P)) != 0) 35 + /* Note: isspace() must return false for %NUL-terminator */ 36 + #define isspace(c) ((__ismask(c)&(_S)) != 0) 37 + #define isupper(c) ((__ismask(c)&(_U)) != 0) 38 + #define isxdigit(c) ((__ismask(c)&(_D|_X)) != 0) 39 + 40 + #define isascii(c) (((unsigned char)(c))<=0x7f) 41 + #define toascii(c) (((unsigned char)(c))&0x7f) 42 + 43 + static inline unsigned char __tolower(unsigned char c) 44 + { 45 + if (isupper(c)) 46 + c -= 'A'-'a'; 47 + return c; 48 + } 49 + 50 + static inline unsigned char __toupper(unsigned char c) 51 + { 52 + if (islower(c)) 53 + c -= 'a'-'A'; 54 + return c; 55 + } 56 + 57 + #define tolower(c) __tolower(c) 58 + #define toupper(c) __toupper(c) 59 + 60 + /* 61 + * Fast implementation of tolower() for internal usage. Do not use in your 62 + * code. 
63 + */ 64 + static inline char _tolower(const char c) 65 + { 66 + return c | 0x20; 67 + } 68 + 69 + /* Fast check for octal digit */ 70 + static inline int isodigit(const char c) 71 + { 72 + return c >= '0' && c <= '7'; 73 + } 74 + 75 + #endif
+1
tools/include/linux/kernel.h
··· 102 102 103 103 int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); 104 104 int scnprintf(char * buf, size_t size, const char * fmt, ...); 105 + int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); 105 106 106 107 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) 107 108
+10 -1
tools/include/linux/string.h
··· 7 7 8 8 void *memdup(const void *src, size_t len); 9 9 10 + char **argv_split(const char *str, int *argcp); 11 + void argv_free(char **argv); 12 + 10 13 int strtobool(const char *s, bool *res); 11 14 12 15 /* ··· 22 19 23 20 char *str_error_r(int errnum, char *buf, size_t buflen); 24 21 22 + char *strreplace(char *s, char old, char new); 23 + 25 24 /** 26 25 * strstarts - does @str start with @prefix? 27 26 * @str: string to examine ··· 34 29 return strncmp(str, prefix, strlen(prefix)) == 0; 35 30 } 36 31 37 - #endif /* _LINUX_STRING_H_ */ 32 + extern char * __must_check skip_spaces(const char *); 33 + 34 + extern char *strim(char *); 35 + 36 + #endif /* _TOOLS_LINUX_STRING_H_ */
+100
tools/lib/argv_split.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Helper function for splitting a string into an argv-like array. 4 + */ 5 + 6 + #include <stdlib.h> 7 + #include <linux/kernel.h> 8 + #include <linux/ctype.h> 9 + #include <linux/string.h> 10 + 11 + static const char *skip_arg(const char *cp) 12 + { 13 + while (*cp && !isspace(*cp)) 14 + cp++; 15 + 16 + return cp; 17 + } 18 + 19 + static int count_argc(const char *str) 20 + { 21 + int count = 0; 22 + 23 + while (*str) { 24 + str = skip_spaces(str); 25 + if (*str) { 26 + count++; 27 + str = skip_arg(str); 28 + } 29 + } 30 + 31 + return count; 32 + } 33 + 34 + /** 35 + * argv_free - free an argv 36 + * @argv - the argument vector to be freed 37 + * 38 + * Frees an argv and the strings it points to. 39 + */ 40 + void argv_free(char **argv) 41 + { 42 + char **p; 43 + for (p = argv; *p; p++) { 44 + free(*p); 45 + *p = NULL; 46 + } 47 + 48 + free(argv); 49 + } 50 + 51 + /** 52 + * argv_split - split a string at whitespace, returning an argv 53 + * @str: the string to be split 54 + * @argcp: returned argument count 55 + * 56 + * Returns an array of pointers to strings which are split out from 57 + * @str. This is performed by strictly splitting on white-space; no 58 + * quote processing is performed. Multiple whitespace characters are 59 + * considered to be a single argument separator. The returned array 60 + * is always NULL-terminated. Returns NULL on memory allocation 61 + * failure. 
62 + */ 63 + char **argv_split(const char *str, int *argcp) 64 + { 65 + int argc = count_argc(str); 66 + char **argv = calloc(argc + 1, sizeof(*argv)); 67 + char **argvp; 68 + 69 + if (argv == NULL) 70 + goto out; 71 + 72 + if (argcp) 73 + *argcp = argc; 74 + 75 + argvp = argv; 76 + 77 + while (*str) { 78 + str = skip_spaces(str); 79 + 80 + if (*str) { 81 + const char *p = str; 82 + char *t; 83 + 84 + str = skip_arg(str); 85 + 86 + t = strndup(p, str-p); 87 + if (t == NULL) 88 + goto fail; 89 + *argvp++ = t; 90 + } 91 + } 92 + *argvp = NULL; 93 + 94 + out: 95 + return argv; 96 + 97 + fail: 98 + argv_free(argv); 99 + return NULL; 100 + }
+35
tools/lib/ctype.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * linux/lib/ctype.c 4 + * 5 + * Copyright (C) 1991, 1992 Linus Torvalds 6 + */ 7 + 8 + #include <linux/ctype.h> 9 + #include <linux/compiler.h> 10 + 11 + const unsigned char _ctype[] = { 12 + _C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ 13 + _C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ 14 + _C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ 15 + _C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ 16 + _S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ 17 + _P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ 18 + _D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ 19 + _D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ 20 + _P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ 21 + _U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ 22 + _U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ 23 + _U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ 24 + _P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ 25 + _L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ 26 + _L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ 27 + _L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ 28 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ 29 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ 30 + _S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ 31 + _P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ 32 + _U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ 33 + _U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ 34 + _L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ 35 + _L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */
+55
tools/lib/string.c
··· 17 17 #include <string.h> 18 18 #include <errno.h> 19 19 #include <linux/string.h> 20 + #include <linux/ctype.h> 20 21 #include <linux/compiler.h> 21 22 22 23 /** ··· 106 105 dest[len] = '\0'; 107 106 } 108 107 return ret; 108 + } 109 + 110 + /** 111 + * skip_spaces - Removes leading whitespace from @str. 112 + * @str: The string to be stripped. 113 + * 114 + * Returns a pointer to the first non-whitespace character in @str. 115 + */ 116 + char *skip_spaces(const char *str) 117 + { 118 + while (isspace(*str)) 119 + ++str; 120 + return (char *)str; 121 + } 122 + 123 + /** 124 + * strim - Removes leading and trailing whitespace from @s. 125 + * @s: The string to be stripped. 126 + * 127 + * Note that the first trailing whitespace is replaced with a %NUL-terminator 128 + * in the given string @s. Returns a pointer to the first non-whitespace 129 + * character in @s. 130 + */ 131 + char *strim(char *s) 132 + { 133 + size_t size; 134 + char *end; 135 + 136 + size = strlen(s); 137 + if (!size) 138 + return s; 139 + 140 + end = s + size - 1; 141 + while (end >= s && isspace(*end)) 142 + end--; 143 + *(end + 1) = '\0'; 144 + 145 + return skip_spaces(s); 146 + } 147 + 148 + /** 149 + * strreplace - Replace all occurrences of character in string. 150 + * @s: The string to operate on. 151 + * @old: The character being replaced. 152 + * @new: The character @old is replaced with. 153 + * 154 + * Returns pointer to the nul byte at the end of @s. 155 + */ 156 + char *strreplace(char *s, char old, char new) 157 + { 158 + for (; *s; ++s) 159 + if (*s == old) 160 + *s = new; 161 + return s; 109 162 }
+13 -1
tools/lib/symbol/kallsyms.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - #include <ctype.h> 3 2 #include "symbol/kallsyms.h" 4 3 #include <stdio.h> 5 4 #include <stdlib.h> ··· 13 14 { 14 15 symbol_type = toupper(symbol_type); 15 16 return symbol_type == 'T' || symbol_type == 'W'; 17 + } 18 + 19 + /* 20 + * While we find nice hex chars, build a long_val. 21 + * Return number of chars processed. 22 + */ 23 + int hex2u64(const char *ptr, u64 *long_val) 24 + { 25 + char *p; 26 + 27 + *long_val = strtoull(ptr, &p, 16); 28 + 29 + return p - ptr; 16 30 } 17 31 18 32 int kallsyms__parse(const char *filename, void *arg,
+2
tools/lib/symbol/kallsyms.h
··· 18 18 return isupper(type) ? STB_GLOBAL : STB_LOCAL; 19 19 } 20 20 21 + int hex2u64(const char *ptr, u64 *long_val); 22 + 21 23 u8 kallsyms2elf_type(char type); 22 24 23 25 bool kallsyms__is_function(char symbol_type);
+19
tools/lib/vsprintf.c
··· 23 23 24 24 return (i >= ssize) ? (ssize - 1) : i; 25 25 } 26 + 27 + int scnprintf_pad(char * buf, size_t size, const char * fmt, ...) 28 + { 29 + ssize_t ssize = size; 30 + va_list args; 31 + int i; 32 + 33 + va_start(args, fmt); 34 + i = vscnprintf(buf, size, fmt, args); 35 + va_end(args); 36 + 37 + if (i < (int) size) { 38 + for (; i < (int) size; i++) 39 + buf[i] = ' '; 40 + buf[i] = 0x0; 41 + } 42 + 43 + return (i >= ssize) ? (ssize - 1) : i; 44 + }
+5
tools/objtool/Build
··· 9 9 objtool-y += objtool.o 10 10 11 11 objtool-y += libstring.o 12 + objtool-y += libctype.o 12 13 objtool-y += str_error_r.o 13 14 14 15 CFLAGS += -I$(srctree)/tools/lib 15 16 16 17 $(OUTPUT)libstring.o: ../lib/string.c FORCE 18 + $(call rule_mkdir) 19 + $(call if_changed_dep,cc_o_c) 20 + 21 + $(OUTPUT)libctype.o: ../lib/ctype.c FORCE 17 22 $(call rule_mkdir) 18 23 $(call if_changed_dep,cc_o_c) 19 24
+41
tools/perf/Documentation/db-export.txt
··· 1 + Database Export 2 + =============== 3 + 4 + perf tool's python scripting engine: 5 + 6 + tools/perf/util/scripting-engines/trace-event-python.c 7 + 8 + supports scripts: 9 + 10 + tools/perf/scripts/python/export-to-sqlite.py 11 + tools/perf/scripts/python/export-to-postgresql.py 12 + 13 + which export data to a SQLite3 or PostgreSQL database. 14 + 15 + The export process provides records with unique sequential ids which allows the 16 + data to be imported directly to a database and provides the relationships 17 + between tables. 18 + 19 + Over time it is possible to continue to expand the export while maintaining 20 + backward and forward compatibility, by following some simple rules: 21 + 22 + 1. Because of the nature of SQL, existing tables and columns can continue to be 23 + used so long as the names and meanings (and to some extent data types) remain 24 + the same. 25 + 26 + 2. New tables and columns can be added, without affecting existing SQL queries, 27 + so long as the new names are unique. 28 + 29 + 3. Scripts that use a database (e.g. exported-sql-viewer.py) can maintain 30 + backward compatibility by testing for the presence of new tables and columns 31 + before using them. e.g. function IsSelectable() in exported-sql-viewer.py 32 + 33 + 4. The export scripts themselves maintain forward compatibility (i.e. an existing 34 + script will continue to work with new versions of perf) by accepting a variable 35 + number of arguments (e.g. def call_return_table(*x)) i.e. perf can pass more 36 + arguments which old scripts will ignore. 37 + 38 + 5. The scripting engine tests for the existence of script handler functions 39 + before calling them. The scripting engine can also test for the support of new 40 + or optional features by checking for the existence and value of script global 41 + variables.
+35 -5
tools/perf/Documentation/intel-pt.txt
··· 88 88 89 89 To represent software control flow, "branches" samples are produced. By default 90 90 a branch sample is synthesized for every single branch. To get an idea what 91 - data is available you can use the 'perf script' tool with no parameters, which 92 - will list all the samples. 91 + data is available you can use the 'perf script' tool with all itrace sampling 92 + options, which will list all the samples. 93 93 94 94 perf record -e intel_pt//u ls 95 - perf script 95 + perf script --itrace=ibxwpe 96 96 97 97 An interesting field that is not printed by default is 'flags' which can be 98 98 displayed as follows: 99 99 100 - perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags 100 + perf script --itrace=ibxwpe -F+flags 101 101 102 102 The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, 103 103 system, asynchronous, interrupt, transaction abort, trace begin, trace end, and 104 104 in transaction, respectively. 105 + 106 + Another interesting field that is not printed by default is 'ipc' which can be 107 + displayed as follows: 108 + 109 + perf script --itrace=be -F+ipc 110 + 111 + There are two ways that instructions-per-cycle (IPC) can be calculated depending 112 + on the recording. 113 + 114 + If the 'cyc' config term (see config terms section below) was used, then IPC is 115 + calculated using the cycle count from CYC packets, otherwise MTC packets are 116 + used - refer to the 'mtc' config term. When MTC is used, however, the values 117 + are less accurate because the timing is less accurate. 118 + 119 + Because Intel PT does not update the cycle count on every branch or instruction, 120 + the values will often be zero. When there are values, they will be the number 121 + of instructions and number of cycles since the last update, and thus represent 122 + the average IPC since the last IPC for that event type. 
Note IPC for "branches" 123 + events is calculated separately from IPC for "instructions" events. 124 + 125 + Also note that the IPC instruction count may or may not include the current 126 + instruction. If the cycle count is associated with an asynchronous branch 127 + (e.g. page fault or interrupt), then the instruction count does not include the 128 + current instruction, otherwise it does. That is consistent with whether or not 129 + that instruction has retired when the cycle count is updated. 130 + 131 + Another note, in the case of "branches" events, non-taken branches are not 132 + presently sampled, so IPC values for them do not appear e.g. a CYC packet with a 133 + TNT packet that starts with a non-taken branch. To see every possible IPC 134 + value, "instructions" events can be used e.g. --itrace=i0ns 105 135 106 136 While it is possible to create scripts to analyze the data, an alternative 107 137 approach is available to export the data to a sqlite or postgresql database. ··· 743 713 744 714 which, in turn, is the same as 745 715 746 - --itrace=ibxwpe 716 + --itrace=cepwx 747 717 748 718 The letters are: 749 719
+6 -3
tools/perf/Documentation/perf-config.txt
··· 564 564 llvm.clang-bpf-cmd-template:: 565 565 Cmdline template. Below lines show its default value. Environment 566 566 variable is used to pass options. 567 - "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \ 568 - -Wno-unused-value -Wno-pointer-sign -working-directory \ 569 - $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -" 567 + "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ 568 + "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ 569 + "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ 570 + "-Wno-unused-value -Wno-pointer-sign " \ 571 + "-working-directory $WORKING_DIR " \ 572 + "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" 570 573 571 574 llvm.clang-opt:: 572 575 Options passed to clang.
+22 -9
tools/perf/Documentation/perf-diff.txt
··· 90 90 91 91 -c:: 92 92 --compute:: 93 - Differential computation selection - delta, ratio, wdiff, delta-abs 94 - (default is delta-abs). Default can be changed using diff.compute 95 - config option. See COMPARISON METHODS section for more info. 93 + Differential computation selection - delta, ratio, wdiff, cycles, 94 + delta-abs (default is delta-abs). Default can be changed using 95 + diff.compute config option. See COMPARISON METHODS section for 96 + more info. 96 97 97 98 -p:: 98 99 --period:: ··· 143 142 perf diff --time 0%-10%,30%-40% 144 143 145 144 It also supports analyzing samples within a given time window 146 - <start>,<stop>. Times have the format seconds.microseconds. If 'start' 147 - is not given (i.e., time string is ',x.y') then analysis starts at 148 - the beginning of the file. If stop time is not given (i.e, time 149 - string is 'x.y,') then analysis goes to the end of the file. Time string is 150 - 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps for different 151 - perf.data files. 145 + <start>,<stop>. Times have the format seconds.nanoseconds. If 'start' 146 + is not given (i.e. time string is ',x.y') then analysis starts at 147 + the beginning of the file. If stop time is not given (i.e. time 148 + string is 'x.y,') then analysis goes to the end of the file. 149 + Multiple ranges can be separated by spaces, which requires the argument 150 + to be quoted e.g. --time "1234.567,1234.789 1235," 151 + Time string is 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps 152 + for different perf.data files. 152 153 153 154 For example, we get the timestamp information from 'perf script'. 154 155 ··· 280 277 behind ':' separator like '-c wdiff:1,2'. 281 278 - WEIGHT-A being the weight of the data file 282 279 - WEIGHT-B being the weight of the baseline data file 280 + 281 + cycles 282 + ~~~~~~ 283 + If specified the '[Program Block Range] Cycles Diff' column is displayed.
284 + It displays the cycles difference of same program basic block amongst 285 + two perf.data. The program basic block is the code between two branches. 286 + 287 + '[Program Block Range]' indicates the range of a program basic block. 288 + Source line is reported if it can be found otherwise uses symbol+offset 289 + instead. 283 290 284 291 SEE ALSO 285 292 --------
+11
tools/perf/Documentation/perf-record.txt
··· 490 490 --all-user:: 491 491 Configure all used events to run in user space. 492 492 493 + --kernel-callchains:: 494 + Collect callchains only from kernel space. I.e. this option sets 495 + perf_event_attr.exclude_callchain_user to 1. 496 + 497 + --user-callchains:: 498 + Collect callchains only from user space. I.e. this option sets 499 + perf_event_attr.exclude_callchain_kernel to 1. 500 + 501 + Don't use both --kernel-callchains and --user-callchains at the same time or no 502 + callchains will be collected. 503 + 493 504 --timestamp-filename 494 505 Append timestamp to output file name. 495 506
+6 -5
tools/perf/Documentation/perf-report.txt
··· 89 89 - socket: processor socket number the task ran at the time of sample 90 90 - srcline: filename and line number executed at the time of sample. The 91 91 DWARF debugging info must be provided. 92 - - srcfile: file name of the source file of the same. Requires dwarf 92 + - srcfile: file name of the source file of the samples. Requires dwarf 93 93 information. 94 94 - weight: Event specific weight, e.g. memory latency or transaction 95 95 abort cost. This is the global weight. ··· 412 412 413 413 --time:: 414 414 Only analyze samples within given time window: <start>,<stop>. Times 415 - have the format seconds.microseconds. If start is not given (i.e., time 415 + have the format seconds.nanoseconds. If start is not given (i.e. time 416 416 string is ',x.y') then analysis starts at the beginning of the file. If 417 - stop time is not given (i.e, time string is 'x.y,') then analysis goes 418 - to end of file. 417 + stop time is not given (i.e. time string is 'x.y,') then analysis goes 418 + to end of file. Multiple ranges can be separated by spaces, which 419 + requires the argument to be quoted e.g. --time "1234.567,1234.789 1235," 419 420 420 - Also support time percent with multiple time range. Time string is 421 + Also support time percent with multiple time ranges. Time string is 421 422 'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'. 422 423 423 424 For example:
+12 -5
tools/perf/Documentation/perf-script.txt
··· 117 117 Comma separated list of fields to print. Options are: 118 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 119 119 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, 120 - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode. 120 + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc. 121 121 Field list can be prepended with the type, trace, sw or hw, 122 122 to indicate to which event type the field list applies. 123 123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace ··· 201 201 instruction. 202 202 203 203 The synth field is used by synthesized events which may be created when 204 + Instruction Trace decoding. 205 + 206 + The ipc (instructions per cycle) field is synthesized and may have a value when 204 207 Instruction Trace decoding. 205 208 206 209 Finally, a user may not set fields to none for all event types. ··· 316 313 --show-round-events 317 314 Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND. 318 315 316 + --show-bpf-events 317 + Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT. 318 + 319 319 --demangle:: 320 320 Demangle symbol names to human readable form. It's enabled by default, 321 321 disable with --no-demangle. ··· 361 355 362 356 --time:: 363 357 Only analyze samples within given time window: <start>,<stop>. Times 364 - have the format seconds.microseconds. If start is not given (i.e., time 358 + have the format seconds.nanoseconds. If start is not given (i.e. time 365 359 string is ',x.y') then analysis starts at the beginning of the file. If 366 - stop time is not given (i.e, time string is 'x.y,') then analysis goes 367 - to end of file. 360 + stop time is not given (i.e. time string is 'x.y,') then analysis goes 361 + to end of file. Multiple ranges can be separated by spaces, which 362 + requires the argument to be quoted e.g. 
--time "1234.567,1234.789 1235," 368 363 369 - Also support time percent with multipe time range. Time string is 364 + Also support time percent with multiple time ranges. Time string is 370 365 'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'. 371 366 372 367 For example:
+10
tools/perf/Documentation/perf-stat.txt
··· 200 200 socket number and the number of online processors on that socket. This is 201 201 useful to gauge the amount of aggregation. 202 202 203 + --per-die:: 204 + Aggregate counts per processor die for system-wide mode measurements. This 205 + is a useful mode to detect imbalance between dies. To enable this mode, 206 + use --per-die in addition to -a (system-wide). The output includes the 207 + die number and the number of online processors on that die. This is 208 + useful to gauge the amount of aggregation. 209 + 203 210 --per-core:: 204 211 Aggregate counts per physical processor for system-wide mode measurements. This 205 212 is a useful mode to detect imbalance between physical cores. To enable this mode, ··· 245 238 246 239 --per-socket:: 247 240 Aggregate counts per processor socket for system-wide mode measurements. 241 + 242 + --per-die:: 243 + Aggregate counts per processor die for system-wide mode measurements. 248 244 249 245 --per-core:: 250 246 Aggregate counts per physical processor for system-wide mode measurements.
+5
tools/perf/Documentation/perf-top.txt
··· 262 262 The number of threads to run when synthesizing events for existing processes. 263 263 By default, the number of threads equals to the number of online CPUs. 264 264 265 + --namespaces:: 266 + Record events of type PERF_RECORD_NAMESPACES and display it with the 267 + 'cgroup_id' sort key. 268 + 269 + 265 270 INTERACTIVE PROMPTING KEYS 266 271 -------------------------- 267 272
+90 -7
tools/perf/Documentation/perf.data-file-format.txt
··· 151 151 152 152 HEADER_CPU_TOPOLOGY = 13, 153 153 154 - String lists defining the core and CPU threads topology. 155 - The string lists are followed by a variable length array 156 - which contains core_id and socket_id of each cpu. 157 - The number of entries can be determined by the size of the 158 - section minus the sizes of both string lists. 159 - 160 154 struct { 155 + /* 156 + * First revision of HEADER_CPU_TOPOLOGY 157 + * 158 + * See 'struct perf_header_string_list' definition earlier 159 + * in this file. 160 + */ 161 + 161 162 struct perf_header_string_list cores; /* Variable length */ 162 163 struct perf_header_string_list threads; /* Variable length */ 164 + 165 + /* 166 + * Second revision of HEADER_CPU_TOPOLOGY, older tools 167 + * will not consider what comes next 168 + */ 169 + 163 170 struct { 164 171 uint32_t core_id; 165 172 uint32_t socket_id; 166 173 } cpus[nr]; /* Variable length records */ 174 + /* 'nr' comes from previously processed HEADER_NRCPUS's nr_cpus_avail */ 175 + 176 + /* 177 + * Third revision of HEADER_CPU_TOPOLOGY, older tools 178 + * will not consider what comes next 179 + */ 180 + 181 + struct perf_header_string_list dies; /* Variable length */ 182 + uint32_t die_id[nr_cpus_avail]; /* from previously processed HEADER_NRCPUS, VLA */ 167 183 }; 168 184 169 185 Example: 170 - sibling cores : 0-3 186 + sibling sockets : 0-8 187 + sibling dies : 0-3 188 + sibling dies : 4-7 171 189 sibling threads : 0-1 172 190 sibling threads : 2-3 191 + sibling threads : 4-5 192 + sibling threads : 6-7 173 193 174 194 HEADER_NUMA_TOPOLOGY = 14, 175 195 ··· 291 271 HEADER_SAMPLE_TIME = 21, 292 272 293 273 Two uint64_t for the time of first sample and the time of last sample. 274 + 275 + HEADER_MEM_TOPOLOGY = 22, 276 + 277 + Physical memory map and its node assignments.
278 + 279 + The format of data in MEM_TOPOLOGY is as follows: 280 + 281 + 0 - version | for future changes 282 + 8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes 283 + 16 - count | number of nodes 284 + 285 + For each node we store map of physical indexes: 286 + 287 + 32 - node id | node index 288 + 40 - size | size of bitmap 289 + 48 - bitmap | bitmap of memory indexes that belongs to node 290 + | /sys/devices/system/node/node<NODE>/memory<INDEX> 291 + 292 + The MEM_TOPOLOGY can be displayed with following command: 293 + 294 + $ perf report --header-only -I 295 + ... 296 + # memory nodes (nr 1, block size 0x8000000): 297 + # 0 [7G]: 0-23,32-69 298 + 299 + HEADER_CLOCKID = 23, 300 + 301 + One uint64_t for the clockid frequency, specified, for instance, via 'perf 302 + record -k' (see clock_gettime()), to enable timestamps derived metrics 303 + conversion into wall clock time on the reporting stage. 304 + 305 + HEADER_DIR_FORMAT = 24, 306 + 307 + The data files layout is described by HEADER_DIR_FORMAT feature. Currently it 308 + holds only version number (1): 309 + 310 + uint64_t version; 311 + 312 + The current version holds only version value (1) means that data files: 313 + 314 + - Follow the 'data.*' name format. 315 + 316 + - Contain raw events data in standard perf format as read from kernel (and need 317 + to be sorted) 318 + 319 + Future versions are expected to describe different data files layout according 320 + to special needs. 321 + 322 + HEADER_BPF_PROG_INFO = 25, 323 + 324 + struct bpf_prog_info_linear, which contains detailed information about 325 + a BPF program, including type, id, tag, jited/xlated instructions, etc. 326 + 327 + HEADER_BPF_BTF = 26, 328 + 329 + Contains BPF Type Format (BTF). For more information about BTF, please 330 + refer to Documentation/bpf/btf.rst. 331 + 332 + struct { 333 + u32 id; 334 + u32 data_size; 335 + char data[]; 336 + }; 294 337 295 338 HEADER_COMPRESSED = 27, 296 339
+1 -1
tools/perf/Documentation/tips.txt
··· 38 38 To browse sample contexts use perf report --sample 10 and select in context menu 39 39 To separate samples by time use perf report --sort time,overhead,sym 40 40 To set sample time separation other than 100ms with --sort time use --time-quantum 41 - Add -I to perf report to sample register values visible in perf report context. 41 + Add -I to perf record to sample register values, which will be visible in perf report sample context. 42 42 To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context 43 43 To show context switches in perf report sample context add --switch-events to perf record.
+2
tools/perf/MANIFEST
··· 7 7 tools/lib/api 8 8 tools/lib/bpf 9 9 tools/lib/subcmd 10 + tools/lib/argv_split.c 11 + tools/lib/ctype.c 10 12 tools/lib/hweight.c 11 13 tools/lib/rbtree.c 12 14 tools/lib/string.c
+15 -4
tools/perf/Makefile.config
··· 332 332 CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME 333 333 endif 334 334 335 + ifeq ($(feature-gettid), 1) 336 + CFLAGS += -DHAVE_GETTID 337 + endif 338 + 335 339 ifdef NO_LIBELF 336 340 NO_DWARF := 1 337 341 NO_DEMANGLE := 1 ··· 417 413 $(call feature_check,libopencsd) 418 414 ifeq ($(feature-libopencsd), 1) 419 415 CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) 416 + ifeq ($(feature-reallocarray), 0) 417 + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY 418 + endif 420 419 LDFLAGS += $(LIBOPENCSD_LDFLAGS) 421 420 EXTLIBS += $(OPENCSDLIBS) 422 421 $(call detected,CONFIG_LIBOPENCSD) ··· 644 637 645 638 ifndef NO_SLANG 646 639 ifneq ($(feature-libslang), 1) 647 - msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev); 648 - NO_SLANG := 1 649 - else 640 + ifneq ($(feature-libslang-include-subdir), 1) 641 + msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev); 642 + NO_SLANG := 1 643 + else 644 + CFLAGS += -DHAVE_SLANG_INCLUDE_SUBDIR 645 + endif 646 + endif 647 + ifndef NO_SLANG 650 648 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h 651 - CFLAGS += -I/usr/include/slang 652 649 CFLAGS += -DHAVE_SLANG_SUPPORT 653 650 EXTLIBS += -lslang 654 651 $(call detected,CONFIG_SLANG)
+42 -2
tools/perf/Makefile.perf
··· 420 420 $(fadvise_advice_array): $(linux_uapi_dir)/in.h $(fadvise_advice_tbl) 421 421 $(Q)$(SHELL) '$(fadvise_advice_tbl)' $(linux_uapi_dir) > $@ 422 422 423 + fsmount_arrays := $(beauty_outdir)/fsmount_arrays.c 424 + fsmount_tbls := $(srctree)/tools/perf/trace/beauty/fsmount.sh 425 + 426 + $(fsmount_arrays): $(linux_uapi_dir)/fs.h $(fsmount_tbls) 427 + $(Q)$(SHELL) '$(fsmount_tbls)' $(linux_uapi_dir) > $@ 428 + 429 + fspick_arrays := $(beauty_outdir)/fspick_arrays.c 430 + fspick_tbls := $(srctree)/tools/perf/trace/beauty/fspick.sh 431 + 432 + $(fspick_arrays): $(linux_uapi_dir)/fs.h $(fspick_tbls) 433 + $(Q)$(SHELL) '$(fspick_tbls)' $(linux_uapi_dir) > $@ 434 + 435 + fsconfig_arrays := $(beauty_outdir)/fsconfig_arrays.c 436 + fsconfig_tbls := $(srctree)/tools/perf/trace/beauty/fsconfig.sh 437 + 438 + $(fsconfig_arrays): $(linux_uapi_dir)/fs.h $(fsconfig_tbls) 439 + $(Q)$(SHELL) '$(fsconfig_tbls)' $(linux_uapi_dir) > $@ 440 + 423 441 pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c 424 442 asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/ 425 443 pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh ··· 512 494 $(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl) 513 495 $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@ 514 496 497 + move_mount_flags_array := $(beauty_outdir)/move_mount_flags_array.c 498 + move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh 499 + 500 + $(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl) 501 + $(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@ 502 + 515 503 prctl_option_array := $(beauty_outdir)/prctl_option_array.c 516 504 prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ 517 505 prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh ··· 549 525 550 526 $(arch_errno_name_array): $(arch_errno_tbl) 551 527 $(Q)$(SHELL) '$(arch_errno_tbl)' 
$(CC) $(arch_errno_hdr_dir) > $@ 528 + 529 + sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c 530 + sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh 531 + 532 + $(sync_file_range_arrays): $(linux_uapi_dir)/fs.h $(sync_file_range_tbls) 533 + $(Q)$(SHELL) '$(sync_file_range_tbls)' $(linux_uapi_dir) > $@ 552 534 553 535 all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) 554 536 ··· 659 629 660 630 prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \ 661 631 $(fadvise_advice_array) \ 632 + $(fsconfig_arrays) \ 633 + $(fsmount_arrays) \ 634 + $(fspick_arrays) \ 662 635 $(pkey_alloc_access_rights_array) \ 663 636 $(sndrv_pcm_ioctl_array) \ 664 637 $(sndrv_ctl_ioctl_array) \ ··· 672 639 $(madvise_behavior_array) \ 673 640 $(mmap_flags_array) \ 674 641 $(mount_flags_array) \ 642 + $(move_mount_flags_array) \ 675 643 $(perf_ioctl_array) \ 676 644 $(prctl_option_array) \ 677 645 $(usbdevfs_ioctl_array) \ 678 646 $(x86_arch_prctl_code_array) \ 679 647 $(rename_flags_array) \ 680 - $(arch_errno_name_array) 648 + $(arch_errno_name_array) \ 649 + $(sync_file_range_arrays) 681 650 682 651 $(OUTPUT)%.o: %.c prepare FORCE 683 652 $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ ··· 958 923 $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ 959 924 $(OUTPUT)pmu-events/pmu-events.c \ 960 925 $(OUTPUT)$(fadvise_advice_array) \ 926 + $(OUTPUT)$(fsconfig_arrays) \ 927 + $(OUTPUT)$(fsmount_arrays) \ 928 + $(OUTPUT)$(fspick_arrays) \ 961 929 $(OUTPUT)$(madvise_behavior_array) \ 962 930 $(OUTPUT)$(mmap_flags_array) \ 963 931 $(OUTPUT)$(mount_flags_array) \ 932 + $(OUTPUT)$(move_mount_flags_array) \ 964 933 $(OUTPUT)$(drm_ioctl_array) \ 965 934 $(OUTPUT)$(pkey_alloc_access_rights_array) \ 966 935 $(OUTPUT)$(sndrv_ctl_ioctl_array) \ ··· 978 939 $(OUTPUT)$(usbdevfs_ioctl_array) \ 979 940 $(OUTPUT)$(x86_arch_prctl_code_array) \ 980 941 
$(OUTPUT)$(rename_flags_array) \ 981 - $(OUTPUT)$(arch_errno_name_array) 942 + $(OUTPUT)$(arch_errno_name_array) \ 943 + $(OUTPUT)$(sync_file_range_arrays) 982 944 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean 983 945 984 946 #
+287 -23
tools/perf/arch/arm/util/cs-etm.c
··· 22 22 #include "../../util/pmu.h" 23 23 #include "../../util/thread_map.h" 24 24 #include "../../util/cs-etm.h" 25 + #include "../../util/util.h" 25 26 26 27 #include <errno.h> 27 28 #include <stdlib.h> ··· 32 31 struct auxtrace_record itr; 33 32 struct perf_pmu *cs_etm_pmu; 34 33 struct perf_evlist *evlist; 34 + int wrapped_cnt; 35 + bool *wrapped; 35 36 bool snapshot_mode; 36 37 size_t snapshot_size; 37 38 }; 38 39 40 + static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { 41 + [CS_ETM_ETMCCER] = "mgmt/etmccer", 42 + [CS_ETM_ETMIDR] = "mgmt/etmidr", 43 + }; 44 + 45 + static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { 46 + [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", 47 + [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", 48 + [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", 49 + [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", 50 + [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", 51 + }; 52 + 39 53 static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); 54 + 55 + static int cs_etm_set_context_id(struct auxtrace_record *itr, 56 + struct perf_evsel *evsel, int cpu) 57 + { 58 + struct cs_etm_recording *ptr; 59 + struct perf_pmu *cs_etm_pmu; 60 + char path[PATH_MAX]; 61 + int err = -EINVAL; 62 + u32 val; 63 + 64 + ptr = container_of(itr, struct cs_etm_recording, itr); 65 + cs_etm_pmu = ptr->cs_etm_pmu; 66 + 67 + if (!cs_etm_is_etmv4(itr, cpu)) 68 + goto out; 69 + 70 + /* Get a handle on TRCIDR2 */ 71 + snprintf(path, PATH_MAX, "cpu%d/%s", 72 + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); 73 + err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); 74 + 75 + /* There was a problem reading the file, bailing out */ 76 + if (err != 1) { 77 + pr_err("%s: can't read file %s\n", 78 + CORESIGHT_ETM_PMU_NAME, path); 79 + goto out; 80 + } 81 + 82 + /* 83 + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing 84 + * is supported: 85 + * 0b00000 Context ID tracing is not supported. 86 + * 0b00100 Maximum of 32-bit Context ID size. 87 + * All other values are reserved. 
88 + */ 89 + val = BMVAL(val, 5, 9); 90 + if (!val || val != 0x4) { 91 + err = -EINVAL; 92 + goto out; 93 + } 94 + 95 + /* All good, let the kernel know */ 96 + evsel->attr.config |= (1 << ETM_OPT_CTXTID); 97 + err = 0; 98 + 99 + out: 100 + 101 + return err; 102 + } 103 + 104 + static int cs_etm_set_timestamp(struct auxtrace_record *itr, 105 + struct perf_evsel *evsel, int cpu) 106 + { 107 + struct cs_etm_recording *ptr; 108 + struct perf_pmu *cs_etm_pmu; 109 + char path[PATH_MAX]; 110 + int err = -EINVAL; 111 + u32 val; 112 + 113 + ptr = container_of(itr, struct cs_etm_recording, itr); 114 + cs_etm_pmu = ptr->cs_etm_pmu; 115 + 116 + if (!cs_etm_is_etmv4(itr, cpu)) 117 + goto out; 118 + 119 + /* Get a handle on TRCIDR0 */ 120 + snprintf(path, PATH_MAX, "cpu%d/%s", 121 + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); 122 + err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); 123 + 124 + /* There was a problem reading the file, bailing out */ 125 + if (err != 1) { 126 + pr_err("%s: can't read file %s\n", 127 + CORESIGHT_ETM_PMU_NAME, path); 128 + goto out; 129 + } 130 + 131 + /* 132 + * TRCIDR0.TSSIZE, bit [28-24], indicates whether global timestamping 133 + * is supported: 134 + * 0b00000 Global timestamping is not implemented 135 + * 0b00110 Implementation supports a maximum timestamp of 48bits. 136 + * 0b01000 Implementation supports a maximum timestamp of 64bits. 
137 + */ 138 + val &= GENMASK(28, 24); 139 + if (!val) { 140 + err = -EINVAL; 141 + goto out; 142 + } 143 + 144 + /* All good, let the kernel know */ 145 + evsel->attr.config |= (1 << ETM_OPT_TS); 146 + err = 0; 147 + 148 + out: 149 + return err; 150 + } 151 + 152 + static int cs_etm_set_option(struct auxtrace_record *itr, 153 + struct perf_evsel *evsel, u32 option) 154 + { 155 + int i, err = -EINVAL; 156 + struct cpu_map *event_cpus = evsel->evlist->cpus; 157 + struct cpu_map *online_cpus = cpu_map__new(NULL); 158 + 159 + /* Set option of each CPU we have */ 160 + for (i = 0; i < cpu__max_cpu(); i++) { 161 + if (!cpu_map__has(event_cpus, i) || 162 + !cpu_map__has(online_cpus, i)) 163 + continue; 164 + 165 + if (option & ETM_OPT_CTXTID) { 166 + err = cs_etm_set_context_id(itr, evsel, i); 167 + if (err) 168 + goto out; 169 + } 170 + if (option & ETM_OPT_TS) { 171 + err = cs_etm_set_timestamp(itr, evsel, i); 172 + if (err) 173 + goto out; 174 + } 175 + if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) 176 + /* Nothing else is currently supported */ 177 + goto out; 178 + } 179 + 180 + err = 0; 181 + out: 182 + cpu_map__put(online_cpus); 183 + return err; 184 + } 40 185 41 186 static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr, 42 187 struct record_opts *opts, ··· 252 105 container_of(itr, struct cs_etm_recording, itr); 253 106 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; 254 107 struct perf_evsel *evsel, *cs_etm_evsel = NULL; 255 - const struct cpu_map *cpus = evlist->cpus; 108 + struct cpu_map *cpus = evlist->cpus; 256 109 bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0); 110 + int err = 0; 257 111 258 112 ptr->evlist = evlist; 259 113 ptr->snapshot_mode = opts->auxtrace_snapshot_mode; 114 + 115 + if (perf_can_record_switch_events()) 116 + opts->record_switch_events = true; 260 117 261 118 evlist__for_each_entry(evlist, evsel) { 262 119 if (evsel->attr.type == cs_etm_pmu->type) { ··· 392 241 393 242 /* 394 243 * In the case of per-cpu 
mmaps, we need the CPU on the 395 - * AUX event. 244 + * AUX event. We also need the contextID in order to be notified 245 + * when a context switch happened. 396 246 */ 397 - if (!cpu_map__empty(cpus)) 247 + if (!cpu_map__empty(cpus)) { 398 248 perf_evsel__set_sample_bit(cs_etm_evsel, CPU); 249 + 250 + err = cs_etm_set_option(itr, cs_etm_evsel, 251 + ETM_OPT_CTXTID | ETM_OPT_TS); 252 + if (err) 253 + goto out; 254 + } 399 255 400 256 /* Add dummy event to keep tracking */ 401 257 if (opts->full_auxtrace) { 402 258 struct perf_evsel *tracking_evsel; 403 - int err; 404 259 405 260 err = parse_events(evlist, "dummy:u", NULL); 406 261 if (err) 407 - return err; 262 + goto out; 408 263 409 264 tracking_evsel = perf_evlist__last(evlist); 410 265 perf_evlist__set_tracking_event(evlist, tracking_evsel); ··· 423 266 perf_evsel__set_sample_bit(tracking_evsel, TIME); 424 267 } 425 268 426 - return 0; 269 + out: 270 + return err; 427 271 } 428 272 429 273 static u64 cs_etm_get_config(struct auxtrace_record *itr) ··· 472 314 config_opts = cs_etm_get_config(itr); 473 315 if (config_opts & BIT(ETM_OPT_CYCACC)) 474 316 config |= BIT(ETM4_CFG_BIT_CYCACC); 317 + if (config_opts & BIT(ETM_OPT_CTXTID)) 318 + config |= BIT(ETM4_CFG_BIT_CTXTID); 475 319 if (config_opts & BIT(ETM_OPT_TS)) 476 320 config |= BIT(ETM4_CFG_BIT_TS); 477 321 if (config_opts & BIT(ETM_OPT_RETSTK)) ··· 522 362 (etmv4 * CS_ETMV4_PRIV_SIZE) + 523 363 (etmv3 * CS_ETMV3_PRIV_SIZE)); 524 364 } 525 - 526 - static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { 527 - [CS_ETM_ETMCCER] = "mgmt/etmccer", 528 - [CS_ETM_ETMIDR] = "mgmt/etmidr", 529 - }; 530 - 531 - static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { 532 - [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", 533 - [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", 534 - [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", 535 - [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", 536 - [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", 537 - }; 538 365 539 366 static bool cs_etm_is_etmv4(struct 
auxtrace_record *itr, int cpu) 540 367 { ··· 683 536 return 0; 684 537 } 685 538 686 - static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused, 539 + static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx) 540 + { 541 + bool *wrapped; 542 + int cnt = ptr->wrapped_cnt; 543 + 544 + /* Make @ptr->wrapped as big as @idx */ 545 + while (cnt <= idx) 546 + cnt++; 547 + 548 + /* 549 + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid 550 + * cross compilation problems where the host's system supports 551 + * reallocarray() but not the target. 552 + */ 553 + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool)); 554 + if (!wrapped) 555 + return -ENOMEM; 556 + 557 + wrapped[cnt - 1] = false; 558 + ptr->wrapped_cnt = cnt; 559 + ptr->wrapped = wrapped; 560 + 561 + return 0; 562 + } 563 + 564 + static bool cs_etm_buffer_has_wrapped(unsigned char *buffer, 565 + size_t buffer_size, u64 head) 566 + { 567 + u64 i, watermark; 568 + u64 *buf = (u64 *)buffer; 569 + size_t buf_size = buffer_size; 570 + 571 + /* 572 + * We want to look at the very last 512 bytes (chosen arbitrarily) in 573 + * the ring buffer. 574 + */ 575 + watermark = buf_size - 512; 576 + 577 + /* 578 + * @head is continuously increasing - if its value is equal to or greater 579 + * than the size of the ring buffer, it has wrapped around. 580 + */ 581 + if (head >= buffer_size) 582 + return true; 583 + 584 + /* 585 + * The value of @head is somewhere within the size of the ring buffer. 586 + * This can be that there hasn't been enough data to fill the ring 587 + * buffer yet or the trace time was so long that @head has numerically 588 + * wrapped around. To find out we need to check if we have data at the very 589 + * end of the ring buffer. We can reliably do this because mmap'ed 590 + * pages are zeroed out and there is a fresh mapping with every new 591 + * session. 
592 + */ 593 + 594 + /* @head is less than 512 bytes from the end of the ring buffer */ 595 + if (head > watermark) 596 + watermark = head; 597 + 598 + /* 599 + * Speed things up by using 64 bit transactions (see "u64 *buf" above) 600 + */ 601 + watermark >>= 3; 602 + buf_size >>= 3; 603 + 604 + /* 605 + * If we find trace data at the end of the ring buffer, @head has 606 + * been there and has numerically wrapped around at least once. 607 + */ 608 + for (i = watermark; i < buf_size; i++) 609 + if (buf[i]) 610 + return true; 611 + 612 + return false; 613 + } 614 + 615 + static int cs_etm_find_snapshot(struct auxtrace_record *itr, 687 616 int idx, struct auxtrace_mmap *mm, 688 - unsigned char *data __maybe_unused, 617 + unsigned char *data, 689 618 u64 *head, u64 *old) 690 619 { 620 + int err; 621 + bool wrapped; 622 + struct cs_etm_recording *ptr = 623 + container_of(itr, struct cs_etm_recording, itr); 624 + 625 + /* 626 + * Allocate memory to keep track of wrapping if this is the first 627 + * time we deal with this *mm. 628 + */ 629 + if (idx >= ptr->wrapped_cnt) { 630 + err = cs_etm_alloc_wrapped_array(ptr, idx); 631 + if (err) 632 + return err; 633 + } 634 + 635 + /* 636 + * Check to see if *head has wrapped around. If it hasn't, only the 637 + * amount of data between *head and *old is snapshot'ed to avoid 638 + * bloating the perf.data file with zeros. But as soon as *head has 639 + * wrapped around, the entire size of the AUX ring buffer is taken. 640 + */ 641 + wrapped = ptr->wrapped[idx]; 642 + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) { 643 + wrapped = true; 644 + ptr->wrapped[idx] = true; 645 + } 646 + 691 647 pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", 692 648 __func__, idx, (size_t)*old, (size_t)*head, mm->len); 693 649 694 - *old = *head; 695 - *head += mm->len; 650 + /* No wrap has occurred, we can just use *head and *old. 
*/ 651 + if (!wrapped) 652 + return 0; 653 + 654 + /* 655 + * *head has wrapped around - adjust *head and *old to pickup the 656 + * entire content of the AUX buffer. 657 + */ 658 + if (*head >= mm->len) { 659 + *old = *head - mm->len; 660 + } else { 661 + *head += mm->len; 662 + *old = *head - mm->len; 663 + } 696 664 697 665 return 0; 698 666 } ··· 848 586 { 849 587 struct cs_etm_recording *ptr = 850 588 container_of(itr, struct cs_etm_recording, itr); 589 + 590 + zfree(&ptr->wrapped); 851 591 free(ptr); 852 592 } 853 593
+1 -1
tools/perf/arch/arm64/Build
··· 1 1 perf-y += util/ 2 - perf-$(CONFIG_DWARF_UNWIND) += tests/ 2 + perf-y += tests/
+1 -1
tools/perf/arch/arm64/tests/Build
··· 1 1 perf-y += regs_load.o 2 - perf-y += dwarf-unwind.o 2 + perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o 3 3 4 4 perf-y += arch-tests.o
+48
tools/perf/arch/csky/annotate/instructions.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. 3 + 4 + #include <linux/compiler.h> 5 + 6 + static struct ins_ops *csky__associate_ins_ops(struct arch *arch, 7 + const char *name) 8 + { 9 + struct ins_ops *ops = NULL; 10 + 11 + /* catch all kinds of jumps */ 12 + if (!strcmp(name, "bt") || 13 + !strcmp(name, "bf") || 14 + !strcmp(name, "bez") || 15 + !strcmp(name, "bnez") || 16 + !strcmp(name, "bnezad") || 17 + !strcmp(name, "bhsz") || 18 + !strcmp(name, "bhz") || 19 + !strcmp(name, "blsz") || 20 + !strcmp(name, "blz") || 21 + !strcmp(name, "br") || 22 + !strcmp(name, "jmpi") || 23 + !strcmp(name, "jmp")) 24 + ops = &jump_ops; 25 + 26 + /* catch function calls */ 27 + if (!strcmp(name, "bsr") || 28 + !strcmp(name, "jsri") || 29 + !strcmp(name, "jsr")) 30 + ops = &call_ops; 31 + 32 + /* catch function returns */ 33 + if (!strcmp(name, "rts")) 34 + ops = &ret_ops; 35 + 36 + if (ops) 37 + arch__associate_ins_ops(arch, name, ops); 38 + return ops; 39 + } 40 + 41 + static int csky__annotate_init(struct arch *arch, char *cpuid __maybe_unused) 42 + { 43 + arch->initialized = true; 44 + arch->objdump.comment_char = '/'; 45 + arch->associate_instruction_ops = csky__associate_ins_ops; 46 + 47 + return 0; 48 + }
+1 -1
tools/perf/arch/s390/util/header.c
··· 11 11 #include <unistd.h> 12 12 #include <stdio.h> 13 13 #include <string.h> 14 - #include <ctype.h> 14 + #include <linux/ctype.h> 15 15 16 16 #include "../../util/header.h" 17 17 #include "../../util/util.h"
+1
tools/perf/arch/x86/include/arch-tests.h
··· 9 9 int test__rdpmc(struct test *test __maybe_unused, int subtest); 10 10 int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); 11 11 int test__insn_x86(struct test *test __maybe_unused, int subtest); 12 + int test__intel_pt_pkt_decoder(struct test *test, int subtest); 12 13 int test__bp_modify(struct test *test, int subtest); 13 14 14 15 #ifdef HAVE_DWARF_UNWIND_SUPPORT
+1 -1
tools/perf/arch/x86/tests/Build
··· 4 4 perf-y += arch-tests.o 5 5 perf-y += rdpmc.o 6 6 perf-y += perf-time-to-tsc.o 7 - perf-$(CONFIG_AUXTRACE) += insn-x86.o 7 + perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o 8 8 perf-$(CONFIG_X86_64) += bp-modify.o
+4
tools/perf/arch/x86/tests/arch-tests.c
··· 23 23 .desc = "x86 instruction decoder - new instructions", 24 24 .func = test__insn_x86, 25 25 }, 26 + { 27 + .desc = "Intel PT packet decoder", 28 + .func = test__intel_pt_pkt_decoder, 29 + }, 26 30 #endif 27 31 #if defined(__x86_64__) 28 32 {
+1
tools/perf/arch/x86/tests/intel-cqm.c
··· 6 6 #include "evlist.h" 7 7 #include "evsel.h" 8 8 #include "arch-tests.h" 9 + #include "util.h" 9 10 10 11 #include <signal.h> 11 12 #include <sys/mman.h>
+304
tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <string.h> 4 + 5 + #include "intel-pt-decoder/intel-pt-pkt-decoder.h" 6 + 7 + #include "debug.h" 8 + #include "tests/tests.h" 9 + #include "arch-tests.h" 10 + 11 + /** 12 + * struct test_data - Test data. 13 + * @len: number of bytes to decode 14 + * @bytes: bytes to decode 15 + * @ctx: packet context to decode 16 + * @packet: expected packet 17 + * @new_ctx: expected new packet context 18 + * @ctx_unchanged: the packet context must not change 19 + */ 20 + struct test_data { 21 + int len; 22 + u8 bytes[INTEL_PT_PKT_MAX_SZ]; 23 + enum intel_pt_pkt_ctx ctx; 24 + struct intel_pt_pkt packet; 25 + enum intel_pt_pkt_ctx new_ctx; 26 + int ctx_unchanged; 27 + } data[] = { 28 + /* Padding Packet */ 29 + {1, {0}, 0, {INTEL_PT_PAD, 0, 0}, 0, 1 }, 30 + /* Short Taken/Not Taken Packet */ 31 + {1, {4}, 0, {INTEL_PT_TNT, 1, 0}, 0, 0 }, 32 + {1, {6}, 0, {INTEL_PT_TNT, 1, 0x20ULL << 58}, 0, 0 }, 33 + {1, {0x80}, 0, {INTEL_PT_TNT, 6, 0}, 0, 0 }, 34 + {1, {0xfe}, 0, {INTEL_PT_TNT, 6, 0x3fULL << 58}, 0, 0 }, 35 + /* Long Taken/Not Taken Packet */ 36 + {8, {0x02, 0xa3, 2}, 0, {INTEL_PT_TNT, 1, 0xa302ULL << 47}, 0, 0 }, 37 + {8, {0x02, 0xa3, 3}, 0, {INTEL_PT_TNT, 1, 0x1a302ULL << 47}, 0, 0 }, 38 + {8, {0x02, 0xa3, 0, 0, 0, 0, 0, 0x80}, 0, {INTEL_PT_TNT, 47, 0xa302ULL << 1}, 0, 0 }, 39 + {8, {0x02, 0xa3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_TNT, 47, 0xffffffffffffa302ULL << 1}, 0, 0 }, 40 + /* Target IP Packet */ 41 + {1, {0x0d}, 0, {INTEL_PT_TIP, 0, 0}, 0, 0 }, 42 + {3, {0x2d, 1, 2}, 0, {INTEL_PT_TIP, 1, 0x201}, 0, 0 }, 43 + {5, {0x4d, 1, 2, 3, 4}, 0, {INTEL_PT_TIP, 2, 0x4030201}, 0, 0 }, 44 + {7, {0x6d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 3, 0x60504030201}, 0, 0 }, 45 + {7, {0x8d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 4, 0x60504030201}, 0, 0 }, 46 + {9, {0xcd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP, 6, 0x807060504030201}, 0, 0 }, 47 + /* Packet Generation Enable */ 48 + {1, {0x11}, 0, {INTEL_PT_TIP_PGE, 0, 0}, 
0, 0 }, 49 + {3, {0x31, 1, 2}, 0, {INTEL_PT_TIP_PGE, 1, 0x201}, 0, 0 }, 50 + {5, {0x51, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGE, 2, 0x4030201}, 0, 0 }, 51 + {7, {0x71, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 3, 0x60504030201}, 0, 0 }, 52 + {7, {0x91, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 4, 0x60504030201}, 0, 0 }, 53 + {9, {0xd1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGE, 6, 0x807060504030201}, 0, 0 }, 54 + /* Packet Generation Disable */ 55 + {1, {0x01}, 0, {INTEL_PT_TIP_PGD, 0, 0}, 0, 0 }, 56 + {3, {0x21, 1, 2}, 0, {INTEL_PT_TIP_PGD, 1, 0x201}, 0, 0 }, 57 + {5, {0x41, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGD, 2, 0x4030201}, 0, 0 }, 58 + {7, {0x61, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 3, 0x60504030201}, 0, 0 }, 59 + {7, {0x81, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 4, 0x60504030201}, 0, 0 }, 60 + {9, {0xc1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGD, 6, 0x807060504030201}, 0, 0 }, 61 + /* Flow Update Packet */ 62 + {1, {0x1d}, 0, {INTEL_PT_FUP, 0, 0}, 0, 0 }, 63 + {3, {0x3d, 1, 2}, 0, {INTEL_PT_FUP, 1, 0x201}, 0, 0 }, 64 + {5, {0x5d, 1, 2, 3, 4}, 0, {INTEL_PT_FUP, 2, 0x4030201}, 0, 0 }, 65 + {7, {0x7d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 3, 0x60504030201}, 0, 0 }, 66 + {7, {0x9d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 4, 0x60504030201}, 0, 0 }, 67 + {9, {0xdd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_FUP, 6, 0x807060504030201}, 0, 0 }, 68 + /* Paging Information Packet */ 69 + {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201}, 0, 0 }, 70 + {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201 | (1ULL << 63)}, 0, 0 }, 71 + /* Mode Exec Packet */ 72 + {2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 }, 73 + {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 0, 64}, 0, 0 }, 74 + {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 0, 32}, 0, 0 }, 75 + /* Mode TSX Packet */ 76 + {2, {0x99, 0x20}, 0, {INTEL_PT_MODE_TSX, 0, 0}, 0, 0 }, 77 + {2, {0x99, 0x21}, 0, {INTEL_PT_MODE_TSX, 0, 1}, 0, 0 }, 78 + {2, {0x99, 0x22}, 0, {INTEL_PT_MODE_TSX, 
0, 2}, 0, 0 }, 79 + /* Trace Stop Packet */ 80 + {2, {0x02, 0x83}, 0, {INTEL_PT_TRACESTOP, 0, 0}, 0, 0 }, 81 + /* Core:Bus Ratio Packet */ 82 + {4, {0x02, 0x03, 0x12, 0}, 0, {INTEL_PT_CBR, 0, 0x12}, 0, 1 }, 83 + /* Timestamp Counter Packet */ 84 + {8, {0x19, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_TSC, 0, 0x7060504030201}, 0, 1 }, 85 + /* Mini Time Counter Packet */ 86 + {2, {0x59, 0x12}, 0, {INTEL_PT_MTC, 0, 0x12}, 0, 1 }, 87 + /* TSC / MTC Alignment Packet */ 88 + {7, {0x02, 0x73}, 0, {INTEL_PT_TMA, 0, 0}, 0, 1 }, 89 + {7, {0x02, 0x73, 1, 2}, 0, {INTEL_PT_TMA, 0, 0x201}, 0, 1 }, 90 + {7, {0x02, 0x73, 0, 0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0}, 0, 1 }, 91 + {7, {0x02, 0x73, 0x80, 0xc0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0xc080}, 0, 1 }, 92 + /* Cycle Count Packet */ 93 + {1, {0x03}, 0, {INTEL_PT_CYC, 0, 0}, 0, 1 }, 94 + {1, {0x0b}, 0, {INTEL_PT_CYC, 0, 1}, 0, 1 }, 95 + {1, {0xfb}, 0, {INTEL_PT_CYC, 0, 0x1f}, 0, 1 }, 96 + {2, {0x07, 2}, 0, {INTEL_PT_CYC, 0, 0x20}, 0, 1 }, 97 + {2, {0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xfff}, 0, 1 }, 98 + {3, {0x07, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x1000}, 0, 1 }, 99 + {3, {0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7ffff}, 0, 1 }, 100 + {4, {0x07, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x80000}, 0, 1 }, 101 + {4, {0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3ffffff}, 0, 1 }, 102 + {5, {0x07, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x4000000}, 0, 1 }, 103 + {5, {0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1ffffffff}, 0, 1 }, 104 + {6, {0x07, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x200000000}, 0, 1 }, 105 + {6, {0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xffffffffff}, 0, 1 }, 106 + {7, {0x07, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x10000000000}, 0, 1 }, 107 + {7, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7fffffffffff}, 0, 1 }, 108 + {8, {0x07, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x800000000000}, 0, 1 }, 109 + {8, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 
0x3fffffffffffff}, 0, 1 }, 110 + {9, {0x07, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x40000000000000}, 0, 1 }, 111 + {9, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1fffffffffffffff}, 0, 1 }, 112 + {10, {0x07, 1, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x2000000000000000}, 0, 1 }, 113 + {10, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe}, 0, {INTEL_PT_CYC, 0, 0xffffffffffffffff}, 0, 1 }, 114 + /* Virtual-Machine Control Structure Packet */ 115 + {7, {0x02, 0xc8, 1, 2, 3, 4, 5}, 0, {INTEL_PT_VMCS, 5, 0x504030201}, 0, 0 }, 116 + /* Overflow Packet */ 117 + {2, {0x02, 0xf3}, 0, {INTEL_PT_OVF, 0, 0}, 0, 0 }, 118 + {2, {0x02, 0xf3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 }, 119 + {2, {0x02, 0xf3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 }, 120 + /* Packet Stream Boundary*/ 121 + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, 0, {INTEL_PT_PSB, 0, 0}, 0, 0 }, 122 + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_4_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 }, 123 + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_8_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 }, 124 + /* PSB End Packet */ 125 + {2, {0x02, 0x23}, 0, {INTEL_PT_PSBEND, 0, 0}, 0, 0 }, 126 + /* Maintenance Packet */ 127 + {11, {0x02, 0xc3, 0x88, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_MNT, 0, 0x7060504030201}, 0, 1 }, 128 + /* Write Data to PT Packet */ 129 + {6, {0x02, 0x12, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE, 0, 0x4030201}, 0, 0 }, 130 + {10, {0x02, 0x32, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE, 1, 0x807060504030201}, 0, 0 }, 131 + {6, {0x02, 0x92, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE_IP, 0, 0x4030201}, 0, 0 }, 132 + {10, {0x02, 0xb2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE_IP, 1, 0x807060504030201}, 0, 0 }, 133 + /* Execution Stop Packet */ 134 + {2, {0x02, 0x62}, 0, 
{INTEL_PT_EXSTOP, 0, 0}, 0, 1 }, 135 + {2, {0x02, 0xe2}, 0, {INTEL_PT_EXSTOP_IP, 0, 0}, 0, 1 }, 136 + /* Monitor Wait Packet */ 137 + {10, {0x02, 0xc2}, 0, {INTEL_PT_MWAIT, 0, 0}, 0, 0 }, 138 + {10, {0x02, 0xc2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x807060504030201}, 0, 0 }, 139 + {10, {0x02, 0xc2, 0xff, 2, 3, 4, 7, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x8070607040302ff}, 0, 0 }, 140 + /* Power Entry Packet */ 141 + {4, {0x02, 0x22}, 0, {INTEL_PT_PWRE, 0, 0}, 0, 1 }, 142 + {4, {0x02, 0x22, 1, 2}, 0, {INTEL_PT_PWRE, 0, 0x0201}, 0, 1 }, 143 + {4, {0x02, 0x22, 0x80, 0x34}, 0, {INTEL_PT_PWRE, 0, 0x3480}, 0, 1 }, 144 + {4, {0x02, 0x22, 0x00, 0x56}, 0, {INTEL_PT_PWRE, 0, 0x5600}, 0, 1 }, 145 + /* Power Exit Packet */ 146 + {7, {0x02, 0xa2}, 0, {INTEL_PT_PWRX, 0, 0}, 0, 1 }, 147 + {7, {0x02, 0xa2, 1, 2, 3, 4, 5}, 0, {INTEL_PT_PWRX, 0, 0x504030201}, 0, 1 }, 148 + {7, {0x02, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_PWRX, 0, 0xffffffffff}, 0, 1 }, 149 + /* Block Begin Packet */ 150 + {3, {0x02, 0x63, 0x00}, 0, {INTEL_PT_BBP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 }, 151 + {3, {0x02, 0x63, 0x80}, 0, {INTEL_PT_BBP, 1, 0}, INTEL_PT_BLK_4_CTX, 0 }, 152 + {3, {0x02, 0x63, 0x1f}, 0, {INTEL_PT_BBP, 0, 0x1f}, INTEL_PT_BLK_8_CTX, 0 }, 153 + {3, {0x02, 0x63, 0x9f}, 0, {INTEL_PT_BBP, 1, 0x1f}, INTEL_PT_BLK_4_CTX, 0 }, 154 + /* 4-byte Block Item Packet */ 155 + {5, {0x04}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_4_CTX, 0 }, 156 + {5, {0xfc}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_4_CTX, 0 }, 157 + {5, {0x04, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 }, 158 + {5, {0xfc, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 }, 159 + /* 8-byte Block Item Packet */ 160 + {9, {0x04}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 }, 161 + {9, {0xfc}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_8_CTX, 0 }, 162 + {9, {0x04, 1, 2, 3, 4, 5, 6, 7, 
8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 }, 163 + {9, {0xfc, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 }, 164 + /* Block End Packet */ 165 + {2, {0x02, 0x33}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, 166 + {2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, 167 + {2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, 168 + {2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, 169 + /* Terminator */ 170 + {0, {0}, 0, {0, 0, 0}, 0, 0 }, 171 + }; 172 + 173 + static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len) 174 + { 175 + char desc[INTEL_PT_PKT_DESC_MAX]; 176 + int ret, i; 177 + 178 + for (i = 0; i < len; i++) 179 + pr_debug(" %02x", bytes[i]); 180 + for (; i < INTEL_PT_PKT_MAX_SZ; i++) 181 + pr_debug(" "); 182 + pr_debug(" "); 183 + ret = intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX); 184 + if (ret < 0) { 185 + pr_debug("intel_pt_pkt_desc failed!\n"); 186 + return TEST_FAIL; 187 + } 188 + pr_debug("%s\n", desc); 189 + 190 + return TEST_OK; 191 + } 192 + 193 + static void decoding_failed(struct test_data *d) 194 + { 195 + pr_debug("Decoding failed!\n"); 196 + pr_debug("Decoding: "); 197 + dump_packet(&d->packet, d->bytes, d->len); 198 + } 199 + 200 + static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len, 201 + enum intel_pt_pkt_ctx new_ctx) 202 + { 203 + decoding_failed(d); 204 + 205 + if (len != d->len) 206 + pr_debug("Expected length: %d Decoded length %d\n", 207 + d->len, len); 208 + 209 + if (packet->type != d->packet.type) 210 + pr_debug("Expected type: %d Decoded type %d\n", 211 + d->packet.type, packet->type); 212 + 213 + if (packet->count != d->packet.count) 214 + pr_debug("Expected count: %d Decoded count %d\n", 215 + d->packet.count, packet->count); 216 + 217 + if (packet->payload != d->packet.payload) 218 + pr_debug("Expected payload: 0x%llx 
Decoded payload 0x%llx\n", 219 + (unsigned long long)d->packet.payload, 220 + (unsigned long long)packet->payload); 221 + 222 + if (new_ctx != d->new_ctx) 223 + pr_debug("Expected packet context: %d Decoded packet context %d\n", 224 + d->new_ctx, new_ctx); 225 + 226 + return TEST_FAIL; 227 + } 228 + 229 + static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet, 230 + enum intel_pt_pkt_ctx ctx) 231 + { 232 + enum intel_pt_pkt_ctx old_ctx = ctx; 233 + 234 + intel_pt_upd_pkt_ctx(packet, &ctx); 235 + 236 + if (ctx != old_ctx) { 237 + decoding_failed(d); 238 + pr_debug("Packet context changed!\n"); 239 + return TEST_FAIL; 240 + } 241 + 242 + return TEST_OK; 243 + } 244 + 245 + static int test_one(struct test_data *d) 246 + { 247 + struct intel_pt_pkt packet; 248 + enum intel_pt_pkt_ctx ctx = d->ctx; 249 + int ret; 250 + 251 + memset(&packet, 0xff, sizeof(packet)); 252 + 253 + /* Decode a packet */ 254 + ret = intel_pt_get_packet(d->bytes, d->len, &packet, &ctx); 255 + if (ret < 0 || ret > INTEL_PT_PKT_MAX_SZ) { 256 + decoding_failed(d); 257 + pr_debug("intel_pt_get_packet returned %d\n", ret); 258 + return TEST_FAIL; 259 + } 260 + 261 + /* Some packets must always leave the packet context unchanged */ 262 + if (d->ctx_unchanged) { 263 + int err; 264 + 265 + err = test_ctx_unchanged(d, &packet, INTEL_PT_NO_CTX); 266 + if (err) 267 + return err; 268 + err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_4_CTX); 269 + if (err) 270 + return err; 271 + err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_8_CTX); 272 + if (err) 273 + return err; 274 + } 275 + 276 + /* Compare to the expected values */ 277 + if (ret != d->len || packet.type != d->packet.type || 278 + packet.count != d->packet.count || 279 + packet.payload != d->packet.payload || ctx != d->new_ctx) 280 + return fail(d, &packet, ret, ctx); 281 + 282 + pr_debug("Decoded ok:"); 283 + ret = dump_packet(&d->packet, d->bytes, d->len); 284 + 285 + return ret; 286 + } 287 + 288 + /* 289 + * This test 
feeds byte sequences to the Intel PT packet decoder and checks the 290 + * results. Changes to the packet context are also checked. 291 + */ 292 + int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused) 293 + { 294 + struct test_data *d = data; 295 + int ret; 296 + 297 + for (d = data; d->len; d++) { 298 + ret = test_one(d); 299 + if (ret) 300 + return ret; 301 + } 302 + 303 + return TEST_OK; 304 + }
+1
tools/perf/arch/x86/util/intel-pt.c
··· 25 25 #include "../../util/auxtrace.h" 26 26 #include "../../util/tsc.h" 27 27 #include "../../util/intel-pt.h" 28 + #include "../../util/util.h" 28 29 29 30 #define KiB(x) ((x) * 1024) 30 31 #define MiB(x) ((x) * 1024 * 1024)
+2 -1
tools/perf/arch/x86/util/machine.c
··· 3 3 #include <linux/string.h> 4 4 #include <stdlib.h> 5 5 6 + #include "../../util/util.h" 6 7 #include "../../util/machine.h" 7 8 #include "../../util/map.h" 8 9 #include "../../util/symbol.h" 9 - #include "../../util/sane_ctype.h" 10 + #include <linux/ctype.h> 10 11 11 12 #include <symbol/kallsyms.h> 12 13
+377 -5
tools/perf/builtin-diff.c
··· 20 20 #include "util/data.h" 21 21 #include "util/config.h" 22 22 #include "util/time-utils.h" 23 + #include "util/annotate.h" 24 + #include "util/map.h" 23 25 24 26 #include <errno.h> 25 27 #include <inttypes.h> ··· 34 32 struct perf_time_interval *ptime_range; 35 33 int range_size; 36 34 int range_num; 35 + bool has_br_stack; 37 36 }; 38 37 39 38 /* Diff command specific HPP columns. */ ··· 47 44 PERF_HPP_DIFF__WEIGHTED_DIFF, 48 45 PERF_HPP_DIFF__FORMULA, 49 46 PERF_HPP_DIFF__DELTA_ABS, 47 + PERF_HPP_DIFF__CYCLES, 50 48 51 49 PERF_HPP_DIFF__MAX_INDEX 52 50 }; ··· 90 86 static const char *cpu_list; 91 87 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 92 88 89 + static struct addr_location dummy_al; 90 + 93 91 enum { 94 92 COMPUTE_DELTA, 95 93 COMPUTE_RATIO, 96 94 COMPUTE_WEIGHTED_DIFF, 97 95 COMPUTE_DELTA_ABS, 96 + COMPUTE_CYCLES, 98 97 COMPUTE_MAX, 99 98 }; 100 99 ··· 106 99 [COMPUTE_DELTA_ABS] = "delta-abs", 107 100 [COMPUTE_RATIO] = "ratio", 108 101 [COMPUTE_WEIGHTED_DIFF] = "wdiff", 102 + [COMPUTE_CYCLES] = "cycles", 109 103 }; 110 104 111 105 static int compute = COMPUTE_DELTA_ABS; ··· 116 108 [COMPUTE_DELTA_ABS] = PERF_HPP_DIFF__DELTA_ABS, 117 109 [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO, 118 110 [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF, 111 + [COMPUTE_CYCLES] = PERF_HPP_DIFF__CYCLES, 119 112 }; 120 113 121 114 #define MAX_COL_WIDTH 70 ··· 155 146 [PERF_HPP_DIFF__FORMULA] = { 156 147 .name = "Formula", 157 148 .width = MAX_COL_WIDTH, 149 + }, 150 + [PERF_HPP_DIFF__CYCLES] = { 151 + .name = "[Program Block Range] Cycles Diff", 152 + .width = 70, 158 153 } 159 154 }; 160 155 ··· 348 335 return -1; 349 336 } 350 337 338 + static void *block_hist_zalloc(size_t size) 339 + { 340 + struct block_hist *bh; 341 + 342 + bh = zalloc(size + sizeof(*bh)); 343 + if (!bh) 344 + return NULL; 345 + 346 + return &bh->he; 347 + } 348 + 349 + static void block_hist_free(void *he) 350 + { 351 + struct block_hist *bh; 352 + 353 + bh = container_of(he, struct 
block_hist, he); 354 + hists__delete_entries(&bh->block_hists); 355 + free(bh); 356 + } 357 + 358 + struct hist_entry_ops block_hist_ops = { 359 + .new = block_hist_zalloc, 360 + .free = block_hist_free, 361 + }; 362 + 351 363 static int diff__process_sample_event(struct perf_tool *tool, 352 364 union perf_event *event, 353 365 struct perf_sample *sample, ··· 400 362 goto out_put; 401 363 } 402 364 403 - if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) { 404 - pr_warning("problem incrementing symbol period, skipping event\n"); 405 - goto out_put; 365 + if (compute != COMPUTE_CYCLES) { 366 + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, 367 + true)) { 368 + pr_warning("problem incrementing symbol period, " 369 + "skipping event\n"); 370 + goto out_put; 371 + } 372 + } else { 373 + if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL, 374 + NULL, NULL, sample, true)) { 375 + pr_warning("problem incrementing symbol period, " 376 + "skipping event\n"); 377 + goto out_put; 378 + } 379 + 380 + hist__account_cycles(sample->branch_stack, &al, sample, false); 406 381 } 407 382 408 383 /* ··· 525 474 } 526 475 } 527 476 477 + static int64_t block_cmp(struct perf_hpp_fmt *fmt __maybe_unused, 478 + struct hist_entry *left, struct hist_entry *right) 479 + { 480 + struct block_info *bi_l = left->block_info; 481 + struct block_info *bi_r = right->block_info; 482 + int cmp; 483 + 484 + if (!bi_l->sym || !bi_r->sym) { 485 + if (!bi_l->sym && !bi_r->sym) 486 + return 0; 487 + else if (!bi_l->sym) 488 + return -1; 489 + else 490 + return 1; 491 + } 492 + 493 + if (bi_l->sym == bi_r->sym) { 494 + if (bi_l->start == bi_r->start) { 495 + if (bi_l->end == bi_r->end) 496 + return 0; 497 + else 498 + return (int64_t)(bi_r->end - bi_l->end); 499 + } else 500 + return (int64_t)(bi_r->start - bi_l->start); 501 + } else { 502 + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); 503 + return cmp; 504 + } 505 + 506 + if (bi_l->sym->start != bi_r->sym->start) 
507 + return (int64_t)(bi_r->sym->start - bi_l->sym->start); 508 + 509 + return (int64_t)(bi_r->sym->end - bi_l->sym->end); 510 + } 511 + 512 + static int64_t block_cycles_diff_cmp(struct hist_entry *left, 513 + struct hist_entry *right) 514 + { 515 + bool pairs_left = hist_entry__has_pairs(left); 516 + bool pairs_right = hist_entry__has_pairs(right); 517 + s64 l, r; 518 + 519 + if (!pairs_left && !pairs_right) 520 + return 0; 521 + 522 + l = labs(left->diff.cycles); 523 + r = labs(right->diff.cycles); 524 + return r - l; 525 + } 526 + 527 + static int64_t block_sort(struct perf_hpp_fmt *fmt __maybe_unused, 528 + struct hist_entry *left, struct hist_entry *right) 529 + { 530 + return block_cycles_diff_cmp(right, left); 531 + } 532 + 533 + static void init_block_hist(struct block_hist *bh) 534 + { 535 + __hists__init(&bh->block_hists, &bh->block_list); 536 + perf_hpp_list__init(&bh->block_list); 537 + 538 + INIT_LIST_HEAD(&bh->block_fmt.list); 539 + INIT_LIST_HEAD(&bh->block_fmt.sort_list); 540 + bh->block_fmt.cmp = block_cmp; 541 + bh->block_fmt.sort = block_sort; 542 + perf_hpp_list__register_sort_field(&bh->block_list, 543 + &bh->block_fmt); 544 + bh->valid = true; 545 + } 546 + 547 + static void init_block_info(struct block_info *bi, struct symbol *sym, 548 + struct cyc_hist *ch, int offset) 549 + { 550 + bi->sym = sym; 551 + bi->start = ch->start; 552 + bi->end = offset; 553 + bi->cycles = ch->cycles; 554 + bi->cycles_aggr = ch->cycles_aggr; 555 + bi->num = ch->num; 556 + bi->num_aggr = ch->num_aggr; 557 + } 558 + 559 + static int process_block_per_sym(struct hist_entry *he) 560 + { 561 + struct annotation *notes; 562 + struct cyc_hist *ch; 563 + struct block_hist *bh; 564 + 565 + if (!he->ms.map || !he->ms.sym) 566 + return 0; 567 + 568 + notes = symbol__annotation(he->ms.sym); 569 + if (!notes || !notes->src || !notes->src->cycles_hist) 570 + return 0; 571 + 572 + bh = container_of(he, struct block_hist, he); 573 + init_block_hist(bh); 574 + 575 + ch = 
notes->src->cycles_hist; 576 + for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { 577 + if (ch[i].num_aggr) { 578 + struct block_info *bi; 579 + struct hist_entry *he_block; 580 + 581 + bi = block_info__new(); 582 + if (!bi) 583 + return -1; 584 + 585 + init_block_info(bi, he->ms.sym, &ch[i], i); 586 + he_block = hists__add_entry_block(&bh->block_hists, 587 + &dummy_al, bi); 588 + if (!he_block) { 589 + block_info__put(bi); 590 + return -1; 591 + } 592 + } 593 + } 594 + 595 + return 0; 596 + } 597 + 598 + static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) 599 + { 600 + struct block_info *bi_a = a->block_info; 601 + struct block_info *bi_b = b->block_info; 602 + int cmp; 603 + 604 + if (!bi_a->sym || !bi_b->sym) 605 + return -1; 606 + 607 + cmp = strcmp(bi_a->sym->name, bi_b->sym->name); 608 + 609 + if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) 610 + return 0; 611 + 612 + return -1; 613 + } 614 + 615 + static struct hist_entry *get_block_pair(struct hist_entry *he, 616 + struct hists *hists_pair) 617 + { 618 + struct rb_root_cached *root = hists_pair->entries_in; 619 + struct rb_node *next = rb_first_cached(root); 620 + int cmp; 621 + 622 + while (next != NULL) { 623 + struct hist_entry *he_pair = rb_entry(next, struct hist_entry, 624 + rb_node_in); 625 + 626 + next = rb_next(&he_pair->rb_node_in); 627 + 628 + cmp = block_pair_cmp(he_pair, he); 629 + if (!cmp) 630 + return he_pair; 631 + } 632 + 633 + return NULL; 634 + } 635 + 636 + static void compute_cycles_diff(struct hist_entry *he, 637 + struct hist_entry *pair) 638 + { 639 + pair->diff.computed = true; 640 + if (pair->block_info->num && he->block_info->num) { 641 + pair->diff.cycles = 642 + pair->block_info->cycles_aggr / pair->block_info->num_aggr - 643 + he->block_info->cycles_aggr / he->block_info->num_aggr; 644 + } 645 + } 646 + 647 + static void block_hists_match(struct hists *hists_base, 648 + struct hists *hists_pair) 649 + { 650 + struct 
rb_root_cached *root = hists_base->entries_in; 651 + struct rb_node *next = rb_first_cached(root); 652 + 653 + while (next != NULL) { 654 + struct hist_entry *he = rb_entry(next, struct hist_entry, 655 + rb_node_in); 656 + struct hist_entry *pair = get_block_pair(he, hists_pair); 657 + 658 + next = rb_next(&he->rb_node_in); 659 + 660 + if (pair) { 661 + hist_entry__add_pair(pair, he); 662 + compute_cycles_diff(he, pair); 663 + } 664 + } 665 + } 666 + 667 + static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) 668 + { 669 + /* Skip the calculation of column length in output_resort */ 670 + he->filtered = true; 671 + return 0; 672 + } 673 + 528 674 static void hists__precompute(struct hists *hists) 529 675 { 530 676 struct rb_root_cached *root; ··· 734 486 735 487 next = rb_first_cached(root); 736 488 while (next != NULL) { 489 + struct block_hist *bh, *pair_bh; 737 490 struct hist_entry *he, *pair; 738 491 struct data__file *d; 739 492 int i; 740 493 741 494 he = rb_entry(next, struct hist_entry, rb_node_in); 742 495 next = rb_next(&he->rb_node_in); 496 + 497 + if (compute == COMPUTE_CYCLES) 498 + process_block_per_sym(he); 743 499 744 500 data__for_each_file_new(i, d) { 745 501 pair = get_pair_data(he, d); ··· 760 508 break; 761 509 case COMPUTE_WEIGHTED_DIFF: 762 510 compute_wdiff(he, pair); 511 + break; 512 + case COMPUTE_CYCLES: 513 + process_block_per_sym(pair); 514 + bh = container_of(he, struct block_hist, he); 515 + pair_bh = container_of(pair, struct block_hist, 516 + he); 517 + 518 + if (bh->valid && pair_bh->valid) { 519 + block_hists_match(&bh->block_hists, 520 + &pair_bh->block_hists); 521 + hists__output_resort_cb(&pair_bh->block_hists, 522 + NULL, filter_cb); 523 + } 763 524 break; 764 525 default: 765 526 BUG_ON(1); ··· 985 720 hists__precompute(hists); 986 721 hists__output_resort(hists, NULL); 987 722 723 + if (compute == COMPUTE_CYCLES) 724 + symbol_conf.report_block = true; 725 + 988 726 hists__fprintf(hists, !quiet, 0, 0, 0, 
stdout, 989 727 !symbol_conf.use_callchain); 990 728 } ··· 1141 873 return ret; 1142 874 } 1143 875 876 + static int check_file_brstack(void) 877 + { 878 + struct data__file *d; 879 + bool has_br_stack; 880 + int i; 881 + 882 + data__for_each_file(i, d) { 883 + d->session = perf_session__new(&d->data, false, &pdiff.tool); 884 + if (!d->session) { 885 + pr_err("Failed to open %s\n", d->data.path); 886 + return -1; 887 + } 888 + 889 + has_br_stack = perf_header__has_feat(&d->session->header, 890 + HEADER_BRANCH_STACK); 891 + perf_session__delete(d->session); 892 + if (!has_br_stack) 893 + return 0; 894 + } 895 + 896 + /* Set only all files having branch stacks */ 897 + pdiff.has_br_stack = true; 898 + return 0; 899 + } 900 + 1144 901 static int __cmd_diff(void) 1145 902 { 1146 903 struct data__file *d; ··· 1243 950 OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, 1244 951 "Show only items with match in baseline"), 1245 952 OPT_CALLBACK('c', "compute", &compute, 1246 - "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)", 953 + "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs),cycles", 1247 954 "Entries differential computation selection", 1248 955 setup_compute), 1249 956 OPT_BOOLEAN('p', "period", &show_period, ··· 1321 1028 return ret; 1322 1029 } 1323 1030 1031 + static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, 1032 + struct perf_hpp *hpp, int width) 1033 + { 1034 + struct block_hist *bh = container_of(he, struct block_hist, he); 1035 + struct block_hist *bh_pair = container_of(pair, struct block_hist, he); 1036 + struct hist_entry *block_he; 1037 + struct block_info *bi; 1038 + char buf[128]; 1039 + char *start_line, *end_line; 1040 + 1041 + block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); 1042 + if (!block_he) { 1043 + hpp->skip = true; 1044 + return 0; 1045 + } 1046 + 1047 + /* 1048 + * Avoid printing the warning "addr2line_init failed for ..." 
1049 + */ 1050 + symbol_conf.disable_add2line_warn = true; 1051 + 1052 + bi = block_he->block_info; 1053 + 1054 + start_line = map__srcline(he->ms.map, bi->sym->start + bi->start, 1055 + he->ms.sym); 1056 + 1057 + end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, 1058 + he->ms.sym); 1059 + 1060 + if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { 1061 + scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", 1062 + start_line, end_line, block_he->diff.cycles); 1063 + } else { 1064 + scnprintf(buf, sizeof(buf), "[%7lx -> %7lx] %4ld", 1065 + bi->start, bi->end, block_he->diff.cycles); 1066 + } 1067 + 1068 + free_srcline(start_line); 1069 + free_srcline(end_line); 1070 + 1071 + return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); 1072 + } 1073 + 1324 1074 static int __hpp__color_compare(struct perf_hpp_fmt *fmt, 1325 1075 struct perf_hpp *hpp, struct hist_entry *he, 1326 1076 int comparison_method) ··· 1375 1039 s64 wdiff; 1376 1040 char pfmt[20] = " "; 1377 1041 1378 - if (!pair) 1042 + if (!pair) { 1043 + if (comparison_method == COMPUTE_CYCLES) { 1044 + struct block_hist *bh; 1045 + 1046 + bh = container_of(he, struct block_hist, he); 1047 + if (bh->block_idx) 1048 + hpp->skip = true; 1049 + } 1050 + 1379 1051 goto no_print; 1052 + } 1380 1053 1381 1054 switch (comparison_method) { 1382 1055 case COMPUTE_DELTA: ··· 1420 1075 return color_snprintf(hpp->buf, hpp->size, 1421 1076 get_percent_color(wdiff), 1422 1077 pfmt, wdiff); 1078 + case COMPUTE_CYCLES: 1079 + return cycles_printf(he, pair, hpp, dfmt->header_width); 1423 1080 default: 1424 1081 BUG_ON(1); 1425 1082 } ··· 1449 1102 struct perf_hpp *hpp, struct hist_entry *he) 1450 1103 { 1451 1104 return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF); 1105 + } 1106 + 1107 + static int hpp__color_cycles(struct perf_hpp_fmt *fmt, 1108 + struct perf_hpp *hpp, struct hist_entry *he) 1109 + { 1110 + return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); 1452 1111 } 1453 1112 
1454 1113 static void ··· 1658 1305 fmt->color = hpp__color_delta; 1659 1306 fmt->sort = hist_entry__cmp_delta_abs; 1660 1307 break; 1308 + case PERF_HPP_DIFF__CYCLES: 1309 + fmt->color = hpp__color_cycles; 1310 + fmt->sort = hist_entry__cmp_nop; 1311 + break; 1661 1312 default: 1662 1313 fmt->sort = hist_entry__cmp_nop; 1663 1314 break; ··· 1741 1384 break; 1742 1385 case COMPUTE_DELTA_ABS: 1743 1386 fmt->sort = hist_entry__cmp_delta_abs_idx; 1387 + break; 1388 + case COMPUTE_CYCLES: 1389 + /* 1390 + * Should set since 'fmt->sort' is called without 1391 + * checking valid during sorting 1392 + */ 1393 + fmt->sort = hist_entry__cmp_nop; 1744 1394 break; 1745 1395 default: 1746 1396 BUG_ON(1); ··· 1845 1481 if (quiet) 1846 1482 perf_quiet_option(); 1847 1483 1484 + symbol__annotation_init(); 1485 + 1848 1486 if (symbol__init(NULL) < 0) 1849 1487 return -1; 1850 1488 1851 1489 if (data_init(argc, argv) < 0) 1490 + return -1; 1491 + 1492 + if (check_file_brstack() < 0) 1493 + return -1; 1494 + 1495 + if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack) 1852 1496 return -1; 1853 1497 1854 1498 if (ui_init() < 0)
+2 -1
tools/perf/builtin-kmem.c
··· 21 21 #include "util/cpumap.h" 22 22 23 23 #include "util/debug.h" 24 + #include "util/string2.h" 24 25 25 26 #include <linux/kernel.h> 26 27 #include <linux/rbtree.h> ··· 31 30 #include <locale.h> 32 31 #include <regex.h> 33 32 34 - #include "sane_ctype.h" 33 + #include <linux/ctype.h> 35 34 36 35 static int kmem_slab; 37 36 static int kmem_page;
+4
tools/perf/builtin-record.c
··· 2191 2191 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 2192 2192 "Configure all used events to run in user space.", 2193 2193 PARSE_OPT_EXCLUSIVE), 2194 + OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 2195 + "collect kernel callchains"), 2196 + OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 2197 + "collect user callchains"), 2194 2198 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 2195 2199 "clang binary to use for compiling BPF scriptlets"), 2196 2200 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
+9 -4
tools/perf/builtin-report.c
··· 47 47 #include <errno.h> 48 48 #include <inttypes.h> 49 49 #include <regex.h> 50 - #include "sane_ctype.h" 50 + #include <linux/ctype.h> 51 51 #include <signal.h> 52 52 #include <linux/bitmap.h> 53 53 #include <linux/stringify.h> ··· 941 941 pr_err("time quantum cannot be 0"); 942 942 return -1; 943 943 } 944 - while (isspace(*end)) 945 - end++; 944 + end = skip_spaces(end); 946 945 if (*end == 0) 947 946 return 0; 948 947 if (!strcmp(end, "s")) { ··· 1427 1428 &report.range_num); 1428 1429 if (ret < 0) 1429 1430 goto error; 1431 + 1432 + itrace_synth_opts__set_time_range(&itrace_synth_opts, 1433 + report.ptime_range, 1434 + report.range_num); 1430 1435 } 1431 1436 1432 1437 if (session->tevent.pevent && ··· 1452 1449 ret = 0; 1453 1450 1454 1451 error: 1455 - if (report.ptime_range) 1452 + if (report.ptime_range) { 1453 + itrace_synth_opts__clear_time_range(&itrace_synth_opts); 1456 1454 zfree(&report.ptime_range); 1455 + } 1457 1456 zstd_fini(&(session->zstd_data)); 1458 1457 perf_session__delete(session); 1459 1458 return ret;
+2 -1
tools/perf/builtin-sched.c
··· 15 15 #include "util/thread_map.h" 16 16 #include "util/color.h" 17 17 #include "util/stat.h" 18 + #include "util/string2.h" 18 19 #include "util/callchain.h" 19 20 #include "util/time-utils.h" 20 21 ··· 37 36 #include <api/fs/fs.h> 38 37 #include <linux/time64.h> 39 38 40 - #include "sane_ctype.h" 39 + #include <linux/ctype.h> 41 40 42 41 #define PR_SET_NAME 15 /* Set process name */ 43 42 #define MAX_CPUS 4096
+98 -9
tools/perf/builtin-script.c
··· 49 49 #include <unistd.h> 50 50 #include <subcmd/pager.h> 51 51 52 - #include "sane_ctype.h" 52 + #include <linux/ctype.h> 53 53 54 54 static char const *script_name; 55 55 static char const *generate_script_lang; ··· 102 102 PERF_OUTPUT_METRIC = 1U << 28, 103 103 PERF_OUTPUT_MISC = 1U << 29, 104 104 PERF_OUTPUT_SRCCODE = 1U << 30, 105 + PERF_OUTPUT_IPC = 1U << 31, 105 106 }; 106 107 107 108 struct output_option { ··· 140 139 {.str = "metric", .field = PERF_OUTPUT_METRIC}, 141 140 {.str = "misc", .field = PERF_OUTPUT_MISC}, 142 141 {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, 142 + {.str = "ipc", .field = PERF_OUTPUT_IPC}, 143 143 }; 144 144 145 145 enum { ··· 1270 1268 return printed; 1271 1269 } 1272 1270 1271 + static int perf_sample__fprintf_ipc(struct perf_sample *sample, 1272 + struct perf_event_attr *attr, FILE *fp) 1273 + { 1274 + unsigned int ipc; 1275 + 1276 + if (!PRINT_FIELD(IPC) || !sample->cyc_cnt || !sample->insn_cnt) 1277 + return 0; 1278 + 1279 + ipc = (sample->insn_cnt * 100) / sample->cyc_cnt; 1280 + 1281 + return fprintf(fp, " \t IPC: %u.%02u (%" PRIu64 "/%" PRIu64 ") ", 1282 + ipc / 100, ipc % 100, sample->insn_cnt, sample->cyc_cnt); 1283 + } 1284 + 1273 1285 static int perf_sample__fprintf_bts(struct perf_sample *sample, 1274 1286 struct perf_evsel *evsel, 1275 1287 struct thread *thread, ··· 1327 1311 printed += fprintf(fp, " => "); 1328 1312 printed += perf_sample__fprintf_addr(sample, thread, attr, fp); 1329 1313 } 1314 + 1315 + printed += perf_sample__fprintf_ipc(sample, attr, fp); 1330 1316 1331 1317 if (print_srcline_last) 1332 1318 printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); ··· 1624 1606 bool show_namespace_events; 1625 1607 bool show_lost_events; 1626 1608 bool show_round_events; 1609 + bool show_bpf_events; 1627 1610 bool allocated; 1628 1611 bool per_event_dump; 1629 1612 struct cpu_map *cpus; ··· 1877 1858 1878 1859 if (PRINT_FIELD(PHYS_ADDR)) 1879 1860 fprintf(fp, "%16" PRIx64, sample->phys_addr); 1861 
+ 1862 + perf_sample__fprintf_ipc(sample, attr, fp); 1863 + 1880 1864 fprintf(fp, "\n"); 1881 1865 1882 1866 if (PRINT_FIELD(SRCCODE)) { ··· 2340 2318 return 0; 2341 2319 } 2342 2320 2321 + static int 2322 + process_bpf_events(struct perf_tool *tool __maybe_unused, 2323 + union perf_event *event, 2324 + struct perf_sample *sample, 2325 + struct machine *machine) 2326 + { 2327 + struct thread *thread; 2328 + struct perf_script *script = container_of(tool, struct perf_script, tool); 2329 + struct perf_session *session = script->session; 2330 + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); 2331 + 2332 + if (machine__process_ksymbol(machine, event, sample) < 0) 2333 + return -1; 2334 + 2335 + if (!evsel->attr.sample_id_all) { 2336 + perf_event__fprintf(event, stdout); 2337 + return 0; 2338 + } 2339 + 2340 + thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2341 + if (thread == NULL) { 2342 + pr_debug("problem processing MMAP event, skipping it.\n"); 2343 + return -1; 2344 + } 2345 + 2346 + if (!filter_cpu(sample)) { 2347 + perf_sample__fprintf_start(sample, thread, evsel, 2348 + event->header.type, stdout); 2349 + perf_event__fprintf(event, stdout); 2350 + } 2351 + 2352 + thread__put(thread); 2353 + return 0; 2354 + } 2355 + 2343 2356 static void sig_handler(int sig __maybe_unused) 2344 2357 { 2345 2358 session_done = 1; ··· 2476 2419 if (script->show_round_events) { 2477 2420 script->tool.ordered_events = false; 2478 2421 script->tool.finished_round = process_finished_round_event; 2422 + } 2423 + if (script->show_bpf_events) { 2424 + script->tool.ksymbol = process_bpf_events; 2425 + script->tool.bpf_event = process_bpf_events; 2479 2426 } 2480 2427 2481 2428 if (perf_script__setup_per_event_dump(script)) { ··· 2880 2819 return -1; 2881 2820 2882 2821 while (fgets(line, sizeof(line), fp)) { 2883 - p = ltrim(line); 2822 + p = skip_spaces(line); 2884 2823 if (strlen(p) == 0) 2885 2824 continue; 2886 2825 if (*p != '#') 
··· 2889 2828 if (strlen(p) && *p == '!') 2890 2829 continue; 2891 2830 2892 - p = ltrim(p); 2831 + p = skip_spaces(p); 2893 2832 if (strlen(p) && p[strlen(p) - 1] == '\n') 2894 2833 p[strlen(p) - 1] = '\0'; 2895 2834 2896 2835 if (!strncmp(p, "description:", strlen("description:"))) { 2897 2836 p += strlen("description:"); 2898 - desc->half_liner = strdup(ltrim(p)); 2837 + desc->half_liner = strdup(skip_spaces(p)); 2899 2838 continue; 2900 2839 } 2901 2840 2902 2841 if (!strncmp(p, "args:", strlen("args:"))) { 2903 2842 p += strlen("args:"); 2904 - desc->args = strdup(ltrim(p)); 2843 + desc->args = strdup(skip_spaces(p)); 2905 2844 continue; 2906 2845 } 2907 2846 } ··· 3008 2947 return -1; 3009 2948 3010 2949 while (fgets(line, sizeof(line), fp)) { 3011 - p = ltrim(line); 2950 + p = skip_spaces(line); 3012 2951 if (*p == '#') 3013 2952 continue; 3014 2953 ··· 3018 2957 break; 3019 2958 3020 2959 p += 2; 3021 - p = ltrim(p); 2960 + p = skip_spaces(p); 3022 2961 len = strcspn(p, " \t"); 3023 2962 if (!len) 3024 2963 break; ··· 3358 3297 parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); 3359 3298 itrace_parse_synth_opts(opt, "cewp", 0); 3360 3299 symbol_conf.nanosecs = true; 3300 + symbol_conf.pad_output_len_dso = 50; 3361 3301 return 0; 3362 3302 } 3363 3303 ··· 3454 3392 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 3455 3393 "addr,symoff,srcline,period,iregs,uregs,brstack," 3456 3394 "brstacksym,flags,bpf-output,brstackinsn,brstackoff," 3457 - "callindent,insn,insnlen,synth,phys_addr,metric,misc", 3395 + "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc", 3458 3396 parse_output_fields), 3459 3397 OPT_BOOLEAN('a', "all-cpus", &system_wide, 3460 3398 "system-wide collection from all CPUs"), ··· 3500 3438 "Show lost events (if recorded)"), 3501 3439 OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events, 3502 3440 "Show round events (if recorded)"), 3441 + OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events, 
3442 + "Show bpf related events (if recorded)"), 3503 3443 OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, 3504 3444 "Dump trace output to files named by the monitored events"), 3505 3445 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), ··· 3522 3458 "Time span of interest (start,stop)"), 3523 3459 OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, 3524 3460 "Show inline function"), 3461 + OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 3462 + "guest mount directory under which every guest os" 3463 + " instance has a subdir"), 3464 + OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name, 3465 + "file", "file saving guest os vmlinux"), 3466 + OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms, 3467 + "file", "file saving guest os /proc/kallsyms"), 3468 + OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, 3469 + "file", "file saving guest os /proc/modules"), 3525 3470 OPT_END() 3526 3471 }; 3527 3472 const char * const script_subcommands[] = { "record", "report", NULL }; ··· 3549 3476 3550 3477 argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, 3551 3478 PARSE_OPT_STOP_AT_NON_OPTION); 3479 + 3480 + if (symbol_conf.guestmount || 3481 + symbol_conf.default_guest_vmlinux_name || 3482 + symbol_conf.default_guest_kallsyms || 3483 + symbol_conf.default_guest_modules) { 3484 + /* 3485 + * Enable guest sample processing. 
3486 + */ 3487 + perf_guest = true; 3488 + } 3552 3489 3553 3490 data.path = input_name; 3554 3491 data.force = symbol_conf.force; ··· 3848 3765 &script.range_num); 3849 3766 if (err < 0) 3850 3767 goto out_delete; 3768 + 3769 + itrace_synth_opts__set_time_range(&itrace_synth_opts, 3770 + script.ptime_range, 3771 + script.range_num); 3851 3772 } 3852 3773 3853 3774 err = __cmd_script(&script); ··· 3859 3772 flush_scripting(); 3860 3773 3861 3774 out_delete: 3862 - if (script.ptime_range) 3775 + if (script.ptime_range) { 3776 + itrace_synth_opts__clear_time_range(&itrace_synth_opts); 3863 3777 zfree(&script.ptime_range); 3778 + } 3864 3779 3865 3780 perf_evlist__free_stats(session->evlist); 3866 3781 perf_session__delete(session);
+82 -7
tools/perf/builtin-stat.c
··· 82 82 #include <sys/time.h> 83 83 #include <sys/resource.h> 84 84 85 - #include "sane_ctype.h" 85 + #include <linux/ctype.h> 86 86 87 87 #define DEFAULT_SEPARATOR " " 88 88 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" ··· 776 776 "stop workload and print counts after a timeout period in ms (>= 10ms)"), 777 777 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 778 778 "aggregate counts per processor socket", AGGR_SOCKET), 779 + OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode, 780 + "aggregate counts per processor die", AGGR_DIE), 779 781 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 780 782 "aggregate counts per physical processor core", AGGR_CORE), 781 783 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, ··· 800 798 struct cpu_map *map, int cpu) 801 799 { 802 800 return cpu_map__get_socket(map, cpu, NULL); 801 + } 802 + 803 + static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, 804 + struct cpu_map *map, int cpu) 805 + { 806 + return cpu_map__get_die(map, cpu, NULL); 803 807 } 804 808 805 809 static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, ··· 848 840 return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); 849 841 } 850 842 843 + static int perf_stat__get_die_cached(struct perf_stat_config *config, 844 + struct cpu_map *map, int idx) 845 + { 846 + return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); 847 + } 848 + 851 849 static int perf_stat__get_core_cached(struct perf_stat_config *config, 852 850 struct cpu_map *map, int idx) 853 851 { ··· 883 869 return -1; 884 870 } 885 871 stat_config.aggr_get_id = perf_stat__get_socket_cached; 872 + break; 873 + case AGGR_DIE: 874 + if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) { 875 + perror("cannot build die map"); 876 + return -1; 877 + } 878 + stat_config.aggr_get_id = perf_stat__get_die_cached; 886 879 break; 887 880 case AGGR_CORE: 888 881 if (cpu_map__build_core_map(evsel_list->cpus, 
&stat_config.aggr_map)) { ··· 956 935 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 957 936 } 958 937 938 + static int perf_env__get_die(struct cpu_map *map, int idx, void *data) 939 + { 940 + struct perf_env *env = data; 941 + int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); 942 + 943 + if (cpu != -1) { 944 + /* 945 + * Encode socket in bit range 15:8 946 + * die_id is relative to socket, 947 + * we need a global id. So we combine 948 + * socket + die id 949 + */ 950 + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) 951 + return -1; 952 + 953 + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) 954 + return -1; 955 + 956 + die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); 957 + } 958 + 959 + return die_id; 960 + } 961 + 959 962 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 960 963 { 961 964 struct perf_env *env = data; 962 965 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 963 966 964 967 if (cpu != -1) { 965 - int socket_id = env->cpu[cpu].socket_id; 966 - 967 968 /* 968 - * Encode socket in upper 16 bits 969 - * core_id is relative to socket, and 969 + * Encode socket in bit range 31:24 970 + * encode die id in bit range 23:16 971 + * core_id is relative to socket and die, 970 972 * we need a global id. So we combine 971 - * socket + core id. 
973 + * socket + die id + core id 972 974 */ 973 - core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 975 + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) 976 + return -1; 977 + 978 + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) 979 + return -1; 980 + 981 + if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) 982 + return -1; 983 + 984 + core = (env->cpu[cpu].socket_id << 24) | 985 + (env->cpu[cpu].die_id << 16) | 986 + (env->cpu[cpu].core_id & 0xffff); 974 987 } 975 988 976 989 return core; ··· 1014 959 struct cpu_map **sockp) 1015 960 { 1016 961 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 962 + } 963 + 964 + static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus, 965 + struct cpu_map **diep) 966 + { 967 + return cpu_map__build_map(cpus, diep, perf_env__get_die, env); 1017 968 } 1018 969 1019 970 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, ··· 1032 971 struct cpu_map *map, int idx) 1033 972 { 1034 973 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 974 + } 975 + static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, 976 + struct cpu_map *map, int idx) 977 + { 978 + return perf_env__get_die(map, idx, &perf_stat.session->header.env); 1035 979 } 1036 980 1037 981 static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, ··· 1056 990 return -1; 1057 991 } 1058 992 stat_config.aggr_get_id = perf_stat__get_socket_file; 993 + break; 994 + case AGGR_DIE: 995 + if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) { 996 + perror("cannot build die map"); 997 + return -1; 998 + } 999 + stat_config.aggr_get_id = perf_stat__get_die_file; 1059 1000 break; 1060 1001 case AGGR_CORE: 1061 1002 if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { ··· 1614 1541 OPT_STRING('i', 
"input", &input_name, "file", "input file name"), 1615 1542 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 1616 1543 "aggregate counts per processor socket", AGGR_SOCKET), 1544 + OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode, 1545 + "aggregate counts per processor die", AGGR_DIE), 1617 1546 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 1618 1547 "aggregate counts per physical processor core", AGGR_CORE), 1619 1548 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
+8 -2
tools/perf/builtin-top.c
··· 40 40 #include "util/cpumap.h" 41 41 #include "util/xyarray.h" 42 42 #include "util/sort.h" 43 + #include "util/string2.h" 43 44 #include "util/term.h" 44 45 #include "util/intlist.h" 45 46 #include "util/parse-branch-options.h" ··· 76 75 #include <linux/time64.h> 77 76 #include <linux/types.h> 78 77 79 - #include "sane_ctype.h" 78 + #include <linux/ctype.h> 80 79 81 80 static volatile int done; 82 81 static volatile int resize; ··· 1208 1207 1209 1208 init_process_thread(top); 1210 1209 1210 + if (opts->record_namespaces) 1211 + top->tool.namespace_events = true; 1212 + 1211 1213 ret = perf_event__synthesize_bpf_events(top->session, perf_event__process, 1212 1214 &top->session->machines.host, 1213 1215 &top->record_opts); 1214 1216 if (ret < 0) 1215 - pr_warning("Couldn't synthesize bpf events.\n"); 1217 + pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n"); 1216 1218 1217 1219 machine__synthesize_threads(&top->session->machines.host, &opts->target, 1218 1220 top->evlist->threads, false, ··· 1503 1499 OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), 1504 1500 OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize, 1505 1501 "number of thread to run event synthesize"), 1502 + OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces, 1503 + "Record namespaces events"), 1506 1504 OPT_END() 1507 1505 }; 1508 1506 struct perf_evlist *sb_evlist = NULL;
+108 -31
tools/perf/builtin-trace.c
··· 64 64 #include <fcntl.h> 65 65 #include <sys/sysmacros.h> 66 66 67 - #include "sane_ctype.h" 67 + #include <linux/ctype.h> 68 68 69 69 #ifndef O_CLOEXEC 70 70 # define O_CLOEXEC 02000000 ··· 402 402 403 403 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 404 404 405 + size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg) 406 + { 407 + return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val); 408 + } 409 + 405 410 size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val) 406 411 { 407 412 size_t printed; ··· 485 480 "MAP_GET_NEXT_KEY", "PROG_LOAD", 486 481 }; 487 482 static DEFINE_STRARRAY(bpf_cmd, "BPF_"); 483 + 484 + static const char *fsmount_flags[] = { 485 + [1] = "CLOEXEC", 486 + }; 487 + static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_"); 488 + 489 + #include "trace/beauty/generated/fsconfig_arrays.c" 490 + 491 + static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_"); 488 492 489 493 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 490 494 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1); ··· 655 641 { .scnprintf = SCA_STRARRAY, \ 656 642 .parm = &strarray__##array, } 657 643 644 + #define STRARRAY_FLAGS(name, array) \ 645 + { .scnprintf = SCA_STRARRAY_FLAGS, \ 646 + .parm = &strarray__##array, } 647 + 658 648 #include "trace/beauty/arch_errno_names.c" 659 649 #include "trace/beauty/eventfd.c" 660 650 #include "trace/beauty/futex_op.c" ··· 730 712 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, 731 713 { .name = "flock", 732 714 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, 715 + { .name = "fsconfig", 716 + .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, }, 717 + { .name = "fsmount", 718 + .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags), 719 + [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, }, 720 + { .name = "fspick", 721 + .arg = { [0] = { .scnprintf 
= SCA_FDAT, /* dfd */ }, 722 + [1] = { .scnprintf = SCA_FILENAME, /* path */ }, 723 + [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, }, 733 724 { .name = "fstat", .alias = "newfstat", }, 734 725 { .name = "fstatat", .alias = "newfstatat", }, 735 726 { .name = "futex", ··· 801 774 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, 802 775 [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ 803 776 .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, 777 + { .name = "move_mount", 778 + .arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ }, 779 + [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ }, 780 + [2] = { .scnprintf = SCA_FDAT, /* to_dfd */ }, 781 + [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ }, 782 + [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, }, 804 783 { .name = "mprotect", 805 784 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 806 785 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, ··· 911 878 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, 912 879 { .name = "symlinkat", 913 880 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 881 + { .name = "sync_file_range", 882 + .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, }, 914 883 { .name = "tgkill", 915 884 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 916 885 { .name = "tkill", ··· 971 936 struct syscall_arg_fmt *arg_fmt; 972 937 }; 973 938 939 + /* 940 + * Must match what is in the BPF program: 941 + * 942 + * tools/perf/examples/bpf/augmented_raw_syscalls.c 943 + */ 974 944 struct bpf_map_syscall_entry { 975 945 bool enabled; 946 + u16 string_args_len[6]; 976 947 }; 977 948 978 949 /* ··· 1232 1191 static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size) 1233 1192 { 1234 1193 struct augmented_arg *augmented_arg = arg->augmented.args; 1194 + size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, 
augmented_arg->value); 1195 + /* 1196 + * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls 1197 + * we would have two strings, each prefixed by its size. 1198 + */ 1199 + int consumed = sizeof(*augmented_arg) + augmented_arg->size; 1235 1200 1236 - return scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value); 1201 + arg->augmented.args = ((void *)arg->augmented.args) + consumed; 1202 + arg->augmented.size -= consumed; 1203 + 1204 + return printed; 1237 1205 } 1238 1206 1239 1207 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, ··· 1430 1380 if (sc->fmt && sc->fmt->arg[idx].scnprintf) 1431 1381 continue; 1432 1382 1383 + len = strlen(field->name); 1384 + 1433 1385 if (strcmp(field->type, "const char *") == 0 && 1434 - (strcmp(field->name, "filename") == 0 || 1435 - strcmp(field->name, "path") == 0 || 1436 - strcmp(field->name, "pathname") == 0)) 1386 + ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) || 1387 + strstr(field->name, "path") != NULL)) 1437 1388 sc->arg_fmt[idx].scnprintf = SCA_FILENAME; 1438 1389 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr")) 1439 1390 sc->arg_fmt[idx].scnprintf = SCA_PTR; ··· 1445 1394 else if ((strcmp(field->type, "int") == 0 || 1446 1395 strcmp(field->type, "unsigned int") == 0 || 1447 1396 strcmp(field->type, "long") == 0) && 1448 - (len = strlen(field->name)) >= 2 && 1449 - strcmp(field->name + len - 2, "fd") == 0) { 1397 + len >= 2 && strcmp(field->name + len - 2, "fd") == 0) { 1450 1398 /* 1451 1399 * /sys/kernel/tracing/events/syscalls/sys_enter* 1452 1400 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c ··· 1527 1477 1528 1478 static int trace__validate_ev_qualifier(struct trace *trace) 1529 1479 { 1530 - int err = 0, i; 1531 - size_t nr_allocated; 1480 + int err = 0; 1481 + bool printed_invalid_prefix = false; 1532 1482 struct str_node *pos; 1483 + size_t 
nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier); 1533 1484 1534 - trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1535 - trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * 1485 + trace->ev_qualifier_ids.entries = malloc(nr_allocated * 1536 1486 sizeof(trace->ev_qualifier_ids.entries[0])); 1537 1487 1538 1488 if (trace->ev_qualifier_ids.entries == NULL) { ··· 1541 1491 err = -EINVAL; 1542 1492 goto out; 1543 1493 } 1544 - 1545 - nr_allocated = trace->ev_qualifier_ids.nr; 1546 - i = 0; 1547 1494 1548 1495 strlist__for_each_entry(pos, trace->ev_qualifier) { 1549 1496 const char *sc = pos->s; ··· 1551 1504 if (id >= 0) 1552 1505 goto matches; 1553 1506 1554 - if (err == 0) { 1555 - fputs("Error:\tInvalid syscall ", trace->output); 1556 - err = -EINVAL; 1507 + if (!printed_invalid_prefix) { 1508 + pr_debug("Skipping unknown syscalls: "); 1509 + printed_invalid_prefix = true; 1557 1510 } else { 1558 - fputs(", ", trace->output); 1511 + pr_debug(", "); 1559 1512 } 1560 1513 1561 - fputs(sc, trace->output); 1514 + pr_debug("%s", sc); 1515 + continue; 1562 1516 } 1563 1517 matches: 1564 - trace->ev_qualifier_ids.entries[i++] = id; 1518 + trace->ev_qualifier_ids.entries[nr_used++] = id; 1565 1519 if (match_next == -1) 1566 1520 continue; 1567 1521 ··· 1570 1522 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); 1571 1523 if (id < 0) 1572 1524 break; 1573 - if (nr_allocated == trace->ev_qualifier_ids.nr) { 1525 + if (nr_allocated == nr_used) { 1574 1526 void *entries; 1575 1527 1576 1528 nr_allocated += 8; ··· 1583 1535 } 1584 1536 trace->ev_qualifier_ids.entries = entries; 1585 1537 } 1586 - trace->ev_qualifier_ids.nr++; 1587 - trace->ev_qualifier_ids.entries[i++] = id; 1538 + trace->ev_qualifier_ids.entries[nr_used++] = id; 1588 1539 } 1589 1540 } 1590 1541 1591 - if (err < 0) { 1592 - fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1593 - "\nHint:\tand: 'man syscalls'\n", trace->output); 
1594 - out_free: 1595 - zfree(&trace->ev_qualifier_ids.entries); 1596 - trace->ev_qualifier_ids.nr = 0; 1597 - } 1542 + trace->ev_qualifier_ids.nr = nr_used; 1598 1543 out: 1544 + if (printed_invalid_prefix) 1545 + pr_debug("\n"); 1599 1546 return err; 1547 + out_free: 1548 + zfree(&trace->ev_qualifier_ids.entries); 1549 + trace->ev_qualifier_ids.nr = 0; 1550 + goto out; 1600 1551 } 1601 1552 1602 1553 /* ··· 2722 2675 } 2723 2676 2724 2677 #ifdef HAVE_LIBBPF_SUPPORT 2678 + static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry) 2679 + { 2680 + struct syscall *sc = trace__syscall_info(trace, NULL, id); 2681 + int arg = 0; 2682 + 2683 + if (sc == NULL) 2684 + goto out; 2685 + 2686 + for (; arg < sc->nr_args; ++arg) { 2687 + entry->string_args_len[arg] = 0; 2688 + if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) { 2689 + /* Should be set like strace -s strsize */ 2690 + entry->string_args_len[arg] = PATH_MAX; 2691 + } 2692 + } 2693 + out: 2694 + for (; arg < 6; ++arg) 2695 + entry->string_args_len[arg] = 0; 2696 + } 2725 2697 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) 2726 2698 { 2727 2699 int fd = bpf_map__fd(trace->syscalls.map); ··· 2752 2686 2753 2687 for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) { 2754 2688 int key = trace->ev_qualifier_ids.entries[i]; 2689 + 2690 + if (value.enabled) 2691 + trace__init_bpf_map_syscall_args(trace, key, &value); 2755 2692 2756 2693 err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); 2757 2694 if (err) ··· 2773 2704 int err = 0, key; 2774 2705 2775 2706 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { 2707 + if (enabled) 2708 + trace__init_bpf_map_syscall_args(trace, key, &value); 2709 + 2776 2710 err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); 2777 2711 if (err) 2778 2712 break; ··· 3699 3627 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", 3700 3628 "event selector. 
use 'perf list' to list available events", 3701 3629 parse_events_option); 3702 - err = parse_events_option(&o, value, 0); 3630 + /* 3631 + * We can't propagate parse_events_option() return, as it is 1 3632 + * for failure while perf_config() expects -1. 3633 + */ 3634 + if (parse_events_option(&o, value, 0)) 3635 + err = -1; 3703 3636 } else if (!strcmp(var, "trace.show_timestamp")) { 3704 3637 trace->show_tstamp = perf_config_bool(var, value); 3705 3638 } else if (!strcmp(var, "trace.show_duration")) {
+2
tools/perf/check-headers.sh
··· 105 105 check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' 106 106 check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' 107 107 check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' 108 + check include/linux/ctype.h '-I "isdigit("' 109 + check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' 108 110 109 111 # diff non-symmetric files 110 112 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
+101 -167
tools/perf/examples/bpf/augmented_raw_syscalls.c
··· 21 21 /* bpf-output associated map */ 22 22 bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); 23 23 24 + /* 25 + * string_args_len: one per syscall arg, 0 means not a string or don't copy it, 26 + * PATH_MAX for copying everything, any other value to limit 27 + * it a la 'strace -s strsize'. 28 + */ 24 29 struct syscall { 25 30 bool enabled; 31 + u16 string_args_len[6]; 26 32 }; 27 33 28 34 bpf_map(syscalls, ARRAY, int, struct syscall, 512); ··· 47 41 48 42 struct augmented_filename { 49 43 unsigned int size; 50 - int reserved; 44 + int err; 51 45 char value[PATH_MAX]; 52 46 }; 53 - 54 - /* syscalls where the first arg is a string */ 55 - #define SYS_OPEN 2 56 - #define SYS_STAT 4 57 - #define SYS_LSTAT 6 58 - #define SYS_ACCESS 21 59 - #define SYS_EXECVE 59 60 - #define SYS_TRUNCATE 76 61 - #define SYS_CHDIR 80 62 - #define SYS_RENAME 82 63 - #define SYS_MKDIR 83 64 - #define SYS_RMDIR 84 65 - #define SYS_CREAT 85 66 - #define SYS_LINK 86 67 - #define SYS_UNLINK 87 68 - #define SYS_SYMLINK 88 69 - #define SYS_READLINK 89 70 - #define SYS_CHMOD 90 71 - #define SYS_CHOWN 92 72 - #define SYS_LCHOWN 94 73 - #define SYS_MKNOD 133 74 - #define SYS_STATFS 137 75 - #define SYS_PIVOT_ROOT 155 76 - #define SYS_CHROOT 161 77 - #define SYS_ACCT 163 78 - #define SYS_SWAPON 167 79 - #define SYS_SWAPOFF 168 80 - #define SYS_DELETE_MODULE 176 81 - #define SYS_SETXATTR 188 82 - #define SYS_LSETXATTR 189 83 - #define SYS_GETXATTR 191 84 - #define SYS_LGETXATTR 192 85 - #define SYS_LISTXATTR 194 86 - #define SYS_LLISTXATTR 195 87 - #define SYS_REMOVEXATTR 197 88 - #define SYS_LREMOVEXATTR 198 89 - #define SYS_MQ_OPEN 240 90 - #define SYS_MQ_UNLINK 241 91 - #define SYS_ADD_KEY 248 92 - #define SYS_REQUEST_KEY 249 93 - #define SYS_SYMLINKAT 266 94 - #define SYS_MEMFD_CREATE 319 95 - 96 - /* syscalls where the first arg is a string */ 97 - 98 - #define SYS_PWRITE64 18 99 - #define SYS_EXECVE 59 100 - #define SYS_RENAME 82 101 - #define SYS_QUOTACTL 179 102 - 
#define SYS_FSETXATTR 190 103 - #define SYS_FGETXATTR 193 104 - #define SYS_FREMOVEXATTR 199 105 - #define SYS_MQ_TIMEDSEND 242 106 - #define SYS_REQUEST_KEY 249 107 - #define SYS_INOTIFY_ADD_WATCH 254 108 - #define SYS_OPENAT 257 109 - #define SYS_MKDIRAT 258 110 - #define SYS_MKNODAT 259 111 - #define SYS_FCHOWNAT 260 112 - #define SYS_FUTIMESAT 261 113 - #define SYS_NEWFSTATAT 262 114 - #define SYS_UNLINKAT 263 115 - #define SYS_RENAMEAT 264 116 - #define SYS_LINKAT 265 117 - #define SYS_READLINKAT 267 118 - #define SYS_FCHMODAT 268 119 - #define SYS_FACCESSAT 269 120 - #define SYS_UTIMENSAT 280 121 - #define SYS_NAME_TO_HANDLE_AT 303 122 - #define SYS_FINIT_MODULE 313 123 - #define SYS_RENAMEAT2 316 124 - #define SYS_EXECVEAT 322 125 - #define SYS_STATX 332 126 47 127 48 pid_filter(pids_filtered); 128 49 ··· 60 127 61 128 bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1); 62 129 130 + static inline 131 + unsigned int augmented_filename__read(struct augmented_filename *augmented_filename, 132 + const void *filename_arg, unsigned int filename_len) 133 + { 134 + unsigned int len = sizeof(*augmented_filename); 135 + int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg); 136 + 137 + augmented_filename->size = augmented_filename->err = 0; 138 + /* 139 + * probe_read_str may return < 0, e.g. 
-EFAULT 140 + * So we record that in augmented_filename->err, which userspace will check 141 + */ 142 + if (size > 0) { 143 + len -= sizeof(augmented_filename->value) - size; 144 + len &= sizeof(augmented_filename->value) - 1; 145 + augmented_filename->size = size; 146 + } else { 147 + /* 148 + * So that userspace notices the error while still being able 149 + * to skip this augmented arg record 150 + */ 151 + augmented_filename->err = size; 152 + len = offsetof(struct augmented_filename, value); 153 + } 154 + 155 + return len; 156 + } 157 + 63 158 SEC("raw_syscalls:sys_enter") 64 159 int sys_enter(struct syscall_enter_args *args) 65 160 { 66 161 struct augmented_args_filename *augmented_args; 67 - unsigned int len = sizeof(*augmented_args); 68 - const void *filename_arg = NULL; 162 + /* 163 + * We start len, the amount of data that will be in the perf ring 164 + * buffer, if this is not filtered out by one of pid_filter__has(), 165 + * syscall->enabled, etc, with the non-augmented raw syscall payload, 166 + * i.e. sizeof(augmented_args->args). 167 + * 168 + * We'll add to this as we add augmented syscalls right after that 169 + * initial, non-augmented raw_syscalls:sys_enter payload. 170 + */ 171 + unsigned int len = sizeof(augmented_args->args); 69 172 struct syscall *syscall; 70 173 int key = 0; 71 174 ··· 158 189 * after the ctx memory access to prevent their down stream merging. 159 190 */ 160 191 /* 161 - * This table of what args are strings will be provided by userspace, 162 - * in the syscalls map, i.e. we will already have to do the lookup to 163 - * see if this specific syscall is filtered, so we can as well get more 164 - * info about what syscall args are strings or pointers, and how many 165 - * bytes to copy, per arg, etc. 192 + * For now copy just the first string arg, we need to improve the protocol 193 + * and have more than one. 
166 194 * 167 - * For now hard code it, till we have all the basic mechanisms in place 168 - * to automate everything and make the kernel part be completely driven 169 - * by information obtained in userspace for each kernel version and 170 - * processor architecture, making the kernel part the same no matter what 171 - * kernel version or processor architecture it runs on. 172 - */ 173 - switch (augmented_args->args.syscall_nr) { 174 - case SYS_ACCT: 175 - case SYS_ADD_KEY: 176 - case SYS_CHDIR: 177 - case SYS_CHMOD: 178 - case SYS_CHOWN: 179 - case SYS_CHROOT: 180 - case SYS_CREAT: 181 - case SYS_DELETE_MODULE: 182 - case SYS_EXECVE: 183 - case SYS_GETXATTR: 184 - case SYS_LCHOWN: 185 - case SYS_LGETXATTR: 186 - case SYS_LINK: 187 - case SYS_LISTXATTR: 188 - case SYS_LLISTXATTR: 189 - case SYS_LREMOVEXATTR: 190 - case SYS_LSETXATTR: 191 - case SYS_LSTAT: 192 - case SYS_MEMFD_CREATE: 193 - case SYS_MKDIR: 194 - case SYS_MKNOD: 195 - case SYS_MQ_OPEN: 196 - case SYS_MQ_UNLINK: 197 - case SYS_PIVOT_ROOT: 198 - case SYS_READLINK: 199 - case SYS_REMOVEXATTR: 200 - case SYS_RENAME: 201 - case SYS_REQUEST_KEY: 202 - case SYS_RMDIR: 203 - case SYS_SETXATTR: 204 - case SYS_STAT: 205 - case SYS_STATFS: 206 - case SYS_SWAPOFF: 207 - case SYS_SWAPON: 208 - case SYS_SYMLINK: 209 - case SYS_SYMLINKAT: 210 - case SYS_TRUNCATE: 211 - case SYS_UNLINK: 212 - case SYS_ACCESS: 213 - case SYS_OPEN: filename_arg = (const void *)args->args[0]; 195 + * Using the unrolled loop is not working, only when we do it manually, 196 + * check this out later... 
197 + 198 + u8 arg; 199 + #pragma clang loop unroll(full) 200 + for (arg = 0; arg < 6; ++arg) { 201 + if (syscall->string_args_len[arg] != 0) { 202 + filename_len = syscall->string_args_len[arg]; 203 + filename_arg = (const void *)args->args[arg]; 214 204 __asm__ __volatile__("": : :"memory"); 215 - break; 216 - case SYS_EXECVEAT: 217 - case SYS_FACCESSAT: 218 - case SYS_FCHMODAT: 219 - case SYS_FCHOWNAT: 220 - case SYS_FGETXATTR: 221 - case SYS_FINIT_MODULE: 222 - case SYS_FREMOVEXATTR: 223 - case SYS_FSETXATTR: 224 - case SYS_FUTIMESAT: 225 - case SYS_INOTIFY_ADD_WATCH: 226 - case SYS_LINKAT: 227 - case SYS_MKDIRAT: 228 - case SYS_MKNODAT: 229 - case SYS_MQ_TIMEDSEND: 230 - case SYS_NAME_TO_HANDLE_AT: 231 - case SYS_NEWFSTATAT: 232 - case SYS_PWRITE64: 233 - case SYS_QUOTACTL: 234 - case SYS_READLINKAT: 235 - case SYS_RENAMEAT: 236 - case SYS_RENAMEAT2: 237 - case SYS_STATX: 238 - case SYS_UNLINKAT: 239 - case SYS_UTIMENSAT: 240 - case SYS_OPENAT: filename_arg = (const void *)args->args[1]; 241 - break; 205 + break; 206 + } 242 207 } 243 208 244 - if (filename_arg != NULL) { 245 - augmented_args->filename.reserved = 0; 246 - augmented_args->filename.size = probe_read_str(&augmented_args->filename.value, 247 - sizeof(augmented_args->filename.value), 248 - filename_arg); 249 - if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) { 250 - len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size; 251 - len &= sizeof(augmented_args->filename.value) - 1; 252 - } 253 - } else { 254 - len = sizeof(augmented_args->args); 255 - } 209 + verifier log: 210 + 211 + ; if (syscall->string_args_len[arg] != 0) { 212 + 37: (69) r3 = *(u16 *)(r0 +2) 213 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv0 R2_w=map_value(id=0,off=2,ks=4,vs=14,imm=0) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm 214 + ; if (syscall->string_args_len[arg] != 0) { 215 + 38: (55) if r3 != 0x0 goto pc+5 216 + 
R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv0 R2=map_value(id=0,off=2,ks=4,vs=14,imm=0) R3=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm 217 + 39: (b7) r1 = 1 218 + ; if (syscall->string_args_len[arg] != 0) { 219 + 40: (bf) r2 = r0 220 + 41: (07) r2 += 4 221 + 42: (69) r3 = *(u16 *)(r0 +4) 222 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv1 R2_w=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3_w=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm 223 + ; if (syscall->string_args_len[arg] != 0) { 224 + 43: (15) if r3 == 0x0 goto pc+32 225 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv1 R2=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm 226 + ; filename_arg = (const void *)args->args[arg]; 227 + 44: (67) r1 <<= 3 228 + 45: (bf) r3 = r6 229 + 46: (0f) r3 += r1 230 + 47: (b7) r5 = 64 231 + 48: (79) r3 = *(u64 *)(r3 +16) 232 + dereference of modified ctx ptr R3 off=8 disallowed 233 + processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_states 12 mark_read 7 234 + */ 235 + 236 + #define __loop_iter(arg) \ 237 + if (syscall->string_args_len[arg] != 0) { \ 238 + unsigned int filename_len = syscall->string_args_len[arg]; \ 239 + const void *filename_arg = (const void *)args->args[arg]; \ 240 + if (filename_len <= sizeof(augmented_args->filename.value)) \ 241 + len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len); 242 + #define loop_iter_first() __loop_iter(0); } 243 + #define loop_iter(arg) else __loop_iter(arg); } 244 + #define loop_iter_last(arg) else __loop_iter(arg); __asm__ __volatile__("": : :"memory"); } 245 + 246 + loop_iter_first() 247 + loop_iter(1) 248 + loop_iter(2) 249 + loop_iter(3) 250 + loop_iter(4) 251 + loop_iter_last(5) 256 252 257 253 /* If 
perf_event_output fails, return non-zero so that it gets recorded unaugmented */ 258 254 return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len);
+2
tools/perf/jvmti/jvmti_agent.c
··· 45 45 static char jit_path[PATH_MAX]; 46 46 static void *marker_addr; 47 47 48 + #ifndef HAVE_GETTID 48 49 static inline pid_t gettid(void) 49 50 { 50 51 return (pid_t)syscall(__NR_gettid); 51 52 } 53 + #endif 52 54 53 55 static int get_e_machine(struct jitheader *hdr) 54 56 {
+2 -2
tools/perf/jvmti/libjvmti.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/compiler.h> 3 + #include <linux/string.h> 3 4 #include <sys/types.h> 4 5 #include <stdio.h> 5 6 #include <string.h> ··· 163 162 result[i] = '\0'; 164 163 } else { 165 164 /* fallback case */ 166 - size_t file_name_len = strlen(file_name); 167 - strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length); 165 + strlcpy(result, file_name, max_length); 168 166 } 169 167 } 170 168
-5
tools/perf/perf-with-kcore.sh
··· 104 104 105 105 USER_HOME=$(bash <<< "echo ~$SUDO_USER") 106 106 107 - if [ "$HOME" != "$USER_HOME" ] ; then 108 - echo "Fix unnecessary because root has a home: $HOME" >&2 109 - exit 1 110 - fi 111 - 112 107 echo "Fixing buildid cache permissions" 113 108 114 109 find "$USER_HOME/.debug" -xdev -type d ! -user "$SUDO_USER" -ls -exec chown "$SUDO_USER" \{\} \;
+1
tools/perf/perf.c
··· 18 18 #include "util/bpf-loader.h" 19 19 #include "util/debug.h" 20 20 #include "util/event.h" 21 + #include "util/util.h" 21 22 #include <api/fs/fs.h> 22 23 #include <api/fs/tracing_path.h> 23 24 #include <errno.h>
+3 -1
tools/perf/perf.h
··· 26 26 } 27 27 28 28 #ifndef MAX_NR_CPUS 29 - #define MAX_NR_CPUS 1024 29 + #define MAX_NR_CPUS 2048 30 30 #endif 31 31 32 32 extern const char *input_name; ··· 61 61 bool record_switch_events; 62 62 bool all_kernel; 63 63 bool all_user; 64 + bool kernel_callchains; 65 + bool user_callchains; 64 66 bool tail_synthesize; 65 67 bool overwrite; 66 68 bool ignore_missing_thread;
+44
tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json
··· 1 + [ 2 + { 3 + "EventCode": "0x02", 4 + "EventName": "uncore_hisi_ddrc.flux_wcmd", 5 + "BriefDescription": "DDRC write commands", 6 + "PublicDescription": "DDRC write commands", 7 + "Unit": "hisi_sccl,ddrc", 8 + }, 9 + { 10 + "EventCode": "0x03", 11 + "EventName": "uncore_hisi_ddrc.flux_rcmd", 12 + "BriefDescription": "DDRC read commands", 13 + "PublicDescription": "DDRC read commands", 14 + "Unit": "hisi_sccl,ddrc", 15 + }, 16 + { 17 + "EventCode": "0x04", 18 + "EventName": "uncore_hisi_ddrc.flux_wr", 19 + "BriefDescription": "DDRC precharge commands", 20 + "PublicDescription": "DDRC precharge commands", 21 + "Unit": "hisi_sccl,ddrc", 22 + }, 23 + { 24 + "EventCode": "0x05", 25 + "EventName": "uncore_hisi_ddrc.act_cmd", 26 + "BriefDescription": "DDRC active commands", 27 + "PublicDescription": "DDRC active commands", 28 + "Unit": "hisi_sccl,ddrc", 29 + }, 30 + { 31 + "EventCode": "0x06", 32 + "EventName": "uncore_hisi_ddrc.rnk_chg", 33 + "BriefDescription": "DDRC rank commands", 34 + "PublicDescription": "DDRC rank commands", 35 + "Unit": "hisi_sccl,ddrc", 36 + }, 37 + { 38 + "EventCode": "0x07", 39 + "EventName": "uncore_hisi_ddrc.rw_chg", 40 + "BriefDescription": "DDRC read and write changes", 41 + "PublicDescription": "DDRC read and write changes", 42 + "Unit": "hisi_sccl,ddrc", 43 + }, 44 + ]
+51
tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json
··· 1 + [ 2 + { 3 + "EventCode": "0x00", 4 + "EventName": "uncore_hisi_hha.rx_ops_num", 5 + "BriefDescription": "The number of all operations received by the HHA", 6 + "PublicDescription": "The number of all operations received by the HHA", 7 + "Unit": "hisi_sccl,hha", 8 + }, 9 + { 10 + "EventCode": "0x01", 11 + "EventName": "uncore_hisi_hha.rx_outer", 12 + "BriefDescription": "The number of all operations received by the HHA from another socket", 13 + "PublicDescription": "The number of all operations received by the HHA from another socket", 14 + "Unit": "hisi_sccl,hha", 15 + }, 16 + { 17 + "EventCode": "0x02", 18 + "EventName": "uncore_hisi_hha.rx_sccl", 19 + "BriefDescription": "The number of all operations received by the HHA from another SCCL in this socket", 20 + "PublicDescription": "The number of all operations received by the HHA from another SCCL in this socket", 21 + "Unit": "hisi_sccl,hha", 22 + }, 23 + { 24 + "EventCode": "0x1c", 25 + "EventName": "uncore_hisi_hha.rd_ddr_64b", 26 + "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", 27 + "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", 28 + "Unit": "hisi_sccl,hha", 29 + }, 30 + { 31 + "EventCode": "0x1d", 32 + "EventName": "uncore_hisi_hha.wr_dr_64b", 33 + "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", 34 + "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", 35 + "Unit": "hisi_sccl,hha", 36 + }, 37 + { 38 + "EventCode": "0x1e", 39 + "EventName": "uncore_hisi_hha.rd_ddr_128b", 40 + "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", 41 + "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", 42 + "Unit": "hisi_sccl,hha", 43 + }, 44 + { 45 + "EventCode": "0x1f", 46 + "EventName": "uncore_hisi_hha.wr_ddr_128b", 47 + 
"BriefDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", 48 + "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", 49 + "Unit": "hisi_sccl,hha", 50 + }, 51 + ]
+37
tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json
··· 1 + [ 2 + { 3 + "EventCode": "0x00", 4 + "EventName": "uncore_hisi_l3c.rd_cpipe", 5 + "BriefDescription": "Total read accesses", 6 + "PublicDescription": "Total read accesses", 7 + "Unit": "hisi_sccl,l3c", 8 + }, 9 + { 10 + "EventCode": "0x01", 11 + "EventName": "uncore_hisi_l3c.wr_cpipe", 12 + "BriefDescription": "Total write accesses", 13 + "PublicDescription": "Total write accesses", 14 + "Unit": "hisi_sccl,l3c", 15 + }, 16 + { 17 + "EventCode": "0x02", 18 + "EventName": "uncore_hisi_l3c.rd_hit_cpipe", 19 + "BriefDescription": "Total read hits", 20 + "PublicDescription": "Total read hits", 21 + "Unit": "hisi_sccl,l3c", 22 + }, 23 + { 24 + "EventCode": "0x03", 25 + "EventName": "uncore_hisi_l3c.wr_hit_cpipe", 26 + "BriefDescription": "Total write hits", 27 + "PublicDescription": "Total write hits", 28 + "Unit": "hisi_sccl,l3c", 29 + }, 30 + { 31 + "EventCode": "0x04", 32 + "EventName": "uncore_hisi_l3c.victim_num", 33 + "BriefDescription": "l3c precharge commands", 34 + "PublicDescription": "l3c precharge commands", 35 + "Unit": "hisi_sccl,l3c", 36 + }, 37 + ]
+2 -2
tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
··· 314 314 "MetricName": "DRAM_BW_Use" 315 315 }, 316 316 { 317 - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", 317 + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", 318 318 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", 319 319 "MetricGroup": "Memory_Lat", 320 320 "MetricName": "DRAM_Read_Latency" 321 321 }, 322 322 { 323 - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", 323 + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@", 324 324 "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", 325 325 "MetricGroup": "Memory_BW", 326 326 "MetricName": "DRAM_Parallel_Reads"
+2 -20
tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
··· 314 314 "MetricName": "DRAM_BW_Use" 315 315 }, 316 316 { 317 - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", 317 + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", 318 318 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", 319 319 "MetricGroup": "Memory_Lat", 320 320 "MetricName": "DRAM_Read_Latency" 321 321 }, 322 322 { 323 - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", 323 + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@", 324 324 "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", 325 325 "MetricGroup": "Memory_BW", 326 326 "MetricName": "DRAM_Parallel_Reads" 327 - }, 328 - { 329 - "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0", 330 - "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. 
Accounts for demand loads and L1/L2 data-read prefetches", 331 - "MetricGroup": "Memory_Lat", 332 - "MetricName": "MEM_PMM_Read_Latency" 333 - }, 334 - { 335 - "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", 336 - "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", 337 - "MetricGroup": "Memory_BW", 338 - "MetricName": "PMM_Read_BW" 339 - }, 340 - { 341 - "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", 342 - "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", 343 - "MetricGroup": "Memory_BW", 344 - "MetricName": "PMM_Write_BW" 345 327 }, 346 328 { 347 329 "MetricExpr": "cha_0@event\\=0x0@",
+5 -2
tools/perf/pmu-events/jevents.c
··· 236 236 { "CPU-M-CF", "cpum_cf" }, 237 237 { "CPU-M-SF", "cpum_sf" }, 238 238 { "UPI LL", "uncore_upi" }, 239 + { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, 240 + { "hisi_sccl,hha", "hisi_sccl,hha" }, 241 + { "hisi_sccl,l3c", "hisi_sccl,l3c" }, 239 242 {} 240 243 }; 241 244 ··· 844 841 _Exit(1); 845 842 } 846 843 847 - fprintf(outfp, "#include \"../../pmu-events/pmu-events.h\"\n"); 844 + fprintf(outfp, "#include \"pmu-events/pmu-events.h\"\n"); 848 845 print_mapping_table_prefix(outfp); 849 846 print_mapping_table_suffix(outfp); 850 847 fclose(outfp); ··· 1099 1096 } 1100 1097 1101 1098 /* Include pmu-events.h first */ 1102 - fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n"); 1099 + fprintf(eventsfp, "#include \"pmu-events/pmu-events.h\"\n"); 1103 1100 1104 1101 /* 1105 1102 * The mapfile allows multiple CPUids to point to the same JSON file,
+309 -21
tools/perf/scripts/python/export-to-postgresql.py
··· 27 27 # 28 28 # fedora: 29 29 # 30 - # $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql 30 + # $ sudo yum install postgresql postgresql-server qt-postgresql 31 31 # $ sudo su - postgres -c initdb 32 32 # $ sudo service postgresql start 33 33 # $ sudo su - postgres 34 - # $ createuser <your user id here> 34 + # $ createuser -s <your user id here> # Older versions may not support -s, in which case answer the prompt below: 35 35 # Shall the new role be a superuser? (y/n) y 36 + # $ sudo yum install python-pyside 37 + # 38 + # Alternately, to use Python3 and/or pyside 2, one of the following: 39 + # $ sudo yum install python3-pyside 40 + # $ pip install --user PySide2 41 + # $ pip3 install --user PySide2 36 42 # 37 43 # ubuntu: 38 44 # 39 - # $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql 45 + # $ sudo apt-get install postgresql 40 46 # $ sudo su - postgres 41 47 # $ createuser -s <your user id here> 48 + # $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql 49 + # 50 + # Alternately, to use Python3 and/or pyside 2, one of the following: 51 + # 52 + # $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql 53 + # $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql 54 + # $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql 42 55 # 43 56 # An example of using this script with Intel PT: 44 57 # ··· 212 199 # print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5)) 213 200 # call_path_id = query.value(6) 214 201 215 - from PySide.QtSql import * 202 + pyside_version_1 = True 203 + if not "pyside-version-1" in sys.argv: 204 + try: 205 + from PySide2.QtSql import * 206 + pyside_version_1 = False 207 + except: 208 + pass 209 + 210 + if pyside_version_1: 211 + from PySide.QtSql import * 216 212 217 213 if sys.version_info < (3, 0): 218 214 def toserverstr(str): ··· 277 255 
print(datetime.datetime.today(), *args, sep=' ', **kw_args) 278 256 279 257 def usage(): 280 - printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]") 281 - printerr("where: columns 'all' or 'branches'") 282 - printerr(" calls 'calls' => create calls and call_paths table") 283 - printerr(" callchains 'callchains' => create call_paths table") 284 - raise Exception("Too few arguments") 258 + printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>] [<pyside-version-1>]"); 259 + printerr("where: columns 'all' or 'branches'"); 260 + printerr(" calls 'calls' => create calls and call_paths table"); 261 + printerr(" callchains 'callchains' => create call_paths table"); 262 + printerr(" pyside-version-1 'pyside-version-1' => use pyside version 1"); 263 + raise Exception("Too few or bad arguments") 285 264 286 265 if (len(sys.argv) < 2): 287 266 usage() ··· 304 281 perf_db_export_calls = True 305 282 elif (sys.argv[i] == "callchains"): 306 283 perf_db_export_callchains = True 284 + elif (sys.argv[i] == "pyside-version-1"): 285 + pass 307 286 else: 308 287 usage() 309 288 ··· 394 369 'to_ip bigint,' 395 370 'branch_type integer,' 396 371 'in_tx boolean,' 397 - 'call_path_id bigint)') 372 + 'call_path_id bigint,' 373 + 'insn_count bigint,' 374 + 'cyc_count bigint)') 398 375 else: 399 376 do_query(query, 'CREATE TABLE samples (' 400 377 'id bigint NOT NULL,' ··· 420 393 'data_src bigint,' 421 394 'branch_type integer,' 422 395 'in_tx boolean,' 423 - 'call_path_id bigint)') 396 + 'call_path_id bigint,' 397 + 'insn_count bigint,' 398 + 'cyc_count bigint)') 424 399 425 400 if perf_db_export_calls or perf_db_export_callchains: 426 401 do_query(query, 'CREATE TABLE call_paths (' ··· 443 414 'return_id bigint,' 444 415 'parent_call_path_id bigint,' 445 416 'flags integer,' 446 - 'parent_id bigint)') 417 + 'parent_id bigint,' 418 + 'insn_count bigint,' 419 + 'cyc_count bigint)') 420 + 421 + 
do_query(query, 'CREATE TABLE ptwrite (' 422 + 'id bigint NOT NULL,' 423 + 'payload bigint,' 424 + 'exact_ip boolean)') 425 + 426 + do_query(query, 'CREATE TABLE cbr (' 427 + 'id bigint NOT NULL,' 428 + 'cbr integer,' 429 + 'mhz integer,' 430 + 'percent integer)') 431 + 432 + do_query(query, 'CREATE TABLE mwait (' 433 + 'id bigint NOT NULL,' 434 + 'hints integer,' 435 + 'extensions integer)') 436 + 437 + do_query(query, 'CREATE TABLE pwre (' 438 + 'id bigint NOT NULL,' 439 + 'cstate integer,' 440 + 'subcstate integer,' 441 + 'hw boolean)') 442 + 443 + do_query(query, 'CREATE TABLE exstop (' 444 + 'id bigint NOT NULL,' 445 + 'exact_ip boolean)') 446 + 447 + do_query(query, 'CREATE TABLE pwrx (' 448 + 'id bigint NOT NULL,' 449 + 'deepest_cstate integer,' 450 + 'last_cstate integer,' 451 + 'wake_reason integer)') 447 452 448 453 do_query(query, 'CREATE VIEW machines_view AS ' 449 454 'SELECT ' ··· 559 496 'return_time,' 560 497 'return_time - call_time AS elapsed_time,' 561 498 'branch_count,' 499 + 'insn_count,' 500 + 'cyc_count,' 501 + 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC,' 562 502 'call_id,' 563 503 'return_id,' 564 504 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE CAST ( flags AS VARCHAR(6) ) END AS flags,' ··· 587 521 'to_sym_offset,' 588 522 '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' 589 523 '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' 590 - 'in_tx' 524 + 'in_tx,' 525 + 'insn_count,' 526 + 'cyc_count,' 527 + 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC' 591 528 ' FROM samples') 592 529 530 + do_query(query, 'CREATE VIEW ptwrite_view AS ' 531 + 'SELECT ' 532 + 'ptwrite.id,' 533 + 'time,' 534 + 'cpu,' 535 
+ 'to_hex(payload) AS payload_hex,' 536 + 'CASE WHEN exact_ip=FALSE THEN \'False\' ELSE \'True\' END AS exact_ip' 537 + ' FROM ptwrite' 538 + ' INNER JOIN samples ON samples.id = ptwrite.id') 539 + 540 + do_query(query, 'CREATE VIEW cbr_view AS ' 541 + 'SELECT ' 542 + 'cbr.id,' 543 + 'time,' 544 + 'cpu,' 545 + 'cbr,' 546 + 'mhz,' 547 + 'percent' 548 + ' FROM cbr' 549 + ' INNER JOIN samples ON samples.id = cbr.id') 550 + 551 + do_query(query, 'CREATE VIEW mwait_view AS ' 552 + 'SELECT ' 553 + 'mwait.id,' 554 + 'time,' 555 + 'cpu,' 556 + 'to_hex(hints) AS hints_hex,' 557 + 'to_hex(extensions) AS extensions_hex' 558 + ' FROM mwait' 559 + ' INNER JOIN samples ON samples.id = mwait.id') 560 + 561 + do_query(query, 'CREATE VIEW pwre_view AS ' 562 + 'SELECT ' 563 + 'pwre.id,' 564 + 'time,' 565 + 'cpu,' 566 + 'cstate,' 567 + 'subcstate,' 568 + 'CASE WHEN hw=FALSE THEN \'False\' ELSE \'True\' END AS hw' 569 + ' FROM pwre' 570 + ' INNER JOIN samples ON samples.id = pwre.id') 571 + 572 + do_query(query, 'CREATE VIEW exstop_view AS ' 573 + 'SELECT ' 574 + 'exstop.id,' 575 + 'time,' 576 + 'cpu,' 577 + 'CASE WHEN exact_ip=FALSE THEN \'False\' ELSE \'True\' END AS exact_ip' 578 + ' FROM exstop' 579 + ' INNER JOIN samples ON samples.id = exstop.id') 580 + 581 + do_query(query, 'CREATE VIEW pwrx_view AS ' 582 + 'SELECT ' 583 + 'pwrx.id,' 584 + 'time,' 585 + 'cpu,' 586 + 'deepest_cstate,' 587 + 'last_cstate,' 588 + 'CASE WHEN wake_reason=1 THEN \'Interrupt\'' 589 + ' WHEN wake_reason=2 THEN \'Timer Deadline\'' 590 + ' WHEN wake_reason=4 THEN \'Monitored Address\'' 591 + ' WHEN wake_reason=8 THEN \'HW\'' 592 + ' ELSE CAST ( wake_reason AS VARCHAR(2) )' 593 + 'END AS wake_reason' 594 + ' FROM pwrx' 595 + ' INNER JOIN samples ON samples.id = pwrx.id') 596 + 597 + do_query(query, 'CREATE VIEW power_events_view AS ' 598 + 'SELECT ' 599 + 'samples.id,' 600 + 'samples.time,' 601 + 'samples.cpu,' 602 + 'selected_events.name AS event,' 603 + 'FORMAT(\'%6s\', cbr.cbr) AS cbr,' 604 + 
'FORMAT(\'%6s\', cbr.mhz) AS MHz,' 605 + 'FORMAT(\'%5s\', cbr.percent) AS percent,' 606 + 'to_hex(mwait.hints) AS hints_hex,' 607 + 'to_hex(mwait.extensions) AS extensions_hex,' 608 + 'FORMAT(\'%3s\', pwre.cstate) AS cstate,' 609 + 'FORMAT(\'%3s\', pwre.subcstate) AS subcstate,' 610 + 'CASE WHEN pwre.hw=FALSE THEN \'False\' WHEN pwre.hw=TRUE THEN \'True\' ELSE NULL END AS hw,' 611 + 'CASE WHEN exstop.exact_ip=FALSE THEN \'False\' WHEN exstop.exact_ip=TRUE THEN \'True\' ELSE NULL END AS exact_ip,' 612 + 'FORMAT(\'%3s\', pwrx.deepest_cstate) AS deepest_cstate,' 613 + 'FORMAT(\'%3s\', pwrx.last_cstate) AS last_cstate,' 614 + 'CASE WHEN pwrx.wake_reason=1 THEN \'Interrupt\'' 615 + ' WHEN pwrx.wake_reason=2 THEN \'Timer Deadline\'' 616 + ' WHEN pwrx.wake_reason=4 THEN \'Monitored Address\'' 617 + ' WHEN pwrx.wake_reason=8 THEN \'HW\'' 618 + ' ELSE FORMAT(\'%2s\', pwrx.wake_reason)' 619 + 'END AS wake_reason' 620 + ' FROM cbr' 621 + ' FULL JOIN mwait ON mwait.id = cbr.id' 622 + ' FULL JOIN pwre ON pwre.id = cbr.id' 623 + ' FULL JOIN exstop ON exstop.id = cbr.id' 624 + ' FULL JOIN pwrx ON pwrx.id = cbr.id' 625 + ' INNER JOIN samples ON samples.id = coalesce(cbr.id, mwait.id, pwre.id, exstop.id, pwrx.id)' 626 + ' INNER JOIN selected_events ON selected_events.id = samples.evsel_id' 627 + ' ORDER BY samples.id') 593 628 594 629 file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0) 595 630 file_trailer = b"\377\377" ··· 750 583 call_path_file = open_output_file("call_path_table.bin") 751 584 if perf_db_export_calls: 752 585 call_file = open_output_file("call_table.bin") 586 + ptwrite_file = open_output_file("ptwrite_table.bin") 587 + cbr_file = open_output_file("cbr_table.bin") 588 + mwait_file = open_output_file("mwait_table.bin") 589 + pwre_file = open_output_file("pwre_table.bin") 590 + exstop_file = open_output_file("exstop_table.bin") 591 + pwrx_file = open_output_file("pwrx_table.bin") 753 592 754 593 def trace_begin(): 755 594 printdate("Writing to 
intermediate files...") ··· 766 593 comm_table(0, "unknown") 767 594 dso_table(0, 0, "unknown", "unknown", "") 768 595 symbol_table(0, 0, 0, 0, 0, "unknown") 769 - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 596 + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 770 597 if perf_db_export_calls or perf_db_export_callchains: 771 598 call_path_table(0, 0, 0, 0) 772 - call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 599 + call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 773 600 774 601 unhandled_count = 0 602 + 603 + def is_table_empty(table_name): 604 + do_query(query, 'SELECT * FROM ' + table_name + ' LIMIT 1'); 605 + if query.next(): 606 + return False 607 + return True 608 + 609 + def drop(table_name): 610 + do_query(query, 'DROP VIEW ' + table_name + '_view'); 611 + do_query(query, 'DROP TABLE ' + table_name); 775 612 776 613 def trace_end(): 777 614 printdate("Copying to database...") ··· 798 615 copy_output_file(call_path_file, "call_paths") 799 616 if perf_db_export_calls: 800 617 copy_output_file(call_file, "calls") 618 + copy_output_file(ptwrite_file, "ptwrite") 619 + copy_output_file(cbr_file, "cbr") 620 + copy_output_file(mwait_file, "mwait") 621 + copy_output_file(pwre_file, "pwre") 622 + copy_output_file(exstop_file, "exstop") 623 + copy_output_file(pwrx_file, "pwrx") 801 624 802 625 printdate("Removing intermediate files...") 803 626 remove_output_file(evsel_file) ··· 819 630 remove_output_file(call_path_file) 820 631 if perf_db_export_calls: 821 632 remove_output_file(call_file) 633 + remove_output_file(ptwrite_file) 634 + remove_output_file(cbr_file) 635 + remove_output_file(mwait_file) 636 + remove_output_file(pwre_file) 637 + remove_output_file(exstop_file) 638 + remove_output_file(pwrx_file) 822 639 os.rmdir(output_dir_name) 823 640 printdate("Adding primary keys") 824 641 do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') ··· 840 645 
do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)') 841 646 if perf_db_export_calls: 842 647 do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') 648 + do_query(query, 'ALTER TABLE ptwrite ADD PRIMARY KEY (id)') 649 + do_query(query, 'ALTER TABLE cbr ADD PRIMARY KEY (id)') 650 + do_query(query, 'ALTER TABLE mwait ADD PRIMARY KEY (id)') 651 + do_query(query, 'ALTER TABLE pwre ADD PRIMARY KEY (id)') 652 + do_query(query, 'ALTER TABLE exstop ADD PRIMARY KEY (id)') 653 + do_query(query, 'ALTER TABLE pwrx ADD PRIMARY KEY (id)') 843 654 844 655 printdate("Adding foreign keys") 845 656 do_query(query, 'ALTER TABLE threads ' ··· 881 680 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)') 882 681 do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') 883 682 do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') 683 + do_query(query, 'ALTER TABLE ptwrite ' 684 + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') 685 + do_query(query, 'ALTER TABLE cbr ' 686 + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') 687 + do_query(query, 'ALTER TABLE mwait ' 688 + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') 689 + do_query(query, 'ALTER TABLE pwre ' 690 + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') 691 + do_query(query, 'ALTER TABLE exstop ' 692 + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') 693 + do_query(query, 'ALTER TABLE pwrx ' 694 + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') 695 + 696 + printdate("Dropping unused tables") 697 + if is_table_empty("ptwrite"): 698 + drop("ptwrite") 699 + if is_table_empty("mwait") and is_table_empty("pwre") and is_table_empty("exstop") and is_table_empty("pwrx"): 700 + drop("mwait") 701 + drop("pwre") 702 + drop("exstop") 703 + drop("pwrx") 704 + do_query(query, 'DROP VIEW power_events_view'); 705 + if is_table_empty("cbr"): 706 + drop("cbr") 884 707 885 708 
if (unhandled_count): 886 709 printdate("Warning: ", unhandled_count, " unhandled events") ··· 972 747 value = struct.pack(fmt, 2, 4, branch_type, n, name) 973 748 branch_type_file.write(value) 974 749 975 - def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x): 750 + def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, insn_cnt, cyc_cnt, *x): 976 751 if branches: 977 - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id) 752 + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiqiqiq", 20, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt) 978 753 else: 979 - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id) 754 + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiqiqiq", 24, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, 
to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt) 980 755 sample_file.write(value) 981 756 982 757 def call_path_table(cp_id, parent_id, symbol_id, ip, *x): ··· 984 759 value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip) 985 760 call_path_file.write(value) 986 761 987 - def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, *x): 988 - fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiq" 989 - value = struct.pack(fmt, 12, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id) 762 + def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, insn_cnt, cyc_cnt, *x): 763 + fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiqiqiq" 764 + value = struct.pack(fmt, 14, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id, 8, insn_cnt, 8, cyc_cnt) 990 765 call_file.write(value) 766 + 767 + def ptwrite(id, raw_buf): 768 + data = struct.unpack_from("<IQ", raw_buf) 769 + flags = data[0] 770 + payload = data[1] 771 + exact_ip = flags & 1 772 + value = struct.pack("!hiqiqiB", 3, 8, id, 8, payload, 1, exact_ip) 773 + ptwrite_file.write(value) 774 + 775 + def cbr(id, raw_buf): 776 + data = struct.unpack_from("<BBBBII", raw_buf) 777 + cbr = data[0] 778 + MHz = (data[4] + 500) / 1000 779 + percent = ((cbr * 1000 / data[2]) + 5) / 10 780 + value = struct.pack("!hiqiiiiii", 4, 8, id, 4, cbr, 4, MHz, 4, percent) 781 + cbr_file.write(value) 782 + 783 + def mwait(id, raw_buf): 784 + data = struct.unpack_from("<IQ", raw_buf) 785 + payload = data[1] 786 + hints 
= payload & 0xff 787 + extensions = (payload >> 32) & 0x3 788 + value = struct.pack("!hiqiiii", 3, 8, id, 4, hints, 4, extensions) 789 + mwait_file.write(value) 790 + 791 + def pwre(id, raw_buf): 792 + data = struct.unpack_from("<IQ", raw_buf) 793 + payload = data[1] 794 + hw = (payload >> 7) & 1 795 + cstate = (payload >> 12) & 0xf 796 + subcstate = (payload >> 8) & 0xf 797 + value = struct.pack("!hiqiiiiiB", 4, 8, id, 4, cstate, 4, subcstate, 1, hw) 798 + pwre_file.write(value) 799 + 800 + def exstop(id, raw_buf): 801 + data = struct.unpack_from("<I", raw_buf) 802 + flags = data[0] 803 + exact_ip = flags & 1 804 + value = struct.pack("!hiqiB", 2, 8, id, 1, exact_ip) 805 + exstop_file.write(value) 806 + 807 + def pwrx(id, raw_buf): 808 + data = struct.unpack_from("<IQ", raw_buf) 809 + payload = data[1] 810 + deepest_cstate = payload & 0xf 811 + last_cstate = (payload >> 4) & 0xf 812 + wake_reason = (payload >> 8) & 0xf 813 + value = struct.pack("!hiqiiiiii", 4, 8, id, 4, deepest_cstate, 4, last_cstate, 4, wake_reason) 814 + pwrx_file.write(value) 815 + 816 + def synth_data(id, config, raw_buf, *x): 817 + if config == 0: 818 + ptwrite(id, raw_buf) 819 + elif config == 1: 820 + mwait(id, raw_buf) 821 + elif config == 2: 822 + pwre(id, raw_buf) 823 + elif config == 3: 824 + exstop(id, raw_buf) 825 + elif config == 4: 826 + pwrx(id, raw_buf) 827 + elif config == 5: 828 + cbr(id, raw_buf)
+301 -18
tools/perf/scripts/python/export-to-sqlite.py
··· 21 21 # provides LGPL-licensed Python bindings for Qt. You will also need the package 22 22 # libqt4-sql-sqlite for Qt sqlite3 support. 23 23 # 24 + # Examples of installing pyside: 25 + # 26 + # ubuntu: 27 + # 28 + # $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql 29 + # 30 + # Alternately, to use Python3 and/or pyside 2, one of the following: 31 + # 32 + # $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql 33 + # $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql 34 + # $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql 35 + # fedora: 36 + # 37 + # $ sudo yum install python-pyside 38 + # 39 + # Alternately, to use Python3 and/or pyside 2, one of the following: 40 + # $ sudo yum install python3-pyside 41 + # $ pip install --user PySide2 42 + # $ pip3 install --user PySide2 43 + # 24 44 # An example of using this script with Intel PT: 25 45 # 26 46 # $ perf record -e intel_pt//u ls ··· 69 49 # difference is the 'transaction' column of the 'samples' table which is 70 50 # renamed 'transaction_' in sqlite because 'transaction' is a reserved word. 
71 51 72 - from PySide.QtSql import * 52 + pyside_version_1 = True 53 + if not "pyside-version-1" in sys.argv: 54 + try: 55 + from PySide2.QtSql import * 56 + pyside_version_1 = False 57 + except: 58 + pass 59 + 60 + if pyside_version_1: 61 + from PySide.QtSql import * 73 62 74 63 sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 75 64 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') ··· 98 69 print(datetime.datetime.today(), *args, sep=' ', **kw_args) 99 70 100 71 def usage(): 101 - printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]"); 102 - printerr("where: columns 'all' or 'branches'"); 103 - printerr(" calls 'calls' => create calls and call_paths table"); 104 - printerr(" callchains 'callchains' => create call_paths table"); 105 - raise Exception("Too few arguments") 72 + printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>] [<pyside-version-1>]"); 73 + printerr("where: columns 'all' or 'branches'"); 74 + printerr(" calls 'calls' => create calls and call_paths table"); 75 + printerr(" callchains 'callchains' => create call_paths table"); 76 + printerr(" pyside-version-1 'pyside-version-1' => use pyside version 1"); 77 + raise Exception("Too few or bad arguments") 106 78 107 79 if (len(sys.argv) < 2): 108 80 usage() ··· 125 95 perf_db_export_calls = True 126 96 elif (sys.argv[i] == "callchains"): 127 97 perf_db_export_callchains = True 98 + elif (sys.argv[i] == "pyside-version-1"): 99 + pass 128 100 else: 129 101 usage() 130 102 ··· 218 186 'to_ip bigint,' 219 187 'branch_type integer,' 220 188 'in_tx boolean,' 221 - 'call_path_id bigint)') 189 + 'call_path_id bigint,' 190 + 'insn_count bigint,' 191 + 'cyc_count bigint)') 222 192 else: 223 193 do_query(query, 'CREATE TABLE samples (' 224 194 'id integer NOT NULL PRIMARY KEY,' ··· 244 210 'data_src bigint,' 245 211 'branch_type integer,' 246 212 'in_tx boolean,' 247 - 'call_path_id bigint)') 213 + 'call_path_id bigint,' 214 + 
'insn_count bigint,' 215 + 'cyc_count bigint)') 248 216 249 217 if perf_db_export_calls or perf_db_export_callchains: 250 218 do_query(query, 'CREATE TABLE call_paths (' ··· 267 231 'return_id bigint,' 268 232 'parent_call_path_id bigint,' 269 233 'flags integer,' 270 - 'parent_id bigint)') 234 + 'parent_id bigint,' 235 + 'insn_count bigint,' 236 + 'cyc_count bigint)') 237 + 238 + do_query(query, 'CREATE TABLE ptwrite (' 239 + 'id integer NOT NULL PRIMARY KEY,' 240 + 'payload bigint,' 241 + 'exact_ip integer)') 242 + 243 + do_query(query, 'CREATE TABLE cbr (' 244 + 'id integer NOT NULL PRIMARY KEY,' 245 + 'cbr integer,' 246 + 'mhz integer,' 247 + 'percent integer)') 248 + 249 + do_query(query, 'CREATE TABLE mwait (' 250 + 'id integer NOT NULL PRIMARY KEY,' 251 + 'hints integer,' 252 + 'extensions integer)') 253 + 254 + do_query(query, 'CREATE TABLE pwre (' 255 + 'id integer NOT NULL PRIMARY KEY,' 256 + 'cstate integer,' 257 + 'subcstate integer,' 258 + 'hw integer)') 259 + 260 + do_query(query, 'CREATE TABLE exstop (' 261 + 'id integer NOT NULL PRIMARY KEY,' 262 + 'exact_ip integer)') 263 + 264 + do_query(query, 'CREATE TABLE pwrx (' 265 + 'id integer NOT NULL PRIMARY KEY,' 266 + 'deepest_cstate integer,' 267 + 'last_cstate integer,' 268 + 'wake_reason integer)') 271 269 272 270 # printf was added to sqlite in version 3.8.3 273 271 sqlite_has_printf = False ··· 397 327 'return_time,' 398 328 'return_time - call_time AS elapsed_time,' 399 329 'branch_count,' 330 + 'insn_count,' 331 + 'cyc_count,' 332 + 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC,' 400 333 'call_id,' 401 334 'return_id,' 402 335 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' ··· 425 352 'to_sym_offset,' 426 353 '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' 427 354 '(SELECT 
name FROM branch_types WHERE id = branch_type) AS branch_type_name,' 428 - 'in_tx' 355 + 'in_tx,' 356 + 'insn_count,' 357 + 'cyc_count,' 358 + 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC' 429 359 ' FROM samples') 360 + 361 + do_query(query, 'CREATE VIEW ptwrite_view AS ' 362 + 'SELECT ' 363 + 'ptwrite.id,' 364 + 'time,' 365 + 'cpu,' 366 + + emit_to_hex('payload') + ' AS payload_hex,' 367 + 'CASE WHEN exact_ip=0 THEN \'False\' ELSE \'True\' END AS exact_ip' 368 + ' FROM ptwrite' 369 + ' INNER JOIN samples ON samples.id = ptwrite.id') 370 + 371 + do_query(query, 'CREATE VIEW cbr_view AS ' 372 + 'SELECT ' 373 + 'cbr.id,' 374 + 'time,' 375 + 'cpu,' 376 + 'cbr,' 377 + 'mhz,' 378 + 'percent' 379 + ' FROM cbr' 380 + ' INNER JOIN samples ON samples.id = cbr.id') 381 + 382 + do_query(query, 'CREATE VIEW mwait_view AS ' 383 + 'SELECT ' 384 + 'mwait.id,' 385 + 'time,' 386 + 'cpu,' 387 + + emit_to_hex('hints') + ' AS hints_hex,' 388 + + emit_to_hex('extensions') + ' AS extensions_hex' 389 + ' FROM mwait' 390 + ' INNER JOIN samples ON samples.id = mwait.id') 391 + 392 + do_query(query, 'CREATE VIEW pwre_view AS ' 393 + 'SELECT ' 394 + 'pwre.id,' 395 + 'time,' 396 + 'cpu,' 397 + 'cstate,' 398 + 'subcstate,' 399 + 'CASE WHEN hw=0 THEN \'False\' ELSE \'True\' END AS hw' 400 + ' FROM pwre' 401 + ' INNER JOIN samples ON samples.id = pwre.id') 402 + 403 + do_query(query, 'CREATE VIEW exstop_view AS ' 404 + 'SELECT ' 405 + 'exstop.id,' 406 + 'time,' 407 + 'cpu,' 408 + 'CASE WHEN exact_ip=0 THEN \'False\' ELSE \'True\' END AS exact_ip' 409 + ' FROM exstop' 410 + ' INNER JOIN samples ON samples.id = exstop.id') 411 + 412 + do_query(query, 'CREATE VIEW pwrx_view AS ' 413 + 'SELECT ' 414 + 'pwrx.id,' 415 + 'time,' 416 + 'cpu,' 417 + 'deepest_cstate,' 418 + 'last_cstate,' 419 + 'CASE WHEN wake_reason=1 THEN \'Interrupt\'' 420 + ' WHEN wake_reason=2 THEN \'Timer Deadline\'' 421 + ' WHEN wake_reason=4 THEN \'Monitored 
Address\'' 422 + ' WHEN wake_reason=8 THEN \'HW\'' 423 + ' ELSE wake_reason ' 424 + 'END AS wake_reason' 425 + ' FROM pwrx' 426 + ' INNER JOIN samples ON samples.id = pwrx.id') 427 + 428 + do_query(query, 'CREATE VIEW power_events_view AS ' 429 + 'SELECT ' 430 + 'samples.id,' 431 + 'time,' 432 + 'cpu,' 433 + 'selected_events.name AS event,' 434 + 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT cbr FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS cbr,' 435 + 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT mhz FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS mhz,' 436 + 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT percent FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS percent,' 437 + 'CASE WHEN selected_events.name=\'mwait\' THEN (SELECT ' + emit_to_hex('hints') + ' FROM mwait WHERE mwait.id = samples.id) ELSE "" END AS hints_hex,' 438 + 'CASE WHEN selected_events.name=\'mwait\' THEN (SELECT ' + emit_to_hex('extensions') + ' FROM mwait WHERE mwait.id = samples.id) ELSE "" END AS extensions_hex,' 439 + 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT cstate FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS cstate,' 440 + 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT subcstate FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS subcstate,' 441 + 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT hw FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS hw,' 442 + 'CASE WHEN selected_events.name=\'exstop\' THEN (SELECT exact_ip FROM exstop WHERE exstop.id = samples.id) ELSE "" END AS exact_ip,' 443 + 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT deepest_cstate FROM pwrx WHERE pwrx.id = samples.id) ELSE "" END AS deepest_cstate,' 444 + 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT last_cstate FROM pwrx WHERE pwrx.id = samples.id) ELSE "" END AS last_cstate,' 445 + 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT ' 446 + 'CASE WHEN wake_reason=1 THEN \'Interrupt\'' 447 + ' WHEN wake_reason=2 THEN \'Timer 
Deadline\'' 448 + ' WHEN wake_reason=4 THEN \'Monitored Address\'' 449 + ' WHEN wake_reason=8 THEN \'HW\'' 450 + ' ELSE wake_reason ' 451 + 'END' 452 + ' FROM pwrx WHERE pwrx.id = samples.id) ELSE "" END AS wake_reason' 453 + ' FROM samples' 454 + ' INNER JOIN selected_events ON selected_events.id = evsel_id' 455 + ' WHERE selected_events.name IN (\'cbr\',\'mwait\',\'exstop\',\'pwre\',\'pwrx\')') 430 456 431 457 do_query(query, 'END TRANSACTION') 432 458 ··· 547 375 branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)") 548 376 sample_query = QSqlQuery(db) 549 377 if branches: 550 - sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 378 + sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 551 379 else: 552 - sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 380 + sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 553 381 if perf_db_export_calls or perf_db_export_callchains: 554 382 call_path_query = QSqlQuery(db) 555 383 call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)") 556 384 if perf_db_export_calls: 557 385 call_query = QSqlQuery(db) 558 - call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 386 + call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 387 + ptwrite_query = QSqlQuery(db) 388 + ptwrite_query.prepare("INSERT INTO ptwrite VALUES (?, ?, ?)") 389 + cbr_query = QSqlQuery(db) 390 + cbr_query.prepare("INSERT INTO cbr VALUES (?, ?, ?, ?)") 391 + mwait_query = QSqlQuery(db) 392 + mwait_query.prepare("INSERT INTO mwait VALUES (?, ?, ?)") 393 + pwre_query = QSqlQuery(db) 394 + pwre_query.prepare("INSERT INTO pwre VALUES (?, ?, ?, ?)") 395 + exstop_query = QSqlQuery(db) 396 + exstop_query.prepare("INSERT INTO 
exstop VALUES (?, ?)") 397 + pwrx_query = QSqlQuery(db) 398 + pwrx_query.prepare("INSERT INTO pwrx VALUES (?, ?, ?, ?)") 559 399 560 400 def trace_begin(): 561 401 printdate("Writing records...") ··· 579 395 comm_table(0, "unknown") 580 396 dso_table(0, 0, "unknown", "unknown", "") 581 397 symbol_table(0, 0, 0, 0, 0, "unknown") 582 - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 398 + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 583 399 if perf_db_export_calls or perf_db_export_callchains: 584 400 call_path_table(0, 0, 0, 0) 585 - call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 401 + call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 586 402 587 403 unhandled_count = 0 404 + 405 + def is_table_empty(table_name): 406 + do_query(query, 'SELECT * FROM ' + table_name + ' LIMIT 1'); 407 + if query.next(): 408 + return False 409 + return True 410 + 411 + def drop(table_name): 412 + do_query(query, 'DROP VIEW ' + table_name + '_view'); 413 + do_query(query, 'DROP TABLE ' + table_name); 588 414 589 415 def trace_end(): 590 416 do_query(query, 'END TRANSACTION') ··· 603 409 if perf_db_export_calls: 604 410 do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') 605 411 do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') 412 + 413 + printdate("Dropping unused tables") 414 + if is_table_empty("ptwrite"): 415 + drop("ptwrite") 416 + if is_table_empty("mwait") and is_table_empty("pwre") and is_table_empty("exstop") and is_table_empty("pwrx"): 417 + drop("mwait") 418 + drop("pwre") 419 + drop("exstop") 420 + drop("pwrx") 421 + do_query(query, 'DROP VIEW power_events_view'); 422 + if is_table_empty("cbr"): 423 + drop("cbr") 606 424 607 425 if (unhandled_count): 608 426 printdate("Warning: ", unhandled_count, " unhandled events") ··· 660 454 if branches: 661 455 for xx in x[0:15]: 662 456 sample_query.addBindValue(str(xx)) 663 - for xx in x[19:22]: 457 + for xx in 
x[19:24]: 664 458 sample_query.addBindValue(str(xx)) 665 459 do_query_(sample_query) 666 460 else: 667 - bind_exec(sample_query, 22, x) 461 + bind_exec(sample_query, 24, x) 668 462 669 463 def call_path_table(*x): 670 464 bind_exec(call_path_query, 4, x) 671 465 672 466 def call_return_table(*x): 673 - bind_exec(call_query, 12, x) 467 + bind_exec(call_query, 14, x) 468 + 469 + def ptwrite(id, raw_buf): 470 + data = struct.unpack_from("<IQ", raw_buf) 471 + flags = data[0] 472 + payload = data[1] 473 + exact_ip = flags & 1 474 + ptwrite_query.addBindValue(str(id)) 475 + ptwrite_query.addBindValue(str(payload)) 476 + ptwrite_query.addBindValue(str(exact_ip)) 477 + do_query_(ptwrite_query) 478 + 479 + def cbr(id, raw_buf): 480 + data = struct.unpack_from("<BBBBII", raw_buf) 481 + cbr = data[0] 482 + MHz = (data[4] + 500) / 1000 483 + percent = ((cbr * 1000 / data[2]) + 5) / 10 484 + cbr_query.addBindValue(str(id)) 485 + cbr_query.addBindValue(str(cbr)) 486 + cbr_query.addBindValue(str(MHz)) 487 + cbr_query.addBindValue(str(percent)) 488 + do_query_(cbr_query) 489 + 490 + def mwait(id, raw_buf): 491 + data = struct.unpack_from("<IQ", raw_buf) 492 + payload = data[1] 493 + hints = payload & 0xff 494 + extensions = (payload >> 32) & 0x3 495 + mwait_query.addBindValue(str(id)) 496 + mwait_query.addBindValue(str(hints)) 497 + mwait_query.addBindValue(str(extensions)) 498 + do_query_(mwait_query) 499 + 500 + def pwre(id, raw_buf): 501 + data = struct.unpack_from("<IQ", raw_buf) 502 + payload = data[1] 503 + hw = (payload >> 7) & 1 504 + cstate = (payload >> 12) & 0xf 505 + subcstate = (payload >> 8) & 0xf 506 + pwre_query.addBindValue(str(id)) 507 + pwre_query.addBindValue(str(cstate)) 508 + pwre_query.addBindValue(str(subcstate)) 509 + pwre_query.addBindValue(str(hw)) 510 + do_query_(pwre_query) 511 + 512 + def exstop(id, raw_buf): 513 + data = struct.unpack_from("<I", raw_buf) 514 + flags = data[0] 515 + exact_ip = flags & 1 516 + exstop_query.addBindValue(str(id)) 517 + 
exstop_query.addBindValue(str(exact_ip)) 518 + do_query_(exstop_query) 519 + 520 + def pwrx(id, raw_buf): 521 + data = struct.unpack_from("<IQ", raw_buf) 522 + payload = data[1] 523 + deepest_cstate = payload & 0xf 524 + last_cstate = (payload >> 4) & 0xf 525 + wake_reason = (payload >> 8) & 0xf 526 + pwrx_query.addBindValue(str(id)) 527 + pwrx_query.addBindValue(str(deepest_cstate)) 528 + pwrx_query.addBindValue(str(last_cstate)) 529 + pwrx_query.addBindValue(str(wake_reason)) 530 + do_query_(pwrx_query) 531 + 532 + def synth_data(id, config, raw_buf, *x): 533 + if config == 0: 534 + ptwrite(id, raw_buf) 535 + elif config == 1: 536 + mwait(id, raw_buf) 537 + elif config == 2: 538 + pwre(id, raw_buf) 539 + elif config == 3: 540 + exstop(id, raw_buf) 541 + elif config == 4: 542 + pwrx(id, raw_buf) 543 + elif config == 5: 544 + cbr(id, raw_buf)
+263 -80
tools/perf/scripts/python/exported-sql-viewer.py
··· 1 - #!/usr/bin/env python2 1 + #!/usr/bin/env python 2 2 # SPDX-License-Identifier: GPL-2.0 3 3 # exported-sql-viewer.py: view data from sql database 4 4 # Copyright (c) 2014-2018, Intel Corporation. ··· 91 91 from __future__ import print_function 92 92 93 93 import sys 94 + import argparse 94 95 import weakref 95 96 import threading 96 97 import string ··· 105 104 glb_nsz = 16 106 105 import re 107 106 import os 108 - from PySide.QtCore import * 109 - from PySide.QtGui import * 110 - from PySide.QtSql import * 107 + 111 108 pyside_version_1 = True 109 + if not "--pyside-version-1" in sys.argv: 110 + try: 111 + from PySide2.QtCore import * 112 + from PySide2.QtGui import * 113 + from PySide2.QtSql import * 114 + from PySide2.QtWidgets import * 115 + pyside_version_1 = False 116 + except: 117 + pass 118 + 119 + if pyside_version_1: 120 + from PySide.QtCore import * 121 + from PySide.QtGui import * 122 + from PySide.QtSql import * 123 + 112 124 from decimal import * 113 125 from ctypes import * 114 126 from multiprocessing import Process, Array, Value, Event ··· 200 186 201 187 class TreeModel(QAbstractItemModel): 202 188 203 - def __init__(self, glb, parent=None): 189 + def __init__(self, glb, params, parent=None): 204 190 super(TreeModel, self).__init__(parent) 205 191 self.glb = glb 192 + self.params = params 206 193 self.root = self.GetRoot() 207 194 self.last_row_read = 0 208 195 ··· 400 385 401 386 def Activate(self): 402 387 self.bar.show() 388 + self.textbox.lineEdit().selectAll() 403 389 self.textbox.setFocus() 404 390 405 391 def Deactivate(self): ··· 465 449 466 450 class CallGraphLevelItemBase(object): 467 451 468 - def __init__(self, glb, row, parent_item): 452 + def __init__(self, glb, params, row, parent_item): 469 453 self.glb = glb 454 + self.params = params 470 455 self.row = row 471 456 self.parent_item = parent_item 472 457 self.query_done = False; ··· 506 489 507 490 class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): 508 491 509 - 
def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): 510 - super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) 492 + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): 493 + super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) 511 494 self.comm_id = comm_id 512 495 self.thread_id = thread_id 513 496 self.call_path_id = call_path_id 497 + self.insn_cnt = insn_cnt 498 + self.cyc_cnt = cyc_cnt 514 499 self.branch_count = branch_count 515 500 self.time = time 516 501 517 502 def Select(self): 518 503 self.query_done = True; 519 504 query = QSqlQuery(self.glb.db) 520 - QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" 505 + if self.params.have_ipc: 506 + ipc_str = ", SUM(insn_count), SUM(cyc_count)" 507 + else: 508 + ipc_str = "" 509 + QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time)" + ipc_str + ", SUM(branch_count)" 521 510 " FROM calls" 522 511 " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" 523 512 " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" ··· 534 511 " GROUP BY call_path_id, name, short_name" 535 512 " ORDER BY call_path_id") 536 513 while query.next(): 537 - child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) 514 + if self.params.have_ipc: 515 + insn_cnt = int(query.value(5)) 516 + cyc_cnt = int(query.value(6)) 517 + branch_count = int(query.value(7)) 518 + else: 519 + insn_cnt = 0 520 + cyc_cnt = 0 521 + branch_count = int(query.value(5)) 522 + child_item = CallGraphLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), 
query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self) 538 523 self.child_items.append(child_item) 539 524 self.child_count += 1 540 525 ··· 550 519 551 520 class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): 552 521 553 - def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): 554 - super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) 522 + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item): 523 + super(CallGraphLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item) 555 524 dso = dsoname(dso) 556 - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] 525 + if self.params.have_ipc: 526 + insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt) 527 + cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt) 528 + br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count) 529 + ipc = CalcIPC(cyc_cnt, insn_cnt) 530 + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] 531 + else: 532 + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] 557 533 self.dbid = call_path_id 558 534 559 535 # Context-sensitive call graph data model level two item 560 536 561 537 class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): 562 538 563 - def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): 564 - super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 
0, parent_item) 565 - self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] 539 + def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): 540 + super(CallGraphLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 1, 0, 0, 0, 0, parent_item) 541 + if self.params.have_ipc: 542 + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""] 543 + else: 544 + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] 566 545 self.dbid = thread_id 567 546 568 547 def Select(self): 569 548 super(CallGraphLevelTwoItem, self).Select() 570 549 for child_item in self.child_items: 571 550 self.time += child_item.time 551 + self.insn_cnt += child_item.insn_cnt 552 + self.cyc_cnt += child_item.cyc_cnt 572 553 self.branch_count += child_item.branch_count 573 554 for child_item in self.child_items: 574 555 child_item.data[4] = PercentToOneDP(child_item.time, self.time) 575 - child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) 556 + if self.params.have_ipc: 557 + child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt) 558 + child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt) 559 + child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count) 560 + else: 561 + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) 576 562 577 563 # Context-sensitive call graph data model level one item 578 564 579 565 class CallGraphLevelOneItem(CallGraphLevelItemBase): 580 566 581 - def __init__(self, glb, row, comm_id, comm, parent_item): 582 - super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) 583 - self.data = [comm, "", "", "", "", "", ""] 567 + def __init__(self, glb, params, row, comm_id, comm, parent_item): 568 + super(CallGraphLevelOneItem, self).__init__(glb, params, row, parent_item) 569 + if self.params.have_ipc: 570 + self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""] 571 + else: 
572 + self.data = [comm, "", "", "", "", "", ""] 584 573 self.dbid = comm_id 585 574 586 575 def Select(self): ··· 611 560 " INNER JOIN threads ON thread_id = threads.id" 612 561 " WHERE comm_id = " + str(self.dbid)) 613 562 while query.next(): 614 - child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) 563 + child_item = CallGraphLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) 615 564 self.child_items.append(child_item) 616 565 self.child_count += 1 617 566 ··· 619 568 620 569 class CallGraphRootItem(CallGraphLevelItemBase): 621 570 622 - def __init__(self, glb): 623 - super(CallGraphRootItem, self).__init__(glb, 0, None) 571 + def __init__(self, glb, params): 572 + super(CallGraphRootItem, self).__init__(glb, params, 0, None) 624 573 self.dbid = 0 625 574 self.query_done = True; 626 575 query = QSqlQuery(glb.db) ··· 628 577 while query.next(): 629 578 if not query.value(0): 630 579 continue 631 - child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) 580 + child_item = CallGraphLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self) 632 581 self.child_items.append(child_item) 633 582 self.child_count += 1 583 + 584 + # Call graph model parameters 585 + 586 + class CallGraphModelParams(): 587 + 588 + def __init__(self, glb, parent=None): 589 + self.have_ipc = IsSelectable(glb.db, "calls", columns = "insn_count, cyc_count") 634 590 635 591 # Context-sensitive call graph data model base 636 592 637 593 class CallGraphModelBase(TreeModel): 638 594 639 595 def __init__(self, glb, parent=None): 640 - super(CallGraphModelBase, self).__init__(glb, parent) 596 + super(CallGraphModelBase, self).__init__(glb, CallGraphModelParams(glb), parent) 641 597 642 598 def FindSelect(self, value, pattern, query): 643 599 if pattern: ··· 726 668 super(CallGraphModel, 
self).__init__(glb, parent) 727 669 728 670 def GetRoot(self): 729 - return CallGraphRootItem(self.glb) 671 + return CallGraphRootItem(self.glb, self.params) 730 672 731 673 def columnCount(self, parent=None): 732 - return 7 674 + if self.params.have_ipc: 675 + return 12 676 + else: 677 + return 7 733 678 734 679 def columnHeader(self, column): 735 - headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] 680 + if self.params.have_ipc: 681 + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "] 682 + else: 683 + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] 736 684 return headers[column] 737 685 738 686 def columnAlignment(self, column): 739 - alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] 687 + if self.params.have_ipc: 688 + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] 689 + else: 690 + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] 740 691 return alignment[column] 741 692 742 693 def DoFindSelect(self, query, match): ··· 782 715 783 716 class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): 784 717 785 - def __init__(self, glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item): 786 - super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) 718 + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): 719 + super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) 787 720 self.comm_id = comm_id 788 721 self.thread_id = 
thread_id 789 722 self.calls_id = calls_id 723 + self.insn_cnt = insn_cnt 724 + self.cyc_cnt = cyc_cnt 790 725 self.branch_count = branch_count 791 726 self.time = time 792 727 ··· 798 729 comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id) 799 730 else: 800 731 comm_thread = "" 732 + if self.params.have_ipc: 733 + ipc_str = ", insn_count, cyc_count" 734 + else: 735 + ipc_str = "" 801 736 query = QSqlQuery(self.glb.db) 802 - QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time, branch_count" 737 + QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time" + ipc_str + ", branch_count" 803 738 " FROM calls" 804 739 " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" 805 740 " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" ··· 811 738 " WHERE calls.parent_id = " + str(self.calls_id) + comm_thread + 812 739 " ORDER BY call_time, calls.id") 813 740 while query.next(): 814 - child_item = CallTreeLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) 741 + if self.params.have_ipc: 742 + insn_cnt = int(query.value(5)) 743 + cyc_cnt = int(query.value(6)) 744 + branch_count = int(query.value(7)) 745 + else: 746 + insn_cnt = 0 747 + cyc_cnt = 0 748 + branch_count = int(query.value(5)) 749 + child_item = CallTreeLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self) 815 750 self.child_items.append(child_item) 816 751 self.child_count += 1 817 752 ··· 827 746 828 747 class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): 829 748 830 - def __init__(self, glb, row, comm_id, thread_id, calls_id, name, dso, count, time, branch_count, parent_item): 831 - 
super(CallTreeLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item) 749 + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item): 750 + super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item) 832 751 dso = dsoname(dso) 833 - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] 752 + if self.params.have_ipc: 753 + insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt) 754 + cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt) 755 + br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count) 756 + ipc = CalcIPC(cyc_cnt, insn_cnt) 757 + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] 758 + else: 759 + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] 834 760 self.dbid = calls_id 835 761 836 762 # Call tree data model level two item 837 763 838 764 class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase): 839 765 840 - def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): 841 - super(CallTreeLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 0, 0, 0, parent_item) 842 - self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] 766 + def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): 767 + super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item) 768 + if self.params.have_ipc: 769 + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""] 770 + else: 771 + 
self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] 843 772 self.dbid = thread_id 844 773 845 774 def Select(self): 846 775 super(CallTreeLevelTwoItem, self).Select() 847 776 for child_item in self.child_items: 848 777 self.time += child_item.time 778 + self.insn_cnt += child_item.insn_cnt 779 + self.cyc_cnt += child_item.cyc_cnt 849 780 self.branch_count += child_item.branch_count 850 781 for child_item in self.child_items: 851 782 child_item.data[4] = PercentToOneDP(child_item.time, self.time) 852 - child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) 783 + if self.params.have_ipc: 784 + child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt) 785 + child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt) 786 + child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count) 787 + else: 788 + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) 853 789 854 790 # Call tree data model level one item 855 791 856 792 class CallTreeLevelOneItem(CallGraphLevelItemBase): 857 793 858 - def __init__(self, glb, row, comm_id, comm, parent_item): 859 - super(CallTreeLevelOneItem, self).__init__(glb, row, parent_item) 860 - self.data = [comm, "", "", "", "", "", ""] 794 + def __init__(self, glb, params, row, comm_id, comm, parent_item): 795 + super(CallTreeLevelOneItem, self).__init__(glb, params, row, parent_item) 796 + if self.params.have_ipc: 797 + self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""] 798 + else: 799 + self.data = [comm, "", "", "", "", "", ""] 861 800 self.dbid = comm_id 862 801 863 802 def Select(self): ··· 888 787 " INNER JOIN threads ON thread_id = threads.id" 889 788 " WHERE comm_id = " + str(self.dbid)) 890 789 while query.next(): 891 - child_item = CallTreeLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) 790 + child_item = CallTreeLevelTwoItem(self.glb, self.params, 
self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) 892 791 self.child_items.append(child_item) 893 792 self.child_count += 1 894 793 ··· 896 795 897 796 class CallTreeRootItem(CallGraphLevelItemBase): 898 797 899 - def __init__(self, glb): 900 - super(CallTreeRootItem, self).__init__(glb, 0, None) 798 + def __init__(self, glb, params): 799 + super(CallTreeRootItem, self).__init__(glb, params, 0, None) 901 800 self.dbid = 0 902 801 self.query_done = True; 903 802 query = QSqlQuery(glb.db) ··· 905 804 while query.next(): 906 805 if not query.value(0): 907 806 continue 908 - child_item = CallTreeLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) 807 + child_item = CallTreeLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self) 909 808 self.child_items.append(child_item) 910 809 self.child_count += 1 911 810 ··· 917 816 super(CallTreeModel, self).__init__(glb, parent) 918 817 919 818 def GetRoot(self): 920 - return CallTreeRootItem(self.glb) 819 + return CallTreeRootItem(self.glb, self.params) 921 820 922 821 def columnCount(self, parent=None): 923 - return 7 822 + if self.params.have_ipc: 823 + return 12 824 + else: 825 + return 7 924 826 925 827 def columnHeader(self, column): 926 - headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] 828 + if self.params.have_ipc: 829 + headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "] 830 + else: 831 + headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] 927 832 return headers[column] 928 833 929 834 def columnAlignment(self, column): 930 - alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] 835 + if self.params.have_ipc: 836 + alignment = [ 
Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] 837 + else: 838 + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] 931 839 return alignment[column] 932 840 933 841 def DoFindSelect(self, query, match): ··· 1465 1355 1466 1356 class BranchLevelTwoItem(): 1467 1357 1468 - def __init__(self, row, text, parent_item): 1358 + def __init__(self, row, col, text, parent_item): 1469 1359 self.row = row 1470 1360 self.parent_item = parent_item 1471 - self.data = [""] * 8 1472 - self.data[7] = text 1361 + self.data = [""] * (col + 1) 1362 + self.data[col] = text 1473 1363 self.level = 2 1474 1364 1475 1365 def getParentItem(self): ··· 1501 1391 self.dbid = data[0] 1502 1392 self.level = 1 1503 1393 self.query_done = False 1394 + self.br_col = len(self.data) - 1 1504 1395 1505 1396 def getChildItem(self, row): 1506 1397 return self.child_items[row] ··· 1582 1471 while k < 15: 1583 1472 byte_str += " " 1584 1473 k += 1 1585 - self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self)) 1474 + self.child_items.append(BranchLevelTwoItem(0, self.br_col, byte_str + " " + text, self)) 1586 1475 self.child_count += 1 1587 1476 else: 1588 1477 return ··· 1633 1522 def getData(self, column): 1634 1523 return "" 1635 1524 1525 + # Calculate instructions per cycle 1526 + 1527 + def CalcIPC(cyc_cnt, insn_cnt): 1528 + if cyc_cnt and insn_cnt: 1529 + ipc = Decimal(float(insn_cnt) / cyc_cnt) 1530 + ipc = str(ipc.quantize(Decimal(".01"), rounding=ROUND_HALF_UP)) 1531 + else: 1532 + ipc = "0" 1533 + return ipc 1534 + 1636 1535 # Branch data preparation 1536 + 1537 + def BranchDataPrepBr(query, data): 1538 + data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + 1539 + " (" + dsoname(query.value(11)) + ")" + " -> " + 1540 + 
tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + 1541 + " (" + dsoname(query.value(15)) + ")") 1542 + 1543 + def BranchDataPrepIPC(query, data): 1544 + insn_cnt = query.value(16) 1545 + cyc_cnt = query.value(17) 1546 + ipc = CalcIPC(cyc_cnt, insn_cnt) 1547 + data.append(insn_cnt) 1548 + data.append(cyc_cnt) 1549 + data.append(ipc) 1637 1550 1638 1551 def BranchDataPrep(query): 1639 1552 data = [] 1640 1553 for i in xrange(0, 8): 1641 1554 data.append(query.value(i)) 1642 - data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + 1643 - " (" + dsoname(query.value(11)) + ")" + " -> " + 1644 - tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + 1645 - " (" + dsoname(query.value(15)) + ")") 1555 + BranchDataPrepBr(query, data) 1646 1556 return data 1647 1557 1648 1558 def BranchDataPrepWA(query): ··· 1673 1541 data.append("{:>19}".format(query.value(1))) 1674 1542 for i in xrange(2, 8): 1675 1543 data.append(query.value(i)) 1676 - data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + 1677 - " (" + dsoname(query.value(11)) + ")" + " -> " + 1678 - tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + 1679 - " (" + dsoname(query.value(15)) + ")") 1544 + BranchDataPrepBr(query, data) 1545 + return data 1546 + 1547 + def BranchDataWithIPCPrep(query): 1548 + data = [] 1549 + for i in xrange(0, 8): 1550 + data.append(query.value(i)) 1551 + BranchDataPrepIPC(query, data) 1552 + BranchDataPrepBr(query, data) 1553 + return data 1554 + 1555 + def BranchDataWithIPCPrepWA(query): 1556 + data = [] 1557 + data.append(query.value(0)) 1558 + # Workaround pyside failing to handle large integers (i.e. 
time) in python3 by converting to a string 1559 + data.append("{:>19}".format(query.value(1))) 1560 + for i in xrange(2, 8): 1561 + data.append(query.value(i)) 1562 + BranchDataPrepIPC(query, data) 1563 + BranchDataPrepBr(query, data) 1680 1564 return data 1681 1565 1682 1566 # Branch data model ··· 1702 1554 progress = Signal(object) 1703 1555 1704 1556 def __init__(self, glb, event_id, where_clause, parent=None): 1705 - super(BranchModel, self).__init__(glb, parent) 1557 + super(BranchModel, self).__init__(glb, None, parent) 1706 1558 self.event_id = event_id 1707 1559 self.more = True 1708 1560 self.populated = 0 1561 + self.have_ipc = IsSelectable(glb.db, "samples", columns = "insn_count, cyc_count") 1562 + if self.have_ipc: 1563 + select_ipc = ", insn_count, cyc_count" 1564 + prep_fn = BranchDataWithIPCPrep 1565 + prep_wa_fn = BranchDataWithIPCPrepWA 1566 + else: 1567 + select_ipc = "" 1568 + prep_fn = BranchDataPrep 1569 + prep_wa_fn = BranchDataPrepWA 1709 1570 sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name," 1710 1571 " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END," 1711 1572 " ip, symbols.name, sym_offset, dsos.short_name," 1712 1573 " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name" 1574 + + select_ipc + 1713 1575 " FROM samples" 1714 1576 " INNER JOIN comms ON comm_id = comms.id" 1715 1577 " INNER JOIN threads ON thread_id = threads.id" ··· 1733 1575 " ORDER BY samples.id" 1734 1576 " LIMIT " + str(glb_chunk_sz)) 1735 1577 if pyside_version_1 and sys.version_info[0] == 3: 1736 - prep = BranchDataPrepWA 1578 + prep = prep_fn 1737 1579 else: 1738 - prep = BranchDataPrep 1580 + prep = prep_wa_fn 1739 1581 self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample) 1740 1582 self.fetcher.done.connect(self.Update) 1741 1583 self.fetcher.Fetch(glb_chunk_sz) ··· 1744 1586 return BranchRootItem() 1745 1587 1746 1588 def columnCount(self, parent=None): 1747 - return 8 1589 + if self.have_ipc: 1590 + return 11 1591 + else: 1592 + 
return 8 1748 1593 1749 1594 def columnHeader(self, column): 1750 - return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] 1595 + if self.have_ipc: 1596 + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Insn Cnt", "Cyc Cnt", "IPC", "Branch")[column] 1597 + else: 1598 + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] 1751 1599 1752 1600 def columnFont(self, column): 1753 - if column != 7: 1601 + if self.have_ipc: 1602 + br_col = 10 1603 + else: 1604 + br_col = 7 1605 + if column != br_col: 1754 1606 return None 1755 1607 return QFont("Monospace") 1756 1608 ··· 2268 2100 2269 2101 # Is a table selectable 2270 2102 2271 - def IsSelectable(db, table, sql = ""): 2103 + def IsSelectable(db, table, sql = "", columns = "*"): 2272 2104 query = QSqlQuery(db) 2273 2105 try: 2274 - QueryExec(query, "SELECT * FROM " + table + " " + sql + " LIMIT 1") 2106 + QueryExec(query, "SELECT " + columns + " FROM " + table + " " + sql + " LIMIT 1") 2275 2107 except: 2276 2108 return False 2277 2109 return True ··· 2922 2754 action = self.window_menu.addAction(label) 2923 2755 action.setCheckable(True) 2924 2756 action.setChecked(sub_window == self.mdi_area.activeSubWindow()) 2925 - action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x)) 2757 + action.triggered.connect(lambda a=None,x=nr: self.setActiveSubWindow(x)) 2926 2758 self.window_menu.addAction(action) 2927 2759 nr += 1 2928 2760 ··· 3008 2840 sudo ./mfile.py --prefix=/usr/local install 3009 2841 sudo ldconfig 3010 2842 </pre> 2843 + <h3>Instructions per Cycle (IPC)</h3> 2844 + If available, IPC information is displayed in columns 'insn_cnt', 'cyc_cnt' and 'IPC'. 2845 + <p><b>Intel PT note:</b> The information applies to the blocks of code ending with, and including, that branch. 2846 + Due to the granularity of timing information, the number of cycles for some code blocks will not be known. 
2847 + In that case, 'insn_cnt', 'cyc_cnt' and 'IPC' are zero, but when 'IPC' is displayed it covers the period 2848 + since the previous displayed 'IPC'. 3011 2849 <h3>Find</h3> 3012 2850 Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. 3013 2851 Refer to Python documentation for the regular expression syntax. ··· 3288 3114 event = event.split(":")[0] 3289 3115 if event == "branches": 3290 3116 label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" 3291 - reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) 3117 + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewBranchView(x), self)) 3292 3118 label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" 3293 - reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self)) 3119 + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self)) 3294 3120 3295 3121 def TableMenu(self, tables, menu): 3296 3122 table_menu = menu.addMenu("&Tables") 3297 3123 for table in tables: 3298 - table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self)) 3124 + table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda a=None,t=table: self.NewTableView(t), self)) 3299 3125 3300 3126 def NewCallGraph(self): 3301 3127 CallGraphWindow(self.glb, self) ··· 3535 3361 # Main 3536 3362 3537 3363 def Main(): 3538 - if (len(sys.argv) < 2): 3539 - printerr("Usage is: exported-sql-viewer.py {<database name> | --help-only}"); 3540 - raise Exception("Too few arguments") 3364 + usage_str = 
"exported-sql-viewer.py [--pyside-version-1] <database name>\n" \ 3365 + " or: exported-sql-viewer.py --help-only" 3366 + ap = argparse.ArgumentParser(usage = usage_str, add_help = False) 3367 + ap.add_argument("--pyside-version-1", action='store_true') 3368 + ap.add_argument("dbname", nargs="?") 3369 + ap.add_argument("--help-only", action='store_true') 3370 + args = ap.parse_args() 3541 3371 3542 - dbname = sys.argv[1] 3543 - if dbname == "--help-only": 3372 + if args.help_only: 3544 3373 app = QApplication(sys.argv) 3545 3374 mainwindow = HelpOnlyWindow() 3546 3375 mainwindow.show() 3547 3376 err = app.exec_() 3548 3377 sys.exit(err) 3378 + 3379 + dbname = args.dbname 3380 + if dbname is None: 3381 + ap.print_usage() 3382 + print("Too few arguments") 3383 + sys.exit(1) 3549 3384 3550 3385 is_sqlite3 = False 3551 3386 try:
+4
tools/perf/tests/Build
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + 1 3 perf-y += builtin-test.o 2 4 perf-y += parse-events.o 3 5 perf-y += dso-data.o ··· 52 50 perf-y += clang.o 53 51 perf-y += unit_number__scnprintf.o 54 52 perf-y += mem2node.o 53 + perf-y += map_groups.o 54 + perf-y += time-utils-test.o 55 55 56 56 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build 57 57 $(call rule_mkdir)
+1
tools/perf/tests/bp_account.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 /* 2 3 * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select 3 4 * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+1
tools/perf/tests/bpf-script-example.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 /* 2 3 * bpf-script-example.c 3 4 * Test basic LLVM building
+1
tools/perf/tests/bpf-script-test-kbuild.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 /* 2 3 * bpf-script-test-kbuild.c 3 4 * Test include from kernel header
+1
tools/perf/tests/bpf-script-test-prologue.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 /* 2 3 * bpf-script-test-prologue.c 3 4 * Test BPF prologue
+1
tools/perf/tests/bpf-script-test-relocation.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 /* 2 3 * bpf-script-test-relocation.c 3 4 * Test BPF loader checking relocation
+1
tools/perf/tests/bpf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 #include <errno.h> 2 3 #include <stdio.h> 3 4 #include <sys/epoll.h>
+10 -1
tools/perf/tests/builtin-test.c
··· 22 22 #include "string2.h" 23 23 #include "symbol.h" 24 24 #include <linux/kernel.h> 25 + #include <linux/string.h> 25 26 #include <subcmd/exec-cmd.h> 26 27 27 28 static bool dont_fork; ··· 291 290 .func = test__mem2node, 292 291 }, 293 292 { 293 + .desc = "time utils", 294 + .func = test__time_utils, 295 + }, 296 + { 297 + .desc = "map_groups__merge_in", 298 + .func = test__map_groups__merge_in, 299 + }, 300 + { 294 301 .func = NULL, 295 302 }, 296 303 }; ··· 439 430 description = fgets(description, size, fp); 440 431 fclose(fp); 441 432 442 - return description ? trim(description + 1) : NULL; 433 + return description ? strim(description + 1) : NULL; 443 434 } 444 435 445 436 #define for_each_shell_test(dir, base, ent) \
+1 -1
tools/perf/tests/code-reading.c
··· 22 22 23 23 #include "tests.h" 24 24 25 - #include "sane_ctype.h" 25 + #include <linux/ctype.h> 26 26 27 27 #define BUFSZ 1024 28 28 #define READLEN 128
+121
tools/perf/tests/map_groups.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/compiler.h> 3 + #include <linux/kernel.h> 4 + #include "tests.h" 5 + #include "map.h" 6 + #include "map_groups.h" 7 + #include "dso.h" 8 + #include "debug.h" 9 + 10 + struct map_def { 11 + const char *name; 12 + u64 start; 13 + u64 end; 14 + }; 15 + 16 + static int check_maps(struct map_def *merged, unsigned int size, struct map_groups *mg) 17 + { 18 + struct map *map; 19 + unsigned int i = 0; 20 + 21 + map = map_groups__first(mg); 22 + while (map) { 23 + TEST_ASSERT_VAL("wrong map start", map->start == merged[i].start); 24 + TEST_ASSERT_VAL("wrong map end", map->end == merged[i].end); 25 + TEST_ASSERT_VAL("wrong map name", !strcmp(map->dso->name, merged[i].name)); 26 + TEST_ASSERT_VAL("wrong map refcnt", refcount_read(&map->refcnt) == 2); 27 + 28 + i++; 29 + map = map_groups__next(map); 30 + 31 + TEST_ASSERT_VAL("less maps expected", (map && i < size) || (!map && i == size)); 32 + } 33 + 34 + return TEST_OK; 35 + } 36 + 37 + int test__map_groups__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused) 38 + { 39 + struct map_groups mg; 40 + unsigned int i; 41 + struct map_def bpf_progs[] = { 42 + { "bpf_prog_1", 200, 300 }, 43 + { "bpf_prog_2", 500, 600 }, 44 + { "bpf_prog_3", 800, 900 }, 45 + }; 46 + struct map_def merged12[] = { 47 + { "kcore1", 100, 200 }, 48 + { "bpf_prog_1", 200, 300 }, 49 + { "kcore1", 300, 500 }, 50 + { "bpf_prog_2", 500, 600 }, 51 + { "kcore1", 600, 800 }, 52 + { "bpf_prog_3", 800, 900 }, 53 + { "kcore1", 900, 1000 }, 54 + }; 55 + struct map_def merged3[] = { 56 + { "kcore1", 100, 200 }, 57 + { "bpf_prog_1", 200, 300 }, 58 + { "kcore1", 300, 500 }, 59 + { "bpf_prog_2", 500, 600 }, 60 + { "kcore1", 600, 800 }, 61 + { "bpf_prog_3", 800, 900 }, 62 + { "kcore1", 900, 1000 }, 63 + { "kcore3", 1000, 1100 }, 64 + }; 65 + struct map *map_kcore1, *map_kcore2, *map_kcore3; 66 + int ret; 67 + 68 + map_groups__init(&mg, NULL); 69 + 70 + for (i = 0; i < ARRAY_SIZE(bpf_progs); 
i++) { 71 + struct map *map; 72 + 73 + map = dso__new_map(bpf_progs[i].name); 74 + TEST_ASSERT_VAL("failed to create map", map); 75 + 76 + map->start = bpf_progs[i].start; 77 + map->end = bpf_progs[i].end; 78 + map_groups__insert(&mg, map); 79 + map__put(map); 80 + } 81 + 82 + map_kcore1 = dso__new_map("kcore1"); 83 + TEST_ASSERT_VAL("failed to create map", map_kcore1); 84 + 85 + map_kcore2 = dso__new_map("kcore2"); 86 + TEST_ASSERT_VAL("failed to create map", map_kcore2); 87 + 88 + map_kcore3 = dso__new_map("kcore3"); 89 + TEST_ASSERT_VAL("failed to create map", map_kcore3); 90 + 91 + /* kcore1 map overlaps over all bpf maps */ 92 + map_kcore1->start = 100; 93 + map_kcore1->end = 1000; 94 + 95 + /* kcore2 map hides behind bpf_prog_2 */ 96 + map_kcore2->start = 550; 97 + map_kcore2->end = 570; 98 + 99 + /* kcore3 map hides behind bpf_prog_3, kcore1 and adds new map */ 100 + map_kcore3->start = 880; 101 + map_kcore3->end = 1100; 102 + 103 + ret = map_groups__merge_in(&mg, map_kcore1); 104 + TEST_ASSERT_VAL("failed to merge map", !ret); 105 + 106 + ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg); 107 + TEST_ASSERT_VAL("merge check failed", !ret); 108 + 109 + ret = map_groups__merge_in(&mg, map_kcore2); 110 + TEST_ASSERT_VAL("failed to merge map", !ret); 111 + 112 + ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg); 113 + TEST_ASSERT_VAL("merge check failed", !ret); 114 + 115 + ret = map_groups__merge_in(&mg, map_kcore3); 116 + TEST_ASSERT_VAL("failed to merge map", !ret); 117 + 118 + ret = check_maps(merged3, ARRAY_SIZE(merged3), &mg); 119 + TEST_ASSERT_VAL("merge check failed", !ret); 120 + return TEST_OK; 121 + }
+1
tools/perf/tests/mem.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 #include "util/mem-events.h" 2 3 #include "util/symbol.h" 3 4 #include "linux/perf_event.h"
+1
tools/perf/tests/mem2node.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 #include <linux/compiler.h> 2 3 #include <linux/bitmap.h> 3 4 #include "cpumap.h"
+27
tools/perf/tests/parse-events.c
··· 18 18 #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ 19 19 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) 20 20 21 + #if defined(__s390x__) 22 + /* Return true if kvm module is available and loaded. Test this 23 + * and return success when trace point kvm_s390_create_vm 24 + * exists. Otherwise this test always fails. 25 + */ 26 + static bool kvm_s390_create_vm_valid(void) 27 + { 28 + char *eventfile; 29 + bool rc = false; 30 + 31 + eventfile = get_events_file("kvm-s390"); 32 + 33 + if (eventfile) { 34 + DIR *mydir = opendir(eventfile); 35 + 36 + if (mydir) { 37 + rc = true; 38 + closedir(mydir); 39 + } 40 + put_events_file(eventfile); 41 + } 42 + 43 + return rc; 44 + } 45 + #endif 46 + 21 47 static int test__checkevent_tracepoint(struct perf_evlist *evlist) 22 48 { 23 49 struct perf_evsel *evsel = perf_evlist__first(evlist); ··· 1668 1642 { 1669 1643 .name = "kvm-s390:kvm_s390_create_vm", 1670 1644 .check = test__checkevent_tracepoint, 1645 + .valid = kvm_s390_create_vm_valid, 1671 1646 .id = 100, 1672 1647 }, 1673 1648 #endif
+1
tools/perf/tests/shell/lib/probe.sh
··· 1 + # SPDX-License-Identifier: GPL-2.0 1 2 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 2 3 3 4 skip_if_no_perf_probe() {
+2 -1
tools/perf/tests/shell/probe_vfs_getname.sh
··· 1 1 #!/bin/sh 2 2 # Add vfs_getname probe to get syscall args filenames 3 - # 3 + 4 + # SPDX-License-Identifier: GPL-2.0 4 5 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 5 6 6 7 . $(dirname $0)/lib/probe.sh
+2 -1
tools/perf/tests/shell/record+probe_libc_inet_pton.sh
··· 7 7 # This needs no debuginfo package, all is done using the libc ELF symtab 8 8 # and the CFI info in the binaries. 9 9 10 + # SPDX-License-Identifier: GPL-2.0 10 11 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 11 12 12 13 . $(dirname $0)/lib/probe.sh ··· 45 44 eventattr='max-stack=4' 46 45 echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected 47 46 echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected 48 - echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected 47 + echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected 49 48 ;; 50 49 *) 51 50 eventattr='max-stack=3'
+1
tools/perf/tests/shell/record+script_probe_vfs_getname.sh
··· 6 6 # checks that that was captured by the vfs_getname probe in the generated 7 7 # perf.data file, with the temp file name as the pathname argument. 8 8 9 + # SPDX-License-Identifier: GPL-2.0 9 10 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 10 11 11 12 . $(dirname $0)/lib/probe.sh
+2
tools/perf/tests/shell/record+zstd_comp_decomp.sh
··· 1 1 #!/bin/sh 2 2 # Zstd perf.data compression/decompression 3 3 4 + # SPDX-License-Identifier: GPL-2.0 5 + 4 6 trace_file=$(mktemp /tmp/perf.data.XXX) 5 7 perf_tool=perf 6 8
+1
tools/perf/tests/shell/trace+probe_vfs_getname.sh
··· 7 7 # that already handles "probe:vfs_getname" if present, and used in the 8 8 # "open" syscall "filename" argument beautifier. 9 9 10 + # SPDX-License-Identifier: GPL-2.0 10 11 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 11 12 12 13 . $(dirname $0)/lib/probe.sh
+2
tools/perf/tests/tests.h
··· 107 107 int test__clang_subtest_get_nr(void); 108 108 int test__unit_number__scnprint(struct test *test, int subtest); 109 109 int test__mem2node(struct test *t, int subtest); 110 + int test__map_groups__merge_in(struct test *t, int subtest); 111 + int test__time_utils(struct test *t, int subtest); 110 112 111 113 bool test__bp_signal_is_supported(void); 112 114 bool test__wp_is_supported(void);
+251
tools/perf/tests/time-utils-test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/compiler.h> 3 + #include <linux/time64.h> 4 + #include <inttypes.h> 5 + #include <string.h> 6 + #include "time-utils.h" 7 + #include "evlist.h" 8 + #include "session.h" 9 + #include "debug.h" 10 + #include "tests.h" 11 + 12 + static bool test__parse_nsec_time(const char *str, u64 expected) 13 + { 14 + u64 ptime; 15 + int err; 16 + 17 + pr_debug("\nparse_nsec_time(\"%s\")\n", str); 18 + 19 + err = parse_nsec_time(str, &ptime); 20 + if (err) { 21 + pr_debug("error %d\n", err); 22 + return false; 23 + } 24 + 25 + if (ptime != expected) { 26 + pr_debug("Failed. ptime %" PRIu64 " expected %" PRIu64 "\n", 27 + ptime, expected); 28 + return false; 29 + } 30 + 31 + pr_debug("%" PRIu64 "\n", ptime); 32 + 33 + return true; 34 + } 35 + 36 + static bool test__perf_time__parse_str(const char *ostr, u64 start, u64 end) 37 + { 38 + struct perf_time_interval ptime; 39 + int err; 40 + 41 + pr_debug("\nperf_time__parse_str(\"%s\")\n", ostr); 42 + 43 + err = perf_time__parse_str(&ptime, ostr); 44 + if (err) { 45 + pr_debug("Error %d\n", err); 46 + return false; 47 + } 48 + 49 + if (ptime.start != start || ptime.end != end) { 50 + pr_debug("Failed. 
Expected %" PRIu64 " to %" PRIu64 "\n", 51 + start, end); 52 + return false; 53 + } 54 + 55 + return true; 56 + } 57 + 58 + #define TEST_MAX 64 59 + 60 + struct test_data { 61 + const char *str; 62 + u64 first; 63 + u64 last; 64 + struct perf_time_interval ptime[TEST_MAX]; 65 + int num; 66 + u64 skip[TEST_MAX]; 67 + u64 noskip[TEST_MAX]; 68 + }; 69 + 70 + static bool test__perf_time__parse_for_ranges(struct test_data *d) 71 + { 72 + struct perf_evlist evlist = { 73 + .first_sample_time = d->first, 74 + .last_sample_time = d->last, 75 + }; 76 + struct perf_session session = { .evlist = &evlist }; 77 + struct perf_time_interval *ptime = NULL; 78 + int range_size, range_num; 79 + bool pass = false; 80 + int i, err; 81 + 82 + pr_debug("\nperf_time__parse_for_ranges(\"%s\")\n", d->str); 83 + 84 + if (strchr(d->str, '%')) 85 + pr_debug("first_sample_time %" PRIu64 " last_sample_time %" PRIu64 "\n", 86 + d->first, d->last); 87 + 88 + err = perf_time__parse_for_ranges(d->str, &session, &ptime, &range_size, 89 + &range_num); 90 + if (err) { 91 + pr_debug("error %d\n", err); 92 + goto out; 93 + } 94 + 95 + if (range_size < d->num || range_num != d->num) { 96 + pr_debug("bad size: range_size %d range_num %d expected num %d\n", 97 + range_size, range_num, d->num); 98 + goto out; 99 + } 100 + 101 + for (i = 0; i < d->num; i++) { 102 + if (ptime[i].start != d->ptime[i].start || 103 + ptime[i].end != d->ptime[i].end) { 104 + pr_debug("bad range %d expected %" PRIu64 " to %" PRIu64 "\n", 105 + i, d->ptime[i].start, d->ptime[i].end); 106 + goto out; 107 + } 108 + } 109 + 110 + if (perf_time__ranges_skip_sample(ptime, d->num, 0)) { 111 + pr_debug("failed to keep 0\n"); 112 + goto out; 113 + } 114 + 115 + for (i = 0; i < TEST_MAX; i++) { 116 + if (d->skip[i] && 117 + !perf_time__ranges_skip_sample(ptime, d->num, d->skip[i])) { 118 + pr_debug("failed to skip %" PRIu64 "\n", d->skip[i]); 119 + goto out; 120 + } 121 + if (d->noskip[i] && 122 + perf_time__ranges_skip_sample(ptime, 
d->num, d->noskip[i])) { 123 + pr_debug("failed to keep %" PRIu64 "\n", d->noskip[i]); 124 + goto out; 125 + } 126 + } 127 + 128 + pass = true; 129 + out: 130 + free(ptime); 131 + return pass; 132 + } 133 + 134 + int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused) 135 + { 136 + bool pass = true; 137 + 138 + pass &= test__parse_nsec_time("0", 0); 139 + pass &= test__parse_nsec_time("1", 1000000000ULL); 140 + pass &= test__parse_nsec_time("0.000000001", 1); 141 + pass &= test__parse_nsec_time("1.000000001", 1000000001ULL); 142 + pass &= test__parse_nsec_time("123456.123456", 123456123456000ULL); 143 + pass &= test__parse_nsec_time("1234567.123456789", 1234567123456789ULL); 144 + pass &= test__parse_nsec_time("18446744073.709551615", 145 + 0xFFFFFFFFFFFFFFFFULL); 146 + 147 + pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456789", 148 + 1234567123456789ULL, 1234567123456789ULL); 149 + pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456790", 150 + 1234567123456789ULL, 1234567123456790ULL); 151 + pass &= test__perf_time__parse_str("1234567.123456789,", 152 + 1234567123456789ULL, 0); 153 + pass &= test__perf_time__parse_str(",1234567.123456789", 154 + 0, 1234567123456789ULL); 155 + pass &= test__perf_time__parse_str("0,1234567.123456789", 156 + 0, 1234567123456789ULL); 157 + 158 + { 159 + u64 b = 1234567123456789ULL; 160 + struct test_data d = { 161 + .str = "1234567.123456789,1234567.123456790", 162 + .ptime = { {b, b + 1}, }, 163 + .num = 1, 164 + .skip = { b - 1, b + 2, }, 165 + .noskip = { b, b + 1, }, 166 + }; 167 + 168 + pass &= test__perf_time__parse_for_ranges(&d); 169 + } 170 + 171 + { 172 + u64 b = 1234567123456789ULL; 173 + u64 c = 7654321987654321ULL; 174 + u64 e = 8000000000000000ULL; 175 + struct test_data d = { 176 + .str = "1234567.123456789,1234567.123456790 " 177 + "7654321.987654321,7654321.987654444 " 178 + "8000000,8000000.000000005", 179 + .ptime = { {b, b + 1}, {c, c + 123}, {e, e + 5}, 
}, 180 + .num = 3, 181 + .skip = { b - 1, b + 2, c - 1, c + 124, e - 1, e + 6 }, 182 + .noskip = { b, b + 1, c, c + 123, e, e + 5 }, 183 + }; 184 + 185 + pass &= test__perf_time__parse_for_ranges(&d); 186 + } 187 + 188 + { 189 + u64 b = 7654321ULL * NSEC_PER_SEC; 190 + struct test_data d = { 191 + .str = "10%/1", 192 + .first = b, 193 + .last = b + 100, 194 + .ptime = { {b, b + 9}, }, 195 + .num = 1, 196 + .skip = { b - 1, b + 10, }, 197 + .noskip = { b, b + 9, }, 198 + }; 199 + 200 + pass &= test__perf_time__parse_for_ranges(&d); 201 + } 202 + 203 + { 204 + u64 b = 7654321ULL * NSEC_PER_SEC; 205 + struct test_data d = { 206 + .str = "10%/2", 207 + .first = b, 208 + .last = b + 100, 209 + .ptime = { {b + 10, b + 19}, }, 210 + .num = 1, 211 + .skip = { b + 9, b + 20, }, 212 + .noskip = { b + 10, b + 19, }, 213 + }; 214 + 215 + pass &= test__perf_time__parse_for_ranges(&d); 216 + } 217 + 218 + { 219 + u64 b = 11223344ULL * NSEC_PER_SEC; 220 + struct test_data d = { 221 + .str = "10%/1,10%/2", 222 + .first = b, 223 + .last = b + 100, 224 + .ptime = { {b, b + 9}, {b + 10, b + 19}, }, 225 + .num = 2, 226 + .skip = { b - 1, b + 20, }, 227 + .noskip = { b, b + 8, b + 9, b + 10, b + 11, b + 12, b + 19, }, 228 + }; 229 + 230 + pass &= test__perf_time__parse_for_ranges(&d); 231 + } 232 + 233 + { 234 + u64 b = 11223344ULL * NSEC_PER_SEC; 235 + struct test_data d = { 236 + .str = "10%/1,10%/3,10%/10", 237 + .first = b, 238 + .last = b + 100, 239 + .ptime = { {b, b + 9}, {b + 20, b + 29}, { b + 90, b + 100}, }, 240 + .num = 3, 241 + .skip = { b - 1, b + 10, b + 19, b + 30, b + 89, b + 101 }, 242 + .noskip = { b, b + 9, b + 20, b + 29, b + 90, b + 100}, 243 + }; 244 + 245 + pass &= test__perf_time__parse_for_ranges(&d); 246 + } 247 + 248 + pr_debug("\n"); 249 + 250 + return pass ? 0 : TEST_FAIL; 251 + }
+4
tools/perf/trace/beauty/Build
··· 1 1 perf-y += clone.o 2 2 perf-y += fcntl.o 3 3 perf-y += flock.o 4 + perf-y += fsmount.o 5 + perf-y += fspick.o 4 6 ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) 5 7 perf-y += ioctl.o 6 8 endif 7 9 perf-y += kcmp.o 8 10 perf-y += mount_flags.o 11 + perf-y += move_mount.o 9 12 perf-y += pkey_alloc.o 10 13 perf-y += arch_prctl.o 11 14 perf-y += prctl.o ··· 16 13 perf-y += sockaddr.o 17 14 perf-y += socket.o 18 15 perf-y += statx.o 16 + perf-y += sync_file_range.o
+15
tools/perf/trace/beauty/beauty.h
··· 108 108 109 109 unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx); 110 110 111 + size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg); 112 + #define SCA_STRARRAY_FLAGS syscall_arg__scnprintf_strarray_flags 113 + 111 114 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); 112 115 #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays 113 116 ··· 144 141 size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg); 145 142 #define SCA_FLOCK syscall_arg__scnprintf_flock 146 143 144 + size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg); 145 + #define SCA_FSMOUNT_ATTR_FLAGS syscall_arg__scnprintf_fsmount_attr_flags 146 + 147 + size_t syscall_arg__scnprintf_fspick_flags(char *bf, size_t size, struct syscall_arg *arg); 148 + #define SCA_FSPICK_FLAGS syscall_arg__scnprintf_fspick_flags 149 + 147 150 size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); 148 151 #define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd 149 152 ··· 164 155 165 156 size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg); 166 157 #define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags 158 + 159 + size_t syscall_arg__scnprintf_move_mount_flags(char *bf, size_t size, struct syscall_arg *arg); 160 + #define SCA_MOVE_MOUNT_FLAGS syscall_arg__scnprintf_move_mount_flags 167 161 168 162 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); 169 163 #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights ··· 200 188 201 189 size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg); 202 190 #define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask 191 + 192 + size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg); 193 + 
#define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags 203 194 204 195 size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix); 205 196
+1
tools/perf/trace/beauty/clone.c
··· 25 25 P_FLAG(FS); 26 26 P_FLAG(FILES); 27 27 P_FLAG(SIGHAND); 28 + P_FLAG(PIDFD); 28 29 P_FLAG(PTRACE); 29 30 P_FLAG(VFORK); 30 31 P_FLAG(PARENT);
+17
tools/perf/trace/beauty/fsconfig.sh
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1

# Emit a C string table, fsconfig_cmds[], built from the lines of the uapi
# mount.h header that look like "FSCONFIG_<NAME> = <number>,".
#
# Usage: fsconfig.sh [linux-uapi-header-dir]
# Without exactly one argument the in-tree tools copy of the headers is used.

if [ $# -ne 1 ] ; then
	linux_header_dir=tools/include/uapi/linux
else
	linux_header_dir=$1
fi

linux_mount=${linux_header_dir}/mount.h

printf "static const char *fsconfig_cmds[] = {\n"
# The leading whitespace is matched with a single '*': stacking '*+' is
# undefined in POSIX extended regular expressions.
regex='^[[:space:]]*FSCONFIG_([[:alnum:]_]+)[[:space:]]*=[[:space:]]*([[:digit:]]+)[[:space:]]*,[[:space:]]*.*'
# grep -E instead of the obsolescent egrep; the regex and the path are quoted
# so the shell neither glob-expands nor word-splits them.
grep -E "$regex" "${linux_mount}" | \
	sed -r "s/$regex/\2 \1/g"	| \
	xargs printf "\t[%s] = \"%s\",\n"
printf "};\n"
+34
tools/perf/trace/beauty/fsmount.c
··· 1 + // SPDX-License-Identifier: LGPL-2.1 2 + /* 3 + * trace/beauty/fsmount.c 4 + * 5 + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 6 + */ 7 + 8 + #include "trace/beauty/beauty.h" 9 + #include <linux/log2.h> 10 + #include <uapi/linux/mount.h> 11 + 12 + static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) 13 + { 14 + #include "trace/beauty/generated/fsmount_arrays.c" 15 + static DEFINE_STRARRAY(fsmount_attr_flags, "MOUNT_ATTR_"); 16 + size_t printed = 0; 17 + 18 + if ((flags & ~MOUNT_ATTR__ATIME) != 0) 19 + printed += strarray__scnprintf_flags(&strarray__fsmount_attr_flags, bf, size, show_prefix, flags); 20 + 21 + if ((flags & MOUNT_ATTR__ATIME) == MOUNT_ATTR_RELATIME) { 22 + printed += scnprintf(bf + printed, size - printed, "%s%s%s", 23 + printed ? "|" : "", show_prefix ? "MOUNT_ATTR_" : "", "RELATIME"); 24 + } 25 + 26 + return printed; 27 + } 28 + 29 + size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg) 30 + { 31 + unsigned long flags = arg->val; 32 + 33 + return fsmount__scnprintf_attr_flags(flags, bf, size, arg->show_string_prefix); 34 + }
+22
tools/perf/trace/beauty/fsmount.sh
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1

# Emit a C string table, fsmount_attr_flags[], built from the
# "#define MOUNT_ATTR_<NAME> 0x<hex>" lines of the uapi mount.h header.
#
# Usage: fsmount.sh [linux-uapi-header-dir]
# Without exactly one argument the in-tree tools copy of the headers is used.

if [ $# -ne 1 ] ; then
	linux_header_dir=tools/include/uapi/linux
else
	linux_header_dir=$1
fi

linux_mount=${linux_header_dir}/mount.h

# MOUNT_ATTR_RELATIME is dropped because its value is all zeros; the
# beautifier handles it specially.
# Only MOUNT_ATTR_ followed by a letter or digit is matched, since a double
# underscore marks special cases such as MOUNT_ATTR__ATIME, the mask covering
# the possible ATIME handling bits. That one is special-cased in the
# beautifier as well.

printf "static const char *fsmount_attr_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
# grep -E instead of the obsolescent egrep; the regex and the path are quoted
# so the shell neither glob-expands nor word-splits them.
grep -E "$regex" "${linux_mount}" | grep -v MOUNT_ATTR_RELATIME | \
	sed -r "s/$regex/\2 \1/g"	| \
	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
printf "};\n"
+24
tools/perf/trace/beauty/fspick.c
··· 1 + // SPDX-License-Identifier: LGPL-2.1 2 + /* 3 + * trace/beauty/fspick.c 4 + * 5 + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 6 + */ 7 + 8 + #include "trace/beauty/beauty.h" 9 + #include <linux/log2.h> 10 + 11 + static size_t fspick__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) 12 + { 13 + #include "trace/beauty/generated/fspick_arrays.c" 14 + static DEFINE_STRARRAY(fspick_flags, "FSPICK_"); 15 + 16 + return strarray__scnprintf_flags(&strarray__fspick_flags, bf, size, show_prefix, flags); 17 + } 18 + 19 + size_t syscall_arg__scnprintf_fspick_flags(char *bf, size_t size, struct syscall_arg *arg) 20 + { 21 + unsigned long flags = arg->val; 22 + 23 + return fspick__scnprintf_flags(flags, bf, size, arg->show_string_prefix); 24 + }
+17
tools/perf/trace/beauty/fspick.sh
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1

# Emit a C string table, fspick_flags[], built from the
# "#define FSPICK_<NAME> 0x<hex>" lines of the uapi mount.h header.
#
# Usage: fspick.sh [linux-uapi-header-dir]
# Without exactly one argument the in-tree tools copy of the headers is used.

if [ $# -ne 1 ] ; then
	linux_header_dir=tools/include/uapi/linux
else
	linux_header_dir=$1
fi

linux_mount=${linux_header_dir}/mount.h

printf "static const char *fspick_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+FSPICK_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
# grep -E instead of the obsolescent egrep; the regex and the path are quoted
# so the shell neither glob-expands nor word-splits them.
grep -E "$regex" "${linux_mount}" | \
	sed -r "s/$regex/\2 \1/g"	| \
	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
printf "};\n"
+24
tools/perf/trace/beauty/move_mount.c
··· 1 + // SPDX-License-Identifier: LGPL-2.1 2 + /* 3 + * trace/beauty/move_mount.c 4 + * 5 + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 6 + */ 7 + 8 + #include "trace/beauty/beauty.h" 9 + #include <linux/log2.h> 10 + 11 + static size_t move_mount__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) 12 + { 13 + #include "trace/beauty/generated/move_mount_flags_array.c" 14 + static DEFINE_STRARRAY(move_mount_flags, "MOVE_MOUNT_"); 15 + 16 + return strarray__scnprintf_flags(&strarray__move_mount_flags, bf, size, show_prefix, flags); 17 + } 18 + 19 + size_t syscall_arg__scnprintf_move_mount_flags(char *bf, size_t size, struct syscall_arg *arg) 20 + { 21 + unsigned long flags = arg->val; 22 + 23 + return move_mount__scnprintf_flags(flags, bf, size, arg->show_string_prefix); 24 + }
+17
tools/perf/trace/beauty/move_mount_flags.sh
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1

# Emit a C string table, move_mount_flags[], built from the
# "#define MOVE_MOUNT_[FT]_<NAME> 0x<hex>" lines of the uapi mount.h header.
#
# Usage: move_mount_flags.sh [linux-uapi-header-dir]
# Without exactly one argument the in-tree tools copy of the headers is used.

if [ $# -ne 1 ] ; then
	linux_header_dir=tools/include/uapi/linux
else
	linux_header_dir=$1
fi

linux_mount=${linux_header_dir}/mount.h

printf "static const char *move_mount_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
# grep -E instead of the obsolescent egrep; the regex and the path are quoted
# so the shell neither glob-expands nor word-splits them.
grep -E "$regex" "${linux_mount}" | \
	sed -r "s/$regex/\2 \1/g"	| \
	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
printf "};\n"
+31
tools/perf/trace/beauty/sync_file_range.c
··· 1 + // SPDX-License-Identifier: LGPL-2.1 2 + /* 3 + * trace/beauty/sync_file_range.c 4 + * 5 + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 6 + */ 7 + 8 + #include "trace/beauty/beauty.h" 9 + #include <linux/log2.h> 10 + #include <uapi/linux/fs.h> 11 + 12 + static size_t sync_file_range__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) 13 + { 14 + #include "trace/beauty/generated/sync_file_range_arrays.c" 15 + static DEFINE_STRARRAY(sync_file_range_flags, "SYNC_FILE_RANGE_"); 16 + size_t printed = 0; 17 + 18 + if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) == SYNC_FILE_RANGE_WRITE_AND_WAIT) { 19 + printed += scnprintf(bf + printed, size - printed, "%s%s", show_prefix ? "SYNC_FILE_RANGE_" : "", "WRITE_AND_WAIT"); 20 + flags &= ~SYNC_FILE_RANGE_WRITE_AND_WAIT; 21 + } 22 + 23 + return printed + strarray__scnprintf_flags(&strarray__sync_file_range_flags, bf + printed, size - printed, show_prefix, flags); 24 + } 25 + 26 + size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg) 27 + { 28 + unsigned long flags = arg->val; 29 + 30 + return sync_file_range__scnprintf_flags(flags, bf, size, arg->show_string_prefix); 31 + }
+17
tools/perf/trace/beauty/sync_file_range.sh
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1

# Emit a C string table, sync_file_range_flags[], built from the
# "#define SYNC_FILE_RANGE_<NAME> <number>" lines of the uapi fs.h header.
#
# Usage: sync_file_range.sh [linux-uapi-header-dir]
# Without exactly one argument the in-tree tools copy of the headers is used.

if [ $# -ne 1 ] ; then
	linux_header_dir=tools/include/uapi/linux
else
	linux_header_dir=$1
fi

linux_fs=${linux_header_dir}/fs.h

printf "static const char *sync_file_range_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+SYNC_FILE_RANGE_([[:alnum:]_]+)[[:space:]]+([[:xdigit:]]+)[[:space:]]*.*'
# grep -E instead of the obsolescent egrep; the regex and the path are quoted
# so the shell neither glob-expands nor word-splits them.
grep -E "$regex" "${linux_fs}" | \
	sed -r "s/$regex/\2 \1/g"	| \
	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
printf "};\n"
+2 -2
tools/perf/ui/browser.c
··· 16 16 #include "helpline.h" 17 17 #include "keysyms.h" 18 18 #include "../color.h" 19 - #include "sane_ctype.h" 19 + #include <linux/ctype.h> 20 20 21 21 static int ui_browser__percent_color(struct ui_browser *browser, 22 22 double percent, bool current) ··· 594 594 break; 595 595 596 596 *bg = '\0'; 597 - bg = ltrim(++bg); 597 + bg = skip_spaces(bg + 1); 598 598 ui_browser__colorsets[i].bg = bg; 599 599 ui_browser__colorsets[i].fg = fg; 600 600 return 0;
+3 -2
tools/perf/ui/browsers/annotate.c
··· 97 97 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); 98 98 struct annotation *notes = browser__annotation(browser); 99 99 struct annotation_line *al = list_entry(entry, struct annotation_line, node); 100 + const bool is_current_entry = ui_browser__is_current_entry(browser, row); 100 101 struct annotation_write_ops ops = { 101 102 .first_line = row == 0, 102 - .current_entry = ui_browser__is_current_entry(browser, row), 103 + .current_entry = is_current_entry, 103 104 .change_color = (!notes->options->hide_src_code && 104 - (!ops.current_entry || 105 + (!is_current_entry || 105 106 (browser->use_navkeypressed && 106 107 !browser->navkeypressed))), 107 108 .width = browser->width,
+6 -4
tools/perf/ui/browsers/hists.c
··· 6 6 #include <stdlib.h> 7 7 #include <string.h> 8 8 #include <linux/rbtree.h> 9 + #include <linux/string.h> 9 10 #include <sys/ttydefaults.h> 10 11 #include <linux/time64.h> 11 12 ··· 34 33 #include "units.h" 35 34 #include "time-utils.h" 36 35 37 - #include "sane_ctype.h" 36 + #include <linux/ctype.h> 38 37 39 38 extern void hist_browser__init_hpp(void); 40 39 ··· 1471 1470 int i = 0; 1472 1471 1473 1472 width -= fmt->entry(fmt, &hpp, entry); 1474 - ui_browser__printf(&browser->b, "%s", ltrim(s)); 1473 + ui_browser__printf(&browser->b, "%s", skip_spaces(s)); 1475 1474 1476 1475 while (isspace(s[i++])) 1477 1476 width++; ··· 1687 1686 ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL); 1688 1687 dummy_hpp.buf[ret] = '\0'; 1689 1688 1690 - start = trim(dummy_hpp.buf); 1689 + start = strim(dummy_hpp.buf); 1691 1690 ret = strlen(start); 1692 1691 1693 1692 if (start != dummy_hpp.buf) ··· 2071 2070 advance_hpp(&hpp, ret); 2072 2071 } 2073 2072 2074 - printed += fprintf(fp, "%s\n", rtrim(s)); 2073 + strim(s); 2074 + printed += fprintf(fp, "%s\n", s); 2075 2075 2076 2076 if (he->leaf && folded_sign == '-') { 2077 2077 printed += hist_browser__fprintf_callchain(browser, he, fp,
+1 -1
tools/perf/ui/browsers/map.c
··· 13 13 #include "../keysyms.h" 14 14 #include "map.h" 15 15 16 - #include "sane_ctype.h" 16 + #include <linux/ctype.h> 17 17 18 18 struct map_browser { 19 19 struct ui_browser b;
+3 -2
tools/perf/ui/gtk/hists.c
··· 9 9 #include "../string2.h" 10 10 #include "gtk.h" 11 11 #include <signal.h> 12 + #include <linux/string.h> 12 13 13 14 #define MAX_COLUMNS 32 14 15 ··· 460 459 advance_hpp(hpp, ret + 2); 461 460 } 462 461 463 - gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1); 462 + gtk_tree_store_set(store, &iter, col_idx, strim(bf), -1); 464 463 465 464 if (!he->leaf) { 466 465 hpp->buf = bf; ··· 556 555 first_col = false; 557 556 558 557 fmt->header(fmt, &hpp, hists, 0, NULL); 559 - strcat(buf, ltrim(rtrim(hpp.buf))); 558 + strcat(buf, strim(hpp.buf)); 560 559 } 561 560 } 562 561
+5
tools/perf/ui/libslang.h
··· 10 10 #ifndef HAVE_LONG_LONG 11 11 #define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG 12 12 #endif 13 + 14 + #ifdef HAVE_SLANG_INCLUDE_SUBDIR 15 + #include <slang/slang.h> 16 + #else 13 17 #include <slang.h> 18 + #endif 14 19 15 20 #if SLANG_VERSION < 20104 16 21 #define slsmg_printf(msg, args...) \
+1 -1
tools/perf/ui/progress.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/kernel.h> 3 - #include "../cache.h" 3 + #include "../util/cache.h" 4 4 #include "progress.h" 5 5 6 6 static void null_progress__update(struct ui_progress *p __maybe_unused)
+37 -6
tools/perf/ui/stdio/hist.c
··· 13 13 #include "../../util/srcline.h" 14 14 #include "../../util/string2.h" 15 15 #include "../../util/thread.h" 16 - #include "../../util/sane_ctype.h" 16 + #include <linux/ctype.h> 17 17 18 18 static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) 19 19 { ··· 516 516 * dynamic entries are right-aligned but we want left-aligned 517 517 * in the hierarchy mode 518 518 */ 519 - printed += fprintf(fp, "%s%s", sep ?: " ", ltrim(buf)); 519 + printed += fprintf(fp, "%s%s", sep ?: " ", skip_spaces(buf)); 520 520 } 521 521 printed += putc('\n', fp); 522 522 ··· 529 529 530 530 out: 531 531 return printed; 532 + } 533 + 534 + static int hist_entry__block_fprintf(struct hist_entry *he, 535 + char *bf, size_t size, 536 + FILE *fp) 537 + { 538 + struct block_hist *bh = container_of(he, struct block_hist, he); 539 + int ret = 0; 540 + 541 + for (unsigned int i = 0; i < bh->block_hists.nr_entries; i++) { 542 + struct perf_hpp hpp = { 543 + .buf = bf, 544 + .size = size, 545 + .skip = false, 546 + }; 547 + 548 + bh->block_idx = i; 549 + hist_entry__snprintf(he, &hpp); 550 + 551 + if (!hpp.skip) 552 + ret += fprintf(fp, "%s\n", bf); 553 + } 554 + 555 + return ret; 532 556 } 533 557 534 558 static int hist_entry__fprintf(struct hist_entry *he, size_t size, ··· 574 550 if (symbol_conf.report_hierarchy) 575 551 return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); 576 552 553 + if (symbol_conf.report_block) 554 + return hist_entry__block_fprintf(he, bf, size, fp); 555 + 577 556 hist_entry__snprintf(he, &hpp); 578 557 579 558 ret = fprintf(fp, "%s\n", bf); ··· 593 566 static int print_hierarchy_indent(const char *sep, int indent, 594 567 const char *line, FILE *fp) 595 568 { 569 + int width; 570 + 596 571 if (sep != NULL || indent < 2) 597 572 return 0; 598 573 599 - return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line); 574 + width = (indent - 2) * HIERARCHY_INDENT; 575 + 576 + return fprintf(fp, "%-*.*s", width, width, line); 600 577 } 601 578 
602 579 static int hists__fprintf_hierarchy_headers(struct hists *hists, ··· 618 587 indent = hists->nr_hpp_node; 619 588 620 589 /* preserve max indent depth for column headers */ 621 - print_hierarchy_indent(sep, indent, spaces, fp); 590 + print_hierarchy_indent(sep, indent, " ", fp); 622 591 623 592 /* the first hpp_list_node is for overhead columns */ 624 593 fmt_node = list_first_entry(&hists->hpp_formats, ··· 647 616 648 617 fmt->header(fmt, hpp, hists, 0, NULL); 649 618 650 - header_width += fprintf(fp, "%s", trim(hpp->buf)); 619 + header_width += fprintf(fp, "%s", strim(hpp->buf)); 651 620 } 652 621 } 653 622 ··· 847 816 if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { 848 817 int depth = hists->nr_hpp_node + h->depth + 1; 849 818 850 - print_hierarchy_indent(sep, depth, spaces, fp); 819 + print_hierarchy_indent(sep, depth, " ", fp); 851 820 fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); 852 821 853 822 if (max_rows && ++nr_rows >= max_rows)
+9
tools/perf/util/Build
··· 20 20 perf-y += perf_regs.o 21 21 perf-y += path.o 22 22 perf-y += print_binary.o 23 + perf-y += argv_split.o 23 24 perf-y += rbtree.o 24 25 perf-y += libstring.o 25 26 perf-y += bitmap.o ··· 210 209 $(call rule_mkdir) 211 210 $(call if_changed_dep,cc_o_c) 212 211 212 + $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE 213 + $(call rule_mkdir) 214 + $(call if_changed_dep,cc_o_c) 215 + 213 216 $(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE 217 + $(call rule_mkdir) 218 + $(call if_changed_dep,cc_o_c) 219 + 220 + $(OUTPUT)util/ctype.o: ../lib/ctype.c FORCE 214 221 $(call rule_mkdir) 215 222 $(call if_changed_dep,cc_o_c) 216 223
+1 -1
tools/perf/util/PERF-VERSION-GEN
··· 19 19 if test -d ../../.git -o -f ../../.git 20 20 then 21 21 TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null ) 22 - CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" 22 + CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" 23 23 elif test -f ../../PERF-VERSION-FILE 24 24 then 25 25 TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
+15 -10
tools/perf/util/annotate.c
··· 35 35 #include <pthread.h> 36 36 #include <linux/bitops.h> 37 37 #include <linux/kernel.h> 38 + #include <linux/string.h> 38 39 #include <bpf/libbpf.h> 39 40 40 41 /* FIXME: For the HE_COLORSET */ ··· 50 49 #define DARROW_CHAR ((unsigned char)'.') 51 50 #define UARROW_CHAR ((unsigned char)'-') 52 51 53 - #include "sane_ctype.h" 52 + #include <linux/ctype.h> 54 53 55 54 struct annotation_options annotation__default_options = { 56 55 .use_offset = true, ··· 145 144 #include "arch/arc/annotate/instructions.c" 146 145 #include "arch/arm/annotate/instructions.c" 147 146 #include "arch/arm64/annotate/instructions.c" 147 + #include "arch/csky/annotate/instructions.c" 148 148 #include "arch/x86/annotate/instructions.c" 149 149 #include "arch/powerpc/annotate/instructions.c" 150 150 #include "arch/s390/annotate/instructions.c" ··· 163 161 { 164 162 .name = "arm64", 165 163 .init = arm64__annotate_init, 164 + }, 165 + { 166 + .name = "csky", 167 + .init = csky__annotate_init, 166 168 }, 167 169 { 168 170 .name = "x86", ··· 563 557 if (comment == NULL) 564 558 return 0; 565 559 566 - comment = ltrim(comment); 560 + comment = skip_spaces(comment); 567 561 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); 568 562 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 569 563 ··· 608 602 if (comment == NULL) 609 603 return 0; 610 604 611 - comment = ltrim(comment); 605 + comment = skip_spaces(comment); 612 606 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 613 607 614 608 return 0; ··· 937 931 if (sym == NULL) 938 932 return 0; 939 933 src = symbol__hists(sym, evsel->evlist->nr_entries); 940 - if (src == NULL) 941 - return -ENOMEM; 942 - return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample); 934 + return (src) ? 
__symbol__inc_addr_samples(sym, map, src, evsel->idx, 935 + addr, sample) : 0; 943 936 } 944 937 945 938 static int symbol__account_cycles(u64 addr, u64 start, ··· 1104 1099 1105 1100 static int disasm_line__parse(char *line, const char **namep, char **rawp) 1106 1101 { 1107 - char tmp, *name = ltrim(line); 1102 + char tmp, *name = skip_spaces(line); 1108 1103 1109 1104 if (name[0] == '\0') 1110 1105 return -1; ··· 1122 1117 goto out_free_name; 1123 1118 1124 1119 (*rawp)[0] = tmp; 1125 - *rawp = ltrim(*rawp); 1120 + *rawp = skip_spaces(*rawp); 1126 1121 1127 1122 return 0; 1128 1123 ··· 1501 1496 return -1; 1502 1497 1503 1498 line_ip = -1; 1504 - parsed_line = rtrim(line); 1499 + parsed_line = strim(line); 1505 1500 1506 1501 /* /filename:linenr ? Save line number and ignore. */ 1507 1502 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { ··· 1509 1504 return 0; 1510 1505 } 1511 1506 1512 - tmp = ltrim(parsed_line); 1507 + tmp = skip_spaces(parsed_line); 1513 1508 if (*tmp) { 1514 1509 /* 1515 1510 * Parse hexa addresses followed by ':'
+3 -2
tools/perf/util/auxtrace.c
··· 51 51 #include "arm-spe.h" 52 52 #include "s390-cpumsf.h" 53 53 54 - #include "sane_ctype.h" 54 + #include <linux/ctype.h> 55 55 #include "symbol/kallsyms.h" 56 56 57 57 static bool auxtrace__dont_decode(struct perf_session *session) ··· 1001 1001 } 1002 1002 1003 1003 if (!str) { 1004 - itrace_synth_opts__set_default(synth_opts, false); 1004 + itrace_synth_opts__set_default(synth_opts, 1005 + synth_opts->default_no_sample); 1005 1006 return 0; 1006 1007 } 1007 1008
+34
tools/perf/util/auxtrace.h
··· 74 74 * @period_type: 'instructions' events period type 75 75 * @initial_skip: skip N events at the beginning. 76 76 * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all 77 + * @ptime_range: time intervals to trace or NULL 78 + * @range_num: number of time intervals to trace 77 79 */ 78 80 struct itrace_synth_opts { 79 81 bool set; ··· 100 98 enum itrace_period_type period_type; 101 99 unsigned long initial_skip; 102 100 unsigned long *cpu_bitmap; 101 + struct perf_time_interval *ptime_range; 102 + int range_num; 103 103 }; 104 104 105 105 /** ··· 594 590 " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ 595 591 " concatenate multiple options. Default is ibxwpe or cewp\n" 596 592 593 + static inline 594 + void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts, 595 + struct perf_time_interval *ptime_range, 596 + int range_num) 597 + { 598 + opts->ptime_range = ptime_range; 599 + opts->range_num = range_num; 600 + } 601 + 602 + static inline 603 + void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts) 604 + { 605 + opts->ptime_range = NULL; 606 + opts->range_num = 0; 607 + } 597 608 598 609 #else 599 610 ··· 751 732 bool per_cpu); 752 733 753 734 #define ITRACE_HELP "" 735 + 736 + static inline 737 + void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts 738 + __maybe_unused, 739 + struct perf_time_interval *ptime_range 740 + __maybe_unused, 741 + int range_num __maybe_unused) 742 + { 743 + } 744 + 745 + static inline 746 + void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts 747 + __maybe_unused) 748 + { 749 + } 754 750 755 751 #endif 756 752
+1 -1
tools/perf/util/build-id.c
··· 29 29 #include "probe-file.h" 30 30 #include "strlist.h" 31 31 32 - #include "sane_ctype.h" 32 + #include <linux/ctype.h> 33 33 34 34 static bool no_buildid_cache; 35 35
+7 -3
tools/perf/util/config.c
··· 24 24 #include <unistd.h> 25 25 #include <linux/string.h> 26 26 27 - #include "sane_ctype.h" 27 + #include <linux/ctype.h> 28 28 29 29 #define MAXNAME (256) 30 30 ··· 739 739 if (ret < 0) { 740 740 pr_err("Error: wrong config key-value pair %s=%s\n", 741 741 key, value); 742 - break; 742 + /* 743 + * Can't be just a 'break', as perf_config_set__for_each_entry() 744 + * expands to two nested for() loops. 745 + */ 746 + goto out; 743 747 } 744 748 } 745 749 } 746 - 750 + out: 747 751 return ret; 748 752 } 749 753
+58 -8
tools/perf/util/cpumap.c
··· 10 10 #include <linux/bitmap.h> 11 11 #include "asm/bug.h" 12 12 13 - #include "sane_ctype.h" 13 + #include <linux/ctype.h> 14 14 15 15 static int max_cpu_num; 16 16 static int max_present_cpu_num; ··· 373 373 return 0; 374 374 } 375 375 376 + int cpu_map__get_die_id(int cpu) 377 + { 378 + int value, ret = cpu__get_topology_int(cpu, "die_id", &value); 379 + 380 + return ret ?: value; 381 + } 382 + 383 + int cpu_map__get_die(struct cpu_map *map, int idx, void *data) 384 + { 385 + int cpu, die_id, s; 386 + 387 + if (idx > map->nr) 388 + return -1; 389 + 390 + cpu = map->map[idx]; 391 + 392 + die_id = cpu_map__get_die_id(cpu); 393 + /* There is no die_id on legacy system. */ 394 + if (die_id == -1) 395 + die_id = 0; 396 + 397 + s = cpu_map__get_socket(map, idx, data); 398 + if (s == -1) 399 + return -1; 400 + 401 + /* 402 + * Encode socket in bit range 15:8 403 + * die_id is relative to socket, and 404 + * we need a global id. So we combine 405 + * socket + die id 406 + */ 407 + if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n")) 408 + return -1; 409 + 410 + if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) 411 + return -1; 412 + 413 + return (s << 8) | (die_id & 0xff); 414 + } 415 + 376 416 int cpu_map__get_core_id(int cpu) 377 417 { 378 418 int value, ret = cpu__get_topology_int(cpu, "core_id", &value); ··· 421 381 422 382 int cpu_map__get_core(struct cpu_map *map, int idx, void *data) 423 383 { 424 - int cpu, s; 384 + int cpu, s_die; 425 385 426 386 if (idx > map->nr) 427 387 return -1; ··· 430 390 431 391 cpu = cpu_map__get_core_id(cpu); 432 392 433 - s = cpu_map__get_socket(map, idx, data); 434 - if (s == -1) 393 + /* s_die is the combination of socket + die id */ 394 + s_die = cpu_map__get_die(map, idx, data); 395 + if (s_die == -1) 435 396 return -1; 436 397 437 398 /* 438 - * encode socket in upper 16 bits 439 - * core_id is relative to socket, and 399 + * encode socket in bit range 31:24 400 + * encode die id in bit range 23:16 401 + 
* core_id is relative to socket and die, 440 402 * we need a global id. So we combine 441 - * socket+ core id 403 + * socket + die id + core id 442 404 */ 443 - return (s << 16) | (cpu & 0xffff); 405 + if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) 406 + return -1; 407 + 408 + return (s_die << 16) | (cpu & 0xffff); 444 409 } 445 410 446 411 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) 447 412 { 448 413 return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); 414 + } 415 + 416 + int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep) 417 + { 418 + return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); 449 419 } 450 420 451 421 int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
+9 -1
tools/perf/util/cpumap.h
··· 25 25 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); 26 26 int cpu_map__get_socket_id(int cpu); 27 27 int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); 28 + int cpu_map__get_die_id(int cpu); 29 + int cpu_map__get_die(struct cpu_map *map, int idx, void *data); 28 30 int cpu_map__get_core_id(int cpu); 29 31 int cpu_map__get_core(struct cpu_map *map, int idx, void *data); 30 32 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); 33 + int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep); 31 34 int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); 32 35 const struct cpu_map *cpu_map__online(void); /* thread unsafe */ 33 36 ··· 46 43 47 44 static inline int cpu_map__id_to_socket(int id) 48 45 { 49 - return id >> 16; 46 + return id >> 24; 47 + } 48 + 49 + static inline int cpu_map__id_to_die(int id) 50 + { 51 + return (id >> 16) & 0xff; 50 52 } 51 53 52 54 static inline int cpu_map__id_to_cpu(int id)
+79 -7
tools/perf/util/cputopo.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <sys/param.h> 3 + #include <sys/utsname.h> 3 4 #include <inttypes.h> 4 5 #include <api/fs/fs.h> 5 6 ··· 9 8 #include "util.h" 10 9 #include "env.h" 11 10 12 - 13 11 #define CORE_SIB_FMT \ 14 12 "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" 13 + #define DIE_SIB_FMT \ 14 + "%s/devices/system/cpu/cpu%d/topology/die_cpus_list" 15 15 #define THRD_SIB_FMT \ 16 16 "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" 17 + #define THRD_SIB_FMT_NEW \ 18 + "%s/devices/system/cpu/cpu%d/topology/core_cpus_list" 17 19 #define NODE_ONLINE_FMT \ 18 20 "%s/devices/system/node/online" 19 21 #define NODE_MEMINFO_FMT \ ··· 38 34 sysfs__mountpoint(), cpu); 39 35 fp = fopen(filename, "r"); 40 36 if (!fp) 41 - goto try_threads; 37 + goto try_dies; 42 38 43 39 sret = getline(&buf, &len, fp); 44 40 fclose(fp); 45 41 if (sret <= 0) 46 - goto try_threads; 42 + goto try_dies; 47 43 48 44 p = strchr(buf, '\n'); 49 45 if (p) ··· 61 57 } 62 58 ret = 0; 63 59 64 - try_threads: 65 - scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, 60 + try_dies: 61 + if (!tp->die_siblings) 62 + goto try_threads; 63 + 64 + scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, 66 65 sysfs__mountpoint(), cpu); 66 + fp = fopen(filename, "r"); 67 + if (!fp) 68 + goto try_threads; 69 + 70 + sret = getline(&buf, &len, fp); 71 + fclose(fp); 72 + if (sret <= 0) 73 + goto try_threads; 74 + 75 + p = strchr(buf, '\n'); 76 + if (p) 77 + *p = '\0'; 78 + 79 + for (i = 0; i < tp->die_sib; i++) { 80 + if (!strcmp(buf, tp->die_siblings[i])) 81 + break; 82 + } 83 + if (i == tp->die_sib) { 84 + tp->die_siblings[i] = buf; 85 + tp->die_sib++; 86 + buf = NULL; 87 + len = 0; 88 + } 89 + ret = 0; 90 + 91 + try_threads: 92 + scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW, 93 + sysfs__mountpoint(), cpu); 94 + if (access(filename, F_OK) == -1) { 95 + scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, 96 + sysfs__mountpoint(), cpu); 97 + } 67 98 fp = fopen(filename, "r"); 68 99 if 
(!fp) 69 100 goto done; ··· 137 98 for (i = 0 ; i < tp->core_sib; i++) 138 99 zfree(&tp->core_siblings[i]); 139 100 101 + if (tp->die_sib) { 102 + for (i = 0 ; i < tp->die_sib; i++) 103 + zfree(&tp->die_siblings[i]); 104 + } 105 + 140 106 for (i = 0 ; i < tp->thread_sib; i++) 141 107 zfree(&tp->thread_siblings[i]); 142 108 143 109 free(tp); 144 110 } 145 111 112 + static bool has_die_topology(void) 113 + { 114 + char filename[MAXPATHLEN]; 115 + struct utsname uts; 116 + 117 + if (uname(&uts) < 0) 118 + return false; 119 + 120 + if (strncmp(uts.machine, "x86_64", 6)) 121 + return false; 122 + 123 + scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, 124 + sysfs__mountpoint(), 0); 125 + if (access(filename, F_OK) == -1) 126 + return false; 127 + 128 + return true; 129 + } 130 + 146 131 struct cpu_topology *cpu_topology__new(void) 147 132 { 148 133 struct cpu_topology *tp = NULL; 149 134 void *addr; 150 - u32 nr, i; 135 + u32 nr, i, nr_addr; 151 136 size_t sz; 152 137 long ncpus; 153 138 int ret = -1; 154 139 struct cpu_map *map; 140 + bool has_die = has_die_topology(); 155 141 156 142 ncpus = cpu__max_present_cpu(); 157 143 ··· 190 126 nr = (u32)(ncpus & UINT_MAX); 191 127 192 128 sz = nr * sizeof(char *); 193 - addr = calloc(1, sizeof(*tp) + 2 * sz); 129 + if (has_die) 130 + nr_addr = 3; 131 + else 132 + nr_addr = 2; 133 + addr = calloc(1, sizeof(*tp) + nr_addr * sz); 194 134 if (!addr) 195 135 goto out_free; 196 136 ··· 202 134 addr += sizeof(*tp); 203 135 tp->core_siblings = addr; 204 136 addr += sz; 137 + if (has_die) { 138 + tp->die_siblings = addr; 139 + addr += sz; 140 + } 205 141 tp->thread_siblings = addr; 206 142 207 143 for (i = 0; i < nr; i++) {
+2
tools/perf/util/cputopo.h
··· 7 7 8 8 struct cpu_topology { 9 9 u32 core_sib; 10 + u32 die_sib; 10 11 u32 thread_sib; 11 12 char **core_siblings; 13 + char **die_siblings; 12 14 char **thread_siblings; 13 15 }; 14 16
+181 -85
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
··· 18 18 #include "intlist.h" 19 19 #include "util.h" 20 20 21 - #define MAX_BUFFER 1024 22 - 23 21 /* use raw logging */ 24 22 #ifdef CS_DEBUG_RAW 25 23 #define CS_LOG_RAW_FRAMES ··· 29 31 #endif 30 32 #endif 31 33 32 - #define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL 33 - 34 34 struct cs_etm_decoder { 35 35 void *data; 36 36 void (*packet_printer)(const char *msg); 37 37 dcd_tree_handle_t dcd_tree; 38 38 cs_etm_mem_cb_type mem_access; 39 39 ocsd_datapath_resp_t prev_return; 40 - u32 packet_count; 41 - u32 head; 42 - u32 tail; 43 - struct cs_etm_packet packet_buffer[MAX_BUFFER]; 44 40 }; 45 41 46 42 static u32 47 43 cs_etm_decoder__mem_access(const void *context, 48 44 const ocsd_vaddr_t address, 49 45 const ocsd_mem_space_acc_t mem_space __maybe_unused, 46 + const u8 trace_chan_id, 50 47 const u32 req_size, 51 48 u8 *buffer) 52 49 { 53 50 struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; 54 51 55 - return decoder->mem_access(decoder->data, 56 - address, 57 - req_size, 58 - buffer); 52 + return decoder->mem_access(decoder->data, trace_chan_id, 53 + address, req_size, buffer); 59 54 } 60 55 61 56 int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, ··· 57 66 { 58 67 decoder->mem_access = cb_func; 59 68 60 - if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end, 61 - OCSD_MEM_SPACE_ANY, 62 - cs_etm_decoder__mem_access, decoder)) 69 + if (ocsd_dt_add_callback_trcid_mem_acc(decoder->dcd_tree, start, end, 70 + OCSD_MEM_SPACE_ANY, 71 + cs_etm_decoder__mem_access, 72 + decoder)) 63 73 return -1; 64 74 65 75 return 0; ··· 80 88 return 0; 81 89 } 82 90 83 - int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, 91 + int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, 84 92 struct cs_etm_packet *packet) 85 93 { 86 - if (!decoder || !packet) 94 + if (!packet_queue || !packet) 87 95 return -EINVAL; 88 96 89 97 /* Nothing to do, might as well just return */ 90 - if (decoder->packet_count == 0) 98 + if 
(packet_queue->packet_count == 0) 91 99 return 0; 92 100 /* 93 101 * The queueing process in function cs_etm_decoder__buffer_packet() ··· 98 106 * value. Otherwise the first element of the packet queue is not 99 107 * used. 100 108 */ 101 - decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); 109 + packet_queue->head = (packet_queue->head + 1) & 110 + (CS_ETM_PACKET_MAX_BUFFER - 1); 102 111 103 - *packet = decoder->packet_buffer[decoder->head]; 112 + *packet = packet_queue->packet_buffer[packet_queue->head]; 104 113 105 - decoder->packet_count--; 114 + packet_queue->packet_count--; 106 115 107 116 return 1; 108 117 } ··· 269 276 trace_config); 270 277 } 271 278 272 - static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) 279 + static ocsd_datapath_resp_t 280 + cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, 281 + struct cs_etm_packet_queue *packet_queue, 282 + const uint8_t trace_chan_id) 273 283 { 274 - int i; 284 + /* No timestamp packet has been received, nothing to do */ 285 + if (!packet_queue->timestamp) 286 + return OCSD_RESP_CONT; 275 287 276 - decoder->head = 0; 277 - decoder->tail = 0; 278 - decoder->packet_count = 0; 279 - for (i = 0; i < MAX_BUFFER; i++) { 280 - decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 281 - decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 282 - decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 283 - decoder->packet_buffer[i].instr_count = 0; 284 - decoder->packet_buffer[i].last_instr_taken_branch = false; 285 - decoder->packet_buffer[i].last_instr_size = 0; 286 - decoder->packet_buffer[i].last_instr_type = 0; 287 - decoder->packet_buffer[i].last_instr_subtype = 0; 288 - decoder->packet_buffer[i].last_instr_cond = 0; 289 - decoder->packet_buffer[i].flags = 0; 290 - decoder->packet_buffer[i].exception_number = UINT32_MAX; 291 - decoder->packet_buffer[i].trace_chan_id = UINT8_MAX; 292 - decoder->packet_buffer[i].cpu = INT_MIN; 293 - } 288 + packet_queue->timestamp = 
packet_queue->next_timestamp; 289 + 290 + /* Estimate the timestamp for the next range packet */ 291 + packet_queue->next_timestamp += packet_queue->instr_count; 292 + packet_queue->instr_count = 0; 293 + 294 + /* Tell the front end which traceid_queue needs attention */ 295 + cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); 296 + 297 + return OCSD_RESP_WAIT; 294 298 } 295 299 296 300 static ocsd_datapath_resp_t 297 - cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, 301 + cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, 302 + const ocsd_generic_trace_elem *elem, 303 + const uint8_t trace_chan_id) 304 + { 305 + struct cs_etm_packet_queue *packet_queue; 306 + 307 + /* First get the packet queue for this traceID */ 308 + packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); 309 + if (!packet_queue) 310 + return OCSD_RESP_FATAL_SYS_ERR; 311 + 312 + /* 313 + * We've seen a timestamp packet before - simply record the new value. 314 + * Function do_soft_timestamp() will report the value to the front end, 315 + * hence asking the decoder to keep decoding rather than stopping. 316 + */ 317 + if (packet_queue->timestamp) { 318 + packet_queue->next_timestamp = elem->timestamp; 319 + return OCSD_RESP_CONT; 320 + } 321 + 322 + /* 323 + * This is the first timestamp we've seen since the beginning of traces 324 + * or a discontinuity. Since timestamps packets are generated *after* 325 + * range packets have been generated, we need to estimate the time at 326 + * which instructions started by substracting the number of instructions 327 + * executed to the timestamp. 
328 + */ 329 + packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; 330 + packet_queue->next_timestamp = elem->timestamp; 331 + packet_queue->instr_count = 0; 332 + 333 + /* Tell the front end which traceid_queue needs attention */ 334 + cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); 335 + 336 + /* Halt processing until we are being told to proceed */ 337 + return OCSD_RESP_WAIT; 338 + } 339 + 340 + static void 341 + cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) 342 + { 343 + packet_queue->timestamp = 0; 344 + packet_queue->next_timestamp = 0; 345 + packet_queue->instr_count = 0; 346 + } 347 + 348 + static ocsd_datapath_resp_t 349 + cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue, 298 350 const u8 trace_chan_id, 299 351 enum cs_etm_sample_type sample_type) 300 352 { 301 353 u32 et = 0; 302 354 int cpu; 303 355 304 - if (decoder->packet_count >= MAX_BUFFER - 1) 356 + if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1) 305 357 return OCSD_RESP_FATAL_SYS_ERR; 306 358 307 359 if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) 308 360 return OCSD_RESP_FATAL_SYS_ERR; 309 361 310 - et = decoder->tail; 311 - et = (et + 1) & (MAX_BUFFER - 1); 312 - decoder->tail = et; 313 - decoder->packet_count++; 362 + et = packet_queue->tail; 363 + et = (et + 1) & (CS_ETM_PACKET_MAX_BUFFER - 1); 364 + packet_queue->tail = et; 365 + packet_queue->packet_count++; 314 366 315 - decoder->packet_buffer[et].sample_type = sample_type; 316 - decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; 317 - decoder->packet_buffer[et].cpu = cpu; 318 - decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; 319 - decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; 320 - decoder->packet_buffer[et].instr_count = 0; 321 - decoder->packet_buffer[et].last_instr_taken_branch = false; 322 - decoder->packet_buffer[et].last_instr_size = 0; 323 - decoder->packet_buffer[et].last_instr_type = 0; 324 - 
decoder->packet_buffer[et].last_instr_subtype = 0; 325 - decoder->packet_buffer[et].last_instr_cond = 0; 326 - decoder->packet_buffer[et].flags = 0; 327 - decoder->packet_buffer[et].exception_number = UINT32_MAX; 328 - decoder->packet_buffer[et].trace_chan_id = trace_chan_id; 367 + packet_queue->packet_buffer[et].sample_type = sample_type; 368 + packet_queue->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; 369 + packet_queue->packet_buffer[et].cpu = cpu; 370 + packet_queue->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; 371 + packet_queue->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; 372 + packet_queue->packet_buffer[et].instr_count = 0; 373 + packet_queue->packet_buffer[et].last_instr_taken_branch = false; 374 + packet_queue->packet_buffer[et].last_instr_size = 0; 375 + packet_queue->packet_buffer[et].last_instr_type = 0; 376 + packet_queue->packet_buffer[et].last_instr_subtype = 0; 377 + packet_queue->packet_buffer[et].last_instr_cond = 0; 378 + packet_queue->packet_buffer[et].flags = 0; 379 + packet_queue->packet_buffer[et].exception_number = UINT32_MAX; 380 + packet_queue->packet_buffer[et].trace_chan_id = trace_chan_id; 329 381 330 - if (decoder->packet_count == MAX_BUFFER - 1) 382 + if (packet_queue->packet_count == CS_ETM_PACKET_MAX_BUFFER - 1) 331 383 return OCSD_RESP_WAIT; 332 384 333 385 return OCSD_RESP_CONT; 334 386 } 335 387 336 388 static ocsd_datapath_resp_t 337 - cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, 389 + cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq, 390 + struct cs_etm_packet_queue *packet_queue, 338 391 const ocsd_generic_trace_elem *elem, 339 392 const uint8_t trace_chan_id) 340 393 { 341 394 int ret = 0; 342 395 struct cs_etm_packet *packet; 343 396 344 - ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 397 + ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id, 345 398 CS_ETM_RANGE); 346 399 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) 347 400 return ret; 348 401 349 - packet = 
&decoder->packet_buffer[decoder->tail]; 402 + packet = &packet_queue->packet_buffer[packet_queue->tail]; 350 403 351 404 switch (elem->isa) { 352 405 case ocsd_isa_aarch64: ··· 435 396 436 397 packet->last_instr_size = elem->last_instr_sz; 437 398 399 + /* per-thread scenario, no need to generate a timestamp */ 400 + if (cs_etm__etmq_is_timeless(etmq)) 401 + goto out; 402 + 403 + /* 404 + * The packet queue is full and we haven't seen a timestamp (had we 405 + * seen one the packet queue wouldn't be full). Let the front end 406 + * deal with it. 407 + */ 408 + if (ret == OCSD_RESP_WAIT) 409 + goto out; 410 + 411 + packet_queue->instr_count += elem->num_instr_range; 412 + /* Tell the front end we have a new timestamp to process */ 413 + ret = cs_etm_decoder__do_soft_timestamp(etmq, packet_queue, 414 + trace_chan_id); 415 + out: 438 416 return ret; 439 417 } 440 418 441 419 static ocsd_datapath_resp_t 442 - cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder, 443 - const uint8_t trace_chan_id) 420 + cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue, 421 + const uint8_t trace_chan_id) 444 422 { 445 - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 423 + /* 424 + * Something happened and who knows when we'll get new traces so 425 + * reset time statistics. 
426 + */ 427 + cs_etm_decoder__reset_timestamp(queue); 428 + return cs_etm_decoder__buffer_packet(queue, trace_chan_id, 446 429 CS_ETM_DISCONTINUITY); 447 430 } 448 431 449 432 static ocsd_datapath_resp_t 450 - cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, 433 + cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue, 451 434 const ocsd_generic_trace_elem *elem, 452 435 const uint8_t trace_chan_id) 453 436 { int ret = 0; 454 437 struct cs_etm_packet *packet; 455 438 456 - ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 439 + ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id, 457 440 CS_ETM_EXCEPTION); 458 441 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) 459 442 return ret; 460 443 461 - packet = &decoder->packet_buffer[decoder->tail]; 444 + packet = &queue->packet_buffer[queue->tail]; 462 445 packet->exception_number = elem->exception_number; 463 446 464 447 return ret; 465 448 } 466 449 467 450 static ocsd_datapath_resp_t 468 - cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder, 451 + cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue, 469 452 const uint8_t trace_chan_id) 470 453 { 471 - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 454 + return cs_etm_decoder__buffer_packet(queue, trace_chan_id, 472 455 CS_ETM_EXCEPTION_RET); 456 + } 457 + 458 + static ocsd_datapath_resp_t 459 + cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, 460 + struct cs_etm_packet_queue *packet_queue, 461 + const ocsd_generic_trace_elem *elem, 462 + const uint8_t trace_chan_id) 463 + { 464 + pid_t tid; 465 + 466 + /* Ignore PE_CONTEXT packets that don't have a valid contextID */ 467 + if (!elem->context.ctxt_id_valid) 468 + return OCSD_RESP_CONT; 469 + 470 + tid = elem->context.context_id; 471 + if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) 472 + return OCSD_RESP_FATAL_SYS_ERR; 473 + 474 + /* 475 + * A timestamp is generated after a PE_CONTEXT element so make sure 476 + * to 
rely on that coming one. 477 + */ 478 + cs_etm_decoder__reset_timestamp(packet_queue); 479 + 480 + return OCSD_RESP_CONT; 473 481 } 474 482 475 483 static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( ··· 527 441 { 528 442 ocsd_datapath_resp_t resp = OCSD_RESP_CONT; 529 443 struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; 444 + struct cs_etm_queue *etmq = decoder->data; 445 + struct cs_etm_packet_queue *packet_queue; 446 + 447 + /* First get the packet queue for this traceID */ 448 + packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); 449 + if (!packet_queue) 450 + return OCSD_RESP_FATAL_SYS_ERR; 530 451 531 452 switch (elem->elem_type) { 532 453 case OCSD_GEN_TRC_ELEM_UNKNOWN: ··· 541 448 case OCSD_GEN_TRC_ELEM_EO_TRACE: 542 449 case OCSD_GEN_TRC_ELEM_NO_SYNC: 543 450 case OCSD_GEN_TRC_ELEM_TRACE_ON: 544 - resp = cs_etm_decoder__buffer_discontinuity(decoder, 451 + resp = cs_etm_decoder__buffer_discontinuity(packet_queue, 545 452 trace_chan_id); 546 453 break; 547 454 case OCSD_GEN_TRC_ELEM_INSTR_RANGE: 548 - resp = cs_etm_decoder__buffer_range(decoder, elem, 455 + resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem, 549 456 trace_chan_id); 550 457 break; 551 458 case OCSD_GEN_TRC_ELEM_EXCEPTION: 552 - resp = cs_etm_decoder__buffer_exception(decoder, elem, 459 + resp = cs_etm_decoder__buffer_exception(packet_queue, elem, 553 460 trace_chan_id); 554 461 break; 555 462 case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: 556 - resp = cs_etm_decoder__buffer_exception_ret(decoder, 463 + resp = cs_etm_decoder__buffer_exception_ret(packet_queue, 557 464 trace_chan_id); 558 465 break; 559 - case OCSD_GEN_TRC_ELEM_PE_CONTEXT: 560 - case OCSD_GEN_TRC_ELEM_ADDR_NACC: 561 466 case OCSD_GEN_TRC_ELEM_TIMESTAMP: 467 + resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, 468 + trace_chan_id); 469 + break; 470 + case OCSD_GEN_TRC_ELEM_PE_CONTEXT: 471 + resp = cs_etm_decoder__set_tid(etmq, packet_queue, 472 + elem, trace_chan_id); 473 + 
break; 474 + case OCSD_GEN_TRC_ELEM_ADDR_NACC: 562 475 case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: 563 476 case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: 564 477 case OCSD_GEN_TRC_ELEM_EVENT: ··· 653 554 654 555 decoder->data = d_params->data; 655 556 decoder->prev_return = OCSD_RESP_CONT; 656 - cs_etm_decoder__clear_buffer(decoder); 657 557 format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : 658 558 OCSD_TRC_SRC_SINGLE); 659 559 flags = 0; ··· 675 577 /* init library print logging support */ 676 578 ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder); 677 579 if (ret != 0) 678 - goto err_free_decoder_tree; 580 + goto err_free_decoder; 679 581 680 582 /* init raw frame logging if required */ 681 583 cs_etm_decoder__init_raw_frame_logging(d_params, decoder); ··· 685 587 &t_params[i], 686 588 decoder); 687 589 if (ret != 0) 688 - goto err_free_decoder_tree; 590 + goto err_free_decoder; 689 591 } 690 592 691 593 return decoder; 692 594 693 - err_free_decoder_tree: 694 - ocsd_destroy_dcd_tree(decoder->dcd_tree); 695 595 err_free_decoder: 696 - free(decoder); 596 + cs_etm_decoder__free(decoder); 697 597 return NULL; 698 598 } 699 599
+4 -35
tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
··· 14 14 #include <stdio.h> 15 15 16 16 struct cs_etm_decoder; 17 - 18 - enum cs_etm_sample_type { 19 - CS_ETM_EMPTY, 20 - CS_ETM_RANGE, 21 - CS_ETM_DISCONTINUITY, 22 - CS_ETM_EXCEPTION, 23 - CS_ETM_EXCEPTION_RET, 24 - }; 25 - 26 - enum cs_etm_isa { 27 - CS_ETM_ISA_UNKNOWN, 28 - CS_ETM_ISA_A64, 29 - CS_ETM_ISA_A32, 30 - CS_ETM_ISA_T32, 31 - }; 32 - 33 - struct cs_etm_packet { 34 - enum cs_etm_sample_type sample_type; 35 - enum cs_etm_isa isa; 36 - u64 start_addr; 37 - u64 end_addr; 38 - u32 instr_count; 39 - u32 last_instr_type; 40 - u32 last_instr_subtype; 41 - u32 flags; 42 - u32 exception_number; 43 - u8 last_instr_cond; 44 - u8 last_instr_taken_branch; 45 - u8 last_instr_size; 46 - u8 trace_chan_id; 47 - int cpu; 48 - }; 17 + struct cs_etm_packet; 18 + struct cs_etm_packet_queue; 49 19 50 20 struct cs_etm_queue; 51 21 52 - typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, 53 - size_t, u8 *); 22 + typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *); 54 23 55 24 struct cs_etmv3_trace_params { 56 25 u32 reg_ctrl; ··· 88 119 u64 start, u64 end, 89 120 cs_etm_mem_cb_type cb_func); 90 121 91 - int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, 122 + int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, 92 123 struct cs_etm_packet *packet); 93 124 94 125 int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
+791 -233
tools/perf/util/cs-etm.c
··· 29 29 #include "thread.h" 30 30 #include "thread_map.h" 31 31 #include "thread-stack.h" 32 + #include <tools/libc_compat.h> 32 33 #include "util.h" 33 34 34 35 #define MAX_TIMESTAMP (~0ULL) ··· 61 60 unsigned int pmu_type; 62 61 }; 63 62 64 - struct cs_etm_queue { 65 - struct cs_etm_auxtrace *etm; 66 - struct thread *thread; 67 - struct cs_etm_decoder *decoder; 68 - struct auxtrace_buffer *buffer; 69 - union perf_event *event_buf; 70 - unsigned int queue_nr; 63 + struct cs_etm_traceid_queue { 64 + u8 trace_chan_id; 71 65 pid_t pid, tid; 72 - int cpu; 73 - u64 offset; 74 66 u64 period_instructions; 67 + size_t last_branch_pos; 68 + union perf_event *event_buf; 69 + struct thread *thread; 75 70 struct branch_stack *last_branch; 76 71 struct branch_stack *last_branch_rb; 77 - size_t last_branch_pos; 78 72 struct cs_etm_packet *prev_packet; 79 73 struct cs_etm_packet *packet; 74 + struct cs_etm_packet_queue packet_queue; 75 + }; 76 + 77 + struct cs_etm_queue { 78 + struct cs_etm_auxtrace *etm; 79 + struct cs_etm_decoder *decoder; 80 + struct auxtrace_buffer *buffer; 81 + unsigned int queue_nr; 82 + u8 pending_timestamp; 83 + u64 offset; 80 84 const unsigned char *buf; 81 85 size_t buf_len, buf_used; 86 + /* Conversion between traceID and index in traceid_queues array */ 87 + struct intlist *traceid_queues_list; 88 + struct cs_etm_traceid_queue **traceid_queues; 82 89 }; 83 90 84 91 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); 92 + static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); 85 93 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 86 94 pid_t tid); 95 + static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 96 + static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 87 97 88 98 /* PTMs ETMIDR [11:8] set to b0011 */ 89 99 #define ETMIDR_PTM_VERSION 0x00000300 100 + 101 + /* 102 + * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 103 + * work with. 
One option is to modify the auxtrace_heap_XYZ() API or simply 104 + * encode the etm queue number as the upper 16 bits and the channel as 105 + * the lower 16 bits. 106 + */ 107 + #define TO_CS_QUEUE_NR(queue_nr, trace_id_chan) \ 108 + (queue_nr << 16 | trace_chan_id) 109 + #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) 110 + #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) 90 111 91 112 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 92 113 { ··· 146 123 metadata = inode->priv; 147 124 *cpu = (int)metadata[CS_ETM_CPU]; 148 125 return 0; 126 + } 127 + 128 + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 129 + u8 trace_chan_id) 130 + { 131 + /* 132 + * When a timestamp packet is encountered the backend code 133 + * is stopped so that the front end has time to process packets 134 + * that were accumulated in the traceID queue. Since there can 135 + * be more than one channel per cs_etm_queue, we need to specify 136 + * what traceID queue needs servicing. 
137 + */ 138 + etmq->pending_timestamp = trace_chan_id; 139 + } 140 + 141 + static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, 142 + u8 *trace_chan_id) 143 + { 144 + struct cs_etm_packet_queue *packet_queue; 145 + 146 + if (!etmq->pending_timestamp) 147 + return 0; 148 + 149 + if (trace_chan_id) 150 + *trace_chan_id = etmq->pending_timestamp; 151 + 152 + packet_queue = cs_etm__etmq_get_packet_queue(etmq, 153 + etmq->pending_timestamp); 154 + if (!packet_queue) 155 + return 0; 156 + 157 + /* Acknowledge pending status */ 158 + etmq->pending_timestamp = 0; 159 + 160 + /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ 161 + return packet_queue->timestamp; 162 + } 163 + 164 + static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) 165 + { 166 + int i; 167 + 168 + queue->head = 0; 169 + queue->tail = 0; 170 + queue->packet_count = 0; 171 + for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { 172 + queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 173 + queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 174 + queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 175 + queue->packet_buffer[i].instr_count = 0; 176 + queue->packet_buffer[i].last_instr_taken_branch = false; 177 + queue->packet_buffer[i].last_instr_size = 0; 178 + queue->packet_buffer[i].last_instr_type = 0; 179 + queue->packet_buffer[i].last_instr_subtype = 0; 180 + queue->packet_buffer[i].last_instr_cond = 0; 181 + queue->packet_buffer[i].flags = 0; 182 + queue->packet_buffer[i].exception_number = UINT32_MAX; 183 + queue->packet_buffer[i].trace_chan_id = UINT8_MAX; 184 + queue->packet_buffer[i].cpu = INT_MIN; 185 + } 186 + } 187 + 188 + static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) 189 + { 190 + int idx; 191 + struct int_node *inode; 192 + struct cs_etm_traceid_queue *tidq; 193 + struct intlist *traceid_queues_list = etmq->traceid_queues_list; 194 + 195 + intlist__for_each_entry(inode, traceid_queues_list) { 196 + idx = 
(int)(intptr_t)inode->priv; 197 + tidq = etmq->traceid_queues[idx]; 198 + cs_etm__clear_packet_queue(&tidq->packet_queue); 199 + } 200 + } 201 + 202 + static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, 203 + struct cs_etm_traceid_queue *tidq, 204 + u8 trace_chan_id) 205 + { 206 + int rc = -ENOMEM; 207 + struct auxtrace_queue *queue; 208 + struct cs_etm_auxtrace *etm = etmq->etm; 209 + 210 + cs_etm__clear_packet_queue(&tidq->packet_queue); 211 + 212 + queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 213 + tidq->tid = queue->tid; 214 + tidq->pid = -1; 215 + tidq->trace_chan_id = trace_chan_id; 216 + 217 + tidq->packet = zalloc(sizeof(struct cs_etm_packet)); 218 + if (!tidq->packet) 219 + goto out; 220 + 221 + tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); 222 + if (!tidq->prev_packet) 223 + goto out_free; 224 + 225 + if (etm->synth_opts.last_branch) { 226 + size_t sz = sizeof(struct branch_stack); 227 + 228 + sz += etm->synth_opts.last_branch_sz * 229 + sizeof(struct branch_entry); 230 + tidq->last_branch = zalloc(sz); 231 + if (!tidq->last_branch) 232 + goto out_free; 233 + tidq->last_branch_rb = zalloc(sz); 234 + if (!tidq->last_branch_rb) 235 + goto out_free; 236 + } 237 + 238 + tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 239 + if (!tidq->event_buf) 240 + goto out_free; 241 + 242 + return 0; 243 + 244 + out_free: 245 + zfree(&tidq->last_branch_rb); 246 + zfree(&tidq->last_branch); 247 + zfree(&tidq->prev_packet); 248 + zfree(&tidq->packet); 249 + out: 250 + return rc; 251 + } 252 + 253 + static struct cs_etm_traceid_queue 254 + *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 255 + { 256 + int idx; 257 + struct int_node *inode; 258 + struct intlist *traceid_queues_list; 259 + struct cs_etm_traceid_queue *tidq, **traceid_queues; 260 + struct cs_etm_auxtrace *etm = etmq->etm; 261 + 262 + if (etm->timeless_decoding) 263 + trace_chan_id = CS_ETM_PER_THREAD_TRACEID; 264 + 265 + traceid_queues_list = 
etmq->traceid_queues_list; 266 + 267 + /* 268 + * Check if the traceid_queue exist for this traceID by looking 269 + * in the queue list. 270 + */ 271 + inode = intlist__find(traceid_queues_list, trace_chan_id); 272 + if (inode) { 273 + idx = (int)(intptr_t)inode->priv; 274 + return etmq->traceid_queues[idx]; 275 + } 276 + 277 + /* We couldn't find a traceid_queue for this traceID, allocate one */ 278 + tidq = malloc(sizeof(*tidq)); 279 + if (!tidq) 280 + return NULL; 281 + 282 + memset(tidq, 0, sizeof(*tidq)); 283 + 284 + /* Get a valid index for the new traceid_queue */ 285 + idx = intlist__nr_entries(traceid_queues_list); 286 + /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ 287 + inode = intlist__findnew(traceid_queues_list, trace_chan_id); 288 + if (!inode) 289 + goto out_free; 290 + 291 + /* Associate this traceID with this index */ 292 + inode->priv = (void *)(intptr_t)idx; 293 + 294 + if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) 295 + goto out_free; 296 + 297 + /* Grow the traceid_queues array by one unit */ 298 + traceid_queues = etmq->traceid_queues; 299 + traceid_queues = reallocarray(traceid_queues, 300 + idx + 1, 301 + sizeof(*traceid_queues)); 302 + 303 + /* 304 + * On failure reallocarray() returns NULL and the original block of 305 + * memory is left untouched. 306 + */ 307 + if (!traceid_queues) 308 + goto out_free; 309 + 310 + traceid_queues[idx] = tidq; 311 + etmq->traceid_queues = traceid_queues; 312 + 313 + return etmq->traceid_queues[idx]; 314 + 315 + out_free: 316 + /* 317 + * Function intlist__remove() removes the inode from the list 318 + * and delete the memory associated to it. 
319 + */ 320 + intlist__remove(traceid_queues_list, inode); 321 + free(tidq); 322 + 323 + return NULL; 324 + } 325 + 326 + struct cs_etm_packet_queue 327 + *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 328 + { 329 + struct cs_etm_traceid_queue *tidq; 330 + 331 + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 332 + if (tidq) 333 + return &tidq->packet_queue; 334 + 335 + return NULL; 149 336 } 150 337 151 338 static void cs_etm__packet_dump(const char *pkt_string) ··· 509 276 if (!tool->ordered_events) 510 277 return -EINVAL; 511 278 512 - if (!etm->timeless_decoding) 513 - return -EINVAL; 514 - 515 279 ret = cs_etm__update_queues(etm); 516 280 517 281 if (ret < 0) 518 282 return ret; 519 283 520 - return cs_etm__process_timeless_queues(etm, -1); 284 + if (etm->timeless_decoding) 285 + return cs_etm__process_timeless_queues(etm, -1); 286 + 287 + return cs_etm__process_queues(etm); 288 + } 289 + 290 + static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 291 + { 292 + int idx; 293 + uintptr_t priv; 294 + struct int_node *inode, *tmp; 295 + struct cs_etm_traceid_queue *tidq; 296 + struct intlist *traceid_queues_list = etmq->traceid_queues_list; 297 + 298 + intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 299 + priv = (uintptr_t)inode->priv; 300 + idx = priv; 301 + 302 + /* Free this traceid_queue from the array */ 303 + tidq = etmq->traceid_queues[idx]; 304 + thread__zput(tidq->thread); 305 + zfree(&tidq->event_buf); 306 + zfree(&tidq->last_branch); 307 + zfree(&tidq->last_branch_rb); 308 + zfree(&tidq->prev_packet); 309 + zfree(&tidq->packet); 310 + zfree(&tidq); 311 + 312 + /* 313 + * Function intlist__remove() removes the inode from the list 314 + * and delete the memory associated to it. 
315 + */ 316 + intlist__remove(traceid_queues_list, inode); 317 + } 318 + 319 + /* Then the RB tree itself */ 320 + intlist__delete(traceid_queues_list); 321 + etmq->traceid_queues_list = NULL; 322 + 323 + /* finally free the traceid_queues array */ 324 + free(etmq->traceid_queues); 325 + etmq->traceid_queues = NULL; 521 326 } 522 327 523 328 static void cs_etm__free_queue(void *priv) ··· 565 294 if (!etmq) 566 295 return; 567 296 568 - thread__zput(etmq->thread); 569 297 cs_etm_decoder__free(etmq->decoder); 570 - zfree(&etmq->event_buf); 571 - zfree(&etmq->last_branch); 572 - zfree(&etmq->last_branch_rb); 573 - zfree(&etmq->prev_packet); 574 - zfree(&etmq->packet); 298 + cs_etm__free_traceid_queues(etmq); 575 299 free(etmq); 576 300 } 577 301 ··· 631 365 } 632 366 } 633 367 634 - static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, 635 - size_t size, u8 *buffer) 368 + static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, 369 + u64 address, size_t size, u8 *buffer) 636 370 { 637 371 u8 cpumode; 638 372 u64 offset; 639 373 int len; 640 - struct thread *thread; 641 - struct machine *machine; 642 - struct addr_location al; 374 + struct thread *thread; 375 + struct machine *machine; 376 + struct addr_location al; 377 + struct cs_etm_traceid_queue *tidq; 643 378 644 379 if (!etmq) 645 380 return 0; 646 381 647 382 machine = etmq->etm->machine; 648 383 cpumode = cs_etm__cpu_mode(etmq, address); 384 + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 385 + if (!tidq) 386 + return 0; 649 387 650 - thread = etmq->thread; 388 + thread = tidq->thread; 651 389 if (!thread) { 652 390 if (cpumode != PERF_RECORD_MISC_KERNEL) 653 391 return 0; ··· 682 412 struct cs_etm_decoder_params d_params; 683 413 struct cs_etm_trace_params *t_params = NULL; 684 414 struct cs_etm_queue *etmq; 685 - size_t szp = sizeof(struct cs_etm_packet); 686 415 687 416 etmq = zalloc(sizeof(*etmq)); 688 417 if (!etmq) 689 418 return NULL; 690 419 691 - 
etmq->packet = zalloc(szp); 692 - if (!etmq->packet) 693 - goto out_free; 694 - 695 - etmq->prev_packet = zalloc(szp); 696 - if (!etmq->prev_packet) 697 - goto out_free; 698 - 699 - if (etm->synth_opts.last_branch) { 700 - size_t sz = sizeof(struct branch_stack); 701 - 702 - sz += etm->synth_opts.last_branch_sz * 703 - sizeof(struct branch_entry); 704 - etmq->last_branch = zalloc(sz); 705 - if (!etmq->last_branch) 706 - goto out_free; 707 - etmq->last_branch_rb = zalloc(sz); 708 - if (!etmq->last_branch_rb) 709 - goto out_free; 710 - } 711 - 712 - etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 713 - if (!etmq->event_buf) 420 + etmq->traceid_queues_list = intlist__new(NULL); 421 + if (!etmq->traceid_queues_list) 714 422 goto out_free; 715 423 716 424 /* Use metadata to fill in trace parameters for trace decoder */ ··· 725 477 out_free_decoder: 726 478 cs_etm_decoder__free(etmq->decoder); 727 479 out_free: 728 - zfree(&t_params); 729 - zfree(&etmq->event_buf); 730 - zfree(&etmq->last_branch); 731 - zfree(&etmq->last_branch_rb); 732 - zfree(&etmq->prev_packet); 733 - zfree(&etmq->packet); 480 + intlist__delete(etmq->traceid_queues_list); 734 481 free(etmq); 735 482 736 483 return NULL; ··· 736 493 unsigned int queue_nr) 737 494 { 738 495 int ret = 0; 496 + unsigned int cs_queue_nr; 497 + u8 trace_chan_id; 498 + u64 timestamp; 739 499 struct cs_etm_queue *etmq = queue->priv; 740 500 741 501 if (list_empty(&queue->head) || etmq) ··· 754 508 queue->priv = etmq; 755 509 etmq->etm = etm; 756 510 etmq->queue_nr = queue_nr; 757 - etmq->cpu = queue->cpu; 758 - etmq->tid = queue->tid; 759 - etmq->pid = -1; 760 511 etmq->offset = 0; 761 - etmq->period_instructions = 0; 762 512 513 + if (etm->timeless_decoding) 514 + goto out; 515 + 516 + /* 517 + * We are under a CPU-wide trace scenario. As such we need to know 518 + * when the code that generated the traces started to execute so that 519 + * it can be correlated with execution on other CPUs. 
So we get a 520 + * handle on the beginning of traces and decode until we find a 521 + * timestamp. The timestamp is then added to the auxtrace min heap 522 + * in order to know what nibble (of all the etmqs) to decode first. 523 + */ 524 + while (1) { 525 + /* 526 + * Fetch an aux_buffer from this etmq. Bail if no more 527 + * blocks or an error has been encountered. 528 + */ 529 + ret = cs_etm__get_data_block(etmq); 530 + if (ret <= 0) 531 + goto out; 532 + 533 + /* 534 + * Run decoder on the trace block. The decoder will stop when 535 + * encountering a timestamp, a full packet queue or the end of 536 + * trace for that block. 537 + */ 538 + ret = cs_etm__decode_data_block(etmq); 539 + if (ret) 540 + goto out; 541 + 542 + /* 543 + * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all 544 + * the timestamp calculation for us. 545 + */ 546 + timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 547 + 548 + /* We found a timestamp, no need to continue. */ 549 + if (timestamp) 550 + break; 551 + 552 + /* 553 + * We didn't find a timestamp so empty all the traceid packet 554 + * queues before looking for another timestamp packet, either 555 + * in the current data block or a new one. Packets that were 556 + * just decoded are useless since no timestamp has been 557 + * associated with them. As such simply discard them. 558 + */ 559 + cs_etm__clear_all_packet_queues(etmq); 560 + } 561 + 562 + /* 563 + * We have a timestamp. Add it to the min heap to reflect when 564 + * instructions conveyed by the range packets of this traceID queue 565 + * started to execute. Once the same has been done for all the traceID 566 + * queues of each etmq, rendering and decoding can start in 567 + * chronological order. 568 + * 569 + * Note that packets decoded above are still in the traceID's packet 570 + * queue and will be processed in cs_etm__process_queues(). 
571 + */ 572 + cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_id_chan); 573 + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); 763 574 out: 764 575 return ret; 765 576 } ··· 848 545 return 0; 849 546 } 850 547 851 - static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) 548 + static inline 549 + void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, 550 + struct cs_etm_traceid_queue *tidq) 852 551 { 853 - struct branch_stack *bs_src = etmq->last_branch_rb; 854 - struct branch_stack *bs_dst = etmq->last_branch; 552 + struct branch_stack *bs_src = tidq->last_branch_rb; 553 + struct branch_stack *bs_dst = tidq->last_branch; 855 554 size_t nr = 0; 856 555 857 556 /* ··· 873 568 * two steps. First, copy the branches from the most recently inserted 874 569 * branch ->last_branch_pos until the end of bs_src->entries buffer. 875 570 */ 876 - nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; 571 + nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; 877 572 memcpy(&bs_dst->entries[0], 878 - &bs_src->entries[etmq->last_branch_pos], 573 + &bs_src->entries[tidq->last_branch_pos], 879 574 sizeof(struct branch_entry) * nr); 880 575 881 576 /* ··· 888 583 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { 889 584 memcpy(&bs_dst->entries[nr], 890 585 &bs_src->entries[0], 891 - sizeof(struct branch_entry) * etmq->last_branch_pos); 586 + sizeof(struct branch_entry) * tidq->last_branch_pos); 892 587 } 893 588 } 894 589 895 - static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) 590 + static inline 591 + void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) 896 592 { 897 - etmq->last_branch_pos = 0; 898 - etmq->last_branch_rb->nr = 0; 593 + tidq->last_branch_pos = 0; 594 + tidq->last_branch_rb->nr = 0; 899 595 } 900 596 901 597 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, 902 - u64 addr) { 598 + u8 trace_chan_id, u64 addr) 599 + { 903 600 u8 
instrBytes[2]; 904 601 905 - cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes); 602 + cs_etm__mem_access(etmq, trace_chan_id, addr, 603 + ARRAY_SIZE(instrBytes), instrBytes); 906 604 /* 907 605 * T32 instruction size is indicated by bits[15:11] of the first 908 606 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 ··· 934 626 } 935 627 936 628 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 629 + u64 trace_chan_id, 937 630 const struct cs_etm_packet *packet, 938 631 u64 offset) 939 632 { ··· 942 633 u64 addr = packet->start_addr; 943 634 944 635 while (offset > 0) { 945 - addr += cs_etm__t32_instr_size(etmq, addr); 636 + addr += cs_etm__t32_instr_size(etmq, 637 + trace_chan_id, addr); 946 638 offset--; 947 639 } 948 640 return addr; ··· 953 643 return packet->start_addr + offset * 4; 954 644 } 955 645 956 - static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) 646 + static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, 647 + struct cs_etm_traceid_queue *tidq) 957 648 { 958 - struct branch_stack *bs = etmq->last_branch_rb; 649 + struct branch_stack *bs = tidq->last_branch_rb; 959 650 struct branch_entry *be; 960 651 961 652 /* ··· 965 654 * buffer down. After writing the first element of the stack, move the 966 655 * insert position back to the end of the buffer. 
967 656 */ 968 - if (!etmq->last_branch_pos) 969 - etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 657 + if (!tidq->last_branch_pos) 658 + tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 970 659 971 - etmq->last_branch_pos -= 1; 660 + tidq->last_branch_pos -= 1; 972 661 973 - be = &bs->entries[etmq->last_branch_pos]; 974 - be->from = cs_etm__last_executed_instr(etmq->prev_packet); 975 - be->to = cs_etm__first_executed_instr(etmq->packet); 662 + be = &bs->entries[tidq->last_branch_pos]; 663 + be->from = cs_etm__last_executed_instr(tidq->prev_packet); 664 + be->to = cs_etm__first_executed_instr(tidq->packet); 976 665 /* No support for mispredict */ 977 666 be->flags.mispred = 0; 978 667 be->flags.predicted = 1; ··· 1036 725 } 1037 726 1038 727 static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, 1039 - struct auxtrace_queue *queue) 728 + struct cs_etm_traceid_queue *tidq) 1040 729 { 1041 - struct cs_etm_queue *etmq = queue->priv; 730 + if ((!tidq->thread) && (tidq->tid != -1)) 731 + tidq->thread = machine__find_thread(etm->machine, -1, 732 + tidq->tid); 1042 733 1043 - /* CPU-wide tracing isn't supported yet */ 1044 - if (queue->tid == -1) 1045 - return; 734 + if (tidq->thread) 735 + tidq->pid = tidq->thread->pid_; 736 + } 1046 737 1047 - if ((!etmq->thread) && (etmq->tid != -1)) 1048 - etmq->thread = machine__find_thread(etm->machine, -1, 1049 - etmq->tid); 738 + int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, 739 + pid_t tid, u8 trace_chan_id) 740 + { 741 + int cpu, err = -EINVAL; 742 + struct cs_etm_auxtrace *etm = etmq->etm; 743 + struct cs_etm_traceid_queue *tidq; 1050 744 1051 - if (etmq->thread) { 1052 - etmq->pid = etmq->thread->pid_; 1053 - if (queue->cpu == -1) 1054 - etmq->cpu = etmq->thread->cpu; 1055 - } 745 + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 746 + if (!tidq) 747 + return err; 748 + 749 + if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) 750 + return err; 751 + 752 + err = 
machine__set_current_tid(etm->machine, cpu, tid, tid); 753 + if (err) 754 + return err; 755 + 756 + tidq->tid = tid; 757 + thread__zput(tidq->thread); 758 + 759 + cs_etm__set_pid_tid_cpu(etm, tidq); 760 + return 0; 761 + } 762 + 763 + bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) 764 + { 765 + return !!etmq->etm->timeless_decoding; 1056 766 } 1057 767 1058 768 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 769 + struct cs_etm_traceid_queue *tidq, 1059 770 u64 addr, u64 period) 1060 771 { 1061 772 int ret = 0; 1062 773 struct cs_etm_auxtrace *etm = etmq->etm; 1063 - union perf_event *event = etmq->event_buf; 774 + union perf_event *event = tidq->event_buf; 1064 775 struct perf_sample sample = {.ip = 0,}; 1065 776 1066 777 event->sample.header.type = PERF_RECORD_SAMPLE; ··· 1090 757 event->sample.header.size = sizeof(struct perf_event_header); 1091 758 1092 759 sample.ip = addr; 1093 - sample.pid = etmq->pid; 1094 - sample.tid = etmq->tid; 760 + sample.pid = tidq->pid; 761 + sample.tid = tidq->tid; 1095 762 sample.id = etmq->etm->instructions_id; 1096 763 sample.stream_id = etmq->etm->instructions_id; 1097 764 sample.period = period; 1098 - sample.cpu = etmq->packet->cpu; 1099 - sample.flags = etmq->prev_packet->flags; 765 + sample.cpu = tidq->packet->cpu; 766 + sample.flags = tidq->prev_packet->flags; 1100 767 sample.insn_len = 1; 1101 768 sample.cpumode = event->sample.header.misc; 1102 769 1103 770 if (etm->synth_opts.last_branch) { 1104 - cs_etm__copy_last_branch_rb(etmq); 1105 - sample.branch_stack = etmq->last_branch; 771 + cs_etm__copy_last_branch_rb(etmq, tidq); 772 + sample.branch_stack = tidq->last_branch; 1106 773 } 1107 774 1108 775 if (etm->synth_opts.inject) { ··· 1120 787 ret); 1121 788 1122 789 if (etm->synth_opts.last_branch) 1123 - cs_etm__reset_last_branch_rb(etmq); 790 + cs_etm__reset_last_branch_rb(tidq); 1124 791 1125 792 return ret; 1126 793 } ··· 1129 796 * The cs etm packet encodes an instruction range 
between a branch target 1130 797 * and the next taken branch. Generate sample accordingly. 1131 798 */ 1132 - static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) 799 + static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 800 + struct cs_etm_traceid_queue *tidq) 1133 801 { 1134 802 int ret = 0; 1135 803 struct cs_etm_auxtrace *etm = etmq->etm; 1136 804 struct perf_sample sample = {.ip = 0,}; 1137 - union perf_event *event = etmq->event_buf; 805 + union perf_event *event = tidq->event_buf; 1138 806 struct dummy_branch_stack { 1139 807 u64 nr; 1140 808 struct branch_entry entries; 1141 809 } dummy_bs; 1142 810 u64 ip; 1143 811 1144 - ip = cs_etm__last_executed_instr(etmq->prev_packet); 812 + ip = cs_etm__last_executed_instr(tidq->prev_packet); 1145 813 1146 814 event->sample.header.type = PERF_RECORD_SAMPLE; 1147 815 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); 1148 816 event->sample.header.size = sizeof(struct perf_event_header); 1149 817 1150 818 sample.ip = ip; 1151 - sample.pid = etmq->pid; 1152 - sample.tid = etmq->tid; 1153 - sample.addr = cs_etm__first_executed_instr(etmq->packet); 819 + sample.pid = tidq->pid; 820 + sample.tid = tidq->tid; 821 + sample.addr = cs_etm__first_executed_instr(tidq->packet); 1154 822 sample.id = etmq->etm->branches_id; 1155 823 sample.stream_id = etmq->etm->branches_id; 1156 824 sample.period = 1; 1157 - sample.cpu = etmq->packet->cpu; 1158 - sample.flags = etmq->prev_packet->flags; 825 + sample.cpu = tidq->packet->cpu; 826 + sample.flags = tidq->prev_packet->flags; 1159 827 sample.cpumode = event->sample.header.misc; 1160 828 1161 829 /* ··· 1299 965 return 0; 1300 966 } 1301 967 1302 - static int cs_etm__sample(struct cs_etm_queue *etmq) 968 + static int cs_etm__sample(struct cs_etm_queue *etmq, 969 + struct cs_etm_traceid_queue *tidq) 1303 970 { 1304 971 struct cs_etm_auxtrace *etm = etmq->etm; 1305 972 struct cs_etm_packet *tmp; 1306 973 int ret; 1307 - u64 instrs_executed = 
etmq->packet->instr_count; 974 + u8 trace_chan_id = tidq->trace_chan_id; 975 + u64 instrs_executed = tidq->packet->instr_count; 1308 976 1309 - etmq->period_instructions += instrs_executed; 977 + tidq->period_instructions += instrs_executed; 1310 978 1311 979 /* 1312 980 * Record a branch when the last instruction in 1313 981 * PREV_PACKET is a branch. 1314 982 */ 1315 983 if (etm->synth_opts.last_branch && 1316 - etmq->prev_packet->sample_type == CS_ETM_RANGE && 1317 - etmq->prev_packet->last_instr_taken_branch) 1318 - cs_etm__update_last_branch_rb(etmq); 984 + tidq->prev_packet->sample_type == CS_ETM_RANGE && 985 + tidq->prev_packet->last_instr_taken_branch) 986 + cs_etm__update_last_branch_rb(etmq, tidq); 1319 987 1320 988 if (etm->sample_instructions && 1321 - etmq->period_instructions >= etm->instructions_sample_period) { 989 + tidq->period_instructions >= etm->instructions_sample_period) { 1322 990 /* 1323 991 * Emit instruction sample periodically 1324 992 * TODO: allow period to be defined in cycles and clock time 1325 993 */ 1326 994 1327 995 /* Get number of instructions executed after the sample point */ 1328 - u64 instrs_over = etmq->period_instructions - 996 + u64 instrs_over = tidq->period_instructions - 1329 997 etm->instructions_sample_period; 1330 998 1331 999 /* ··· 1336 1000 * executed, but PC has not advanced to next instruction) 1337 1001 */ 1338 1002 u64 offset = (instrs_executed - instrs_over - 1); 1339 - u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset); 1003 + u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, 1004 + tidq->packet, offset); 1340 1005 1341 1006 ret = cs_etm__synth_instruction_sample( 1342 - etmq, addr, etm->instructions_sample_period); 1007 + etmq, tidq, addr, etm->instructions_sample_period); 1343 1008 if (ret) 1344 1009 return ret; 1345 1010 1346 1011 /* Carry remaining instructions into next sample period */ 1347 - etmq->period_instructions = instrs_over; 1012 + tidq->period_instructions = instrs_over; 1348 1013 } 
1349 1014 1350 1015 if (etm->sample_branches) { 1351 1016 bool generate_sample = false; 1352 1017 1353 1018 /* Generate sample for tracing on packet */ 1354 - if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1019 + if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1355 1020 generate_sample = true; 1356 1021 1357 1022 /* Generate sample for branch taken packet */ 1358 - if (etmq->prev_packet->sample_type == CS_ETM_RANGE && 1359 - etmq->prev_packet->last_instr_taken_branch) 1023 + if (tidq->prev_packet->sample_type == CS_ETM_RANGE && 1024 + tidq->prev_packet->last_instr_taken_branch) 1360 1025 generate_sample = true; 1361 1026 1362 1027 if (generate_sample) { 1363 - ret = cs_etm__synth_branch_sample(etmq); 1028 + ret = cs_etm__synth_branch_sample(etmq, tidq); 1364 1029 if (ret) 1365 1030 return ret; 1366 1031 } ··· 1372 1035 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1373 1036 * the next incoming packet. 1374 1037 */ 1375 - tmp = etmq->packet; 1376 - etmq->packet = etmq->prev_packet; 1377 - etmq->prev_packet = tmp; 1038 + tmp = tidq->packet; 1039 + tidq->packet = tidq->prev_packet; 1040 + tidq->prev_packet = tmp; 1378 1041 } 1379 1042 1380 1043 return 0; 1381 1044 } 1382 1045 1383 - static int cs_etm__exception(struct cs_etm_queue *etmq) 1046 + static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) 1384 1047 { 1385 1048 /* 1386 1049 * When the exception packet is inserted, whether the last instruction ··· 1393 1056 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful 1394 1057 * for generating instruction and branch samples. 
1395 1058 */ 1396 - if (etmq->prev_packet->sample_type == CS_ETM_RANGE) 1397 - etmq->prev_packet->last_instr_taken_branch = true; 1059 + if (tidq->prev_packet->sample_type == CS_ETM_RANGE) 1060 + tidq->prev_packet->last_instr_taken_branch = true; 1398 1061 1399 1062 return 0; 1400 1063 } 1401 1064 1402 - static int cs_etm__flush(struct cs_etm_queue *etmq) 1065 + static int cs_etm__flush(struct cs_etm_queue *etmq, 1066 + struct cs_etm_traceid_queue *tidq) 1403 1067 { 1404 1068 int err = 0; 1405 1069 struct cs_etm_auxtrace *etm = etmq->etm; 1406 1070 struct cs_etm_packet *tmp; 1407 1071 1408 1072 /* Handle start tracing packet */ 1409 - if (etmq->prev_packet->sample_type == CS_ETM_EMPTY) 1073 + if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) 1410 1074 goto swap_packet; 1411 1075 1412 1076 if (etmq->etm->synth_opts.last_branch && 1413 - etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1077 + tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1414 1078 /* 1415 1079 * Generate a last branch event for the branches left in the 1416 1080 * circular buffer at the end of the trace. 
··· 1419 1081 * Use the address of the end of the last reported execution 1420 1082 * range 1421 1083 */ 1422 - u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); 1084 + u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1423 1085 1424 1086 err = cs_etm__synth_instruction_sample( 1425 - etmq, addr, 1426 - etmq->period_instructions); 1087 + etmq, tidq, addr, 1088 + tidq->period_instructions); 1427 1089 if (err) 1428 1090 return err; 1429 1091 1430 - etmq->period_instructions = 0; 1092 + tidq->period_instructions = 0; 1431 1093 1432 1094 } 1433 1095 1434 1096 if (etm->sample_branches && 1435 - etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1436 - err = cs_etm__synth_branch_sample(etmq); 1097 + tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1098 + err = cs_etm__synth_branch_sample(etmq, tidq); 1437 1099 if (err) 1438 1100 return err; 1439 1101 } ··· 1444 1106 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1445 1107 * the next incoming packet. 1446 1108 */ 1447 - tmp = etmq->packet; 1448 - etmq->packet = etmq->prev_packet; 1449 - etmq->prev_packet = tmp; 1109 + tmp = tidq->packet; 1110 + tidq->packet = tidq->prev_packet; 1111 + tidq->prev_packet = tmp; 1450 1112 } 1451 1113 1452 1114 return err; 1453 1115 } 1454 1116 1455 - static int cs_etm__end_block(struct cs_etm_queue *etmq) 1117 + static int cs_etm__end_block(struct cs_etm_queue *etmq, 1118 + struct cs_etm_traceid_queue *tidq) 1456 1119 { 1457 1120 int err; 1458 1121 ··· 1467 1128 * the trace. 1468 1129 */ 1469 1130 if (etmq->etm->synth_opts.last_branch && 1470 - etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1131 + tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1471 1132 /* 1472 1133 * Use the address of the end of the last reported execution 1473 1134 * range. 
1474 1135 */ 1475 - u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); 1136 + u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1476 1137 1477 1138 err = cs_etm__synth_instruction_sample( 1478 - etmq, addr, 1479 - etmq->period_instructions); 1139 + etmq, tidq, addr, 1140 + tidq->period_instructions); 1480 1141 if (err) 1481 1142 return err; 1482 1143 1483 - etmq->period_instructions = 0; 1144 + tidq->period_instructions = 0; 1484 1145 } 1485 1146 1486 1147 return 0; ··· 1512 1173 return etmq->buf_len; 1513 1174 } 1514 1175 1515 - static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, 1176 + static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, 1516 1177 struct cs_etm_packet *packet, 1517 1178 u64 end_addr) 1518 1179 { 1519 - u16 instr16; 1520 - u32 instr32; 1180 + /* Initialise to keep compiler happy */ 1181 + u16 instr16 = 0; 1182 + u32 instr32 = 0; 1521 1183 u64 addr; 1522 1184 1523 1185 switch (packet->isa) { ··· 1536 1196 * so below only read 2 bytes as instruction size for T32. 
1537 1197 */ 1538 1198 addr = end_addr - 2; 1539 - cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); 1199 + cs_etm__mem_access(etmq, trace_chan_id, addr, 1200 + sizeof(instr16), (u8 *)&instr16); 1540 1201 if ((instr16 & 0xFF00) == 0xDF00) 1541 1202 return true; 1542 1203 ··· 1552 1211 * +---------+---------+-------------------------+ 1553 1212 */ 1554 1213 addr = end_addr - 4; 1555 - cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); 1214 + cs_etm__mem_access(etmq, trace_chan_id, addr, 1215 + sizeof(instr32), (u8 *)&instr32); 1556 1216 if ((instr32 & 0x0F000000) == 0x0F000000 && 1557 1217 (instr32 & 0xF0000000) != 0xF0000000) 1558 1218 return true; ··· 1569 1227 * +-----------------------+---------+-----------+ 1570 1228 */ 1571 1229 addr = end_addr - 4; 1572 - cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); 1230 + cs_etm__mem_access(etmq, trace_chan_id, addr, 1231 + sizeof(instr32), (u8 *)&instr32); 1573 1232 if ((instr32 & 0xFFE0001F) == 0xd4000001) 1574 1233 return true; 1575 1234 ··· 1583 1240 return false; 1584 1241 } 1585 1242 1586 - static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) 1243 + static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, 1244 + struct cs_etm_traceid_queue *tidq, u64 magic) 1587 1245 { 1588 - struct cs_etm_packet *packet = etmq->packet; 1589 - struct cs_etm_packet *prev_packet = etmq->prev_packet; 1246 + u8 trace_chan_id = tidq->trace_chan_id; 1247 + struct cs_etm_packet *packet = tidq->packet; 1248 + struct cs_etm_packet *prev_packet = tidq->prev_packet; 1590 1249 1591 1250 if (magic == __perf_cs_etmv3_magic) 1592 1251 if (packet->exception_number == CS_ETMV3_EXC_SVC) ··· 1601 1256 */ 1602 1257 if (magic == __perf_cs_etmv4_magic) { 1603 1258 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1604 - cs_etm__is_svc_instr(etmq, prev_packet, 1259 + cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 1605 1260 prev_packet->end_addr)) 1606 1261 return true; 1607 1262 } 
··· 1609 1264 return false; 1610 1265 } 1611 1266 1612 - static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) 1267 + static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, 1268 + u64 magic) 1613 1269 { 1614 - struct cs_etm_packet *packet = etmq->packet; 1270 + struct cs_etm_packet *packet = tidq->packet; 1615 1271 1616 1272 if (magic == __perf_cs_etmv3_magic) 1617 1273 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || ··· 1635 1289 return false; 1636 1290 } 1637 1291 1638 - static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) 1292 + static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, 1293 + struct cs_etm_traceid_queue *tidq, 1294 + u64 magic) 1639 1295 { 1640 - struct cs_etm_packet *packet = etmq->packet; 1641 - struct cs_etm_packet *prev_packet = etmq->prev_packet; 1296 + u8 trace_chan_id = tidq->trace_chan_id; 1297 + struct cs_etm_packet *packet = tidq->packet; 1298 + struct cs_etm_packet *prev_packet = tidq->prev_packet; 1642 1299 1643 1300 if (magic == __perf_cs_etmv3_magic) 1644 1301 if (packet->exception_number == CS_ETMV3_EXC_SMC || ··· 1665 1316 * (SMC, HVC) are taken as sync exceptions. 
1666 1317 */ 1667 1318 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1668 - !cs_etm__is_svc_instr(etmq, prev_packet, 1319 + !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 1669 1320 prev_packet->end_addr)) 1670 1321 return true; 1671 1322 ··· 1684 1335 return false; 1685 1336 } 1686 1337 1687 - static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) 1338 + static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, 1339 + struct cs_etm_traceid_queue *tidq) 1688 1340 { 1689 - struct cs_etm_packet *packet = etmq->packet; 1690 - struct cs_etm_packet *prev_packet = etmq->prev_packet; 1341 + struct cs_etm_packet *packet = tidq->packet; 1342 + struct cs_etm_packet *prev_packet = tidq->prev_packet; 1343 + u8 trace_chan_id = tidq->trace_chan_id; 1691 1344 u64 magic; 1692 1345 int ret; 1693 1346 ··· 1770 1419 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 1771 1420 PERF_IP_FLAG_RETURN | 1772 1421 PERF_IP_FLAG_INTERRUPT) && 1773 - cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) 1422 + cs_etm__is_svc_instr(etmq, trace_chan_id, 1423 + packet, packet->start_addr)) 1774 1424 prev_packet->flags = PERF_IP_FLAG_BRANCH | 1775 1425 PERF_IP_FLAG_RETURN | 1776 1426 PERF_IP_FLAG_SYSCALLRET; ··· 1792 1440 return ret; 1793 1441 1794 1442 /* The exception is for system call. */ 1795 - if (cs_etm__is_syscall(etmq, magic)) 1443 + if (cs_etm__is_syscall(etmq, tidq, magic)) 1796 1444 packet->flags = PERF_IP_FLAG_BRANCH | 1797 1445 PERF_IP_FLAG_CALL | 1798 1446 PERF_IP_FLAG_SYSCALLRET; ··· 1800 1448 * The exceptions are triggered by external signals from bus, 1801 1449 * interrupt controller, debug module, PE reset or halt. 
1802 1450 */ 1803 - else if (cs_etm__is_async_exception(etmq, magic)) 1451 + else if (cs_etm__is_async_exception(tidq, magic)) 1804 1452 packet->flags = PERF_IP_FLAG_BRANCH | 1805 1453 PERF_IP_FLAG_CALL | 1806 1454 PERF_IP_FLAG_ASYNC | ··· 1809 1457 * Otherwise, exception is caused by trap, instruction & 1810 1458 * data fault, or alignment errors. 1811 1459 */ 1812 - else if (cs_etm__is_sync_exception(etmq, magic)) 1460 + else if (cs_etm__is_sync_exception(etmq, tidq, magic)) 1813 1461 packet->flags = PERF_IP_FLAG_BRANCH | 1814 1462 PERF_IP_FLAG_CALL | 1815 1463 PERF_IP_FLAG_INTERRUPT; ··· 1891 1539 return ret; 1892 1540 } 1893 1541 1894 - static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) 1542 + static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, 1543 + struct cs_etm_traceid_queue *tidq) 1895 1544 { 1896 1545 int ret; 1546 + struct cs_etm_packet_queue *packet_queue; 1897 1547 1898 - /* Process each packet in this chunk */ 1899 - while (1) { 1900 - ret = cs_etm_decoder__get_packet(etmq->decoder, 1901 - etmq->packet); 1902 - if (ret <= 0) 1903 - /* 1904 - * Stop processing this chunk on 1905 - * end of data or error 1906 - */ 1907 - break; 1548 + packet_queue = &tidq->packet_queue; 1908 1549 1550 + /* Process each packet in this chunk */ 1551 + while (1) { 1552 + ret = cs_etm_decoder__get_packet(packet_queue, 1553 + tidq->packet); 1554 + if (ret <= 0) 1909 1555 /* 1910 - * Since packet addresses are swapped in packet 1911 - * handling within below switch() statements, 1912 - * thus setting sample flags must be called 1913 - * prior to switch() statement to use address 1914 - * information before packets swapping. 
1556 + * Stop processing this chunk on 1557 + * end of data or error 1915 1558 */ 1916 - ret = cs_etm__set_sample_flags(etmq); 1917 - if (ret < 0) 1918 - break; 1559 + break; 1919 1560 1920 - switch (etmq->packet->sample_type) { 1921 - case CS_ETM_RANGE: 1922 - /* 1923 - * If the packet contains an instruction 1924 - * range, generate instruction sequence 1925 - * events. 1926 - */ 1927 - cs_etm__sample(etmq); 1928 - break; 1929 - case CS_ETM_EXCEPTION: 1930 - case CS_ETM_EXCEPTION_RET: 1931 - /* 1932 - * If the exception packet is coming, 1933 - * make sure the previous instruction 1934 - * range packet to be handled properly. 1935 - */ 1936 - cs_etm__exception(etmq); 1937 - break; 1938 - case CS_ETM_DISCONTINUITY: 1939 - /* 1940 - * Discontinuity in trace, flush 1941 - * previous branch stack 1942 - */ 1943 - cs_etm__flush(etmq); 1944 - break; 1945 - case CS_ETM_EMPTY: 1946 - /* 1947 - * Should not receive empty packet, 1948 - * report error. 1949 - */ 1950 - pr_err("CS ETM Trace: empty packet\n"); 1951 - return -EINVAL; 1952 - default: 1953 - break; 1954 - } 1561 + /* 1562 + * Since packet addresses are swapped in packet 1563 + * handling within below switch() statements, 1564 + * thus setting sample flags must be called 1565 + * prior to switch() statement to use address 1566 + * information before packets swapping. 1567 + */ 1568 + ret = cs_etm__set_sample_flags(etmq, tidq); 1569 + if (ret < 0) 1570 + break; 1571 + 1572 + switch (tidq->packet->sample_type) { 1573 + case CS_ETM_RANGE: 1574 + /* 1575 + * If the packet contains an instruction 1576 + * range, generate instruction sequence 1577 + * events. 1578 + */ 1579 + cs_etm__sample(etmq, tidq); 1580 + break; 1581 + case CS_ETM_EXCEPTION: 1582 + case CS_ETM_EXCEPTION_RET: 1583 + /* 1584 + * If the exception packet is coming, 1585 + * make sure the previous instruction 1586 + * range packet to be handled properly. 
1587 + */ 1588 + cs_etm__exception(tidq); 1589 + break; 1590 + case CS_ETM_DISCONTINUITY: 1591 + /* 1592 + * Discontinuity in trace, flush 1593 + * previous branch stack 1594 + */ 1595 + cs_etm__flush(etmq, tidq); 1596 + break; 1597 + case CS_ETM_EMPTY: 1598 + /* 1599 + * Should not receive empty packet, 1600 + * report error. 1601 + */ 1602 + pr_err("CS ETM Trace: empty packet\n"); 1603 + return -EINVAL; 1604 + default: 1605 + break; 1955 1606 } 1607 + } 1956 1608 1957 1609 return ret; 1610 + } 1611 + 1612 + static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) 1613 + { 1614 + int idx; 1615 + struct int_node *inode; 1616 + struct cs_etm_traceid_queue *tidq; 1617 + struct intlist *traceid_queues_list = etmq->traceid_queues_list; 1618 + 1619 + intlist__for_each_entry(inode, traceid_queues_list) { 1620 + idx = (int)(intptr_t)inode->priv; 1621 + tidq = etmq->traceid_queues[idx]; 1622 + 1623 + /* Ignore return value */ 1624 + cs_etm__process_traceid_queue(etmq, tidq); 1625 + 1626 + /* 1627 + * Generate an instruction sample with the remaining 1628 + * branchstack entries. 1629 + */ 1630 + cs_etm__flush(etmq, tidq); 1631 + } 1958 1632 } 1959 1633 1960 1634 static int cs_etm__run_decoder(struct cs_etm_queue *etmq) 1961 1635 { 1962 1636 int err = 0; 1637 + struct cs_etm_traceid_queue *tidq; 1638 + 1639 + tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); 1640 + if (!tidq) 1641 + return -EINVAL; 1963 1642 1964 1643 /* Go through each buffer in the queue and decode them one by one */ 1965 1644 while (1) { ··· 2009 1626 * an error occurs other than hoping the next one will 2010 1627 * be better. 
2011 1628 */ 2012 - err = cs_etm__process_decoder_queue(etmq); 1629 + err = cs_etm__process_traceid_queue(etmq, tidq); 2013 1630 2014 1631 } while (etmq->buf_len); 2015 1632 2016 1633 if (err == 0) 2017 1634 /* Flush any remaining branch stack entries */ 2018 - err = cs_etm__end_block(etmq); 1635 + err = cs_etm__end_block(etmq, tidq); 2019 1636 } 2020 1637 2021 1638 return err; ··· 2030 1647 for (i = 0; i < queues->nr_queues; i++) { 2031 1648 struct auxtrace_queue *queue = &etm->queues.queue_array[i]; 2032 1649 struct cs_etm_queue *etmq = queue->priv; 1650 + struct cs_etm_traceid_queue *tidq; 2033 1651 2034 - if (etmq && ((tid == -1) || (etmq->tid == tid))) { 2035 - cs_etm__set_pid_tid_cpu(etm, queue); 1652 + if (!etmq) 1653 + continue; 1654 + 1655 + tidq = cs_etm__etmq_get_traceid_queue(etmq, 1656 + CS_ETM_PER_THREAD_TRACEID); 1657 + 1658 + if (!tidq) 1659 + continue; 1660 + 1661 + if ((tid == -1) || (tidq->tid == tid)) { 1662 + cs_etm__set_pid_tid_cpu(etm, tidq); 2036 1663 cs_etm__run_decoder(etmq); 2037 1664 } 2038 1665 } 1666 + 1667 + return 0; 1668 + } 1669 + 1670 + static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) 1671 + { 1672 + int ret = 0; 1673 + unsigned int cs_queue_nr, queue_nr; 1674 + u8 trace_chan_id; 1675 + u64 timestamp; 1676 + struct auxtrace_queue *queue; 1677 + struct cs_etm_queue *etmq; 1678 + struct cs_etm_traceid_queue *tidq; 1679 + 1680 + while (1) { 1681 + if (!etm->heap.heap_cnt) 1682 + goto out; 1683 + 1684 + /* Take the entry at the top of the min heap */ 1685 + cs_queue_nr = etm->heap.heap_array[0].queue_nr; 1686 + queue_nr = TO_QUEUE_NR(cs_queue_nr); 1687 + trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr); 1688 + queue = &etm->queues.queue_array[queue_nr]; 1689 + etmq = queue->priv; 1690 + 1691 + /* 1692 + * Remove the top entry from the heap since we are about 1693 + * to process it. 
1694 + */ 1695 + auxtrace_heap__pop(&etm->heap); 1696 + 1697 + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1698 + if (!tidq) { 1699 + /* 1700 + * No traceID queue has been allocated for this traceID, 1701 + * which means something somewhere went very wrong. No 1702 + * other choice than simply exit. 1703 + */ 1704 + ret = -EINVAL; 1705 + goto out; 1706 + } 1707 + 1708 + /* 1709 + * Packets associated with this timestamp are already in 1710 + * the etmq's traceID queue, so process them. 1711 + */ 1712 + ret = cs_etm__process_traceid_queue(etmq, tidq); 1713 + if (ret < 0) 1714 + goto out; 1715 + 1716 + /* 1717 + * Packets for this timestamp have been processed, time to 1718 + * move on to the next timestamp, fetching a new auxtrace_buffer 1719 + * if need be. 1720 + */ 1721 + refetch: 1722 + ret = cs_etm__get_data_block(etmq); 1723 + if (ret < 0) 1724 + goto out; 1725 + 1726 + /* 1727 + * No more auxtrace_buffers to process in this etmq, simply 1728 + * move on to another entry in the auxtrace_heap. 1729 + */ 1730 + if (!ret) 1731 + continue; 1732 + 1733 + ret = cs_etm__decode_data_block(etmq); 1734 + if (ret) 1735 + goto out; 1736 + 1737 + timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 1738 + 1739 + if (!timestamp) { 1740 + /* 1741 + * Function cs_etm__decode_data_block() returns when 1742 + * there is no more traces to decode in the current 1743 + * auxtrace_buffer OR when a timestamp has been 1744 + * encountered on any of the traceID queues. Since we 1745 + * did not get a timestamp, there is no more traces to 1746 + * process in this auxtrace_buffer. As such empty and 1747 + * flush all traceID queues. 1748 + */ 1749 + cs_etm__clear_all_traceid_queues(etmq); 1750 + 1751 + /* Fetch another auxtrace_buffer for this etmq */ 1752 + goto refetch; 1753 + } 1754 + 1755 + /* 1756 + * Add to the min heap the timestamp for packets that have 1757 + * just been decoded. 
They will be processed and synthesized 1758 + * during the next call to cs_etm__process_traceid_queue() for 1759 + * this queue/traceID. 1760 + */ 1761 + cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 1762 + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); 1763 + } 1764 + 1765 + out: 1766 + return ret; 1767 + } 1768 + 1769 + static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, 1770 + union perf_event *event) 1771 + { 1772 + struct thread *th; 1773 + 1774 + if (etm->timeless_decoding) 1775 + return 0; 1776 + 1777 + /* 1778 + * Add the tid/pid to the log so that we can get a match when 1779 + * we get a contextID from the decoder. 1780 + */ 1781 + th = machine__findnew_thread(etm->machine, 1782 + event->itrace_start.pid, 1783 + event->itrace_start.tid); 1784 + if (!th) 1785 + return -ENOMEM; 1786 + 1787 + thread__put(th); 1788 + 1789 + return 0; 1790 + } 1791 + 1792 + static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, 1793 + union perf_event *event) 1794 + { 1795 + struct thread *th; 1796 + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 1797 + 1798 + /* 1799 + * Context switch in per-thread mode are irrelevant since perf 1800 + * will start/stop tracing as the process is scheduled. 1801 + */ 1802 + if (etm->timeless_decoding) 1803 + return 0; 1804 + 1805 + /* 1806 + * SWITCH_IN events carry the next process to be switched out while 1807 + * SWITCH_OUT events carry the process to be switched in. As such 1808 + * we don't care about IN events. 1809 + */ 1810 + if (!out) 1811 + return 0; 1812 + 1813 + /* 1814 + * Add the tid/pid to the log so that we can get a match when 1815 + * we get a contextID from the decoder. 
1816 + */ 1817 + th = machine__findnew_thread(etm->machine, 1818 + event->context_switch.next_prev_pid, 1819 + event->context_switch.next_prev_tid); 1820 + if (!th) 1821 + return -ENOMEM; 1822 + 1823 + thread__put(th); 2039 1824 2040 1825 return 0; 2041 1826 } ··· 2227 1676 return -EINVAL; 2228 1677 } 2229 1678 2230 - if (!etm->timeless_decoding) 2231 - return -EINVAL; 2232 - 2233 1679 if (sample->time && (sample->time != (u64) -1)) 2234 1680 timestamp = sample->time; 2235 1681 else ··· 2238 1690 return err; 2239 1691 } 2240 1692 2241 - if (event->header.type == PERF_RECORD_EXIT) 1693 + if (etm->timeless_decoding && 1694 + event->header.type == PERF_RECORD_EXIT) 2242 1695 return cs_etm__process_timeless_queues(etm, 2243 1696 event->fork.tid); 1697 + 1698 + if (event->header.type == PERF_RECORD_ITRACE_START) 1699 + return cs_etm__process_itrace_start(etm, event); 1700 + else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) 1701 + return cs_etm__process_switch_cpu_wide(etm, event); 1702 + 1703 + if (!etm->timeless_decoding && 1704 + event->header.type == PERF_RECORD_AUX) 1705 + return cs_etm__process_queues(etm); 2244 1706 2245 1707 return 0; 2246 1708 }
+94
tools/perf/util/cs-etm.h
··· 9 9 10 10 #include "util/event.h" 11 11 #include "util/session.h" 12 + #include <linux/bits.h> 12 13 13 14 /* Versionning header in case things need tro change in the future. That way 14 15 * decoding of old snapshot is still possible. ··· 98 97 CS_ETMV4_EXC_END = 31, 99 98 }; 100 99 100 + enum cs_etm_sample_type { 101 + CS_ETM_EMPTY, 102 + CS_ETM_RANGE, 103 + CS_ETM_DISCONTINUITY, 104 + CS_ETM_EXCEPTION, 105 + CS_ETM_EXCEPTION_RET, 106 + }; 107 + 108 + enum cs_etm_isa { 109 + CS_ETM_ISA_UNKNOWN, 110 + CS_ETM_ISA_A64, 111 + CS_ETM_ISA_A32, 112 + CS_ETM_ISA_T32, 113 + }; 114 + 101 115 /* RB tree for quick conversion between traceID and metadata pointers */ 102 116 struct intlist *traceid_list; 103 117 118 + struct cs_etm_queue; 119 + 120 + struct cs_etm_packet { 121 + enum cs_etm_sample_type sample_type; 122 + enum cs_etm_isa isa; 123 + u64 start_addr; 124 + u64 end_addr; 125 + u32 instr_count; 126 + u32 last_instr_type; 127 + u32 last_instr_subtype; 128 + u32 flags; 129 + u32 exception_number; 130 + u8 last_instr_cond; 131 + u8 last_instr_taken_branch; 132 + u8 last_instr_size; 133 + u8 trace_chan_id; 134 + int cpu; 135 + }; 136 + 137 + #define CS_ETM_PACKET_MAX_BUFFER 1024 138 + 139 + /* 140 + * When working with per-thread scenarios the process under trace can 141 + * be scheduled on any CPU and as such, more than one traceID may be 142 + * associated with the same process. Since a traceID of '0' is illegal 143 + * as per the CoreSight architecture, use that specific value to 144 + * identify the queue where all packets (with any traceID) are 145 + * aggregated. 
146 + */ 147 + #define CS_ETM_PER_THREAD_TRACEID 0 148 + 149 + struct cs_etm_packet_queue { 150 + u32 packet_count; 151 + u32 head; 152 + u32 tail; 153 + u32 instr_count; 154 + u64 timestamp; 155 + u64 next_timestamp; 156 + struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; 157 + }; 158 + 104 159 #define KiB(x) ((x) * 1024) 105 160 #define MiB(x) ((x) * 1024 * 1024) 161 + 162 + #define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL 163 + 164 + #define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) 106 165 107 166 #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) 108 167 ··· 175 114 int cs_etm__process_auxtrace_info(union perf_event *event, 176 115 struct perf_session *session); 177 116 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); 117 + int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, 118 + pid_t tid, u8 trace_chan_id); 119 + bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); 120 + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 121 + u8 trace_chan_id); 122 + struct cs_etm_packet_queue 123 + *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id); 178 124 #else 179 125 static inline int 180 126 cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, ··· 194 126 int *cpu __maybe_unused) 195 127 { 196 128 return -1; 129 + } 130 + 131 + static inline int cs_etm__etmq_set_tid( 132 + struct cs_etm_queue *etmq __maybe_unused, 133 + pid_t tid __maybe_unused, 134 + u8 trace_chan_id __maybe_unused) 135 + { 136 + return -1; 137 + } 138 + 139 + static inline bool cs_etm__etmq_is_timeless( 140 + struct cs_etm_queue *etmq __maybe_unused) 141 + { 142 + /* What else to return? 
*/ 143 + return true; 144 + } 145 + 146 + static inline void cs_etm__etmq_set_traceid_queue_timestamp( 147 + struct cs_etm_queue *etmq __maybe_unused, 148 + u8 trace_chan_id __maybe_unused) {} 149 + 150 + static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue( 151 + struct cs_etm_queue *etmq __maybe_unused, 152 + u8 trace_chan_id __maybe_unused) 153 + { 154 + return NULL; 197 155 } 198 156 #endif 199 157
-49
tools/perf/util/ctype.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * Sane locale-independent, ASCII ctype. 4 - * 5 - * No surprises, and works with signed and unsigned chars. 6 - */ 7 - #include "sane_ctype.h" 8 - 9 - enum { 10 - S = GIT_SPACE, 11 - A = GIT_ALPHA, 12 - D = GIT_DIGIT, 13 - G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ 14 - R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ 15 - P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */ 16 - 17 - PS = GIT_SPACE | GIT_PRINT_EXTRA, 18 - }; 19 - 20 - unsigned char sane_ctype[256] = { 21 - /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 22 - 23 - 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ 24 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ 25 - PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */ 26 - D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */ 27 - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ 28 - A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */ 29 - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ 30 - A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ 31 - /* Nothing in the 128.. range */ 32 - }; 33 - 34 - const char *graph_line = 35 - "_____________________________________________________________________" 36 - "_____________________________________________________________________" 37 - "_____________________________________________________________________"; 38 - const char *graph_dotted_line = 39 - "---------------------------------------------------------------------" 40 - "---------------------------------------------------------------------" 41 - "---------------------------------------------------------------------"; 42 - const char *spaces = 43 - " " 44 - " " 45 - " "; 46 - const char *dots = 47 - "....................................................................." 48 - "....................................................................." 
49 - ".....................................................................";
+1 -1
tools/perf/util/data-convert-bt.c
··· 29 29 #include "evsel.h" 30 30 #include "machine.h" 31 31 #include "config.h" 32 - #include "sane_ctype.h" 32 + #include <linux/ctype.h> 33 33 34 34 #define pr_N(n, fmt, ...) \ 35 35 eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
+1 -1
tools/perf/util/debug.c
··· 21 21 #include "util.h" 22 22 #include "target.h" 23 23 24 - #include "sane_ctype.h" 24 + #include <linux/ctype.h> 25 25 26 26 int verbose; 27 27 bool dump_trace = false, quiet = false;
+1 -1
tools/perf/util/demangle-java.c
··· 8 8 9 9 #include "demangle-java.h" 10 10 11 - #include "sane_ctype.h" 11 + #include <linux/ctype.h> 12 12 13 13 enum { 14 14 MODE_PREFIX = 0,
+93 -39
tools/perf/util/dso.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <asm/bug.h> 3 3 #include <linux/kernel.h> 4 + #include <linux/string.h> 4 5 #include <sys/time.h> 5 6 #include <sys/resource.h> 6 7 #include <sys/types.h> ··· 10 9 #include <errno.h> 11 10 #include <fcntl.h> 12 11 #include <libgen.h> 12 + #include <bpf/libbpf.h> 13 + #include "bpf-event.h" 13 14 #include "compress.h" 14 15 #include "namespaces.h" 15 16 #include "path.h" ··· 395 392 return -ENOMEM; 396 393 } 397 394 398 - strxfrchar(m->name, '-', '_'); 395 + strreplace(m->name, '-', '_'); 399 396 } 400 397 401 398 return 0; ··· 709 706 return false; 710 707 } 711 708 709 + static ssize_t bpf_read(struct dso *dso, u64 offset, char *data) 710 + { 711 + struct bpf_prog_info_node *node; 712 + ssize_t size = DSO__DATA_CACHE_SIZE; 713 + u64 len; 714 + u8 *buf; 715 + 716 + node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); 717 + if (!node || !node->info_linear) { 718 + dso->data.status = DSO_DATA_STATUS_ERROR; 719 + return -1; 720 + } 721 + 722 + len = node->info_linear->info.jited_prog_len; 723 + buf = (u8 *)(uintptr_t)node->info_linear->info.jited_prog_insns; 724 + 725 + if (offset >= len) 726 + return -1; 727 + 728 + size = (ssize_t)min(len - offset, (u64)size); 729 + memcpy(data, buf + offset, size); 730 + return size; 731 + } 732 + 733 + static int bpf_size(struct dso *dso) 734 + { 735 + struct bpf_prog_info_node *node; 736 + 737 + node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); 738 + if (!node || !node->info_linear) { 739 + dso->data.status = DSO_DATA_STATUS_ERROR; 740 + return -1; 741 + } 742 + 743 + dso->data.file_size = node->info_linear->info.jited_prog_len; 744 + return 0; 745 + } 746 + 712 747 static void 713 748 dso_cache__free(struct dso *dso) 714 749 { ··· 835 794 return cache_size; 836 795 } 837 796 797 + static ssize_t file_read(struct dso *dso, struct machine *machine, 798 + u64 offset, char *data) 799 + { 800 + ssize_t ret; 801 + 802 + 
pthread_mutex_lock(&dso__data_open_lock); 803 + 804 + /* 805 + * dso->data.fd might be closed if other thread opened another 806 + * file (dso) due to open file limit (RLIMIT_NOFILE). 807 + */ 808 + try_to_open_dso(dso, machine); 809 + 810 + if (dso->data.fd < 0) { 811 + dso->data.status = DSO_DATA_STATUS_ERROR; 812 + ret = -errno; 813 + goto out; 814 + } 815 + 816 + ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset); 817 + out: 818 + pthread_mutex_unlock(&dso__data_open_lock); 819 + return ret; 820 + } 821 + 838 822 static ssize_t 839 823 dso_cache__read(struct dso *dso, struct machine *machine, 840 824 u64 offset, u8 *data, ssize_t size) 841 825 { 826 + u64 cache_offset = offset & DSO__DATA_CACHE_MASK; 842 827 struct dso_cache *cache; 843 828 struct dso_cache *old; 844 829 ssize_t ret; 845 830 846 - do { 847 - u64 cache_offset; 831 + cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE); 832 + if (!cache) 833 + return -ENOMEM; 848 834 849 - cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE); 850 - if (!cache) 851 - return -ENOMEM; 852 - 853 - pthread_mutex_lock(&dso__data_open_lock); 854 - 855 - /* 856 - * dso->data.fd might be closed if other thread opened another 857 - * file (dso) due to open file limit (RLIMIT_NOFILE). 
858 - */ 859 - try_to_open_dso(dso, machine); 860 - 861 - if (dso->data.fd < 0) { 862 - ret = -errno; 863 - dso->data.status = DSO_DATA_STATUS_ERROR; 864 - break; 865 - } 866 - 867 - cache_offset = offset & DSO__DATA_CACHE_MASK; 868 - 869 - ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset); 870 - if (ret <= 0) 871 - break; 872 - 873 - cache->offset = cache_offset; 874 - cache->size = ret; 875 - } while (0); 876 - 877 - pthread_mutex_unlock(&dso__data_open_lock); 835 + if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) 836 + ret = bpf_read(dso, cache_offset, cache->data); 837 + else 838 + ret = file_read(dso, machine, cache_offset, cache->data); 878 839 879 840 if (ret > 0) { 841 + cache->offset = cache_offset; 842 + cache->size = ret; 843 + 880 844 old = dso_cache__insert(dso, cache); 881 845 if (old) { 882 846 /* we lose the race */ ··· 944 898 return r; 945 899 } 946 900 947 - int dso__data_file_size(struct dso *dso, struct machine *machine) 901 + static int file_size(struct dso *dso, struct machine *machine) 948 902 { 949 903 int ret = 0; 950 904 struct stat st; 951 905 char sbuf[STRERR_BUFSIZE]; 952 - 953 - if (dso->data.file_size) 954 - return 0; 955 - 956 - if (dso->data.status == DSO_DATA_STATUS_ERROR) 957 - return -1; 958 906 959 907 pthread_mutex_lock(&dso__data_open_lock); 960 908 ··· 976 936 out: 977 937 pthread_mutex_unlock(&dso__data_open_lock); 978 938 return ret; 939 + } 940 + 941 + int dso__data_file_size(struct dso *dso, struct machine *machine) 942 + { 943 + if (dso->data.file_size) 944 + return 0; 945 + 946 + if (dso->data.status == DSO_DATA_STATUS_ERROR) 947 + return -1; 948 + 949 + if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) 950 + return bpf_size(dso); 951 + 952 + return file_size(dso, machine); 979 953 } 980 954 981 955 /**
+2 -1
tools/perf/util/env.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include "cpumap.h" 3 3 #include "env.h" 4 - #include "sane_ctype.h" 4 + #include <linux/ctype.h> 5 5 #include "util.h" 6 6 #include "bpf-event.h" 7 7 #include <errno.h> ··· 246 246 for (cpu = 0; cpu < nr_cpus; ++cpu) { 247 247 env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); 248 248 env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); 249 + env->cpu[cpu].die_id = cpu_map__get_die_id(cpu); 249 250 } 250 251 251 252 env->nr_cpus_avail = nr_cpus;
+3
tools/perf/util/env.h
··· 9 9 10 10 struct cpu_topology_map { 11 11 int socket_id; 12 + int die_id; 12 13 int core_id; 13 14 }; 14 15 ··· 50 49 51 50 int nr_cmdline; 52 51 int nr_sibling_cores; 52 + int nr_sibling_dies; 53 53 int nr_sibling_threads; 54 54 int nr_numa_nodes; 55 55 int nr_memory_nodes; ··· 59 57 char *cmdline; 60 58 const char **cmdline_argv; 61 59 char *sibling_cores; 60 + char *sibling_dies; 62 61 char *sibling_threads; 63 62 char *pmu_mappings; 64 63 struct cpu_topology_map *cpu;
+4 -6
tools/perf/util/event.c
··· 20 20 #include "strlist.h" 21 21 #include "thread.h" 22 22 #include "thread_map.h" 23 - #include "sane_ctype.h" 23 + #include <linux/ctype.h> 24 24 #include "map.h" 25 25 #include "symbol.h" 26 26 #include "symbol/kallsyms.h" ··· 158 158 if (name) { 159 159 char *nl; 160 160 161 - name += 5; /* strlen("Name:") */ 162 - name = ltrim(name); 163 - 161 + name = skip_spaces(name + 5); /* strlen("Name:") */ 164 162 nl = strchr(name, '\n'); 165 163 if (nl) 166 164 *nl = '\0'; ··· 1484 1486 1485 1487 size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp) 1486 1488 { 1487 - return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n", 1489 + return fprintf(fp, " addr %" PRIx64 " len %u type %u flags 0x%x name %s\n", 1488 1490 event->ksymbol_event.addr, event->ksymbol_event.len, 1489 1491 event->ksymbol_event.ksym_type, 1490 1492 event->ksymbol_event.flags, event->ksymbol_event.name); ··· 1492 1494 1493 1495 size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp) 1494 1496 { 1495 - return fprintf(fp, " bpf event with type %u, flags %u, id %u\n", 1497 + return fprintf(fp, " type %u, flags %u, id %u\n", 1496 1498 event->bpf_event.type, event->bpf_event.flags, 1497 1499 event->bpf_event.id); 1498 1500 }
+2
tools/perf/util/event.h
··· 204 204 u64 period; 205 205 u64 weight; 206 206 u64 transaction; 207 + u64 insn_cnt; 208 + u64 cyc_cnt; 207 209 u32 cpu; 208 210 u32 raw_size; 209 211 u64 data_src;
+23 -14
tools/perf/util/evsel.c
··· 35 35 #include "debug.h" 36 36 #include "trace-event.h" 37 37 #include "stat.h" 38 + #include "string2.h" 38 39 #include "memswap.h" 39 40 #include "util/parse-branch-options.h" 40 41 41 - #include "sane_ctype.h" 42 + #include <linux/ctype.h> 42 43 43 44 struct perf_missing_features perf_missing_features; 44 45 ··· 590 589 { 591 590 char bf[128]; 592 591 592 + if (!evsel) 593 + goto out_unknown; 594 + 593 595 if (evsel->name) 594 596 return evsel->name; 595 597 ··· 632 628 633 629 evsel->name = strdup(bf); 634 630 635 - return evsel->name ?: "unknown"; 631 + if (evsel->name) 632 + return evsel->name; 633 + out_unknown: 634 + return "unknown"; 636 635 } 637 636 638 637 const char *perf_evsel__group_name(struct perf_evsel *evsel) ··· 686 679 687 680 attr->sample_max_stack = param->max_stack; 688 681 682 + if (opts->kernel_callchains) 683 + attr->exclude_callchain_user = 1; 684 + if (opts->user_callchains) 685 + attr->exclude_callchain_kernel = 1; 689 686 if (param->record_mode == CALLCHAIN_LBR) { 690 687 if (!opts->branch_stack) { 691 688 if (attr->exclude_user) { ··· 712 701 if (!function) { 713 702 perf_evsel__set_sample_bit(evsel, REGS_USER); 714 703 perf_evsel__set_sample_bit(evsel, STACK_USER); 715 - attr->sample_regs_user |= PERF_REGS_MASK; 704 + if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) { 705 + attr->sample_regs_user |= DWARF_MINIMAL_REGS; 706 + pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, " 707 + "specifying a subset with --user-regs may render DWARF unwinding unreliable, " 708 + "so the minimal registers set (IP, SP) is explicitly forced.\n"); 709 + } else { 710 + attr->sample_regs_user |= PERF_REGS_MASK; 711 + } 716 712 attr->sample_stack_user = param->dump_size; 717 713 attr->exclude_callchain_user = 1; 718 714 } else { ··· 1154 1136 1155 1137 static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 1156 1138 { 1157 - if (evsel->system_wide) 1158 - nthreads = 
1; 1159 - 1160 1139 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); 1161 1140 1162 1141 if (evsel->fd) { ··· 1800 1785 if (fd >= 0) 1801 1786 break; 1802 1787 1803 - /* 1804 - * Do quick precise_ip fallback if: 1805 - * - there is precise_ip set in perf_event_attr 1806 - * - maximum precise is requested 1807 - * - sys_perf_event_open failed with ENOTSUP error, 1808 - * which is associated with wrong precise_ip 1809 - */ 1810 - if (!precise_ip || !evsel->precise_max || (errno != ENOTSUP)) 1788 + /* Do not try less precise if not requested. */ 1789 + if (!evsel->precise_max) 1811 1790 break; 1812 1791 1813 1792 /*
+97 -15
tools/perf/util/header.c
··· 13 13 #include <linux/list.h> 14 14 #include <linux/kernel.h> 15 15 #include <linux/bitops.h> 16 + #include <linux/string.h> 16 17 #include <linux/stringify.h> 17 18 #include <sys/stat.h> 18 19 #include <sys/utsname.h> ··· 44 43 #include "cputopo.h" 45 44 #include "bpf-event.h" 46 45 47 - #include "sane_ctype.h" 46 + #include <linux/ctype.h> 48 47 49 48 /* 50 49 * magic2 = "PERFILE2" ··· 417 416 while (*p) { 418 417 if (isspace(*p)) { 419 418 char *r = p + 1; 420 - char *q = r; 419 + char *q = skip_spaces(r); 421 420 *p = ' '; 422 - while (*q && isspace(*q)) 423 - q++; 424 421 if (q != (p+1)) 425 422 while ((*r++ = *q++)); 426 423 } ··· 598 599 if (ret < 0) 599 600 return ret; 600 601 } 602 + 603 + if (!tp->die_sib) 604 + goto done; 605 + 606 + ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib)); 607 + if (ret < 0) 608 + goto done; 609 + 610 + for (i = 0; i < tp->die_sib; i++) { 611 + ret = do_write_string(ff, tp->die_siblings[i]); 612 + if (ret < 0) 613 + goto done; 614 + } 615 + 616 + for (j = 0; j < perf_env.nr_cpus_avail; j++) { 617 + ret = do_write(ff, &perf_env.cpu[j].die_id, 618 + sizeof(perf_env.cpu[j].die_id)); 619 + if (ret < 0) 620 + return ret; 621 + } 622 + 601 623 done: 602 624 cpu_topology__delete(tp); 603 625 return ret; ··· 1048 1028 return -1; 1049 1029 1050 1030 cache->type[len] = 0; 1051 - cache->type = rtrim(cache->type); 1031 + cache->type = strim(cache->type); 1052 1032 1053 1033 scnprintf(file, PATH_MAX, "%s/size", path); 1054 1034 if (sysfs__read_str(file, &cache->size, &len)) { ··· 1057 1037 } 1058 1038 1059 1039 cache->size[len] = 0; 1060 - cache->size = rtrim(cache->size); 1040 + cache->size = strim(cache->size); 1061 1041 1062 1042 scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); 1063 1043 if (sysfs__read_str(file, &cache->map, &len)) { ··· 1067 1047 } 1068 1048 1069 1049 cache->map[len] = 0; 1070 - cache->map = rtrim(cache->map); 1050 + cache->map = strim(cache->map); 1071 1051 return 0; 1072 1052 } 1073 1053 ··· 1120 1100 
return 0; 1121 1101 } 1122 1102 1123 - #define MAX_CACHES 2000 1103 + #define MAX_CACHES (MAX_NR_CPUS * 4) 1124 1104 1125 1105 static int write_cache(struct feat_fd *ff, 1126 1106 struct perf_evlist *evlist __maybe_unused) ··· 1459 1439 str = ph->env.sibling_cores; 1460 1440 1461 1441 for (i = 0; i < nr; i++) { 1462 - fprintf(fp, "# sibling cores : %s\n", str); 1442 + fprintf(fp, "# sibling sockets : %s\n", str); 1463 1443 str += strlen(str) + 1; 1444 + } 1445 + 1446 + if (ph->env.nr_sibling_dies) { 1447 + nr = ph->env.nr_sibling_dies; 1448 + str = ph->env.sibling_dies; 1449 + 1450 + for (i = 0; i < nr; i++) { 1451 + fprintf(fp, "# sibling dies : %s\n", str); 1452 + str += strlen(str) + 1; 1453 + } 1464 1454 } 1465 1455 1466 1456 nr = ph->env.nr_sibling_threads; ··· 1481 1451 str += strlen(str) + 1; 1482 1452 } 1483 1453 1484 - if (ph->env.cpu != NULL) { 1485 - for (i = 0; i < cpu_nr; i++) 1486 - fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i, 1487 - ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id); 1488 - } else 1489 - fprintf(fp, "# Core ID and Socket ID information is not available\n"); 1454 + if (ph->env.nr_sibling_dies) { 1455 + if (ph->env.cpu != NULL) { 1456 + for (i = 0; i < cpu_nr; i++) 1457 + fprintf(fp, "# CPU %d: Core ID %d, " 1458 + "Die ID %d, Socket ID %d\n", 1459 + i, ph->env.cpu[i].core_id, 1460 + ph->env.cpu[i].die_id, 1461 + ph->env.cpu[i].socket_id); 1462 + } else 1463 + fprintf(fp, "# Core ID, Die ID and Socket ID " 1464 + "information is not available\n"); 1465 + } else { 1466 + if (ph->env.cpu != NULL) { 1467 + for (i = 0; i < cpu_nr; i++) 1468 + fprintf(fp, "# CPU %d: Core ID %d, " 1469 + "Socket ID %d\n", 1470 + i, ph->env.cpu[i].core_id, 1471 + ph->env.cpu[i].socket_id); 1472 + } else 1473 + fprintf(fp, "# Core ID and Socket ID " 1474 + "information is not available\n"); 1475 + } 1490 1476 } 1491 1477 1492 1478 static void print_clockid(struct feat_fd *ff, FILE *fp) ··· 2260 2214 goto free_cpu; 2261 2215 2262 2216 
ph->env.cpu[i].core_id = nr; 2217 + size += sizeof(u32); 2263 2218 2264 2219 if (do_read_u32(ff, &nr)) 2265 2220 goto free_cpu; ··· 2272 2225 } 2273 2226 2274 2227 ph->env.cpu[i].socket_id = nr; 2228 + size += sizeof(u32); 2229 + } 2230 + 2231 + /* 2232 + * The header may be from old perf, 2233 + * which doesn't include die information. 2234 + */ 2235 + if (ff->size <= size) 2236 + return 0; 2237 + 2238 + if (do_read_u32(ff, &nr)) 2239 + return -1; 2240 + 2241 + ph->env.nr_sibling_dies = nr; 2242 + size += sizeof(u32); 2243 + 2244 + for (i = 0; i < nr; i++) { 2245 + str = do_read_string(ff); 2246 + if (!str) 2247 + goto error; 2248 + 2249 + /* include a NULL character at the end */ 2250 + if (strbuf_add(&sb, str, strlen(str) + 1) < 0) 2251 + goto error; 2252 + size += string_size(str); 2253 + free(str); 2254 + } 2255 + ph->env.sibling_dies = strbuf_detach(&sb, NULL); 2256 + 2257 + for (i = 0; i < (u32)cpu_nr; i++) { 2258 + if (do_read_u32(ff, &nr)) 2259 + goto free_cpu; 2260 + 2261 + ph->env.cpu[i].die_id = nr; 2275 2262 } 2276 2263 2277 2264 return 0; ··· 3683 3602 return -ENOMEM; 3684 3603 3685 3604 ff.size = sz - sz_hdr; 3605 + ff.ph = &session->header; 3686 3606 3687 3607 for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { 3688 3608 if (!feat_ops[feat].synthesize) {
+40 -3
tools/perf/util/hist.c
··· 376 376 } 377 377 } 378 378 379 + struct hist_entry *hists__get_entry(struct hists *hists, int idx) 380 + { 381 + struct rb_node *next = rb_first_cached(&hists->entries); 382 + struct hist_entry *n; 383 + int i = 0; 384 + 385 + while (next) { 386 + n = rb_entry(next, struct hist_entry, rb_node); 387 + if (i == idx) 388 + return n; 389 + 390 + next = rb_next(&n->rb_node); 391 + i++; 392 + } 393 + 394 + return NULL; 395 + } 396 + 379 397 /* 380 398 * histogram, sorted on item, collects periods 381 399 */ ··· 592 574 */ 593 575 mem_info__zput(entry->mem_info); 594 576 577 + block_info__zput(entry->block_info); 578 + 595 579 /* If the map of an existing hist_entry has 596 580 * become out-of-date due to an exec() or 597 581 * similar, update it. Otherwise we will ··· 665 645 struct symbol *sym_parent, 666 646 struct branch_info *bi, 667 647 struct mem_info *mi, 648 + struct block_info *block_info, 668 649 struct perf_sample *sample, 669 650 bool sample_self, 670 651 struct hist_entry_ops *ops) ··· 698 677 .hists = hists, 699 678 .branch_info = bi, 700 679 .mem_info = mi, 680 + .block_info = block_info, 701 681 .transaction = sample->transaction, 702 682 .raw_data = sample->raw_data, 703 683 .raw_size = sample->raw_size, ··· 721 699 struct perf_sample *sample, 722 700 bool sample_self) 723 701 { 724 - return __hists__add_entry(hists, al, sym_parent, bi, mi, 702 + return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, 725 703 sample, sample_self, NULL); 726 704 } 727 705 ··· 734 712 struct perf_sample *sample, 735 713 bool sample_self) 736 714 { 737 - return __hists__add_entry(hists, al, sym_parent, bi, mi, 715 + return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, 738 716 sample, sample_self, ops); 717 + } 718 + 719 + struct hist_entry *hists__add_entry_block(struct hists *hists, 720 + struct addr_location *al, 721 + struct block_info *block_info) 722 + { 723 + struct hist_entry entry = { 724 + .block_info = block_info, 725 + .hists = hists, 726 + 
}, *he = hists__findnew_entry(hists, &entry, al, false); 727 + 728 + return he; 739 729 } 740 730 741 731 static int ··· 1246 1212 map__zput(he->mem_info->daddr.map); 1247 1213 mem_info__zput(he->mem_info); 1248 1214 } 1215 + 1216 + if (he->block_info) 1217 + block_info__zput(he->block_info); 1249 1218 1250 1219 zfree(&he->res_samples); 1251 1220 zfree(&he->stat_acc); ··· 2598 2561 char unit; 2599 2562 int printed; 2600 2563 const struct dso *dso = hists->dso_filter; 2601 - const struct thread *thread = hists->thread_filter; 2564 + struct thread *thread = hists->thread_filter; 2602 2565 int socket_id = hists->socket_filter; 2603 2566 unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 2604 2567 u64 nr_events = hists->stats.total_period;
+8
tools/perf/util/hist.h
··· 16 16 struct map_symbol; 17 17 struct mem_info; 18 18 struct branch_info; 19 + struct block_info; 19 20 struct symbol; 20 21 21 22 enum hist_filter { ··· 150 149 struct perf_sample *sample, 151 150 bool sample_self); 152 151 152 + struct hist_entry *hists__add_entry_block(struct hists *hists, 153 + struct addr_location *al, 154 + struct block_info *bi); 155 + 153 156 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, 154 157 int max_stack_depth, void *arg); 155 158 ··· 182 177 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); 183 178 void hists__delete_entries(struct hists *hists); 184 179 void hists__output_recalc_col_len(struct hists *hists, int max_rows); 180 + 181 + struct hist_entry *hists__get_entry(struct hists *hists, int idx); 185 182 186 183 u64 hists__total_period(struct hists *hists); 187 184 void hists__reset_stats(struct hists *hists); ··· 250 243 size_t size; 251 244 const char *sep; 252 245 void *ptr; 246 + bool skip; 253 247 }; 254 248 255 249 struct perf_hpp_fmt {
-1
tools/perf/util/include/linux/ctype.h
··· 1 - #include "../util.h"
+412 -59
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
··· 95 95 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, 96 96 uint64_t max_insn_cnt, void *data); 97 97 bool (*pgd_ip)(uint64_t ip, void *data); 98 + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); 98 99 void *data; 99 100 struct intel_pt_state state; 100 101 const unsigned char *buf; ··· 108 107 bool have_cyc; 109 108 bool fixup_last_mtc; 110 109 bool have_last_ip; 110 + bool in_psb; 111 111 enum intel_pt_param_flags flags; 112 112 uint64_t pos; 113 113 uint64_t last_ip; ··· 117 115 uint64_t timestamp; 118 116 uint64_t tsc_timestamp; 119 117 uint64_t ref_timestamp; 118 + uint64_t buf_timestamp; 120 119 uint64_t sample_timestamp; 121 120 uint64_t ret_addr; 122 121 uint64_t ctc_timestamp; ··· 133 130 int mtc_shift; 134 131 struct intel_pt_stack stack; 135 132 enum intel_pt_pkt_state pkt_state; 133 + enum intel_pt_pkt_ctx pkt_ctx; 134 + enum intel_pt_pkt_ctx prev_pkt_ctx; 135 + enum intel_pt_blk_type blk_type; 136 + int blk_type_pos; 136 137 struct intel_pt_pkt packet; 137 138 struct intel_pt_pkt tnt; 138 139 int pkt_step; ··· 158 151 uint64_t period_mask; 159 152 uint64_t period_ticks; 160 153 uint64_t last_masked_timestamp; 154 + uint64_t tot_cyc_cnt; 155 + uint64_t sample_tot_cyc_cnt; 156 + uint64_t base_cyc_cnt; 157 + uint64_t cyc_cnt_timestamp; 158 + double tsc_to_cyc; 161 159 bool continuous_period; 162 160 bool overflow; 163 161 bool set_fup_tx_flags; ··· 170 158 bool set_fup_mwait; 171 159 bool set_fup_pwre; 172 160 bool set_fup_exstop; 161 + bool set_fup_bep; 162 + bool sample_cyc; 173 163 unsigned int fup_tx_flags; 174 164 unsigned int tx_flags; 175 165 uint64_t fup_ptw_payload; ··· 231 217 decoder->get_trace = params->get_trace; 232 218 decoder->walk_insn = params->walk_insn; 233 219 decoder->pgd_ip = params->pgd_ip; 220 + decoder->lookahead = params->lookahead; 234 221 decoder->data = params->data; 235 222 decoder->return_compression = params->return_compression; 236 223 decoder->branch_enable = params->branch_enable; ··· 
485 470 return -EBADMSG; 486 471 } 487 472 488 - static int intel_pt_get_data(struct intel_pt_decoder *decoder) 473 + static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) 474 + { 475 + decoder->sample_timestamp = decoder->timestamp; 476 + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; 477 + } 478 + 479 + static void intel_pt_reposition(struct intel_pt_decoder *decoder) 480 + { 481 + decoder->ip = 0; 482 + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; 483 + decoder->timestamp = 0; 484 + decoder->have_tma = false; 485 + } 486 + 487 + static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition) 489 488 { 490 489 struct intel_pt_buffer buffer = { .buf = 0, }; 491 490 int ret; ··· 516 487 intel_pt_log("No more data\n"); 517 488 return -ENODATA; 518 489 } 519 - if (!buffer.consecutive) { 520 - decoder->ip = 0; 521 - decoder->pkt_state = INTEL_PT_STATE_NO_PSB; 490 + decoder->buf_timestamp = buffer.ref_timestamp; 491 + if (!buffer.consecutive || reposition) { 492 + intel_pt_reposition(decoder); 522 493 decoder->ref_timestamp = buffer.ref_timestamp; 523 - decoder->timestamp = 0; 524 - decoder->have_tma = false; 525 494 decoder->state.trace_nr = buffer.trace_nr; 526 495 intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", 527 496 decoder->ref_timestamp); ··· 529 502 return 0; 530 503 } 531 504 532 - static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) 505 + static int intel_pt_get_next_data(struct intel_pt_decoder *decoder, 506 + bool reposition) 533 507 { 534 508 if (!decoder->next_buf) 535 - return intel_pt_get_data(decoder); 509 + return intel_pt_get_data(decoder, reposition); 536 510 537 511 decoder->buf = decoder->next_buf; 538 512 decoder->len = decoder->next_len; ··· 552 524 len = decoder->len; 553 525 memcpy(buf, decoder->buf, len); 554 526 555 - ret = intel_pt_get_data(decoder); 527 + ret = intel_pt_get_data(decoder, false); 556 528 if (ret) { 557 529 decoder->pos += old_len; 558 530 return ret < 0 
? ret : -EINVAL; ··· 564 536 memcpy(buf + len, decoder->buf, n); 565 537 len += n; 566 538 567 - ret = intel_pt_get_packet(buf, len, &decoder->packet); 539 + decoder->prev_pkt_ctx = decoder->pkt_ctx; 540 + ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx); 568 541 if (ret < (int)old_len) { 569 542 decoder->next_buf = decoder->buf; 570 543 decoder->next_len = decoder->len; ··· 600 571 { 601 572 struct intel_pt_pkt_info pkt_info; 602 573 const unsigned char *buf = decoder->buf; 574 + enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx; 603 575 size_t len = decoder->len; 604 576 int ret; 605 577 ··· 619 589 if (!len) 620 590 return INTEL_PT_NEED_MORE_BYTES; 621 591 622 - ret = intel_pt_get_packet(buf, len, &pkt_info.packet); 592 + ret = intel_pt_get_packet(buf, len, &pkt_info.packet, 593 + &pkt_ctx); 623 594 if (!ret) 624 595 return INTEL_PT_NEED_MORE_BYTES; 625 596 if (ret < 0) ··· 695 664 case INTEL_PT_MNT: 696 665 case INTEL_PT_PTWRITE: 697 666 case INTEL_PT_PTWRITE_IP: 667 + case INTEL_PT_BBP: 668 + case INTEL_PT_BIP: 669 + case INTEL_PT_BEP: 670 + case INTEL_PT_BEP_IP: 698 671 return 0; 699 672 700 673 case INTEL_PT_MTC: ··· 885 850 decoder->len -= decoder->pkt_step; 886 851 887 852 if (!decoder->len) { 888 - ret = intel_pt_get_next_data(decoder); 853 + ret = intel_pt_get_next_data(decoder, false); 889 854 if (ret) 890 855 return ret; 891 856 } 892 857 858 + decoder->prev_pkt_ctx = decoder->pkt_ctx; 893 859 ret = intel_pt_get_packet(decoder->buf, decoder->len, 894 - &decoder->packet); 860 + &decoder->packet, &decoder->pkt_ctx); 895 861 if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && 896 862 decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { 897 863 ret = intel_pt_get_split_packet(decoder); ··· 1130 1094 decoder->state.to_ip = 0; 1131 1095 ret = true; 1132 1096 } 1097 + if (decoder->set_fup_bep) { 1098 + decoder->set_fup_bep = false; 1099 + decoder->state.type |= INTEL_PT_BLK_ITEMS; 1100 + decoder->state.type &= 
~INTEL_PT_BRANCH; 1101 + decoder->state.from_ip = decoder->ip; 1102 + decoder->state.to_ip = 0; 1103 + ret = true; 1104 + } 1133 1105 return ret; 1134 1106 } 1135 1107 ··· 1352 1308 decoder->ip += intel_pt_insn.length; 1353 1309 return 0; 1354 1310 } 1311 + decoder->sample_cyc = false; 1355 1312 decoder->ip += intel_pt_insn.length; 1356 1313 if (!decoder->tnt.count) { 1357 - decoder->sample_timestamp = decoder->timestamp; 1358 - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; 1314 + intel_pt_update_sample_time(decoder); 1359 1315 return -EAGAIN; 1360 1316 } 1361 1317 decoder->tnt.payload <<= 1; ··· 1389 1345 return 0; 1390 1346 } 1391 1347 1348 + static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp) 1349 + { 1350 + timestamp |= (ref_timestamp & (0xffULL << 56)); 1351 + 1352 + if (timestamp < ref_timestamp) { 1353 + if (ref_timestamp - timestamp > (1ULL << 55)) 1354 + timestamp += (1ULL << 56); 1355 + } else { 1356 + if (timestamp - ref_timestamp > (1ULL << 55)) 1357 + timestamp -= (1ULL << 56); 1358 + } 1359 + 1360 + return timestamp; 1361 + } 1362 + 1392 1363 static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) 1393 1364 { 1394 1365 uint64_t timestamp; ··· 1411 1352 decoder->have_tma = false; 1412 1353 1413 1354 if (decoder->ref_timestamp) { 1414 - timestamp = decoder->packet.payload | 1415 - (decoder->ref_timestamp & (0xffULL << 56)); 1416 - if (timestamp < decoder->ref_timestamp) { 1417 - if (decoder->ref_timestamp - timestamp > (1ULL << 55)) 1418 - timestamp += (1ULL << 56); 1419 - } else { 1420 - if (timestamp - decoder->ref_timestamp > (1ULL << 55)) 1421 - timestamp -= (1ULL << 56); 1422 - } 1355 + timestamp = intel_pt_8b_tsc(decoder->packet.payload, 1356 + decoder->ref_timestamp); 1423 1357 decoder->tsc_timestamp = timestamp; 1424 1358 decoder->timestamp = timestamp; 1425 1359 decoder->ref_timestamp = 0; ··· 1456 1404 return -EOVERFLOW; 1457 1405 } 1458 1406 1407 + static inline void 
intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder) 1408 + { 1409 + if (decoder->have_cyc) 1410 + return; 1411 + 1412 + decoder->cyc_cnt_timestamp = decoder->timestamp; 1413 + decoder->base_cyc_cnt = decoder->tot_cyc_cnt; 1414 + } 1415 + 1416 + static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder) 1417 + { 1418 + decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp; 1419 + 1420 + if (decoder->pge) 1421 + intel_pt_mtc_cyc_cnt_pge(decoder); 1422 + } 1423 + 1424 + static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder) 1425 + { 1426 + uint64_t tot_cyc_cnt, tsc_delta; 1427 + 1428 + if (decoder->have_cyc) 1429 + return; 1430 + 1431 + decoder->sample_cyc = true; 1432 + 1433 + if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp) 1434 + return; 1435 + 1436 + tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp; 1437 + tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt; 1438 + 1439 + if (tot_cyc_cnt > decoder->tot_cyc_cnt) 1440 + decoder->tot_cyc_cnt = tot_cyc_cnt; 1441 + } 1442 + 1459 1443 static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) 1460 1444 { 1461 1445 uint32_t ctc = decoder->packet.payload; ··· 1500 1412 1501 1413 if (!decoder->tsc_ctc_ratio_d) 1502 1414 return; 1415 + 1416 + if (decoder->pge && !decoder->in_psb) 1417 + intel_pt_mtc_cyc_cnt_pge(decoder); 1418 + else 1419 + intel_pt_mtc_cyc_cnt_upd(decoder); 1503 1420 1504 1421 decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; 1505 1422 decoder->ctc_timestamp = decoder->tsc_timestamp - fc; ··· 1561 1468 else 1562 1469 decoder->timestamp = timestamp; 1563 1470 1471 + intel_pt_mtc_cyc_cnt_upd(decoder); 1472 + 1564 1473 decoder->timestamp_insn_cnt = 0; 1565 1474 decoder->last_mtc = mtc; 1566 1475 ··· 1587 1492 1588 1493 decoder->cbr = cbr; 1589 1494 decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; 1495 + 1496 + intel_pt_mtc_cyc_cnt_cbr(decoder); 1590 1497 } 1591 1498 1592 
1499 static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) ··· 1598 1501 decoder->have_cyc = true; 1599 1502 1600 1503 decoder->cycle_cnt += decoder->packet.payload; 1504 + if (decoder->pge) 1505 + decoder->tot_cyc_cnt += decoder->packet.payload; 1506 + decoder->sample_cyc = true; 1601 1507 1602 1508 if (!decoder->cyc_ref_timestamp) 1603 1509 return; ··· 1623 1523 intel_pt_log_to("Setting timestamp", decoder->timestamp); 1624 1524 } 1625 1525 1526 + static void intel_pt_bbp(struct intel_pt_decoder *decoder) 1527 + { 1528 + if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) { 1529 + memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask)); 1530 + decoder->state.items.is_32_bit = false; 1531 + } 1532 + decoder->blk_type = decoder->packet.payload; 1533 + decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type); 1534 + if (decoder->blk_type == INTEL_PT_GP_REGS) 1535 + decoder->state.items.is_32_bit = decoder->packet.count; 1536 + if (decoder->blk_type_pos < 0) { 1537 + intel_pt_log("WARNING: Unknown block type %u\n", 1538 + decoder->blk_type); 1539 + } else if (decoder->state.items.mask[decoder->blk_type_pos]) { 1540 + intel_pt_log("WARNING: Duplicate block type %u\n", 1541 + decoder->blk_type); 1542 + } 1543 + } 1544 + 1545 + static void intel_pt_bip(struct intel_pt_decoder *decoder) 1546 + { 1547 + uint32_t id = decoder->packet.count; 1548 + uint32_t bit = 1 << id; 1549 + int pos = decoder->blk_type_pos; 1550 + 1551 + if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) { 1552 + intel_pt_log("WARNING: Unknown block item %u type %d\n", 1553 + id, decoder->blk_type); 1554 + return; 1555 + } 1556 + 1557 + if (decoder->state.items.mask[pos] & bit) { 1558 + intel_pt_log("WARNING: Duplicate block item %u type %d\n", 1559 + id, decoder->blk_type); 1560 + } 1561 + 1562 + decoder->state.items.mask[pos] |= bit; 1563 + decoder->state.items.val[pos][id] = decoder->packet.payload; 1564 + } 1565 + 1626 1566 /* Walk PSB+ packets when already in sync. 
*/ 1627 1567 static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) 1628 1568 { 1629 1569 int err; 1630 1570 1571 + decoder->in_psb = true; 1572 + 1631 1573 while (1) { 1632 1574 err = intel_pt_get_next_packet(decoder); 1633 1575 if (err) 1634 - return err; 1576 + goto out; 1635 1577 1636 1578 switch (decoder->packet.type) { 1637 1579 case INTEL_PT_PSBEND: 1638 - return 0; 1580 + err = 0; 1581 + goto out; 1639 1582 1640 1583 case INTEL_PT_TIP_PGD: 1641 1584 case INTEL_PT_TIP_PGE: ··· 1694 1551 case INTEL_PT_MWAIT: 1695 1552 case INTEL_PT_PWRE: 1696 1553 case INTEL_PT_PWRX: 1554 + case INTEL_PT_BBP: 1555 + case INTEL_PT_BIP: 1556 + case INTEL_PT_BEP: 1557 + case INTEL_PT_BEP_IP: 1697 1558 decoder->have_tma = false; 1698 1559 intel_pt_log("ERROR: Unexpected packet\n"); 1699 - return -EAGAIN; 1560 + err = -EAGAIN; 1561 + goto out; 1700 1562 1701 1563 case INTEL_PT_OVF: 1702 - return intel_pt_overflow(decoder); 1564 + err = intel_pt_overflow(decoder); 1565 + goto out; 1703 1566 1704 1567 case INTEL_PT_TSC: 1705 1568 intel_pt_calc_tsc_timestamp(decoder); ··· 1751 1602 break; 1752 1603 } 1753 1604 } 1605 + out: 1606 + decoder->in_psb = false; 1607 + 1608 + return err; 1754 1609 } 1755 1610 1756 1611 static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) ··· 1791 1638 case INTEL_PT_MWAIT: 1792 1639 case INTEL_PT_PWRE: 1793 1640 case INTEL_PT_PWRX: 1641 + case INTEL_PT_BBP: 1642 + case INTEL_PT_BIP: 1643 + case INTEL_PT_BEP: 1644 + case INTEL_PT_BEP_IP: 1794 1645 intel_pt_log("ERROR: Missing TIP after FUP\n"); 1795 1646 decoder->pkt_state = INTEL_PT_STATE_ERR3; 1796 1647 decoder->pkt_step = 0; ··· 1832 1675 decoder->state.to_ip = decoder->ip; 1833 1676 } 1834 1677 decoder->state.type |= INTEL_PT_TRACE_BEGIN; 1678 + intel_pt_mtc_cyc_cnt_pge(decoder); 1835 1679 return 0; 1836 1680 1837 1681 case INTEL_PT_TIP: ··· 1903 1745 1904 1746 case INTEL_PT_TIP_PGE: { 1905 1747 decoder->pge = true; 1748 + intel_pt_mtc_cyc_cnt_pge(decoder); 1906 1749 if 
(decoder->packet.count == 0) { 1907 1750 intel_pt_log_at("Skipping zero TIP.PGE", 1908 1751 decoder->pos); ··· 1975 1816 goto next; 1976 1817 if (err) 1977 1818 return err; 1819 + /* 1820 + * PSB+ CBR will not have changed but cater for the 1821 + * possibility of another CBR change that gets caught up 1822 + * in the PSB+. 1823 + */ 1824 + if (decoder->cbr != decoder->cbr_seen) 1825 + return 0; 1978 1826 break; 1979 1827 1980 1828 case INTEL_PT_PIP: ··· 2022 1856 2023 1857 case INTEL_PT_CBR: 2024 1858 intel_pt_calc_cbr(decoder); 2025 - if (!decoder->branch_enable && 2026 - decoder->cbr != decoder->cbr_seen) { 2027 - decoder->cbr_seen = decoder->cbr; 2028 - decoder->state.type = INTEL_PT_CBR_CHG; 2029 - decoder->state.from_ip = decoder->ip; 2030 - decoder->state.to_ip = 0; 2031 - decoder->state.cbr_payload = 2032 - decoder->packet.payload; 1859 + if (decoder->cbr != decoder->cbr_seen) 2033 1860 return 0; 2034 - } 2035 1861 break; 2036 1862 2037 1863 case INTEL_PT_MODE_EXEC: ··· 2115 1957 decoder->state.pwrx_payload = decoder->packet.payload; 2116 1958 return 0; 2117 1959 1960 + case INTEL_PT_BBP: 1961 + intel_pt_bbp(decoder); 1962 + break; 1963 + 1964 + case INTEL_PT_BIP: 1965 + intel_pt_bip(decoder); 1966 + break; 1967 + 1968 + case INTEL_PT_BEP: 1969 + decoder->state.type = INTEL_PT_BLK_ITEMS; 1970 + decoder->state.from_ip = decoder->ip; 1971 + decoder->state.to_ip = 0; 1972 + return 0; 1973 + 1974 + case INTEL_PT_BEP_IP: 1975 + err = intel_pt_get_next_packet(decoder); 1976 + if (err) 1977 + return err; 1978 + if (decoder->packet.type == INTEL_PT_FUP) { 1979 + decoder->set_fup_bep = true; 1980 + no_tip = true; 1981 + } else { 1982 + intel_pt_log_at("ERROR: Missing FUP after BEP", 1983 + decoder->pos); 1984 + } 1985 + goto next; 1986 + 2118 1987 default: 2119 1988 return intel_pt_bug(decoder); 2120 1989 } ··· 2160 1975 { 2161 1976 int err; 2162 1977 1978 + decoder->in_psb = true; 1979 + 2163 1980 while (1) { 2164 1981 err = intel_pt_get_next_packet(decoder); 2165 
1982 if (err) 2166 - return err; 1983 + goto out; 2167 1984 2168 1985 switch (decoder->packet.type) { 2169 1986 case INTEL_PT_TIP_PGD: ··· 2180 1993 case INTEL_PT_MWAIT: 2181 1994 case INTEL_PT_PWRE: 2182 1995 case INTEL_PT_PWRX: 1996 + case INTEL_PT_BBP: 1997 + case INTEL_PT_BIP: 1998 + case INTEL_PT_BEP: 1999 + case INTEL_PT_BEP_IP: 2183 2000 intel_pt_log("ERROR: Unexpected packet\n"); 2184 - return -ENOENT; 2001 + err = -ENOENT; 2002 + goto out; 2185 2003 2186 2004 case INTEL_PT_FUP: 2187 2005 decoder->pge = true; ··· 2245 2053 decoder->pkt_state = INTEL_PT_STATE_ERR4; 2246 2054 else 2247 2055 decoder->pkt_state = INTEL_PT_STATE_ERR3; 2248 - return -ENOENT; 2056 + err = -ENOENT; 2057 + goto out; 2249 2058 2250 2059 case INTEL_PT_BAD: /* Does not happen */ 2251 - return intel_pt_bug(decoder); 2060 + err = intel_pt_bug(decoder); 2061 + goto out; 2252 2062 2253 2063 case INTEL_PT_OVF: 2254 - return intel_pt_overflow(decoder); 2064 + err = intel_pt_overflow(decoder); 2065 + goto out; 2255 2066 2256 2067 case INTEL_PT_PSBEND: 2257 - return 0; 2068 + err = 0; 2069 + goto out; 2258 2070 2259 2071 case INTEL_PT_PSB: 2260 2072 case INTEL_PT_VMCS: ··· 2268 2072 break; 2269 2073 } 2270 2074 } 2075 + out: 2076 + decoder->in_psb = false; 2077 + 2078 + return err; 2271 2079 } 2272 2080 2273 2081 static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) ··· 2286 2086 switch (decoder->packet.type) { 2287 2087 case INTEL_PT_TIP_PGD: 2288 2088 decoder->continuous_period = false; 2289 - __fallthrough; 2290 - case INTEL_PT_TIP_PGE: 2291 - case INTEL_PT_TIP: 2292 - decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; 2089 + decoder->pge = false; 2293 2090 if (intel_pt_have_ip(decoder)) 2294 2091 intel_pt_set_ip(decoder); 2295 2092 if (!decoder->ip) 2296 2093 break; 2297 - if (decoder->packet.type == INTEL_PT_TIP_PGE) 2298 - decoder->state.type |= INTEL_PT_TRACE_BEGIN; 2299 - if (decoder->packet.type == INTEL_PT_TIP_PGD) 2300 - decoder->state.type |= INTEL_PT_TRACE_END; 
2094 + decoder->state.type |= INTEL_PT_TRACE_END; 2095 + return 0; 2096 + 2097 + case INTEL_PT_TIP_PGE: 2098 + decoder->pge = true; 2099 + intel_pt_mtc_cyc_cnt_pge(decoder); 2100 + if (intel_pt_have_ip(decoder)) 2101 + intel_pt_set_ip(decoder); 2102 + if (!decoder->ip) 2103 + break; 2104 + decoder->state.type |= INTEL_PT_TRACE_BEGIN; 2105 + return 0; 2106 + 2107 + case INTEL_PT_TIP: 2108 + decoder->pge = true; 2109 + if (intel_pt_have_ip(decoder)) 2110 + intel_pt_set_ip(decoder); 2111 + if (!decoder->ip) 2112 + break; 2301 2113 return 0; 2302 2114 2303 2115 case INTEL_PT_FUP: ··· 2390 2178 case INTEL_PT_MWAIT: 2391 2179 case INTEL_PT_PWRE: 2392 2180 case INTEL_PT_PWRX: 2181 + case INTEL_PT_BBP: 2182 + case INTEL_PT_BIP: 2183 + case INTEL_PT_BEP: 2184 + case INTEL_PT_BEP_IP: 2393 2185 default: 2394 2186 break; 2395 2187 } ··· 2409 2193 decoder->set_fup_mwait = false; 2410 2194 decoder->set_fup_pwre = false; 2411 2195 decoder->set_fup_exstop = false; 2196 + decoder->set_fup_bep = false; 2412 2197 2413 2198 if (!decoder->branch_enable) { 2414 2199 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; ··· 2467 2250 decoder->pos += decoder->len; 2468 2251 decoder->len = 0; 2469 2252 2470 - ret = intel_pt_get_next_data(decoder); 2253 + ret = intel_pt_get_next_data(decoder, false); 2471 2254 if (ret) 2472 2255 return ret; 2473 2256 ··· 2493 2276 intel_pt_log("Scanning for PSB\n"); 2494 2277 while (1) { 2495 2278 if (!decoder->len) { 2496 - ret = intel_pt_get_next_data(decoder); 2279 + ret = intel_pt_get_next_data(decoder, false); 2497 2280 if (ret) 2498 2281 return ret; 2499 2282 } ··· 2621 2404 if (err) { 2622 2405 decoder->state.err = intel_pt_ext_err(err); 2623 2406 decoder->state.from_ip = decoder->ip; 2624 - decoder->sample_timestamp = decoder->timestamp; 2625 - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; 2407 + intel_pt_update_sample_time(decoder); 2408 + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; 2626 2409 } else { 2627 2410 decoder->state.err = 0; 2628 
- if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { 2411 + if (decoder->cbr != decoder->cbr_seen) { 2629 2412 decoder->cbr_seen = decoder->cbr; 2413 + if (!decoder->state.type) { 2414 + decoder->state.from_ip = decoder->ip; 2415 + decoder->state.to_ip = 0; 2416 + } 2630 2417 decoder->state.type |= INTEL_PT_CBR_CHG; 2631 2418 decoder->state.cbr_payload = decoder->cbr_payload; 2419 + decoder->state.cbr = decoder->cbr; 2632 2420 } 2633 2421 if (intel_pt_sample_time(decoder->pkt_state)) { 2634 - decoder->sample_timestamp = decoder->timestamp; 2635 - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; 2422 + intel_pt_update_sample_time(decoder); 2423 + if (decoder->sample_cyc) 2424 + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; 2636 2425 } 2637 2426 } 2638 2427 ··· 2646 2423 decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); 2647 2424 decoder->state.cr3 = decoder->cr3; 2648 2425 decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; 2426 + decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt; 2649 2427 2650 2428 return &decoder->state; 2651 2429 } ··· 2750 2526 static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc, 2751 2527 size_t *rem) 2752 2528 { 2529 + enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX; 2753 2530 struct intel_pt_pkt packet; 2754 2531 int ret; 2755 2532 2756 2533 while (len) { 2757 - ret = intel_pt_get_packet(buf, len, &packet); 2534 + ret = intel_pt_get_packet(buf, len, &packet, &ctx); 2758 2535 if (ret <= 0) 2759 2536 return false; 2760 2537 if (packet.type == INTEL_PT_TSC) { ··· 2956 2731 if (!intel_pt_step_psb(&buf_a, &len_a)) 2957 2732 return buf_b; /* No overlap */ 2958 2733 } 2734 + } 2735 + 2736 + /** 2737 + * struct fast_forward_data - data used by intel_pt_ff_cb(). 2738 + * @timestamp: timestamp to fast forward towards 2739 + * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than 2740 + * the fast forward timestamp. 
2741 + */ 2742 + struct fast_forward_data { 2743 + uint64_t timestamp; 2744 + uint64_t buf_timestamp; 2745 + }; 2746 + 2747 + /** 2748 + * intel_pt_ff_cb - fast forward lookahead callback. 2749 + * @buffer: Intel PT trace buffer 2750 + * @data: opaque pointer to fast forward data (struct fast_forward_data) 2751 + * 2752 + * Determine if @buffer trace is past the fast forward timestamp. 2753 + * 2754 + * Return: 1 (stop lookahead) if @buffer trace is past the fast forward 2755 + * timestamp, and 0 otherwise. 2756 + */ 2757 + static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data) 2758 + { 2759 + struct fast_forward_data *d = data; 2760 + unsigned char *buf; 2761 + uint64_t tsc; 2762 + size_t rem; 2763 + size_t len; 2764 + 2765 + buf = (unsigned char *)buffer->buf; 2766 + len = buffer->len; 2767 + 2768 + if (!intel_pt_next_psb(&buf, &len) || 2769 + !intel_pt_next_tsc(buf, len, &tsc, &rem)) 2770 + return 0; 2771 + 2772 + tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp); 2773 + 2774 + intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n", 2775 + tsc, buffer->ref_timestamp); 2776 + 2777 + /* 2778 + * If the buffer contains a timestamp earlier that the fast forward 2779 + * timestamp, then record it, else stop. 2780 + */ 2781 + if (tsc < d->timestamp) 2782 + d->buf_timestamp = buffer->ref_timestamp; 2783 + else 2784 + return 1; 2785 + 2786 + return 0; 2787 + } 2788 + 2789 + /** 2790 + * intel_pt_fast_forward - reposition decoder forwards. 2791 + * @decoder: Intel PT decoder 2792 + * @timestamp: timestamp to fast forward towards 2793 + * 2794 + * Reposition decoder at the last PSB with a timestamp earlier than @timestamp. 2795 + * 2796 + * Return: 0 on success or negative error code on failure. 
2797 + */ 2798 + int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp) 2799 + { 2800 + struct fast_forward_data d = { .timestamp = timestamp }; 2801 + unsigned char *buf; 2802 + size_t len; 2803 + int err; 2804 + 2805 + intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp); 2806 + 2807 + /* Find buffer timestamp of buffer to fast forward to */ 2808 + err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d); 2809 + if (err < 0) 2810 + return err; 2811 + 2812 + /* Walk to buffer with same buffer timestamp */ 2813 + if (d.buf_timestamp) { 2814 + do { 2815 + decoder->pos += decoder->len; 2816 + decoder->len = 0; 2817 + err = intel_pt_get_next_data(decoder, true); 2818 + /* -ENOLINK means non-consecutive trace */ 2819 + if (err && err != -ENOLINK) 2820 + return err; 2821 + } while (decoder->buf_timestamp != d.buf_timestamp); 2822 + } 2823 + 2824 + if (!decoder->buf) 2825 + return 0; 2826 + 2827 + buf = (unsigned char *)decoder->buf; 2828 + len = decoder->len; 2829 + 2830 + if (!intel_pt_next_psb(&buf, &len)) 2831 + return 0; 2832 + 2833 + /* 2834 + * Walk PSBs while the PSB timestamp is less than the fast forward 2835 + * timestamp. 2836 + */ 2837 + do { 2838 + uint64_t tsc; 2839 + size_t rem; 2840 + 2841 + if (!intel_pt_next_tsc(buf, len, &tsc, &rem)) 2842 + break; 2843 + tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp); 2844 + /* 2845 + * A TSC packet can slip past MTC packets but, after fast 2846 + * forward, decoding starts at the TSC timestamp. That means 2847 + * the timestamps may not be exactly the same as the timestamps 2848 + * that would have been decoded without fast forward. 
2849 + */ 2850 + if (tsc < timestamp) { 2851 + intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc); 2852 + decoder->pos += decoder->len - len; 2853 + decoder->buf = buf; 2854 + decoder->len = len; 2855 + intel_pt_reposition(decoder); 2856 + } else { 2857 + break; 2858 + } 2859 + } while (intel_pt_step_psb(&buf, &len)); 2860 + 2861 + return 0; 2959 2862 }
+144
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
··· 30 30 INTEL_PT_CBR_CHG = 1 << 8, 31 31 INTEL_PT_TRACE_BEGIN = 1 << 9, 32 32 INTEL_PT_TRACE_END = 1 << 10, 33 + INTEL_PT_BLK_ITEMS = 1 << 11, 33 34 }; 34 35 35 36 enum intel_pt_period_type { ··· 62 61 INTEL_PT_FUP_WITH_NLIP = 1 << 0, 63 62 }; 64 63 64 + enum intel_pt_blk_type { 65 + INTEL_PT_GP_REGS = 1, 66 + INTEL_PT_PEBS_BASIC = 4, 67 + INTEL_PT_PEBS_MEM = 5, 68 + INTEL_PT_LBR_0 = 8, 69 + INTEL_PT_LBR_1 = 9, 70 + INTEL_PT_LBR_2 = 10, 71 + INTEL_PT_XMM = 16, 72 + INTEL_PT_BLK_TYPE_MAX 73 + }; 74 + 75 + /* 76 + * The block type numbers are not sequential but here they are given sequential 77 + * positions to avoid wasting space for array placement. 78 + */ 79 + enum intel_pt_blk_type_pos { 80 + INTEL_PT_GP_REGS_POS, 81 + INTEL_PT_PEBS_BASIC_POS, 82 + INTEL_PT_PEBS_MEM_POS, 83 + INTEL_PT_LBR_0_POS, 84 + INTEL_PT_LBR_1_POS, 85 + INTEL_PT_LBR_2_POS, 86 + INTEL_PT_XMM_POS, 87 + INTEL_PT_BLK_TYPE_CNT 88 + }; 89 + 90 + /* Get the array position for a block type */ 91 + static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type) 92 + { 93 + #define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1 94 + const int map[INTEL_PT_BLK_TYPE_MAX] = { 95 + BLK_TYPE(GP_REGS), 96 + BLK_TYPE(PEBS_BASIC), 97 + BLK_TYPE(PEBS_MEM), 98 + BLK_TYPE(LBR_0), 99 + BLK_TYPE(LBR_1), 100 + BLK_TYPE(LBR_2), 101 + BLK_TYPE(XMM), 102 + }; 103 + #undef BLK_TYPE 104 + 105 + return blk_type < INTEL_PT_BLK_TYPE_MAX ? map[blk_type] - 1 : -1; 106 + } 107 + 108 + #define INTEL_PT_BLK_ITEM_ID_CNT 32 109 + 110 + /* 111 + * Use unions so that the block items can be accessed by name or by array index. 112 + * There is an array of 32-bit masks for each block type, which indicate which 113 + * values are present. Then arrays of 32 64-bit values for each block type. 
114 + */ 115 + struct intel_pt_blk_items { 116 + union { 117 + uint32_t mask[INTEL_PT_BLK_TYPE_CNT]; 118 + struct { 119 + uint32_t has_rflags:1; 120 + uint32_t has_rip:1; 121 + uint32_t has_rax:1; 122 + uint32_t has_rcx:1; 123 + uint32_t has_rdx:1; 124 + uint32_t has_rbx:1; 125 + uint32_t has_rsp:1; 126 + uint32_t has_rbp:1; 127 + uint32_t has_rsi:1; 128 + uint32_t has_rdi:1; 129 + uint32_t has_r8:1; 130 + uint32_t has_r9:1; 131 + uint32_t has_r10:1; 132 + uint32_t has_r11:1; 133 + uint32_t has_r12:1; 134 + uint32_t has_r13:1; 135 + uint32_t has_r14:1; 136 + uint32_t has_r15:1; 137 + uint32_t has_unused_0:14; 138 + uint32_t has_ip:1; 139 + uint32_t has_applicable_counters:1; 140 + uint32_t has_timestamp:1; 141 + uint32_t has_unused_1:29; 142 + uint32_t has_mem_access_address:1; 143 + uint32_t has_mem_aux_info:1; 144 + uint32_t has_mem_access_latency:1; 145 + uint32_t has_tsx_aux_info:1; 146 + uint32_t has_unused_2:28; 147 + uint32_t has_lbr_0; 148 + uint32_t has_lbr_1; 149 + uint32_t has_lbr_2; 150 + uint32_t has_xmm; 151 + }; 152 + }; 153 + union { 154 + uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT]; 155 + struct { 156 + struct { 157 + uint64_t rflags; 158 + uint64_t rip; 159 + uint64_t rax; 160 + uint64_t rcx; 161 + uint64_t rdx; 162 + uint64_t rbx; 163 + uint64_t rsp; 164 + uint64_t rbp; 165 + uint64_t rsi; 166 + uint64_t rdi; 167 + uint64_t r8; 168 + uint64_t r9; 169 + uint64_t r10; 170 + uint64_t r11; 171 + uint64_t r12; 172 + uint64_t r13; 173 + uint64_t r14; 174 + uint64_t r15; 175 + uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18]; 176 + }; 177 + struct { 178 + uint64_t ip; 179 + uint64_t applicable_counters; 180 + uint64_t timestamp; 181 + uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3]; 182 + }; 183 + struct { 184 + uint64_t mem_access_address; 185 + uint64_t mem_aux_info; 186 + uint64_t mem_access_latency; 187 + uint64_t tsx_aux_info; 188 + uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4]; 189 + }; 190 + uint64_t 
lbr_0[INTEL_PT_BLK_ITEM_ID_CNT]; 191 + uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT]; 192 + uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT]; 193 + uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT]; 194 + }; 195 + }; 196 + bool is_32_bit; 197 + }; 198 + 65 199 struct intel_pt_state { 66 200 enum intel_pt_sample_type type; 67 201 int err; ··· 204 68 uint64_t to_ip; 205 69 uint64_t cr3; 206 70 uint64_t tot_insn_cnt; 71 + uint64_t tot_cyc_cnt; 207 72 uint64_t timestamp; 208 73 uint64_t est_timestamp; 209 74 uint64_t trace_nr; ··· 213 76 uint64_t pwre_payload; 214 77 uint64_t pwrx_payload; 215 78 uint64_t cbr_payload; 79 + uint32_t cbr; 216 80 uint32_t flags; 217 81 enum intel_pt_insn_op insn_op; 218 82 int insn_len; 219 83 char insn[INTEL_PT_INSN_BUF_SZ]; 84 + struct intel_pt_blk_items items; 220 85 }; 221 86 222 87 struct intel_pt_insn; ··· 231 92 uint64_t trace_nr; 232 93 }; 233 94 95 + typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *); 96 + 234 97 struct intel_pt_params { 235 98 int (*get_trace)(struct intel_pt_buffer *buffer, void *data); 236 99 int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, 237 100 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, 238 101 uint64_t max_insn_cnt, void *data); 239 102 bool (*pgd_ip)(uint64_t ip, void *data); 103 + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); 240 104 void *data; 241 105 bool return_compression; 242 106 bool branch_enable; ··· 258 116 void intel_pt_decoder_free(struct intel_pt_decoder *decoder); 259 117 260 118 const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); 119 + 120 + int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp); 261 121 262 122 unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, 263 123 unsigned char *buf_b, size_t len_b,
+137 -3
tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
··· 62 62 [INTEL_PT_MWAIT] = "MWAIT", 63 63 [INTEL_PT_PWRE] = "PWRE", 64 64 [INTEL_PT_PWRX] = "PWRX", 65 + [INTEL_PT_BBP] = "BBP", 66 + [INTEL_PT_BIP] = "BIP", 67 + [INTEL_PT_BEP] = "BEP", 68 + [INTEL_PT_BEP_IP] = "BEP", 65 69 }; 66 70 67 71 const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) ··· 284 280 return 7; 285 281 } 286 282 283 + static int intel_pt_get_bbp(const unsigned char *buf, size_t len, 284 + struct intel_pt_pkt *packet) 285 + { 286 + if (len < 3) 287 + return INTEL_PT_NEED_MORE_BYTES; 288 + packet->type = INTEL_PT_BBP; 289 + packet->count = buf[2] >> 7; 290 + packet->payload = buf[2] & 0x1f; 291 + return 3; 292 + } 293 + 294 + static int intel_pt_get_bip_4(const unsigned char *buf, size_t len, 295 + struct intel_pt_pkt *packet) 296 + { 297 + if (len < 5) 298 + return INTEL_PT_NEED_MORE_BYTES; 299 + packet->type = INTEL_PT_BIP; 300 + packet->count = buf[0] >> 3; 301 + memcpy_le64(&packet->payload, buf + 1, 4); 302 + return 5; 303 + } 304 + 305 + static int intel_pt_get_bip_8(const unsigned char *buf, size_t len, 306 + struct intel_pt_pkt *packet) 307 + { 308 + if (len < 9) 309 + return INTEL_PT_NEED_MORE_BYTES; 310 + packet->type = INTEL_PT_BIP; 311 + packet->count = buf[0] >> 3; 312 + memcpy_le64(&packet->payload, buf + 1, 8); 313 + return 9; 314 + } 315 + 316 + static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet) 317 + { 318 + if (len < 2) 319 + return INTEL_PT_NEED_MORE_BYTES; 320 + packet->type = INTEL_PT_BEP; 321 + return 2; 322 + } 323 + 324 + static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet) 325 + { 326 + if (len < 2) 327 + return INTEL_PT_NEED_MORE_BYTES; 328 + packet->type = INTEL_PT_BEP_IP; 329 + return 2; 330 + } 331 + 287 332 static int intel_pt_get_ext(const unsigned char *buf, size_t len, 288 333 struct intel_pt_pkt *packet) 289 334 { ··· 373 320 return intel_pt_get_pwre(buf, len, packet); 374 321 case 0xA2: /* PWRX */ 375 322 return intel_pt_get_pwrx(buf, len, packet); 323 + case 0x63: /* 
BBP */ 324 + return intel_pt_get_bbp(buf, len, packet); 325 + case 0x33: /* BEP no IP */ 326 + return intel_pt_get_bep(len, packet); 327 + case 0xb3: /* BEP with IP */ 328 + return intel_pt_get_bep_ip(len, packet); 376 329 default: 377 330 return INTEL_PT_BAD_PACKET; 378 331 } ··· 527 468 } 528 469 529 470 static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, 530 - struct intel_pt_pkt *packet) 471 + struct intel_pt_pkt *packet, 472 + enum intel_pt_pkt_ctx ctx) 531 473 { 532 474 unsigned int byte; 533 475 ··· 538 478 return INTEL_PT_NEED_MORE_BYTES; 539 479 540 480 byte = buf[0]; 481 + 482 + switch (ctx) { 483 + case INTEL_PT_NO_CTX: 484 + break; 485 + case INTEL_PT_BLK_4_CTX: 486 + if ((byte & 0x7) == 4) 487 + return intel_pt_get_bip_4(buf, len, packet); 488 + break; 489 + case INTEL_PT_BLK_8_CTX: 490 + if ((byte & 0x7) == 4) 491 + return intel_pt_get_bip_8(buf, len, packet); 492 + break; 493 + default: 494 + break; 495 + }; 496 + 541 497 if (!(byte & BIT(0))) { 542 498 if (byte == 0) 543 499 return intel_pt_get_pad(packet); ··· 592 516 } 593 517 } 594 518 519 + void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet, 520 + enum intel_pt_pkt_ctx *ctx) 521 + { 522 + switch (packet->type) { 523 + case INTEL_PT_BAD: 524 + case INTEL_PT_PAD: 525 + case INTEL_PT_TSC: 526 + case INTEL_PT_TMA: 527 + case INTEL_PT_MTC: 528 + case INTEL_PT_FUP: 529 + case INTEL_PT_CYC: 530 + case INTEL_PT_CBR: 531 + case INTEL_PT_MNT: 532 + case INTEL_PT_EXSTOP: 533 + case INTEL_PT_EXSTOP_IP: 534 + case INTEL_PT_PWRE: 535 + case INTEL_PT_PWRX: 536 + case INTEL_PT_BIP: 537 + break; 538 + case INTEL_PT_TNT: 539 + case INTEL_PT_TIP: 540 + case INTEL_PT_TIP_PGD: 541 + case INTEL_PT_TIP_PGE: 542 + case INTEL_PT_MODE_EXEC: 543 + case INTEL_PT_MODE_TSX: 544 + case INTEL_PT_PIP: 545 + case INTEL_PT_OVF: 546 + case INTEL_PT_VMCS: 547 + case INTEL_PT_TRACESTOP: 548 + case INTEL_PT_PSB: 549 + case INTEL_PT_PSBEND: 550 + case INTEL_PT_PTWRITE: 551 + case INTEL_PT_PTWRITE_IP: 552 + 
case INTEL_PT_MWAIT: 553 + case INTEL_PT_BEP: 554 + case INTEL_PT_BEP_IP: 555 + *ctx = INTEL_PT_NO_CTX; 556 + break; 557 + case INTEL_PT_BBP: 558 + if (packet->count) 559 + *ctx = INTEL_PT_BLK_4_CTX; 560 + else 561 + *ctx = INTEL_PT_BLK_8_CTX; 562 + break; 563 + default: 564 + break; 565 + } 566 + } 567 + 595 568 int intel_pt_get_packet(const unsigned char *buf, size_t len, 596 - struct intel_pt_pkt *packet) 569 + struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx) 597 570 { 598 571 int ret; 599 572 600 - ret = intel_pt_do_get_packet(buf, len, packet); 573 + ret = intel_pt_do_get_packet(buf, len, packet, *ctx); 601 574 if (ret > 0) { 602 575 while (ret < 8 && len > (size_t)ret && !buf[ret]) 603 576 ret += 1; 577 + intel_pt_upd_pkt_ctx(packet, ctx); 604 578 } 605 579 return ret; 606 580 } ··· 728 602 return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload); 729 603 case INTEL_PT_PTWRITE_IP: 730 604 return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload); 605 + case INTEL_PT_BEP: 731 606 case INTEL_PT_EXSTOP: 732 607 return snprintf(buf, buf_len, "%s IP:0", name); 608 + case INTEL_PT_BEP_IP: 733 609 case INTEL_PT_EXSTOP_IP: 734 610 return snprintf(buf, buf_len, "%s IP:1", name); 735 611 case INTEL_PT_MWAIT: ··· 749 621 (unsigned int)((payload >> 4) & 0xf), 750 622 (unsigned int)(payload & 0xf), 751 623 (unsigned int)((payload >> 8) & 0xf)); 624 + case INTEL_PT_BBP: 625 + return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx", 626 + name, packet->count ? "4" : "8", payload); 627 + case INTEL_PT_BIP: 628 + return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx", 629 + name, packet->count, payload); 752 630 default: 753 631 break; 754 632 }
+20 -1
tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
··· 50 50 INTEL_PT_MWAIT, 51 51 INTEL_PT_PWRE, 52 52 INTEL_PT_PWRX, 53 + INTEL_PT_BBP, 54 + INTEL_PT_BIP, 55 + INTEL_PT_BEP, 56 + INTEL_PT_BEP_IP, 53 57 }; 54 58 55 59 struct intel_pt_pkt { ··· 62 58 uint64_t payload; 63 59 }; 64 60 61 + /* 62 + * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP 63 + * packets only occur in the context of a block (i.e. between BBP and BEP), that 64 + * context must be recorded and passed to the packet decoder. 65 + */ 66 + enum intel_pt_pkt_ctx { 67 + INTEL_PT_NO_CTX, /* BIP packets are invalid */ 68 + INTEL_PT_BLK_4_CTX, /* 4-byte BIP packets */ 69 + INTEL_PT_BLK_8_CTX, /* 8-byte BIP packets */ 70 + }; 71 + 65 72 const char *intel_pt_pkt_name(enum intel_pt_pkt_type); 66 73 67 74 int intel_pt_get_packet(const unsigned char *buf, size_t len, 68 - struct intel_pt_pkt *packet); 75 + struct intel_pt_pkt *packet, 76 + enum intel_pt_pkt_ctx *ctx); 77 + 78 + void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet, 79 + enum intel_pt_pkt_ctx *ctx); 69 80 70 81 int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len); 71 82
+703 -59
tools/perf/util/intel-pt.c
··· 33 33 #include "tsc.h" 34 34 #include "intel-pt.h" 35 35 #include "config.h" 36 + #include "time-utils.h" 37 + 38 + #include "../arch/x86/include/uapi/asm/perf_regs.h" 36 39 37 40 #include "intel-pt-decoder/intel-pt-log.h" 38 41 #include "intel-pt-decoder/intel-pt-decoder.h" ··· 43 40 #include "intel-pt-decoder/intel-pt-pkt-decoder.h" 44 41 45 42 #define MAX_TIMESTAMP (~0ULL) 43 + 44 + struct range { 45 + u64 start; 46 + u64 end; 47 + }; 46 48 47 49 struct intel_pt { 48 50 struct auxtrace auxtrace; ··· 103 95 u64 pwrx_id; 104 96 u64 cbr_id; 105 97 98 + bool sample_pebs; 99 + struct perf_evsel *pebs_evsel; 100 + 106 101 u64 tsc_bit; 107 102 u64 mtc_bit; 108 103 u64 mtc_freq_bits; ··· 120 109 121 110 char *filter; 122 111 struct addr_filters filts; 112 + 113 + struct range *time_ranges; 114 + unsigned int range_cnt; 123 115 }; 124 116 125 117 enum switch_state { ··· 159 145 bool have_sample; 160 146 u64 time; 161 147 u64 timestamp; 148 + u64 sel_timestamp; 149 + bool sel_start; 150 + unsigned int sel_idx; 162 151 u32 flags; 163 152 u16 insn_len; 164 153 u64 last_insn_cnt; 154 + u64 ipc_insn_cnt; 155 + u64 ipc_cyc_cnt; 156 + u64 last_in_insn_cnt; 157 + u64 last_in_cyc_cnt; 158 + u64 last_br_insn_cnt; 159 + u64 last_br_cyc_cnt; 160 + unsigned int cbr_seen; 165 161 char insn[INTEL_PT_INSN_BUF_SZ]; 166 162 }; 167 163 ··· 183 159 int ret, pkt_len, i; 184 160 char desc[INTEL_PT_PKT_DESC_MAX]; 185 161 const char *color = PERF_COLOR_BLUE; 162 + enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX; 186 163 187 164 color_fprintf(stdout, color, 188 165 ". ... 
Intel Processor Trace data: size %zu bytes\n", 189 166 len); 190 167 191 168 while (len) { 192 - ret = intel_pt_get_packet(buf, len, &packet); 169 + ret = intel_pt_get_packet(buf, len, &packet, &ctx); 193 170 if (ret > 0) 194 171 pkt_len = ret; 195 172 else ··· 249 224 return 0; 250 225 } 251 226 252 - /* This function assumes data is processed sequentially only */ 253 - static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) 227 + static int intel_pt_get_buffer(struct intel_pt_queue *ptq, 228 + struct auxtrace_buffer *buffer, 229 + struct auxtrace_buffer *old_buffer, 230 + struct intel_pt_buffer *b) 254 231 { 255 - struct intel_pt_queue *ptq = data; 256 - struct auxtrace_buffer *buffer = ptq->buffer; 257 - struct auxtrace_buffer *old_buffer = ptq->old_buffer; 258 - struct auxtrace_queue *queue; 259 232 bool might_overlap; 260 - 261 - if (ptq->stop) { 262 - b->len = 0; 263 - return 0; 264 - } 265 - 266 - queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; 267 - 268 - buffer = auxtrace_buffer__next(queue, buffer); 269 - if (!buffer) { 270 - if (old_buffer) 271 - auxtrace_buffer__drop_data(old_buffer); 272 - b->len = 0; 273 - return 0; 274 - } 275 - 276 - ptq->buffer = buffer; 277 233 278 234 if (!buffer->data) { 279 235 int fd = perf_data__fd(ptq->pt->session->data); ··· 284 278 } else { 285 279 b->consecutive = true; 286 280 } 281 + 282 + return 0; 283 + } 284 + 285 + /* Do not drop buffers with references - refer intel_pt_get_trace() */ 286 + static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq, 287 + struct auxtrace_buffer *buffer) 288 + { 289 + if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer) 290 + return; 291 + 292 + auxtrace_buffer__drop_data(buffer); 293 + } 294 + 295 + /* Must be serialized with respect to intel_pt_get_trace() */ 296 + static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb, 297 + void *cb_data) 298 + { 299 + struct intel_pt_queue *ptq = data; 300 + struct auxtrace_buffer *buffer 
= ptq->buffer; 301 + struct auxtrace_buffer *old_buffer = ptq->old_buffer; 302 + struct auxtrace_queue *queue; 303 + int err = 0; 304 + 305 + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; 306 + 307 + while (1) { 308 + struct intel_pt_buffer b = { .len = 0 }; 309 + 310 + buffer = auxtrace_buffer__next(queue, buffer); 311 + if (!buffer) 312 + break; 313 + 314 + err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b); 315 + if (err) 316 + break; 317 + 318 + if (b.len) { 319 + intel_pt_lookahead_drop_buffer(ptq, old_buffer); 320 + old_buffer = buffer; 321 + } else { 322 + intel_pt_lookahead_drop_buffer(ptq, buffer); 323 + continue; 324 + } 325 + 326 + err = cb(&b, cb_data); 327 + if (err) 328 + break; 329 + } 330 + 331 + if (buffer != old_buffer) 332 + intel_pt_lookahead_drop_buffer(ptq, buffer); 333 + intel_pt_lookahead_drop_buffer(ptq, old_buffer); 334 + 335 + return err; 336 + } 337 + 338 + /* 339 + * This function assumes data is processed sequentially only. 340 + * Must be serialized with respect to intel_pt_lookahead() 341 + */ 342 + static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) 343 + { 344 + struct intel_pt_queue *ptq = data; 345 + struct auxtrace_buffer *buffer = ptq->buffer; 346 + struct auxtrace_buffer *old_buffer = ptq->old_buffer; 347 + struct auxtrace_queue *queue; 348 + int err; 349 + 350 + if (ptq->stop) { 351 + b->len = 0; 352 + return 0; 353 + } 354 + 355 + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; 356 + 357 + buffer = auxtrace_buffer__next(queue, buffer); 358 + if (!buffer) { 359 + if (old_buffer) 360 + auxtrace_buffer__drop_data(old_buffer); 361 + b->len = 0; 362 + return 0; 363 + } 364 + 365 + ptq->buffer = buffer; 366 + 367 + err = intel_pt_get_buffer(ptq, buffer, old_buffer, b); 368 + if (err) 369 + return err; 287 370 288 371 if (ptq->step_through_buffers) 289 372 ptq->stop = true; ··· 893 798 894 799 params.get_trace = intel_pt_get_trace; 895 800 params.walk_insn = intel_pt_walk_next_insn; 801 + 
params.lookahead = intel_pt_lookahead; 896 802 params.data = ptq; 897 803 params.return_compression = intel_pt_return_compression(pt); 898 804 params.branch_enable = intel_pt_branch_enable(pt); ··· 1017 921 ptq->flags |= PERF_IP_FLAG_TRACE_END; 1018 922 } 1019 923 924 + static void intel_pt_setup_time_range(struct intel_pt *pt, 925 + struct intel_pt_queue *ptq) 926 + { 927 + if (!pt->range_cnt) 928 + return; 929 + 930 + ptq->sel_timestamp = pt->time_ranges[0].start; 931 + ptq->sel_idx = 0; 932 + 933 + if (ptq->sel_timestamp) { 934 + ptq->sel_start = true; 935 + } else { 936 + ptq->sel_timestamp = pt->time_ranges[0].end; 937 + ptq->sel_start = false; 938 + } 939 + } 940 + 1020 941 static int intel_pt_setup_queue(struct intel_pt *pt, 1021 942 struct auxtrace_queue *queue, 1022 943 unsigned int queue_nr) ··· 1053 940 ptq->cpu = queue->cpu; 1054 941 ptq->tid = queue->tid; 1055 942 943 + ptq->cbr_seen = UINT_MAX; 944 + 1056 945 if (pt->sampling_mode && !pt->snapshot_mode && 1057 946 pt->timeless_decoding) 1058 947 ptq->step_through_buffers = true; 1059 948 1060 949 ptq->sync_switch = pt->sync_switch; 950 + 951 + intel_pt_setup_time_range(pt, ptq); 1061 952 } 1062 953 1063 954 if (!ptq->on_heap && ··· 1076 959 intel_pt_log("queue %u getting timestamp\n", queue_nr); 1077 960 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", 1078 961 queue_nr, ptq->cpu, ptq->pid, ptq->tid); 962 + 963 + if (ptq->sel_start && ptq->sel_timestamp) { 964 + ret = intel_pt_fast_forward(ptq->decoder, 965 + ptq->sel_timestamp); 966 + if (ret) 967 + return ret; 968 + } 969 + 1079 970 while (1) { 1080 971 state = intel_pt_decode(ptq->decoder); 1081 972 if (state->err) { ··· 1103 978 queue_nr, ptq->timestamp); 1104 979 ptq->state = state; 1105 980 ptq->have_sample = true; 981 + if (ptq->sel_start && ptq->sel_timestamp && 982 + ptq->timestamp < ptq->sel_timestamp) 983 + ptq->have_sample = false; 1106 984 intel_pt_sample_flags(ptq); 1107 985 ret = auxtrace_heap__add(&pt->heap, queue_nr, 
ptq->timestamp); 1108 986 if (ret) ··· 1187 1059 pt->num_events++ < pt->synth_opts.initial_skip; 1188 1060 } 1189 1061 1062 + /* 1063 + * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen. 1064 + * Also ensure CBR is first non-skipped event by allowing for 4 more samples 1065 + * from this decoder state. 1066 + */ 1067 + static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt) 1068 + { 1069 + return pt->synth_opts.initial_skip && 1070 + pt->num_events + 4 < pt->synth_opts.initial_skip; 1071 + } 1072 + 1073 + static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq, 1074 + union perf_event *event, 1075 + struct perf_sample *sample) 1076 + { 1077 + event->sample.header.type = PERF_RECORD_SAMPLE; 1078 + event->sample.header.size = sizeof(struct perf_event_header); 1079 + 1080 + sample->pid = ptq->pid; 1081 + sample->tid = ptq->tid; 1082 + sample->cpu = ptq->cpu; 1083 + sample->insn_len = ptq->insn_len; 1084 + memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); 1085 + } 1086 + 1190 1087 static void intel_pt_prep_b_sample(struct intel_pt *pt, 1191 1088 struct intel_pt_queue *ptq, 1192 1089 union perf_event *event, 1193 1090 struct perf_sample *sample) 1194 1091 { 1092 + intel_pt_prep_a_sample(ptq, event, sample); 1093 + 1195 1094 if (!pt->timeless_decoding) 1196 1095 sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); 1197 1096 1198 1097 sample->ip = ptq->state->from_ip; 1199 1098 sample->cpumode = intel_pt_cpumode(pt, sample->ip); 1200 - sample->pid = ptq->pid; 1201 - sample->tid = ptq->tid; 1202 1099 sample->addr = ptq->state->to_ip; 1203 1100 sample->period = 1; 1204 - sample->cpu = ptq->cpu; 1205 1101 sample->flags = ptq->flags; 1206 - sample->insn_len = ptq->insn_len; 1207 - memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); 1208 1102 1209 - event->sample.header.type = PERF_RECORD_SAMPLE; 1210 1103 event->sample.header.misc = sample->cpumode; 1211 - event->sample.header.size = sizeof(struct perf_event_header); 1212 
1104 } 1213 1105 1214 1106 static int intel_pt_inject_event(union perf_event *event, ··· 1301 1153 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1302 1154 } 1303 1155 1156 + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; 1157 + if (sample.cyc_cnt) { 1158 + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; 1159 + ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; 1160 + ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; 1161 + } 1162 + 1304 1163 return intel_pt_deliver_synth_b_event(pt, event, &sample, 1305 1164 pt->branches_sample_type); 1306 1165 } ··· 1362 1207 sample.id = ptq->pt->instructions_id; 1363 1208 sample.stream_id = ptq->pt->instructions_id; 1364 1209 sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; 1210 + 1211 + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; 1212 + if (sample.cyc_cnt) { 1213 + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; 1214 + ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; 1215 + ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt; 1216 + } 1365 1217 1366 1218 ptq->last_insn_cnt = ptq->state->tot_insn_cnt; 1367 1219 ··· 1443 1281 struct perf_synth_intel_cbr raw; 1444 1282 u32 flags; 1445 1283 1446 - if (intel_pt_skip_event(pt)) 1284 + if (intel_pt_skip_cbr_event(pt)) 1447 1285 return 0; 1286 + 1287 + ptq->cbr_seen = ptq->state->cbr; 1448 1288 1449 1289 intel_pt_prep_p_sample(pt, ptq, event, &sample); 1450 1290 ··· 1565 1401 pt->pwr_events_sample_type); 1566 1402 } 1567 1403 1404 + /* 1405 + * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer 1406 + * intel_pt_add_gp_regs(). 
1407 + */ 1408 + static const int pebs_gp_regs[] = { 1409 + [PERF_REG_X86_FLAGS] = 1, 1410 + [PERF_REG_X86_IP] = 2, 1411 + [PERF_REG_X86_AX] = 3, 1412 + [PERF_REG_X86_CX] = 4, 1413 + [PERF_REG_X86_DX] = 5, 1414 + [PERF_REG_X86_BX] = 6, 1415 + [PERF_REG_X86_SP] = 7, 1416 + [PERF_REG_X86_BP] = 8, 1417 + [PERF_REG_X86_SI] = 9, 1418 + [PERF_REG_X86_DI] = 10, 1419 + [PERF_REG_X86_R8] = 11, 1420 + [PERF_REG_X86_R9] = 12, 1421 + [PERF_REG_X86_R10] = 13, 1422 + [PERF_REG_X86_R11] = 14, 1423 + [PERF_REG_X86_R12] = 15, 1424 + [PERF_REG_X86_R13] = 16, 1425 + [PERF_REG_X86_R14] = 17, 1426 + [PERF_REG_X86_R15] = 18, 1427 + }; 1428 + 1429 + static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos, 1430 + const struct intel_pt_blk_items *items, 1431 + u64 regs_mask) 1432 + { 1433 + const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS]; 1434 + u32 mask = items->mask[INTEL_PT_GP_REGS_POS]; 1435 + u32 bit; 1436 + int i; 1437 + 1438 + for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) { 1439 + /* Get the PEBS gp_regs array index */ 1440 + int n = pebs_gp_regs[i] - 1; 1441 + 1442 + if (n < 0) 1443 + continue; 1444 + /* 1445 + * Add only registers that were requested (i.e. 'regs_mask') and 1446 + * that were provided (i.e. 'mask'), and update the resulting 1447 + * mask (i.e. 'intr_regs->mask') accordingly. 1448 + */ 1449 + if (mask & 1 << n && regs_mask & bit) { 1450 + intr_regs->mask |= bit; 1451 + *pos++ = gp_regs[n]; 1452 + } 1453 + } 1454 + 1455 + return pos; 1456 + } 1457 + 1458 + #ifndef PERF_REG_X86_XMM0 1459 + #define PERF_REG_X86_XMM0 32 1460 + #endif 1461 + 1462 + static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos, 1463 + const struct intel_pt_blk_items *items, 1464 + u64 regs_mask) 1465 + { 1466 + u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0); 1467 + const u64 *xmm = items->xmm; 1468 + 1469 + /* 1470 + * If there are any XMM registers, then there should be all of them. 
1471 + * Nevertheless, follow the logic to add only registers that were 1472 + * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'), 1473 + * and update the resulting mask (i.e. 'intr_regs->mask') accordingly. 1474 + */ 1475 + intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0; 1476 + 1477 + for (; mask; mask >>= 1, xmm++) { 1478 + if (mask & 1) 1479 + *pos++ = *xmm; 1480 + } 1481 + } 1482 + 1483 + #define LBR_INFO_MISPRED (1ULL << 63) 1484 + #define LBR_INFO_IN_TX (1ULL << 62) 1485 + #define LBR_INFO_ABORT (1ULL << 61) 1486 + #define LBR_INFO_CYCLES 0xffff 1487 + 1488 + /* Refer kernel's intel_pmu_store_pebs_lbrs() */ 1489 + static u64 intel_pt_lbr_flags(u64 info) 1490 + { 1491 + union { 1492 + struct branch_flags flags; 1493 + u64 result; 1494 + } u = { 1495 + .flags = { 1496 + .mispred = !!(info & LBR_INFO_MISPRED), 1497 + .predicted = !(info & LBR_INFO_MISPRED), 1498 + .in_tx = !!(info & LBR_INFO_IN_TX), 1499 + .abort = !!(info & LBR_INFO_ABORT), 1500 + .cycles = info & LBR_INFO_CYCLES, 1501 + } 1502 + }; 1503 + 1504 + return u.result; 1505 + } 1506 + 1507 + static void intel_pt_add_lbrs(struct branch_stack *br_stack, 1508 + const struct intel_pt_blk_items *items) 1509 + { 1510 + u64 *to; 1511 + int i; 1512 + 1513 + br_stack->nr = 0; 1514 + 1515 + to = &br_stack->entries[0].from; 1516 + 1517 + for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) { 1518 + u32 mask = items->mask[i]; 1519 + const u64 *from = items->val[i]; 1520 + 1521 + for (; mask; mask >>= 3, from += 3) { 1522 + if ((mask & 7) == 7) { 1523 + *to++ = from[0]; 1524 + *to++ = from[1]; 1525 + *to++ = intel_pt_lbr_flags(from[2]); 1526 + br_stack->nr += 1; 1527 + } 1528 + } 1529 + } 1530 + } 1531 + 1532 + /* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ 1533 + #define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3) 1534 + 1535 + static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) 1536 + { 1537 + const struct intel_pt_blk_items *items = &ptq->state->items; 1538 + struct 
perf_sample sample = { .ip = 0, }; 1539 + union perf_event *event = ptq->event_buf; 1540 + struct intel_pt *pt = ptq->pt; 1541 + struct perf_evsel *evsel = pt->pebs_evsel; 1542 + u64 sample_type = evsel->attr.sample_type; 1543 + u64 id = evsel->id[0]; 1544 + u8 cpumode; 1545 + 1546 + if (intel_pt_skip_event(pt)) 1547 + return 0; 1548 + 1549 + intel_pt_prep_a_sample(ptq, event, &sample); 1550 + 1551 + sample.id = id; 1552 + sample.stream_id = id; 1553 + 1554 + if (!evsel->attr.freq) 1555 + sample.period = evsel->attr.sample_period; 1556 + 1557 + /* No support for non-zero CS base */ 1558 + if (items->has_ip) 1559 + sample.ip = items->ip; 1560 + else if (items->has_rip) 1561 + sample.ip = items->rip; 1562 + else 1563 + sample.ip = ptq->state->from_ip; 1564 + 1565 + /* No support for guest mode at this time */ 1566 + cpumode = sample.ip < ptq->pt->kernel_start ? 1567 + PERF_RECORD_MISC_USER : 1568 + PERF_RECORD_MISC_KERNEL; 1569 + 1570 + event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; 1571 + 1572 + sample.cpumode = cpumode; 1573 + 1574 + if (sample_type & PERF_SAMPLE_TIME) { 1575 + u64 timestamp = 0; 1576 + 1577 + if (items->has_timestamp) 1578 + timestamp = items->timestamp; 1579 + else if (!pt->timeless_decoding) 1580 + timestamp = ptq->timestamp; 1581 + if (timestamp) 1582 + sample.time = tsc_to_perf_time(timestamp, &pt->tc); 1583 + } 1584 + 1585 + if (sample_type & PERF_SAMPLE_CALLCHAIN && 1586 + pt->synth_opts.callchain) { 1587 + thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, 1588 + pt->synth_opts.callchain_sz, sample.ip, 1589 + pt->kernel_start); 1590 + sample.callchain = ptq->chain; 1591 + } 1592 + 1593 + if (sample_type & PERF_SAMPLE_REGS_INTR && 1594 + items->mask[INTEL_PT_GP_REGS_POS]) { 1595 + u64 regs[sizeof(sample.intr_regs.mask)]; 1596 + u64 regs_mask = evsel->attr.sample_regs_intr; 1597 + u64 *pos; 1598 + 1599 + sample.intr_regs.abi = items->is_32_bit ? 
1600 + PERF_SAMPLE_REGS_ABI_32 : 1601 + PERF_SAMPLE_REGS_ABI_64; 1602 + sample.intr_regs.regs = regs; 1603 + 1604 + pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); 1605 + 1606 + intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); 1607 + } 1608 + 1609 + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { 1610 + struct { 1611 + struct branch_stack br_stack; 1612 + struct branch_entry entries[LBRS_MAX]; 1613 + } br; 1614 + 1615 + if (items->mask[INTEL_PT_LBR_0_POS] || 1616 + items->mask[INTEL_PT_LBR_1_POS] || 1617 + items->mask[INTEL_PT_LBR_2_POS]) { 1618 + intel_pt_add_lbrs(&br.br_stack, items); 1619 + sample.branch_stack = &br.br_stack; 1620 + } else if (pt->synth_opts.last_branch) { 1621 + intel_pt_copy_last_branch_rb(ptq); 1622 + sample.branch_stack = ptq->last_branch; 1623 + } else { 1624 + br.br_stack.nr = 0; 1625 + sample.branch_stack = &br.br_stack; 1626 + } 1627 + } 1628 + 1629 + if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) 1630 + sample.addr = items->mem_access_address; 1631 + 1632 + if (sample_type & PERF_SAMPLE_WEIGHT) { 1633 + /* 1634 + * Refer kernel's setup_pebs_adaptive_sample_data() and 1635 + * intel_hsw_weight(). 1636 + */ 1637 + if (items->has_mem_access_latency) 1638 + sample.weight = items->mem_access_latency; 1639 + if (!sample.weight && items->has_tsx_aux_info) { 1640 + /* Cycles last block */ 1641 + sample.weight = (u32)items->tsx_aux_info; 1642 + } 1643 + } 1644 + 1645 + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { 1646 + u64 ax = items->has_rax ? 
items->rax : 0; 1647 + /* Refer kernel's intel_hsw_transaction() */ 1648 + u64 txn = (u8)(items->tsx_aux_info >> 32); 1649 + 1650 + /* For RTM XABORTs also log the abort code from AX */ 1651 + if (txn & PERF_TXN_TRANSACTION && ax & 1) 1652 + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; 1653 + sample.transaction = txn; 1654 + } 1655 + 1656 + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); 1657 + } 1658 + 1568 1659 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 1569 1660 pid_t pid, pid_t tid, u64 ip, u64 timestamp) 1570 1661 { ··· 1884 1465 } 1885 1466 1886 1467 #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ 1887 - INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ 1888 - INTEL_PT_CBR_CHG) 1468 + INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT) 1889 1469 1890 1470 static int intel_pt_sample(struct intel_pt_queue *ptq) 1891 1471 { ··· 1897 1479 1898 1480 ptq->have_sample = false; 1899 1481 1900 - if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { 1901 - if (state->type & INTEL_PT_CBR_CHG) { 1482 + if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) { 1483 + /* 1484 + * Cycle count and instruction count only go together to create 1485 + * a valid IPC ratio when the cycle count changes. 1486 + */ 1487 + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; 1488 + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; 1489 + } 1490 + 1491 + /* 1492 + * Do PEBS first to allow for the possibility that the PEBS timestamp 1493 + * precedes the current timestamp. 
1494 + */ 1495 + if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) { 1496 + err = intel_pt_synth_pebs_sample(ptq); 1497 + if (err) 1498 + return err; 1499 + } 1500 + 1501 + if (pt->sample_pwr_events) { 1502 + if (ptq->state->cbr != ptq->cbr_seen) { 1902 1503 err = intel_pt_synth_cbr_sample(ptq); 1903 1504 if (err) 1904 1505 return err; 1905 1506 } 1906 - if (state->type & INTEL_PT_MWAIT_OP) { 1907 - err = intel_pt_synth_mwait_sample(ptq); 1908 - if (err) 1909 - return err; 1910 - } 1911 - if (state->type & INTEL_PT_PWR_ENTRY) { 1912 - err = intel_pt_synth_pwre_sample(ptq); 1913 - if (err) 1914 - return err; 1915 - } 1916 - if (state->type & INTEL_PT_EX_STOP) { 1917 - err = intel_pt_synth_exstop_sample(ptq); 1918 - if (err) 1919 - return err; 1920 - } 1921 - if (state->type & INTEL_PT_PWR_EXIT) { 1922 - err = intel_pt_synth_pwrx_sample(ptq); 1923 - if (err) 1924 - return err; 1507 + if (state->type & INTEL_PT_PWR_EVT) { 1508 + if (state->type & INTEL_PT_MWAIT_OP) { 1509 + err = intel_pt_synth_mwait_sample(ptq); 1510 + if (err) 1511 + return err; 1512 + } 1513 + if (state->type & INTEL_PT_PWR_ENTRY) { 1514 + err = intel_pt_synth_pwre_sample(ptq); 1515 + if (err) 1516 + return err; 1517 + } 1518 + if (state->type & INTEL_PT_EX_STOP) { 1519 + err = intel_pt_synth_exstop_sample(ptq); 1520 + if (err) 1521 + return err; 1522 + } 1523 + if (state->type & INTEL_PT_PWR_EXIT) { 1524 + err = intel_pt_synth_pwrx_sample(ptq); 1525 + if (err) 1526 + return err; 1527 + } 1925 1528 } 1926 1529 } 1927 1530 ··· 2080 1641 } 2081 1642 } 2082 1643 1644 + /* 1645 + * To filter against time ranges, it is only necessary to look at the next start 1646 + * or end time. 
1647 + */ 1648 + static bool intel_pt_next_time(struct intel_pt_queue *ptq) 1649 + { 1650 + struct intel_pt *pt = ptq->pt; 1651 + 1652 + if (ptq->sel_start) { 1653 + /* Next time is an end time */ 1654 + ptq->sel_start = false; 1655 + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; 1656 + return true; 1657 + } else if (ptq->sel_idx + 1 < pt->range_cnt) { 1658 + /* Next time is a start time */ 1659 + ptq->sel_start = true; 1660 + ptq->sel_idx += 1; 1661 + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start; 1662 + return true; 1663 + } 1664 + 1665 + /* No next time */ 1666 + return false; 1667 + } 1668 + 1669 + static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp) 1670 + { 1671 + int err; 1672 + 1673 + while (1) { 1674 + if (ptq->sel_start) { 1675 + if (ptq->timestamp >= ptq->sel_timestamp) { 1676 + /* After start time, so consider next time */ 1677 + intel_pt_next_time(ptq); 1678 + if (!ptq->sel_timestamp) { 1679 + /* No end time */ 1680 + return 0; 1681 + } 1682 + /* Check against end time */ 1683 + continue; 1684 + } 1685 + /* Before start time, so fast forward */ 1686 + ptq->have_sample = false; 1687 + if (ptq->sel_timestamp > *ff_timestamp) { 1688 + if (ptq->sync_switch) { 1689 + intel_pt_next_tid(ptq->pt, ptq); 1690 + ptq->switch_state = INTEL_PT_SS_UNKNOWN; 1691 + } 1692 + *ff_timestamp = ptq->sel_timestamp; 1693 + err = intel_pt_fast_forward(ptq->decoder, 1694 + ptq->sel_timestamp); 1695 + if (err) 1696 + return err; 1697 + } 1698 + return 0; 1699 + } else if (ptq->timestamp > ptq->sel_timestamp) { 1700 + /* After end time, so consider next time */ 1701 + if (!intel_pt_next_time(ptq)) { 1702 + /* No next time range, so stop decoding */ 1703 + ptq->have_sample = false; 1704 + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; 1705 + return 1; 1706 + } 1707 + /* Check against next start time */ 1708 + continue; 1709 + } else { 1710 + /* Before end time */ 1711 + return 0; 1712 + } 1713 + } 1714 + } 1715 + 2083 1716 static int 
intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) 2084 1717 { 2085 1718 const struct intel_pt_state *state = ptq->state; 2086 1719 struct intel_pt *pt = ptq->pt; 1720 + u64 ff_timestamp = 0; 2087 1721 int err; 2088 1722 2089 1723 if (!pt->kernel_start) { ··· 2219 1707 ptq->timestamp = state->est_timestamp; 2220 1708 } else if (state->timestamp > ptq->timestamp) { 2221 1709 ptq->timestamp = state->timestamp; 1710 + } 1711 + 1712 + if (ptq->sel_timestamp) { 1713 + err = intel_pt_time_filter(ptq, &ff_timestamp); 1714 + if (err) 1715 + return err; 2222 1716 } 2223 1717 2224 1718 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { ··· 2368 1850 2369 1851 switch (ptq->switch_state) { 2370 1852 case INTEL_PT_SS_NOT_TRACING: 2371 - ptq->next_tid = -1; 2372 1853 break; 2373 1854 case INTEL_PT_SS_UNKNOWN: 2374 1855 case INTEL_PT_SS_TRACING: ··· 2387 1870 ptq->switch_state = INTEL_PT_SS_TRACING; 2388 1871 break; 2389 1872 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 2390 - ptq->next_tid = tid; 2391 1873 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); 2392 1874 break; 2393 1875 default: 2394 1876 break; 2395 1877 } 1878 + 1879 + ptq->next_tid = -1; 2396 1880 2397 1881 return 1; 2398 1882 } ··· 2423 1905 return machine__set_current_tid(pt->machine, cpu, -1, tid); 2424 1906 } 2425 1907 1908 + static int intel_pt_context_switch_in(struct intel_pt *pt, 1909 + struct perf_sample *sample) 1910 + { 1911 + pid_t pid = sample->pid; 1912 + pid_t tid = sample->tid; 1913 + int cpu = sample->cpu; 1914 + 1915 + if (pt->sync_switch) { 1916 + struct intel_pt_queue *ptq; 1917 + 1918 + ptq = intel_pt_cpu_to_ptq(pt, cpu); 1919 + if (ptq && ptq->sync_switch) { 1920 + ptq->next_tid = -1; 1921 + switch (ptq->switch_state) { 1922 + case INTEL_PT_SS_NOT_TRACING: 1923 + case INTEL_PT_SS_UNKNOWN: 1924 + case INTEL_PT_SS_TRACING: 1925 + break; 1926 + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: 1927 + case INTEL_PT_SS_EXPECTING_SWITCH_IP: 1928 + ptq->switch_state = 
INTEL_PT_SS_TRACING; 1929 + break; 1930 + default: 1931 + break; 1932 + } 1933 + } 1934 + } 1935 + 1936 + /* 1937 + * If the current tid has not been updated yet, ensure it is now that 1938 + * a "switch in" event has occurred. 1939 + */ 1940 + if (machine__get_current_tid(pt->machine, cpu) == tid) 1941 + return 0; 1942 + 1943 + return machine__set_current_tid(pt->machine, cpu, pid, tid); 1944 + } 1945 + 2426 1946 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, 2427 1947 struct perf_sample *sample) 2428 1948 { ··· 2472 1916 2473 1917 if (pt->have_sched_switch == 3) { 2474 1918 if (!out) 2475 - return 0; 1919 + return intel_pt_context_switch_in(pt, sample); 2476 1920 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { 2477 1921 pr_err("Expecting CPU-wide context switch event\n"); 2478 1922 return -EINVAL; ··· 2632 2076 thread__put(pt->unknown_thread); 2633 2077 addr_filters__exit(&pt->filts); 2634 2078 zfree(&pt->filter); 2079 + zfree(&pt->time_ranges); 2635 2080 free(pt); 2636 2081 } 2637 2082 ··· 2930 2373 return 0; 2931 2374 } 2932 2375 2376 + /* Find least TSC which converts to ns or later */ 2377 + static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt) 2378 + { 2379 + u64 tsc, tm; 2380 + 2381 + tsc = perf_time_to_tsc(ns, &pt->tc); 2382 + 2383 + while (1) { 2384 + tm = tsc_to_perf_time(tsc, &pt->tc); 2385 + if (tm < ns) 2386 + break; 2387 + tsc -= 1; 2388 + } 2389 + 2390 + while (tm < ns) 2391 + tm = tsc_to_perf_time(++tsc, &pt->tc); 2392 + 2393 + return tsc; 2394 + } 2395 + 2396 + /* Find greatest TSC which converts to ns or earlier */ 2397 + static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt) 2398 + { 2399 + u64 tsc, tm; 2400 + 2401 + tsc = perf_time_to_tsc(ns, &pt->tc); 2402 + 2403 + while (1) { 2404 + tm = tsc_to_perf_time(tsc, &pt->tc); 2405 + if (tm > ns) 2406 + break; 2407 + tsc += 1; 2408 + } 2409 + 2410 + while (tm > ns) 2411 + tm = tsc_to_perf_time(--tsc, &pt->tc); 2412 + 2413 + return tsc; 2414 + } 2415 + 
2416 + static int intel_pt_setup_time_ranges(struct intel_pt *pt, 2417 + struct itrace_synth_opts *opts) 2418 + { 2419 + struct perf_time_interval *p = opts->ptime_range; 2420 + int n = opts->range_num; 2421 + int i; 2422 + 2423 + if (!n || !p || pt->timeless_decoding) 2424 + return 0; 2425 + 2426 + pt->time_ranges = calloc(n, sizeof(struct range)); 2427 + if (!pt->time_ranges) 2428 + return -ENOMEM; 2429 + 2430 + pt->range_cnt = n; 2431 + 2432 + intel_pt_log("%s: %u range(s)\n", __func__, n); 2433 + 2434 + for (i = 0; i < n; i++) { 2435 + struct range *r = &pt->time_ranges[i]; 2436 + u64 ts = p[i].start; 2437 + u64 te = p[i].end; 2438 + 2439 + /* 2440 + * Take care to ensure the TSC range matches the perf-time range 2441 + * when converted back to perf-time. 2442 + */ 2443 + r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; 2444 + r->end = te ? intel_pt_tsc_end(te, pt) : 0; 2445 + 2446 + intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n", 2447 + i, ts, te); 2448 + intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n", 2449 + i, r->start, r->end); 2450 + } 2451 + 2452 + return 0; 2453 + } 2454 + 2933 2455 static const char * const intel_pt_info_fmts[] = { 2934 2456 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", 2935 2457 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", ··· 3215 2579 } else { 3216 2580 itrace_synth_opts__set_default(&pt->synth_opts, 3217 2581 session->itrace_synth_opts->default_no_sample); 3218 - if (use_browser != -1) { 2582 + if (!session->itrace_synth_opts->default_no_sample && 2583 + !session->itrace_synth_opts->inject) { 3219 2584 pt->synth_opts.branches = false; 3220 2585 pt->synth_opts.callchain = true; 3221 2586 } ··· 3239 2602 intel_pt_log("Maximum non-turbo ratio %u\n", 3240 2603 pt->max_non_turbo_ratio); 3241 2604 pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; 2605 + } 2606 + 2607 + if (session->itrace_synth_opts) { 2608 + err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts); 2609 
+ if (err) 2610 + goto err_delete_thread; 3242 2611 } 3243 2612 3244 2613 if (pt->synth_opts.calls) ··· 3287 2644 err_free: 3288 2645 addr_filters__exit(&pt->filts); 3289 2646 zfree(&pt->filter); 2647 + zfree(&pt->time_ranges); 3290 2648 free(pt); 3291 2649 return err; 3292 2650 }
+1 -1
tools/perf/util/jitdump.c
··· 28 28 #include "genelf.h" 29 29 #include "../builtin.h" 30 30 31 - #include "sane_ctype.h" 31 + #include <linux/ctype.h> 32 32 33 33 struct jit_buf_desc { 34 34 struct perf_data *output;
+29 -7
tools/perf/util/machine.c
··· 15 15 #include "strlist.h" 16 16 #include "thread.h" 17 17 #include "vdso.h" 18 + #include "util.h" 18 19 #include <stdbool.h> 19 20 #include <sys/types.h> 20 21 #include <sys/stat.h> ··· 25 24 #include "asm/bug.h" 26 25 #include "bpf-event.h" 27 26 28 - #include "sane_ctype.h" 27 + #include <linux/ctype.h> 29 28 #include <symbol/kallsyms.h> 30 29 #include <linux/mman.h> 31 30 ··· 210 209 211 210 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 212 211 struct threads *threads = &machine->threads[i]; 212 + struct thread *thread, *n; 213 + /* 214 + * Forget about the dead, at this point whatever threads were 215 + * left in the dead lists better have a reference count taken 216 + * by who is using them, and then, when they drop those references 217 + * and it finally hits zero, thread__put() will check and see that 218 + * its not in the dead threads list and will not try to remove it 219 + * from there, just calling thread__delete() straight away. 220 + */ 221 + list_for_each_entry_safe(thread, n, &threads->dead, node) 222 + list_del_init(&thread->node); 223 + 213 224 exit_rwsem(&threads->lock); 214 225 } 215 226 } ··· 717 704 return -ENOMEM; 718 705 719 706 map->start = event->ksymbol_event.addr; 720 - map->pgoff = map->start; 721 707 map->end = map->start + event->ksymbol_event.len; 722 708 map_groups__insert(&machine->kmaps, map); 723 709 } 724 710 725 - sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len, 711 + sym = symbol__new(map->map_ip(map, map->start), 712 + event->ksymbol_event.len, 726 713 0, 0, event->ksymbol_event.name); 727 714 if (!sym) 728 715 return -ENOMEM; ··· 1254 1241 return NULL; 1255 1242 1256 1243 tmp = fgets(version, sizeof(version), file); 1257 - if (!tmp) 1258 - *version = '\0'; 1259 1244 fclose(file); 1245 + if (!tmp) 1246 + return NULL; 1260 1247 1261 1248 name = strstr(version, prefix); 1262 1249 if (!name) ··· 1771 1758 if (threads->last_match == th) 1772 1759 threads__set_last_match(threads, NULL); 1773 1760 1774 - 
BUG_ON(refcount_read(&th->refcnt) == 0); 1775 1761 if (lock) 1776 1762 down_write(&threads->lock); 1763 + 1764 + BUG_ON(refcount_read(&th->refcnt) == 0); 1765 + 1777 1766 rb_erase_cached(&th->rb_node, &threads->entries); 1778 1767 RB_CLEAR_NODE(&th->rb_node); 1779 1768 --threads->nr; ··· 1785 1770 * will be called and we will remove it from the dead_threads list. 1786 1771 */ 1787 1772 list_add_tail(&th->node, &threads->dead); 1773 + 1774 + /* 1775 + * We need to do the put here because if this is the last refcount, 1776 + * then we will be touching the threads->dead head when removing the 1777 + * thread. 1778 + */ 1779 + thread__put(th); 1780 + 1788 1781 if (lock) 1789 1782 up_write(&threads->lock); 1790 - thread__put(th); 1791 1783 } 1792 1784 1793 1785 void machine__remove_thread(struct machine *machine, struct thread *th)
+6
tools/perf/util/map.c
··· 405 405 406 406 size_t map__fprintf_dsoname(struct map *map, FILE *fp) 407 407 { 408 + char buf[symbol_conf.pad_output_len_dso + 1]; 408 409 const char *dsoname = "[unknown]"; 409 410 410 411 if (map && map->dso) { ··· 413 412 dsoname = map->dso->long_name; 414 413 else 415 414 dsoname = map->dso->name; 415 + } 416 + 417 + if (symbol_conf.pad_output_len_dso) { 418 + scnprintf_pad(buf, symbol_conf.pad_output_len_dso, "%s", dsoname); 419 + dsoname = buf; 416 420 } 417 421 418 422 return fprintf(fp, "%s", dsoname);
+2
tools/perf/util/map_groups.h
··· 88 88 89 89 struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); 90 90 91 + int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map); 92 + 91 93 #endif // __PERF_MAP_GROUPS_H
+55 -18
tools/perf/util/metricgroup.c
··· 17 17 #include "pmu-events/pmu-events.h" 18 18 #include "strlist.h" 19 19 #include <assert.h> 20 - #include <ctype.h> 20 + #include <linux/ctype.h> 21 21 22 22 struct metric_event *metricgroup__lookup(struct rblist *metric_events, 23 23 struct perf_evsel *evsel, ··· 85 85 const char *metric_expr; 86 86 }; 87 87 88 - static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist, 89 - const char **ids, 90 - int idnum, 91 - struct perf_evsel **metric_events) 88 + static bool record_evsel(int *ind, struct perf_evsel **start, 89 + int idnum, 90 + struct perf_evsel **metric_events, 91 + struct perf_evsel *ev) 92 + { 93 + metric_events[*ind] = ev; 94 + if (*ind == 0) 95 + *start = ev; 96 + if (++*ind == idnum) { 97 + metric_events[*ind] = NULL; 98 + return true; 99 + } 100 + return false; 101 + } 102 + 103 + static struct perf_evsel *find_evsel_group(struct perf_evlist *perf_evlist, 104 + const char **ids, 105 + int idnum, 106 + struct perf_evsel **metric_events) 92 107 { 93 108 struct perf_evsel *ev, *start = NULL; 94 109 int ind = 0; 95 110 96 111 evlist__for_each_entry (perf_evlist, ev) { 112 + if (ev->collect_stat) 113 + continue; 97 114 if (!strcmp(ev->name, ids[ind])) { 98 - metric_events[ind] = ev; 99 - if (ind == 0) 100 - start = ev; 101 - if (++ind == idnum) { 102 - metric_events[ind] = NULL; 115 + if (record_evsel(&ind, &start, idnum, 116 + metric_events, ev)) 103 117 return start; 104 - } 105 118 } else { 119 + /* 120 + * We saw some other event that is not 121 + * in our list of events. Discard 122 + * the whole match and start again. 
123 + */ 106 124 ind = 0; 107 125 start = NULL; 126 + if (!strcmp(ev->name, ids[ind])) { 127 + if (record_evsel(&ind, &start, idnum, 128 + metric_events, ev)) 129 + return start; 130 + } 108 131 } 109 132 } 110 133 /* ··· 157 134 ret = -ENOMEM; 158 135 break; 159 136 } 160 - evsel = find_evsel(perf_evlist, eg->ids, eg->idnum, 161 - metric_events); 137 + evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum, 138 + metric_events); 162 139 if (!evsel) { 163 140 pr_debug("Cannot resolve %s: %s\n", 164 141 eg->metric_name, eg->metric_expr); ··· 331 308 struct mep *me; 332 309 char *s; 333 310 311 + g = skip_spaces(g); 334 312 if (*g == 0) 335 313 g = "No_group"; 336 - while (isspace(*g)) 337 - g++; 338 314 if (filter && !strstr(g, filter)) 339 315 continue; 340 316 if (raw) ··· 375 353 struct mep *me = container_of(node, struct mep, nd); 376 354 377 355 if (metricgroups) 378 - printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n"); 356 + printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? " " : "\n"); 379 357 if (metrics) 380 358 metricgroup__print_strlist(me->metrics, raw); 381 359 next = rb_next(node); ··· 409 387 const char **ids; 410 388 int idnum; 411 389 struct egroup *eg; 390 + bool no_group = false; 412 391 413 392 pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); 414 393 ··· 420 397 strbuf_addf(events, ","); 421 398 for (j = 0; j < idnum; j++) { 422 399 pr_debug("found event %s\n", ids[j]); 400 + /* 401 + * Duration time maps to a software event and can make 402 + * groups not count. Always use it outside a 403 + * group. 404 + */ 405 + if (!strcmp(ids[j], "duration_time")) { 406 + if (j > 0) 407 + strbuf_addf(events, "}:W,"); 408 + strbuf_addf(events, "duration_time"); 409 + no_group = true; 410 + continue; 411 + } 423 412 strbuf_addf(events, "%s%s", 424 - j == 0 ? "{" : ",", 413 + j == 0 || no_group ? 
"{" : ",", 425 414 ids[j]); 415 + no_group = false; 426 416 } 427 - strbuf_addf(events, "}:W"); 417 + if (!no_group) 418 + strbuf_addf(events, "}:W"); 428 419 429 420 eg = malloc(sizeof(struct egroup)); 430 421 if (!eg) {
+4
tools/perf/util/perf_regs.h
··· 29 29 #ifdef HAVE_PERF_REGS_SUPPORT 30 30 #include <perf_regs.h> 31 31 32 + #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) 33 + 32 34 int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); 33 35 34 36 #else 35 37 #define PERF_REGS_MASK 0 36 38 #define PERF_REGS_MAX 0 39 + 40 + #define DWARF_MINIMAL_REGS PERF_REGS_MASK 37 41 38 42 static inline const char *perf_reg_name(int id __maybe_unused) 39 43 {
+51 -18
tools/perf/util/pmu.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/list.h> 3 3 #include <linux/compiler.h> 4 + #include <linux/string.h> 4 5 #include <sys/types.h> 5 6 #include <errno.h> 6 7 #include <fcntl.h> ··· 395 394 buf[ret] = 0; 396 395 397 396 /* Remove trailing newline from sysfs file */ 398 - rtrim(buf); 397 + strim(buf); 399 398 400 399 return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, 401 400 NULL, NULL, NULL); ··· 701 700 return map; 702 701 } 703 702 703 + static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) 704 + { 705 + char *tmp = NULL, *tok, *str; 706 + bool res; 707 + 708 + str = strdup(pmu_name); 709 + if (!str) 710 + return false; 711 + 712 + /* 713 + * uncore alias may be from different PMU with common prefix 714 + */ 715 + tok = strtok_r(str, ",", &tmp); 716 + if (strncmp(pmu_name, tok, strlen(tok))) { 717 + res = false; 718 + goto out; 719 + } 720 + 721 + /* 722 + * Match more complex aliases where the alias name is a comma-delimited 723 + * list of tokens, orderly contained in the matching PMU name. 724 + * 725 + * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we 726 + * match "socket" in "socketX_pmunameY" and then "pmuname" in 727 + * "pmunameY". 728 + */ 729 + for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { 730 + name = strstr(name, tok); 731 + if (!name) { 732 + res = false; 733 + goto out; 734 + } 735 + } 736 + 737 + res = true; 738 + out: 739 + free(str); 740 + return res; 741 + } 742 + 704 743 /* 705 744 * From the pmu_events_map, find the table of PMU events that corresponds 706 745 * to the current running CPU. 
Then, add all PMU events from that table ··· 750 709 { 751 710 int i; 752 711 struct pmu_events_map *map; 753 - struct pmu_event *pe; 754 712 const char *name = pmu->name; 755 - const char *pname; 756 713 757 714 map = perf_pmu__find_map(pmu); 758 715 if (!map) ··· 761 722 */ 762 723 i = 0; 763 724 while (1) { 725 + const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu"; 726 + struct pmu_event *pe = &map->table[i++]; 727 + const char *pname = pe->pmu ? pe->pmu : cpu_name; 764 728 765 - pe = &map->table[i++]; 766 729 if (!pe->name) { 767 730 if (pe->metric_group || pe->metric_name) 768 731 continue; 769 732 break; 770 733 } 771 734 772 - if (!is_arm_pmu_core(name)) { 773 - pname = pe->pmu ? pe->pmu : "cpu"; 735 + if (pmu_is_uncore(name) && 736 + pmu_uncore_alias_match(pname, name)) 737 + goto new_alias; 774 738 775 - /* 776 - * uncore alias may be from different PMU 777 - * with common prefix 778 - */ 779 - if (pmu_is_uncore(name) && 780 - !strncmp(pname, name, strlen(pname))) 781 - goto new_alias; 782 - 783 - if (strcmp(pname, name)) 784 - continue; 785 - } 739 + if (strcmp(pname, name)) 740 + continue; 786 741 787 742 new_alias: 788 743 /* need type casts to override 'const' */ ··· 1376 1343 break; 1377 1344 s += wlen; 1378 1345 column += n; 1379 - s = ltrim(s); 1346 + s = skip_spaces(s); 1380 1347 } 1381 1348 } 1382 1349
+1 -1
tools/perf/util/print_binary.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include "print_binary.h" 3 3 #include <linux/log2.h> 4 - #include "sane_ctype.h" 4 + #include <linux/ctype.h> 5 5 6 6 int binary__fprintf(unsigned char *data, size_t len, 7 7 size_t bytes_per_line, binary__fprintf_t printer,
+1 -1
tools/perf/util/probe-event.c
··· 39 39 #include "session.h" 40 40 #include "string2.h" 41 41 42 - #include "sane_ctype.h" 42 + #include <linux/ctype.h> 43 43 44 44 #define PERFPROBE_GROUP "probe" 45 45
+1 -1
tools/perf/util/probe-finder.h
··· 5 5 #include <stdbool.h> 6 6 #include "intlist.h" 7 7 #include "probe-event.h" 8 - #include "sane_ctype.h" 8 + #include <linux/ctype.h> 9 9 10 10 #define MAX_PROBE_BUFFER 1024 11 11 #define MAX_PROBES 128
+2 -1
tools/perf/util/python-ext-sources
··· 6 6 # 7 7 8 8 util/python.c 9 - util/ctype.c 9 + ../lib/ctype.c 10 10 util/evlist.c 11 11 util/evsel.c 12 12 util/cpumap.c ··· 16 16 ../lib/bitmap.c 17 17 ../lib/find_bit.c 18 18 ../lib/hweight.c 19 + ../lib/string.c 19 20 ../lib/vsprintf.c 20 21 util/thread_map.c 21 22 util/util.c
+1
tools/perf/util/python.c
··· 12 12 #include "print_binary.h" 13 13 #include "thread_map.h" 14 14 #include "mmap.h" 15 + #include "util.h" 15 16 16 17 #if PY_MAJOR_VERSION < 3 17 18 #define _PyUnicode_FromString(arg) \
+78 -18
tools/perf/util/s390-cpumsf.c
··· 17 17 * see Documentation/perf.data-file-format.txt. 18 18 * PERF_RECORD_AUXTRACE_INFO: 19 19 * Defines a table of contains for PERF_RECORD_AUXTRACE records. This 20 - * record is generated during 'perf record' command. Each record contains up 21 - * to 256 entries describing offset and size of the AUXTRACE data in the 20 + * record is generated during 'perf record' command. Each record contains 21 + * up to 256 entries describing offset and size of the AUXTRACE data in the 22 22 * perf.data file. 23 23 * PERF_RECORD_AUXTRACE_ERROR: 24 24 * Indicates an error during AUXTRACE collection such as buffer overflow. ··· 237 237 return rc; 238 238 } 239 239 240 - /* Display s390 CPU measurement facility basic-sampling data entry */ 240 + /* Display s390 CPU measurement facility basic-sampling data entry 241 + * Data written on s390 in big endian byte order and contains bit 242 + * fields across byte boundaries. 243 + */ 241 244 static bool s390_cpumsf_basic_show(const char *color, size_t pos, 242 - struct hws_basic_entry *basic) 245 + struct hws_basic_entry *basicp) 243 246 { 247 + struct hws_basic_entry *basic = basicp; 248 + #if __BYTE_ORDER == __LITTLE_ENDIAN 249 + struct hws_basic_entry local; 250 + unsigned long long word = be64toh(*(unsigned long long *)basicp); 251 + 252 + memset(&local, 0, sizeof(local)); 253 + local.def = be16toh(basicp->def); 254 + local.prim_asn = word & 0xffff; 255 + local.CL = word >> 30 & 0x3; 256 + local.I = word >> 32 & 0x1; 257 + local.AS = word >> 33 & 0x3; 258 + local.P = word >> 35 & 0x1; 259 + local.W = word >> 36 & 0x1; 260 + local.T = word >> 37 & 0x1; 261 + local.U = word >> 40 & 0xf; 262 + local.ia = be64toh(basicp->ia); 263 + local.gpp = be64toh(basicp->gpp); 264 + local.hpp = be64toh(basicp->hpp); 265 + basic = &local; 266 + #endif 244 267 if (basic->def != 1) { 245 268 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); 246 269 return false; ··· 281 258 return true; 282 259 } 283 260 284 - /* Display s390 CPU 
measurement facility diagnostic-sampling data entry */ 261 + /* Display s390 CPU measurement facility diagnostic-sampling data entry. 262 + * Data written on s390 in big endian byte order and contains bit 263 + * fields across byte boundaries. 264 + */ 285 265 static bool s390_cpumsf_diag_show(const char *color, size_t pos, 286 - struct hws_diag_entry *diag) 266 + struct hws_diag_entry *diagp) 287 267 { 268 + struct hws_diag_entry *diag = diagp; 269 + #if __BYTE_ORDER == __LITTLE_ENDIAN 270 + struct hws_diag_entry local; 271 + unsigned long long word = be64toh(*(unsigned long long *)diagp); 272 + 273 + local.def = be16toh(diagp->def); 274 + local.I = word >> 32 & 0x1; 275 + diag = &local; 276 + #endif 288 277 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { 289 278 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); 290 279 return false; ··· 307 272 } 308 273 309 274 /* Return TOD timestamp contained in an trailer entry */ 310 - static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) 275 + static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, 276 + int idx) 311 277 { 312 278 /* te->t set: TOD in STCKE format, bytes 8-15 313 279 * to->t not set: TOD in STCK format, bytes 0-7 314 280 */ 315 281 unsigned long long ts; 316 282 317 - memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); 318 - return ts; 283 + memcpy(&ts, &te->timestamp[idx], sizeof(ts)); 284 + return be64toh(ts); 319 285 } 320 286 321 287 /* Display s390 CPU measurement facility trailer entry */ 322 288 static bool s390_cpumsf_trailer_show(const char *color, size_t pos, 323 289 struct hws_trailer_entry *te) 324 290 { 291 + #if __BYTE_ORDER == __LITTLE_ENDIAN 292 + struct hws_trailer_entry local; 293 + const unsigned long long flags = be64toh(te->flags); 294 + 295 + memset(&local, 0, sizeof(local)); 296 + local.f = flags >> 63 & 0x1; 297 + local.a = flags >> 62 & 0x1; 298 + local.t = flags >> 61 & 0x1; 299 + local.bsdes = be16toh((flags >> 16 & 0xffff)); 300 + 
local.dsdes = be16toh((flags & 0xffff)); 301 + memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp)); 302 + local.overflow = be64toh(te->overflow); 303 + local.clock_base = be64toh(te->progusage[0]) >> 63 & 1; 304 + local.progusage2 = be64toh(te->progusage2); 305 + te = &local; 306 + #endif 325 307 if (te->bsdes != sizeof(struct hws_basic_entry)) { 326 308 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); 327 309 return false; 328 310 } 329 311 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" 330 312 " dsdes:%d Overflow:%lld Time:%#llx\n" 331 - "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", 313 + "\t\tC:%d TOD:%#lx\n", 332 314 pos, 333 315 te->f ? 'F' : ' ', 334 316 te->a ? 'A' : ' ', 335 317 te->t ? 'T' : ' ', 336 318 te->bsdes, te->dsdes, te->overflow, 337 - trailer_timestamp(te), te->clock_base, te->progusage2, 338 - te->progusage[0], te->progusage[1]); 319 + trailer_timestamp(te, te->clock_base), 320 + te->clock_base, te->progusage2); 339 321 return true; 340 322 } 341 323 ··· 379 327 *dsdes = *bsdes = 0; 380 328 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ 381 329 return false; 382 - if (basic->def != 1) /* No basic set entry, must be first */ 330 + if (be16toh(basic->def) != 1) /* No basic set entry, must be first */ 383 331 return false; 384 332 /* Check for trailer entry at end of SDB */ 385 333 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 386 334 - sizeof(*te)); 387 - *bsdes = te->bsdes; 388 - *dsdes = te->dsdes; 335 + *bsdes = be16toh(te->bsdes); 336 + *dsdes = be16toh(te->dsdes); 389 337 if (!te->bsdes && !te->dsdes) { 390 338 /* Very old hardware, use CPUID */ 391 339 switch (machine_type) { ··· 547 495 static unsigned long long get_trailer_time(const unsigned char *buf) 548 496 { 549 497 struct hws_trailer_entry *te; 550 - unsigned long long aux_time; 498 + unsigned long long aux_time, progusage2; 499 + bool clock_base; 551 500 552 501 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 553 502 - 
sizeof(*te)); 554 503 555 - if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ 504 + #if __BYTE_ORDER == __LITTLE_ENDIAN 505 + clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; 506 + progusage2 = be64toh(te->progusage[1]); 507 + #else 508 + clock_base = te->clock_base; 509 + progusage2 = te->progusage2; 510 + #endif 511 + if (!clock_base) /* TOD_CLOCK_BASE value missing */ 556 512 return 0; 557 513 558 514 /* Correct calculation to convert time stamp in trailer entry to 559 515 * nano seconds (taken from arch/s390 function tod_to_ns()). 560 516 * TOD_CLOCK_BASE is stored in trailer entry member progusage2. 561 517 */ 562 - aux_time = trailer_timestamp(te) - te->progusage2; 518 + aux_time = trailer_timestamp(te, clock_base) - progusage2; 563 519 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); 564 520 return aux_time; 565 521 }
-52
tools/perf/util/sane_ctype.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _PERF_SANE_CTYPE_H 3 - #define _PERF_SANE_CTYPE_H 4 - 5 - extern const char *graph_line; 6 - extern const char *graph_dotted_line; 7 - extern const char *spaces; 8 - extern const char *dots; 9 - 10 - /* Sane ctype - no locale, and works with signed chars */ 11 - #undef isascii 12 - #undef isspace 13 - #undef isdigit 14 - #undef isxdigit 15 - #undef isalpha 16 - #undef isprint 17 - #undef isalnum 18 - #undef islower 19 - #undef isupper 20 - #undef tolower 21 - #undef toupper 22 - 23 - extern unsigned char sane_ctype[256]; 24 - #define GIT_SPACE 0x01 25 - #define GIT_DIGIT 0x02 26 - #define GIT_ALPHA 0x04 27 - #define GIT_GLOB_SPECIAL 0x08 28 - #define GIT_REGEX_SPECIAL 0x10 29 - #define GIT_PRINT_EXTRA 0x20 30 - #define GIT_PRINT 0x3E 31 - #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) 32 - #define isascii(x) (((x) & ~0x7f) == 0) 33 - #define isspace(x) sane_istest(x,GIT_SPACE) 34 - #define isdigit(x) sane_istest(x,GIT_DIGIT) 35 - #define isxdigit(x) \ 36 - (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G') 37 - #define isalpha(x) sane_istest(x,GIT_ALPHA) 38 - #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) 39 - #define isprint(x) sane_istest(x,GIT_PRINT) 40 - #define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20)) 41 - #define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20)) 42 - #define tolower(x) sane_case((unsigned char)(x), 0x20) 43 - #define toupper(x) sane_case((unsigned char)(x), 0) 44 - 45 - static inline int sane_case(int x, int high) 46 - { 47 - if (sane_istest(x, GIT_ALPHA)) 48 - x = (x & ~0x20) | high; 49 - return x; 50 - } 51 - 52 - #endif /* _PERF_SANE_CTYPE_H */
+50 -4
tools/perf/util/scripting-engines/trace-event-python.c
··· 112 112 PyObject *sample_handler; 113 113 PyObject *call_path_handler; 114 114 PyObject *call_return_handler; 115 + PyObject *synth_handler; 115 116 bool db_export_mode; 116 117 }; 117 118 ··· 948 947 return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s)); 949 948 } 950 949 950 + static int tuple_set_bytes(PyObject *t, unsigned int pos, void *bytes, 951 + unsigned int sz) 952 + { 953 + return PyTuple_SetItem(t, pos, _PyBytes_FromStringAndSize(bytes, sz)); 954 + } 955 + 951 956 static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel) 952 957 { 953 958 struct tables *tables = container_of(dbe, struct tables, dbe); ··· 1112 1105 return 0; 1113 1106 } 1114 1107 1115 - static int python_export_sample(struct db_export *dbe, 1116 - struct export_sample *es) 1108 + static void python_export_sample_table(struct db_export *dbe, 1109 + struct export_sample *es) 1117 1110 { 1118 1111 struct tables *tables = container_of(dbe, struct tables, dbe); 1119 1112 PyObject *t; 1120 1113 1121 - t = tuple_new(22); 1114 + t = tuple_new(24); 1122 1115 1123 1116 tuple_set_u64(t, 0, es->db_id); 1124 1117 tuple_set_u64(t, 1, es->evsel->db_id); ··· 1142 1135 tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK); 1143 1136 tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX)); 1144 1137 tuple_set_u64(t, 21, es->call_path_id); 1138 + tuple_set_u64(t, 22, es->sample->insn_cnt); 1139 + tuple_set_u64(t, 23, es->sample->cyc_cnt); 1145 1140 1146 1141 call_object(tables->sample_handler, t, "sample_table"); 1147 1142 1148 1143 Py_DECREF(t); 1144 + } 1145 + 1146 + static void python_export_synth(struct db_export *dbe, struct export_sample *es) 1147 + { 1148 + struct tables *tables = container_of(dbe, struct tables, dbe); 1149 + PyObject *t; 1150 + 1151 + t = tuple_new(3); 1152 + 1153 + tuple_set_u64(t, 0, es->db_id); 1154 + tuple_set_u64(t, 1, es->evsel->attr.config); 1155 + tuple_set_bytes(t, 2, es->sample->raw_data, es->sample->raw_size); 1156 + 1157 + 
call_object(tables->synth_handler, t, "synth_data"); 1158 + 1159 + Py_DECREF(t); 1160 + } 1161 + 1162 + static int python_export_sample(struct db_export *dbe, 1163 + struct export_sample *es) 1164 + { 1165 + struct tables *tables = container_of(dbe, struct tables, dbe); 1166 + 1167 + python_export_sample_table(dbe, es); 1168 + 1169 + if (es->evsel->attr.type == PERF_TYPE_SYNTH && tables->synth_handler) 1170 + python_export_synth(dbe, es); 1149 1171 1150 1172 return 0; 1151 1173 } ··· 1209 1173 u64 comm_db_id = cr->comm ? cr->comm->db_id : 0; 1210 1174 PyObject *t; 1211 1175 1212 - t = tuple_new(12); 1176 + t = tuple_new(14); 1213 1177 1214 1178 tuple_set_u64(t, 0, cr->db_id); 1215 1179 tuple_set_u64(t, 1, cr->thread->db_id); ··· 1223 1187 tuple_set_u64(t, 9, cr->cp->parent->db_id); 1224 1188 tuple_set_s32(t, 10, cr->flags); 1225 1189 tuple_set_u64(t, 11, cr->parent_db_id); 1190 + tuple_set_u64(t, 12, cr->insn_count); 1191 + tuple_set_u64(t, 13, cr->cyc_count); 1226 1192 1227 1193 call_object(tables->call_return_handler, t, "call_return_table"); 1228 1194 ··· 1511 1473 SET_TABLE_HANDLER(sample); 1512 1474 SET_TABLE_HANDLER(call_path); 1513 1475 SET_TABLE_HANDLER(call_return); 1476 + 1477 + /* 1478 + * Synthesized events are samples but with architecture-specific data 1479 + * stored in sample->raw_data. They are exported via 1480 + * python_export_sample() and consequently do not need a separate export 1481 + * callback. 1482 + */ 1483 + tables->synth_handler = get_handler("synth_data"); 1514 1484 } 1515 1485 1516 1486 #if PY_MAJOR_VERSION < 3
+2
tools/perf/util/setup.py
··· 17 17 vars[var] = sub("-fcf-protection", "", vars[var]) 18 18 if not clang_has_option("-fstack-clash-protection"): 19 19 vars[var] = sub("-fstack-clash-protection", "", vars[var]) 20 + if not clang_has_option("-fstack-protector-strong"): 21 + vars[var] = sub("-fstack-protector-strong", "", vars[var]) 20 22 21 23 from distutils.core import setup, Extension 22 24
+6 -2
tools/perf/util/smt.c
··· 23 23 char fn[256]; 24 24 25 25 snprintf(fn, sizeof fn, 26 - "devices/system/cpu/cpu%d/topology/thread_siblings", 27 - cpu); 26 + "devices/system/cpu/cpu%d/topology/core_cpus", cpu); 27 + if (access(fn, F_OK) == -1) { 28 + snprintf(fn, sizeof fn, 29 + "devices/system/cpu/cpu%d/topology/thread_siblings", 30 + cpu); 31 + } 28 32 if (sysfs__read_str(fn, &str, &strlen) < 0) 29 33 continue; 30 34 /* Entry is hex, but does not have 0x, so need custom parser */
+13
tools/perf/util/sort.h
··· 79 79 80 80 /* HISTC_WEIGHTED_DIFF */ 81 81 s64 wdiff; 82 + 83 + /* PERF_HPP_DIFF__CYCLES */ 84 + s64 cycles; 82 85 }; 83 86 }; 84 87 ··· 147 144 long time; 148 145 struct hists *hists; 149 146 struct mem_info *mem_info; 147 + struct block_info *block_info; 150 148 void *raw_data; 151 149 u32 raw_size; 152 150 int num_res; ··· 287 283 unsigned int width); 288 284 int (*se_filter)(struct hist_entry *he, int type, const void *arg); 289 285 u8 se_width_idx; 286 + }; 287 + 288 + struct block_hist { 289 + struct hists block_hists; 290 + struct perf_hpp_list block_list; 291 + struct perf_hpp_fmt block_fmt; 292 + int block_idx; 293 + bool valid; 294 + struct hist_entry he; 290 295 }; 291 296 292 297 extern struct sort_entry sort_thread;
+5 -2
tools/perf/util/srcline.c
··· 5 5 #include <string.h> 6 6 7 7 #include <linux/kernel.h> 8 + #include <linux/string.h> 8 9 9 10 #include "util/dso.h" 10 11 #include "util/util.h" 11 12 #include "util/debug.h" 12 13 #include "util/callchain.h" 14 + #include "util/symbol_conf.h" 13 15 #include "srcline.h" 14 16 #include "string2.h" 15 17 #include "symbol.h" ··· 289 287 } 290 288 291 289 if (a2l == NULL) { 292 - pr_warning("addr2line_init failed for %s\n", dso_name); 290 + if (!symbol_conf.disable_add2line_warn) 291 + pr_warning("addr2line_init failed for %s\n", dso_name); 293 292 return 0; 294 293 } 295 294 ··· 467 464 char *srcline; 468 465 struct symbol *inline_sym; 469 466 470 - rtrim(funcname); 467 + strim(funcname); 471 468 472 469 if (getline(&filename, &filelen, fp) == -1) 473 470 goto out;
+32 -11
tools/perf/util/stat-display.c
··· 1 1 #include <stdio.h> 2 2 #include <inttypes.h> 3 + #include <linux/string.h> 3 4 #include <linux/time64.h> 4 5 #include <math.h> 5 6 #include "color.h" ··· 11 10 #include "thread_map.h" 12 11 #include "cpumap.h" 13 12 #include "string2.h" 14 - #include "sane_ctype.h" 13 + #include <linux/ctype.h> 15 14 #include "cgroup.h" 16 15 #include <math.h> 17 16 #include <api/fs/fs.h> ··· 70 69 { 71 70 switch (config->aggr_mode) { 72 71 case AGGR_CORE: 73 - fprintf(config->output, "S%d-C%*d%s%*d%s", 72 + fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", 74 73 cpu_map__id_to_socket(id), 74 + cpu_map__id_to_die(id), 75 75 config->csv_output ? 0 : -8, 76 76 cpu_map__id_to_cpu(id), 77 + config->csv_sep, 78 + config->csv_output ? 0 : 4, 79 + nr, 80 + config->csv_sep); 81 + break; 82 + case AGGR_DIE: 83 + fprintf(config->output, "S%d-D%*d%s%*d%s", 84 + cpu_map__id_to_socket(id << 16), 85 + config->csv_output ? 0 : -8, 86 + cpu_map__id_to_die(id << 16), 77 87 config->csv_sep, 78 88 config->csv_output ? 0 : 4, 79 89 nr, ··· 101 89 break; 102 90 case AGGR_NONE: 103 91 if (evsel->percore) { 104 - fprintf(config->output, "S%d-C%*d%s", 92 + fprintf(config->output, "S%d-D%d-C%*d%s", 105 93 cpu_map__id_to_socket(id), 94 + cpu_map__id_to_die(id), 106 95 config->csv_output ? 
0 : -5, 107 96 cpu_map__id_to_cpu(id), config->csv_sep); 108 97 } else { ··· 212 199 return; 213 200 } 214 201 snprintf(buf, sizeof(buf), fmt, val); 215 - ends = vals = ltrim(buf); 202 + ends = vals = skip_spaces(buf); 216 203 while (isdigit(*ends) || *ends == '.') 217 204 ends++; 218 205 *ends = 0; 219 - while (isspace(*unit)) 220 - unit++; 221 - fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit); 206 + fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, skip_spaces(unit)); 222 207 } 223 208 224 209 /* Filter out some columns that don't work well in metrics only mode */ ··· 280 269 return; 281 270 unit = fixunit(tbuf, os->evsel, unit); 282 271 snprintf(buf, sizeof buf, fmt, val); 283 - ends = vals = ltrim(buf); 272 + ends = vals = skip_spaces(buf); 284 273 while (isdigit(*ends) || *ends == '.') 285 274 ends++; 286 275 *ends = 0; ··· 418 407 [AGGR_THREAD] = 1, 419 408 [AGGR_NONE] = 1, 420 409 [AGGR_SOCKET] = 2, 410 + [AGGR_DIE] = 2, 421 411 [AGGR_CORE] = 2, 422 412 }; 423 413 ··· 554 542 alias->scale != counter->scale || 555 543 alias->cgrp != counter->cgrp || 556 544 strcmp(alias->unit, counter->unit) || 557 - perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter)) 545 + perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) || 546 + !strcmp(alias->pmu_name, counter->pmu_name)) 558 547 break; 559 548 alias->merged_stat = true; 560 549 cb(config, alias, data, false); ··· 892 879 } 893 880 894 881 static int aggr_header_lens[] = { 895 - [AGGR_CORE] = 18, 882 + [AGGR_CORE] = 24, 883 + [AGGR_DIE] = 18, 896 884 [AGGR_SOCKET] = 12, 897 885 [AGGR_NONE] = 6, 898 886 [AGGR_THREAD] = 24, ··· 902 888 903 889 static const char *aggr_header_csv[] = { 904 890 [AGGR_CORE] = "core,cpus,", 891 + [AGGR_DIE] = "die,cpus", 905 892 [AGGR_SOCKET] = "socket,cpus", 906 893 [AGGR_NONE] = "cpu,", 907 894 [AGGR_THREAD] = "comm-pid,", ··· 969 954 if (!metric_only) 970 955 fprintf(output, " counts %*s events\n", unit_width, "unit"); 971 956 break; 
957 + case AGGR_DIE: 958 + fprintf(output, "# time die cpus"); 959 + if (!metric_only) 960 + fprintf(output, " counts %*s events\n", unit_width, "unit"); 961 + break; 972 962 case AGGR_CORE: 973 - fprintf(output, "# time core cpus"); 963 + fprintf(output, "# time core cpus"); 974 964 if (!metric_only) 975 965 fprintf(output, " counts %*s events\n", unit_width, "unit"); 976 966 break; ··· 1185 1165 1186 1166 switch (config->aggr_mode) { 1187 1167 case AGGR_CORE: 1168 + case AGGR_DIE: 1188 1169 case AGGR_SOCKET: 1189 1170 print_aggr(config, evlist, prefix); 1190 1171 break;
+21 -3
tools/perf/util/stat-shadow.c
··· 12 12 /* 13 13 * AGGR_GLOBAL: Use CPU 0 14 14 * AGGR_SOCKET: Use first CPU of socket 15 + * AGGR_DIE: Use first CPU of die 15 16 * AGGR_CORE: Use first CPU of core 16 17 * AGGR_NONE: Use matching CPU 17 18 * AGGR_THREAD: Not supported? ··· 304 303 struct perf_evsel *c2; 305 304 306 305 evlist__for_each_entry (evsel_list, c2) { 307 - if (!strcasecmp(c2->name, name)) 306 + if (!strcasecmp(c2->name, name) && !c2->collect_stat) 308 307 return c2; 309 308 } 310 309 return NULL; ··· 343 342 if (leader) { 344 343 /* Search in group */ 345 344 for_each_group_member (oc, leader) { 346 - if (!strcasecmp(oc->name, metric_names[i])) { 345 + if (!strcasecmp(oc->name, metric_names[i]) && 346 + !oc->collect_stat) { 347 347 found = true; 348 348 break; 349 349 } ··· 724 722 double ratio; 725 723 int i; 726 724 void *ctxp = out->ctx; 725 + char *n, *pn; 727 726 728 727 expr__ctx_init(&pctx); 729 728 expr__add_id(&pctx, name, avg); ··· 744 741 stats = &v->stats; 745 742 scale = 1.0; 746 743 } 747 - expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale); 744 + 745 + n = strdup(metric_events[i]->name); 746 + if (!n) 747 + return; 748 + /* 749 + * This display code with --no-merge adds [cpu] postfixes. 750 + * These are not supported by the parser. Remove everything 751 + * after the space. 752 + */ 753 + pn = strchr(n, ' '); 754 + if (pn) 755 + *pn = 0; 756 + expr__add_id(&pctx, n, avg_stats(stats)*scale); 748 757 } 749 758 if (!metric_events[i]) { 750 759 const char *p = metric_expr; ··· 773 758 (metric_name ? metric_name : name) : "", 0); 774 759 } else 775 760 print_metric(config, ctxp, NULL, NULL, "", 0); 761 + 762 + for (i = 1; i < pctx.num_ids; i++) 763 + free((void *)pctx.ids[i].name); 776 764 } 777 765 778 766 void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+1
tools/perf/util/stat.c
··· 272 272 switch (config->aggr_mode) { 273 273 case AGGR_THREAD: 274 274 case AGGR_CORE: 275 + case AGGR_DIE: 275 276 case AGGR_SOCKET: 276 277 case AGGR_NONE: 277 278 if (!evsel->snapshot)
+1
tools/perf/util/stat.h
··· 44 44 AGGR_NONE, 45 45 AGGR_GLOBAL, 46 46 AGGR_SOCKET, 47 + AGGR_DIE, 47 48 AGGR_CORE, 48 49 AGGR_THREAD, 49 50 AGGR_UNSET,
+3 -3
tools/perf/util/strfilter.c
··· 4 4 #include "strfilter.h" 5 5 6 6 #include <errno.h> 7 - #include "sane_ctype.h" 7 + #include <linux/ctype.h> 8 + #include <linux/string.h> 8 9 9 10 /* Operators */ 10 11 static const char *OP_and = "&"; /* Logical AND */ ··· 38 37 { 39 38 const char *p; 40 39 41 - while (isspace(*s)) /* Skip spaces */ 42 - s++; 40 + s = skip_spaces(s); 43 41 44 42 if (*s == '\0') { 45 43 p = s;
+10 -159
tools/perf/util/string.c
··· 4 4 #include <linux/string.h> 5 5 #include <stdlib.h> 6 6 7 - #include "sane_ctype.h" 7 + #include <linux/ctype.h> 8 + 9 + const char *graph_dotted_line = 10 + "---------------------------------------------------------------------" 11 + "---------------------------------------------------------------------" 12 + "---------------------------------------------------------------------"; 13 + const char *dots = 14 + "....................................................................." 15 + "....................................................................." 16 + "....................................................................."; 8 17 9 18 #define K 1024LL 10 19 /* ··· 67 58 68 59 out_err: 69 60 return -1; 70 - } 71 - 72 - /* 73 - * Helper function for splitting a string into an argv-like array. 74 - * originally copied from lib/argv_split.c 75 - */ 76 - static const char *skip_sep(const char *cp) 77 - { 78 - while (*cp && isspace(*cp)) 79 - cp++; 80 - 81 - return cp; 82 - } 83 - 84 - static const char *skip_arg(const char *cp) 85 - { 86 - while (*cp && !isspace(*cp)) 87 - cp++; 88 - 89 - return cp; 90 - } 91 - 92 - static int count_argc(const char *str) 93 - { 94 - int count = 0; 95 - 96 - while (*str) { 97 - str = skip_sep(str); 98 - if (*str) { 99 - count++; 100 - str = skip_arg(str); 101 - } 102 - } 103 - 104 - return count; 105 - } 106 - 107 - /** 108 - * argv_free - free an argv 109 - * @argv - the argument vector to be freed 110 - * 111 - * Frees an argv and the strings it points to. 112 - */ 113 - void argv_free(char **argv) 114 - { 115 - char **p; 116 - for (p = argv; *p; p++) { 117 - free(*p); 118 - *p = NULL; 119 - } 120 - 121 - free(argv); 122 - } 123 - 124 - /** 125 - * argv_split - split a string at whitespace, returning an argv 126 - * @str: the string to be split 127 - * @argcp: returned argument count 128 - * 129 - * Returns an array of pointers to strings which are split out from 130 - * @str. 
This is performed by strictly splitting on white-space; no 131 - * quote processing is performed. Multiple whitespace characters are 132 - * considered to be a single argument separator. The returned array 133 - * is always NULL-terminated. Returns NULL on memory allocation 134 - * failure. 135 - */ 136 - char **argv_split(const char *str, int *argcp) 137 - { 138 - int argc = count_argc(str); 139 - char **argv = calloc(argc + 1, sizeof(*argv)); 140 - char **argvp; 141 - 142 - if (argv == NULL) 143 - goto out; 144 - 145 - if (argcp) 146 - *argcp = argc; 147 - 148 - argvp = argv; 149 - 150 - while (*str) { 151 - str = skip_sep(str); 152 - 153 - if (*str) { 154 - const char *p = str; 155 - char *t; 156 - 157 - str = skip_arg(str); 158 - 159 - t = strndup(p, str-p); 160 - if (t == NULL) 161 - goto fail; 162 - *argvp++ = t; 163 - } 164 - } 165 - *argvp = NULL; 166 - 167 - out: 168 - return argv; 169 - 170 - fail: 171 - argv_free(argv); 172 - return NULL; 173 61 } 174 62 175 63 /* Character class matching */ ··· 207 301 return s1[i1] - s2[i2]; 208 302 } 209 303 return 0; 210 - } 211 - 212 - /** 213 - * strxfrchar - Locate and replace character in @s 214 - * @s: The string to be searched/changed. 215 - * @from: Source character to be replaced. 216 - * @to: Destination character. 217 - * 218 - * Return pointer to the changed string. 219 - */ 220 - char *strxfrchar(char *s, char from, char to) 221 - { 222 - char *p = s; 223 - 224 - while ((p = strchr(p, from)) != NULL) 225 - *p++ = to; 226 - 227 - return s; 228 - } 229 - 230 - /** 231 - * ltrim - Removes leading whitespace from @s. 232 - * @s: The string to be stripped. 233 - * 234 - * Return pointer to the first non-whitespace character in @s. 235 - */ 236 - char *ltrim(char *s) 237 - { 238 - while (isspace(*s)) 239 - s++; 240 - 241 - return s; 242 - } 243 - 244 - /** 245 - * rtrim - Removes trailing whitespace from @s. 246 - * @s: The string to be stripped. 
247 - * 248 - * Note that the first trailing whitespace is replaced with a %NUL-terminator 249 - * in the given string @s. Returns @s. 250 - */ 251 - char *rtrim(char *s) 252 - { 253 - size_t size = strlen(s); 254 - char *end; 255 - 256 - if (!size) 257 - return s; 258 - 259 - end = s + size - 1; 260 - while (end >= s && isspace(*end)) 261 - end--; 262 - *(end + 1) = '\0'; 263 - 264 - return s; 265 304 } 266 305 267 306 char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
+4 -11
tools/perf/util/string2.h
··· 2 2 #ifndef PERF_STRING_H 3 3 #define PERF_STRING_H 4 4 5 + #include <linux/string.h> 5 6 #include <linux/types.h> 6 7 #include <stddef.h> 7 8 #include <string.h> 8 9 10 + extern const char *graph_dotted_line; 11 + extern const char *dots; 12 + 9 13 s64 perf_atoll(const char *str); 10 - char **argv_split(const char *str, int *argcp); 11 - void argv_free(char **argv); 12 14 bool strglobmatch(const char *str, const char *pat); 13 15 bool strglobmatch_nocase(const char *str, const char *pat); 14 16 bool strlazymatch(const char *str, const char *pat); ··· 19 17 return strpbrk(str, "*?[") != NULL; 20 18 } 21 19 int strtailcmp(const char *s1, const char *s2); 22 - char *strxfrchar(char *s, char from, char to); 23 - 24 - char *ltrim(char *s); 25 - char *rtrim(char *s); 26 - 27 - static inline char *trim(char *s) 28 - { 29 - return ltrim(rtrim(s)); 30 - } 31 20 32 21 char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); 33 22
+3 -3
tools/perf/util/symbol-elf.c
··· 14 14 #include "machine.h" 15 15 #include "vdso.h" 16 16 #include "debug.h" 17 - #include "sane_ctype.h" 17 + #include "util.h" 18 + #include <linux/ctype.h> 18 19 #include <symbol/kallsyms.h> 19 20 20 21 #ifndef EM_AARCH64 ··· 700 699 int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, 701 700 enum dso_binary_type type) 702 701 { 703 - int err = -1; 704 702 GElf_Ehdr ehdr; 705 703 Elf *elf; 706 704 int fd; ··· 793 793 elf_end(elf); 794 794 out_close: 795 795 close(fd); 796 - return err; 796 + return -1; 797 797 } 798 798 799 799 /**
+116 -5
tools/perf/util/symbol.c
··· 25 25 #include "namespaces.h" 26 26 #include "header.h" 27 27 #include "path.h" 28 - #include "sane_ctype.h" 28 + #include <linux/ctype.h> 29 29 30 30 #include <elf.h> 31 31 #include <limits.h> ··· 1166 1166 return 0; 1167 1167 } 1168 1168 1169 + /* 1170 + * Merges map into map_groups by splitting the new map 1171 + * within the existing map regions. 1172 + */ 1173 + int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map) 1174 + { 1175 + struct map *old_map; 1176 + LIST_HEAD(merged); 1177 + 1178 + for (old_map = map_groups__first(kmaps); old_map; 1179 + old_map = map_groups__next(old_map)) { 1180 + 1181 + /* no overload with this one */ 1182 + if (new_map->end < old_map->start || 1183 + new_map->start >= old_map->end) 1184 + continue; 1185 + 1186 + if (new_map->start < old_map->start) { 1187 + /* 1188 + * |new...... 1189 + * |old.... 1190 + */ 1191 + if (new_map->end < old_map->end) { 1192 + /* 1193 + * |new......| -> |new..| 1194 + * |old....| -> |old....| 1195 + */ 1196 + new_map->end = old_map->start; 1197 + } else { 1198 + /* 1199 + * |new.............| -> |new..| |new..| 1200 + * |old....| -> |old....| 1201 + */ 1202 + struct map *m = map__clone(new_map); 1203 + 1204 + if (!m) 1205 + return -ENOMEM; 1206 + 1207 + m->end = old_map->start; 1208 + list_add_tail(&m->node, &merged); 1209 + new_map->start = old_map->end; 1210 + } 1211 + } else { 1212 + /* 1213 + * |new...... 1214 + * |old.... 
1215 + */ 1216 + if (new_map->end < old_map->end) { 1217 + /* 1218 + * |new..| -> x 1219 + * |old.........| -> |old.........| 1220 + */ 1221 + map__put(new_map); 1222 + new_map = NULL; 1223 + break; 1224 + } else { 1225 + /* 1226 + * |new......| -> |new...| 1227 + * |old....| -> |old....| 1228 + */ 1229 + new_map->start = old_map->end; 1230 + } 1231 + } 1232 + } 1233 + 1234 + while (!list_empty(&merged)) { 1235 + old_map = list_entry(merged.next, struct map, node); 1236 + list_del_init(&old_map->node); 1237 + map_groups__insert(kmaps, old_map); 1238 + map__put(old_map); 1239 + } 1240 + 1241 + if (new_map) { 1242 + map_groups__insert(kmaps, new_map); 1243 + map__put(new_map); 1244 + } 1245 + return 0; 1246 + } 1247 + 1169 1248 static int dso__load_kcore(struct dso *dso, struct map *map, 1170 1249 const char *kallsyms_filename) 1171 1250 { ··· 1301 1222 while (old_map) { 1302 1223 struct map *next = map_groups__next(old_map); 1303 1224 1304 - if (old_map != map) 1225 + /* 1226 + * We need to preserve eBPF maps even if they are 1227 + * covered by kcore, because we need to access 1228 + * eBPF dso for source data. 1229 + */ 1230 + if (old_map != map && !__map__is_bpf_prog(old_map)) 1305 1231 map_groups__remove(kmaps, old_map); 1306 1232 old_map = next; 1307 1233 } ··· 1340 1256 map_groups__remove(kmaps, map); 1341 1257 map_groups__insert(kmaps, map); 1342 1258 map__put(map); 1259 + map__put(new_map); 1343 1260 } else { 1344 - map_groups__insert(kmaps, new_map); 1261 + /* 1262 + * Merge kcore map into existing maps, 1263 + * and ensure that current maps (eBPF) 1264 + * stay intact. 
1265 + */ 1266 + if (map_groups__merge_in(kmaps, new_map)) 1267 + goto out_err; 1345 1268 } 1346 - 1347 - map__put(new_map); 1348 1269 } 1349 1270 1350 1271 if (machine__is(machine, "x86_64")) { ··· 2350 2261 if (mi) 2351 2262 refcount_set(&mi->refcnt, 1); 2352 2263 return mi; 2264 + } 2265 + 2266 + struct block_info *block_info__get(struct block_info *bi) 2267 + { 2268 + if (bi) 2269 + refcount_inc(&bi->refcnt); 2270 + return bi; 2271 + } 2272 + 2273 + void block_info__put(struct block_info *bi) 2274 + { 2275 + if (bi && refcount_dec_and_test(&bi->refcnt)) 2276 + free(bi); 2277 + } 2278 + 2279 + struct block_info *block_info__new(void) 2280 + { 2281 + struct block_info *bi = zalloc(sizeof(*bi)); 2282 + 2283 + if (bi) 2284 + refcount_set(&bi->refcnt, 1); 2285 + return bi; 2353 2286 }
+23
tools/perf/util/symbol.h
··· 131 131 refcount_t refcnt; 132 132 }; 133 133 134 + struct block_info { 135 + struct symbol *sym; 136 + u64 start; 137 + u64 end; 138 + u64 cycles; 139 + u64 cycles_aggr; 140 + int num; 141 + int num_aggr; 142 + refcount_t refcnt; 143 + }; 144 + 134 145 struct addr_location { 135 146 struct machine *machine; 136 147 struct thread *thread; ··· 342 331 } 343 332 344 333 #define mem_info__zput(mi) __mem_info__zput(&mi) 334 + 335 + struct block_info *block_info__new(void); 336 + struct block_info *block_info__get(struct block_info *bi); 337 + void block_info__put(struct block_info *bi); 338 + 339 + static inline void __block_info__zput(struct block_info **bi) 340 + { 341 + block_info__put(*bi); 342 + *bi = NULL; 343 + } 344 + 345 + #define block_info__zput(bi) __block_info__zput(&bi) 345 346 346 347 #endif /* __PERF_SYMBOL */
+4 -1
tools/perf/util/symbol_conf.h
··· 39 39 hide_unresolved, 40 40 raw_trace, 41 41 report_hierarchy, 42 - inline_name; 42 + report_block, 43 + inline_name, 44 + disable_add2line_warn; 43 45 const char *vmlinux_name, 44 46 *kallsyms_name, 45 47 *source_prefix, ··· 71 69 *tid_list; 72 70 const char *symfs; 73 71 int res_sample; 72 + int pad_output_len_dso; 74 73 }; 75 74 76 75 extern struct symbol_conf symbol_conf;
+49 -13
tools/perf/util/thread-stack.c
··· 40 40 * @timestamp: timestamp (if known) 41 41 * @ref: external reference (e.g. db_id of sample) 42 42 * @branch_count: the branch count when the entry was created 43 + * @insn_count: the instruction count when the entry was created 44 + * @cyc_count the cycle count when the entry was created 43 45 * @db_id: id used for db-export 44 46 * @cp: call path 45 47 * @no_call: a 'call' was not seen ··· 53 51 u64 timestamp; 54 52 u64 ref; 55 53 u64 branch_count; 54 + u64 insn_count; 55 + u64 cyc_count; 56 56 u64 db_id; 57 57 struct call_path *cp; 58 58 bool no_call; ··· 70 66 * @sz: current maximum stack size 71 67 * @trace_nr: current trace number 72 68 * @branch_count: running branch count 69 + * @insn_count: running instruction count 70 + * @cyc_count running cycle count 73 71 * @kernel_start: kernel start address 74 72 * @last_time: last timestamp 75 73 * @crp: call/return processor ··· 85 79 size_t sz; 86 80 u64 trace_nr; 87 81 u64 branch_count; 82 + u64 insn_count; 83 + u64 cyc_count; 88 84 u64 kernel_start; 89 85 u64 last_time; 90 86 struct call_return_processor *crp; ··· 288 280 cr.call_time = tse->timestamp; 289 281 cr.return_time = timestamp; 290 282 cr.branch_count = ts->branch_count - tse->branch_count; 283 + cr.insn_count = ts->insn_count - tse->insn_count; 284 + cr.cyc_count = ts->cyc_count - tse->cyc_count; 291 285 cr.db_id = tse->db_id; 292 286 cr.call_ref = tse->ref; 293 287 cr.return_ref = ref; ··· 545 535 tse->timestamp = timestamp; 546 536 tse->ref = ref; 547 537 tse->branch_count = ts->branch_count; 538 + tse->insn_count = ts->insn_count; 539 + tse->cyc_count = ts->cyc_count; 548 540 tse->cp = cp; 549 541 tse->no_call = no_call; 550 542 tse->trace_end = trace_end; ··· 628 616 true, false); 629 617 } 630 618 619 + static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts, 620 + struct perf_sample *sample, u64 ref) 621 + { 622 + u64 tm = sample->time; 623 + int err; 624 + 625 + /* Return to userspace, so pop all kernel addresses 
*/ 626 + while (thread_stack__in_kernel(ts)) { 627 + err = thread_stack__call_return(thread, ts, --ts->cnt, 628 + tm, ref, true); 629 + if (err) 630 + return err; 631 + } 632 + 633 + return 0; 634 + } 635 + 631 636 static int thread_stack__no_call_return(struct thread *thread, 632 637 struct thread_stack *ts, 633 638 struct perf_sample *sample, ··· 664 635 665 636 if (ip >= ks && addr < ks) { 666 637 /* Return to userspace, so pop all kernel addresses */ 667 - while (thread_stack__in_kernel(ts)) { 668 - err = thread_stack__call_return(thread, ts, --ts->cnt, 669 - tm, ref, true); 670 - if (err) 671 - return err; 672 - } 638 + err = thread_stack__pop_ks(thread, ts, sample, ref); 639 + if (err) 640 + return err; 673 641 674 642 /* If the stack is empty, push the userspace address */ 675 643 if (!ts->cnt) { ··· 676 650 } 677 651 } else if (thread_stack__in_kernel(ts) && ip < ks) { 678 652 /* Return to userspace, so pop all kernel addresses */ 679 - while (thread_stack__in_kernel(ts)) { 680 - err = thread_stack__call_return(thread, ts, --ts->cnt, 681 - tm, ref, true); 682 - if (err) 683 - return err; 684 - } 653 + err = thread_stack__pop_ks(thread, ts, sample, ref); 654 + if (err) 655 + return err; 685 656 } 686 657 687 658 if (ts->cnt) ··· 888 865 } 889 866 890 867 ts->branch_count += 1; 868 + ts->insn_count += sample->insn_cnt; 869 + ts->cyc_count += sample->cyc_cnt; 891 870 ts->last_time = sample->time; 892 871 893 872 if (sample->flags & PERF_IP_FLAG_CALL) { ··· 921 896 ts->rstate = X86_RETPOLINE_DETECTED; 922 897 923 898 } else if (sample->flags & PERF_IP_FLAG_RETURN) { 924 - if (!sample->ip || !sample->addr) 899 + if (!sample->addr) { 900 + u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET | 901 + PERF_IP_FLAG_INTERRUPT; 902 + 903 + if (!(sample->flags & return_from_kernel)) 904 + return 0; 905 + 906 + /* Pop kernel stack */ 907 + return thread_stack__pop_ks(thread, ts, sample, ref); 908 + } 909 + 910 + if (!sample->ip) 925 911 return 0; 926 912 927 913 /* x86 
retpoline 'return' doesn't match the stack */
+4
tools/perf/util/thread-stack.h
··· 43 43 * @call_time: timestamp of call (if known) 44 44 * @return_time: timestamp of return (if known) 45 45 * @branch_count: number of branches seen between call and return 46 + * @insn_count: approx. number of instructions between call and return 47 + * @cyc_count: approx. number of cycles between call and return 46 48 * @call_ref: external reference to 'call' sample (e.g. db_id) 47 49 * @return_ref: external reference to 'return' sample (e.g. db_id) 48 50 * @db_id: id used for db-export ··· 58 56 u64 call_time; 59 57 u64 return_time; 60 58 u64 branch_count; 59 + u64 insn_count; 60 + u64 cyc_count; 61 61 u64 call_ref; 62 62 u64 return_ref; 63 63 u64 db_id;
+26 -9
tools/perf/util/thread.c
··· 125 125 { 126 126 if (thread && refcount_dec_and_test(&thread->refcnt)) { 127 127 /* 128 - * Remove it from the dead_threads list, as last reference 129 - * is gone. 128 + * Remove it from the dead threads list, as last reference is 129 + * gone, if it is in a dead threads list. 130 + * 131 + * We may not be there anymore if say, the machine where it was 132 + * stored was already deleted, so we already removed it from 133 + * the dead threads and some other piece of code still keeps a 134 + * reference. 135 + * 136 + * This is what 'perf sched' does and finally drops it in 137 + * perf_sched__lat(), where it calls perf_sched__read_events(), 138 + * that processes the events by creating a session and deleting 139 + * it, which ends up destroying the list heads for the dead 140 + * threads, but before it does that it removes all threads from 141 + * it using list_del_init(). 142 + * 143 + * So we need to check here if it is in a dead threads list and 144 + * if so, remove it before finally deleting the thread, to avoid 145 + * a use-after-free situation. 
130 146 */ 131 - list_del_init(&thread->node); 147 + if (!list_empty(&thread->node)) 148 + list_del_init(&thread->node); 132 149 thread__delete(thread); 133 150 } 134 151 } ··· 158 141 return list_first_entry(&thread->namespaces_list, struct namespaces, list); 159 142 } 160 143 161 - struct namespaces *thread__namespaces(const struct thread *thread) 144 + struct namespaces *thread__namespaces(struct thread *thread) 162 145 { 163 146 struct namespaces *ns; 164 147 165 - down_read((struct rw_semaphore *)&thread->namespaces_lock); 148 + down_read(&thread->namespaces_lock); 166 149 ns = __thread__namespaces(thread); 167 - up_read((struct rw_semaphore *)&thread->namespaces_lock); 150 + up_read(&thread->namespaces_lock); 168 151 169 152 return ns; 170 153 } ··· 288 271 return comm__str(comm); 289 272 } 290 273 291 - const char *thread__comm_str(const struct thread *thread) 274 + const char *thread__comm_str(struct thread *thread) 292 275 { 293 276 const char *str; 294 277 295 - down_read((struct rw_semaphore *)&thread->comm_lock); 278 + down_read(&thread->comm_lock); 296 279 str = __thread__comm_str(thread); 297 - up_read((struct rw_semaphore *)&thread->comm_lock); 280 + up_read(&thread->comm_lock); 298 281 299 282 return str; 300 283 }
+2 -2
tools/perf/util/thread.h
··· 76 76 thread->dead = true; 77 77 } 78 78 79 - struct namespaces *thread__namespaces(const struct thread *thread); 79 + struct namespaces *thread__namespaces(struct thread *thread); 80 80 int thread__set_namespaces(struct thread *thread, u64 timestamp, 81 81 struct namespaces_event *event); 82 82 ··· 93 93 int thread__comm_len(struct thread *thread); 94 94 struct comm *thread__comm(const struct thread *thread); 95 95 struct comm *thread__exec_comm(const struct thread *thread); 96 - const char *thread__comm_str(const struct thread *thread); 96 + const char *thread__comm_str(struct thread *thread); 97 97 int thread__insert_map(struct thread *thread, struct map *map); 98 98 int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); 99 99 size_t thread__fprintf(struct thread *thread, FILE *fp);
+2 -1
tools/perf/util/thread_map.c
··· 12 12 #include "strlist.h" 13 13 #include <string.h> 14 14 #include <api/fs/fs.h> 15 + #include <linux/string.h> 15 16 #include "asm/bug.h" 16 17 #include "thread_map.h" 17 18 #include "util.h" ··· 393 392 * mark the end of the string. 394 393 */ 395 394 (*comm)[size] = 0; 396 - rtrim(*comm); 395 + strim(*comm); 397 396 } 398 397 399 398 free(path);
+95 -35
tools/perf/util/time-utils.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <stdlib.h> 3 3 #include <string.h> 4 + #include <linux/string.h> 4 5 #include <sys/time.h> 5 6 #include <linux/time64.h> 6 7 #include <time.h> 7 8 #include <errno.h> 8 9 #include <inttypes.h> 9 10 #include <math.h> 11 + #include <linux/ctype.h> 10 12 11 13 #include "perf.h" 12 14 #include "debug.h" ··· 118 116 return rc; 119 117 } 120 118 119 + static int perf_time__parse_strs(struct perf_time_interval *ptime, 120 + const char *ostr, int size) 121 + { 122 + const char *cp; 123 + char *str, *arg, *p; 124 + int i, num = 0, rc = 0; 125 + 126 + /* Count the commas */ 127 + for (cp = ostr; *cp; cp++) 128 + num += !!(*cp == ','); 129 + 130 + if (!num) 131 + return -EINVAL; 132 + 133 + BUG_ON(num > size); 134 + 135 + str = strdup(ostr); 136 + if (!str) 137 + return -ENOMEM; 138 + 139 + /* Split the string and parse each piece, except the last */ 140 + for (i = 0, p = str; i < num - 1; i++) { 141 + arg = p; 142 + /* Find next comma, there must be one */ 143 + p = skip_spaces(strchr(p, ',') + 1); 144 + /* Skip the value, must not contain space or comma */ 145 + while (*p && !isspace(*p)) { 146 + if (*p++ == ',') { 147 + rc = -EINVAL; 148 + goto out; 149 + } 150 + } 151 + /* Split and parse */ 152 + if (*p) 153 + *p++ = 0; 154 + rc = perf_time__parse_str(ptime + i, arg); 155 + if (rc < 0) 156 + goto out; 157 + } 158 + 159 + /* Parse the last piece */ 160 + rc = perf_time__parse_str(ptime + i, p); 161 + if (rc < 0) 162 + goto out; 163 + 164 + /* Check there is no overlap */ 165 + for (i = 0; i < num - 1; i++) { 166 + if (ptime[i].end >= ptime[i + 1].start) { 167 + rc = -EINVAL; 168 + goto out; 169 + } 170 + } 171 + 172 + rc = num; 173 + out: 174 + free(str); 175 + 176 + return rc; 177 + } 178 + 121 179 static int parse_percent(double *pcnt, char *str) 122 180 { 123 181 char *c, *endptr; ··· 197 135 return 0; 198 136 } 199 137 138 + static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt, 139 + 
double end_pcnt, u64 start, u64 end) 140 + { 141 + u64 total = end - start; 142 + 143 + if (start_pcnt < 0.0 || start_pcnt > 1.0 || 144 + end_pcnt < 0.0 || end_pcnt > 1.0) { 145 + return -1; 146 + } 147 + 148 + ptime->start = start + round(start_pcnt * total); 149 + ptime->end = start + round(end_pcnt * total); 150 + 151 + if (ptime->end > ptime->start && ptime->end != end) 152 + ptime->end -= 1; 153 + 154 + return 0; 155 + } 156 + 200 157 static int percent_slash_split(char *str, struct perf_time_interval *ptime, 201 158 u64 start, u64 end) 202 159 { 203 160 char *p, *end_str; 204 161 double pcnt, start_pcnt, end_pcnt; 205 - u64 total = end - start; 206 162 int i; 207 163 208 164 /* ··· 248 168 start_pcnt = pcnt * (i - 1); 249 169 end_pcnt = pcnt * i; 250 170 251 - if (start_pcnt < 0.0 || start_pcnt > 1.0 || 252 - end_pcnt < 0.0 || end_pcnt > 1.0) { 253 - return -1; 254 - } 255 - 256 - ptime->start = start + round(start_pcnt * total); 257 - ptime->end = start + round(end_pcnt * total); 258 - 259 - return 0; 171 + return set_percent_time(ptime, start_pcnt, end_pcnt, start, end); 260 172 } 261 173 262 174 static int percent_dash_split(char *str, struct perf_time_interval *ptime, ··· 256 184 { 257 185 char *start_str = NULL, *end_str; 258 186 double start_pcnt, end_pcnt; 259 - u64 total = end - start; 260 187 int ret; 261 188 262 189 /* ··· 274 203 275 204 free(start_str); 276 205 277 - if (start_pcnt < 0.0 || start_pcnt > 1.0 || 278 - end_pcnt < 0.0 || end_pcnt > 1.0 || 279 - start_pcnt > end_pcnt) { 280 - return -1; 281 - } 282 - 283 - ptime->start = start + round(start_pcnt * total); 284 - ptime->end = start + round(end_pcnt * total); 285 - 286 - return 0; 206 + return set_percent_time(ptime, start_pcnt, end_pcnt, start, end); 287 207 } 288 208 289 209 typedef int (*time_pecent_split)(char *, struct perf_time_interval *, ··· 451 389 ptime = &ptime_buf[i]; 452 390 453 391 if (timestamp >= ptime->start && 454 - ((timestamp < ptime->end && i < num - 1) || 455 - 
(timestamp <= ptime->end && i == num - 1))) { 456 - break; 392 + (timestamp <= ptime->end || !ptime->end)) { 393 + return false; 457 394 } 458 395 } 459 396 460 - return (i == num) ? true : false; 397 + return true; 461 398 } 462 399 463 400 int perf_time__parse_for_ranges(const char *time_str, ··· 464 403 struct perf_time_interval **ranges, 465 404 int *range_size, int *range_num) 466 405 { 406 + bool has_percent = strchr(time_str, '%'); 467 407 struct perf_time_interval *ptime_range; 468 - int size, num, ret; 408 + int size, num, ret = -EINVAL; 469 409 470 410 ptime_range = perf_time__range_alloc(time_str, &size); 471 411 if (!ptime_range) 472 412 return -ENOMEM; 473 413 474 - if (perf_time__parse_str(ptime_range, time_str) != 0) { 414 + if (has_percent) { 475 415 if (session->evlist->first_sample_time == 0 && 476 416 session->evlist->last_sample_time == 0) { 477 417 pr_err("HINT: no first/last sample time found in perf data.\n" 478 418 "Please use latest perf binary to execute 'perf record'\n" 479 419 "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); 480 - ret = -EINVAL; 481 420 goto error; 482 421 } 483 422 ··· 486 425 time_str, 487 426 session->evlist->first_sample_time, 488 427 session->evlist->last_sample_time); 489 - 490 - if (num < 0) { 491 - pr_err("Invalid time string\n"); 492 - ret = -EINVAL; 493 - goto error; 494 - } 495 428 } else { 496 - num = 1; 429 + num = perf_time__parse_strs(ptime_range, time_str, size); 497 430 } 431 + 432 + if (num < 0) 433 + goto error_invalid; 498 434 499 435 *range_size = size; 500 436 *range_num = num; 501 437 *ranges = ptime_range; 502 438 return 0; 503 439 440 + error_invalid: 441 + pr_err("Invalid time string\n"); 504 442 error: 505 443 free(ptime_range); 506 444 return ret;
+1 -1
tools/perf/util/trace-event-parse.c
··· 11 11 #include "debug.h" 12 12 #include "trace-event.h" 13 13 14 - #include "sane_ctype.h" 14 + #include <linux/ctype.h> 15 15 16 16 static int get_common_field(struct scripting_context *context, 17 17 int *offset, int *size, const char *type)
-13
tools/perf/util/util.c
··· 434 434 return n; 435 435 } 436 436 437 - /* 438 - * While we find nice hex chars, build a long_val. 439 - * Return number of chars processed. 440 - */ 441 - int hex2u64(const char *ptr, u64 *long_val) 442 - { 443 - char *p; 444 - 445 - *long_val = strtoull(ptr, &p, 16); 446 - 447 - return p - ptr; 448 - } 449 - 450 437 int perf_event_paranoid(void) 451 438 { 452 439 int value;
-1
tools/perf/util/util.h
··· 43 43 ssize_t writen(int fd, const void *buf, size_t n); 44 44 45 45 size_t hex_width(u64 v); 46 - int hex2u64(const char *ptr, u64 *val); 47 46 48 47 extern unsigned int page_size; 49 48 int __pure cacheline_size(void);