Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf: Clean up perf ctx time

perf currently tracks two timestamps: one for the normal ctx and one for
the cgroup. The same types of variables and similar code are used to track
both timestamps. In a following patch, a third timestamp will be introduced
to track the guest time.
To avoid code duplication, add a new struct perf_time_ctx and factor
out a generic function update_perf_time_ctx().

No functional change.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Link: https://patch.msgid.link/20251206001720.468579-6-seanjc@google.com

authored by

Kan Liang and committed by
Peter Zijlstra
f5c7de8f eff95e17

+39 -44
+7 -6
include/linux/perf_event.h
··· 999 999 u64 index; 1000 1000 }; 1001 1001 1002 + struct perf_time_ctx { 1003 + u64 time; 1004 + u64 stamp; 1005 + u64 offset; 1006 + }; 1002 1007 1003 1008 /** 1004 1009 * struct perf_event_context - event context structure ··· 1042 1037 /* 1043 1038 * Context clock, runs when context enabled. 1044 1039 */ 1045 - u64 time; 1046 - u64 timestamp; 1047 - u64 timeoffset; 1040 + struct perf_time_ctx time; 1048 1041 1049 1042 /* 1050 1043 * These fields let us detect when two contexts have both ··· 1175 1172 * This is a per-cpu dynamically allocated data structure. 1176 1173 */ 1177 1174 struct perf_cgroup_info { 1178 - u64 time; 1179 - u64 timestamp; 1180 - u64 timeoffset; 1175 + struct perf_time_ctx time; 1181 1176 int active; 1182 1177 }; 1183 1178
+32 -38
kernel/events/core.c
··· 816 816 static void ctx_sched_out(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type); 817 817 static void ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type); 818 818 819 + static inline void update_perf_time_ctx(struct perf_time_ctx *time, u64 now, bool adv) 820 + { 821 + if (adv) 822 + time->time += now - time->stamp; 823 + time->stamp = now; 824 + 825 + /* 826 + * The above: time' = time + (now - timestamp), can be re-arranged 827 + * into: time` = now + (time - timestamp), which gives a single value 828 + * offset to compute future time without locks on. 829 + * 830 + * See perf_event_time_now(), which can be used from NMI context where 831 + * it's (obviously) not possible to acquire ctx->lock in order to read 832 + * both the above values in a consistent manner. 833 + */ 834 + WRITE_ONCE(time->offset, time->time - time->stamp); 835 + } 836 + 819 837 #ifdef CONFIG_CGROUP_PERF 820 838 821 839 static inline bool ··· 875 857 struct perf_cgroup_info *t; 876 858 877 859 t = per_cpu_ptr(event->cgrp->info, event->cpu); 878 - return t->time; 860 + return t->time.time; 879 861 } 880 862 881 863 static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) ··· 884 866 885 867 t = per_cpu_ptr(event->cgrp->info, event->cpu); 886 868 if (!__load_acquire(&t->active)) 887 - return t->time; 888 - now += READ_ONCE(t->timeoffset); 869 + return t->time.time; 870 + now += READ_ONCE(t->time.offset); 889 871 return now; 890 - } 891 - 892 - static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv) 893 - { 894 - if (adv) 895 - info->time += now - info->timestamp; 896 - info->timestamp = now; 897 - /* 898 - * see update_context_time() 899 - */ 900 - WRITE_ONCE(info->timeoffset, info->time - info->timestamp); 901 872 } 902 873 903 874 static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final) ··· 902 895 cgrp = container_of(css, 
struct perf_cgroup, css); 903 896 info = this_cpu_ptr(cgrp->info); 904 897 905 - __update_cgrp_time(info, now, true); 898 + update_perf_time_ctx(&info->time, now, true); 906 899 if (final) 907 900 __store_release(&info->active, 0); 908 901 } ··· 925 918 * Do not update time when cgroup is not active 926 919 */ 927 920 if (info->active) 928 - __update_cgrp_time(info, perf_clock(), true); 921 + update_perf_time_ctx(&info->time, perf_clock(), true); 929 922 } 930 923 931 924 static inline void ··· 949 942 for (css = &cgrp->css; css; css = css->parent) { 950 943 cgrp = container_of(css, struct perf_cgroup, css); 951 944 info = this_cpu_ptr(cgrp->info); 952 - __update_cgrp_time(info, ctx->timestamp, false); 945 + update_perf_time_ctx(&info->time, ctx->time.stamp, false); 953 946 __store_release(&info->active, 1); 954 947 } 955 948 } ··· 1570 1563 1571 1564 lockdep_assert_held(&ctx->lock); 1572 1565 1573 - if (adv) 1574 - ctx->time += now - ctx->timestamp; 1575 - ctx->timestamp = now; 1576 - 1577 - /* 1578 - * The above: time' = time + (now - timestamp), can be re-arranged 1579 - * into: time` = now + (time - timestamp), which gives a single value 1580 - * offset to compute future time without locks on. 1581 - * 1582 - * See perf_event_time_now(), which can be used from NMI context where 1583 - * it's (obviously) not possible to acquire ctx->lock in order to read 1584 - * both the above values in a consistent manner. 
1585 - */ 1586 - WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp); 1566 + update_perf_time_ctx(&ctx->time, now, adv); 1587 1567 } 1588 1568 1589 1569 static void update_context_time(struct perf_event_context *ctx) ··· 1588 1594 if (is_cgroup_event(event)) 1589 1595 return perf_cgroup_event_time(event); 1590 1596 1591 - return ctx->time; 1597 + return ctx->time.time; 1592 1598 } 1593 1599 1594 1600 static u64 perf_event_time_now(struct perf_event *event, u64 now) ··· 1602 1608 return perf_cgroup_event_time_now(event, now); 1603 1609 1604 1610 if (!(__load_acquire(&ctx->is_active) & EVENT_TIME)) 1605 - return ctx->time; 1611 + return ctx->time.time; 1606 1612 1607 - now += READ_ONCE(ctx->timeoffset); 1613 + now += READ_ONCE(ctx->time.offset); 1608 1614 return now; 1609 1615 } 1610 1616 ··· 12107 12113 static void task_clock_event_start(struct perf_event *event, int flags) 12108 12114 { 12109 12115 event->hw.state = 0; 12110 - local64_set(&event->hw.prev_count, event->ctx->time); 12116 + local64_set(&event->hw.prev_count, event->ctx->time.time); 12111 12117 perf_swevent_start_hrtimer(event); 12112 12118 } 12113 12119 ··· 12116 12122 event->hw.state = PERF_HES_STOPPED; 12117 12123 perf_swevent_cancel_hrtimer(event); 12118 12124 if (flags & PERF_EF_UPDATE) 12119 - task_clock_event_update(event, event->ctx->time); 12125 + task_clock_event_update(event, event->ctx->time.time); 12120 12126 } 12121 12127 12122 12128 static int task_clock_event_add(struct perf_event *event, int flags) ··· 12136 12142 static void task_clock_event_read(struct perf_event *event) 12137 12143 { 12138 12144 u64 now = perf_clock(); 12139 - u64 delta = now - event->ctx->timestamp; 12140 - u64 time = event->ctx->time + delta; 12145 + u64 delta = now - event->ctx->time.stamp; 12146 + u64 time = event->ctx->time.time + delta; 12141 12147 12142 12148 task_clock_event_update(event, time); 12143 12149 }