Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf: Skip pmu_ctx based on event_type

To optimize the cgroup context switch, the perf_event_pmu_context
iteration skips the PMUs without cgroup events. A bool cgroup parameter
was introduced to indicate this case. That works, but the approach is
hard to extend to other cases, e.g. skipping non-mediated PMUs. It
doesn't make sense to keep adding bool variables.

Pass the event_type instead of the specific bool variable. Check both
the event_type and the related pmu_ctx variables to decide whether to
skip a PMU.

Event flags, e.g., EVENT_CGROUP, should be cleared in ctx->is_active.
Add EVENT_FLAGS to indicate such event flags.

No functional change.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Link: https://patch.msgid.link/20251206001720.468579-2-seanjc@google.com

authored by

Kan Liang and committed by
Peter Zijlstra
b825444b 7ac422cf

+40 -34
+40 -34
kernel/events/core.c
··· 165 165 /* see ctx_resched() for details */ 166 166 EVENT_CPU = 0x10, 167 167 EVENT_CGROUP = 0x20, 168 - 168 + EVENT_FLAGS = EVENT_CGROUP, 169 169 /* compound helpers */ 170 170 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, 171 171 EVENT_TIME_FROZEN = EVENT_TIME | EVENT_FROZEN, ··· 779 779 ___p; \ 780 780 }) 781 781 782 - #define for_each_epc(_epc, _ctx, _pmu, _cgroup) \ 782 + static bool perf_skip_pmu_ctx(struct perf_event_pmu_context *pmu_ctx, 783 + enum event_type_t event_type) 784 + { 785 + if ((event_type & EVENT_CGROUP) && !pmu_ctx->nr_cgroups) 786 + return true; 787 + return false; 788 + } 789 + 790 + #define for_each_epc(_epc, _ctx, _pmu, _event_type) \ 783 791 list_for_each_entry(_epc, &((_ctx)->pmu_ctx_list), pmu_ctx_entry) \ 784 - if (_cgroup && !_epc->nr_cgroups) \ 792 + if (perf_skip_pmu_ctx(_epc, _event_type)) \ 785 793 continue; \ 786 794 else if (_pmu && _epc->pmu != _pmu) \ 787 795 continue; \ 788 796 else 789 797 790 - static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup) 798 + static void perf_ctx_disable(struct perf_event_context *ctx, 799 + enum event_type_t event_type) 791 800 { 792 801 struct perf_event_pmu_context *pmu_ctx; 793 802 794 - for_each_epc(pmu_ctx, ctx, NULL, cgroup) 803 + for_each_epc(pmu_ctx, ctx, NULL, event_type) 795 804 perf_pmu_disable(pmu_ctx->pmu); 796 805 } 797 806 798 - static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup) 807 + static void perf_ctx_enable(struct perf_event_context *ctx, 808 + enum event_type_t event_type) 799 809 { 800 810 struct perf_event_pmu_context *pmu_ctx; 801 811 802 - for_each_epc(pmu_ctx, ctx, NULL, cgroup) 812 + for_each_epc(pmu_ctx, ctx, NULL, event_type) 803 813 perf_pmu_enable(pmu_ctx->pmu); 804 814 } 805 815 ··· 974 964 return; 975 965 976 966 WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); 977 - 978 - perf_ctx_disable(&cpuctx->ctx, true); 967 + perf_ctx_disable(&cpuctx->ctx, EVENT_CGROUP); 979 968 980 969 ctx_sched_out(&cpuctx->ctx, NULL, 
EVENT_ALL|EVENT_CGROUP); 981 970 /* ··· 990 981 */ 991 982 ctx_sched_in(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); 992 983 993 - perf_ctx_enable(&cpuctx->ctx, true); 984 + perf_ctx_enable(&cpuctx->ctx, EVENT_CGROUP); 994 985 } 995 986 996 987 static int perf_cgroup_ensure_storage(struct perf_event *event, ··· 2911 2902 2912 2903 event_type &= EVENT_ALL; 2913 2904 2914 - for_each_epc(epc, &cpuctx->ctx, pmu, false) 2905 + for_each_epc(epc, &cpuctx->ctx, pmu, 0) 2915 2906 perf_pmu_disable(epc->pmu); 2916 2907 2917 2908 if (task_ctx) { 2918 - for_each_epc(epc, task_ctx, pmu, false) 2909 + for_each_epc(epc, task_ctx, pmu, 0) 2919 2910 perf_pmu_disable(epc->pmu); 2920 2911 2921 2912 task_ctx_sched_out(task_ctx, pmu, event_type); ··· 2935 2926 2936 2927 perf_event_sched_in(cpuctx, task_ctx, pmu); 2937 2928 2938 - for_each_epc(epc, &cpuctx->ctx, pmu, false) 2929 + for_each_epc(epc, &cpuctx->ctx, pmu, 0) 2939 2930 perf_pmu_enable(epc->pmu); 2940 2931 2941 2932 if (task_ctx) { 2942 - for_each_epc(epc, task_ctx, pmu, false) 2933 + for_each_epc(epc, task_ctx, pmu, 0) 2943 2934 perf_pmu_enable(epc->pmu); 2944 2935 } 2945 2936 } ··· 3488 3479 ctx_sched_out(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type) 3489 3480 { 3490 3481 struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); 3482 + enum event_type_t active_type = event_type & ~EVENT_FLAGS; 3491 3483 struct perf_event_pmu_context *pmu_ctx; 3492 3484 int is_active = ctx->is_active; 3493 - bool cgroup = event_type & EVENT_CGROUP; 3494 3485 3495 - event_type &= ~EVENT_CGROUP; 3496 3486 3497 3487 lockdep_assert_held(&ctx->lock); 3498 3488 ··· 3522 3514 * see __load_acquire() in perf_event_time_now() 3523 3515 */ 3524 3516 barrier(); 3525 - ctx->is_active &= ~event_type; 3517 + ctx->is_active &= ~active_type; 3526 3518 3527 3519 if (!(ctx->is_active & EVENT_ALL)) { 3528 3520 /* ··· 3543 3535 3544 3536 is_active ^= ctx->is_active; /* changed bits */ 3545 3537 3546 - for_each_epc(pmu_ctx, 
ctx, pmu, cgroup) 3538 + for_each_epc(pmu_ctx, ctx, pmu, event_type) 3547 3539 __pmu_ctx_sched_out(pmu_ctx, is_active); 3548 3540 } 3549 3541 ··· 3699 3691 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 3700 3692 if (context_equiv(ctx, next_ctx)) { 3701 3693 3702 - perf_ctx_disable(ctx, false); 3694 + perf_ctx_disable(ctx, 0); 3703 3695 3704 3696 /* PMIs are disabled; ctx->nr_no_switch_fast is stable. */ 3705 3697 if (local_read(&ctx->nr_no_switch_fast) || ··· 3723 3715 3724 3716 perf_ctx_sched_task_cb(ctx, task, false); 3725 3717 3726 - perf_ctx_enable(ctx, false); 3718 + perf_ctx_enable(ctx, 0); 3727 3719 3728 3720 /* 3729 3721 * RCU_INIT_POINTER here is safe because we've not ··· 3747 3739 3748 3740 if (do_switch) { 3749 3741 raw_spin_lock(&ctx->lock); 3750 - perf_ctx_disable(ctx, false); 3742 + perf_ctx_disable(ctx, 0); 3751 3743 3752 3744 inside_switch: 3753 3745 perf_ctx_sched_task_cb(ctx, task, false); 3754 3746 task_ctx_sched_out(ctx, NULL, EVENT_ALL); 3755 3747 3756 - perf_ctx_enable(ctx, false); 3748 + perf_ctx_enable(ctx, 0); 3757 3749 raw_spin_unlock(&ctx->lock); 3758 3750 } 3759 3751 } ··· 4062 4054 ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type) 4063 4055 { 4064 4056 struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); 4057 + enum event_type_t active_type = event_type & ~EVENT_FLAGS; 4065 4058 struct perf_event_pmu_context *pmu_ctx; 4066 4059 int is_active = ctx->is_active; 4067 - bool cgroup = event_type & EVENT_CGROUP; 4068 - 4069 - event_type &= ~EVENT_CGROUP; 4070 4060 4071 4061 lockdep_assert_held(&ctx->lock); 4072 4062 ··· 4082 4076 barrier(); 4083 4077 } 4084 4078 4085 - ctx->is_active |= (event_type | EVENT_TIME); 4079 + ctx->is_active |= active_type | EVENT_TIME; 4086 4080 if (ctx->task) { 4087 4081 if (!(is_active & EVENT_ALL)) 4088 4082 cpuctx->task_ctx = ctx; ··· 4097 4091 * in order to give them the best chance of going on. 
4098 4092 */ 4099 4093 if (is_active & EVENT_PINNED) { 4100 - for_each_epc(pmu_ctx, ctx, pmu, cgroup) 4094 + for_each_epc(pmu_ctx, ctx, pmu, event_type) 4101 4095 __pmu_ctx_sched_in(pmu_ctx, EVENT_PINNED); 4102 4096 } 4103 4097 4104 4098 /* Then walk through the lower prio flexible groups */ 4105 4099 if (is_active & EVENT_FLEXIBLE) { 4106 - for_each_epc(pmu_ctx, ctx, pmu, cgroup) 4100 + for_each_epc(pmu_ctx, ctx, pmu, event_type) 4107 4101 __pmu_ctx_sched_in(pmu_ctx, EVENT_FLEXIBLE); 4108 4102 } 4109 4103 } ··· 4120 4114 4121 4115 if (cpuctx->task_ctx == ctx) { 4122 4116 perf_ctx_lock(cpuctx, ctx); 4123 - perf_ctx_disable(ctx, false); 4117 + perf_ctx_disable(ctx, 0); 4124 4118 4125 4119 perf_ctx_sched_task_cb(ctx, task, true); 4126 4120 4127 - perf_ctx_enable(ctx, false); 4121 + perf_ctx_enable(ctx, 0); 4128 4122 perf_ctx_unlock(cpuctx, ctx); 4129 4123 goto rcu_unlock; 4130 4124 } ··· 4137 4131 if (!ctx->nr_events) 4138 4132 goto unlock; 4139 4133 4140 - perf_ctx_disable(ctx, false); 4134 + perf_ctx_disable(ctx, 0); 4141 4135 /* 4142 4136 * We want to keep the following priority order: 4143 4137 * cpu pinned (that don't need to move), task pinned, ··· 4147 4141 * events, no need to flip the cpuctx's events around. 4148 4142 */ 4149 4143 if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) { 4150 - perf_ctx_disable(&cpuctx->ctx, false); 4144 + perf_ctx_disable(&cpuctx->ctx, 0); 4151 4145 ctx_sched_out(&cpuctx->ctx, NULL, EVENT_FLEXIBLE); 4152 4146 } 4153 4147 ··· 4156 4150 perf_ctx_sched_task_cb(cpuctx->task_ctx, task, true); 4157 4151 4158 4152 if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) 4159 - perf_ctx_enable(&cpuctx->ctx, false); 4153 + perf_ctx_enable(&cpuctx->ctx, 0); 4160 4154 4161 - perf_ctx_enable(ctx, false); 4155 + perf_ctx_enable(ctx, 0); 4162 4156 4163 4157 unlock: 4164 4158 perf_ctx_unlock(cpuctx, ctx);