Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Intel Running Average Power Limit (RAPL) Driver via MSR interface
4 * Copyright (c) 2019, Intel Corporation.
5 */
/* Prefix prepended to every pr_*() message emitted from this file. */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
7
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/list.h>
11#include <linux/types.h>
12#include <linux/device.h>
13#include <linux/slab.h>
14#include <linux/log2.h>
15#include <linux/bitmap.h>
16#include <linux/delay.h>
17#include <linux/sysfs.h>
18#include <linux/cpu.h>
19#include <linux/powercap.h>
20#include <linux/suspend.h>
21#include <linux/intel_rapl.h>
22#include <linux/processor.h>
23#include <linux/platform_device.h>
24#include <linux/units.h>
25#include <linux/bits.h>
26
27#include <asm/cpu_device_id.h>
28#include <asm/intel-family.h>
29#include <asm/iosf_mbi.h>
30#include <asm/msr.h>
31
/* MSR numbers defined locally by this driver */
#define MSR_PLATFORM_POWER_LIMIT	0x0000065C
#define MSR_VR_CURRENT_CONFIG		0x00000601

/* Scale from driver unit to powercap unit */
#define ENERGY_UNIT_SCALE		1000

/*
 * Fields of the unit register. Each field is described by the position of
 * its lowest bit (*_OFFSET) and the mask that selects it (*_MASK).
 */
#define POWER_UNIT_OFFSET		0x00
#define POWER_UNIT_MASK			GENMASK(3, 0)

#define ENERGY_UNIT_OFFSET		0x08
#define ENERGY_UNIT_MASK		GENMASK(12, 8)

#define TIME_UNIT_OFFSET		0x10
#define TIME_UNIT_MASK			GENMASK(19, 16)
46
/*
 * Bitmasks for RAPL MSRs, used by primitive access functions.
 * The rpi_msr[] table pairs each mask with the shift of its lowest bit.
 */

/* Energy status register */
#define ENERGY_STATUS_MASK		GENMASK(31, 0)

/* Power limit register: limit #1 occupies the low 32 bits */
#define POWER_LIMIT1_MASK		GENMASK(14, 0)
#define POWER_LIMIT1_ENABLE		BIT(15)
#define POWER_LIMIT1_CLAMP		BIT(16)

/* Power limit register: limit #2 occupies the high 32 bits */
#define POWER_LIMIT2_MASK		GENMASK_ULL(46, 32)
#define POWER_LIMIT2_ENABLE		BIT_ULL(47)
#define POWER_LIMIT2_CLAMP		BIT_ULL(48)

/* Lock bits of the power limit register, one per 32-bit half */
#define POWER_HIGH_LOCK			BIT_ULL(63)
#define POWER_LOW_LOCK			BIT(31)

/* Power limit #4 lives in its own register (RAPL_DOMAIN_REG_PL4) */
#define POWER_LIMIT4_MASK		GENMASK(12, 0)

/* Time windows belonging to limit #1 and limit #2 */
#define TIME_WINDOW1_MASK		GENMASK_ULL(23, 17)
#define TIME_WINDOW2_MASK		GENMASK_ULL(55, 49)

/* Power info register */
#define POWER_INFO_MAX_MASK		GENMASK_ULL(46, 32)
#define POWER_INFO_MIN_MASK		GENMASK_ULL(30, 16)
#define POWER_INFO_MAX_TIME_WIN_MASK	GENMASK_ULL(53, 48)
#define POWER_INFO_THERMAL_SPEC_MASK	GENMASK(14, 0)

/* Perf status and policy registers */
#define PERF_STATUS_THROTTLE_TIME_MASK	GENMASK(31, 0)
#define PP_POLICY_MASK			GENMASK(4, 0)
72
/*
 * SPR uses a different layout for the Psys domain PowerLimit register:
 * PL1 and PL2 are 17 bits wide instead of 15, which in turn moves the
 * Enable bits and the TimeWindow fields two positions up.
 */
#define PSYS_POWER_LIMIT1_MASK		GENMASK_ULL(16, 0)
#define PSYS_POWER_LIMIT1_ENABLE	BIT(17)

#define PSYS_POWER_LIMIT2_MASK		GENMASK_ULL(48, 32)
#define PSYS_POWER_LIMIT2_ENABLE	BIT_ULL(49)

#define PSYS_TIME_WINDOW1_MASK		GENMASK_ULL(25, 19)
#define PSYS_TIME_WINDOW2_MASK		GENMASK_ULL(57, 51)
86
/*
 * Sideband MBI registers.
 * Used as .floor_freq_reg_addr by rapl_defaults_byt and rapl_defaults_tng.
 */
#define IOSF_CPU_POWER_BUDGET_CTL_BYT	0x02
#define IOSF_CPU_POWER_BUDGET_CTL_TNG	0xDF
90
91/* private data for RAPL MSR Interface */
92static struct rapl_if_priv *rapl_msr_priv;
93
94static bool rapl_msr_pmu __ro_after_init;
95
96static struct rapl_if_priv rapl_msr_priv_intel = {
97 .type = RAPL_IF_MSR,
98 .reg_unit.msr = MSR_RAPL_POWER_UNIT,
99 .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_LIMIT].msr = MSR_PKG_POWER_LIMIT,
100 .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_STATUS].msr = MSR_PKG_ENERGY_STATUS,
101 .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PERF].msr = MSR_PKG_PERF_STATUS,
102 .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_INFO].msr = MSR_PKG_POWER_INFO,
103 .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_LIMIT].msr = MSR_PP0_POWER_LIMIT,
104 .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_STATUS].msr = MSR_PP0_ENERGY_STATUS,
105 .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_POLICY].msr = MSR_PP0_POLICY,
106 .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_LIMIT].msr = MSR_PP1_POWER_LIMIT,
107 .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_STATUS].msr = MSR_PP1_ENERGY_STATUS,
108 .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_POLICY].msr = MSR_PP1_POLICY,
109 .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_LIMIT].msr = MSR_DRAM_POWER_LIMIT,
110 .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_STATUS].msr = MSR_DRAM_ENERGY_STATUS,
111 .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_PERF].msr = MSR_DRAM_PERF_STATUS,
112 .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_INFO].msr = MSR_DRAM_POWER_INFO,
113 .regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT].msr = MSR_PLATFORM_POWER_LIMIT,
114 .regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS].msr = MSR_PLATFORM_ENERGY_STATUS,
115 .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
116 .limits[RAPL_DOMAIN_PLATFORM] = BIT(POWER_LIMIT2),
117};
118
119static struct rapl_if_priv rapl_msr_priv_amd = {
120 .type = RAPL_IF_MSR,
121 .reg_unit.msr = MSR_AMD_RAPL_POWER_UNIT,
122 .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_STATUS].msr = MSR_AMD_PKG_ENERGY_STATUS,
123 .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_STATUS].msr = MSR_AMD_CORE_ENERGY_STATUS,
124};
125
126/* Handles CPU hotplug on multi-socket systems.
127 * If a CPU goes online as the first CPU of the physical package
128 * we add the RAPL package to the system. Similarly, when the last
129 * CPU of the package is removed, we remove the RAPL package and its
130 * associated domains. Cooling devices are handled accordingly at
131 * per-domain level.
132 */
133static int rapl_cpu_online(unsigned int cpu)
134{
135 struct rapl_package *rp;
136
137 rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true);
138 if (!rp) {
139 rp = rapl_add_package_cpuslocked(cpu, rapl_msr_priv, true);
140 if (IS_ERR(rp))
141 return PTR_ERR(rp);
142 if (rapl_msr_pmu)
143 rapl_package_add_pmu_locked(rp);
144 }
145 cpumask_set_cpu(cpu, &rp->cpumask);
146 return 0;
147}
148
149static int rapl_cpu_down_prep(unsigned int cpu)
150{
151 struct rapl_package *rp;
152 int lead_cpu;
153
154 rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true);
155 if (!rp)
156 return 0;
157
158 cpumask_clear_cpu(cpu, &rp->cpumask);
159 lead_cpu = cpumask_first(&rp->cpumask);
160 if (lead_cpu >= nr_cpu_ids) {
161 if (rapl_msr_pmu)
162 rapl_package_remove_pmu_locked(rp);
163 rapl_remove_package_cpuslocked(rp);
164 } else if (rp->lead_cpu == cpu) {
165 rp->lead_cpu = lead_cpu;
166 }
167
168 return 0;
169}
170
171static int rapl_msr_read_raw(int cpu, struct reg_action *ra, bool pmu_ctx)
172{
173 /*
174 * When called from PMU context, perform MSR read directly using
175 * rdmsrq() without IPI overhead. Package-scoped MSRs are readable
176 * from any CPU in the package.
177 */
178 if (pmu_ctx) {
179 rdmsrq(ra->reg.msr, ra->value);
180 goto out;
181 }
182
183 if (rdmsrq_safe_on_cpu(cpu, ra->reg.msr, &ra->value)) {
184 pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg.msr, cpu);
185 return -EIO;
186 }
187
188out:
189 ra->value &= ra->mask;
190 return 0;
191}
192
193static void rapl_msr_update_func(void *info)
194{
195 struct reg_action *ra = info;
196 u64 val;
197
198 ra->err = rdmsrq_safe(ra->reg.msr, &val);
199 if (ra->err)
200 return;
201
202 val &= ~ra->mask;
203 val |= ra->value;
204
205 ra->err = wrmsrq_safe(ra->reg.msr, val);
206}
207
208static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
209{
210 int ret;
211
212 ret = smp_call_function_single(cpu, rapl_msr_update_func, ra, 1);
213 if (WARN_ON_ONCE(ret))
214 return ret;
215
216 return ra->err;
217}
218
219static int rapl_check_unit_atom(struct rapl_domain *rd)
220{
221 struct reg_action ra;
222 u32 value;
223
224 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
225 ra.mask = ~0;
226 if (rapl_msr_read_raw(rd->rp->lead_cpu, &ra, false)) {
227 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
228 ra.reg.val, rd->rp->name, rd->name);
229 return -ENODEV;
230 }
231
232 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
233 rd->energy_unit = ENERGY_UNIT_SCALE * (1ULL << value);
234
235 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
236 rd->power_unit = (1ULL << value) * MILLIWATT_PER_WATT;
237
238 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
239 rd->time_unit = USEC_PER_SEC >> value;
240
241 pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
242 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
243
244 return 0;
245}
246
247static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
248{
249 static u32 power_ctrl_orig_val;
250 const struct rapl_defaults *defaults = rd->rp->priv->defaults;
251 u32 mdata;
252
253 if (!defaults->floor_freq_reg_addr) {
254 pr_err("Invalid floor frequency config register\n");
255 return;
256 }
257
258 if (!power_ctrl_orig_val)
259 iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
260 defaults->floor_freq_reg_addr,
261 &power_ctrl_orig_val);
262 mdata = power_ctrl_orig_val;
263 if (enable) {
264 mdata &= ~GENMASK(14, 8);
265 mdata |= BIT(8);
266 }
267 iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
268 defaults->floor_freq_reg_addr, mdata);
269}
270
271static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
272 bool to_raw)
273{
274 if (to_raw)
275 return div64_u64(value, rd->time_unit);
276
277 /*
278 * Atom time unit encoding is straight forward val * time_unit,
279 * where time_unit is default to 1 sec. Never 0.
280 */
281 return value ? value * rd->time_unit : rd->time_unit;
282}
283
284/* RAPL primitives for MSR I/F */
285static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
286 /* name, mask, shift, msr index, unit divisor */
287 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
288 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
289 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
290 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
291 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
292 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
293 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
294 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
295 [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
296 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
297 [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
298 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
299 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
300 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
301 [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
302 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
303 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
304 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
305 [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
306 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
307 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
308 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
309 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
310 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
311 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER,
312 POWER_INFO_THERMAL_SPEC_MASK, 0,
313 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
314 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
315 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
316 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
317 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
318 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW,
319 POWER_INFO_MAX_TIME_WIN_MASK, 48,
320 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
321 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME,
322 PERF_STATUS_THROTTLE_TIME_MASK, 0,
323 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
324 [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
325 RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
326 [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
327 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
328 [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK,
329 32, RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
330 [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE,
331 17, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT,
332 0),
333 [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE,
334 49, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT,
335 0),
336 [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK,
337 19, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
338 [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK,
339 51, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
340};
341
342static const struct rapl_defaults rapl_defaults_core = {
343 .floor_freq_reg_addr = 0,
344 .check_unit = rapl_default_check_unit,
345 .set_floor_freq = rapl_default_set_floor_freq,
346 .compute_time_window = rapl_default_compute_time_window,
347};
348
349static const struct rapl_defaults rapl_defaults_hsw_server = {
350 .check_unit = rapl_default_check_unit,
351 .set_floor_freq = rapl_default_set_floor_freq,
352 .compute_time_window = rapl_default_compute_time_window,
353 .dram_domain_energy_unit = 15300,
354};
355
356static const struct rapl_defaults rapl_defaults_spr_server = {
357 .check_unit = rapl_default_check_unit,
358 .set_floor_freq = rapl_default_set_floor_freq,
359 .compute_time_window = rapl_default_compute_time_window,
360 .psys_domain_energy_unit = NANOJOULE_PER_JOULE,
361 .spr_psys_bits = true,
362};
363
364static const struct rapl_defaults rapl_defaults_byt = {
365 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
366 .check_unit = rapl_check_unit_atom,
367 .set_floor_freq = set_floor_freq_atom,
368 .compute_time_window = rapl_compute_time_window_atom,
369};
370
371static const struct rapl_defaults rapl_defaults_tng = {
372 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
373 .check_unit = rapl_check_unit_atom,
374 .set_floor_freq = set_floor_freq_atom,
375 .compute_time_window = rapl_compute_time_window_atom,
376};
377
378static const struct rapl_defaults rapl_defaults_ann = {
379 .floor_freq_reg_addr = 0,
380 .check_unit = rapl_check_unit_atom,
381 .set_floor_freq = NULL,
382 .compute_time_window = rapl_compute_time_window_atom,
383};
384
385static const struct rapl_defaults rapl_defaults_cht = {
386 .floor_freq_reg_addr = 0,
387 .check_unit = rapl_check_unit_atom,
388 .set_floor_freq = NULL,
389 .compute_time_window = rapl_compute_time_window_atom,
390};
391
392static const struct rapl_defaults rapl_defaults_amd = {
393 .check_unit = rapl_default_check_unit,
394};
395
396static const struct rapl_defaults rapl_defaults_core_pl4 = {
397 .floor_freq_reg_addr = 0,
398 .check_unit = rapl_default_check_unit,
399 .set_floor_freq = rapl_default_set_floor_freq,
400 .compute_time_window = rapl_default_compute_time_window,
401 .msr_pl4_support = 1,
402};
403
404static const struct rapl_defaults rapl_defaults_core_pl4_pmu = {
405 .floor_freq_reg_addr = 0,
406 .check_unit = rapl_default_check_unit,
407 .set_floor_freq = rapl_default_set_floor_freq,
408 .compute_time_window = rapl_default_compute_time_window,
409 .msr_pl4_support = 1,
410 .msr_pmu_support = 1,
411};
412
413static const struct x86_cpu_id rapl_ids[] = {
414 X86_MATCH_VFM(INTEL_SANDYBRIDGE, &rapl_defaults_core),
415 X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &rapl_defaults_core),
416
417 X86_MATCH_VFM(INTEL_IVYBRIDGE, &rapl_defaults_core),
418 X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &rapl_defaults_core),
419
420 X86_MATCH_VFM(INTEL_HASWELL, &rapl_defaults_core),
421 X86_MATCH_VFM(INTEL_HASWELL_L, &rapl_defaults_core),
422 X86_MATCH_VFM(INTEL_HASWELL_G, &rapl_defaults_core),
423 X86_MATCH_VFM(INTEL_HASWELL_X, &rapl_defaults_hsw_server),
424
425 X86_MATCH_VFM(INTEL_BROADWELL, &rapl_defaults_core),
426 X86_MATCH_VFM(INTEL_BROADWELL_G, &rapl_defaults_core),
427 X86_MATCH_VFM(INTEL_BROADWELL_D, &rapl_defaults_core),
428 X86_MATCH_VFM(INTEL_BROADWELL_X, &rapl_defaults_hsw_server),
429
430 X86_MATCH_VFM(INTEL_SKYLAKE, &rapl_defaults_core),
431 X86_MATCH_VFM(INTEL_SKYLAKE_L, &rapl_defaults_core),
432 X86_MATCH_VFM(INTEL_SKYLAKE_X, &rapl_defaults_hsw_server),
433 X86_MATCH_VFM(INTEL_KABYLAKE_L, &rapl_defaults_core),
434 X86_MATCH_VFM(INTEL_KABYLAKE, &rapl_defaults_core),
435 X86_MATCH_VFM(INTEL_CANNONLAKE_L, &rapl_defaults_core),
436 X86_MATCH_VFM(INTEL_ICELAKE_L, &rapl_defaults_core_pl4),
437 X86_MATCH_VFM(INTEL_ICELAKE, &rapl_defaults_core),
438 X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &rapl_defaults_core),
439 X86_MATCH_VFM(INTEL_ICELAKE_X, &rapl_defaults_hsw_server),
440 X86_MATCH_VFM(INTEL_ICELAKE_D, &rapl_defaults_hsw_server),
441 X86_MATCH_VFM(INTEL_COMETLAKE_L, &rapl_defaults_core),
442 X86_MATCH_VFM(INTEL_COMETLAKE, &rapl_defaults_core),
443 X86_MATCH_VFM(INTEL_TIGERLAKE_L, &rapl_defaults_core_pl4),
444 X86_MATCH_VFM(INTEL_TIGERLAKE, &rapl_defaults_core),
445 X86_MATCH_VFM(INTEL_ROCKETLAKE, &rapl_defaults_core),
446 X86_MATCH_VFM(INTEL_ALDERLAKE, &rapl_defaults_core_pl4),
447 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &rapl_defaults_core_pl4),
448 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &rapl_defaults_core_pl4),
449 X86_MATCH_VFM(INTEL_RAPTORLAKE, &rapl_defaults_core_pl4),
450 X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &rapl_defaults_core_pl4),
451 X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &rapl_defaults_core),
452 X86_MATCH_VFM(INTEL_BARTLETTLAKE, &rapl_defaults_core),
453 X86_MATCH_VFM(INTEL_METEORLAKE, &rapl_defaults_core_pl4),
454 X86_MATCH_VFM(INTEL_METEORLAKE_L, &rapl_defaults_core_pl4),
455 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
456 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &rapl_defaults_spr_server),
457 X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core),
458 X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &rapl_defaults_core_pl4_pmu),
459 X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &rapl_defaults_core_pl4_pmu),
460 X86_MATCH_VFM(INTEL_NOVALAKE, &rapl_defaults_core_pl4),
461 X86_MATCH_VFM(INTEL_NOVALAKE_L, &rapl_defaults_core_pl4),
462 X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core_pl4),
463 X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core),
464 X86_MATCH_VFM(INTEL_ARROWLAKE_U, &rapl_defaults_core_pl4),
465 X86_MATCH_VFM(INTEL_LAKEFIELD, &rapl_defaults_core),
466
467 X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt),
468 X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &rapl_defaults_cht),
469 X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &rapl_defaults_tng),
470 X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2, &rapl_defaults_ann),
471 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &rapl_defaults_core),
472 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
473 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &rapl_defaults_core),
474 X86_MATCH_VFM(INTEL_ATOM_TREMONT, &rapl_defaults_core),
475 X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &rapl_defaults_core),
476 X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &rapl_defaults_core),
477
478 X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &rapl_defaults_hsw_server),
479 X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &rapl_defaults_hsw_server),
480
481 X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
482 X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
483 X86_MATCH_VENDOR_FAM(AMD, 0x1A, &rapl_defaults_amd),
484 X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
485 {}
486};
487MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
488
489static int rapl_msr_probe(struct platform_device *pdev)
490{
491 int ret;
492
493 switch (boot_cpu_data.x86_vendor) {
494 case X86_VENDOR_INTEL:
495 rapl_msr_priv = &rapl_msr_priv_intel;
496 break;
497 case X86_VENDOR_HYGON:
498 case X86_VENDOR_AMD:
499 rapl_msr_priv = &rapl_msr_priv_amd;
500 break;
501 default:
502 pr_err("intel-rapl does not support CPU vendor %d\n", boot_cpu_data.x86_vendor);
503 return -ENODEV;
504 }
505 rapl_msr_priv->read_raw = rapl_msr_read_raw;
506 rapl_msr_priv->write_raw = rapl_msr_write_raw;
507 rapl_msr_priv->defaults = (const struct rapl_defaults *)pdev->dev.platform_data;
508 rapl_msr_priv->rpi = rpi_msr;
509
510 if (rapl_msr_priv->defaults->msr_pl4_support) {
511 rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4);
512 rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4].msr =
513 MSR_VR_CURRENT_CONFIG;
514 pr_info("PL4 support detected (updated).\n");
515 }
516
517 if (rapl_msr_priv->defaults->msr_pmu_support) {
518 rapl_msr_pmu = true;
519 pr_info("MSR-based RAPL PMU support enabled (updated)\n");
520 }
521
522 rapl_msr_priv->control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
523 if (IS_ERR(rapl_msr_priv->control_type)) {
524 pr_debug("failed to register powercap control_type.\n");
525 return PTR_ERR(rapl_msr_priv->control_type);
526 }
527
528 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online",
529 rapl_cpu_online, rapl_cpu_down_prep);
530 if (ret < 0)
531 goto out;
532 rapl_msr_priv->pcap_rapl_online = ret;
533
534 return 0;
535
536out:
537 if (ret)
538 powercap_unregister_control_type(rapl_msr_priv->control_type);
539 return ret;
540}
541
542static void rapl_msr_remove(struct platform_device *pdev)
543{
544 cpuhp_remove_state(rapl_msr_priv->pcap_rapl_online);
545 powercap_unregister_control_type(rapl_msr_priv->control_type);
546}
547
548static const struct platform_device_id rapl_msr_ids[] = {
549 { .name = "intel_rapl_msr", },
550 {}
551};
552MODULE_DEVICE_TABLE(platform, rapl_msr_ids);
553
554static struct platform_driver intel_rapl_msr_driver = {
555 .probe = rapl_msr_probe,
556 .remove = rapl_msr_remove,
557 .id_table = rapl_msr_ids,
558 .driver = {
559 .name = "intel_rapl_msr",
560 },
561};
562
/*
 * Platform device created by intel_rapl_msr_init(); stays NULL when the
 * boot CPU does not match rapl_ids[].
 */
static struct platform_device *rapl_msr_platdev;
564
565static int intel_rapl_msr_init(void)
566{
567 const struct rapl_defaults *def;
568 const struct x86_cpu_id *id;
569 int ret;
570
571 ret = platform_driver_register(&intel_rapl_msr_driver);
572 if (ret)
573 return ret;
574
575 /* Create the MSR RAPL platform device for supported platforms */
576 id = x86_match_cpu(rapl_ids);
577 if (!id)
578 return 0;
579
580 def = (const struct rapl_defaults *)id->driver_data;
581
582 rapl_msr_platdev = platform_device_register_data(NULL, "intel_rapl_msr", 0, def,
583 sizeof(*def));
584 if (IS_ERR(rapl_msr_platdev))
585 pr_debug("intel_rapl_msr device register failed, ret:%ld\n",
586 PTR_ERR(rapl_msr_platdev));
587
588 return 0;
589}
590module_init(intel_rapl_msr_init);
591
592static void intel_rapl_msr_exit(void)
593{
594 platform_device_unregister(rapl_msr_platdev);
595 platform_driver_unregister(&intel_rapl_msr_driver);
596}
597module_exit(intel_rapl_msr_exit);
598
599MODULE_DESCRIPTION("Driver for Intel RAPL (Running Average Power Limit) control via MSR interface");
600MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
601MODULE_LICENSE("GPL v2");
602MODULE_IMPORT_NS("INTEL_RAPL");