Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'pm-5.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management fixes from Rafael Wysocki:
"These fix reference counting in the operating performance points (OPP)
framework and address a few intel_pstate driver issues, mostly related
to switching driver operation modes and to handling CPU offline/online
and system-wide suspend/resume with hardware-managed P-states (HWP)
enabled.

Specifics:

- Fix reference counting of operating performance points (OPP) tables
(Viresh Kumar).

- Address intel_pstate driver interface issues, mostly related to
switching operation modes and handling CPU offline and online and
system-wide suspend/resume with hardware-managed P-states (HWP)
enabled (Rafael Wysocki).

- Fix the maximum frequency computation in the intel_pstate driver
with turbo P-states disabled by the platform firmware and HWP
enabled (Francisco Jerez)"

* tag 'pm-5.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
cpufreq: intel_pstate: Fix intel_pstate_get_hwp_max() for turbo disabled
cpufreq: intel_pstate: Free memory only when turning off
cpufreq: intel_pstate: Add ->offline and ->online callbacks
cpufreq: intel_pstate: Tweak the EPP sysfs interface
cpufreq: intel_pstate: Update cached EPP in the active mode
cpufreq: intel_pstate: Refuse to turn off with HWP enabled
opp: Don't drop reference for an OPP table that was never parsed

+170 -98
+3 -1
Documentation/admin-guide/pm/intel_pstate.rst
··· 123 123 internal P-state selection logic is expected to focus entirely on performance. 124 124 125 125 This will override the EPP/EPB setting coming from the ``sysfs`` interface 126 - (see `Energy vs Performance Hints`_ below). 126 + (see `Energy vs Performance Hints`_ below). Moreover, any attempts to change 127 + the EPP/EPB to a value different from 0 ("performance") via ``sysfs`` in this 128 + configuration will be rejected. 127 129 128 130 Also, in this configuration the range of P-states available to the processor's 129 131 internal P-state selection logic is always restricted to the upper boundary
+150 -90
drivers/cpufreq/intel_pstate.c
··· 219 219 * @epp_policy: Last saved policy used to set EPP/EPB 220 220 * @epp_default: Power on default HWP energy performance 221 221 * preference/bias 222 - * @epp_saved: Saved EPP/EPB during system suspend or CPU offline 223 - * operation 224 222 * @epp_cached Cached HWP energy-performance preference value 225 223 * @hwp_req_cached: Cached value of the last HWP Request MSR 226 224 * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR 227 225 * @last_io_update: Last time when IO wake flag was set 228 226 * @sched_flags: Store scheduler flags for possible cross CPU update 229 227 * @hwp_boost_min: Last HWP boosted min performance 228 + * @suspended: Whether or not the driver has been suspended. 230 229 * 231 230 * This structure stores per CPU instance data for all CPUs. 232 231 */ ··· 257 258 s16 epp_powersave; 258 259 s16 epp_policy; 259 260 s16 epp_default; 260 - s16 epp_saved; 261 261 s16 epp_cached; 262 262 u64 hwp_req_cached; 263 263 u64 hwp_cap_cached; 264 264 u64 last_io_update; 265 265 unsigned int sched_flags; 266 266 u32 hwp_boost_min; 267 + bool suspended; 267 268 }; 268 269 269 270 static struct cpudata **all_cpu_data; ··· 643 644 644 645 static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp) 645 646 { 647 + int ret; 648 + 646 649 /* 647 650 * Use the cached HWP Request MSR value, because in the active mode the 648 651 * register itself may be updated by intel_pstate_hwp_boost_up() or ··· 660 659 * function, so it cannot run in parallel with the update below. 
661 660 */ 662 661 WRITE_ONCE(cpu->hwp_req_cached, value); 663 - return wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); 662 + ret = wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); 663 + if (!ret) 664 + cpu->epp_cached = epp; 665 + 666 + return ret; 664 667 } 665 668 666 669 static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, ··· 682 677 epp = raw_epp; 683 678 else if (epp == -EINVAL) 684 679 epp = epp_values[pref_index - 1]; 680 + 681 + /* 682 + * To avoid confusion, refuse to set EPP to any values different 683 + * from 0 (performance) if the current policy is "performance", 684 + * because those values would be overridden. 685 + */ 686 + if (epp > 0 && cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) 687 + return -EBUSY; 685 688 686 689 ret = intel_pstate_set_epp(cpu_data, epp); 687 690 } else { ··· 775 762 cpufreq_stop_governor(policy); 776 763 ret = intel_pstate_set_epp(cpu, epp); 777 764 err = cpufreq_start_governor(policy); 778 - if (!ret) { 779 - cpu->epp_cached = epp; 765 + if (!ret) 780 766 ret = err; 781 - } 782 767 } 783 768 } 784 769 ··· 836 825 837 826 rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); 838 827 WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap); 839 - if (global.no_turbo) 828 + if (global.no_turbo || global.turbo_disabled) 840 829 *current_max = HWP_GUARANTEED_PERF(cap); 841 830 else 842 831 *current_max = HWP_HIGHEST_PERF(cap); ··· 870 859 871 860 cpu_data->epp_policy = cpu_data->policy; 872 861 873 - if (cpu_data->epp_saved >= 0) { 874 - epp = cpu_data->epp_saved; 875 - cpu_data->epp_saved = -EINVAL; 876 - goto update_epp; 877 - } 878 - 879 862 if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) { 880 863 epp = intel_pstate_get_epp(cpu_data, value); 881 864 cpu_data->epp_powersave = epp; ··· 896 891 897 892 epp = cpu_data->epp_powersave; 898 893 } 899 - update_epp: 900 894 if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { 901 895 value &= ~GENMASK_ULL(31, 24); 902 896 value |= (u64)epp << 24; ··· 907 903 wrmsrl_on_cpu(cpu, 
MSR_HWP_REQUEST, value); 908 904 } 909 905 910 - static void intel_pstate_hwp_force_min_perf(int cpu) 906 + static void intel_pstate_hwp_offline(struct cpudata *cpu) 911 907 { 912 - u64 value; 908 + u64 value = READ_ONCE(cpu->hwp_req_cached); 913 909 int min_perf; 914 910 915 - value = all_cpu_data[cpu]->hwp_req_cached; 911 + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { 912 + /* 913 + * In case the EPP has been set to "performance" by the 914 + * active mode "performance" scaling algorithm, replace that 915 + * temporary value with the cached EPP one. 916 + */ 917 + value &= ~GENMASK_ULL(31, 24); 918 + value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached); 919 + WRITE_ONCE(cpu->hwp_req_cached, value); 920 + } 921 + 916 922 value &= ~GENMASK_ULL(31, 0); 917 - min_perf = HWP_LOWEST_PERF(all_cpu_data[cpu]->hwp_cap_cached); 923 + min_perf = HWP_LOWEST_PERF(cpu->hwp_cap_cached); 918 924 919 925 /* Set hwp_max = hwp_min */ 920 926 value |= HWP_MAX_PERF(min_perf); ··· 934 920 if (boot_cpu_has(X86_FEATURE_HWP_EPP)) 935 921 value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); 936 922 937 - wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); 938 - } 939 - 940 - static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) 941 - { 942 - struct cpudata *cpu_data = all_cpu_data[policy->cpu]; 943 - 944 - if (!hwp_active) 945 - return 0; 946 - 947 - cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0); 948 - 949 - return 0; 923 + wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); 950 924 } 951 925 952 926 #define POWER_CTL_EE_ENABLE 1 ··· 961 959 962 960 static void intel_pstate_hwp_enable(struct cpudata *cpudata); 963 961 962 + static void intel_pstate_hwp_reenable(struct cpudata *cpu) 963 + { 964 + intel_pstate_hwp_enable(cpu); 965 + wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached)); 966 + } 967 + 968 + static int intel_pstate_suspend(struct cpufreq_policy *policy) 969 + { 970 + struct cpudata *cpu = all_cpu_data[policy->cpu]; 971 + 972 + pr_debug("CPU %d 
suspending\n", cpu->cpu); 973 + 974 + cpu->suspended = true; 975 + 976 + return 0; 977 + } 978 + 964 979 static int intel_pstate_resume(struct cpufreq_policy *policy) 965 980 { 981 + struct cpudata *cpu = all_cpu_data[policy->cpu]; 982 + 983 + pr_debug("CPU %d resuming\n", cpu->cpu); 966 984 967 985 /* Only restore if the system default is changed */ 968 986 if (power_ctl_ee_state == POWER_CTL_EE_ENABLE) ··· 990 968 else if (power_ctl_ee_state == POWER_CTL_EE_DISABLE) 991 969 set_power_ctl_ee_state(false); 992 970 993 - if (!hwp_active) 994 - return 0; 971 + if (cpu->suspended && hwp_active) { 972 + mutex_lock(&intel_pstate_limits_lock); 995 973 996 - mutex_lock(&intel_pstate_limits_lock); 974 + /* Re-enable HWP, because "online" has not done that. */ 975 + intel_pstate_hwp_reenable(cpu); 997 976 998 - if (policy->cpu == 0) 999 - intel_pstate_hwp_enable(all_cpu_data[policy->cpu]); 977 + mutex_unlock(&intel_pstate_limits_lock); 978 + } 1000 979 1001 - all_cpu_data[policy->cpu]->epp_policy = 0; 1002 - intel_pstate_hwp_set(policy->cpu); 1003 - 1004 - mutex_unlock(&intel_pstate_limits_lock); 980 + cpu->suspended = false; 1005 981 1006 982 return 0; 1007 983 } ··· 1448 1428 wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); 1449 1429 1450 1430 wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); 1451 - cpudata->epp_policy = 0; 1452 1431 if (cpudata->epp_default == -EINVAL) 1453 1432 cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); 1454 1433 } ··· 2116 2097 2117 2098 all_cpu_data[cpunum] = cpu; 2118 2099 2100 + cpu->cpu = cpunum; 2101 + 2119 2102 cpu->epp_default = -EINVAL; 2120 - cpu->epp_powersave = -EINVAL; 2121 - cpu->epp_saved = -EINVAL; 2103 + 2104 + if (hwp_active) { 2105 + const struct x86_cpu_id *id; 2106 + 2107 + intel_pstate_hwp_enable(cpu); 2108 + 2109 + id = x86_match_cpu(intel_pstate_hwp_boost_ids); 2110 + if (id && intel_pstate_acpi_pm_profile_server()) 2111 + hwp_boost = true; 2112 + } 2113 + } else if (hwp_active) { 2114 + /* 2115 + * Re-enable 
HWP in case this happens after a resume from ACPI 2116 + * S3 if the CPU was offline during the whole system/resume 2117 + * cycle. 2118 + */ 2119 + intel_pstate_hwp_reenable(cpu); 2122 2120 } 2123 2121 2124 - cpu = all_cpu_data[cpunum]; 2125 - 2126 - cpu->cpu = cpunum; 2127 - 2128 - if (hwp_active) { 2129 - const struct x86_cpu_id *id; 2130 - 2131 - intel_pstate_hwp_enable(cpu); 2132 - 2133 - id = x86_match_cpu(intel_pstate_hwp_boost_ids); 2134 - if (id && intel_pstate_acpi_pm_profile_server()) 2135 - hwp_boost = true; 2136 - } 2122 + cpu->epp_powersave = -EINVAL; 2123 + cpu->epp_policy = 0; 2137 2124 2138 2125 intel_pstate_get_cpu_pstates(cpu); 2139 2126 ··· 2321 2296 return 0; 2322 2297 } 2323 2298 2324 - static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy) 2299 + static int intel_pstate_cpu_offline(struct cpufreq_policy *policy) 2325 2300 { 2301 + struct cpudata *cpu = all_cpu_data[policy->cpu]; 2302 + 2303 + pr_debug("CPU %d going offline\n", cpu->cpu); 2304 + 2305 + if (cpu->suspended) 2306 + return 0; 2307 + 2308 + /* 2309 + * If the CPU is an SMT thread and it goes offline with the performance 2310 + * settings different from the minimum, it will prevent its sibling 2311 + * from getting to lower performance levels, so force the minimum 2312 + * performance on CPU offline to prevent that from happening. 
2313 + */ 2326 2314 if (hwp_active) 2327 - intel_pstate_hwp_force_min_perf(policy->cpu); 2315 + intel_pstate_hwp_offline(cpu); 2328 2316 else 2329 - intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]); 2317 + intel_pstate_set_min_pstate(cpu); 2318 + 2319 + intel_pstate_exit_perf_limits(policy); 2320 + 2321 + return 0; 2322 + } 2323 + 2324 + static int intel_pstate_cpu_online(struct cpufreq_policy *policy) 2325 + { 2326 + struct cpudata *cpu = all_cpu_data[policy->cpu]; 2327 + 2328 + pr_debug("CPU %d going online\n", cpu->cpu); 2329 + 2330 + intel_pstate_init_acpi_perf_limits(policy); 2331 + 2332 + if (hwp_active) { 2333 + /* 2334 + * Re-enable HWP and clear the "suspended" flag to let "resume" 2335 + * know that it need not do that. 2336 + */ 2337 + intel_pstate_hwp_reenable(cpu); 2338 + cpu->suspended = false; 2339 + } 2340 + 2341 + return 0; 2330 2342 } 2331 2343 2332 2344 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) 2333 2345 { 2334 - pr_debug("CPU %d exiting\n", policy->cpu); 2346 + pr_debug("CPU %d stopping\n", policy->cpu); 2335 2347 2336 2348 intel_pstate_clear_update_util_hook(policy->cpu); 2337 - if (hwp_active) 2338 - intel_pstate_hwp_save_state(policy); 2339 - 2340 - intel_cpufreq_stop_cpu(policy); 2341 2349 } 2342 2350 2343 2351 static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) 2344 2352 { 2345 - intel_pstate_exit_perf_limits(policy); 2353 + pr_debug("CPU %d exiting\n", policy->cpu); 2346 2354 2347 2355 policy->fast_switch_possible = false; 2348 2356 ··· 2436 2378 */ 2437 2379 policy->policy = CPUFREQ_POLICY_POWERSAVE; 2438 2380 2381 + if (hwp_active) { 2382 + struct cpudata *cpu = all_cpu_data[policy->cpu]; 2383 + 2384 + cpu->epp_cached = intel_pstate_get_epp(cpu, 0); 2385 + } 2386 + 2439 2387 return 0; 2440 2388 } 2441 2389 ··· 2449 2385 .flags = CPUFREQ_CONST_LOOPS, 2450 2386 .verify = intel_pstate_verify_policy, 2451 2387 .setpolicy = intel_pstate_set_policy, 2452 - .suspend = intel_pstate_hwp_save_state, 2388 + 
.suspend = intel_pstate_suspend, 2453 2389 .resume = intel_pstate_resume, 2454 2390 .init = intel_pstate_cpu_init, 2455 2391 .exit = intel_pstate_cpu_exit, 2456 2392 .stop_cpu = intel_pstate_stop_cpu, 2393 + .offline = intel_pstate_cpu_offline, 2394 + .online = intel_pstate_cpu_online, 2457 2395 .update_limits = intel_pstate_update_limits, 2458 2396 .name = "intel_pstate", 2459 2397 }; ··· 2651 2585 policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP; 2652 2586 rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value); 2653 2587 WRITE_ONCE(cpu->hwp_req_cached, value); 2654 - cpu->epp_cached = (value & GENMASK_ULL(31, 24)) >> 24; 2588 + cpu->epp_cached = intel_pstate_get_epp(cpu, value); 2655 2589 } else { 2656 2590 turbo_max = cpu->pstate.turbo_pstate; 2657 2591 policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY; ··· 2710 2644 .fast_switch = intel_cpufreq_fast_switch, 2711 2645 .init = intel_cpufreq_cpu_init, 2712 2646 .exit = intel_cpufreq_cpu_exit, 2713 - .stop_cpu = intel_cpufreq_stop_cpu, 2647 + .offline = intel_pstate_cpu_offline, 2648 + .online = intel_pstate_cpu_online, 2649 + .suspend = intel_pstate_suspend, 2650 + .resume = intel_pstate_resume, 2714 2651 .update_limits = intel_pstate_update_limits, 2715 2652 .name = "intel_cpufreq", 2716 2653 }; ··· 2735 2666 } 2736 2667 } 2737 2668 put_online_cpus(); 2738 - 2739 - if (intel_pstate_driver == &intel_pstate) 2740 - intel_pstate_sysfs_hide_hwp_dynamic_boost(); 2741 2669 2742 2670 intel_pstate_driver = NULL; 2743 2671 } ··· 2761 2695 return 0; 2762 2696 } 2763 2697 2764 - static int intel_pstate_unregister_driver(void) 2765 - { 2766 - cpufreq_unregister_driver(intel_pstate_driver); 2767 - intel_pstate_driver_cleanup(); 2768 - 2769 - return 0; 2770 - } 2771 - 2772 2698 static ssize_t intel_pstate_show_status(char *buf) 2773 2699 { 2774 2700 if (!intel_pstate_driver) ··· 2772 2714 2773 2715 static int intel_pstate_update_status(const char *buf, size_t size) 2774 2716 { 2775 - int ret; 2717 + if 
(size == 3 && !strncmp(buf, "off", size)) { 2718 + if (!intel_pstate_driver) 2719 + return -EINVAL; 2776 2720 2777 - if (size == 3 && !strncmp(buf, "off", size)) 2778 - return intel_pstate_driver ? 2779 - intel_pstate_unregister_driver() : -EINVAL; 2721 + if (hwp_active) 2722 + return -EBUSY; 2723 + 2724 + cpufreq_unregister_driver(intel_pstate_driver); 2725 + intel_pstate_driver_cleanup(); 2726 + } 2780 2727 2781 2728 if (size == 6 && !strncmp(buf, "active", size)) { 2782 2729 if (intel_pstate_driver) { 2783 2730 if (intel_pstate_driver == &intel_pstate) 2784 2731 return 0; 2785 2732 2786 - ret = intel_pstate_unregister_driver(); 2787 - if (ret) 2788 - return ret; 2733 + cpufreq_unregister_driver(intel_pstate_driver); 2789 2734 } 2790 2735 2791 2736 return intel_pstate_register_driver(&intel_pstate); ··· 2799 2738 if (intel_pstate_driver == &intel_cpufreq) 2800 2739 return 0; 2801 2740 2802 - ret = intel_pstate_unregister_driver(); 2803 - if (ret) 2804 - return ret; 2741 + cpufreq_unregister_driver(intel_pstate_driver); 2742 + intel_pstate_sysfs_hide_hwp_dynamic_boost(); 2805 2743 } 2806 2744 2807 2745 return intel_pstate_register_driver(&intel_cpufreq);
+16 -6
drivers/opp/core.c
··· 1296 1296 } 1297 1297 EXPORT_SYMBOL_GPL(dev_pm_opp_remove); 1298 1298 1299 - void _opp_remove_all_static(struct opp_table *opp_table) 1299 + bool _opp_remove_all_static(struct opp_table *opp_table) 1300 1300 { 1301 1301 struct dev_pm_opp *opp, *tmp; 1302 + bool ret = true; 1302 1303 1303 1304 mutex_lock(&opp_table->lock); 1304 1305 1305 - if (!opp_table->parsed_static_opps || --opp_table->parsed_static_opps) 1306 + if (!opp_table->parsed_static_opps) { 1307 + ret = false; 1308 + goto unlock; 1309 + } 1310 + 1311 + if (--opp_table->parsed_static_opps) 1306 1312 goto unlock; 1307 1313 1308 1314 list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) { ··· 1318 1312 1319 1313 unlock: 1320 1314 mutex_unlock(&opp_table->lock); 1315 + 1316 + return ret; 1321 1317 } 1322 1318 1323 1319 /** ··· 2422 2414 return; 2423 2415 } 2424 2416 2425 - _opp_remove_all_static(opp_table); 2417 + /* 2418 + * Drop the extra reference only if the OPP table was successfully added 2419 + * with dev_pm_opp_of_add_table() earlier. 2420 + **/ 2421 + if (_opp_remove_all_static(opp_table)) 2422 + dev_pm_opp_put_opp_table(opp_table); 2426 2423 2427 2424 /* Drop reference taken by _find_opp_table() */ 2428 - dev_pm_opp_put_opp_table(opp_table); 2429 - 2430 - /* Drop reference taken while the OPP table was added */ 2431 2425 dev_pm_opp_put_opp_table(opp_table); 2432 2426 } 2433 2427
+1 -1
drivers/opp/opp.h
··· 212 212 213 213 /* Routines internal to opp core */ 214 214 void dev_pm_opp_get(struct dev_pm_opp *opp); 215 - void _opp_remove_all_static(struct opp_table *opp_table); 215 + bool _opp_remove_all_static(struct opp_table *opp_table); 216 216 void _get_opp_table_kref(struct opp_table *opp_table); 217 217 int _get_opp_count(struct opp_table *opp_table); 218 218 struct opp_table *_find_opp_table(struct device *dev);