Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branches 'pm-cpuidle', 'pm-opp' and 'pm-sleep'

Merge cpuidle updates, OPP (operating performance points) library
updates, and updates related to system suspend and hibernation for
7.1-rc1:

- Refine stopped tick handling in the menu cpuidle governor and
rearrange stopped tick handling in the teo cpuidle governor (Rafael
Wysocki)

- Add Panther Lake C-states table to the intel_idle driver (Artem
Bityutskiy)

- Clean up dead dependencies on CPU_IDLE in Kconfig (Julian Braha)

- Simplify cpuidle_register_device() with guard() (Huisong Li)

- Use performance level if available to distinguish between rates in
OPP debugfs (Manivannan Sadhasivam)

- Fix scoped_guard in dev_pm_opp_xlate_required_opp() (Viresh Kumar)

- Return -ENODATA if the snapshot image is not loaded (Alberto Garcia)

- Remove inclusion of crypto/hash.h from hibernate_64.c on x86 (Eric
Biggers)

* pm-cpuidle:
cpuidle: Simplify cpuidle_register_device() with guard()
cpuidle: clean up dead dependencies on CPU_IDLE in Kconfig
intel_idle: Add Panther Lake C-states table
cpuidle: governors: teo: Rearrange stopped tick handling
cpuidle: governors: menu: Refine stopped tick handling

* pm-opp:
OPP: Move break out of scoped_guard in dev_pm_opp_xlate_required_opp()
OPP: debugfs: Use performance level if available to distinguish between rates

* pm-sleep:
PM: hibernate: return -ENODATA if the snapshot image is not loaded
PM: hibernate: x86: Remove inclusion of crypto/hash.h

+114 -78
-2
arch/x86/power/hibernate_64.c
··· 14 14 #include <linux/kdebug.h> 15 15 #include <linux/pgtable.h> 16 16 17 - #include <crypto/hash.h> 18 - 19 17 #include <asm/e820/api.h> 20 18 #include <asm/init.h> 21 19 #include <asm/proto.h>
+1 -1
drivers/cpuidle/Kconfig
··· 81 81 before halting in the guest (more efficient than polling in the 82 82 host via halt_poll_ns for some scenarios). 83 83 84 - endif 84 + endif # CPU_IDLE 85 85 86 86 config ARCH_NEEDS_CPU_IDLE_COUPLED 87 87 def_bool n
+1 -1
drivers/cpuidle/Kconfig.mips
··· 4 4 # 5 5 config MIPS_CPS_CPUIDLE 6 6 bool "CPU Idle driver for MIPS CPS platforms" 7 - depends on CPU_IDLE && MIPS_CPS 7 + depends on MIPS_CPS 8 8 depends on SYS_SUPPORTS_MIPS_CPS 9 9 select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT || CPU_MIPSR6 10 10 select GENERIC_CLOCKEVENTS_BROADCAST if SMP
-2
drivers/cpuidle/Kconfig.powerpc
··· 4 4 # 5 5 config PSERIES_CPUIDLE 6 6 bool "Cpuidle driver for pSeries platforms" 7 - depends on CPU_IDLE 8 7 depends on PPC_PSERIES 9 8 default y 10 9 help ··· 12 13 13 14 config POWERNV_CPUIDLE 14 15 bool "Cpuidle driver for powernv platforms" 15 - depends on CPU_IDLE 16 16 depends on PPC_POWERNV 17 17 default y 18 18 help
+5 -7
drivers/cpuidle/cpuidle.c
··· 679 679 if (!dev) 680 680 return -EINVAL; 681 681 682 - mutex_lock(&cpuidle_lock); 682 + guard(mutex)(&cpuidle_lock); 683 683 684 684 if (dev->registered) 685 - goto out_unlock; 685 + return ret; 686 686 687 687 __cpuidle_device_init(dev); 688 688 689 689 ret = __cpuidle_register_device(dev); 690 690 if (ret) 691 - goto out_unlock; 691 + return ret; 692 692 693 693 ret = cpuidle_add_sysfs(dev); 694 694 if (ret) ··· 700 700 701 701 cpuidle_install_idle_handler(); 702 702 703 - out_unlock: 704 - mutex_unlock(&cpuidle_lock); 705 - 706 703 return ret; 707 704 708 705 out_sysfs: 709 706 cpuidle_remove_sysfs(dev); 710 707 out_unregister: 711 708 __cpuidle_unregister_device(dev); 712 - goto out_unlock; 709 + 710 + return ret; 713 711 } 714 712 715 713 EXPORT_SYMBOL_GPL(cpuidle_register_device);
+5
drivers/cpuidle/governors/gov.h
··· 10 10 * check the time till the closest expected timer event. 11 11 */ 12 12 #define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) 13 + /* 14 + * If the closest timer is in this range, the governor idle state selection need 15 + * not be adjusted after the scheduler tick has been stopped. 16 + */ 17 + #define SAFE_TIMER_RANGE_NS (2 * TICK_NSEC) 13 18 14 19 #endif /* __CPUIDLE_GOVERNOR_H */
+9 -6
drivers/cpuidle/governors/menu.c
··· 261 261 predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns); 262 262 /* 263 263 * If the tick is already stopped, the cost of possible short 264 - * idle duration misprediction is much higher, because the CPU 265 - * may be stuck in a shallow idle state for a long time as a 266 - * result of it. In that case, say we might mispredict and use 267 - * the known time till the closest timer event for the idle 268 - * state selection. 264 + * idle duration misprediction is higher because the CPU may get 265 + * stuck in a shallow idle state then. To avoid that, if 266 + * predicted_ns is small enough, say it might be mispredicted 267 + * and use the known time till the closest timer for idle state 268 + * selection unless that timer is going to trigger within 269 + * SAFE_TIMER_RANGE_NS in which case it can be regarded as a 270 + * sufficient safety net. 269 271 */ 270 - if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC) 272 + if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC && 273 + data->next_timer_ns > SAFE_TIMER_RANGE_NS) 271 274 predicted_ns = data->next_timer_ns; 272 275 } else { 273 276 /*
+34 -47
drivers/cpuidle/governors/teo.c
··· 407 407 * better choice. 408 408 */ 409 409 if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) { 410 - int min_idx = idx0; 411 - 412 - if (tick_nohz_tick_stopped()) { 413 - /* 414 - * Look for the shallowest idle state below the current 415 - * candidate one whose target residency is at least 416 - * equal to the tick period length. 417 - */ 418 - while (min_idx < idx && 419 - drv->states[min_idx].target_residency_ns < TICK_NSEC) 420 - min_idx++; 421 - 422 - /* 423 - * Avoid selecting a state with a lower index, but with 424 - * the same target residency as the current candidate 425 - * one. 426 - */ 427 - if (drv->states[min_idx].target_residency_ns == 428 - drv->states[idx].target_residency_ns) 429 - goto constraint; 430 - } 431 - 432 - /* 433 - * If the minimum state index is greater than or equal to the 434 - * index of the state with the maximum intercepts metric and 435 - * the corresponding state is enabled, there is no need to look 436 - * at the deeper states. 437 - */ 438 - if (min_idx >= intercept_max_idx && 439 - !dev->states_usage[min_idx].disable) { 440 - idx = min_idx; 441 - goto constraint; 442 - } 443 - 444 410 /* 445 411 * Look for the deepest enabled idle state, at most as deep as 446 412 * the one with the maximum intercepts metric, whose target 447 413 * residency had not been greater than the idle duration in over 448 414 * a half of the relevant cases in the past. 449 - * 450 - * Take the possible duration limitation present if the tick 451 - * has been stopped already into account. 452 415 */ 453 - for (i = idx - 1, intercept_sum = 0; i >= min_idx; i--) { 416 + for (i = idx - 1, intercept_sum = 0; i >= idx0; i--) { 454 417 intercept_sum += cpu_data->state_bins[i].intercepts; 455 418 456 419 if (dev->states_usage[i].disable) ··· 426 463 } 427 464 } 428 465 429 - constraint: 430 466 /* 431 467 * If there is a latency constraint, it may be necessary to select an 432 468 * idle state shallower than the current candidate one. ··· 434 472 idx = constraint_idx; 435 473 436 474 /* 437 - * If either the candidate state is state 0 or its target residency is 438 - * low enough, there is basically nothing more to do, but if the sleep 439 - * length is not updated, the subsequent wakeup will be counted as an 440 - * "intercept" which may be problematic in the cases when timer wakeups 441 - * are dominant. Namely, it may effectively prevent deeper idle states 442 - * from being selected at one point even if no imminent timers are 443 - * scheduled. 475 + * If the tick has not been stopped and either the candidate state is 476 + * state 0 or its target residency is low enough, there is basically 477 + * nothing more to do, but if the sleep length is not updated, the 478 + * subsequent wakeup will be counted as an "intercept". That may be 479 + * problematic in the cases when timer wakeups are dominant because it 480 + * may effectively prevent deeper idle states from being selected at one 481 + * point even if no imminent timers are scheduled. 444 482 * 445 483 * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one 446 484 * CPU are unlikely (user space has a default 50 us slack value for ··· 456 494 * shallow idle states regardless of the wakeup type, so the sleep 457 495 * length need not be known in that case. 458 496 */ 459 - if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && 497 + if (!tick_nohz_tick_stopped() && (!idx || 498 + drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && 460 499 (2 * cpu_data->short_idles >= cpu_data->total || 461 500 latency_req < LATENCY_THRESHOLD_NS)) 462 501 goto out_tick; 463 502 464 503 duration_ns = tick_nohz_get_sleep_length(&delta_tick); 465 504 cpu_data->sleep_length_ns = duration_ns; 505 + 506 + /* 507 + * If the tick has been stopped and the closest timer is too far away, 508 + * update the selection to prevent the CPU from getting stuck in a 509 + * shallow idle state for too long. 510 + */ 511 + if (tick_nohz_tick_stopped() && duration_ns > SAFE_TIMER_RANGE_NS && 512 + drv->states[idx].target_residency_ns < TICK_NSEC) { 513 + /* 514 + * Look for the deepest enabled idle state with exit latency 515 + * within the PM QoS limit and with target residency within 516 + * duration_ns. 517 + */ 518 + for (i = constraint_idx; i > idx; i--) { 519 + if (dev->states_usage[i].disable) 520 + continue; 521 + 522 + if (drv->states[i].target_residency_ns <= duration_ns) { 523 + idx = i; 524 + break; 525 + } 526 + } 527 + return idx; 528 + } 466 529 467 530 if (!idx) 468 531 goto out_tick;
+42
drivers/idle/intel_idle.c
··· 983 983 .enter = NULL } 984 984 }; 985 985 986 + static struct cpuidle_state ptl_cstates[] __initdata = { 987 + { 988 + .name = "C1", 989 + .desc = "MWAIT 0x00", 990 + .flags = MWAIT2flg(0x00), 991 + .exit_latency = 1, 992 + .target_residency = 1, 993 + .enter = &intel_idle, 994 + .enter_s2idle = intel_idle_s2idle, }, 995 + { 996 + .name = "C1E", 997 + .desc = "MWAIT 0x01", 998 + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 999 + .exit_latency = 10, 1000 + .target_residency = 10, 1001 + .enter = &intel_idle, 1002 + .enter_s2idle = intel_idle_s2idle, }, 1003 + { 1004 + .name = "C6S", 1005 + .desc = "MWAIT 0x21", 1006 + .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED, 1007 + .exit_latency = 300, 1008 + .target_residency = 300, 1009 + .enter = &intel_idle, 1010 + .enter_s2idle = intel_idle_s2idle, }, 1011 + { 1012 + .name = "C10", 1013 + .desc = "MWAIT 0x60", 1014 + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1015 + .exit_latency = 370, 1016 + .target_residency = 2500, 1017 + .enter = &intel_idle, 1018 + .enter_s2idle = intel_idle_s2idle, }, 1019 + { 1020 + .enter = NULL } 1021 + }; 1022 + 986 1023 static struct cpuidle_state gmt_cstates[] __initdata = { 987 1024 { 988 1025 .name = "C1", ··· 1598 1561 .state_table = mtl_l_cstates, 1599 1562 }; 1600 1563 1564 + static const struct idle_cpu idle_cpu_ptl __initconst = { 1565 + .state_table = ptl_cstates, 1566 + }; 1567 + 1601 1568 static const struct idle_cpu idle_cpu_gmt __initconst = { 1602 1569 .state_table = gmt_cstates, 1603 1570 }; ··· 1710 1669 X86_MATCH_VFM(INTEL_ALDERLAKE, &idle_cpu_adl), 1711 1670 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &idle_cpu_adl_l), 1712 1671 X86_MATCH_VFM(INTEL_METEORLAKE_L, &idle_cpu_mtl_l), 1672 + X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &idle_cpu_ptl), 1713 1673 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &idle_cpu_gmt), 1714 1674 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &idle_cpu_spr), 1715 1675 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &idle_cpu_spr),
+1 -1
drivers/opp/core.c
··· 2742 2742 break; 2743 2743 } 2744 2744 } 2745 - break; 2746 2745 } 2746 + break; 2747 2747 } 2748 2748 2749 2749 if (IS_ERR(dest_opp)) {
+11 -9
drivers/opp/debugfs.c
··· 130 130 { 131 131 struct dentry *pdentry = opp_table->dentry; 132 132 struct dentry *d; 133 - unsigned long id; 134 - char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */ 133 + char name[36]; /* "opp:"(4) + u64(20) + "-" (1) + u32(10) + NULL(1) */ 135 134 136 135 /* 137 136 * Get directory name for OPP. 138 137 * 139 - * - Normally rate is unique to each OPP, use it to get unique opp-name. 138 + * - Normally rate is unique to each OPP, use it to get unique opp-name, 139 + * together with performance level if available. 140 140 * - For some devices rate isn't available or there are multiple, use 141 141 * index instead for them. 142 142 */ 143 - if (likely(opp_table->clk_count == 1 && opp->rates[0])) 144 - id = opp->rates[0]; 145 - else 146 - id = _get_opp_count(opp_table); 147 - 148 - snprintf(name, sizeof(name), "opp:%lu", id); 143 + if (likely(opp_table->clk_count == 1 && opp->rates[0])) { 144 + if (opp->level == OPP_LEVEL_UNSET) 145 + snprintf(name, sizeof(name), "opp:%lu", opp->rates[0]); 146 + else 147 + snprintf(name, sizeof(name), "opp:%lu-%u", opp->rates[0], opp->level); 148 + } else { 149 + snprintf(name, sizeof(name), "opp:%u", _get_opp_count(opp_table)); 150 + } 149 151 150 152 /* Create per-opp directory */ 151 153 d = debugfs_create_dir(name, pdentry);
+5 -2
kernel/power/user.c
··· 322 322 error = snapshot_write_finalize(&data->handle); 323 323 if (error) 324 324 break; 325 - if (data->mode != O_WRONLY || !data->frozen || 326 - !snapshot_image_loaded(&data->handle)) { 325 + if (data->mode != O_WRONLY || !data->frozen) { 327 326 error = -EPERM; 327 + break; 328 + } 329 + if (!snapshot_image_loaded(&data->handle)) { 330 + error = -ENODATA; 328 331 break; 329 332 } 330 333 error = hibernation_restore(data->platform_support);