Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'thermal-6.11-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull thermal control fix from Rafael Wysocki:
"Prevent the thermal core from flooding the kernel log with useless
messages if thermal zone temperature can never be determined (or its
sensor has failed permanently) and make it finally give up and disable
defective thermal zones (Rafael Wysocki)"

* tag 'thermal-6.11-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
thermal: core: Back off when polling thermal zones on errors
thermal: trip: Split thermal_zone_device_set_mode()

+85 -14
+78 -11
drivers/thermal/thermal_core.c
··· 272 272 return ret; 273 273 } 274 274 275 + static int __thermal_zone_device_set_mode(struct thermal_zone_device *tz, 276 + enum thermal_device_mode mode) 277 + { 278 + if (tz->ops.change_mode) { 279 + int ret; 280 + 281 + ret = tz->ops.change_mode(tz, mode); 282 + if (ret) 283 + return ret; 284 + } 285 + 286 + tz->mode = mode; 287 + 288 + return 0; 289 + } 290 + 291 + static void thermal_zone_broken_disable(struct thermal_zone_device *tz) 292 + { 293 + struct thermal_trip_desc *td; 294 + 295 + dev_err(&tz->device, "Unable to get temperature, disabling!\n"); 296 + /* 297 + * This function only runs for enabled thermal zones, so no need to 298 + * check for the current mode. 299 + */ 300 + __thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED); 301 + thermal_notify_tz_disable(tz); 302 + 303 + for_each_trip_desc(tz, td) { 304 + if (td->trip.type == THERMAL_TRIP_CRITICAL && 305 + td->trip.temperature > THERMAL_TEMP_INVALID) { 306 + dev_crit(&tz->device, 307 + "Disabled thermal zone with critical trip point\n"); 308 + return; 309 + } 310 + } 311 + } 312 + 275 313 /* 276 314 * Zone update section: main control loop applied to each zone while monitoring 277 315 * in polling mode. The monitoring is done using a workqueue. ··· 328 290 &tz->poll_queue, delay); 329 291 else 330 292 cancel_delayed_work(&tz->poll_queue); 293 + } 294 + 295 + static void thermal_zone_recheck(struct thermal_zone_device *tz, int error) 296 + { 297 + if (error == -EAGAIN) { 298 + thermal_zone_device_set_polling(tz, THERMAL_RECHECK_DELAY); 299 + return; 300 + } 301 + 302 + /* 303 + * Print the message once to reduce log noise. It will be followed by 304 + * another one if the temperature cannot be determined after multiple 305 + * attempts. 306 + */ 307 + if (tz->recheck_delay_jiffies == THERMAL_RECHECK_DELAY) 308 + dev_info(&tz->device, "Temperature check failed (%d)\n", error); 309 + 310 + thermal_zone_device_set_polling(tz, tz->recheck_delay_jiffies); 311 + 312 + tz->recheck_delay_jiffies += max(tz->recheck_delay_jiffies >> 1, 1ULL); 313 + if (tz->recheck_delay_jiffies > THERMAL_MAX_RECHECK_DELAY) { 314 + thermal_zone_broken_disable(tz); 315 + /* 316 + * Restore the original recheck delay value to allow the thermal 317 + * zone to try to recover when it is reenabled by user space. 318 + */ 319 + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; 320 + } 331 321 } 332 322 333 323 static void monitor_thermal_zone(struct thermal_zone_device *tz) ··· 557 491 558 492 ret = __thermal_zone_get_temp(tz, &temp); 559 493 if (ret) { 560 - if (ret != -EAGAIN) 561 - dev_info(&tz->device, "Temperature check failed (%d)\n", ret); 562 - 563 - thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); 494 + thermal_zone_recheck(tz, ret); 564 495 return; 565 496 } else if (temp <= THERMAL_TEMP_INVALID) { 566 497 /* ··· 568 505 */ 569 506 goto monitor; 570 507 } 508 + 509 + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; 571 510 572 511 tz->last_temperature = tz->temperature; 573 512 tz->temperature = temp; ··· 605 540 static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, 606 541 enum thermal_device_mode mode) 607 542 { 608 - int ret = 0; 543 + int ret; 609 544 610 545 mutex_lock(&tz->lock); 611 546 ··· 613 548 if (mode == tz->mode) { 614 549 mutex_unlock(&tz->lock); 615 550 616 - return ret; 551 + return 0; 617 552 } 618 553 619 - if (tz->ops.change_mode) 620 - ret = tz->ops.change_mode(tz, mode); 554 + ret = __thermal_zone_device_set_mode(tz, mode); 555 + if (ret) { 556 + mutex_unlock(&tz->lock); 621 557 622 - if (!ret) 623 - tz->mode = mode; 558 + return ret; 559 + } 624 560 625 561 __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); 626 562 ··· 632 566 else 633 567 thermal_notify_tz_disable(tz); 634 568 635 - return ret; 569 + return 0; 636 570 } 637 571 638 572 int thermal_zone_device_enable(struct thermal_zone_device *tz) ··· 1511 1445 1512 1446 thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay); 1513 1447 thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay); 1448 + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; 1514 1449 1515 1450 /* sys I/F */ 1516 1451 /* Add nodes that are always present via .groups */
+7 -3
drivers/thermal/thermal_core.h
··· 67 67 * @polling_delay_jiffies: number of jiffies to wait between polls when 68 68 * checking whether trip points have been crossed (0 for 69 69 * interrupt driven systems) 70 + * @recheck_delay_jiffies: delay after a failed attempt to determine the zone 71 + * temperature before trying again 70 72 * @temperature: current temperature. This is only for core code, 71 73 * drivers should use thermal_zone_get_temp() to get the 72 74 * current temperature ··· 110 108 int num_trips; 111 109 unsigned long passive_delay_jiffies; 112 110 unsigned long polling_delay_jiffies; 111 + unsigned long recheck_delay_jiffies; 113 112 int temperature; 114 113 int last_temperature; 115 114 int emul_temperature; ··· 140 137 #define THERMAL_TEMP_INIT INT_MIN 141 138 142 139 /* 143 - * Default delay after a failing thermal zone temperature check before 144 - * attempting to check it again. 140 + * Default and maximum delay after a failed thermal zone temperature check 141 + * before attempting to check it again (in jiffies). 145 142 */ 146 - #define THERMAL_RECHECK_DELAY_MS 250 143 + #define THERMAL_RECHECK_DELAY msecs_to_jiffies(250) 144 + #define THERMAL_MAX_RECHECK_DELAY (120 * HZ) 147 145 148 146 /* Default Thermal Governor */ 149 147 #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)