drm/msm/gpu: Bypass PM QoS constraint for idle clamp

+65 -82

2 changed files

expand all

drivers

gpu

drm

msm

msm_gpu.h

msm_gpu_devfreq.c

+7 -5

drivers/gpu/drm/msm/msm_gpu.h

··· 109 109 struct mutex lock; 110 110 111 111 /** 112 - * idle_constraint: 112 + * idle_freq: 113 113 * 114 - * A PM QoS constraint to limit max freq while the GPU is idle. 114 + * Shadow frequency used while the GPU is idle. From the PoV of 115 + * the devfreq governor, we are continuing to sample busyness and 116 + * adjust frequency while the GPU is idle, but we use this shadow 117 + * value as the GPU is actually clamped to minimum frequency while 118 + * it is inactive. 115 119 */ 116 - struct dev_pm_qos_request idle_freq; 120 + unsigned long idle_freq; 117 121 118 122 /** 119 123 * boost_constraint: ··· 138 134 139 135 /** idle_time: Time of last transition to idle: */ 140 136 ktime_t idle_time; 141 - 142 - struct devfreq_dev_status average_status; 143 137 144 138 /** 145 139 * idle_work:

+58 -77

drivers/gpu/drm/msm/msm_gpu_devfreq.c

··· 33 33 34 34 trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp)); 35 35 36 + /* 37 + * If the GPU is idle, devfreq is not aware, so just stash 38 + * the new target freq (to use when we return to active) 39 + */ 40 + if (df->idle_freq) { 41 + df->idle_freq = *freq; 42 + dev_pm_opp_put(opp); 43 + return 0; 44 + } 45 + 36 46 if (gpu->funcs->gpu_set_freq) { 37 47 mutex_lock(&df->lock); 38 48 gpu->funcs->gpu_set_freq(gpu, opp, df->suspended); ··· 58 48 59 49 static unsigned long get_freq(struct msm_gpu *gpu) 60 50 { 51 + struct msm_gpu_devfreq *df = &gpu->devfreq; 52 + 53 + /* 54 + * If the GPU is idle, use the shadow/saved freq to avoid 55 + * confusing devfreq (which is unaware that we are switching 56 + * to lowest freq until the device is active again) 57 + */ 58 + if (df->idle_freq) 59 + return df->idle_freq; 60 + 61 61 if (gpu->funcs->gpu_get_freq) 62 62 return gpu->funcs->gpu_get_freq(gpu); 63 63 64 64 return clk_get_rate(gpu->core_clk); 65 65 } 66 66 67 - static void get_raw_dev_status(struct msm_gpu *gpu, 67 + static int msm_devfreq_get_dev_status(struct device *dev, 68 68 struct devfreq_dev_status *status) 69 69 { 70 + struct msm_gpu *gpu = dev_to_gpu(dev); 70 71 struct msm_gpu_devfreq *df = &gpu->devfreq; 71 72 u64 busy_cycles, busy_time; 72 73 unsigned long sample_rate; ··· 93 72 if (df->suspended) { 94 73 mutex_unlock(&df->lock); 95 74 status->busy_time = 0; 96 - return; 75 + return 0; 97 76 } 98 77 99 78 busy_cycles = gpu->funcs->gpu_busy(gpu, &sample_rate); ··· 108 87 busy_time = ~0LU; 109 88 110 89 status->busy_time = busy_time; 111 - } 112 - 113 - static void update_average_dev_status(struct msm_gpu *gpu, 114 - const struct devfreq_dev_status *raw) 115 - { 116 - struct msm_gpu_devfreq *df = &gpu->devfreq; 117 - const u32 polling_ms = df->devfreq->profile->polling_ms; 118 - const u32 max_history_ms = polling_ms * 11 / 10; 119 - struct devfreq_dev_status *avg = &df->average_status; 120 - u64 avg_freq; 121 - 122 - /* simple_ondemand governor interacts poorly with gpu->clamp_to_idle. 123 - * When we enforce the constraint on idle, it calls get_dev_status 124 - * which would normally reset the stats. When we remove the 125 - * constraint on active, it calls get_dev_status again where busy_time 126 - * would be 0. 127 - * 128 - * To remedy this, we always return the average load over the past 129 - * polling_ms. 130 - */ 131 - 132 - /* raw is longer than polling_ms or avg has no history */ 133 - if (div_u64(raw->total_time, USEC_PER_MSEC) >= polling_ms || 134 - !avg->total_time) { 135 - *avg = *raw; 136 - return; 137 - } 138 - 139 - /* Truncate the oldest history first. 140 - * 141 - * Because we keep the history with a single devfreq_dev_status, 142 - * rather than a list of devfreq_dev_status, we have to assume freq 143 - * and load are the same over avg->total_time. We can scale down 144 - * avg->busy_time and avg->total_time by the same factor to drop 145 - * history. 146 - */ 147 - if (div_u64(avg->total_time + raw->total_time, USEC_PER_MSEC) >= 148 - max_history_ms) { 149 - const u32 new_total_time = polling_ms * USEC_PER_MSEC - 150 - raw->total_time; 151 - avg->busy_time = div_u64( 152 - mul_u32_u32(avg->busy_time, new_total_time), 153 - avg->total_time); 154 - avg->total_time = new_total_time; 155 - } 156 - 157 - /* compute the average freq over avg->total_time + raw->total_time */ 158 - avg_freq = mul_u32_u32(avg->current_frequency, avg->total_time); 159 - avg_freq += mul_u32_u32(raw->current_frequency, raw->total_time); 160 - do_div(avg_freq, avg->total_time + raw->total_time); 161 - 162 - avg->current_frequency = avg_freq; 163 - avg->busy_time += raw->busy_time; 164 - avg->total_time += raw->total_time; 165 - } 166 - 167 - static int msm_devfreq_get_dev_status(struct device *dev, 168 - struct devfreq_dev_status *status) 169 - { 170 - struct msm_gpu *gpu = dev_to_gpu(dev); 171 - struct devfreq_dev_status raw; 172 - 173 - get_raw_dev_status(gpu, &raw); 174 - update_average_dev_status(gpu, &raw); 175 - *status = gpu->devfreq.average_status; 176 90 177 91 return 0; 178 92 } ··· 147 191 148 192 mutex_init(&df->lock); 149 193 150 - dev_pm_qos_add_request(&gpu->pdev->dev, &df->idle_freq, 151 - DEV_PM_QOS_MAX_FREQUENCY, 152 - PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); 153 194 dev_pm_qos_add_request(&gpu->pdev->dev, &df->boost_freq, 154 195 DEV_PM_QOS_MIN_FREQUENCY, 0); 155 196 ··· 167 214 168 215 if (IS_ERR(df->devfreq)) { 169 216 DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); 170 - dev_pm_qos_remove_request(&df->idle_freq); 171 217 dev_pm_qos_remove_request(&df->boost_freq); 172 218 df->devfreq = NULL; 173 219 return; ··· 208 256 209 257 devfreq_cooling_unregister(gpu->cooling); 210 258 dev_pm_qos_remove_request(&df->boost_freq); 211 - dev_pm_qos_remove_request(&df->idle_freq); 212 259 } 213 260 214 261 void msm_devfreq_resume(struct msm_gpu *gpu) ··· 280 329 { 281 330 struct msm_gpu_devfreq *df = &gpu->devfreq; 282 331 unsigned int idle_time; 332 + unsigned long target_freq; 283 333 284 334 if (!has_devfreq(gpu)) 285 335 return; ··· 290 338 */ 291 339 cancel_idle_work(df); 292 340 341 + /* 342 + * Hold devfreq lock to synchronize with get_dev_status()/ 343 + * target() callbacks 344 + */ 345 + mutex_lock(&df->devfreq->lock); 346 + 347 + target_freq = df->idle_freq; 348 + 293 349 idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time)); 350 + 351 + df->idle_freq = 0; 352 + 353 + /* 354 + * We could have become active again before the idle work had a 355 + * chance to run, in which case the df->idle_freq would have 356 + * still been zero. In this case, no need to change freq. 357 + */ 358 + if (target_freq) 359 + msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0); 360 + 361 + mutex_unlock(&df->devfreq->lock); 294 362 295 363 /* 296 364 * If we've been idle for a significant fraction of a polling ··· 320 348 if (idle_time > msm_devfreq_profile.polling_ms) { 321 349 msm_devfreq_boost(gpu, 2); 322 350 } 323 - 324 - dev_pm_qos_update_request(&df->idle_freq, 325 - PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); 326 351 } 327 352 328 353 ··· 329 360 struct msm_gpu_devfreq, idle_work.work); 330 361 struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq); 331 362 struct msm_drm_private *priv = gpu->dev->dev_private; 363 + unsigned long idle_freq, target_freq = 0; 332 364 333 - df->idle_time = ktime_get(); 365 + /* 366 + * Hold devfreq lock to synchronize with get_dev_status()/ 367 + * target() callbacks 368 + */ 369 + mutex_lock(&df->devfreq->lock); 370 + 371 + idle_freq = get_freq(gpu); 334 372 335 373 if (priv->gpu_clamp_to_idle) 336 - dev_pm_qos_update_request(&df->idle_freq, 0); 374 + msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0); 375 + 376 + df->idle_time = ktime_get(); 377 + df->idle_freq = idle_freq; 378 + 379 + mutex_unlock(&df->devfreq->lock); 337 380 } 338 381 339 382 void msm_devfreq_idle(struct msm_gpu *gpu)

Configure Feed

Configure Feed