Merge tag 'drm-intel-next-fixes-2019-11-28' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'drm-intel-next-fixes-2019-11-28' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

- Important fix to uAPI alignment on query IOCTL
- Fixes for the power regression introduced by the previous security patches
- Avoid regressing super heavy benchmarks by increasing the default request pre-emption timeout from 100 ms to 640 ms
- A resulting set of smaller fixes made while the problem was being investigated
- Display fixes for EHL voltage level programming and TGL DKL PHY vswing for HDMI

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191128141524.GA11992@jlahtine-desk.ger.corp.intel.com

Dave Airlie 6 years ago 3e25dbca 36a170b1

+354 -93

24 changed files

expand all collapse all

drivers

gpu

drm

i915

Kconfig.profile

display

intel_cdclk.c

intel_ddi.c

intel_context.c

intel_engine.h

intel_engine_cs.c

intel_engine_pm.c

intel_engine_pm.h

intel_engine_types.h

intel_gt_pm.c

intel_gt_pm.h

intel_gt_requests.c

intel_gt_requests.h

intel_lrc.c

intel_reset.c

intel_ring.c

intel_timeline.c

intel_timeline_types.h

selftest_engine_pm.c

i915_active.c

i915_pmu.c

i915_query.c

intel_wakeref.c

intel_wakeref.h

+1 -1

drivers/gpu/drm/i915/Kconfig.profile

reviewed

··· 25 25 26 26 config DRM_I915_PREEMPT_TIMEOUT 27 27 int "Preempt timeout (ms, jiffy granularity)" 28 28 - default 100 # milliseconds 28 28 + default 640 # milliseconds 29 29 help 30 30 How long to wait (in milliseconds) for a preemption event to occur 31 31 when submitting a new context via execlists. If the current context

+3 -1

drivers/gpu/drm/i915/display/intel_cdclk.c

reviewed

··· 1273 1273 1274 1274 static u8 ehl_calc_voltage_level(int cdclk) 1275 1275 { 1276 1276 - if (cdclk > 312000) 1276 1276 + if (cdclk > 326400) 1277 1277 + return 3; 1278 1278 + else if (cdclk > 312000) 1277 1279 return 2; 1278 1280 else if (cdclk > 180000) 1279 1281 return 1;

+24 -5

drivers/gpu/drm/i915/display/intel_ddi.c

reviewed

··· 593 593 u32 dkl_de_emphasis_control; 594 594 }; 595 595 596 596 - static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { 596 596 + static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { 597 597 /* VS pre-emp Non-trans mV Pre-emph dB */ 598 598 { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ 599 599 { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ ··· 605 605 { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ 606 606 { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ 607 607 { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ 608 608 + }; 609 609 + 610 610 + static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = { 611 611 + /* HDMI Preset VS Pre-emph */ 612 612 + { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */ 613 613 + { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */ 614 614 + { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */ 615 615 + { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */ 616 616 + { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */ 617 617 + { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */ 618 618 + { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */ 619 619 + { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */ 620 620 + { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */ 621 621 + { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */ 608 622 }; 609 623 610 624 static const struct ddi_buf_trans * ··· 912 898 icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 913 899 0, &n_entries); 914 900 else 915 915 - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); 901 901 + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); 916 902 default_entry = n_entries - 1; 917 903 } else if (INTEL_GEN(dev_priv) == 11) { 918 904 if (intel_phy_is_combo(dev_priv, phy)) ··· 2385 2371 icl_get_combo_buf_trans(dev_priv, encoder->type, 2386 2372 intel_dp->link_rate, &n_entries); 2387 2373 else 2388 2388 - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); 2374 2374 + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); 2389 2375 } else if (INTEL_GEN(dev_priv) == 11) { 2390 2376 if (intel_phy_is_combo(dev_priv, phy)) 2391 2377 
icl_get_combo_buf_trans(dev_priv, encoder->type, ··· 2837 2823 const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; 2838 2824 u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; 2839 2825 2840 2840 - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); 2841 2841 - ddi_translations = tgl_dkl_phy_ddi_translations; 2826 2826 + if (encoder->type == INTEL_OUTPUT_HDMI) { 2827 2827 + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); 2828 2828 + ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; 2829 2829 + } else { 2830 2830 + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); 2831 2831 + ddi_translations = tgl_dkl_phy_dp_ddi_trans; 2832 2832 + } 2842 2833 2843 2834 if (level >= n_entries) 2844 2835 level = n_entries - 1;

+17 -4

drivers/gpu/drm/i915/gt/intel_context.c

reviewed

··· 310 310 GEM_BUG_ON(rq->hw_context == ce); 311 311 312 312 if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ 313 313 - err = mutex_lock_interruptible_nested(&tl->mutex, 314 314 - SINGLE_DEPTH_NESTING); 315 315 - if (err) 316 316 - return err; 313 313 + /* 314 314 + * Ideally, we just want to insert our foreign fence as 315 315 + * a barrier into the remove context, such that this operation 316 316 + * occurs after all current operations in that context, and 317 317 + * all future operations must occur after this. 318 318 + * 319 319 + * Currently, the timeline->last_request tracking is guarded 320 320 + * by its mutex and so we must obtain that to atomically 321 321 + * insert our barrier. However, since we already hold our 322 322 + * timeline->mutex, we must be careful against potential 323 323 + * inversion if we are the kernel_context as the remote context 324 324 + * will itself poke at the kernel_context when it needs to 325 325 + * unpin. Ergo, if already locked, we drop both locks and 326 326 + * try again (through the magic of userspace repeating EAGAIN). 327 327 + */ 328 328 + if (!mutex_trylock(&tl->mutex)) 329 329 + return -EAGAIN; 317 330 318 331 /* Queue this switch after current activity by this context. */ 319 332 err = i915_active_fence_set(&tl->last_request, rq);

+1 -3

drivers/gpu/drm/i915/gt/intel_engine.h

reviewed

··· 100 100 static inline struct i915_request * 101 101 execlists_active(const struct intel_engine_execlists *execlists) 102 102 { 103 103 - GEM_BUG_ON(execlists->active - execlists->inflight > 104 104 - execlists_num_ports(execlists)); 105 105 - return READ_ONCE(*execlists->active); 103 103 + return *READ_ONCE(execlists->active); 106 104 } 107 105 108 106 static inline void

+5 -3

drivers/gpu/drm/i915/gt/intel_engine_cs.c

reviewed

··· 28 28 29 29 #include "i915_drv.h" 30 30 31 31 - #include "gt/intel_gt.h" 32 32 - 31 31 + #include "intel_context.h" 33 32 #include "intel_engine.h" 34 33 #include "intel_engine_pm.h" 35 34 #include "intel_engine_pool.h" 36 35 #include "intel_engine_user.h" 37 37 - #include "intel_context.h" 36 36 + #include "intel_gt.h" 37 37 + #include "intel_gt_requests.h" 38 38 #include "intel_lrc.h" 39 39 #include "intel_reset.h" 40 40 #include "intel_ring.h" ··· 616 616 intel_engine_init_execlists(engine); 617 617 intel_engine_init_cmd_parser(engine); 618 618 intel_engine_init__pm(engine); 619 619 + intel_engine_init_retire(engine); 619 620 620 621 intel_engine_pool_init(&engine->pool); 621 622 ··· 839 838 840 839 cleanup_status_page(engine); 841 840 841 841 + intel_engine_fini_retire(engine); 842 842 intel_engine_pool_fini(&engine->pool); 843 843 intel_engine_fini_breadcrumbs(engine); 844 844 intel_engine_cleanup_cmd_parser(engine);

+58 -9

drivers/gpu/drm/i915/gt/intel_engine_pm.c

reviewed

··· 73 73 74 74 #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ 75 75 76 76 + static void 77 77 + __queue_and_release_pm(struct i915_request *rq, 78 78 + struct intel_timeline *tl, 79 79 + struct intel_engine_cs *engine) 80 80 + { 81 81 + struct intel_gt_timelines *timelines = &engine->gt->timelines; 82 82 + 83 83 + GEM_TRACE("%s\n", engine->name); 84 84 + 85 85 + /* 86 86 + * We have to serialise all potential retirement paths with our 87 87 + * submission, as we don't want to underflow either the 88 88 + * engine->wakeref.counter or our timeline->active_count. 89 89 + * 90 90 + * Equally, we cannot allow a new submission to start until 91 91 + * after we finish queueing, nor could we allow that submitter 92 92 + * to retire us before we are ready! 93 93 + */ 94 94 + spin_lock(&timelines->lock); 95 95 + 96 96 + /* Let intel_gt_retire_requests() retire us (acquired under lock) */ 97 97 + if (!atomic_fetch_inc(&tl->active_count)) 98 98 + list_add_tail(&tl->link, &timelines->active_list); 99 99 + 100 100 + /* Hand the request over to HW and so engine_retire() */ 101 101 + __i915_request_queue(rq, NULL); 102 102 + 103 103 + /* Let new submissions commence (and maybe retire this timeline) */ 104 104 + __intel_wakeref_defer_park(&engine->wakeref); 105 105 + 106 106 + spin_unlock(&timelines->lock); 107 107 + } 108 108 + 76 109 static bool switch_to_kernel_context(struct intel_engine_cs *engine) 77 110 { 111 111 + struct intel_context *ce = engine->kernel_context; 78 112 struct i915_request *rq; 79 113 unsigned long flags; 80 114 bool result = true; ··· 132 98 * This should hold true as we can only park the engine after 133 99 * retiring the last request, thus all rings should be empty and 134 100 * all timelines idle. 101 101 + * 102 102 + * For unlocking, there are 2 other parties and the GPU who have a 103 103 + * stake here. 104 104 + * 105 105 + * A new gpu user will be waiting on the engine-pm to start their 106 106 + * engine_unpark. 
New waiters are predicated on engine->wakeref.count 107 107 + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the 108 108 + * engine->wakeref. 109 109 + * 110 110 + * The other party is intel_gt_retire_requests(), which is walking the 111 111 + * list of active timelines looking for completions. Meanwhile as soon 112 112 + * as we call __i915_request_queue(), the GPU may complete our request. 113 113 + * Ergo, if we put ourselves on the timelines.active_list 114 114 + * (se intel_timeline_enter()) before we increment the 115 115 + * engine->wakeref.count, we may see the request completion and retire 116 116 + * it causing an undeflow of the engine->wakeref. 135 117 */ 136 136 - flags = __timeline_mark_lock(engine->kernel_context); 118 118 + flags = __timeline_mark_lock(ce); 119 119 + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); 137 120 138 138 - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); 121 121 + rq = __i915_request_create(ce, GFP_NOWAIT); 139 122 if (IS_ERR(rq)) 140 123 /* Context switch failed, hope for the best! Maybe reset? */ 141 124 goto out_unlock; 142 142 - 143 143 - intel_timeline_enter(i915_request_timeline(rq)); 144 125 145 126 /* Check again on the next retirement. 
*/ 146 127 engine->wakeref_serial = engine->serial + 1; ··· 165 116 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 166 117 __i915_request_commit(rq); 167 118 168 168 - /* Release our exclusive hold on the engine */ 169 169 - __intel_wakeref_defer_park(&engine->wakeref); 170 170 - __i915_request_queue(rq, NULL); 119 119 + /* Expose ourselves to the world */ 120 120 + __queue_and_release_pm(rq, ce->timeline, engine); 171 121 172 122 result = false; 173 123 out_unlock: 174 174 - __timeline_mark_unlock(engine->kernel_context, flags); 124 124 + __timeline_mark_unlock(ce, flags); 175 125 return result; 176 126 } 177 127 ··· 225 177 226 178 engine->execlists.no_priolist = false; 227 179 228 228 - intel_gt_pm_put(engine->gt); 180 180 + /* While gt calls i915_vma_parked(), we have to break the lock cycle */ 181 181 + intel_gt_pm_put_async(engine->gt); 229 182 return 0; 230 183 } 231 184

+10

drivers/gpu/drm/i915/gt/intel_engine_pm.h

reviewed

··· 31 31 intel_wakeref_put(&engine->wakeref); 32 32 } 33 33 34 34 + static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) 35 35 + { 36 36 + intel_wakeref_put_async(&engine->wakeref); 37 37 + } 38 38 + 39 39 + static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) 40 40 + { 41 41 + intel_wakeref_unlock_wait(&engine->wakeref); 42 42 + } 43 43 + 34 44 void intel_engine_init__pm(struct intel_engine_cs *engine); 35 45 36 46 #endif /* INTEL_ENGINE_PM_H */

drivers/gpu/drm/i915/gt/intel_engine_types.h

reviewed

··· 451 451 452 452 struct intel_engine_execlists execlists; 453 453 454 454 + /* 455 455 + * Keep track of completed timelines on this engine for early 456 456 + * retirement with the goal of quickly enabling powersaving as 457 457 + * soon as the engine is idle. 458 458 + */ 459 459 + struct intel_timeline *retire; 460 460 + struct work_struct retire_work; 461 461 + 454 462 /* status_notifier: list of callbacks for context-switch changes */ 455 463 struct atomic_notifier_head context_status_notifier; 456 464

+1 -2

drivers/gpu/drm/i915/gt/intel_gt_pm.c

reviewed

··· 105 105 static const struct intel_wakeref_ops wf_ops = { 106 106 .get = __gt_unpark, 107 107 .put = __gt_park, 108 108 - .flags = INTEL_WAKEREF_PUT_ASYNC, 109 108 }; 110 109 111 110 void intel_gt_pm_init_early(struct intel_gt *gt) ··· 271 272 272 273 static suspend_state_t pm_suspend_target(void) 273 274 { 274 274 - #if IS_ENABLED(CONFIG_PM_SLEEP) 275 275 + #if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP) 275 276 return pm_suspend_target_state; 276 277 #else 277 278 return PM_SUSPEND_TO_IDLE;

drivers/gpu/drm/i915/gt/intel_gt_pm.h

reviewed

··· 32 32 intel_wakeref_put(&gt->wakeref); 33 33 } 34 34 35 35 + static inline void intel_gt_pm_put_async(struct intel_gt *gt) 36 36 + { 37 37 + intel_wakeref_put_async(&gt->wakeref); 38 38 + } 39 39 + 35 40 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) 36 41 { 37 42 return intel_wakeref_wait_for_idle(&gt->wakeref);

+79 -4

drivers/gpu/drm/i915/gt/intel_gt_requests.c

reviewed

··· 4 4 * Copyright © 2019 Intel Corporation 5 5 */ 6 6 7 7 + #include <linux/workqueue.h> 8 8 + 7 9 #include "i915_drv.h" /* for_each_engine() */ 8 10 #include "i915_request.h" 9 11 #include "intel_gt.h" ··· 31 29 intel_engine_flush_submission(engine); 32 30 } 33 31 32 32 + static void engine_retire(struct work_struct *work) 33 33 + { 34 34 + struct intel_engine_cs *engine = 35 35 + container_of(work, typeof(*engine), retire_work); 36 36 + struct intel_timeline *tl = xchg(&engine->retire, NULL); 37 37 + 38 38 + do { 39 39 + struct intel_timeline *next = xchg(&tl->retire, NULL); 40 40 + 41 41 + /* 42 42 + * Our goal here is to retire _idle_ timelines as soon as 43 43 + * possible (as they are idle, we do not expect userspace 44 44 + * to be cleaning up anytime soon). 45 45 + * 46 46 + * If the timeline is currently locked, either it is being 47 47 + * retired elsewhere or about to be! 48 48 + */ 49 49 + if (mutex_trylock(&tl->mutex)) { 50 50 + retire_requests(tl); 51 51 + mutex_unlock(&tl->mutex); 52 52 + } 53 53 + intel_timeline_put(tl); 54 54 + 55 55 + GEM_BUG_ON(!next); 56 56 + tl = ptr_mask_bits(next, 1); 57 57 + } while (tl); 58 58 + } 59 59 + 60 60 + static bool add_retire(struct intel_engine_cs *engine, 61 61 + struct intel_timeline *tl) 62 62 + { 63 63 + struct intel_timeline *first; 64 64 + 65 65 + /* 66 66 + * We open-code a llist here to include the additional tag [BIT(0)] 67 67 + * so that we know when the timeline is already on a 68 68 + * retirement queue: either this engine or another. 69 69 + * 70 70 + * However, we rely on that a timeline can only be active on a single 71 71 + * engine at any one time and that add_retire() is called before the 72 72 + * engine releases the timeline and transferred to another to retire. 
73 73 + */ 74 74 + 75 75 + if (READ_ONCE(tl->retire)) /* already queued */ 76 76 + return false; 77 77 + 78 78 + intel_timeline_get(tl); 79 79 + first = READ_ONCE(engine->retire); 80 80 + do 81 81 + tl->retire = ptr_pack_bits(first, 1, 1); 82 82 + while (!try_cmpxchg(&engine->retire, &first, tl)); 83 83 + 84 84 + return !first; 85 85 + } 86 86 + 87 87 + void intel_engine_add_retire(struct intel_engine_cs *engine, 88 88 + struct intel_timeline *tl) 89 89 + { 90 90 + if (add_retire(engine, tl)) 91 91 + schedule_work(&engine->retire_work); 92 92 + } 93 93 + 94 94 + void intel_engine_init_retire(struct intel_engine_cs *engine) 95 95 + { 96 96 + INIT_WORK(&engine->retire_work, engine_retire); 97 97 + } 98 98 + 99 99 + void intel_engine_fini_retire(struct intel_engine_cs *engine) 100 100 + { 101 101 + flush_work(&engine->retire_work); 102 102 + GEM_BUG_ON(engine->retire); 103 103 + } 104 104 + 34 105 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) 35 106 { 36 107 struct intel_gt_timelines *timelines = &gt->timelines; ··· 127 52 } 128 53 129 54 intel_timeline_get(tl); 130 130 - GEM_BUG_ON(!tl->active_count); 131 131 - tl->active_count++; /* pin the list element */ 55 55 + GEM_BUG_ON(!atomic_read(&tl->active_count)); 56 56 + atomic_inc(&tl->active_count); /* pin the list element */ 132 57 spin_unlock_irqrestore(&timelines->lock, flags); 133 58 134 59 if (timeout > 0) { ··· 149 74 150 75 /* Resume iteration after dropping lock */ 151 76 list_safe_reset_next(tl, tn, link); 152 152 - if (!--tl->active_count) 77 77 + if (atomic_dec_and_test(&tl->active_count)) 153 78 list_del(&tl->link); 154 79 else 155 80 active_count += !!rcu_access_pointer(tl->last_request.fence); ··· 158 83 159 84 /* Defer the final release to after the spinlock */ 160 85 if (refcount_dec_and_test(&tl->kref.refcount)) { 161 161 - GEM_BUG_ON(tl->active_count); 86 86 + GEM_BUG_ON(atomic_read(&tl->active_count)); 162 87 list_add(&tl->link, &free); 163 88 } 164 89 }

drivers/gpu/drm/i915/gt/intel_gt_requests.h

reviewed

··· 7 7 #ifndef INTEL_GT_REQUESTS_H 8 8 #define INTEL_GT_REQUESTS_H 9 9 10 10 + struct intel_engine_cs; 10 11 struct intel_gt; 12 12 + struct intel_timeline; 11 13 12 14 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); 13 15 static inline void intel_gt_retire_requests(struct intel_gt *gt) 14 16 { 15 17 intel_gt_retire_requests_timeout(gt, 0); 16 18 } 19 19 + 20 20 + void intel_engine_init_retire(struct intel_engine_cs *engine); 21 21 + void intel_engine_add_retire(struct intel_engine_cs *engine, 22 22 + struct intel_timeline *tl); 23 23 + void intel_engine_fini_retire(struct intel_engine_cs *engine); 17 24 18 25 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); 19 26

+32 -18

drivers/gpu/drm/i915/gt/intel_lrc.c

reviewed

··· 142 142 #include "intel_engine_pm.h" 143 143 #include "intel_gt.h" 144 144 #include "intel_gt_pm.h" 145 145 + #include "intel_gt_requests.h" 145 146 #include "intel_lrc_reg.h" 146 147 #include "intel_mocs.h" 147 148 #include "intel_reset.h" ··· 1116 1115 * refrain from doing non-trivial work here. 1117 1116 */ 1118 1117 1118 1118 + /* 1119 1119 + * If we have just completed this context, the engine may now be 1120 1120 + * idle and we want to re-enter powersaving. 1121 1121 + */ 1122 1122 + if (list_is_last(&rq->link, &ce->timeline->requests) && 1123 1123 + i915_request_completed(rq)) 1124 1124 + intel_engine_add_retire(engine, ce->timeline); 1125 1125 + 1119 1126 intel_engine_context_out(engine); 1120 1127 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 1121 1121 - intel_gt_pm_put(engine->gt); 1128 1128 + intel_gt_pm_put_async(engine->gt); 1122 1129 1123 1130 /* 1124 1131 * If this is part of a virtual engine, its next request may ··· 1946 1937 static void 1947 1938 cancel_port_requests(struct intel_engine_execlists * const execlists) 1948 1939 { 1949 1949 - struct i915_request * const *port, *rq; 1940 1940 + struct i915_request * const *port; 1950 1941 1951 1951 - for (port = execlists->pending; (rq = *port); port++) 1952 1952 - execlists_schedule_out(rq); 1942 1942 + for (port = execlists->pending; *port; port++) 1943 1943 + execlists_schedule_out(*port); 1953 1944 memset(execlists->pending, 0, sizeof(execlists->pending)); 1954 1945 1955 1955 - for (port = execlists->active; (rq = *port); port++) 1956 1956 - execlists_schedule_out(rq); 1957 1957 - execlists->active = 1958 1958 - memset(execlists->inflight, 0, sizeof(execlists->inflight)); 1946 1946 + /* Mark the end of active before we overwrite *active */ 1947 1947 + for (port = xchg(&execlists->active, execlists->pending); *port; port++) 1948 1948 + execlists_schedule_out(*port); 1949 1949 + WRITE_ONCE(execlists->active, 1950 1950 + memset(execlists->inflight, 0, 
sizeof(execlists->inflight))); 1959 1951 } 1960 1952 1961 1953 static inline void ··· 2109 2099 else 2110 2100 promote = gen8_csb_parse(execlists, buf + 2 * head); 2111 2101 if (promote) { 2102 2102 + struct i915_request * const *old = execlists->active; 2103 2103 + 2104 2104 + /* Point active to the new ELSP; prevent overwriting */ 2105 2105 + WRITE_ONCE(execlists->active, execlists->pending); 2106 2106 + set_timeslice(engine); 2107 2107 + 2112 2108 if (!inject_preempt_hang(execlists)) 2113 2109 ring_set_paused(engine, 0); 2114 2110 2115 2111 /* cancel old inflight, prepare for switch */ 2116 2116 - trace_ports(execlists, "preempted", execlists->active); 2117 2117 - while (*execlists->active) 2118 2118 - execlists_schedule_out(*execlists->active++); 2112 2112 + trace_ports(execlists, "preempted", old); 2113 2113 + while (*old) 2114 2114 + execlists_schedule_out(*old++); 2119 2115 2120 2116 /* switch pending to inflight */ 2121 2117 GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); 2122 2122 - execlists->active = 2123 2123 - memcpy(execlists->inflight, 2124 2124 - execlists->pending, 2125 2125 - execlists_num_ports(execlists) * 2126 2126 - sizeof(*execlists->pending)); 2127 2127 - 2128 2128 - set_timeslice(engine); 2118 2118 + WRITE_ONCE(execlists->active, 2119 2119 + memcpy(execlists->inflight, 2120 2120 + execlists->pending, 2121 2121 + execlists_num_ports(execlists) * 2122 2122 + sizeof(*execlists->pending))); 2129 2123 2130 2124 WRITE_ONCE(execlists->pending[0], NULL); 2131 2125 } else {

+1 -1

drivers/gpu/drm/i915/gt/intel_reset.c

reviewed

··· 1114 1114 out: 1115 1115 intel_engine_cancel_stop_cs(engine); 1116 1116 reset_finish_engine(engine); 1117 1117 - intel_engine_pm_put(engine); 1117 1117 + intel_engine_pm_put_async(engine); 1118 1118 return ret; 1119 1119 } 1120 1120

+4 -9

drivers/gpu/drm/i915/gt/intel_ring.c

reviewed

··· 57 57 58 58 i915_vma_make_unshrinkable(vma); 59 59 60 60 - GEM_BUG_ON(ring->vaddr); 61 61 - ring->vaddr = addr; 60 60 + /* Discard any unused bytes beyond that submitted to hw. */ 61 61 + intel_ring_reset(ring, ring->emit); 62 62 63 63 + ring->vaddr = addr; 63 64 return 0; 64 65 65 66 err_ring: ··· 86 85 if (!atomic_dec_and_test(&ring->pin_count)) 87 86 return; 88 87 89 89 - /* Discard any unused bytes beyond that submitted to hw. */ 90 90 - intel_ring_reset(ring, ring->emit); 91 91 - 92 88 i915_vma_unset_ggtt_write(vma); 93 89 if (i915_vma_is_map_and_fenceable(vma)) 94 90 i915_vma_unpin_iomap(vma); 95 91 else 96 92 i915_gem_object_unpin_map(vma->obj); 97 93 98 98 - GEM_BUG_ON(!ring->vaddr); 99 99 - ring->vaddr = NULL; 100 100 - 101 101 - i915_vma_unpin(vma); 102 94 i915_vma_make_purgeable(vma); 95 95 + i915_vma_unpin(vma); 103 96 } 104 97 105 98 static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)

+28 -7

drivers/gpu/drm/i915/gt/intel_timeline.c

reviewed

··· 282 282 { 283 283 GEM_BUG_ON(atomic_read(&timeline->pin_count)); 284 284 GEM_BUG_ON(!list_empty(&timeline->requests)); 285 285 + GEM_BUG_ON(timeline->retire); 285 286 286 287 if (timeline->hwsp_cacheline) 287 288 cacheline_free(timeline->hwsp_cacheline); ··· 340 339 struct intel_gt_timelines *timelines = &tl->gt->timelines; 341 340 unsigned long flags; 342 341 342 342 + /* 343 343 + * Pretend we are serialised by the timeline->mutex. 344 344 + * 345 345 + * While generally true, there are a few exceptions to the rule 346 346 + * for the engine->kernel_context being used to manage power 347 347 + * transitions. As the engine_park may be called from under any 348 348 + * timeline, it uses the power mutex as a global serialisation 349 349 + * lock to prevent any other request entering its timeline. 350 350 + * 351 351 + * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. 352 352 + * 353 353 + * However, intel_gt_retire_request() does not know which engine 354 354 + * it is retiring along and so cannot partake in the engine-pm 355 355 + * barrier, and there we use the tl->active_count as a means to 356 356 + * pin the timeline in the active_list while the locks are dropped. 357 357 + * Ergo, as that is outside of the engine-pm barrier, we need to 358 358 + * use atomic to manipulate tl->active_count. 359 359 + */ 343 360 lockdep_assert_held(&tl->mutex); 344 344 - 345 361 GEM_BUG_ON(!atomic_read(&tl->pin_count)); 346 346 - if (tl->active_count++) 362 362 + 363 363 + if (atomic_add_unless(&tl->active_count, 1, 0)) 347 364 return; 348 348 - GEM_BUG_ON(!tl->active_count); /* overflow? 
*/ 349 365 350 366 spin_lock_irqsave(&timelines->lock, flags); 351 351 - list_add(&tl->link, &timelines->active_list); 367 367 + if (!atomic_fetch_inc(&tl->active_count)) 368 368 + list_add_tail(&tl->link, &timelines->active_list); 352 369 spin_unlock_irqrestore(&timelines->lock, flags); 353 370 } 354 371 ··· 375 356 struct intel_gt_timelines *timelines = &tl->gt->timelines; 376 357 unsigned long flags; 377 358 359 359 + /* See intel_timeline_enter() */ 378 360 lockdep_assert_held(&tl->mutex); 379 361 380 380 - GEM_BUG_ON(!tl->active_count); 381 381 - if (--tl->active_count) 362 362 + GEM_BUG_ON(!atomic_read(&tl->active_count)); 363 363 + if (atomic_add_unless(&tl->active_count, -1, 1)) 382 364 return; 383 365 384 366 spin_lock_irqsave(&timelines->lock, flags); 385 385 - list_del(&tl->link); 367 367 + if (atomic_dec_and_test(&tl->active_count)) 368 368 + list_del(&tl->link); 386 369 spin_unlock_irqrestore(&timelines->lock, flags); 387 370 388 371 /*

+4 -1

drivers/gpu/drm/i915/gt/intel_timeline_types.h

reviewed

··· 42 42 * from the intel_context caller plus internal atomicity. 43 43 */ 44 44 atomic_t pin_count; 45 45 - unsigned int active_count; 45 45 + atomic_t active_count; 46 46 47 47 const u32 *hwsp_seqno; 48 48 struct i915_vma *hwsp_ggtt; ··· 65 65 * protection themselves (cf the i915_active_fence API). 66 66 */ 67 67 struct i915_active_fence last_request; 68 68 + 69 69 + /** A chain of completed timelines ready for early retirement. */ 70 70 + struct intel_timeline *retire; 68 71 69 72 /** 70 73 * We track the most recent seqno that we wait on in every context so

+4 -3

drivers/gpu/drm/i915/gt/selftest_engine_pm.c

reviewed

··· 51 51 pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", 52 52 engine->name, p->name); 53 53 else 54 54 - intel_engine_pm_put(engine); 55 55 - intel_engine_pm_put(engine); 54 54 + intel_engine_pm_put_async(engine); 55 55 + intel_engine_pm_put_async(engine); 56 56 p->critical_section_end(); 57 57 58 58 - /* engine wakeref is sync (instant) */ 58 58 + intel_engine_pm_flush(engine); 59 59 + 59 60 if (intel_engine_pm_is_awake(engine)) { 60 61 pr_err("%s is still awake after flushing pm\n", 61 62 engine->name);

+3 -2

drivers/gpu/drm/i915/i915_active.c

reviewed

··· 672 672 * populated by i915_request_add_active_barriers() to point to the 673 673 * request that will eventually release them. 674 674 */ 675 675 - spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING); 676 675 llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { 677 676 struct active_node *node = barrier_from_ll(pos); 678 677 struct intel_engine_cs *engine = barrier_to_engine(node); 679 678 struct rb_node **p, *parent; 680 679 680 680 + spin_lock_irqsave_nested(&ref->tree_lock, flags, 681 681 + SINGLE_DEPTH_NESTING); 681 682 parent = NULL; 682 683 p = &ref->tree.rb_node; 683 684 while (*p) { ··· 694 693 } 695 694 rb_link_node(&node->node, parent, p); 696 695 rb_insert_color(&node->node, &ref->tree); 696 696 + spin_unlock_irqrestore(&ref->tree_lock, flags); 697 697 698 698 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 699 699 llist_add(barrier_to_ll(node), &engine->barrier_tasks); 700 700 intel_engine_pm_put(engine); 701 701 } 702 702 - spin_unlock_irqrestore(&ref->tree_lock, flags); 703 702 } 704 703 705 704 void i915_request_add_active_barriers(struct i915_request *rq)

+3 -3

drivers/gpu/drm/i915/i915_pmu.c

reviewed

··· 190 190 val = 0; 191 191 if (intel_gt_pm_get_if_awake(gt)) { 192 192 val = __get_rc6(gt); 193 193 - intel_gt_pm_put(gt); 193 193 + intel_gt_pm_put_async(gt); 194 194 } 195 195 196 196 spin_lock_irqsave(&pmu->lock, flags); ··· 343 343 344 344 skip: 345 345 spin_unlock_irqrestore(&engine->uncore->lock, flags); 346 346 - intel_engine_pm_put(engine); 346 346 + intel_engine_pm_put_async(engine); 347 347 } 348 348 } 349 349 ··· 368 368 if (intel_gt_pm_get_if_awake(gt)) { 369 369 val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); 370 370 val = intel_get_cagf(rps, val); 371 371 - intel_gt_pm_put(gt); 371 371 + intel_gt_pm_put_async(gt); 372 372 } 373 373 374 374 add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],

+5 -2

drivers/gpu/drm/i915/i915_query.c

reviewed

··· 103 103 struct drm_i915_engine_info __user *info_ptr; 104 104 struct drm_i915_query_engine_info query; 105 105 struct drm_i915_engine_info info = { }; 106 106 + unsigned int num_uabi_engines = 0; 106 107 struct intel_engine_cs *engine; 107 108 int len, ret; 108 109 109 110 if (query_item->flags) 110 111 return -EINVAL; 111 112 113 113 + for_each_uabi_engine(engine, i915) 114 114 + num_uabi_engines++; 115 115 + 112 116 len = sizeof(struct drm_i915_query_engine_info) + 113 113 - RUNTIME_INFO(i915)->num_engines * 114 114 - sizeof(struct drm_i915_engine_info); 117 117 + num_uabi_engines * sizeof(struct drm_i915_engine_info); 115 118 116 119 ret = copy_query_item(&query, sizeof(query), len, query_item); 117 120 if (ret != 0)

+15 -6

drivers/gpu/drm/i915/intel_wakeref.c

reviewed

··· 54 54 55 55 static void ____intel_wakeref_put_last(struct intel_wakeref *wf) 56 56 { 57 57 - if (!atomic_dec_and_test(&wf->count)) 57 57 + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); 58 58 + if (unlikely(!atomic_dec_and_test(&wf->count))) 58 59 goto unlock; 59 60 60 61 /* ops->put() must reschedule its own release on error/deferral */ ··· 68 67 mutex_unlock(&wf->mutex); 69 68 } 70 69 71 71 - void __intel_wakeref_put_last(struct intel_wakeref *wf) 70 70 + void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) 72 71 { 73 72 INTEL_WAKEREF_BUG_ON(work_pending(&wf->work)); 74 73 75 74 /* Assume we are not in process context and so cannot sleep. */ 76 76 - if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC || 77 77 - !mutex_trylock(&wf->mutex)) { 75 75 + if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { 78 76 schedule_work(&wf->work); 79 77 return; 80 78 } ··· 109 109 110 110 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) 111 111 { 112 112 - return wait_var_event_killable(&wf->wakeref, 113 113 - !intel_wakeref_is_active(wf)); 112 112 + int err; 113 113 + 114 114 + might_sleep(); 115 115 + 116 116 + err = wait_var_event_killable(&wf->wakeref, 117 117 + !intel_wakeref_is_active(wf)); 118 118 + if (err) 119 119 + return err; 120 120 + 121 121 + intel_wakeref_unlock_wait(wf); 122 122 + return 0; 114 123 } 115 124 116 125 static void wakeref_auto_timeout(struct timer_list *t)

+36 -9

drivers/gpu/drm/i915/intel_wakeref.h

reviewed

··· 9 9 10 10 #include <linux/atomic.h> 11 11 #include <linux/bits.h> 12 12 + #include <linux/lockdep.h> 12 13 #include <linux/mutex.h> 13 14 #include <linux/refcount.h> 14 15 #include <linux/stackdepot.h> ··· 30 29 struct intel_wakeref_ops { 31 30 int (*get)(struct intel_wakeref *wf); 32 31 int (*put)(struct intel_wakeref *wf); 33 33 - 34 34 - unsigned long flags; 35 35 - #define INTEL_WAKEREF_PUT_ASYNC BIT(0) 36 32 }; 37 33 38 34 struct intel_wakeref { ··· 55 57 } while (0) 56 58 57 59 int __intel_wakeref_get_first(struct intel_wakeref *wf); 58 58 - void __intel_wakeref_put_last(struct intel_wakeref *wf); 60 60 + void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags); 59 61 60 62 /** 61 63 * intel_wakeref_get: Acquire the wakeref ··· 98 100 } 99 101 100 102 /** 101 101 - * intel_wakeref_put: Release the wakeref 102 102 - * @i915: the drm_i915_private device 103 103 + * intel_wakeref_put_flags: Release the wakeref 103 104 * @wf: the wakeref 104 104 - * @fn: callback for releasing the wakeref, called only on final release. 105 105 + * @flags: control flags 105 106 * 106 107 * Release our hold on the wakeref. When there are no more users, 107 108 * the runtime pm wakeref will be released after the @fn callback is called ··· 113 116 * code otherwise. 
114 117 */ 115 118 static inline void 116 116 - intel_wakeref_put(struct intel_wakeref *wf) 119 119 + __intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags) 120 120 + #define INTEL_WAKEREF_PUT_ASYNC BIT(0) 117 121 { 118 122 INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); 119 123 if (unlikely(!atomic_add_unless(&wf->count, -1, 1))) 120 120 - __intel_wakeref_put_last(wf); 124 124 + __intel_wakeref_put_last(wf, flags); 125 125 + } 126 126 + 127 127 + static inline void 128 128 + intel_wakeref_put(struct intel_wakeref *wf) 129 129 + { 130 130 + might_sleep(); 131 131 + __intel_wakeref_put(wf, 0); 132 132 + } 133 133 + 134 134 + static inline void 135 135 + intel_wakeref_put_async(struct intel_wakeref *wf) 136 136 + { 137 137 + __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC); 121 138 } 122 139 123 140 /** ··· 163 152 } 164 153 165 154 /** 155 155 + * intel_wakeref_unlock_wait: Wait until the active callback is complete 156 156 + * @wf: the wakeref 157 157 + * 158 158 + * Waits for the active callback (under the @wf->mutex or another CPU) is 159 159 + * complete. 160 160 + */ 161 161 + static inline void 162 162 + intel_wakeref_unlock_wait(struct intel_wakeref *wf) 163 163 + { 164 164 + mutex_lock(&wf->mutex); 165 165 + mutex_unlock(&wf->mutex); 166 166 + flush_work(&wf->work); 167 167 + } 168 168 + 169 169 + /** 166 170 * intel_wakeref_is_active: Query whether the wakeref is currently held 167 171 * @wf: the wakeref 168 172 * ··· 196 170 static inline void 197 171 __intel_wakeref_defer_park(struct intel_wakeref *wf) 198 172 { 173 173 + lockdep_assert_held(&wf->mutex); 199 174 INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count)); 200 175 atomic_set_release(&wf->count, 1); 201 176 }