Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-intel-gt-next-2024-08-23' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next

UAPI Changes:

- Limit the number of relocations to INT_MAX (Tvrtko)

The only impact should be on synthetic tests.

Driver Changes:

- Fix for #11396: GPU Hang and rcs0 reset on Cherrytrail platform
- Fix Virtual Memory mapping boundaries calculation (Andi)
- Fix for #11255: Long hangs in buddy allocator with DG2/A380 without
Resizable BAR since 6.9 (David)
- Mark the GT as dead when mmio is unreliable (Chris, Andi)
- Workaround additions / fixes for MTL, ARL and DG2 (John H, Nitin)
- Enable partial memory mapping of GPU virtual memory (Andi, Chris)

- Prevent NULL deref on intel_memory_regions_hw_probe (Jonathan, Dan)
- Avoid UAF on intel_engines_release (Krzysztof)

- Don't update PWR_CLK_STATE starting with Gen12 (Umesh)
- Code and dmesg cleanups (Andi, Jesus, Luca)

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZshcfSqgfnl8Mh4P@jlahtine-mobl.ger.corp.intel.com

+145 -41
+2 -2
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
··· 1533 1533 u64_to_user_ptr(entry->relocs_ptr); 1534 1534 unsigned long remain = entry->relocation_count; 1535 1535 1536 - if (unlikely(remain > N_RELOC(ULONG_MAX))) 1536 + if (unlikely(remain > N_RELOC(INT_MAX))) 1537 1537 return -EINVAL; 1538 1538 1539 1539 /* ··· 1641 1641 if (size == 0) 1642 1642 return 0; 1643 1643 1644 - if (size > N_RELOC(ULONG_MAX)) 1644 + if (size > N_RELOC(INT_MAX)) 1645 1645 return -EINVAL; 1646 1646 1647 1647 addr = u64_to_user_ptr(entry->relocs_ptr);
+63 -10
drivers/gpu/drm/i915/gem/i915_gem_mman.c
··· 252 252 struct vm_area_struct *area = vmf->vma; 253 253 struct i915_mmap_offset *mmo = area->vm_private_data; 254 254 struct drm_i915_gem_object *obj = mmo->obj; 255 + unsigned long obj_offset; 255 256 resource_size_t iomap; 256 257 int err; 257 258 ··· 274 273 iomap -= obj->mm.region->region.start; 275 274 } 276 275 276 + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); 277 277 /* PTEs are revoked in obj->ops->put_pages() */ 278 278 err = remap_io_sg(area, 279 279 area->vm_start, area->vm_end - area->vm_start, 280 - obj->mm.pages->sgl, iomap); 280 + obj->mm.pages->sgl, obj_offset, iomap); 281 281 282 282 if (area->vm_flags & VM_WRITE) { 283 283 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); ··· 290 288 out: 291 289 i915_gem_object_unlock(obj); 292 290 return i915_error_to_vmf_fault(err); 291 + } 292 + 293 + static void set_address_limits(struct vm_area_struct *area, 294 + struct i915_vma *vma, 295 + unsigned long obj_offset, 296 + resource_size_t gmadr_start, 297 + unsigned long *start_vaddr, 298 + unsigned long *end_vaddr, 299 + unsigned long *pfn) 300 + { 301 + unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ 302 + long start, end; /* memory boundaries */ 303 + 304 + /* 305 + * Let's move into the ">> PAGE_SHIFT" 306 + * domain to be sure not to lose bits 307 + */ 308 + vm_start = area->vm_start >> PAGE_SHIFT; 309 + vm_end = area->vm_end >> PAGE_SHIFT; 310 + vma_size = vma->size >> PAGE_SHIFT; 311 + 312 + /* 313 + * Calculate the memory boundaries by considering the offset 314 + * provided by the user during memory mapping and the offset 315 + * provided for the partial mapping. 
316 + */ 317 + start = vm_start; 318 + start -= obj_offset; 319 + start += vma->gtt_view.partial.offset; 320 + end = start + vma_size; 321 + 322 + start = max_t(long, start, vm_start); 323 + end = min_t(long, end, vm_end); 324 + 325 + /* Let's move back into the "<< PAGE_SHIFT" domain */ 326 + *start_vaddr = (unsigned long)start << PAGE_SHIFT; 327 + *end_vaddr = (unsigned long)end << PAGE_SHIFT; 328 + 329 + *pfn = (gmadr_start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; 330 + *pfn += (*start_vaddr - area->vm_start) >> PAGE_SHIFT; 331 + *pfn += obj_offset - vma->gtt_view.partial.offset; 293 332 } 294 333 295 334 static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) ··· 345 302 struct i915_ggtt *ggtt = to_gt(i915)->ggtt; 346 303 bool write = area->vm_flags & VM_WRITE; 347 304 struct i915_gem_ww_ctx ww; 305 + unsigned long obj_offset; 306 + unsigned long start, end; /* memory boundaries */ 348 307 intel_wakeref_t wakeref; 349 308 struct i915_vma *vma; 350 309 pgoff_t page_offset; 310 + unsigned long pfn; 351 311 int srcu; 352 312 int ret; 353 313 354 - /* We don't use vmf->pgoff since that has the fake offset */ 314 + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); 355 315 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 316 + page_offset += obj_offset; 356 317 357 318 trace_i915_gem_object_fault(obj, page_offset, true, write); 358 319 ··· 449 402 if (ret) 450 403 goto err_unpin; 451 404 405 + /* 406 + * Dump all the necessary parameters in this function to perform the 407 + * arithmetic calculation for the virtual address start and end and 408 + * the PFN (Page Frame Number). 
409 + */ 410 + set_address_limits(area, vma, obj_offset, ggtt->gmadr.start, 411 + &start, &end, &pfn); 412 + 452 413 /* Finally, remap it using the new GTT offset */ 453 - ret = remap_io_mapping(area, 454 - area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), 455 - (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, 456 - min_t(u64, vma->size, area->vm_end - area->vm_start), 457 - &ggtt->iomap); 414 + ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); 458 415 if (ret) 459 416 goto err_fence; 460 417 ··· 1081 1030 1082 1031 rcu_read_lock(); 1083 1032 drm_vma_offset_lock_lookup(dev->vma_offset_manager); 1084 - node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, 1085 - vma->vm_pgoff, 1086 - vma_pages(vma)); 1033 + node = drm_vma_offset_lookup_locked(dev->vma_offset_manager, 1034 + vma->vm_pgoff, 1035 + vma_pages(vma)); 1087 1036 if (node && drm_vma_node_is_allowed(node, priv)) { 1088 1037 /* 1089 1038 * Skip 0-refcnted objects as it is in the process of being ··· 1135 1084 mmo = mmap_offset_attach(obj, mmap_type, NULL); 1136 1085 if (IS_ERR(mmo)) 1137 1086 return PTR_ERR(mmo); 1087 + 1088 + vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node); 1138 1089 } 1139 1090 1140 1091 /*
+1 -1
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
··· 535 535 * I915_CACHE_NONE. The only exception is userptr objects, where we 536 536 * instead force I915_CACHE_LLC, but we also don't allow userspace to 537 537 * ever change the @cache_level for such objects. Another special case 538 - * is dma-buf, which doesn't rely on @cache_dirty, but there we 538 + * is dma-buf, which doesn't rely on @cache_dirty, but there we 539 539 * always do a forced flush when acquiring the pages, if there is a 540 540 * chance that the pages can be read directly from main memory with 541 541 * the GPU.
+7 -6
drivers/gpu/drm/i915/gem/i915_gem_ttm.c
··· 165 165 i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : 166 166 obj->mm.region, &places[0], obj->bo_offset, 167 167 obj->base.size, flags); 168 - places[0].flags |= TTM_PL_FLAG_DESIRED; 169 168 170 169 /* Cache this on object? */ 171 170 for (i = 0; i < num_allowed; ++i) { ··· 778 779 .interruptible = true, 779 780 .no_wait_gpu = false, 780 781 }; 781 - int real_num_busy; 782 + struct ttm_placement initial_placement; 783 + struct ttm_place initial_place; 782 784 int ret; 783 785 784 786 /* First try only the requested placement. No eviction. */ 785 - real_num_busy = placement->num_placement; 786 - placement->num_placement = 1; 787 - ret = ttm_bo_validate(bo, placement, &ctx); 787 + initial_placement.num_placement = 1; 788 + memcpy(&initial_place, placement->placement, sizeof(struct ttm_place)); 789 + initial_place.flags |= TTM_PL_FLAG_DESIRED; 790 + initial_placement.placement = &initial_place; 791 + ret = ttm_bo_validate(bo, &initial_placement, &ctx); 788 792 if (ret) { 789 793 ret = i915_ttm_err_to_gem(ret); 790 794 /* ··· 802 800 * If the initial attempt fails, allow all accepted placements, 803 801 * evicting if necessary. 804 802 */ 805 - placement->num_placement = real_num_busy; 806 803 ret = ttm_bo_validate(bo, placement, &ctx); 807 804 if (ret) 808 805 return i915_ttm_err_to_gem(ret);
+2
drivers/gpu/drm/i915/gt/intel_engine_cs.c
··· 693 693 694 694 memset(&engine->reset, 0, sizeof(engine->reset)); 695 695 } 696 + 697 + llist_del_all(&gt->i915->uabi_engines_llist); 696 698 } 697 699 698 700 void intel_engine_free_request_pool(struct intel_engine_cs *engine)
+1
drivers/gpu/drm/i915/gt/intel_gpu_commands.h
··· 220 220 #define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) 221 221 #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) 222 222 #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) 223 + #define CMD_3DSTATE_MESH_CONTROL ((0x3 << 29) | (0x3 << 27) | (0x0 << 24) | (0x77 << 16) | (0x3)) 223 224 224 225 #define XY_CTRL_SURF_INSTR_SIZE 5 225 226 #define MI_FLUSH_DW_SIZE 3
+6 -1
drivers/gpu/drm/i915/gt/intel_gt.h
··· 174 174 175 175 int intel_gt_probe_all(struct drm_i915_private *i915); 176 176 int intel_gt_tiles_init(struct drm_i915_private *i915); 177 - void intel_gt_release_all(struct drm_i915_private *i915); 178 177 179 178 #define for_each_gt(gt__, i915__, id__) \ 180 179 for ((id__) = 0; \ ··· 207 208 void intel_gt_bind_context_set_ready(struct intel_gt *gt); 208 209 void intel_gt_bind_context_set_unready(struct intel_gt *gt); 209 210 bool intel_gt_is_bind_context_ready(struct intel_gt *gt); 211 + 212 + static inline void intel_gt_set_wedged_async(struct intel_gt *gt) 213 + { 214 + queue_work(system_highpri_wq, &gt->wedge); 215 + } 216 + 210 217 #endif /* __INTEL_GT_H__ */
+2
drivers/gpu/drm/i915/gt/intel_gt_types.h
··· 292 292 struct gt_defaults defaults; 293 293 struct kobject *sysfs_defaults; 294 294 295 + struct work_struct wedge; 296 + 295 297 struct i915_perf_gt perf; 296 298 297 299 /** link: &ggtt.gt_list */
+11 -1
drivers/gpu/drm/i915/gt/intel_reset.c
··· 1013 1013 GT_TRACE(gt, "end\n"); 1014 1014 } 1015 1015 1016 + static void set_wedged_work(struct work_struct *w) 1017 + { 1018 + struct intel_gt *gt = container_of(w, struct intel_gt, wedge); 1019 + intel_wakeref_t wf; 1020 + 1021 + with_intel_runtime_pm(gt->uncore->rpm, wf) 1022 + __intel_gt_set_wedged(gt); 1023 + } 1024 + 1016 1025 void intel_gt_set_wedged(struct intel_gt *gt) 1017 1026 { 1018 1027 intel_wakeref_t wakeref; ··· 1623 1614 init_waitqueue_head(&gt->reset.queue); 1624 1615 mutex_init(&gt->reset.mutex); 1625 1616 init_srcu_struct(&gt->reset.backoff_srcu); 1617 + INIT_WORK(&gt->wedge, set_wedged_work); 1626 1618 1627 1619 /* 1628 1620 * While undesirable to wait inside the shrinker, complain anyway. ··· 1650 1640 struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); 1651 1641 1652 1642 gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name); 1653 - intel_gt_set_wedged(w->gt); 1643 + set_wedged_work(&w->gt->wedge); 1654 1644 } 1655 1645 1656 1646 void __intel_init_wedge(struct intel_wedge_me *w,
+15 -1
drivers/gpu/drm/i915/gt/intel_workarounds.c
··· 974 974 if (ret) 975 975 return ret; 976 976 977 - cs = intel_ring_begin(rq, (wal->count * 2 + 2)); 977 + if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || 978 + IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) 979 + cs = intel_ring_begin(rq, (wal->count * 2 + 6)); 980 + else 981 + cs = intel_ring_begin(rq, (wal->count * 2 + 2)); 982 + 978 983 if (IS_ERR(cs)) 979 984 return PTR_ERR(cs); 980 985 ··· 1008 1003 *cs++ = val; 1009 1004 } 1010 1005 *cs++ = MI_NOOP; 1006 + 1007 + /* Wa_14019789679 */ 1008 + if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || 1009 + IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) { 1010 + *cs++ = CMD_3DSTATE_MESH_CONTROL; 1011 + *cs++ = 0; 1012 + *cs++ = 0; 1013 + *cs++ = MI_NOOP; 1014 + } 1011 1015 1012 1016 intel_uncore_forcewake_put__locked(uncore, fw); 1013 1017 spin_unlock(&uncore->lock);
+1 -1
drivers/gpu/drm/i915/gt/selftest_migrate.c
··· 336 336 337 337 if (vaddr[x] != val) { 338 338 pr_err("%ps failed, (%u != %u), offset: %zu\n", 339 - fn, vaddr[x], val, x * sizeof(u32)); 339 + fn, vaddr[x], val, x * sizeof(u32)); 340 340 igt_hexdump(vaddr + i * 1024, 4096); 341 341 err = -EINVAL; 342 342 }
+1
drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
··· 107 107 enum { 108 108 GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001, 109 109 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, 110 + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 0x9006, 110 111 }; 111 112 112 113 #endif /* _ABI_GUC_KLVS_ABI_H */
+1 -1
drivers/gpu/drm/i915/gt/uc/intel_guc.c
··· 296 296 297 297 /* Wa_16019325821 */ 298 298 /* Wa_14019159160 */ 299 - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) 299 + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) 300 300 flags |= GUC_WA_RCS_CCS_SWITCHOUT; 301 301 302 302 /*
+9 -9
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
··· 815 815 return PAGE_ALIGN(total_size); 816 816 } 817 817 818 - static void guc_waklv_enable_simple(struct intel_guc *guc, 819 - u32 klv_id, u32 *offset, u32 *remain) 818 + static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id) 820 819 { 821 820 u32 size; 822 821 u32 klv_entry[] = { ··· 849 850 remain = guc_ads_waklv_size(guc); 850 851 851 852 /* Wa_14019159160 */ 852 - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) 853 - guc_waklv_enable_simple(guc, 854 - GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE, 855 - &offset, &remain); 853 + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { 854 + guc_waklv_enable_simple(guc, &offset, &remain, 855 + GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE); 856 + guc_waklv_enable_simple(guc, &offset, &remain, 857 + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE); 858 + } 856 859 857 860 /* Wa_16021333562 */ 858 861 if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) && 859 862 (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || 860 863 IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) || 861 864 IS_DG2(gt->i915))) 862 - guc_waklv_enable_simple(guc, 863 - GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, 864 - &offset, &remain); 865 + guc_waklv_enable_simple(guc, &offset, &remain, 866 + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); 865 867 866 868 size = guc_ads_waklv_size(guc) - remain; 867 869 if (!size)
+3 -2
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
··· 2014 2014 2015 2015 /* 2016 2016 * Technically possible for either of these values to be non-zero here, 2017 - * but very unlikely + harmless. Regardless let's add a warn so we can 2017 + * but very unlikely + harmless. Regardless let's add an error so we can 2018 2018 * see in CI if this happens frequently / a precursor to taking down the 2019 2019 * machine. 2020 2020 */ 2021 - GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 2021 + if (atomic_read(&guc->outstanding_submission_g2h)) 2022 + guc_err(guc, "Unexpected outstanding GuC to Host in reset finish\n"); 2022 2023 atomic_set(&guc->outstanding_submission_g2h, 0); 2023 2024 2024 2025 intel_guc_global_policies_update(guc);
+1 -1
drivers/gpu/drm/i915/gt/uc/intel_uc.c
··· 99 99 } 100 100 101 101 if (!intel_uc_supports_guc(uc)) 102 - gt_info(gt, "Incompatible option enable_guc=%d - %s\n", 102 + gt_info(gt, "Incompatible option enable_guc=%d - %s\n", 103 103 i915->params.enable_guc, "GuC is not supported!"); 104 104 105 105 if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
+11 -1
drivers/gpu/drm/i915/i915_mm.c
··· 122 122 * @addr: target user address to start at 123 123 * @size: size of map area 124 124 * @sgl: Start sg entry 125 + * @offset: offset from the start of the page 125 126 * @iobase: Use stored dma address offset by this address or pfn if -1 126 127 * 127 128 * Note: this is only safe if the mm semaphore is held when called. 128 129 */ 129 130 int remap_io_sg(struct vm_area_struct *vma, 130 131 unsigned long addr, unsigned long size, 131 - struct scatterlist *sgl, resource_size_t iobase) 132 + struct scatterlist *sgl, unsigned long offset, 133 + resource_size_t iobase) 132 134 { 133 135 struct remap_pfn r = { 134 136 .mm = vma->vm_mm, ··· 142 140 143 141 /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ 144 142 GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); 143 + 144 + while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) { 145 + offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT; 146 + r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase)); 147 + if (!r.sgt.sgp) 148 + return -EINVAL; 149 + } 150 + r.sgt.curr = offset << PAGE_SHIFT; 145 151 146 152 if (!use_dma(iobase)) 147 153 flush_cache_range(vma, addr, size);
+2 -1
drivers/gpu/drm/i915/i915_mm.h
··· 30 30 31 31 int remap_io_sg(struct vm_area_struct *vma, 32 32 unsigned long addr, unsigned long size, 33 - struct scatterlist *sgl, resource_size_t iobase); 33 + struct scatterlist *sgl, unsigned long offset, 34 + resource_size_t iobase); 34 35 35 36 #endif /* __I915_MM_H__ */
+5 -2
drivers/gpu/drm/i915/intel_uncore.c
··· 24 24 #include <drm/drm_managed.h> 25 25 #include <linux/pm_runtime.h> 26 26 27 + #include "gt/intel_gt.h" 27 28 #include "gt/intel_engine_regs.h" 28 29 #include "gt/intel_gt_regs.h" 29 30 ··· 181 180 if (!wait_ack_clear(d, FORCEWAKE_KERNEL)) 182 181 return; 183 182 184 - if (fw_ack(d) == ~0) 183 + if (fw_ack(d) == ~0) { 185 184 drm_err(&d->uncore->i915->drm, 186 185 "%s: MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n", 187 186 intel_uncore_forcewake_domain_to_str(d->id)); 188 - else 187 + intel_gt_set_wedged_async(d->uncore->gt); 188 + } else { 189 189 drm_err(&d->uncore->i915->drm, 190 190 "%s: timed out waiting for forcewake ack to clear.\n", 191 191 intel_uncore_forcewake_domain_to_str(d->id)); 192 + } 192 193 193 194 add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */ 194 195 }
+1 -1
drivers/gpu/drm/i915/selftests/intel_memory_region.c
··· 517 517 518 518 if (!IS_ALIGNED(daddr, ps)) { 519 519 pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", 520 - __func__, &daddr, ps); 520 + __func__, &daddr, ps); 521 521 err = -EINVAL; 522 522 goto out_close; 523 523 }