Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'amd-drm-next-7.1-2026-03-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-7.1-2026-03-19:

amdgpu:
- Fix gamma 2.2 colorop TFs
- BO list fix
- LTO fix
- DC FP fix
- DisplayID handling fix
- DCN 2.01 fix
- MMHUB boundary fixes
- ISP fix
- TLB fence fix
- Hainan pm fix
- UserQ fixes
- MES 12.1 updates
- GC 12.1 updates
- RAS fixes
- DML updates
- Cursor fixes
- SWSMU cleanups
- Misc cleanups
- Clean up duplicate format modifiers
- Devcoredump updates
- Clean up mmhub cid handling
- Initial VCN 5.0.2 support
- Initial JPEG 5.0.2 support
- PSP 13.0.15 updates

amdkfd:
- Queue properties fix
- GC 12.1 updates

radeon:
- Hainan pm fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260319173334.479766-1-alexander.deucher@amd.com

+4588 -1421
+2
drivers/gpu/drm/amd/amdgpu/Makefile
··· 213 213 vcn_v4_0_5.o \ 214 214 vcn_v5_0_0.o \ 215 215 vcn_v5_0_1.o \ 216 + vcn_v5_0_2.o \ 216 217 amdgpu_jpeg.o \ 217 218 jpeg_v1_0.o \ 218 219 jpeg_v2_0.o \ ··· 224 223 jpeg_v4_0_5.o \ 225 224 jpeg_v5_0_0.o \ 226 225 jpeg_v5_0_1.o \ 226 + jpeg_v5_0_2.o \ 227 227 jpeg_v5_3_0.o 228 228 229 229 # add VPE block
+6
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 327 327 struct amdgpu_hive_info; 328 328 struct amdgpu_reset_context; 329 329 struct amdgpu_reset_control; 330 + struct amdgpu_coredump_info; 330 331 331 332 enum amdgpu_cp_irq { 332 333 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0, ··· 1147 1146 struct list_head ras_list; 1148 1147 1149 1148 struct amdgpu_reset_domain *reset_domain; 1149 + 1150 + #ifdef CONFIG_DEV_COREDUMP 1151 + struct amdgpu_coredump_info *coredump; 1152 + struct work_struct coredump_work; 1153 + #endif 1150 1154 1151 1155 struct mutex benchmark_mutex; 1152 1156
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c
··· 330 330 331 331 watch_address_cntl = 0; 332 332 watch_address_low = lower_32_bits(watch_address); 333 - watch_address_high = upper_32_bits(watch_address) & 0xffff; 333 + watch_address_high = upper_32_bits(watch_address) & 0x1ffffff; 334 334 335 335 watch_address_cntl = REG_SET_FIELD(watch_address_cntl, 336 336 TCP_WATCH0_CNTL,
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
··· 36 36 37 37 #define AMDGPU_BO_LIST_MAX_PRIORITY 32u 38 38 #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) 39 + #define AMDGPU_BO_LIST_MAX_ENTRIES (128 * 1024) 39 40 40 41 static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) 41 42 { ··· 188 187 const uint32_t bo_info_size = in->bo_info_size; 189 188 const uint32_t bo_number = in->bo_number; 190 189 struct drm_amdgpu_bo_list_entry *info; 190 + 191 + if (bo_number > AMDGPU_BO_LIST_MAX_ENTRIES) 192 + return -EINVAL; 191 193 192 194 /* copy the handle array from userspace to a kernel buffer */ 193 195 if (likely(info_size == bo_info_size)) {
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
··· 1231 1231 case CONNECTOR_OBJECT_ID_HDMI_TYPE_B: 1232 1232 max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2; 1233 1233 break; 1234 + default: 1235 + return MODE_BAD; 1234 1236 } 1235 1237 1236 1238 /* When the display EDID claims that it's an HDMI display,
+7
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 1740 1740 struct drm_amdgpu_fence *fences; 1741 1741 int r; 1742 1742 1743 + /* 1744 + * fence_count must be non-zero; dma_fence_wait_any_timeout() 1745 + * does not accept an empty fence array. 1746 + */ 1747 + if (!wait->in.fence_count) 1748 + return -EINVAL; 1749 + 1743 1750 /* Get the fences from userspace */ 1744 1751 fences = memdup_array_user(u64_to_user_ptr(wait->in.fences), 1745 1752 wait->in.fence_count,
+76 -7
drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
··· 32 32 bool vram_lost, struct amdgpu_job *job) 33 33 { 34 34 } 35 + void amdgpu_coredump_init(struct amdgpu_device *adev) 36 + { 37 + } 35 38 #else 39 + 40 + #define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024) 36 41 37 42 const char *hw_ip_names[MAX_HWIP] = { 38 43 [GC_HWIP] = "GC", ··· 201 196 } 202 197 203 198 static ssize_t 204 - amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, 205 - void *data, size_t datalen) 199 + amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump) 206 200 { 207 201 struct drm_printer p; 208 - struct amdgpu_coredump_info *coredump = data; 209 202 struct drm_print_iterator iter; 210 203 struct amdgpu_vm_fault_info *fault_info; 211 204 struct amdgpu_ip_block *ip_block; ··· 211 208 212 209 iter.data = buffer; 213 210 iter.offset = 0; 214 - iter.start = offset; 215 211 iter.remain = count; 216 212 217 213 p = drm_coredump_printer(&iter); ··· 324 322 return count - iter.remain; 325 323 } 326 324 325 + static ssize_t 326 + amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, 327 + void *data, size_t datalen) 328 + { 329 + struct amdgpu_coredump_info *coredump = data; 330 + ssize_t byte_copied; 331 + 332 + if (!coredump) 333 + return -ENODEV; 334 + 335 + if (!coredump->formatted) 336 + return -ENODEV; 337 + 338 + if (offset >= coredump->formatted_size) 339 + return 0; 340 + 341 + byte_copied = count < coredump->formatted_size - offset ? 
count : 342 + coredump->formatted_size - offset; 343 + memcpy(buffer, coredump->formatted + offset, byte_copied); 344 + 345 + return byte_copied; 346 + } 347 + 327 348 static void amdgpu_devcoredump_free(void *data) 328 349 { 329 - kfree(data); 350 + struct amdgpu_coredump_info *coredump = data; 351 + 352 + kvfree(coredump->formatted); 353 + kvfree(data); 354 + } 355 + 356 + static void amdgpu_devcoredump_deferred_work(struct work_struct *work) 357 + { 358 + struct amdgpu_device *adev = container_of(work, typeof(*adev), coredump_work); 359 + struct amdgpu_coredump_info *coredump = adev->coredump; 360 + 361 + /* Do a one-time preparation of the coredump output because 362 + * repeatingly calling drm_coredump_printer is very slow. 363 + */ 364 + coredump->formatted_size = amdgpu_devcoredump_format( 365 + NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump); 366 + coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL); 367 + if (!coredump->formatted) { 368 + amdgpu_devcoredump_free(coredump); 369 + goto end; 370 + } 371 + 372 + amdgpu_devcoredump_format(coredump->formatted, coredump->formatted_size, coredump); 373 + 374 + /* If there's an existing coredump for this device, the free function will be 375 + * called immediately so coredump might be invalid after the call to dev_coredumpm. 376 + */ 377 + dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, 378 + amdgpu_devcoredump_read, amdgpu_devcoredump_free); 379 + 380 + end: 381 + adev->coredump = NULL; 330 382 } 331 383 332 384 void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, ··· 389 333 struct drm_device *dev = adev_to_drm(adev); 390 334 struct amdgpu_coredump_info *coredump; 391 335 struct drm_sched_job *s_job; 336 + 337 + /* No need to generate a new coredump if there's one in progress already. 
*/ 338 + if (work_pending(&adev->coredump_work)) 339 + return; 392 340 393 341 coredump = kzalloc_obj(*coredump, GFP_NOWAIT); 394 342 if (!coredump) ··· 420 360 421 361 ktime_get_ts64(&coredump->reset_time); 422 362 423 - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, 424 - amdgpu_devcoredump_read, amdgpu_devcoredump_free); 363 + /* Update the current coredump pointer (no lock needed, this function can only be called 364 + * from a single thread) 365 + */ 366 + adev->coredump = coredump; 367 + /* Kick off coredump formatting to a worker thread. */ 368 + queue_work(system_unbound_wq, &adev->coredump_work); 425 369 426 370 drm_info(dev, "AMDGPU device coredump file has been created\n"); 427 371 drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", 428 372 dev->primary->index); 373 + } 374 + 375 + void amdgpu_coredump_init(struct amdgpu_device *adev) 376 + { 377 + INIT_WORK(&adev->coredump_work, amdgpu_devcoredump_deferred_work); 429 378 } 430 379 #endif
+7
drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
··· 35 35 struct amdgpu_device *adev; 36 36 struct amdgpu_task_info reset_task_info; 37 37 struct timespec64 reset_time; 38 + 38 39 bool skip_vram_check; 39 40 bool reset_vram_lost; 40 41 struct amdgpu_ring *ring; 42 + /* Readable form of coredevdump, generate once to speed up 43 + * reading it (see drm_coredump_printer's documentation). 44 + */ 45 + ssize_t formatted_size; 46 + char *formatted; 41 47 }; 42 48 #endif 43 49 44 50 void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, 45 51 bool vram_lost, struct amdgpu_job *job); 52 + void amdgpu_coredump_init(struct amdgpu_device *adev); 46 53 #endif
+17 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 3781 3781 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3782 3782 INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work); 3783 3783 3784 + amdgpu_coredump_init(adev); 3785 + 3784 3786 adev->gfx.gfx_off_req_count = 1; 3785 3787 adev->gfx.gfx_off_residency = 0; 3786 3788 adev->gfx.gfx_off_entrycount = 0; ··· 3880 3878 amdgpu_gmc_noretry_set(adev); 3881 3879 /* Need to get xgmi info early to decide the reset behavior*/ 3882 3880 if (adev->gmc.xgmi.supported) { 3883 - r = adev->gfxhub.funcs->get_xgmi_info(adev); 3884 - if (r) 3885 - return r; 3881 + if (adev->gfxhub.funcs && 3882 + adev->gfxhub.funcs->get_xgmi_info) { 3883 + r = adev->gfxhub.funcs->get_xgmi_info(adev); 3884 + if (r) 3885 + return r; 3886 + } 3887 + } 3888 + 3889 + if (adev->gmc.xgmi.connected_to_cpu) { 3890 + if (adev->mmhub.funcs && 3891 + adev->mmhub.funcs->get_xgmi_info) { 3892 + r = adev->mmhub.funcs->get_xgmi_info(adev); 3893 + if (r) 3894 + return r; 3895 + } 3886 3896 } 3887 3897 3888 3898 /* enable PCIE atomic ops */
+28 -13
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
··· 112 112 #include "smuio_v15_0_8.h" 113 113 #include "vcn_v5_0_0.h" 114 114 #include "vcn_v5_0_1.h" 115 + #include "vcn_v5_0_2.h" 115 116 #include "jpeg_v5_0_0.h" 116 117 #include "jpeg_v5_0_1.h" 118 + #include "jpeg_v5_0_2.h" 117 119 #include "jpeg_v5_3_0.h" 118 120 119 121 #include "amdgpu_ras_mgr.h" ··· 298 296 if (vram_size) 299 297 adev->discovery.offset = (vram_size << 20) - DISCOVERY_TMR_OFFSET; 300 298 301 - if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) { 302 - adev->discovery.offset = 303 - adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset; 304 - adev->discovery.size = 305 - adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10; 306 - if (!adev->discovery.offset || !adev->discovery.size) 307 - return -EINVAL; 299 + if (amdgpu_sriov_vf(adev)) { 300 + if (adev->virt.is_dynamic_crit_regn_enabled) { 301 + adev->discovery.offset = 302 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset; 303 + adev->discovery.size = 304 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10; 305 + if (!adev->discovery.offset || !adev->discovery.size) 306 + return -EINVAL; 307 + } else { 308 + goto out; 309 + } 308 310 } else { 309 311 tmr_size = RREG32(mmDRIVER_SCRATCH_2); 310 312 if (tmr_size) { ··· 328 322 adev->discovery.offset = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET; 329 323 } 330 324 } 331 - 325 + out: 332 326 adev->discovery.bin = kzalloc(adev->discovery.size, GFP_KERNEL); 333 327 if (!adev->discovery.bin) 334 328 return -ENOMEM; ··· 562 556 checksum = le16_to_cpu(info->checksum); 563 557 564 558 switch (table_id) { 565 - case IP_DISCOVERY: 559 + case IP_DISCOVERY: { 566 560 struct ip_discovery_header *ihdr = 567 561 (struct ip_discovery_header *)(discovery_bin + offset); 568 562 act_val = le32_to_cpu(ihdr->signature); ··· 570 564 table_size = le16_to_cpu(ihdr->size); 571 565 table_name = "data table"; 572 566 break; 573 - case GC: 567 + } 568 + case GC: { 574 569 struct gpu_info_header *ghdr 
= 575 570 (struct gpu_info_header *)(discovery_bin + offset); 576 571 act_val = le32_to_cpu(ghdr->table_id); ··· 579 572 table_size = le16_to_cpu(ghdr->size); 580 573 table_name = "gc table"; 581 574 break; 582 - case HARVEST_INFO: 575 + } 576 + case HARVEST_INFO: { 583 577 struct harvest_info_header *hhdr = 584 578 (struct harvest_info_header *)(discovery_bin + offset); 585 579 act_val = le32_to_cpu(hhdr->signature); ··· 588 580 table_size = sizeof(struct harvest_table); 589 581 table_name = "harvest table"; 590 582 break; 591 - case VCN_INFO: 583 + } 584 + case VCN_INFO: { 592 585 struct vcn_info_header *vhdr = 593 586 (struct vcn_info_header *)(discovery_bin + offset); 594 587 act_val = le32_to_cpu(vhdr->table_id); ··· 597 588 table_size = le32_to_cpu(vhdr->size_bytes); 598 589 table_name = "vcn table"; 599 590 break; 600 - case MALL_INFO: 591 + } 592 + case MALL_INFO: { 601 593 struct mall_info_header *mhdr = 602 594 (struct mall_info_header *)(discovery_bin + offset); 603 595 act_val = le32_to_cpu(mhdr->table_id); ··· 607 597 table_name = "mall table"; 608 598 check_table = false; 609 599 break; 600 + } 610 601 default: 611 602 dev_err(adev->dev, "invalid ip discovery table id %d specified\n", table_id); 612 603 check_table = false; ··· 2650 2639 case IP_VERSION(5, 0, 1): 2651 2640 amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block); 2652 2641 amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block); 2642 + break; 2643 + case IP_VERSION(5, 0, 2): 2644 + amdgpu_device_ip_block_add(adev, &vcn_v5_0_2_ip_block); 2645 + amdgpu_device_ip_block_add(adev, &jpeg_v5_0_2_ip_block); 2653 2646 break; 2654 2647 default: 2655 2648 dev_err(adev->dev,
+6 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 839 839 /** 840 840 * DOC: mtype_local (int) 841 841 */ 842 - int amdgpu_mtype_local; 843 - MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)"); 842 + int amdgpu_mtype_local = -1; 843 + MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (default: ASIC dependent, 0 = MTYPE_RW, 1 = MTYPE_NC, 2 = MTYPE_CC)"); 844 844 module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444); 845 845 846 846 /** ··· 2952 2952 int idx; 2953 2953 2954 2954 if (fpriv && drm_dev_enter(dev, &idx)) { 2955 - fpriv->evf_mgr.fd_closing = true; 2956 - amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); 2955 + amdgpu_evf_mgr_shutdown(&fpriv->evf_mgr); 2956 + amdgpu_userq_mgr_cancel_resume(&fpriv->userq_mgr); 2957 + amdgpu_evf_mgr_flush_suspend(&fpriv->evf_mgr); 2957 2958 amdgpu_userq_mgr_fini(&fpriv->userq_mgr); 2959 + amdgpu_evf_mgr_fini(&fpriv->evf_mgr); 2958 2960 drm_dev_exit(idx); 2959 2961 } 2960 2962
+85 -152
drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
··· 25 25 #include <drm/drm_exec.h> 26 26 #include "amdgpu.h" 27 27 28 - #define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name) 29 - #define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr) 30 - 31 28 static const char * 32 29 amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) 33 30 { ··· 40 43 return ef->timeline_name; 41 44 } 42 45 43 - int 44 - amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, 45 - struct drm_exec *exec) 46 - { 47 - struct amdgpu_eviction_fence *old_ef, *new_ef; 48 - struct drm_gem_object *obj; 49 - unsigned long index; 50 - int ret; 51 - 52 - if (evf_mgr->ev_fence && 53 - !dma_fence_is_signaled(&evf_mgr->ev_fence->base)) 54 - return 0; 55 - /* 56 - * Steps to replace eviction fence: 57 - * * lock all objects in exec (caller) 58 - * * create a new eviction fence 59 - * * update new eviction fence in evf_mgr 60 - * * attach the new eviction fence to BOs 61 - * * release the old fence 62 - * * unlock the objects (caller) 63 - */ 64 - new_ef = amdgpu_eviction_fence_create(evf_mgr); 65 - if (!new_ef) { 66 - DRM_ERROR("Failed to create new eviction fence\n"); 67 - return -ENOMEM; 68 - } 69 - 70 - /* Update the eviction fence now */ 71 - spin_lock(&evf_mgr->ev_fence_lock); 72 - old_ef = evf_mgr->ev_fence; 73 - evf_mgr->ev_fence = new_ef; 74 - spin_unlock(&evf_mgr->ev_fence_lock); 75 - 76 - /* Attach the new fence */ 77 - drm_exec_for_each_locked_object(exec, index, obj) { 78 - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 79 - 80 - if (!bo) 81 - continue; 82 - ret = amdgpu_eviction_fence_attach(evf_mgr, bo); 83 - if (ret) { 84 - DRM_ERROR("Failed to attch new eviction fence\n"); 85 - goto free_err; 86 - } 87 - } 88 - 89 - /* Free old fence */ 90 - if (old_ef) 91 - dma_fence_put(&old_ef->base); 92 - return 0; 93 - 94 - free_err: 95 - kfree(new_ef); 96 - return ret; 97 - } 98 - 99 - static void 100 - amdgpu_eviction_fence_suspend_worker(struct work_struct *work) 
101 - { 102 - struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); 103 - struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); 104 - struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; 105 - struct amdgpu_eviction_fence *ev_fence; 106 - 107 - mutex_lock(&uq_mgr->userq_mutex); 108 - spin_lock(&evf_mgr->ev_fence_lock); 109 - ev_fence = evf_mgr->ev_fence; 110 - if (ev_fence) 111 - dma_fence_get(&ev_fence->base); 112 - else 113 - goto unlock; 114 - spin_unlock(&evf_mgr->ev_fence_lock); 115 - 116 - amdgpu_userq_evict(uq_mgr, ev_fence); 117 - 118 - mutex_unlock(&uq_mgr->userq_mutex); 119 - dma_fence_put(&ev_fence->base); 120 - return; 121 - 122 - unlock: 123 - spin_unlock(&evf_mgr->ev_fence_lock); 124 - mutex_unlock(&uq_mgr->userq_mutex); 125 - } 126 - 127 46 static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) 128 47 { 129 - struct amdgpu_eviction_fence_mgr *evf_mgr; 130 - struct amdgpu_eviction_fence *ev_fence; 48 + struct amdgpu_eviction_fence *ev_fence = to_ev_fence(f); 131 49 132 - if (!f) 133 - return true; 134 - 135 - ev_fence = to_ev_fence(f); 136 - evf_mgr = ev_fence->evf_mgr; 137 - 138 - schedule_delayed_work(&evf_mgr->suspend_work, 0); 50 + schedule_work(&ev_fence->evf_mgr->suspend_work); 139 51 return true; 140 52 } 141 53 ··· 54 148 .enable_signaling = amdgpu_eviction_fence_enable_signaling, 55 149 }; 56 150 57 - void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, 58 - struct amdgpu_eviction_fence *ev_fence) 151 + static void 152 + amdgpu_eviction_fence_suspend_worker(struct work_struct *work) 59 153 { 60 - spin_lock(&evf_mgr->ev_fence_lock); 61 - dma_fence_signal(&ev_fence->base); 62 - spin_unlock(&evf_mgr->ev_fence_lock); 154 + struct amdgpu_eviction_fence_mgr *evf_mgr = 155 + container_of(work, struct amdgpu_eviction_fence_mgr, 156 + suspend_work); 157 + struct amdgpu_fpriv *fpriv = 158 + container_of(evf_mgr, struct amdgpu_fpriv, evf_mgr); 159 + struct amdgpu_userq_mgr 
*uq_mgr = &fpriv->userq_mgr; 160 + struct dma_fence *ev_fence; 161 + 162 + mutex_lock(&uq_mgr->userq_mutex); 163 + ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr); 164 + amdgpu_userq_evict(uq_mgr, !evf_mgr->shutdown); 165 + 166 + /* 167 + * Signaling the eviction fence must be done while holding the 168 + * userq_mutex. Otherwise we won't resume the queues before issuing the 169 + * next fence. 170 + */ 171 + dma_fence_signal(ev_fence); 172 + dma_fence_put(ev_fence); 173 + mutex_unlock(&uq_mgr->userq_mutex); 63 174 } 64 175 65 - struct amdgpu_eviction_fence * 66 - amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) 176 + int amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, 177 + struct amdgpu_bo *bo) 178 + { 179 + struct dma_fence *ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr); 180 + struct ttm_operation_ctx ctx = { false, false }; 181 + struct dma_resv *resv = bo->tbo.base.resv; 182 + int ret; 183 + 184 + if (!dma_fence_is_signaled(ev_fence)) { 185 + 186 + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); 187 + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 188 + if (!ret) 189 + dma_resv_add_fence(resv, ev_fence, 190 + DMA_RESV_USAGE_BOOKKEEP); 191 + } else { 192 + ret = 0; 193 + } 194 + 195 + dma_fence_put(ev_fence); 196 + return ret; 197 + } 198 + 199 + int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr, 200 + struct drm_exec *exec) 67 201 { 68 202 struct amdgpu_eviction_fence *ev_fence; 203 + struct drm_gem_object *obj; 204 + unsigned long index; 69 205 206 + /* Create and initialize a new eviction fence */ 70 207 ev_fence = kzalloc_obj(*ev_fence); 71 208 if (!ev_fence) 72 - return NULL; 209 + return -ENOMEM; 73 210 74 211 ev_fence->evf_mgr = evf_mgr; 75 212 get_task_comm(ev_fence->timeline_name, current); ··· 120 171 dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops, 121 172 &ev_fence->lock, evf_mgr->ev_fence_ctx, 122 173 atomic_inc_return(&evf_mgr->ev_fence_seq)); 123 - return 
ev_fence; 124 - } 125 174 126 - void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) 127 - { 128 - struct amdgpu_eviction_fence *ev_fence; 175 + /* Remember it for newly added BOs */ 176 + dma_fence_put(evf_mgr->ev_fence); 177 + evf_mgr->ev_fence = &ev_fence->base; 129 178 130 - /* Wait for any pending work to execute */ 131 - flush_delayed_work(&evf_mgr->suspend_work); 179 + /* And add it to all existing BOs */ 180 + drm_exec_for_each_locked_object(exec, index, obj) { 181 + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 132 182 133 - spin_lock(&evf_mgr->ev_fence_lock); 134 - ev_fence = evf_mgr->ev_fence; 135 - spin_unlock(&evf_mgr->ev_fence_lock); 136 - 137 - if (!ev_fence) 138 - return; 139 - 140 - dma_fence_wait(&ev_fence->base, false); 141 - 142 - /* Last unref of ev_fence */ 143 - dma_fence_put(&ev_fence->base); 144 - } 145 - 146 - int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, 147 - struct amdgpu_bo *bo) 148 - { 149 - struct amdgpu_eviction_fence *ev_fence; 150 - struct dma_resv *resv = bo->tbo.base.resv; 151 - int ret; 152 - 153 - if (!resv) 154 - return 0; 155 - 156 - ret = dma_resv_reserve_fences(resv, 1); 157 - if (ret) { 158 - DRM_DEBUG_DRIVER("Failed to resv fence space\n"); 159 - return ret; 183 + amdgpu_evf_mgr_attach_fence(evf_mgr, bo); 160 184 } 161 - 162 - spin_lock(&evf_mgr->ev_fence_lock); 163 - ev_fence = evf_mgr->ev_fence; 164 - if (ev_fence) 165 - dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP); 166 - spin_unlock(&evf_mgr->ev_fence_lock); 167 - 168 185 return 0; 169 186 } 170 187 171 - void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, 172 - struct amdgpu_bo *bo) 188 + void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, 189 + struct amdgpu_bo *bo) 173 190 { 174 191 struct dma_fence *stub = dma_fence_get_stub(); 175 192 ··· 144 229 dma_fence_put(stub); 145 230 } 146 231 147 - int amdgpu_eviction_fence_init(struct 
amdgpu_eviction_fence_mgr *evf_mgr) 232 + void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr) 148 233 { 149 - /* This needs to be done one time per open */ 150 234 atomic_set(&evf_mgr->ev_fence_seq, 0); 151 235 evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); 152 - spin_lock_init(&evf_mgr->ev_fence_lock); 236 + evf_mgr->ev_fence = dma_fence_get_stub(); 153 237 154 - INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); 155 - return 0; 238 + INIT_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); 239 + } 240 + 241 + void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr) 242 + { 243 + evf_mgr->shutdown = true; 244 + /* Make sure that the shutdown is visible to the suspend work */ 245 + flush_work(&evf_mgr->suspend_work); 246 + } 247 + 248 + void amdgpu_evf_mgr_flush_suspend(struct amdgpu_eviction_fence_mgr *evf_mgr) 249 + { 250 + dma_fence_wait(rcu_dereference_protected(evf_mgr->ev_fence, true), 251 + false); 252 + /* Make sure that we are done with the last suspend work */ 253 + flush_work(&evf_mgr->suspend_work); 254 + } 255 + 256 + void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr) 257 + { 258 + dma_fence_put(evf_mgr->ev_fence); 156 259 }
+29 -26
drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
··· 25 25 #ifndef AMDGPU_EV_FENCE_H_ 26 26 #define AMDGPU_EV_FENCE_H_ 27 27 28 + #include <linux/dma-fence.h> 29 + 28 30 struct amdgpu_eviction_fence { 29 31 struct dma_fence base; 30 32 spinlock_t lock; ··· 37 35 struct amdgpu_eviction_fence_mgr { 38 36 u64 ev_fence_ctx; 39 37 atomic_t ev_fence_seq; 40 - spinlock_t ev_fence_lock; 41 - struct amdgpu_eviction_fence *ev_fence; 42 - struct delayed_work suspend_work; 43 - uint8_t fd_closing; 38 + 39 + /* 40 + * Only updated while holding the VM resv lock. 41 + * Only signaled while holding the userq mutex. 42 + */ 43 + struct dma_fence __rcu *ev_fence; 44 + struct work_struct suspend_work; 45 + bool shutdown; 44 46 }; 45 47 46 - /* Eviction fence helper functions */ 47 - struct amdgpu_eviction_fence * 48 - amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr); 48 + static inline struct dma_fence * 49 + amdgpu_evf_mgr_get_fence(struct amdgpu_eviction_fence_mgr *evf_mgr) 50 + { 51 + struct dma_fence *ev_fence; 49 52 50 - void 51 - amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr); 53 + rcu_read_lock(); 54 + ev_fence = dma_fence_get_rcu_safe(&evf_mgr->ev_fence); 55 + rcu_read_unlock(); 56 + return ev_fence; 57 + } 52 58 53 - int 54 - amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, 55 - struct amdgpu_bo *bo); 59 + int amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, 60 + struct amdgpu_bo *bo); 61 + int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr, 62 + struct drm_exec *exec); 63 + void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, 64 + struct amdgpu_bo *bo); 65 + void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr); 66 + void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr); 67 + void amdgpu_evf_mgr_flush_suspend(struct amdgpu_eviction_fence_mgr *evf_mgr); 68 + void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr); 56 69 57 - void 58 - 
amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, 59 - struct amdgpu_bo *bo); 60 - 61 - int 62 - amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); 63 - 64 - void 65 - amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, 66 - struct amdgpu_eviction_fence *ev_fence); 67 - 68 - int 69 - amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, 70 - struct drm_exec *exec); 71 70 #endif
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
··· 106 106 *fru_addr = FRU_EEPROM_MADDR_8; 107 107 return true; 108 108 case IP_VERSION(13, 0, 12): 109 + case IP_VERSION(15, 0, 8): 109 110 if (fru_addr) 110 111 *fru_addr = FRU_EEPROM_MADDR_INV; 111 112 return true;
+7 -10
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
··· 258 258 259 259 amdgpu_vm_bo_update_shared(abo); 260 260 bo_va = amdgpu_vm_bo_find(vm, abo); 261 - if (!bo_va) 261 + if (!bo_va) { 262 262 bo_va = amdgpu_vm_bo_add(adev, vm, abo); 263 - else 263 + r = amdgpu_evf_mgr_attach_fence(&fpriv->evf_mgr, abo); 264 + if (r) 265 + goto out_unlock; 266 + } else { 264 267 ++bo_va->ref_count; 265 - 266 - /* attach gfx eviction fence */ 267 - r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); 268 - if (r) { 269 - DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); 270 - amdgpu_bo_unreserve(abo); 271 - return r; 272 268 } 269 + 273 270 drm_exec_fini(&exec); 274 271 275 272 /* Validate and add eviction fence to DMABuf imports with dynamic ··· 334 337 } 335 338 336 339 if (!amdgpu_vm_is_bo_always_valid(vm, bo)) 337 - amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); 340 + amdgpu_evf_mgr_detach_fence(&fpriv->evf_mgr, bo); 338 341 339 342 bo_va = amdgpu_vm_bo_find(vm, bo); 340 343 if (!bo_va || --bo_va->ref_count)
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
··· 463 463 struct amdgpu_irq_src cp_ecc_error_irq; 464 464 struct amdgpu_irq_src sq_irq; 465 465 struct amdgpu_irq_src rlc_gc_fed_irq; 466 + struct amdgpu_irq_src rlc_poison_irq; 466 467 struct sq_work sq_work; 467 468 468 469 /* gfx status */
+1 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
··· 1522 1522 "Failed to init usermode queue manager (%d), use legacy workload submission only\n", 1523 1523 r); 1524 1524 1525 - r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); 1526 - if (r) 1527 - goto error_vm; 1528 - 1525 + amdgpu_evf_mgr_init(&fpriv->evf_mgr); 1529 1526 amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); 1530 1527 1531 1528 file_priv->driver_priv = fpriv;
+26
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
··· 63 63 uint64_t page_table_base); 64 64 void (*update_power_gating)(struct amdgpu_device *adev, 65 65 bool enable); 66 + int (*get_xgmi_info)(struct amdgpu_device *adev); 67 + }; 68 + 69 + struct amdgpu_mmhub_client_ids { 70 + const char * const (*names)[2]; 71 + unsigned int size; 66 72 }; 67 73 68 74 struct amdgpu_mmhub { 69 75 struct ras_common_if *ras_if; 70 76 const struct amdgpu_mmhub_funcs *funcs; 71 77 struct amdgpu_mmhub_ras *ras; 78 + struct amdgpu_mmhub_client_ids client_ids; 72 79 }; 80 + 81 + static inline void 82 + amdgpu_mmhub_init_client_info(struct amdgpu_mmhub *mmhub, 83 + const char * const (*names)[2], 84 + unsigned int size) 85 + { 86 + mmhub->client_ids.names = names; 87 + mmhub->client_ids.size = size; 88 + } 89 + 90 + static inline const char * 91 + amdgpu_mmhub_client_name(struct amdgpu_mmhub *mmhub, 92 + u32 cid, bool is_write) 93 + { 94 + if (cid < mmhub->client_ids.size) 95 + return mmhub->client_ids.names[cid][is_write]; 96 + 97 + return NULL; 98 + } 73 99 74 100 int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev); 75 101
+6 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 2250 2250 case IP_VERSION(13, 0, 6): 2251 2251 case IP_VERSION(13, 0, 12): 2252 2252 case IP_VERSION(13, 0, 14): 2253 + case IP_VERSION(13, 0, 15): 2253 2254 ret = true; 2254 2255 break; 2255 2256 default: ··· 4001 4000 case IP_VERSION(13, 0, 6): 4002 4001 case IP_VERSION(13, 0, 12): 4003 4002 case IP_VERSION(13, 0, 14): 4003 + case IP_VERSION(13, 0, 15): 4004 4004 return true; 4005 4005 default: 4006 4006 return false; ··· 4015 4013 case IP_VERSION(13, 0, 10): 4016 4014 case IP_VERSION(13, 0, 12): 4017 4015 case IP_VERSION(13, 0, 14): 4016 + case IP_VERSION(13, 0, 15): 4018 4017 case IP_VERSION(14, 0, 3): 4019 4018 return true; 4020 4019 default: ··· 4185 4182 adev->aca.is_enabled = 4186 4183 (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || 4187 4184 amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || 4188 - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); 4185 + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14) || 4186 + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 15)); 4189 4187 } 4190 4188 4191 4189 /* bad page feature is not applicable to specific app platform */ ··· 4274 4270 case IP_VERSION(13, 0, 2): 4275 4271 case IP_VERSION(13, 0, 6): 4276 4272 case IP_VERSION(13, 0, 12): 4273 + case IP_VERSION(13, 0, 15): 4277 4274 con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT; 4278 4275 break; 4279 4276 case IP_VERSION(13, 0, 14):
-1
drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c
··· 314 314 "SMN base address query not supported for this device\n"); 315 315 return 0; 316 316 } 317 - return 0; 318 317 } 319 318 return adev->reg.smn.get_smn_base(adev, block, die_inst); 320 319 }
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
··· 1150 1150 if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) && 1151 1151 (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) { 1152 1152 amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, 1153 - (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? 1153 + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf || adev->gmc.xgmi.connected_to_cpu) ? 1154 1154 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, 1155 1155 &adev->firmware.fw_buf, 1156 1156 &adev->firmware.fw_buf_mc,
+11
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
··· 398 398 return 0; 399 399 } 400 400 401 + int amdgpu_umc_uniras_process_ecc_irq(struct amdgpu_device *adev, 402 + struct amdgpu_irq_src *source, 403 + struct amdgpu_iv_entry *entry) 404 + { 405 + struct ras_ih_info ih_info = {0}; 406 + 407 + ih_info.block = RAS_BLOCK_ID__UMC; 408 + amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info); 409 + return 0; 410 + } 411 + 401 412 int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, 402 413 uint64_t err_addr, 403 414 uint64_t retired_page,
+3
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
··· 161 161 int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, 162 162 struct amdgpu_irq_src *source, 163 163 struct amdgpu_iv_entry *entry); 164 + int amdgpu_umc_uniras_process_ecc_irq(struct amdgpu_device *adev, 165 + struct amdgpu_irq_src *source, 166 + struct amdgpu_iv_entry *entry); 164 167 int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, 165 168 uint64_t err_addr, 166 169 uint64_t retired_page,
+31 -33
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
··· 156 156 struct dma_fence *fence; 157 157 struct amdgpu_userq_mgr *uq_mgr; 158 158 159 - if (!queue || !queue->userq_mgr) 159 + if (!queue->userq_mgr) 160 160 return; 161 161 162 162 uq_mgr = queue->userq_mgr; ··· 472 472 amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, 473 473 struct amdgpu_eviction_fence_mgr *evf_mgr) 474 474 { 475 - struct amdgpu_eviction_fence *ev_fence; 475 + struct dma_fence *ev_fence; 476 476 477 477 retry: 478 478 /* Flush any pending resume work to create ev_fence */ 479 479 flush_delayed_work(&uq_mgr->resume_work); 480 480 481 481 mutex_lock(&uq_mgr->userq_mutex); 482 - spin_lock(&evf_mgr->ev_fence_lock); 483 - ev_fence = evf_mgr->ev_fence; 484 - spin_unlock(&evf_mgr->ev_fence_lock); 485 - if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) { 482 + ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr); 483 + if (dma_fence_is_signaled(ev_fence)) { 484 + dma_fence_put(ev_fence); 486 485 mutex_unlock(&uq_mgr->userq_mutex); 487 486 /* 488 487 * Looks like there was no pending resume work, ··· 490 491 schedule_delayed_work(&uq_mgr->resume_work, 0); 491 492 goto retry; 492 493 } 494 + dma_fence_put(ev_fence); 493 495 } 494 496 495 497 int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, ··· 623 623 int r = 0; 624 624 625 625 cancel_delayed_work_sync(&uq_mgr->resume_work); 626 - mutex_lock(&uq_mgr->userq_mutex); 627 - amdgpu_userq_wait_for_last_fence(queue); 626 + 628 627 /* Cancel any pending hang detection work and cleanup */ 629 - if (queue->hang_detect_fence) { 630 - cancel_delayed_work_sync(&queue->hang_detect_work); 631 - queue->hang_detect_fence = NULL; 632 - } 628 + cancel_delayed_work_sync(&queue->hang_detect_work); 629 + 630 + mutex_lock(&uq_mgr->userq_mutex); 631 + queue->hang_detect_fence = NULL; 632 + amdgpu_userq_wait_for_last_fence(queue); 633 + 633 634 r = amdgpu_bo_reserve(queue->db_obj.obj, true); 634 635 if (!r) { 635 636 amdgpu_bo_unpin(queue->db_obj.obj); ··· 1041 1040 struct amdgpu_bo *bo; 1042 1041 
int ret; 1043 1042 1044 - spin_lock(&vm->invalidated_lock); 1043 + spin_lock(&vm->status_lock); 1045 1044 while (!list_empty(&vm->invalidated)) { 1046 1045 bo_va = list_first_entry(&vm->invalidated, 1047 1046 struct amdgpu_bo_va, 1048 1047 base.vm_status); 1049 - spin_unlock(&vm->invalidated_lock); 1048 + spin_unlock(&vm->status_lock); 1050 1049 1051 1050 bo = bo_va->base.bo; 1052 1051 ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2); ··· 1063 1062 if (ret) 1064 1063 return ret; 1065 1064 1066 - spin_lock(&vm->invalidated_lock); 1065 + spin_lock(&vm->status_lock); 1067 1066 } 1068 - spin_unlock(&vm->invalidated_lock); 1067 + spin_unlock(&vm->status_lock); 1069 1068 1070 1069 return 0; 1071 1070 } ··· 1197 1196 dma_fence_wait(bo_va->last_pt_update, false); 1198 1197 dma_fence_wait(vm->last_update, false); 1199 1198 1200 - ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); 1199 + ret = amdgpu_evf_mgr_rearm(&fpriv->evf_mgr, &exec); 1201 1200 if (ret) 1202 1201 drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n"); 1203 1202 ··· 1217 1216 { 1218 1217 struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); 1219 1218 struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); 1219 + struct dma_fence *ev_fence; 1220 1220 int ret; 1221 1221 1222 - flush_delayed_work(&fpriv->evf_mgr.suspend_work); 1223 - 1224 1222 mutex_lock(&uq_mgr->userq_mutex); 1223 + ev_fence = amdgpu_evf_mgr_get_fence(&fpriv->evf_mgr); 1224 + if (!dma_fence_is_signaled(ev_fence)) 1225 + goto unlock; 1225 1226 1226 1227 ret = amdgpu_userq_vm_validate(uq_mgr); 1227 1228 if (ret) { ··· 1239 1236 1240 1237 unlock: 1241 1238 mutex_unlock(&uq_mgr->userq_mutex); 1239 + dma_fence_put(ev_fence); 1242 1240 } 1243 1241 1244 1242 static int ··· 1315 1311 } 1316 1312 1317 1313 void 1318 - amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, 1319 - struct amdgpu_eviction_fence *ev_fence) 1314 + amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, bool schedule_resume) 1320 1315 
{ 1321 - struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); 1322 - struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; 1323 1316 struct amdgpu_device *adev = uq_mgr->adev; 1324 1317 int ret; 1325 1318 ··· 1329 1328 if (ret) 1330 1329 dev_err(adev->dev, "Failed to evict userqueue\n"); 1331 1330 1332 - /* Signal current eviction fence */ 1333 - amdgpu_eviction_fence_signal(evf_mgr, ev_fence); 1334 - 1335 - if (evf_mgr->fd_closing) { 1336 - cancel_delayed_work_sync(&uq_mgr->resume_work); 1337 - return; 1338 - } 1339 - 1340 - /* Schedule a resume work */ 1341 - schedule_delayed_work(&uq_mgr->resume_work, 0); 1331 + if (schedule_resume) 1332 + schedule_delayed_work(&uq_mgr->resume_work, 0); 1342 1333 } 1343 1334 1344 1335 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, ··· 1343 1350 1344 1351 INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); 1345 1352 return 0; 1353 + } 1354 + 1355 + void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr) 1356 + { 1357 + cancel_delayed_work_sync(&userq_mgr->resume_work); 1346 1358 } 1347 1359 1348 1360 void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
··· 123 123 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, 124 124 struct amdgpu_device *adev); 125 125 126 + void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr); 126 127 void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); 127 128 128 129 int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, ··· 134 133 struct amdgpu_userq_obj *userq_obj); 135 134 136 135 void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, 137 - struct amdgpu_eviction_fence *ev_fence); 136 + bool schedule_resume); 138 137 139 138 void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, 140 139 struct amdgpu_eviction_fence_mgr *evf_mgr);
+351 -291
drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
··· 597 597 put_gobj_write: 598 598 for (i = 0; i < num_write_bo_handles; i++) 599 599 drm_gem_object_put(gobj_write[i]); 600 - kfree(gobj_write); 600 + kvfree(gobj_write); 601 601 put_gobj_read: 602 602 for (i = 0; i < num_read_bo_handles; i++) 603 603 drm_gem_object_put(gobj_read[i]); 604 - kfree(gobj_read); 604 + kvfree(gobj_read); 605 605 free_syncobj: 606 606 while (entry-- > 0) 607 607 if (syncobj[entry]) ··· 616 616 return r; 617 617 } 618 618 619 + /* Count the number of expected fences so userspace can alloc a buffer */ 620 + static int 621 + amdgpu_userq_wait_count_fences(struct drm_file *filp, 622 + struct drm_amdgpu_userq_wait *wait_info, 623 + u32 *syncobj_handles, u32 *timeline_points, 624 + u32 *timeline_handles, 625 + struct drm_gem_object **gobj_write, 626 + struct drm_gem_object **gobj_read) 627 + { 628 + int num_read_bo_handles, num_write_bo_handles; 629 + struct dma_fence_unwrap iter; 630 + struct dma_fence *fence, *f; 631 + unsigned int num_fences = 0; 632 + struct drm_exec exec; 633 + int i, r; 634 + 635 + /* 636 + * This needs to be outside of the lock provided by drm_exec for 637 + * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT to work correctly. 
638 + */ 639 + 640 + /* Count timeline fences */ 641 + for (i = 0; i < wait_info->num_syncobj_timeline_handles; i++) { 642 + r = drm_syncobj_find_fence(filp, timeline_handles[i], 643 + timeline_points[i], 644 + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 645 + &fence); 646 + if (r) 647 + return r; 648 + 649 + dma_fence_unwrap_for_each(f, &iter, fence) 650 + num_fences++; 651 + 652 + dma_fence_put(fence); 653 + } 654 + 655 + /* Count boolean fences */ 656 + for (i = 0; i < wait_info->num_syncobj_handles; i++) { 657 + r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 658 + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 659 + &fence); 660 + if (r) 661 + return r; 662 + 663 + num_fences++; 664 + dma_fence_put(fence); 665 + } 666 + 667 + /* Lock all the GEM objects */ 668 + /* TODO: It is actually not necessary to lock them */ 669 + num_read_bo_handles = wait_info->num_bo_read_handles; 670 + num_write_bo_handles = wait_info->num_bo_write_handles; 671 + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 672 + num_read_bo_handles + num_write_bo_handles); 673 + 674 + drm_exec_until_all_locked(&exec) { 675 + r = drm_exec_prepare_array(&exec, gobj_read, 676 + num_read_bo_handles, 1); 677 + drm_exec_retry_on_contention(&exec); 678 + if (r) 679 + goto error_unlock; 680 + 681 + r = drm_exec_prepare_array(&exec, gobj_write, 682 + num_write_bo_handles, 1); 683 + drm_exec_retry_on_contention(&exec); 684 + if (r) 685 + goto error_unlock; 686 + } 687 + 688 + /* Count read fences */ 689 + for (i = 0; i < num_read_bo_handles; i++) { 690 + struct dma_resv_iter resv_cursor; 691 + struct dma_fence *fence; 692 + 693 + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, 694 + DMA_RESV_USAGE_READ, fence) 695 + num_fences++; 696 + } 697 + 698 + /* Count write fences */ 699 + for (i = 0; i < num_write_bo_handles; i++) { 700 + struct dma_resv_iter resv_cursor; 701 + struct dma_fence *fence; 702 + 703 + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, 704 + DMA_RESV_USAGE_WRITE, 
fence) 705 + num_fences++; 706 + } 707 + 708 + wait_info->num_fences = num_fences; 709 + r = 0; 710 + 711 + error_unlock: 712 + /* Unlock all the GEM objects */ 713 + drm_exec_fini(&exec); 714 + return r; 715 + } 716 + 717 + static int 718 + amdgpu_userq_wait_return_fence_info(struct drm_file *filp, 719 + struct drm_amdgpu_userq_wait *wait_info, 720 + u32 *syncobj_handles, u32 *timeline_points, 721 + u32 *timeline_handles, 722 + struct drm_gem_object **gobj_write, 723 + struct drm_gem_object **gobj_read) 724 + { 725 + struct amdgpu_fpriv *fpriv = filp->driver_priv; 726 + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; 727 + struct drm_amdgpu_userq_fence_info *fence_info; 728 + int num_read_bo_handles, num_write_bo_handles; 729 + struct amdgpu_usermode_queue *waitq; 730 + struct dma_fence **fences, *fence, *f; 731 + struct dma_fence_unwrap iter; 732 + int num_points, num_syncobj; 733 + unsigned int num_fences = 0; 734 + struct drm_exec exec; 735 + int i, cnt, r; 736 + 737 + fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), 738 + GFP_KERNEL); 739 + if (!fence_info) 740 + return -ENOMEM; 741 + 742 + fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), 743 + GFP_KERNEL); 744 + if (!fences) { 745 + r = -ENOMEM; 746 + goto free_fence_info; 747 + } 748 + 749 + /* Retrieve timeline fences */ 750 + num_points = wait_info->num_syncobj_timeline_handles; 751 + for (i = 0; i < num_points; i++) { 752 + r = drm_syncobj_find_fence(filp, timeline_handles[i], 753 + timeline_points[i], 754 + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 755 + &fence); 756 + if (r) 757 + goto free_fences; 758 + 759 + dma_fence_unwrap_for_each(f, &iter, fence) { 760 + if (num_fences >= wait_info->num_fences) { 761 + r = -EINVAL; 762 + dma_fence_put(fence); 763 + goto free_fences; 764 + } 765 + 766 + fences[num_fences++] = dma_fence_get(f); 767 + } 768 + 769 + dma_fence_put(fence); 770 + } 771 + 772 + /* Retrieve boolean fences */ 773 + num_syncobj = 
wait_info->num_syncobj_handles; 774 + for (i = 0; i < num_syncobj; i++) { 775 + struct dma_fence *fence; 776 + 777 + r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 778 + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 779 + &fence); 780 + if (r) 781 + goto free_fences; 782 + 783 + if (num_fences >= wait_info->num_fences) { 784 + dma_fence_put(fence); 785 + r = -EINVAL; 786 + goto free_fences; 787 + } 788 + 789 + /* Give the reference to the fence array */ 790 + fences[num_fences++] = fence; 791 + } 792 + 793 + /* Lock all the GEM objects */ 794 + num_read_bo_handles = wait_info->num_bo_read_handles; 795 + num_write_bo_handles = wait_info->num_bo_write_handles; 796 + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 797 + num_read_bo_handles + num_write_bo_handles); 798 + 799 + drm_exec_until_all_locked(&exec) { 800 + r = drm_exec_prepare_array(&exec, gobj_read, 801 + num_read_bo_handles, 1); 802 + drm_exec_retry_on_contention(&exec); 803 + if (r) 804 + goto error_unlock; 805 + 806 + r = drm_exec_prepare_array(&exec, gobj_write, 807 + num_write_bo_handles, 1); 808 + drm_exec_retry_on_contention(&exec); 809 + if (r) 810 + goto error_unlock; 811 + } 812 + 813 + /* Retrieve GEM read objects fence */ 814 + for (i = 0; i < num_read_bo_handles; i++) { 815 + struct dma_resv_iter resv_cursor; 816 + struct dma_fence *fence; 817 + 818 + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, 819 + DMA_RESV_USAGE_READ, fence) { 820 + if (num_fences >= wait_info->num_fences) { 821 + r = -EINVAL; 822 + goto error_unlock; 823 + } 824 + 825 + fences[num_fences++] = dma_fence_get(fence); 826 + } 827 + } 828 + 829 + /* Retrieve GEM write objects fence */ 830 + for (i = 0; i < num_write_bo_handles; i++) { 831 + struct dma_resv_iter resv_cursor; 832 + struct dma_fence *fence; 833 + 834 + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, 835 + DMA_RESV_USAGE_WRITE, fence) { 836 + if (num_fences >= wait_info->num_fences) { 837 + r = -EINVAL; 838 + goto error_unlock; 839 + } 
840 + 841 + fences[num_fences++] = dma_fence_get(fence); 842 + } 843 + } 844 + 845 + drm_exec_fini(&exec); 846 + 847 + /* 848 + * Keep only the latest fences to reduce the number of values 849 + * given back to userspace. 850 + */ 851 + num_fences = dma_fence_dedup_array(fences, num_fences); 852 + 853 + waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id); 854 + if (!waitq) { 855 + r = -EINVAL; 856 + goto free_fences; 857 + } 858 + 859 + for (i = 0, cnt = 0; i < num_fences; i++) { 860 + struct amdgpu_userq_fence_driver *fence_drv; 861 + struct amdgpu_userq_fence *userq_fence; 862 + u32 index; 863 + 864 + userq_fence = to_amdgpu_userq_fence(fences[i]); 865 + if (!userq_fence) { 866 + /* 867 + * Just waiting on other driver fences should 868 + * be good for now 869 + */ 870 + r = dma_fence_wait(fences[i], true); 871 + if (r) 872 + goto put_waitq; 873 + 874 + continue; 875 + } 876 + 877 + fence_drv = userq_fence->fence_drv; 878 + /* 879 + * We need to make sure the user queue release their reference 880 + * to the fence drivers at some point before queue destruction. 881 + * Otherwise, we would gather those references until we don't 882 + * have any more space left and crash. 
883 + */ 884 + r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, 885 + xa_limit_32b, GFP_KERNEL); 886 + if (r) 887 + goto put_waitq; 888 + 889 + amdgpu_userq_fence_driver_get(fence_drv); 890 + 891 + /* Store drm syncobj's gpu va address and value */ 892 + fence_info[cnt].va = fence_drv->va; 893 + fence_info[cnt].value = fences[i]->seqno; 894 + 895 + /* Increment the actual userq fence count */ 896 + cnt++; 897 + } 898 + wait_info->num_fences = cnt; 899 + 900 + /* Copy userq fence info to user space */ 901 + if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), 902 + fence_info, cnt * sizeof(*fence_info))) 903 + r = -EFAULT; 904 + else 905 + r = 0; 906 + 907 + put_waitq: 908 + amdgpu_userq_put(waitq); 909 + 910 + free_fences: 911 + while (num_fences--) 912 + dma_fence_put(fences[num_fences]); 913 + kfree(fences); 914 + 915 + free_fence_info: 916 + kfree(fence_info); 917 + return r; 918 + 919 + error_unlock: 920 + drm_exec_fini(&exec); 921 + goto free_fences; 922 + } 923 + 619 924 int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, 620 925 struct drm_file *filp) 621 926 { 927 + int num_points, num_syncobj, num_read_bo_handles, num_write_bo_handles; 928 + u32 *syncobj_handles, *timeline_points, *timeline_handles; 622 929 struct drm_amdgpu_userq_wait *wait_info = data; 623 - const unsigned int num_write_bo_handles = wait_info->num_bo_write_handles; 624 - const unsigned int num_read_bo_handles = wait_info->num_bo_read_handles; 625 - struct drm_amdgpu_userq_fence_info *fence_info = NULL; 626 - struct amdgpu_fpriv *fpriv = filp->driver_priv; 627 - struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; 628 - struct drm_gem_object **gobj_write, **gobj_read; 629 - u32 *timeline_points, *timeline_handles; 630 - struct amdgpu_usermode_queue *waitq = NULL; 631 - u32 *syncobj_handles, num_syncobj; 632 - struct dma_fence **fences = NULL; 633 - u16 num_points, num_fences = 0; 634 - struct drm_exec exec; 635 - int r, i, cnt; 930 + struct drm_gem_object 
**gobj_write; 931 + struct drm_gem_object **gobj_read; 932 + void __user *ptr; 933 + int r; 636 934 637 935 if (!amdgpu_userq_enabled(dev)) 638 936 return -ENOTSUPP; ··· 940 642 return -EINVAL; 941 643 942 644 num_syncobj = wait_info->num_syncobj_handles; 943 - syncobj_handles = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_handles), 944 - num_syncobj, sizeof(u32)); 645 + ptr = u64_to_user_ptr(wait_info->syncobj_handles); 646 + syncobj_handles = memdup_array_user(ptr, num_syncobj, sizeof(u32)); 945 647 if (IS_ERR(syncobj_handles)) 946 648 return PTR_ERR(syncobj_handles); 947 649 948 - 949 650 num_points = wait_info->num_syncobj_timeline_handles; 950 - timeline_handles = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), 951 - num_points, sizeof(u32)); 651 + ptr = u64_to_user_ptr(wait_info->syncobj_timeline_handles); 652 + timeline_handles = memdup_array_user(ptr, num_points, sizeof(u32)); 952 653 if (IS_ERR(timeline_handles)) { 953 654 r = PTR_ERR(timeline_handles); 954 655 goto free_syncobj_handles; 955 656 } 956 657 957 - timeline_points = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_timeline_points), 958 - num_points, sizeof(u32)); 959 - 658 + ptr = u64_to_user_ptr(wait_info->syncobj_timeline_points); 659 + timeline_points = memdup_array_user(ptr, num_points, sizeof(u32)); 960 660 if (IS_ERR(timeline_points)) { 961 661 r = PTR_ERR(timeline_points); 962 662 goto free_timeline_handles; 963 663 } 964 664 965 - r = drm_gem_objects_lookup(filp, 966 - u64_to_user_ptr(wait_info->bo_read_handles), 967 - num_read_bo_handles, 968 - &gobj_read); 665 + num_read_bo_handles = wait_info->num_bo_read_handles; 666 + ptr = u64_to_user_ptr(wait_info->bo_read_handles), 667 + r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); 969 668 if (r) 970 669 goto free_timeline_points; 971 670 972 - r = drm_gem_objects_lookup(filp, 973 - u64_to_user_ptr(wait_info->bo_write_handles), 974 - num_write_bo_handles, 671 + num_write_bo_handles 
= wait_info->num_bo_write_handles; 672 + ptr = u64_to_user_ptr(wait_info->bo_write_handles), 673 + r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, 975 674 &gobj_write); 976 675 if (r) 977 676 goto put_gobj_read; 978 677 979 - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 980 - (num_read_bo_handles + num_write_bo_handles)); 981 - 982 - /* Lock all BOs with retry handling */ 983 - drm_exec_until_all_locked(&exec) { 984 - r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); 985 - drm_exec_retry_on_contention(&exec); 986 - if (r) { 987 - drm_exec_fini(&exec); 988 - goto put_gobj_write; 989 - } 990 - 991 - r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); 992 - drm_exec_retry_on_contention(&exec); 993 - if (r) { 994 - drm_exec_fini(&exec); 995 - goto put_gobj_write; 996 - } 997 - } 998 - 678 + /* 679 + * Passing num_fences = 0 means that userspace doesn't want to 680 + * retrieve userq_fence_info. If num_fences = 0 we skip filling 681 + * userq_fence_info and return the actual number of fences on 682 + * args->num_fences. 
683 + */ 999 684 if (!wait_info->num_fences) { 1000 - if (num_points) { 1001 - struct dma_fence_unwrap iter; 1002 - struct dma_fence *fence; 1003 - struct dma_fence *f; 1004 - 1005 - for (i = 0; i < num_points; i++) { 1006 - r = drm_syncobj_find_fence(filp, timeline_handles[i], 1007 - timeline_points[i], 1008 - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 1009 - &fence); 1010 - if (r) 1011 - goto exec_fini; 1012 - 1013 - dma_fence_unwrap_for_each(f, &iter, fence) 1014 - num_fences++; 1015 - 1016 - dma_fence_put(fence); 1017 - } 1018 - } 1019 - 1020 - /* Count syncobj's fence */ 1021 - for (i = 0; i < num_syncobj; i++) { 1022 - struct dma_fence *fence; 1023 - 1024 - r = drm_syncobj_find_fence(filp, syncobj_handles[i], 1025 - 0, 1026 - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 1027 - &fence); 1028 - if (r) 1029 - goto exec_fini; 1030 - 1031 - num_fences++; 1032 - dma_fence_put(fence); 1033 - } 1034 - 1035 - /* Count GEM objects fence */ 1036 - for (i = 0; i < num_read_bo_handles; i++) { 1037 - struct dma_resv_iter resv_cursor; 1038 - struct dma_fence *fence; 1039 - 1040 - dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, 1041 - DMA_RESV_USAGE_READ, fence) 1042 - num_fences++; 1043 - } 1044 - 1045 - for (i = 0; i < num_write_bo_handles; i++) { 1046 - struct dma_resv_iter resv_cursor; 1047 - struct dma_fence *fence; 1048 - 1049 - dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, 1050 - DMA_RESV_USAGE_WRITE, fence) 1051 - num_fences++; 1052 - } 1053 - 1054 - /* 1055 - * Passing num_fences = 0 means that userspace doesn't want to 1056 - * retrieve userq_fence_info. If num_fences = 0 we skip filling 1057 - * userq_fence_info and return the actual number of fences on 1058 - * args->num_fences. 
1059 - */ 1060 - wait_info->num_fences = num_fences; 685 + r = amdgpu_userq_wait_count_fences(filp, wait_info, 686 + syncobj_handles, 687 + timeline_points, 688 + timeline_handles, 689 + gobj_write, 690 + gobj_read); 1061 691 } else { 1062 - /* Array of fence info */ 1063 - fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL); 1064 - if (!fence_info) { 1065 - r = -ENOMEM; 1066 - goto exec_fini; 1067 - } 1068 - 1069 - /* Array of fences */ 1070 - fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL); 1071 - if (!fences) { 1072 - r = -ENOMEM; 1073 - goto free_fence_info; 1074 - } 1075 - 1076 - /* Retrieve GEM read objects fence */ 1077 - for (i = 0; i < num_read_bo_handles; i++) { 1078 - struct dma_resv_iter resv_cursor; 1079 - struct dma_fence *fence; 1080 - 1081 - dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, 1082 - DMA_RESV_USAGE_READ, fence) { 1083 - if (num_fences >= wait_info->num_fences) { 1084 - r = -EINVAL; 1085 - goto free_fences; 1086 - } 1087 - 1088 - fences[num_fences++] = fence; 1089 - dma_fence_get(fence); 1090 - } 1091 - } 1092 - 1093 - /* Retrieve GEM write objects fence */ 1094 - for (i = 0; i < num_write_bo_handles; i++) { 1095 - struct dma_resv_iter resv_cursor; 1096 - struct dma_fence *fence; 1097 - 1098 - dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, 1099 - DMA_RESV_USAGE_WRITE, fence) { 1100 - if (num_fences >= wait_info->num_fences) { 1101 - r = -EINVAL; 1102 - goto free_fences; 1103 - } 1104 - 1105 - fences[num_fences++] = fence; 1106 - dma_fence_get(fence); 1107 - } 1108 - } 1109 - 1110 - if (num_points) { 1111 - struct dma_fence_unwrap iter; 1112 - struct dma_fence *fence; 1113 - struct dma_fence *f; 1114 - 1115 - for (i = 0; i < num_points; i++) { 1116 - r = drm_syncobj_find_fence(filp, timeline_handles[i], 1117 - timeline_points[i], 1118 - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 1119 - &fence); 1120 - if (r) 1121 - goto free_fences; 1122 - 1123 - 
dma_fence_unwrap_for_each(f, &iter, fence) { 1124 - if (num_fences >= wait_info->num_fences) { 1125 - r = -EINVAL; 1126 - dma_fence_put(fence); 1127 - goto free_fences; 1128 - } 1129 - 1130 - dma_fence_get(f); 1131 - fences[num_fences++] = f; 1132 - } 1133 - 1134 - dma_fence_put(fence); 1135 - } 1136 - } 1137 - 1138 - /* Retrieve syncobj's fence */ 1139 - for (i = 0; i < num_syncobj; i++) { 1140 - struct dma_fence *fence; 1141 - 1142 - r = drm_syncobj_find_fence(filp, syncobj_handles[i], 1143 - 0, 1144 - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 1145 - &fence); 1146 - if (r) 1147 - goto free_fences; 1148 - 1149 - if (num_fences >= wait_info->num_fences) { 1150 - r = -EINVAL; 1151 - dma_fence_put(fence); 1152 - goto free_fences; 1153 - } 1154 - 1155 - fences[num_fences++] = fence; 1156 - } 1157 - 1158 - /* 1159 - * Keep only the latest fences to reduce the number of values 1160 - * given back to userspace. 1161 - */ 1162 - num_fences = dma_fence_dedup_array(fences, num_fences); 1163 - 1164 - waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id); 1165 - if (!waitq) { 1166 - r = -EINVAL; 1167 - goto free_fences; 1168 - } 1169 - 1170 - for (i = 0, cnt = 0; i < num_fences; i++) { 1171 - struct amdgpu_userq_fence_driver *fence_drv; 1172 - struct amdgpu_userq_fence *userq_fence; 1173 - u32 index; 1174 - 1175 - userq_fence = to_amdgpu_userq_fence(fences[i]); 1176 - if (!userq_fence) { 1177 - /* 1178 - * Just waiting on other driver fences should 1179 - * be good for now 1180 - */ 1181 - r = dma_fence_wait(fences[i], true); 1182 - if (r) { 1183 - dma_fence_put(fences[i]); 1184 - goto free_fences; 1185 - } 1186 - 1187 - dma_fence_put(fences[i]); 1188 - continue; 1189 - } 1190 - 1191 - fence_drv = userq_fence->fence_drv; 1192 - /* 1193 - * We need to make sure the user queue release their reference 1194 - * to the fence drivers at some point before queue destruction. 
1195 - * Otherwise, we would gather those references until we don't 1196 - * have any more space left and crash. 1197 - */ 1198 - r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, 1199 - xa_limit_32b, GFP_KERNEL); 1200 - if (r) 1201 - goto free_fences; 1202 - 1203 - amdgpu_userq_fence_driver_get(fence_drv); 1204 - 1205 - /* Store drm syncobj's gpu va address and value */ 1206 - fence_info[cnt].va = fence_drv->va; 1207 - fence_info[cnt].value = fences[i]->seqno; 1208 - 1209 - dma_fence_put(fences[i]); 1210 - /* Increment the actual userq fence count */ 1211 - cnt++; 1212 - } 1213 - 1214 - wait_info->num_fences = cnt; 1215 - /* Copy userq fence info to user space */ 1216 - if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), 1217 - fence_info, wait_info->num_fences * sizeof(*fence_info))) { 1218 - r = -EFAULT; 1219 - goto free_fences; 1220 - } 692 + r = amdgpu_userq_wait_return_fence_info(filp, wait_info, 693 + syncobj_handles, 694 + timeline_points, 695 + timeline_handles, 696 + gobj_write, 697 + gobj_read); 1221 698 } 1222 699 1223 - free_fences: 1224 - if (fences) { 1225 - while (num_fences-- > 0) 1226 - dma_fence_put(fences[num_fences]); 1227 - kfree(fences); 1228 - } 1229 - free_fence_info: 1230 - kfree(fence_info); 1231 - exec_fini: 1232 - drm_exec_fini(&exec); 1233 - put_gobj_write: 1234 - for (i = 0; i < num_write_bo_handles; i++) 1235 - drm_gem_object_put(gobj_write[i]); 1236 - kfree(gobj_write); 700 + while (num_write_bo_handles--) 701 + drm_gem_object_put(gobj_write[num_write_bo_handles]); 702 + kvfree(gobj_write); 703 + 1237 704 put_gobj_read: 1238 - for (i = 0; i < num_read_bo_handles; i++) 1239 - drm_gem_object_put(gobj_read[i]); 1240 - kfree(gobj_read); 705 + while (num_read_bo_handles--) 706 + drm_gem_object_put(gobj_read[num_read_bo_handles]); 707 + kvfree(gobj_read); 708 + 1241 709 free_timeline_points: 1242 710 kfree(timeline_points); 1243 711 free_timeline_handles: 1244 712 kfree(timeline_handles); 1245 713 free_syncobj_handles: 1246 714 
kfree(syncobj_handles); 1247 - 1248 - if (waitq) 1249 - amdgpu_userq_put(waitq); 1250 - 1251 715 return r; 1252 716 }
+6 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
··· 63 63 #define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" 64 64 #define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" 65 65 #define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin" 66 + #define FIRMWARE_VCN5_0_2 "amdgpu/vcn_5_0_2.bin" 66 67 #define FIRMWARE_VCN5_3_0 "amdgpu/vcn_5_3_0.bin" 67 68 68 69 MODULE_FIRMWARE(FIRMWARE_RAVEN); ··· 92 91 MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); 93 92 MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); 94 93 MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); 94 + MODULE_FIRMWARE(FIRMWARE_VCN5_0_2); 95 95 MODULE_FIRMWARE(FIRMWARE_VCN5_3_0); 96 96 97 97 static void amdgpu_vcn_idle_work_handler(struct work_struct *work); ··· 1097 1095 long r; 1098 1096 1099 1097 if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) && 1100 - (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) { 1098 + (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1)) && 1099 + (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 2))) { 1101 1100 r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); 1102 1101 if (r) 1103 1102 goto error; ··· 1135 1132 return; 1136 1133 1137 1134 if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) || 1138 - amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1)) 1135 + amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1) || 1136 + amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 2)) 1139 1137 && (i > 0)) 1140 1138 return; 1141 1139
+2 -7
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
··· 605 605 606 606 #ifdef MODULE 607 607 if (THIS_MODULE->version != NULL) 608 - strcpy(vf2pf_info->driver_version, THIS_MODULE->version); 608 + strscpy(vf2pf_info->driver_version, THIS_MODULE->version); 609 609 else 610 610 #endif 611 - strcpy(vf2pf_info->driver_version, "N/A"); 611 + strscpy(vf2pf_info->driver_version, "N/A"); 612 612 613 613 vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all 614 614 vf2pf_info->driver_cert = 0; ··· 949 949 /* Skip below init if critical region version != v2 */ 950 950 if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2) 951 951 return 0; 952 - 953 - if (init_hdr_offset < 0) { 954 - dev_err(adev->dev, "Invalid init header offset\n"); 955 - return -EINVAL; 956 - } 957 952 958 953 vram_size = RREG32(mmRCC_CONFIG_MEMSIZE); 959 954 if (!vram_size || vram_size == U32_MAX)
+111 -70
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 139 139 } 140 140 141 141 /** 142 + * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid 143 + * 144 + * @vm: VM to test against. 145 + * @bo: BO to be tested. 146 + * 147 + * Returns true if the BO shares the dma_resv object with the root PD and is 148 + * always guaranteed to be valid inside the VM. 149 + */ 150 + bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) 151 + { 152 + return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; 153 + } 154 + 155 + /** 142 156 * amdgpu_vm_bo_evicted - vm_bo is evicted 143 157 * 144 158 * @vm_bo: vm_bo which is evicted ··· 167 153 168 154 vm_bo->moved = true; 169 155 amdgpu_vm_assert_locked(vm); 156 + spin_lock(&vm_bo->vm->status_lock); 170 157 if (bo->tbo.type == ttm_bo_type_kernel) 171 158 list_move(&vm_bo->vm_status, &vm->evicted); 172 159 else 173 160 list_move_tail(&vm_bo->vm_status, &vm->evicted); 161 + spin_unlock(&vm_bo->vm->status_lock); 174 162 } 175 163 /** 176 164 * amdgpu_vm_bo_moved - vm_bo is moved ··· 185 169 static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) 186 170 { 187 171 amdgpu_vm_assert_locked(vm_bo->vm); 172 + spin_lock(&vm_bo->vm->status_lock); 188 173 list_move(&vm_bo->vm_status, &vm_bo->vm->moved); 174 + spin_unlock(&vm_bo->vm->status_lock); 189 175 } 190 176 191 177 /** ··· 201 183 static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) 202 184 { 203 185 amdgpu_vm_assert_locked(vm_bo->vm); 186 + spin_lock(&vm_bo->vm->status_lock); 204 187 list_move(&vm_bo->vm_status, &vm_bo->vm->idle); 188 + spin_unlock(&vm_bo->vm->status_lock); 205 189 vm_bo->moved = false; 206 190 } 207 191 ··· 217 197 */ 218 198 static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) 219 199 { 220 - spin_lock(&vm_bo->vm->invalidated_lock); 200 + spin_lock(&vm_bo->vm->status_lock); 221 201 list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); 222 - spin_unlock(&vm_bo->vm->invalidated_lock); 202 + spin_unlock(&vm_bo->vm->status_lock); 223 203 } 
224 204 225 205 /** ··· 232 212 */ 233 213 static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) 234 214 { 235 - amdgpu_vm_assert_locked(vm_bo->vm); 236 215 vm_bo->moved = true; 216 + spin_lock(&vm_bo->vm->status_lock); 237 217 list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user); 218 + spin_unlock(&vm_bo->vm->status_lock); 238 219 } 239 220 240 221 /** ··· 249 228 static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) 250 229 { 251 230 amdgpu_vm_assert_locked(vm_bo->vm); 252 - if (vm_bo->bo->parent) 231 + if (vm_bo->bo->parent) { 232 + spin_lock(&vm_bo->vm->status_lock); 253 233 list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); 254 - else 234 + spin_unlock(&vm_bo->vm->status_lock); 235 + } else { 255 236 amdgpu_vm_bo_idle(vm_bo); 237 + } 256 238 } 257 239 258 240 /** ··· 269 245 static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) 270 246 { 271 247 amdgpu_vm_assert_locked(vm_bo->vm); 248 + spin_lock(&vm_bo->vm->status_lock); 272 249 list_move(&vm_bo->vm_status, &vm_bo->vm->done); 250 + spin_unlock(&vm_bo->vm->status_lock); 273 251 } 274 252 275 253 /** ··· 285 259 { 286 260 struct amdgpu_vm_bo_base *vm_bo, *tmp; 287 261 288 - spin_lock(&vm->invalidated_lock); 262 + amdgpu_vm_assert_locked(vm); 263 + 264 + spin_lock(&vm->status_lock); 289 265 list_splice_init(&vm->done, &vm->invalidated); 290 266 list_for_each_entry(vm_bo, &vm->invalidated, vm_status) 291 267 vm_bo->moved = true; 292 - spin_unlock(&vm->invalidated_lock); 293 268 294 - amdgpu_vm_assert_locked(vm); 295 269 list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { 296 270 struct amdgpu_bo *bo = vm_bo->bo; 297 271 ··· 301 275 else if (bo->parent) 302 276 list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); 303 277 } 278 + spin_unlock(&vm->status_lock); 304 279 } 305 280 306 281 /** 307 282 * amdgpu_vm_update_shared - helper to update shared memory stat 308 283 * @base: base structure for tracking BO usage in a VM 309 284 * 310 - * Takes the vm stats_lock 
and updates the shared memory stat. If the basic 285 + * Takes the vm status_lock and updates the shared memory stat. If the basic 311 286 * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called 312 287 * as well. 313 288 */ ··· 321 294 bool shared; 322 295 323 296 dma_resv_assert_held(bo->tbo.base.resv); 324 - spin_lock(&vm->stats_lock); 297 + spin_lock(&vm->status_lock); 325 298 shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); 326 299 if (base->shared != shared) { 327 300 base->shared = shared; ··· 333 306 vm->stats[bo_memtype].drm.private += size; 334 307 } 335 308 } 336 - spin_unlock(&vm->stats_lock); 309 + spin_unlock(&vm->status_lock); 337 310 } 338 311 339 312 /** ··· 358 331 * be bo->tbo.resource 359 332 * @sign: if we should add (+1) or subtract (-1) from the stat 360 333 * 361 - * Caller need to have the vm stats_lock held. Useful for when multiple update 334 + * Caller need to have the vm status_lock held. Useful for when multiple update 362 335 * need to happen at the same time. 
363 336 */ 364 337 static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base, 365 - struct ttm_resource *res, int sign) 338 + struct ttm_resource *res, int sign) 366 339 { 367 340 struct amdgpu_vm *vm = base->vm; 368 341 struct amdgpu_bo *bo = base->bo; ··· 386 359 */ 387 360 if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) 388 361 vm->stats[res_memtype].drm.purgeable += size; 389 - if (!(bo->preferred_domains & 390 - amdgpu_mem_type_to_domain(res_memtype))) 362 + if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype))) 391 363 vm->stats[bo_memtype].evicted += size; 392 364 } 393 365 } ··· 405 379 { 406 380 struct amdgpu_vm *vm = base->vm; 407 381 408 - spin_lock(&vm->stats_lock); 382 + spin_lock(&vm->status_lock); 409 383 amdgpu_vm_update_stats_locked(base, res, sign); 410 - spin_unlock(&vm->stats_lock); 384 + spin_unlock(&vm->status_lock); 411 385 } 412 386 413 387 /** ··· 433 407 base->next = bo->vm_bo; 434 408 bo->vm_bo = base; 435 409 436 - spin_lock(&vm->stats_lock); 410 + spin_lock(&vm->status_lock); 437 411 base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); 438 412 amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1); 439 - spin_unlock(&vm->stats_lock); 413 + spin_unlock(&vm->status_lock); 440 414 441 415 if (!amdgpu_vm_is_bo_always_valid(vm, bo)) 442 416 return; ··· 495 469 int ret; 496 470 497 471 /* We can only trust prev->next while holding the lock */ 498 - spin_lock(&vm->invalidated_lock); 472 + spin_lock(&vm->status_lock); 499 473 while (!list_is_head(prev->next, &vm->done)) { 500 474 bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status); 501 475 502 476 bo = bo_va->base.bo; 503 477 if (bo) { 504 478 amdgpu_bo_ref(bo); 505 - spin_unlock(&vm->invalidated_lock); 479 + spin_unlock(&vm->status_lock); 506 480 507 481 ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1); 508 482 amdgpu_bo_unref(&bo); 509 483 if (unlikely(ret)) 510 484 return ret; 511 485 512 - spin_lock(&vm->invalidated_lock); 
486 + spin_lock(&vm->status_lock); 513 487 } 514 488 prev = prev->next; 515 489 } 516 - spin_unlock(&vm->invalidated_lock); 490 + spin_unlock(&vm->status_lock); 517 491 518 492 return 0; 519 493 } ··· 609 583 void *param) 610 584 { 611 585 uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); 612 - struct amdgpu_vm_bo_base *bo_base, *tmp; 586 + struct amdgpu_vm_bo_base *bo_base; 613 587 struct amdgpu_bo *bo; 614 588 int r; 615 589 ··· 622 596 return r; 623 597 } 624 598 625 - list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { 599 + spin_lock(&vm->status_lock); 600 + while (!list_empty(&vm->evicted)) { 601 + bo_base = list_first_entry(&vm->evicted, 602 + struct amdgpu_vm_bo_base, 603 + vm_status); 604 + spin_unlock(&vm->status_lock); 605 + 626 606 bo = bo_base->bo; 627 607 628 608 r = validate(param, bo); ··· 641 609 vm->update_funcs->map_table(to_amdgpu_bo_vm(bo)); 642 610 amdgpu_vm_bo_relocated(bo_base); 643 611 } 612 + spin_lock(&vm->status_lock); 644 613 } 614 + while (ticket && !list_empty(&vm->evicted_user)) { 615 + bo_base = list_first_entry(&vm->evicted_user, 616 + struct amdgpu_vm_bo_base, 617 + vm_status); 618 + spin_unlock(&vm->status_lock); 645 619 646 - if (ticket) { 647 - list_for_each_entry_safe(bo_base, tmp, &vm->evicted_user, 648 - vm_status) { 649 - bo = bo_base->bo; 650 - dma_resv_assert_held(bo->tbo.base.resv); 620 + bo = bo_base->bo; 621 + dma_resv_assert_held(bo->tbo.base.resv); 651 622 652 - r = validate(param, bo); 653 - if (r) 654 - return r; 623 + r = validate(param, bo); 624 + if (r) 625 + return r; 655 626 656 - amdgpu_vm_bo_invalidated(bo_base); 657 - } 627 + amdgpu_vm_bo_invalidated(bo_base); 628 + 629 + spin_lock(&vm->status_lock); 658 630 } 631 + spin_unlock(&vm->status_lock); 659 632 660 633 amdgpu_vm_eviction_lock(vm); 661 634 vm->evicting = false; ··· 689 652 ret = !vm->evicting; 690 653 amdgpu_vm_eviction_unlock(vm); 691 654 655 + spin_lock(&vm->status_lock); 692 656 ret &= list_empty(&vm->evicted); 657 + 
spin_unlock(&vm->status_lock); 693 658 694 659 spin_lock(&vm->immediate.lock); 695 660 ret &= !vm->immediate.stopped; ··· 985 946 struct amdgpu_vm *vm, bool immediate) 986 947 { 987 948 struct amdgpu_vm_update_params params; 988 - struct amdgpu_vm_bo_base *entry, *tmp; 949 + struct amdgpu_vm_bo_base *entry; 989 950 bool flush_tlb_needed = false; 951 + LIST_HEAD(relocated); 990 952 int r, idx; 991 953 992 954 amdgpu_vm_assert_locked(vm); 993 955 994 - if (list_empty(&vm->relocated)) 956 + spin_lock(&vm->status_lock); 957 + list_splice_init(&vm->relocated, &relocated); 958 + spin_unlock(&vm->status_lock); 959 + 960 + if (list_empty(&relocated)) 995 961 return 0; 996 962 997 963 if (!drm_dev_enter(adev_to_drm(adev), &idx)) ··· 1012 968 if (r) 1013 969 goto error; 1014 970 1015 - list_for_each_entry(entry, &vm->relocated, vm_status) { 971 + list_for_each_entry(entry, &relocated, vm_status) { 1016 972 /* vm_flush_needed after updating moved PDEs */ 1017 973 flush_tlb_needed |= entry->moved; 1018 974 ··· 1028 984 if (flush_tlb_needed) 1029 985 atomic64_inc(&vm->tlb_seq); 1030 986 1031 - list_for_each_entry_safe(entry, tmp, &vm->relocated, vm_status) { 987 + while (!list_empty(&relocated)) { 988 + entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base, 989 + vm_status); 1032 990 amdgpu_vm_bo_idle(entry); 1033 991 } 1034 992 ··· 1087 1041 } 1088 1042 1089 1043 /* Prepare a TLB flush fence to be attached to PTs */ 1090 - if (!params->unlocked) { 1044 + /* The check for need_tlb_fence should be dropped once we 1045 + * sort out the issues with KIQ/MES TLB invalidation timeouts. 
1046 + */ 1047 + if (!params->unlocked && vm->need_tlb_fence) { 1091 1048 amdgpu_vm_tlb_fence_create(params->adev, vm, fence); 1092 1049 1093 1050 /* Makes sure no PD/PT is freed before the flush */ ··· 1260 1211 void amdgpu_vm_get_memory(struct amdgpu_vm *vm, 1261 1212 struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]) 1262 1213 { 1263 - spin_lock(&vm->stats_lock); 1214 + spin_lock(&vm->status_lock); 1264 1215 memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM); 1265 - spin_unlock(&vm->stats_lock); 1216 + spin_unlock(&vm->status_lock); 1266 1217 } 1267 1218 1268 1219 /** ··· 1629 1580 struct amdgpu_vm *vm, 1630 1581 struct ww_acquire_ctx *ticket) 1631 1582 { 1632 - struct amdgpu_bo_va *bo_va, *tmp; 1583 + struct amdgpu_bo_va *bo_va; 1633 1584 struct dma_resv *resv; 1634 1585 bool clear, unlock; 1635 1586 int r; 1636 1587 1637 - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { 1588 + spin_lock(&vm->status_lock); 1589 + while (!list_empty(&vm->moved)) { 1590 + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, 1591 + base.vm_status); 1592 + spin_unlock(&vm->status_lock); 1593 + 1638 1594 /* Per VM BOs never need to bo cleared in the page tables */ 1639 1595 r = amdgpu_vm_bo_update(adev, bo_va, false); 1640 1596 if (r) 1641 1597 return r; 1598 + spin_lock(&vm->status_lock); 1642 1599 } 1643 1600 1644 - spin_lock(&vm->invalidated_lock); 1645 1601 while (!list_empty(&vm->invalidated)) { 1646 1602 bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, 1647 1603 base.vm_status); 1648 1604 resv = bo_va->base.bo->tbo.base.resv; 1649 - spin_unlock(&vm->invalidated_lock); 1605 + spin_unlock(&vm->status_lock); 1650 1606 1651 1607 /* Try to reserve the BO to avoid clearing its ptes */ 1652 1608 if (!adev->debug_vm && dma_resv_trylock(resv)) { ··· 1683 1629 bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) 1684 1630 amdgpu_vm_bo_evicted_user(&bo_va->base); 1685 1631 1686 - spin_lock(&vm->invalidated_lock); 1632 + 
spin_lock(&vm->status_lock); 1687 1633 } 1688 - spin_unlock(&vm->invalidated_lock); 1634 + spin_unlock(&vm->status_lock); 1689 1635 1690 1636 return 0; 1691 1637 } ··· 2228 2174 } 2229 2175 } 2230 2176 2231 - spin_lock(&vm->invalidated_lock); 2177 + spin_lock(&vm->status_lock); 2232 2178 list_del(&bo_va->base.vm_status); 2233 - spin_unlock(&vm->invalidated_lock); 2179 + spin_unlock(&vm->status_lock); 2234 2180 2235 2181 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 2236 2182 list_del(&mapping->list); ··· 2338 2284 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { 2339 2285 struct amdgpu_vm *vm = bo_base->vm; 2340 2286 2341 - spin_lock(&vm->stats_lock); 2287 + spin_lock(&vm->status_lock); 2342 2288 amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1); 2343 2289 amdgpu_vm_update_stats_locked(bo_base, new_mem, +1); 2344 - spin_unlock(&vm->stats_lock); 2290 + spin_unlock(&vm->status_lock); 2345 2291 } 2346 2292 2347 2293 amdgpu_vm_bo_invalidate(bo, evicted); ··· 2610 2556 INIT_LIST_HEAD(&vm->relocated); 2611 2557 INIT_LIST_HEAD(&vm->moved); 2612 2558 INIT_LIST_HEAD(&vm->idle); 2613 - spin_lock_init(&vm->invalidated_lock); 2614 2559 INIT_LIST_HEAD(&vm->invalidated); 2560 + spin_lock_init(&vm->status_lock); 2615 2561 INIT_LIST_HEAD(&vm->freed); 2616 2562 INIT_LIST_HEAD(&vm->done); 2617 2563 INIT_KFIFO(vm->faults); 2618 - spin_lock_init(&vm->stats_lock); 2619 2564 2620 2565 r = amdgpu_vm_init_entities(adev, vm); 2621 2566 if (r) ··· 2623 2570 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 2624 2571 2625 2572 vm->is_compute_context = false; 2573 + vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev); 2626 2574 2627 2575 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2628 2576 AMDGPU_VM_USE_CPU_FOR_GFX); ··· 2761 2707 dma_fence_put(vm->last_update); 2762 2708 vm->last_update = dma_fence_get_stub(); 2763 2709 vm->is_compute_context = true; 2710 + vm->need_tlb_fence = true; 2764 2711 2765 2712 unreserve_bo: 2766 2713 
amdgpu_bo_unreserve(vm->root.bo); ··· 3084 3029 3085 3030 amdgpu_vm_assert_locked(vm); 3086 3031 3032 + spin_lock(&vm->status_lock); 3087 3033 seq_puts(m, "\tIdle BOs:\n"); 3088 3034 list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { 3089 3035 if (!bo_va->base.bo) ··· 3122 3066 id = 0; 3123 3067 3124 3068 seq_puts(m, "\tInvalidated BOs:\n"); 3125 - spin_lock(&vm->invalidated_lock); 3126 3069 list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { 3127 3070 if (!bo_va->base.bo) 3128 3071 continue; 3129 3072 total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m); 3130 3073 } 3131 - spin_unlock(&vm->invalidated_lock); 3132 3074 total_invalidated_objs = id; 3133 3075 id = 0; 3134 3076 ··· 3136 3082 continue; 3137 3083 total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m); 3138 3084 } 3085 + spin_unlock(&vm->status_lock); 3139 3086 total_done_objs = id; 3140 3087 3141 3088 seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle, ··· 3209 3154 } 3210 3155 } 3211 3156 xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); 3212 - } 3213 - 3214 - /** 3215 - * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid 3216 - * 3217 - * @vm: VM to test against. 3218 - * @bo: BO to be tested. 3219 - * 3220 - * Returns true if the BO shares the dma_resv object with the root PD and is 3221 - * always guaranteed to be valid inside the VM. 3222 - */ 3223 - bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) 3224 - { 3225 - return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; 3226 3157 } 3227 3158 3228 3159 void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+8 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
··· 205 205 /* protected by bo being reserved */ 206 206 struct amdgpu_vm_bo_base *next; 207 207 208 - /* protected by vm reservation and invalidated_lock */ 208 + /* protected by vm status_lock */ 209 209 struct list_head vm_status; 210 210 211 211 /* if the bo is counted as shared in mem stats 212 - * protected by vm BO being reserved */ 212 + * protected by vm status_lock */ 213 213 bool shared; 214 214 215 215 /* protected by the BO being reserved */ ··· 345 345 bool evicting; 346 346 unsigned int saved_flags; 347 347 348 - /* Memory statistics for this vm, protected by stats_lock */ 349 - spinlock_t stats_lock; 348 + /* Lock to protect vm_bo add/del/move on all lists of vm */ 349 + spinlock_t status_lock; 350 + 351 + /* Memory statistics for this vm, protected by status_lock */ 350 352 struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; 351 353 352 354 /* ··· 356 354 * PDs, PTs or per VM BOs. The state transits are: 357 355 * 358 356 * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle 359 - * 360 - * Lists are protected by the root PD dma_resv lock. 361 357 */ 362 358 363 359 /* Per-VM and PT BOs who needs a validation */ ··· 376 376 * state transits are: 377 377 * 378 378 * evicted_user or invalidated -> done 379 - * 380 - * Lists are protected by the invalidated_lock. 381 379 */ 382 - spinlock_t invalidated_lock; 383 380 384 381 /* BOs for user mode queues that need a validation */ 385 382 struct list_head evicted_user; ··· 441 444 struct ttm_lru_bulk_move lru_bulk_move; 442 445 /* Flag to indicate if VM is used for compute */ 443 446 bool is_compute_context; 447 + /* Flag to indicate if VM needs a TLB fence (KFD or KGD) */ 448 + bool need_tlb_fence; 444 449 445 450 /* Memory partition number, -1 means any partition */ 446 451 int8_t mem_id;
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
··· 544 544 entry->bo->vm_bo = NULL; 545 545 ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); 546 546 547 + spin_lock(&entry->vm->status_lock); 547 548 list_del(&entry->vm_status); 549 + spin_unlock(&entry->vm->status_lock); 548 550 amdgpu_bo_unref(&entry->bo); 549 551 } 550 552 ··· 590 588 struct amdgpu_vm_pt_cursor seek; 591 589 struct amdgpu_vm_bo_base *entry; 592 590 591 + spin_lock(&params->vm->status_lock); 593 592 for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) { 594 593 if (entry && entry->bo) 595 594 list_move(&entry->vm_status, &params->tlb_flush_waitlist); ··· 598 595 599 596 /* enter start node now */ 600 597 list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist); 598 + spin_unlock(&params->vm->status_lock); 601 599 } 602 600 603 601 /**
+6 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
··· 472 472 struct drm_device *ddev = dev_get_drvdata(dev); 473 473 struct amdgpu_device *adev = drm_to_adev(ddev); 474 474 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; 475 - int i; 475 + int i, offset = 0; 476 476 477 477 for (i = 0; i < top->num_nodes; i++) 478 - sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops); 478 + offset += sysfs_emit_at(buf, offset, "%02x ", top->nodes[i].num_hops); 479 479 480 - return sysfs_emit(buf, "%s\n", buf); 480 + return offset + sysfs_emit_at(buf, offset, "\n"); 481 481 } 482 482 483 483 static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, ··· 487 487 struct drm_device *ddev = dev_get_drvdata(dev); 488 488 struct amdgpu_device *adev = drm_to_adev(ddev); 489 489 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; 490 - int i; 490 + int i, offset = 0; 491 491 492 492 for (i = 0; i < top->num_nodes; i++) 493 - sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links); 493 + offset += sysfs_emit_at(buf, offset, "%02x ", top->nodes[i].num_links); 494 494 495 - return sysfs_emit(buf, "%s\n", buf); 495 + return offset + sysfs_emit_at(buf, offset, "\n"); 496 496 } 497 497 498 498 static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
+49 -31
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
··· 45 45 #include "v12_structs.h" 46 46 #include "gfx_v12_1.h" 47 47 #include "mes_v12_1.h" 48 + #include "amdgpu_ras_mgr.h" 48 49 49 50 #define GFX12_MEC_HPD_SIZE 2048 50 51 #define NUM_SIMD_PER_CU_GFX12_1 4 ··· 137 136 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 138 137 PACKET3_MAP_QUEUES_ME((me)) | 139 138 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 140 - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 141 139 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 142 140 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 143 141 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); ··· 245 245 /* memory (1) or register (0) */ 246 246 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 247 247 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 248 - WAIT_REG_MEM_FUNCTION(3) | /* equal */ 249 - WAIT_REG_MEM_ENGINE(eng_sel))); 248 + WAIT_REG_MEM_FUNCTION(3))); /* equal */ 250 249 251 250 if (mem_space) 252 251 BUG_ON(addr0 & 0x3); /* Dword align */ ··· 1182 1183 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP, 1183 1184 GFX_12_1_0__SRCID__CP_PRIV_INSTR_FAULT, 1184 1185 &adev->gfx.priv_inst_irq); 1186 + if (r) 1187 + return r; 1188 + 1189 + /* RLC POISON Error */ 1190 + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_RLC, 1191 + GFX_12_1_0__SRCID__RLC_POISON_INTERRUPT, 1192 + &adev->gfx.rlc_poison_irq); 1185 1193 if (r) 1186 1194 return r; 1187 1195 ··· 2637 2631 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG, data); 2638 2632 } 2639 2633 2640 - static void gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(struct amdgpu_device *adev, 2641 - int xcc_id) 2642 - { 2643 - uint32_t val; 2644 - 2645 - /* Set the TCP UTCL0 register to enable atomics */ 2646 - val = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 2647 - regTCP_UTCL0_THRASHING_CTRL); 2648 - val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, THRASHING_EN, 0x2); 2649 - val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, 2650 - 
RETRY_FRAGMENT_THRESHOLD_UP_EN, 0x1); 2651 - val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, 2652 - RETRY_FRAGMENT_THRESHOLD_DOWN_EN, 0x1); 2653 - 2654 - WREG32_SOC15(GC, GET_INST(GC, xcc_id), 2655 - regTCP_UTCL0_THRASHING_CTRL, val); 2656 - } 2657 - 2658 2634 static void gfx_v12_1_xcc_enable_atomics(struct amdgpu_device *adev, 2659 2635 int xcc_id) 2660 2636 { ··· 2685 2697 for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); i++) { 2686 2698 gfx_v12_1_xcc_disable_burst(adev, i); 2687 2699 gfx_v12_1_xcc_enable_atomics(adev, i); 2688 - gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(adev, i); 2689 2700 gfx_v12_1_xcc_disable_early_write_ack(adev, i); 2690 2701 gfx_v12_1_xcc_disable_tcp_spill_cache(adev, i); 2691 2702 } ··· 3418 3431 3419 3432 static void gfx_v12_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 3420 3433 { 3421 - int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 3422 3434 uint32_t seq = ring->fence_drv.sync_seq; 3423 3435 uint64_t addr = ring->fence_drv.gpu_addr; 3424 3436 3425 - gfx_v12_1_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 3437 + gfx_v12_1_wait_reg_mem(ring, 0, 1, 0, lower_32_bits(addr), 3426 3438 upper_32_bits(addr), seq, 0xffffffff, 4); 3427 3439 } 3428 3440 ··· 3460 3474 3461 3475 /* write fence seq to the "addr" */ 3462 3476 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3463 - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 3464 - WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 3477 + amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 3465 3478 amdgpu_ring_write(ring, lower_32_bits(addr)); 3466 3479 amdgpu_ring_write(ring, upper_32_bits(addr)); 3467 3480 amdgpu_ring_write(ring, lower_32_bits(seq)); ··· 3468 3483 if (flags & AMDGPU_FENCE_FLAG_INT) { 3469 3484 /* set register to trigger INT */ 3470 3485 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3471 - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 3472 - WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 3486 + amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(0) 
| WR_CONFIRM)); 3473 3487 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS)); 3474 3488 amdgpu_ring_write(ring, 0); 3475 3489 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ ··· 3527 3543 uint32_t reg0, uint32_t reg1, 3528 3544 uint32_t ref, uint32_t mask) 3529 3545 { 3530 - int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 3531 - 3532 - gfx_v12_1_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 3546 + gfx_v12_1_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, 3533 3547 ref, mask, 0x20); 3534 3548 } 3535 3549 ··· 3786 3804 return 0; 3787 3805 } 3788 3806 3807 + static int gfx_v12_1_rlc_poison_irq(struct amdgpu_device *adev, 3808 + struct amdgpu_irq_src *source, 3809 + struct amdgpu_iv_entry *entry) 3810 + { 3811 + uint32_t rlc_fed_status = 0; 3812 + uint32_t ras_blk = RAS_BLOCK_ID__GFX; 3813 + struct ras_ih_info ih_info = {0}; 3814 + int i, num_xcc; 3815 + 3816 + num_xcc = NUM_XCC(adev->gfx.xcc_mask); 3817 + for (i = 0; i < num_xcc; i++) 3818 + rlc_fed_status |= RREG32(SOC15_REG_OFFSET(GC, 3819 + GET_INST(GC, i), regRLC_RLCS_FED_STATUS)); 3820 + 3821 + if (!rlc_fed_status) 3822 + return 0; 3823 + 3824 + if (REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA0_FED_ERR) || 3825 + REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA1_FED_ERR)) 3826 + ras_blk = RAS_BLOCK_ID__SDMA; 3827 + 3828 + dev_warn(adev->dev, "RLC %d FED IRQ\n", ras_blk); 3829 + 3830 + ih_info.block = ras_blk; 3831 + ih_info.reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; 3832 + amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info); 3833 + return 0; 3834 + } 3835 + 3789 3836 static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring) 3790 3837 { 3791 3838 const unsigned int gcr_cntl = ··· 3939 3928 .process = gfx_v12_1_priv_inst_irq, 3940 3929 }; 3941 3930 3931 + static const struct amdgpu_irq_src_funcs gfx_v12_1_rlc_poison_irq_funcs = { 3932 + .process = gfx_v12_1_rlc_poison_irq, 3933 + }; 3934 + 3942 3935 static void gfx_v12_1_set_irq_funcs(struct amdgpu_device 
*adev) 3943 3936 { 3944 3937 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; ··· 3953 3938 3954 3939 adev->gfx.priv_inst_irq.num_types = 1; 3955 3940 adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs; 3941 + 3942 + adev->gfx.rlc_poison_irq.num_types = 1; 3943 + adev->gfx.rlc_poison_irq.funcs = &gfx_v12_1_rlc_poison_irq_funcs; 3956 3944 } 3957 3945 3958 3946 static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev)
+101 -122
drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
··· 53 53 54 54 /* Packet 3 types */ 55 55 #define PACKET3_NOP 0x10 56 - #define PACKET3_SET_BASE 0x11 57 - #define PACKET3_BASE_INDEX(x) ((x) << 0) 58 - #define CE_PARTITION_BASE 3 59 56 #define PACKET3_CLEAR_STATE 0x12 60 57 #define PACKET3_INDEX_BUFFER_SIZE 0x13 61 58 #define PACKET3_DISPATCH_DIRECT 0x15 62 59 #define PACKET3_DISPATCH_INDIRECT 0x16 63 - #define PACKET3_INDIRECT_BUFFER_END 0x17 64 - #define PACKET3_INDIRECT_BUFFER_CNST_END 0x19 65 - #define PACKET3_ATOMIC_GDS 0x1D 66 60 #define PACKET3_ATOMIC_MEM 0x1E 67 61 #define PACKET3_OCCLUSION_QUERY 0x1F 68 62 #define PACKET3_SET_PREDICATION 0x20 ··· 68 74 #define PACKET3_INDEX_BASE 0x26 69 75 #define PACKET3_DRAW_INDEX_2 0x27 70 76 #define PACKET3_CONTEXT_CONTROL 0x28 71 - #define PACKET3_INDEX_TYPE 0x2A 72 77 #define PACKET3_DRAW_INDIRECT_MULTI 0x2C 73 78 #define PACKET3_DRAW_INDEX_AUTO 0x2D 74 79 #define PACKET3_NUM_INSTANCES 0x2F 75 80 #define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 76 - #define PACKET3_INDIRECT_BUFFER_PRIV 0x32 77 - #define PACKET3_INDIRECT_BUFFER_CNST 0x33 78 - #define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33 79 - #define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 80 81 #define PACKET3_DRAW_INDEX_OFFSET_2 0x35 81 - #define PACKET3_DRAW_PREAMBLE 0x36 82 82 #define PACKET3_WRITE_DATA 0x37 83 - #define WRITE_DATA_DST_SEL(x) ((x) << 8) 83 + #define WRITE_DATA_DST_SEL(x) (((x) & 0xf) << 8) 84 84 /* 0 - register 85 - * 1 - memory (sync - via GRBM) 86 - * 2 - gl2 87 - * 3 - gds 85 + * 1 - reserved 86 + * 2 - tc_l2 87 + * 3 - reserved 88 88 * 4 - reserved 89 - * 5 - memory (async - direct) 89 + * 5 - memory (same as tc_l2) 90 + * 6 - memory_mapped_adc_persistent_state 90 91 */ 91 - #define WR_ONE_ADDR (1 << 16) 92 + #define WRITE_DATA_SCOPE(x) (((x) & 0x3) << 12) 93 + #define WRITE_DATA_MODE(x) (((x) & 0x3) << 14) 94 + /* 0 - local xcd 95 + * 1 - remote/local aid 96 + * 2 - remote xcd 97 + * 3 - remote mid 98 + */ 99 + #define WRITE_DATA_ADDR_INCR (1 << 16) 100 + #define WRITE_DATA_MID_DIE_ID(x) (((x) & 
0x3) << 18) 92 101 #define WR_CONFIRM (1 << 20) 93 - #define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 94 - /* 0 - LRU 95 - * 1 - Stream 102 + #define WRITE_DATA_XCD_DIE_ID(x) (((x) & 0xf) << 21) 103 + #define WRITE_DATA_TEMPORAL(x) (((x) & 0x3) << 25) 104 + /* 0 - rt 105 + * 1 - nt 106 + * 2 - ht 107 + * 3 - lu 96 108 */ 97 - #define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 98 - /* 0 - me 99 - * 1 - pfp 100 - * 2 - ce 101 - */ 109 + #define WRITE_DATA_COOP_DISABLE (1 << 27) 102 110 #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 103 - #define PACKET3_MEM_SEMAPHORE 0x39 104 - # define PACKET3_SEM_USE_MAILBOX (0x1 << 16) 105 - # define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */ 106 - # define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) 107 - # define PACKET3_SEM_SEL_WAIT (0x7 << 29) 108 - #define PACKET3_DRAW_INDEX_MULTI_INST 0x3A 109 - #define PACKET3_COPY_DW 0x3B 110 111 #define PACKET3_WAIT_REG_MEM 0x3C 111 - #define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) 112 + #define WAIT_REG_MEM_FUNCTION(x) (((x) & 0x7) << 0) 112 113 /* 0 - always 113 114 * 1 - < 114 115 * 2 - <= ··· 112 123 * 5 - >= 113 124 * 6 - > 114 125 */ 115 - #define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) 126 + #define WAIT_REG_MEM_MEM_SPACE(x) (((x) & 0x3) << 4) 116 127 /* 0 - reg 117 128 * 1 - mem 118 129 */ 119 - #define WAIT_REG_MEM_OPERATION(x) ((x) << 6) 130 + #define WAIT_REG_MEM_OPERATION(x) (((x) & 0x3) << 6) 120 131 /* 0 - wait_reg_mem 121 132 * 1 - wr_wait_wr_reg 122 133 */ 123 - #define WAIT_REG_MEM_ENGINE(x) ((x) << 8) 124 - /* 0 - me 125 - * 1 - pfp 134 + #define WAIT_REG_MEM_MODE(x) (((x) & 0x3) << 10) 135 + /* 0 - local xcd 136 + * 1 - remote/local aid 137 + * 2 - remote xcd 138 + * 3 - remote mid 139 + */ 140 + #define WAIT_REG_MEM_MID_DIE_ID(x) (((x) & 0x3) << 12) 141 + #define WAIT_REG_MEM_XCD_DIE_ID(x) (((x) & 0xf) << 14) 142 + #define WAIT_REG_MEM_MES_INTR_PIPE(x) (((x) & 0x3) << 22) 143 + #define WAIT_REG_MEM_MES_ACTION(x) (((x) & 0x1) << 24) 144 + #define 
WAIT_REG_MEM_TEMPORAL(x) (((x) & 0x3) << 25) 145 + /* 0 - rt 146 + * 1 - nt 147 + * 2 - ht 148 + * 3 - lu 126 149 */ 127 150 #define PACKET3_INDIRECT_BUFFER 0x3F 128 151 #define INDIRECT_BUFFER_VALID (1 << 23) 129 - #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) 130 - /* 0 - LRU 131 - * 1 - Stream 132 - * 2 - Bypass 152 + #define INDIRECT_BUFFER_TEMPORAL(x) ((x) << 28) 153 + /* 0 - rt 154 + * 1 - nt 155 + * 2 - ht 156 + * 3 - lu 133 157 */ 134 - #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) 135 - #define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) 136 158 #define PACKET3_COND_INDIRECT_BUFFER 0x3F 137 159 #define PACKET3_COPY_DATA 0x40 138 - #define PACKET3_CP_DMA 0x41 160 + #define COPY_DATA_SRC_SEL(x) (((x) & 0xf) << 0) 161 + #define COPY_DATA_DST_SEL(x) (((x) & 0xf) << 8) 162 + #define COPY_DATA_SRC_SCOPE(x) (((x) & 0x3) << 4) 163 + #define COPY_DATA_DST_SCOPE(x) (((x) & 0x3) << 27) 164 + #define COPY_DATA_MODE(x) (((x) & 0x3) << 6) 165 + /* 0 - local xcd 166 + * 1 - remote/local aid 167 + * 2 - remote xcd 168 + * 3 - remote mid 169 + */ 170 + #define COPY_DATA_SRC_TEMPORAL(x) (((x) & 0x3) << 13) 171 + #define COPY_DATA_DST_TEMPORAL(x) (((x) & 0x3) << 25) 172 + /* 0 - rt 173 + * 1 - nt 174 + * 2 - ht 175 + * 3 - lu 176 + */ 177 + #define COPY_DATA_COUNT_SEL (1 << 16) 178 + #define COPY_DATA_SRC_DST_REMOTE_MODE(x) (((x) & 0x1) << 16) 179 + /* 0 - src remote 180 + * 1 - dst remote 181 + */ 182 + #define COPY_DATA_MID_DIE_ID(x) (((x) & 0x3) << 18) 183 + #define COPY_DATA_XCD_DIE_ID(x) (((x) & 0xf) << 21) 184 + #define COPY_DATA_PQ_EXE_STATUS (1 << 27) 139 185 #define PACKET3_PFP_SYNC_ME 0x42 140 - #define PACKET3_SURFACE_SYNC 0x43 141 - #define PACKET3_ME_INITIALIZE 0x44 142 186 #define PACKET3_COND_WRITE 0x45 143 187 #define PACKET3_EVENT_WRITE 0x46 144 188 #define EVENT_TYPE(x) ((x) << 0) ··· 182 160 * 3 - SAMPLE_STREAMOUTSTAT* 183 161 * 4 - *S_PARTIAL_FLUSH 184 162 */ 185 - #define PACKET3_EVENT_WRITE_EOP 0x47 186 - #define PACKET3_EVENT_WRITE_EOS 0x48 187 163 
#define PACKET3_RELEASE_MEM 0x49 188 164 #define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0) 189 165 #define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8) ··· 200 180 * 2 - temporal__release_mem__ht 201 181 * 3 - temporal__release_mem__lu 202 182 */ 203 - #define PACKET3_RELEASE_MEM_EXECUTE (1 << 28) 183 + #define PACKET3_RELEASE_MEM_PQ_EXE_STATUS (1 << 28) 184 + #define PACKET3_RELEASE_MEM_GCR_GLK_INV (1 << 30) 204 185 186 + #define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16) 187 + /* 0 - memory controller 188 + * 1 - TC/L2 189 + * 2 - register 190 + */ 191 + #define PACKET3_RELEASE_MEM_MES_INTR_PIPE(x) ((x) << 20) 192 + #define PACKET3_RELEASE_MEM_MES_ACTION_ID(x) ((x) << 22) 193 + #define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24) 194 + /* 0 - none 195 + * 1 - interrupt only (DATA_SEL = 0) 196 + * 2 - interrupt when data write is confirmed 197 + */ 198 + #define PACKET3_RELEASE_MEM_ADD_DOOREBLL_OFFSET(x) (1 << 28) 205 199 #define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29) 206 200 /* 0 - discard 207 201 * 1 - send low 32bit data ··· 223 189 * 3 - send 64bit GPU counter value 224 190 * 4 - send 64bit sys counter value 225 191 */ 226 - #define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24) 227 - /* 0 - none 228 - * 1 - interrupt only (DATA_SEL = 0) 229 - * 2 - interrupt when data write is confirmed 230 - */ 231 - #define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16) 232 - /* 0 - MC 233 - * 1 - TC/L2 234 - */ 235 - 236 - 237 192 238 193 #define PACKET3_PREAMBLE_CNTL 0x4A 239 194 # define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) ··· 241 218 /* 0 - ME 242 219 * 1 - PFP 243 220 */ 244 - # define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 245 - /* 0 - LRU 246 - * 1 - Stream 221 + # define PACKET3_DMA_DATA_SRC_TEMPORAL(x) ((x) << 13) 222 + /* 0 - rt 223 + * 1 - nt 224 + * 2 - ht 225 + * 3 - lu 247 226 */ 248 - # define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 227 + # define PACKET3_DMA_DATA_SRC_SCOPE(x) ((x) << 15) 228 + # define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 
20) 249 229 /* 0 - DST_ADDR using DAS 250 230 * 1 - GDS 251 231 * 3 - DST_ADDR using L2 252 232 */ 253 - # define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 233 + # define PACKET3_DMA_DATA_DST_TEMPORAL(x) ((x) << 25) 254 234 /* 0 - LRU 255 235 * 1 - Stream 256 236 */ 257 - # define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 237 + # define PACKET3_DMA_DATA_DST_SCOPE(x) ((x) << 27) 238 + # define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 258 239 /* 0 - SRC_ADDR using SAS 259 240 * 1 - GDS 260 241 * 2 - DATA 261 242 * 3 - SRC_ADDR using L2 262 243 */ 263 - # define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 264 244 /* COMMAND */ 265 245 # define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 266 246 /* 0 - memory ··· 273 247 /* 0 - memory 274 248 * 1 - register 275 249 */ 276 - # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 277 - # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 278 - # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 250 + # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 251 + # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 252 + # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 253 + # define PACKET3_DMA_DATA_CMD_DIS_WC (1 << 30) 279 254 #define PACKET3_CONTEXT_REG_RMW 0x51 280 - #define PACKET3_GFX_CNTX_UPDATE 0x52 281 - #define PACKET3_BLK_CNTX_UPDATE 0x53 282 - #define PACKET3_INCR_UPDT_STATE 0x55 283 255 #define PACKET3_ACQUIRE_MEM 0x58 284 256 /* 1. HEADER 285 257 * 2. 
COHER_CNTL [30:0] ··· 331 307 * 2: REVERSE 332 308 */ 333 309 #define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18) 334 - #define PACKET3_REWIND 0x59 335 - #define PACKET3_INTERRUPT 0x5A 336 310 #define PACKET3_GEN_PDEPTE 0x5B 337 - #define PACKET3_INDIRECT_BUFFER_PASID 0x5C 338 311 #define PACKET3_PRIME_UTCL2 0x5D 339 312 #define PACKET3_LOAD_UCONFIG_REG 0x5E 340 313 #define PACKET3_LOAD_SH_REG 0x5F ··· 345 324 #define PACKET3_SET_CONTEXT_REG 0x69 346 325 #define PACKET3_SET_CONTEXT_REG_START 0x0000a000 347 326 #define PACKET3_SET_CONTEXT_REG_END 0x0000a400 348 - #define PACKET3_SET_CONTEXT_REG_INDEX 0x6A 349 - #define PACKET3_SET_VGPR_REG_DI_MULTI 0x71 350 - #define PACKET3_SET_SH_REG_DI 0x72 351 - #define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 352 - #define PACKET3_SET_SH_REG_DI_MULTI 0x74 353 - #define PACKET3_GFX_PIPE_LOCK 0x75 354 327 #define PACKET3_SET_SH_REG 0x76 355 328 #define PACKET3_SET_SH_REG_START 0x00002c00 356 329 #define PACKET3_SET_SH_REG_END 0x00003000 ··· 354 339 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 355 340 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 356 341 #define PACKET3_SET_UCONFIG_REG_INDEX 0x7A 357 - #define PACKET3_FORWARD_HEADER 0x7C 358 - #define PACKET3_SCRATCH_RAM_WRITE 0x7D 359 - #define PACKET3_SCRATCH_RAM_READ 0x7E 360 - #define PACKET3_LOAD_CONST_RAM 0x80 361 - #define PACKET3_WRITE_CONST_RAM 0x81 362 - #define PACKET3_DUMP_CONST_RAM 0x83 363 - #define PACKET3_INCREMENT_CE_COUNTER 0x84 364 - #define PACKET3_INCREMENT_DE_COUNTER 0x85 365 - #define PACKET3_WAIT_ON_CE_COUNTER 0x86 366 - #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 367 - #define PACKET3_SWITCH_BUFFER 0x8B 368 342 #define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C 369 - #define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C 370 343 #define PACKET3_DISPATCH_DRAW 0x8D 371 - #define PACKET3_DISPATCH_DRAW_ACE 0x8D 372 - #define PACKET3_GET_LOD_STATS 0x8E 373 - #define PACKET3_DRAW_MULTI_PREAMBLE 0x8F 374 - #define PACKET3_FRAME_CONTROL 0x90 375 - # define FRAME_TMZ (1 << 
0) 376 - # define FRAME_CMD(x) ((x) << 28) 377 - /* 378 - * x=0: tmz_begin 379 - * x=1: tmz_end 380 - */ 381 344 #define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91 382 345 #define PACKET3_WAIT_REG_MEM64 0x93 383 - #define PACKET3_COND_PREEMPT 0x94 384 346 #define PACKET3_HDP_FLUSH 0x95 385 - #define PACKET3_COPY_DATA_RB 0x96 386 347 #define PACKET3_INVALIDATE_TLBS 0x98 387 348 #define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) 388 349 #define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) 389 350 #define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) 390 351 #define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) 391 352 392 - #define PACKET3_AQL_PACKET 0x99 393 353 #define PACKET3_DMA_DATA_FILL_MULTI 0x9A 394 354 #define PACKET3_SET_SH_REG_INDEX 0x9B 395 - #define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C 396 - #define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D 397 - #define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E 398 355 #define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F 399 356 #define PACKET3_SET_RESOURCES 0xA0 400 357 /* 1. header ··· 381 394 # define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) 382 395 # define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) 383 396 # define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) 384 - #define PACKET3_MAP_PROCESS 0xA1 385 397 #define PACKET3_MAP_QUEUES 0xA2 386 398 /* 1. header 387 399 * 2. CONTROL ··· 397 411 # define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16) 398 412 # define PACKET3_MAP_QUEUES_ME(x) ((x) << 18) 399 413 # define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) 400 - # define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) 414 + # define PACKET3_MAP_QUEUES_QUEUE_GROUP(x) ((x) << 24) 401 415 # define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) 402 416 # define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) 403 417 /* CONTROL2 */ 404 - # define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) 405 418 # define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) 406 419 #define PACKET3_UNMAP_QUEUES 0xA3 407 420 /* 1. 
header ··· 449 464 # define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) 450 465 /* CONTROL2b */ 451 466 # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) 452 - # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) 453 - #define PACKET3_RUN_LIST 0xA5 454 - #define PACKET3_MAP_PROCESS_VM 0xA6 455 - /* GFX11 */ 456 - #define PACKET3_SET_Q_PREEMPTION_MODE 0xF0 457 - # define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0) 458 - # define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0) 467 + # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 28) 459 468 460 469 #endif
+83 -110
drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
··· 146 146 uint32_t tmp; 147 147 int i; 148 148 149 + /* TODO: revisit whether the SRIOV guest access to these registers 150 + * is blocked by security policy or not */ 151 + if (amdgpu_sriov_vf(adev)) 152 + return; 153 + 149 154 for_each_inst(i, xcc_mask) { 150 - /* Program the AGP BAR */ 151 - WREG32_SOC15_RLC(GC, GET_INST(GC, i), 152 - regGCMC_VM_AGP_BASE_LO32, 0); 153 - WREG32_SOC15_RLC(GC, GET_INST(GC, i), 154 - regGCMC_VM_AGP_BASE_HI32, 0); 155 - WREG32_SOC15_RLC(GC, GET_INST(GC, i), 156 - regGCMC_VM_AGP_BOT_LO32, 157 - lower_32_bits(adev->gmc.agp_start >> 24)); 158 - WREG32_SOC15_RLC(GC, GET_INST(GC, i), 159 - regGCMC_VM_AGP_BOT_HI32, 160 - upper_32_bits(adev->gmc.agp_start >> 24)); 161 - WREG32_SOC15_RLC(GC, GET_INST(GC, i), 162 - regGCMC_VM_AGP_TOP_LO32, 163 - lower_32_bits(adev->gmc.agp_end >> 24)); 164 - WREG32_SOC15_RLC(GC, GET_INST(GC, i), 165 - regGCMC_VM_AGP_TOP_HI32, 166 - upper_32_bits(adev->gmc.agp_end >> 24)); 167 - 168 - if (!amdgpu_sriov_vf(adev)) { 169 - /* Program the system aperture low logical page number. */ 170 - WREG32_SOC15(GC, GET_INST(GC, i), 171 - regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, 172 - lower_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18)); 173 - WREG32_SOC15(GC, GET_INST(GC, i), 174 - regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, 175 - upper_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18)); 176 - WREG32_SOC15(GC, GET_INST(GC, i), 177 - regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 178 - lower_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18)); 179 - WREG32_SOC15(GC, GET_INST(GC, i), 180 - regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 181 - upper_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18)); 182 - 183 - /* Set default page address. 
*/ 184 - value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); 185 - WREG32_SOC15(GC, GET_INST(GC, i), 186 - regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 187 - (u32)(value >> 12)); 188 - WREG32_SOC15(GC, GET_INST(GC, i), 189 - regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, 190 - (u32)(value >> 44)); 191 - 192 - /* Program "protection fault". */ 193 - WREG32_SOC15(GC, GET_INST(GC, i), 194 - regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, 195 - (u32)(adev->dummy_page_addr >> 12)); 196 - WREG32_SOC15(GC, GET_INST(GC, i), 197 - regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, 198 - (u32)((u64)adev->dummy_page_addr >> 44)); 199 - 200 - tmp = RREG32_SOC15(GC, GET_INST(GC, i), 201 - regGCVM_L2_PROTECTION_FAULT_CNTL2); 202 - tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, 203 - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); 204 - tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, 205 - ENABLE_RETRY_FAULT_INTERRUPT, 0x1); 206 - WREG32_SOC15(GC, GET_INST(GC, i), 207 - regGCVM_L2_PROTECTION_FAULT_CNTL2, tmp); 208 - } 209 - 210 - /* In the case squeezing vram into GART aperture, we don't use 211 - * FB aperture and AGP aperture. Disable them. 
212 - */ 213 155 if (adev->gmc.pdb0_bo) { 156 + /* Disable agp and system aperture 157 + * when vmid0 page table is enabled */ 214 158 WREG32_SOC15(GC, GET_INST(GC, i), 215 159 regGCMC_VM_FB_LOCATION_TOP_LO32, 0); 216 160 WREG32_SOC15(GC, GET_INST(GC, i), ··· 169 225 WREG32_SOC15(GC, GET_INST(GC, i), 170 226 regGCMC_VM_AGP_TOP_HI32, 0); 171 227 WREG32_SOC15(GC, GET_INST(GC, i), 172 - regGCMC_VM_AGP_BOT_LO32, 0xFFFFFFFF); 228 + regGCMC_VM_AGP_BOT_LO32, 229 + 0xFFFFFFFF); 173 230 WREG32_SOC15(GC, GET_INST(GC, i), 174 231 regGCMC_VM_AGP_BOT_HI32, 1); 175 232 WREG32_SOC15(GC, GET_INST(GC, i), ··· 183 238 regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0); 184 239 WREG32_SOC15(GC, GET_INST(GC, i), 185 240 regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0); 241 + } else { 242 + /* Program the AGP BAR */ 243 + WREG32_SOC15_RLC(GC, GET_INST(GC, i), 244 + regGCMC_VM_AGP_BASE_LO32, 0); 245 + WREG32_SOC15_RLC(GC, GET_INST(GC, i), 246 + regGCMC_VM_AGP_BASE_HI32, 0); 247 + WREG32_SOC15_RLC(GC, GET_INST(GC, i), 248 + regGCMC_VM_AGP_BOT_LO32, 249 + lower_32_bits(adev->gmc.agp_start >> 24)); 250 + WREG32_SOC15_RLC(GC, GET_INST(GC, i), 251 + regGCMC_VM_AGP_BOT_HI32, 252 + upper_32_bits(adev->gmc.agp_start >> 24)); 253 + WREG32_SOC15_RLC(GC, GET_INST(GC, i), 254 + regGCMC_VM_AGP_TOP_LO32, 255 + lower_32_bits(adev->gmc.agp_end >> 24)); 256 + WREG32_SOC15_RLC(GC, GET_INST(GC, i), 257 + regGCMC_VM_AGP_TOP_HI32, 258 + upper_32_bits(adev->gmc.agp_end >> 24)); 259 + 260 + /* Program the system aperture low logical page number. 
*/ 261 + WREG32_SOC15(GC, GET_INST(GC, i), 262 + regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, 263 + lower_32_bits(min(adev->gmc.fb_start, 264 + adev->gmc.agp_start) >> 18)); 265 + WREG32_SOC15(GC, GET_INST(GC, i), 266 + regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, 267 + upper_32_bits(min(adev->gmc.fb_start, 268 + adev->gmc.agp_start) >> 18)); 269 + WREG32_SOC15(GC, GET_INST(GC, i), 270 + regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 271 + lower_32_bits(max(adev->gmc.fb_end, 272 + adev->gmc.agp_end) >> 18)); 273 + WREG32_SOC15(GC, GET_INST(GC, i), 274 + regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 275 + upper_32_bits(max(adev->gmc.fb_end, 276 + adev->gmc.agp_end) >> 18)); 186 277 } 278 + 279 + /* Set default page address. */ 280 + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); 281 + WREG32_SOC15(GC, GET_INST(GC, i), 282 + regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 283 + (u32)(value >> 12)); 284 + WREG32_SOC15(GC, GET_INST(GC, i), 285 + regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, 286 + (u32)(value >> 44)); 287 + 288 + /* Program "protection fault". 
*/ 289 + WREG32_SOC15(GC, GET_INST(GC, i), 290 + regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, 291 + (u32)(adev->dummy_page_addr >> 12)); 292 + WREG32_SOC15(GC, GET_INST(GC, i), 293 + regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, 294 + (u32)((u64)adev->dummy_page_addr >> 44)); 295 + 296 + tmp = RREG32_SOC15(GC, GET_INST(GC, i), 297 + regGCVM_L2_PROTECTION_FAULT_CNTL2); 298 + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, 299 + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); 300 + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, 301 + ENABLE_RETRY_FAULT_INTERRUPT, 0x1); 302 + WREG32_SOC15(GC, GET_INST(GC, i), 303 + regGCVM_L2_PROTECTION_FAULT_CNTL2, tmp); 187 304 } 188 305 } 189 306 ··· 332 325 WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL3, tmp); 333 326 334 327 tmp = regGCVM_L2_CNTL4_DEFAULT; 335 - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, 336 - VMC_TAP_PDE_REQUEST_PHYSICAL, 1); 337 - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, 338 - VMC_TAP_PTE_REQUEST_PHYSICAL, 1); 328 + if (adev->gmc.xgmi.connected_to_cpu) { 329 + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, 330 + VMC_TAP_PDE_REQUEST_PHYSICAL, 1); 331 + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, 332 + VMC_TAP_PTE_REQUEST_PHYSICAL, 1); 333 + } else { 334 + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, 335 + VMC_TAP_PDE_REQUEST_PHYSICAL, 0); 336 + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, 337 + VMC_TAP_PTE_REQUEST_PHYSICAL, 0); 338 + } 339 + 339 340 WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL4, tmp); 340 341 341 342 tmp = regGCVM_L2_CNTL5_DEFAULT; ··· 822 807 gfxhub_v12_1_xcc_init(adev, xcc_mask); 823 808 } 824 809 825 - static int gfxhub_v12_1_get_xgmi_info(struct amdgpu_device *adev) 826 - { 827 - u32 max_num_physical_nodes; 828 - u32 max_physical_node_id; 829 - u32 xgmi_lfb_cntl; 830 - u32 max_region; 831 - u64 seg_size; 832 - 833 - xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), 834 - regGCMC_VM_XGMI_LFB_CNTL); 835 - seg_size = REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, 0), 836 - 
regGCMC_VM_XGMI_LFB_SIZE), 837 - GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; 838 - max_region = REG_GET_FIELD(xgmi_lfb_cntl, 839 - GCMC_VM_XGMI_LFB_CNTL, 840 - PF_MAX_REGION); 841 - 842 - max_num_physical_nodes = 8; 843 - max_physical_node_id = 7; 844 - 845 - /* PF_MAX_REGION=0 means xgmi is disabled */ 846 - if (max_region || adev->gmc.xgmi.connected_to_cpu) { 847 - adev->gmc.xgmi.num_physical_nodes = max_region + 1; 848 - 849 - if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) 850 - return -EINVAL; 851 - 852 - adev->gmc.xgmi.physical_node_id = 853 - REG_GET_FIELD(xgmi_lfb_cntl, 854 - GCMC_VM_XGMI_LFB_CNTL, 855 - PF_LFB_REGION); 856 - 857 - if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) 858 - return -EINVAL; 859 - 860 - adev->gmc.xgmi.node_segment_size = seg_size; 861 - } 862 - 863 - return 0; 864 - } 865 - 866 810 const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs = { 867 811 .get_fb_location = gfxhub_v12_1_get_fb_location, 868 812 .get_mc_fb_offset = gfxhub_v12_1_get_mc_fb_offset, ··· 830 856 .gart_disable = gfxhub_v12_1_gart_disable, 831 857 .set_fault_enable_default = gfxhub_v12_1_set_fault_enable_default, 832 858 .init = gfxhub_v12_1_init, 833 - .get_xgmi_info = gfxhub_v12_1_get_xgmi_info, 834 859 }; 835 860 836 861 static int gfxhub_v12_1_xcp_resume(void *handle, uint32_t inst_mask)
+60 -16
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
··· 636 636 { 637 637 struct amdgpu_device *adev = ip_block->adev; 638 638 639 + if (adev->smuio.funcs && 640 + adev->smuio.funcs->is_host_gpu_xgmi_supported) 641 + adev->gmc.xgmi.connected_to_cpu = 642 + adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); 643 + 639 644 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 640 645 case IP_VERSION(12, 1, 0): 641 646 gmc_v12_1_set_gmc_funcs(adev); ··· 696 691 697 692 base = adev->mmhub.funcs->get_fb_location(adev); 698 693 699 - amdgpu_gmc_set_agp_default(adev, mc); 700 - amdgpu_gmc_vram_location(adev, &adev->gmc, base); 701 - amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW); 702 - if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) 703 - amdgpu_gmc_agp_location(adev, mc); 704 - 694 + if (amdgpu_gmc_is_pdb0_enabled(adev)) { 695 + amdgpu_gmc_sysvm_location(adev, mc); 696 + } else { 697 + amdgpu_gmc_set_agp_default(adev, mc); 698 + amdgpu_gmc_vram_location(adev, &adev->gmc, base); 699 + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW); 700 + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) 701 + amdgpu_gmc_agp_location(adev, mc); 702 + } 705 703 /* base offset of vram pages */ 706 704 if (amdgpu_sriov_vf(adev)) 707 705 adev->vm_manager.vram_base_offset = 0; 708 706 else 709 707 adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev); 708 + 709 + adev->vm_manager.vram_base_offset += 710 + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; 710 711 } 711 712 712 713 /** ··· 728 717 { 729 718 int r; 730 719 731 - /* size in MB on si */ 732 - adev->gmc.mc_vram_size = 733 - adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; 720 + if (adev->gmc.xgmi.connected_to_cpu) 721 + adev->gmc.mc_vram_size = 722 + adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; 723 + else 724 + adev->gmc.mc_vram_size = 725 + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; 726 + 734 727 adev->gmc.real_vram_size = adev->gmc.mc_vram_size; 735 728 736 - if 
(!(adev->flags & AMD_IS_APU)) { 729 + if (!(adev->flags & AMD_IS_APU) && 730 + !adev->gmc.xgmi.connected_to_cpu) { 737 731 r = amdgpu_device_resize_fb_bar(adev); 738 732 if (r) 739 733 return r; ··· 748 732 adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); 749 733 750 734 #ifdef CONFIG_X86_64 751 - if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { 752 - adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev); 735 + if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || 736 + (adev->gmc.xgmi.connected_to_cpu)) { 737 + adev->gmc.aper_base = 738 + adev->mmhub.funcs->get_mc_fb_offset(adev) + 739 + adev->gmc.xgmi.physical_node_id * 740 + adev->gmc.xgmi.node_segment_size; 753 741 adev->gmc.aper_size = adev->gmc.real_vram_size; 754 742 } 755 743 #endif ··· 782 762 return 0; 783 763 } 784 764 765 + if (amdgpu_gmc_is_pdb0_enabled(adev)) { 766 + adev->gmc.vmid0_page_table_depth = 1; 767 + adev->gmc.vmid0_page_table_block_size = 12; 768 + } else { 769 + adev->gmc.vmid0_page_table_depth = 0; 770 + adev->gmc.vmid0_page_table_block_size = 0; 771 + } 772 + 785 773 /* Initialize common gart structure */ 786 774 r = amdgpu_gart_init(adev); 787 775 if (r) ··· 800 772 AMDGPU_PTE_EXECUTABLE | 801 773 AMDGPU_PTE_IS_PTE; 802 774 803 - return amdgpu_gart_table_vram_alloc(adev); 775 + r = amdgpu_gart_table_vram_alloc(adev); 776 + if (r) 777 + return r; 778 + 779 + if (amdgpu_gmc_is_pdb0_enabled(adev)) 780 + r = amdgpu_gmc_pdb0_alloc(adev); 781 + 782 + return r; 804 783 } 805 784 806 785 static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) ··· 893 858 if (r) 894 859 return r; 895 860 896 - if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0)) && 897 - !amdgpu_sriov_vf(adev)) { 861 + if (!amdgpu_sriov_vf(adev)) { 898 862 /* interrupt sent to DF. 
*/ 899 - r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0, 863 + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0)) 864 + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0, 900 865 &adev->gmc.ecc_irq); 866 + else 867 + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_DF, 0, 868 + &adev->gmc.ecc_irq); 869 + 901 870 if (r) 902 871 return r; 903 872 } ··· 983 944 amdgpu_vm_manager_fini(adev); 984 945 gmc_v12_0_gart_fini(adev); 985 946 amdgpu_gem_force_release(adev); 947 + amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); 986 948 amdgpu_bo_fini(adev); 987 949 988 950 return 0; ··· 1002 962 { 1003 963 int r; 1004 964 bool value; 965 + 966 + if (adev->gmc.xgmi.connected_to_cpu) 967 + amdgpu_gmc_init_pdb0(adev); 1005 968 1006 969 if (adev->gart.bo == NULL) { 1007 970 dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); ··· 1027 984 1028 985 drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n", 1029 986 (unsigned)(adev->gmc.gart_size >> 20), 987 + (adev->gmc.pdb0_bo) ? (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo) : 1030 988 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); 1031 989 1032 990 return 0;
+20 -9
drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
··· 524 524 bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT; 525 525 uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); 526 526 bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; 527 - unsigned int mtype, mtype_local; 527 + unsigned int mtype, mtype_local, mtype_remote; 528 528 bool snoop = false; 529 529 bool is_local = false; 530 + bool is_aid_a1; 530 531 531 532 switch (gc_ip_version) { 532 533 case IP_VERSION(12, 1, 0): 533 - mtype_local = MTYPE_RW; 534 - if (amdgpu_mtype_local == 1) { 534 + is_aid_a1 = (adev->rev_id & 0x10); 535 + 536 + mtype_local = is_aid_a1 ? MTYPE_RW : MTYPE_NC; 537 + mtype_remote = is_aid_a1 ? MTYPE_NC : MTYPE_UC; 538 + if (amdgpu_mtype_local == 0) { 539 + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); 540 + mtype_local = MTYPE_RW; 541 + } else if (amdgpu_mtype_local == 1) { 535 542 DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); 536 543 mtype_local = MTYPE_NC; 537 544 } else if (amdgpu_mtype_local == 2) { 538 - DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n"); 545 + DRM_INFO_ONCE("MTYPE_CC not supported, using %s for local memory\n", is_aid_a1 ? "MTYPE_RW" : "MTYPE_NC"); 539 546 } else { 540 - DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); 547 + DRM_INFO_ONCE("Using %s for local memory\n", is_aid_a1 ? "MTYPE_RW" : "MTYPE_NC"); 541 548 } 542 549 543 550 is_local = (is_vram && adev == bo_adev); ··· 554 547 } else if (ext_coherent) { 555 548 mtype = is_local ? mtype_local : MTYPE_UC; 556 549 } else { 557 - if (is_local) 558 - mtype = mtype_local; 559 - else 560 - mtype = MTYPE_NC; 550 + mtype = is_local ? 
mtype_local : mtype_remote; 561 551 } 562 552 break; 563 553 default: ··· 625 621 .process = gmc_v12_1_process_interrupt, 626 622 }; 627 623 624 + static const struct amdgpu_irq_src_funcs gmc_v12_1_ecc_funcs = { 625 + .process = amdgpu_umc_uniras_process_ecc_irq, 626 + }; 627 + 628 628 void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev) 629 629 { 630 630 adev->gmc.vm_fault.num_types = 1; 631 631 adev->gmc.vm_fault.funcs = &gmc_v12_1_irq_funcs; 632 + 633 + adev->gmc.ecc_irq.num_types = 1; 634 + adev->gmc.ecc_irq.funcs = &gmc_v12_1_ecc_funcs; 632 635 } 633 636 634 637 void gmc_v12_1_init_vram_info(struct amdgpu_device *adev)
+49 -29
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 660 660 gfxhub_client_ids[cid], 661 661 cid); 662 662 } else { 663 - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 664 - case IP_VERSION(9, 0, 0): 665 - mmhub_cid = mmhub_client_ids_vega10[cid][rw]; 666 - break; 667 - case IP_VERSION(9, 3, 0): 668 - mmhub_cid = mmhub_client_ids_vega12[cid][rw]; 669 - break; 670 - case IP_VERSION(9, 4, 0): 671 - mmhub_cid = mmhub_client_ids_vega20[cid][rw]; 672 - break; 673 - case IP_VERSION(9, 4, 1): 674 - mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; 675 - break; 676 - case IP_VERSION(9, 1, 0): 677 - case IP_VERSION(9, 2, 0): 678 - mmhub_cid = mmhub_client_ids_raven[cid][rw]; 679 - break; 680 - case IP_VERSION(1, 5, 0): 681 - case IP_VERSION(2, 4, 0): 682 - mmhub_cid = mmhub_client_ids_renoir[cid][rw]; 683 - break; 684 - case IP_VERSION(1, 8, 0): 685 - case IP_VERSION(9, 4, 2): 686 - mmhub_cid = mmhub_client_ids_aldebaran[cid][rw]; 687 - break; 688 - default: 689 - mmhub_cid = NULL; 690 - break; 691 - } 663 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 692 664 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 693 665 mmhub_cid ? 
mmhub_cid : "unknown", cid); 694 666 } ··· 1400 1428 } 1401 1429 } 1402 1430 1431 + static void gmc_v9_0_init_mmhub_client_info(struct amdgpu_device *adev) 1432 + { 1433 + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 1434 + case IP_VERSION(9, 0, 0): 1435 + amdgpu_mmhub_init_client_info(&adev->mmhub, 1436 + mmhub_client_ids_vega10, 1437 + ARRAY_SIZE(mmhub_client_ids_vega10)); 1438 + break; 1439 + case IP_VERSION(9, 3, 0): 1440 + amdgpu_mmhub_init_client_info(&adev->mmhub, 1441 + mmhub_client_ids_vega12, 1442 + ARRAY_SIZE(mmhub_client_ids_vega12)); 1443 + break; 1444 + case IP_VERSION(9, 4, 0): 1445 + amdgpu_mmhub_init_client_info(&adev->mmhub, 1446 + mmhub_client_ids_vega20, 1447 + ARRAY_SIZE(mmhub_client_ids_vega20)); 1448 + break; 1449 + case IP_VERSION(9, 4, 1): 1450 + amdgpu_mmhub_init_client_info(&adev->mmhub, 1451 + mmhub_client_ids_arcturus, 1452 + ARRAY_SIZE(mmhub_client_ids_arcturus)); 1453 + break; 1454 + case IP_VERSION(9, 1, 0): 1455 + case IP_VERSION(9, 2, 0): 1456 + amdgpu_mmhub_init_client_info(&adev->mmhub, 1457 + mmhub_client_ids_raven, 1458 + ARRAY_SIZE(mmhub_client_ids_raven)); 1459 + break; 1460 + case IP_VERSION(1, 5, 0): 1461 + case IP_VERSION(2, 4, 0): 1462 + amdgpu_mmhub_init_client_info(&adev->mmhub, 1463 + mmhub_client_ids_renoir, 1464 + ARRAY_SIZE(mmhub_client_ids_renoir)); 1465 + break; 1466 + case IP_VERSION(1, 8, 0): 1467 + case IP_VERSION(9, 4, 2): 1468 + amdgpu_mmhub_init_client_info(&adev->mmhub, 1469 + mmhub_client_ids_aldebaran, 1470 + ARRAY_SIZE(mmhub_client_ids_aldebaran)); 1471 + break; 1472 + default: 1473 + break; 1474 + } 1475 + } 1476 + 1403 1477 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) 1404 1478 { 1405 1479 switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { ··· 1463 1445 adev->mmhub.funcs = &mmhub_v1_0_funcs; 1464 1446 break; 1465 1447 } 1448 + 1449 + gmc_v9_0_init_mmhub_client_info(adev); 1466 1450 } 1467 1451 1468 1452 static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
+2 -2
drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
··· 129 129 if (!pdev) 130 130 return -EINVAL; 131 131 132 - if (!dev->type->name) { 132 + if (!dev->type || !dev->type->name) { 133 133 drm_dbg(&adev->ddev, "Invalid device type to add\n"); 134 134 goto exit; 135 135 } ··· 165 165 if (!pdev) 166 166 return -EINVAL; 167 167 168 - if (!dev->type->name) { 168 + if (!dev->type || !dev->type->name) { 169 169 drm_dbg(&adev->ddev, "Invalid device type to remove\n"); 170 170 goto exit; 171 171 }
+840
drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR MIT 2 + /* 3 + * Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the "Software"), 7 + * to deal in the Software without restriction, including without limitation 8 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 + * and/or sell copies of the Software, and to permit persons to whom the 10 + * Software is furnished to do so, subject to the following conditions: 11 + * 12 + * The above copyright notice and this permission notice shall be included in 13 + * all copies or substantial portions of the Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 + * OTHER DEALINGS IN THE SOFTWARE. 
22 + */ 23 + 24 + #include "amdgpu.h" 25 + #include "amdgpu_jpeg.h" 26 + #include "amdgpu_pm.h" 27 + #include "soc15.h" 28 + #include "soc15d.h" 29 + #include "jpeg_v4_0_3.h" 30 + #include "jpeg_v5_0_2.h" 31 + #include "mmsch_v5_0.h" 32 + 33 + #include "vcn/vcn_5_0_0_offset.h" 34 + #include "vcn/vcn_5_0_0_sh_mask.h" 35 + #include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" 36 + 37 + static void jpeg_v5_0_2_set_dec_ring_funcs(struct amdgpu_device *adev); 38 + static void jpeg_v5_0_2_set_irq_funcs(struct amdgpu_device *adev); 39 + static int jpeg_v5_0_2_set_powergating_state(struct amdgpu_ip_block *ip_block, 40 + enum amd_powergating_state state); 41 + static void jpeg_v5_0_2_dec_ring_set_wptr(struct amdgpu_ring *ring); 42 + 43 + static int amdgpu_ih_srcid_jpeg[] = { 44 + VCN_5_0__SRCID__JPEG_DECODE, 45 + VCN_5_0__SRCID__JPEG1_DECODE, 46 + VCN_5_0__SRCID__JPEG2_DECODE, 47 + VCN_5_0__SRCID__JPEG3_DECODE, 48 + VCN_5_0__SRCID__JPEG4_DECODE, 49 + VCN_5_0__SRCID__JPEG5_DECODE, 50 + VCN_5_0__SRCID__JPEG6_DECODE, 51 + VCN_5_0__SRCID__JPEG7_DECODE, 52 + VCN_5_0__SRCID__JPEG8_DECODE, 53 + VCN_5_0__SRCID__JPEG9_DECODE, 54 + }; 55 + 56 + static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0_2[] = { 57 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS), 58 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT), 59 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR), 60 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR), 61 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS), 62 + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE), 63 + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG), 64 + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE), 65 + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE), 66 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH), 67 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH), 68 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR), 69 + SOC15_REG_ENTRY_STR(JPEG, 0, 
regUVD_JRBC1_UVD_JRBC_RB_WPTR), 70 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS), 71 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR), 72 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR), 73 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS), 74 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR), 75 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR), 76 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS), 77 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR), 78 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR), 79 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS), 80 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR), 81 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR), 82 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS), 83 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR), 84 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR), 85 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS), 86 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR), 87 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR), 88 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS), 89 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_RPTR), 90 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_WPTR), 91 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_STATUS), 92 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_RPTR), 93 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_WPTR), 94 + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_STATUS), 95 + }; 96 + 97 + static int jpeg_v5_0_2_core_reg_offset(u32 pipe) 98 + { 99 + if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3) 100 + return ((0x40 * pipe) - 0xc80); 101 + else 102 + return ((0x40 * pipe) - 0x440); 103 + } 104 + 105 + /** 106 + * jpeg_v5_0_2_early_init - set function pointers 107 + * 108 + * @ip_block: Pointer to 
the amdgpu_ip_block for this hw instance. 109 + * 110 + * Set ring and irq function pointers 111 + */ 112 + static int jpeg_v5_0_2_early_init(struct amdgpu_ip_block *ip_block) 113 + { 114 + struct amdgpu_device *adev = ip_block->adev; 115 + 116 + if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES) 117 + return -ENOENT; 118 + 119 + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; 120 + jpeg_v5_0_2_set_dec_ring_funcs(adev); 121 + jpeg_v5_0_2_set_irq_funcs(adev); 122 + 123 + return 0; 124 + } 125 + 126 + /** 127 + * jpeg_v5_0_2_sw_init - sw init for JPEG block 128 + * 129 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 130 + * 131 + * Load firmware and sw initialization 132 + */ 133 + static int jpeg_v5_0_2_sw_init(struct amdgpu_ip_block *ip_block) 134 + { 135 + struct amdgpu_device *adev = ip_block->adev; 136 + struct amdgpu_ring *ring; 137 + int i, j, r, jpeg_inst; 138 + 139 + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { 140 + /* JPEG TRAP */ 141 + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 142 + amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq); 143 + if (r) 144 + return r; 145 + } 146 + 147 + r = amdgpu_jpeg_sw_init(adev); 148 + if (r) 149 + return r; 150 + 151 + r = amdgpu_jpeg_resume(adev); 152 + if (r) 153 + return r; 154 + 155 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 156 + jpeg_inst = GET_INST(JPEG, i); 157 + 158 + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { 159 + ring = &adev->jpeg.inst[i].ring_dec[j]; 160 + ring->use_doorbell = false; 161 + ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); 162 + ring->doorbell_index = 163 + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 164 + 1 + j + 11 * jpeg_inst; 165 + sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); 166 + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, 167 + AMDGPU_RING_PRIO_DEFAULT, NULL); 168 + if (r) 169 + return r; 170 + 171 + adev->jpeg.internal.jpeg_pitch[j] = 172 + 
regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; 173 + adev->jpeg.inst[i].external.jpeg_pitch[j] = 174 + SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0, 175 + (j ? jpeg_v5_0_2_core_reg_offset(j) : 0)); 176 + } 177 + } 178 + 179 + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_2, ARRAY_SIZE(jpeg_reg_list_5_0_2)); 180 + if (r) 181 + return r; 182 + 183 + adev->jpeg.supported_reset = 184 + amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); 185 + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 186 + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); 187 + 188 + return r; 189 + } 190 + 191 + /** 192 + * jpeg_v5_0_2_sw_fini - sw fini for JPEG block 193 + * 194 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 195 + * 196 + * JPEG suspend and free up sw allocation 197 + */ 198 + static int jpeg_v5_0_2_sw_fini(struct amdgpu_ip_block *ip_block) 199 + { 200 + struct amdgpu_device *adev = ip_block->adev; 201 + int r; 202 + 203 + r = amdgpu_jpeg_suspend(adev); 204 + if (r) 205 + return r; 206 + 207 + amdgpu_jpeg_sysfs_reset_mask_fini(adev); 208 + 209 + r = amdgpu_jpeg_sw_fini(adev); 210 + 211 + return r; 212 + } 213 + 214 + /** 215 + * jpeg_v5_0_2_hw_init - start and test JPEG block 216 + * 217 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
218 + * 219 + */ 220 + static int jpeg_v5_0_2_hw_init(struct amdgpu_ip_block *ip_block) 221 + { 222 + struct amdgpu_device *adev = ip_block->adev; 223 + struct amdgpu_ring *ring; 224 + int i, j, r, jpeg_inst, tmp; 225 + 226 + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) 227 + adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED); 228 + 229 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 230 + jpeg_inst = GET_INST(JPEG, i); 231 + ring = adev->jpeg.inst[i].ring_dec; 232 + 233 + /* Remove JPEG Tile antihang mechanism */ 234 + tmp = RREG32_SOC15(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS); 235 + tmp &= (~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); 236 + WREG32_SOC15(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS, tmp); 237 + 238 + if (ring->use_doorbell) 239 + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, 240 + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst, 241 + adev->jpeg.inst[i].aid_id); 242 + 243 + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { 244 + ring = &adev->jpeg.inst[i].ring_dec[j]; 245 + if (ring->use_doorbell) 246 + WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL, 247 + ring->pipe, 248 + ring->doorbell_index << 249 + VCN_JPEG_DB_CTRL__OFFSET__SHIFT | 250 + VCN_JPEG_DB_CTRL__EN_MASK); 251 + r = amdgpu_ring_test_helper(ring); 252 + if (r) 253 + return r; 254 + } 255 + } 256 + 257 + return 0; 258 + } 259 + 260 + /** 261 + * jpeg_v5_0_2_hw_fini - stop the hardware block 262 + * 263 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
264 + * 265 + * Stop the JPEG block, mark ring as not ready any more 266 + */ 267 + static int jpeg_v5_0_2_hw_fini(struct amdgpu_ip_block *ip_block) 268 + { 269 + struct amdgpu_device *adev = ip_block->adev; 270 + int ret = 0; 271 + 272 + cancel_delayed_work_sync(&adev->jpeg.idle_work); 273 + 274 + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) 275 + ret = jpeg_v5_0_2_set_powergating_state(ip_block, AMD_PG_STATE_GATE); 276 + 277 + return ret; 278 + } 279 + 280 + /** 281 + * jpeg_v5_0_2_suspend - suspend JPEG block 282 + * 283 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 284 + * 285 + * HW fini and suspend JPEG block 286 + */ 287 + static int jpeg_v5_0_2_suspend(struct amdgpu_ip_block *ip_block) 288 + { 289 + struct amdgpu_device *adev = ip_block->adev; 290 + int r; 291 + 292 + r = jpeg_v5_0_2_hw_fini(ip_block); 293 + if (r) 294 + return r; 295 + 296 + r = amdgpu_jpeg_suspend(adev); 297 + 298 + return r; 299 + } 300 + 301 + /** 302 + * jpeg_v5_0_2_resume - resume JPEG block 303 + * 304 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
305 + * 306 + * Resume firmware and hw init JPEG block 307 + */ 308 + static int jpeg_v5_0_2_resume(struct amdgpu_ip_block *ip_block) 309 + { 310 + struct amdgpu_device *adev = ip_block->adev; 311 + int r; 312 + 313 + r = amdgpu_jpeg_resume(adev); 314 + if (r) 315 + return r; 316 + 317 + r = jpeg_v5_0_2_hw_init(ip_block); 318 + 319 + return r; 320 + } 321 + 322 + static void jpeg_v5_0_2_init_inst(struct amdgpu_device *adev, int i) 323 + { 324 + int jpeg_inst = GET_INST(JPEG, i); 325 + 326 + /* disable anti hang mechanism */ 327 + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0, 328 + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); 329 + 330 + /* keep the JPEG in static PG mode */ 331 + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0, 332 + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); 333 + 334 + /* MJPEG global tiling registers */ 335 + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, 336 + adev->gfx.config.gb_addr_config); 337 + 338 + /* enable JMI channel */ 339 + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, 340 + ~UVD_JMI_CNTL__SOFT_RESET_MASK); 341 + } 342 + 343 + static void jpeg_v5_0_2_deinit_inst(struct amdgpu_device *adev, int i) 344 + { 345 + int jpeg_inst = GET_INST(JPEG, i); 346 + /* reset JMI */ 347 + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 348 + UVD_JMI_CNTL__SOFT_RESET_MASK, 349 + ~UVD_JMI_CNTL__SOFT_RESET_MASK); 350 + 351 + /* enable anti hang mechanism */ 352 + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 353 + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, 354 + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); 355 + } 356 + 357 + static void jpeg_v5_0_2_init_jrbc(struct amdgpu_ring *ring) 358 + { 359 + struct amdgpu_device *adev = ring->adev; 360 + u32 reg, data, mask; 361 + int jpeg_inst = GET_INST(JPEG, ring->me); 362 + int reg_offset = ring->pipe ? 
jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0; 363 + 364 + /* enable System Interrupt for JRBC */ 365 + reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN); 366 + if (ring->pipe < AMDGPU_MAX_JPEG_RINGS_4_0_3) { 367 + data = JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe; 368 + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe); 369 + WREG32_P(reg, data, mask); 370 + } else { 371 + data = JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12); 372 + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12)); 373 + WREG32_P(reg, data, mask); 374 + } 375 + 376 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 377 + regUVD_LMI_JRBC_RB_VMID, 378 + reg_offset, 0); 379 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 380 + regUVD_JRBC_RB_CNTL, 381 + reg_offset, 382 + (0x00000001L | 0x00000002L)); 383 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 384 + regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, 385 + reg_offset, lower_32_bits(ring->gpu_addr)); 386 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 387 + regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, 388 + reg_offset, upper_32_bits(ring->gpu_addr)); 389 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 390 + regUVD_JRBC_RB_RPTR, 391 + reg_offset, 0); 392 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 393 + regUVD_JRBC_RB_WPTR, 394 + reg_offset, 0); 395 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 396 + regUVD_JRBC_RB_CNTL, 397 + reg_offset, 0x00000002L); 398 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 399 + regUVD_JRBC_RB_SIZE, 400 + reg_offset, ring->ring_size / 4); 401 + ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR, 402 + reg_offset); 403 + } 404 + 405 + /** 406 + * jpeg_v5_0_2_start - start JPEG block 407 + * 408 + * @adev: amdgpu_device pointer 409 + * 410 + * Setup and start the JPEG block 411 + */ 412 + static int jpeg_v5_0_2_start(struct amdgpu_device *adev) 413 + { 414 + struct amdgpu_ring *ring; 415 + int i, j; 416 + 417 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 418 + jpeg_v5_0_2_init_inst(adev, i); 419 + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { 420 + ring = 
&adev->jpeg.inst[i].ring_dec[j]; 421 + jpeg_v5_0_2_init_jrbc(ring); 422 + } 423 + } 424 + 425 + return 0; 426 + } 427 + 428 + /** 429 + * jpeg_v5_0_2_stop - stop JPEG block 430 + * 431 + * @adev: amdgpu_device pointer 432 + * 433 + * stop the JPEG block 434 + */ 435 + static int jpeg_v5_0_2_stop(struct amdgpu_device *adev) 436 + { 437 + int i; 438 + 439 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) 440 + jpeg_v5_0_2_deinit_inst(adev, i); 441 + 442 + return 0; 443 + } 444 + 445 + /** 446 + * jpeg_v5_0_2_dec_ring_get_rptr - get read pointer 447 + * 448 + * @ring: amdgpu_ring pointer 449 + * 450 + * Returns the current hardware read pointer 451 + */ 452 + static uint64_t jpeg_v5_0_2_dec_ring_get_rptr(struct amdgpu_ring *ring) 453 + { 454 + struct amdgpu_device *adev = ring->adev; 455 + 456 + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR, 457 + ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0); 458 + } 459 + 460 + /** 461 + * jpeg_v5_0_2_dec_ring_get_wptr - get write pointer 462 + * 463 + * @ring: amdgpu_ring pointer 464 + * 465 + * Returns the current hardware write pointer 466 + */ 467 + static uint64_t jpeg_v5_0_2_dec_ring_get_wptr(struct amdgpu_ring *ring) 468 + { 469 + struct amdgpu_device *adev = ring->adev; 470 + 471 + if (ring->use_doorbell) 472 + return adev->wb.wb[ring->wptr_offs]; 473 + 474 + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR, 475 + ring->pipe ? 
jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0); 476 + } 477 + 478 + /** 479 + * jpeg_v5_0_2_dec_ring_set_wptr - set write pointer 480 + * 481 + * @ring: amdgpu_ring pointer 482 + * 483 + * Commits the write pointer to the hardware 484 + */ 485 + static void jpeg_v5_0_2_dec_ring_set_wptr(struct amdgpu_ring *ring) 486 + { 487 + struct amdgpu_device *adev = ring->adev; 488 + 489 + if (ring->use_doorbell) { 490 + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 491 + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 492 + } else { 493 + WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), 494 + regUVD_JRBC_RB_WPTR, 495 + (ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0), 496 + lower_32_bits(ring->wptr)); 497 + } 498 + } 499 + 500 + static bool jpeg_v5_0_2_is_idle(struct amdgpu_ip_block *ip_block) 501 + { 502 + struct amdgpu_device *adev = ip_block->adev; 503 + bool ret = false; 504 + int i, j; 505 + 506 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 507 + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { 508 + int reg_offset = (j ? jpeg_v5_0_2_core_reg_offset(j) : 0); 509 + 510 + ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i), 511 + regUVD_JRBC_STATUS, reg_offset) & 512 + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == 513 + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); 514 + } 515 + } 516 + 517 + return ret; 518 + } 519 + 520 + static int jpeg_v5_0_2_wait_for_idle(struct amdgpu_ip_block *ip_block) 521 + { 522 + struct amdgpu_device *adev = ip_block->adev; 523 + int ret = 0; 524 + int i, j; 525 + 526 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 527 + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { 528 + int reg_offset = (j ? 
jpeg_v5_0_2_core_reg_offset(j) : 0); 529 + 530 + ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i), 531 + regUVD_JRBC_STATUS, reg_offset, 532 + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, 533 + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); 534 + } 535 + } 536 + return ret; 537 + } 538 + 539 + static int jpeg_v5_0_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, 540 + enum amd_clockgating_state state) 541 + { 542 + struct amdgpu_device *adev = ip_block->adev; 543 + bool enable = state == AMD_CG_STATE_GATE; 544 + 545 + int i; 546 + 547 + if (!enable) 548 + return 0; 549 + 550 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 551 + if (!jpeg_v5_0_2_is_idle(ip_block)) 552 + return -EBUSY; 553 + } 554 + 555 + return 0; 556 + } 557 + 558 + static int jpeg_v5_0_2_set_powergating_state(struct amdgpu_ip_block *ip_block, 559 + enum amd_powergating_state state) 560 + { 561 + struct amdgpu_device *adev = ip_block->adev; 562 + int ret; 563 + 564 + if (state == adev->jpeg.cur_state) 565 + return 0; 566 + 567 + if (state == AMD_PG_STATE_GATE) 568 + ret = jpeg_v5_0_2_stop(adev); 569 + else 570 + ret = jpeg_v5_0_2_start(adev); 571 + 572 + if (!ret) 573 + adev->jpeg.cur_state = state; 574 + 575 + return ret; 576 + } 577 + 578 + static int jpeg_v5_0_2_set_interrupt_state(struct amdgpu_device *adev, 579 + struct amdgpu_irq_src *source, 580 + unsigned int type, 581 + enum amdgpu_interrupt_state state) 582 + { 583 + return 0; 584 + } 585 + 586 + static int jpeg_v5_0_2_process_interrupt(struct amdgpu_device *adev, 587 + struct amdgpu_irq_src *source, 588 + struct amdgpu_iv_entry *entry) 589 + { 590 + u32 i, inst; 591 + 592 + i = node_id_to_phys_map[entry->node_id]; 593 + DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); 594 + 595 + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) 596 + if (adev->jpeg.inst[inst].aid_id == i) 597 + break; 598 + 599 + if (inst >= adev->jpeg.num_jpeg_inst) { 600 + dev_WARN_ONCE(adev->dev, 1, 601 + "Interrupt received for unknown JPEG instance %d", 602 + 
entry->node_id); 603 + return 0; 604 + } 605 + 606 + switch (entry->src_id) { 607 + case VCN_5_0__SRCID__JPEG_DECODE: 608 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]); 609 + break; 610 + case VCN_5_0__SRCID__JPEG1_DECODE: 611 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]); 612 + break; 613 + case VCN_5_0__SRCID__JPEG2_DECODE: 614 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]); 615 + break; 616 + case VCN_5_0__SRCID__JPEG3_DECODE: 617 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]); 618 + break; 619 + case VCN_5_0__SRCID__JPEG4_DECODE: 620 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]); 621 + break; 622 + case VCN_5_0__SRCID__JPEG5_DECODE: 623 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]); 624 + break; 625 + case VCN_5_0__SRCID__JPEG6_DECODE: 626 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]); 627 + break; 628 + case VCN_5_0__SRCID__JPEG7_DECODE: 629 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]); 630 + break; 631 + case VCN_5_0__SRCID__JPEG8_DECODE: 632 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]); 633 + break; 634 + case VCN_5_0__SRCID__JPEG9_DECODE: 635 + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]); 636 + break; 637 + default: 638 + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", 639 + entry->src_id, entry->src_data[0]); 640 + break; 641 + } 642 + 643 + return 0; 644 + } 645 + 646 + static void jpeg_v5_0_2_core_stall_reset(struct amdgpu_ring *ring) 647 + { 648 + struct amdgpu_device *adev = ring->adev; 649 + int jpeg_inst = GET_INST(JPEG, ring->me); 650 + int reg_offset = ring->pipe ? 
jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0; 651 + 652 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 653 + regUVD_JMI0_UVD_JMI_CLIENT_STALL, 654 + reg_offset, 0x1F); 655 + SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst, 656 + regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS, 657 + reg_offset, 0x1F, 0x1F); 658 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 659 + regUVD_JMI0_JPEG_LMI_DROP, 660 + reg_offset, 0x1F); 661 + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe); 662 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 663 + regUVD_JMI0_UVD_JMI_CLIENT_STALL, 664 + reg_offset, 0x00); 665 + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, 666 + regUVD_JMI0_JPEG_LMI_DROP, 667 + reg_offset, 0x00); 668 + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); 669 + } 670 + 671 + static int jpeg_v5_0_2_ring_reset(struct amdgpu_ring *ring, 672 + unsigned int vmid, 673 + struct amdgpu_fence *timedout_fence) 674 + { 675 + amdgpu_ring_reset_helper_begin(ring, timedout_fence); 676 + jpeg_v5_0_2_core_stall_reset(ring); 677 + jpeg_v5_0_2_init_jrbc(ring); 678 + return amdgpu_ring_reset_helper_end(ring, timedout_fence); 679 + } 680 + 681 + static const struct amd_ip_funcs jpeg_v5_0_2_ip_funcs = { 682 + .name = "jpeg_v5_0_2", 683 + .early_init = jpeg_v5_0_2_early_init, 684 + .late_init = NULL, 685 + .sw_init = jpeg_v5_0_2_sw_init, 686 + .sw_fini = jpeg_v5_0_2_sw_fini, 687 + .hw_init = jpeg_v5_0_2_hw_init, 688 + .hw_fini = jpeg_v5_0_2_hw_fini, 689 + .suspend = jpeg_v5_0_2_suspend, 690 + .resume = jpeg_v5_0_2_resume, 691 + .is_idle = jpeg_v5_0_2_is_idle, 692 + .wait_for_idle = jpeg_v5_0_2_wait_for_idle, 693 + .check_soft_reset = NULL, 694 + .pre_soft_reset = NULL, 695 + .soft_reset = NULL, 696 + .post_soft_reset = NULL, 697 + .set_clockgating_state = jpeg_v5_0_2_set_clockgating_state, 698 + .set_powergating_state = jpeg_v5_0_2_set_powergating_state, 699 + .dump_ip_state = amdgpu_jpeg_dump_ip_state, 700 + .print_ip_state = amdgpu_jpeg_print_ip_state, 701 + }; 702 + 703 + static const struct amdgpu_ring_funcs 
jpeg_v5_0_2_dec_ring_vm_funcs = { 704 + .type = AMDGPU_RING_TYPE_VCN_JPEG, 705 + .align_mask = 0xf, 706 + .get_rptr = jpeg_v5_0_2_dec_ring_get_rptr, 707 + .get_wptr = jpeg_v5_0_2_dec_ring_get_wptr, 708 + .set_wptr = jpeg_v5_0_2_dec_ring_set_wptr, 709 + .emit_frame_size = 710 + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 711 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 712 + 8 + /* jpeg_v5_0_2_dec_ring_emit_vm_flush */ 713 + 22 + 22 + /* jpeg_v5_0_2_dec_ring_emit_fence x2 vm fence */ 714 + 8 + 16, 715 + .emit_ib_size = 22, /* jpeg_v5_0_2_dec_ring_emit_ib */ 716 + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, 717 + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, 718 + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, 719 + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, 720 + .test_ring = amdgpu_jpeg_dec_ring_test_ring, 721 + .test_ib = amdgpu_jpeg_dec_ring_test_ib, 722 + .insert_nop = jpeg_v4_0_3_dec_ring_nop, 723 + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, 724 + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, 725 + .pad_ib = amdgpu_ring_generic_pad_ib, 726 + .begin_use = amdgpu_jpeg_ring_begin_use, 727 + .end_use = amdgpu_jpeg_ring_end_use, 728 + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, 729 + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, 730 + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, 731 + .reset = jpeg_v5_0_2_ring_reset, 732 + }; 733 + 734 + static void jpeg_v5_0_2_set_dec_ring_funcs(struct amdgpu_device *adev) 735 + { 736 + int i, j, jpeg_inst; 737 + 738 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 739 + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { 740 + adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_2_dec_ring_vm_funcs; 741 + adev->jpeg.inst[i].ring_dec[j].me = i; 742 + adev->jpeg.inst[i].ring_dec[j].pipe = j; 743 + } 744 + jpeg_inst = GET_INST(JPEG, i); 745 + adev->jpeg.inst[i].aid_id = 746 + jpeg_inst / adev->jpeg.num_inst_per_aid; 747 + } 748 + } 749 + 750 + static const struct amdgpu_irq_src_funcs 
jpeg_v5_0_2_irq_funcs = { 751 + .set = jpeg_v5_0_2_set_interrupt_state, 752 + .process = jpeg_v5_0_2_process_interrupt, 753 + }; 754 + 755 + static void jpeg_v5_0_2_set_irq_funcs(struct amdgpu_device *adev) 756 + { 757 + int i; 758 + 759 + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) 760 + adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; 761 + 762 + adev->jpeg.inst->irq.funcs = &jpeg_v5_0_2_irq_funcs; 763 + } 764 + 765 + const struct amdgpu_ip_block_version jpeg_v5_0_2_ip_block = { 766 + .type = AMD_IP_BLOCK_TYPE_JPEG, 767 + .major = 5, 768 + .minor = 0, 769 + .rev = 2, 770 + .funcs = &jpeg_v5_0_2_ip_funcs, 771 + }; 772 + 773 + #if 0 774 + static int jpeg_v5_0_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, 775 + enum aca_smu_type type, void *data) 776 + { 777 + struct aca_bank_info info; 778 + u64 misc0; 779 + int ret; 780 + 781 + ret = aca_bank_info_decode(bank, &info); 782 + if (ret) 783 + return ret; 784 + 785 + misc0 = bank->regs[ACA_REG_IDX_MISC0]; 786 + switch (type) { 787 + case ACA_SMU_TYPE_UE: 788 + bank->aca_err_type = ACA_ERROR_TYPE_UE; 789 + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 790 + 1ULL); 791 + break; 792 + case ACA_SMU_TYPE_CE: 793 + bank->aca_err_type = ACA_ERROR_TYPE_CE; 794 + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 795 + ACA_REG__MISC0__ERRCNT(misc0)); 796 + break; 797 + default: 798 + return -EINVAL; 799 + } 800 + 801 + return ret; 802 + } 803 + 804 + /* reference to smu driver if header file */ 805 + static int jpeg_v5_0_2_err_codes[] = { 806 + 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-9][S|D] */ 807 + 24, 25, 26, 27, 28, 29, 30, 31, 808 + 48, 49, 50, 51, 809 + }; 810 + 811 + static bool jpeg_v5_0_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, 812 + enum aca_smu_type type, void *data) 813 + { 814 + u32 instlo; 815 + 816 + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); 817 + instlo &= GENMASK(31, 1); 818 
+ 819 + if (instlo != mmSMNAID_AID0_MCA_SMU) 820 + return false; 821 + 822 + if (aca_bank_check_error_codes(handle->adev, bank, 823 + jpeg_v5_0_2_err_codes, 824 + ARRAY_SIZE(jpeg_v5_0_2_err_codes))) 825 + return false; 826 + 827 + return true; 828 + } 829 + 830 + static const struct aca_bank_ops jpeg_v5_0_2_aca_bank_ops = { 831 + .aca_bank_parser = jpeg_v5_0_2_aca_bank_parser, 832 + .aca_bank_is_valid = jpeg_v5_0_2_aca_bank_is_valid, 833 + }; 834 + 835 + static const struct aca_info jpeg_v5_0_2_aca_info = { 836 + .hwip = ACA_HWIP_TYPE_SMU, 837 + .mask = ACA_ERROR_UE_MASK, 838 + .bank_ops = &jpeg_v5_0_2_aca_bank_ops, 839 + }; 840 + #endif
+111
drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.h
/*
 * Copyright 2025-2026 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __JPEG_V5_0_2_H__
#define __JPEG_V5_0_2_H__

/* IP block descriptor, defined in jpeg_v5_0_2.c */
extern const struct amdgpu_ip_block_version jpeg_v5_0_2_ip_block;

/* Internal register offsets */
#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET		0x4094
#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET		0x1bffe

/* JRBC0 */
#define regUVD_JRBC0_UVD_JRBC_RB_WPTR				0x0640
#define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX			1
#define regUVD_JRBC0_UVD_JRBC_STATUS				0x0649
#define regUVD_JRBC0_UVD_JRBC_STATUS_BASE_IDX			1
#define regUVD_JRBC0_UVD_JRBC_RB_RPTR				0x064a
#define regUVD_JRBC0_UVD_JRBC_RB_RPTR_BASE_IDX			1

/* JRBC1 */
#define regUVD_JRBC1_UVD_JRBC_RB_WPTR				0x0000
#define regUVD_JRBC1_UVD_JRBC_RB_WPTR_BASE_IDX			0
#define regUVD_JRBC1_UVD_JRBC_STATUS				0x0009
#define regUVD_JRBC1_UVD_JRBC_STATUS_BASE_IDX			0
#define regUVD_JRBC1_UVD_JRBC_RB_RPTR				0x000a
#define regUVD_JRBC1_UVD_JRBC_RB_RPTR_BASE_IDX			0

/* JRBC2 */
#define regUVD_JRBC2_UVD_JRBC_RB_WPTR				0x0040
#define regUVD_JRBC2_UVD_JRBC_RB_WPTR_BASE_IDX			0
#define regUVD_JRBC2_UVD_JRBC_STATUS				0x0049
#define regUVD_JRBC2_UVD_JRBC_STATUS_BASE_IDX			0
#define regUVD_JRBC2_UVD_JRBC_RB_RPTR				0x004a
#define regUVD_JRBC2_UVD_JRBC_RB_RPTR_BASE_IDX			0

/* JRBC3 */
#define regUVD_JRBC3_UVD_JRBC_RB_WPTR				0x0080
#define regUVD_JRBC3_UVD_JRBC_RB_WPTR_BASE_IDX			0
#define regUVD_JRBC3_UVD_JRBC_STATUS				0x0089
#define regUVD_JRBC3_UVD_JRBC_STATUS_BASE_IDX			0
#define regUVD_JRBC3_UVD_JRBC_RB_RPTR				0x008a
#define regUVD_JRBC3_UVD_JRBC_RB_RPTR_BASE_IDX			0

/* JRBC4 */
#define regUVD_JRBC4_UVD_JRBC_RB_WPTR				0x00c0
#define regUVD_JRBC4_UVD_JRBC_RB_WPTR_BASE_IDX			0
#define regUVD_JRBC4_UVD_JRBC_STATUS				0x00c9
#define regUVD_JRBC4_UVD_JRBC_STATUS_BASE_IDX			0
#define regUVD_JRBC4_UVD_JRBC_RB_RPTR				0x00ca
#define regUVD_JRBC4_UVD_JRBC_RB_RPTR_BASE_IDX			0

/* JRBC5 */
#define regUVD_JRBC5_UVD_JRBC_RB_WPTR				0x0100
#define regUVD_JRBC5_UVD_JRBC_RB_WPTR_BASE_IDX			0
#define regUVD_JRBC5_UVD_JRBC_STATUS				0x0109
#define regUVD_JRBC5_UVD_JRBC_STATUS_BASE_IDX			0
#define regUVD_JRBC5_UVD_JRBC_RB_RPTR				0x010a
#define regUVD_JRBC5_UVD_JRBC_RB_RPTR_BASE_IDX			0

/* JRBC6 */
#define regUVD_JRBC6_UVD_JRBC_RB_WPTR				0x0140
#define regUVD_JRBC6_UVD_JRBC_RB_WPTR_BASE_IDX			0
#define regUVD_JRBC6_UVD_JRBC_STATUS				0x0149
#define regUVD_JRBC6_UVD_JRBC_STATUS_BASE_IDX			0
#define regUVD_JRBC6_UVD_JRBC_RB_RPTR				0x014a
#define regUVD_JRBC6_UVD_JRBC_RB_RPTR_BASE_IDX			0

/* JRBC7 */
#define regUVD_JRBC7_UVD_JRBC_RB_WPTR				0x0180
#define regUVD_JRBC7_UVD_JRBC_RB_WPTR_BASE_IDX			0
#define regUVD_JRBC7_UVD_JRBC_STATUS				0x0189
#define regUVD_JRBC7_UVD_JRBC_STATUS_BASE_IDX			0
#define regUVD_JRBC7_UVD_JRBC_RB_RPTR				0x018a
#define regUVD_JRBC7_UVD_JRBC_RB_RPTR_BASE_IDX			0

/* JRBC8 */
#define regUVD_JRBC8_UVD_JRBC_RB_WPTR				0x01c0
#define regUVD_JRBC8_UVD_JRBC_RB_WPTR_BASE_IDX			0
#define regUVD_JRBC8_UVD_JRBC_STATUS				0x01c9
#define regUVD_JRBC8_UVD_JRBC_STATUS_BASE_IDX			0
#define regUVD_JRBC8_UVD_JRBC_RB_RPTR				0x01ca
#define regUVD_JRBC8_UVD_JRBC_RB_RPTR_BASE_IDX			0

/* JRBC9 */
#define regUVD_JRBC9_UVD_JRBC_RB_WPTR				0x0440
#define regUVD_JRBC9_UVD_JRBC_RB_WPTR_BASE_IDX			1
#define regUVD_JRBC9_UVD_JRBC_STATUS				0x0449
#define regUVD_JRBC9_UVD_JRBC_STATUS_BASE_IDX			1
#define regUVD_JRBC9_UVD_JRBC_RB_RPTR				0x044a
#define regUVD_JRBC9_UVD_JRBC_RB_RPTR_BASE_IDX			1

/* JMI0 stall / drop controls and the core reset control */
#define regUVD_JMI0_JPEG_LMI_DROP				0x0663
#define regUVD_JMI0_JPEG_LMI_DROP_BASE_IDX			1
#define regUVD_JMI0_UVD_JMI_CLIENT_STALL			0x067a
#define regUVD_JMI0_UVD_JMI_CLIENT_STALL_BASE_IDX		1
#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS			0x067b
#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS_BASE_IDX	1
#define regJPEG_CORE_RST_CTRL					0x072e
#define regJPEG_CORE_RST_CTRL_BASE_IDX				1

/* VCN RRMT control */
#define regVCN_RRMT_CNTL					0x0940
#define regVCN_RRMT_CNTL_BASE_IDX				1

enum amdgpu_jpeg_v5_0_2_sub_block {
	AMDGPU_JPEG_V5_0_2_JPEG0 = 0,
	AMDGPU_JPEG_V5_0_2_JPEG1,

	AMDGPU_JPEG_V5_0_2_MAX_SUB_BLOCK,
};

#endif /* __JPEG_V5_0_2_H__ */
+352 -40
drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
··· 31 31 #include "gc/gc_11_0_0_default.h" 32 32 #include "v12_structs.h" 33 33 #include "mes_v12_api_def.h" 34 + #include "gfx_v12_1_pkt.h" 35 + #include "sdma_v7_1_0_pkt_open.h" 34 36 35 37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); 36 38 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); ··· 43 41 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); 44 42 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); 45 43 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); 44 + static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id); 46 45 47 46 #define MES_EOP_SIZE 2048 48 47 ··· 494 491 } 495 492 static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset) 496 493 { 497 - /* Check xcc reg offset range */ 498 - uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0; 499 - /* Each XCC has two register ranges. 500 - * These are represented in reg_offset[17:16] 501 - */ 502 - return ((reg_offset >> 16) & 0x3) + xcc; 494 + return ((reg_offset >> 16) & 0x7); 503 495 } 504 496 505 497 static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, 506 - struct RRMT_OPTION *rrmt_opt) 498 + struct RRMT_OPTION *rrmt_opt, 499 + uint32_t *out_reg) 507 500 { 508 501 uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); 509 502 ··· 508 509 rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? 
509 510 MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; 510 511 } else { 511 - rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID; 512 + rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID; 513 + if (soc_v1_0_mid1_reg_range(reg)) 514 + rrmt_opt->mid_die_id = 1; 512 515 } 516 + 517 + *out_reg = soc_v1_0_normalize_reg_offset(reg); 513 518 } 514 519 515 520 static int mes_v12_1_misc_op(struct amdgpu_mes *mes, ··· 537 534 switch (input->op) { 538 535 case MES_MISC_OP_READ_REG: 539 536 misc_pkt.opcode = MESAPI_MISC__READ_REG; 540 - misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; 541 537 misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; 542 538 mes_v12_1_get_rrmt(input->read_reg.reg_offset, 543 539 GET_INST(GC, input->xcc_id), 544 - &misc_pkt.read_reg.rrmt_opt); 545 - if (misc_pkt.read_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) { 546 - misc_pkt.read_reg.reg_offset = 547 - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.read_reg.reg_offset); 548 - } 540 + &misc_pkt.read_reg.rrmt_opt, 541 + &misc_pkt.read_reg.reg_offset); 549 542 break; 550 543 case MES_MISC_OP_WRITE_REG: 551 544 misc_pkt.opcode = MESAPI_MISC__WRITE_REG; 552 - misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; 553 545 misc_pkt.write_reg.reg_value = input->write_reg.reg_value; 554 546 mes_v12_1_get_rrmt(input->write_reg.reg_offset, 555 547 GET_INST(GC, input->xcc_id), 556 - &misc_pkt.write_reg.rrmt_opt); 557 - if (misc_pkt.write_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) { 558 - misc_pkt.write_reg.reg_offset = 559 - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.write_reg.reg_offset); 560 - } 548 + &misc_pkt.write_reg.rrmt_opt, 549 + &misc_pkt.write_reg.reg_offset); 561 550 break; 562 551 case MES_MISC_OP_WRM_REG_WAIT: 563 552 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 564 553 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; 565 554 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 566 555 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 567 - 
misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; 568 556 misc_pkt.wait_reg_mem.reg_offset2 = 0; 569 557 mes_v12_1_get_rrmt(input->wrm_reg.reg0, 570 558 GET_INST(GC, input->xcc_id), 571 - &misc_pkt.wait_reg_mem.rrmt_opt1); 572 - if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) { 573 - misc_pkt.wait_reg_mem.reg_offset1 = 574 - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1); 575 - } 559 + &misc_pkt.wait_reg_mem.rrmt_opt1, 560 + &misc_pkt.wait_reg_mem.reg_offset1); 576 561 break; 577 562 case MES_MISC_OP_WRM_REG_WR_WAIT: 578 563 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 579 564 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; 580 565 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 581 566 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 582 - misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; 583 - misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; 584 567 mes_v12_1_get_rrmt(input->wrm_reg.reg0, 585 568 GET_INST(GC, input->xcc_id), 586 - &misc_pkt.wait_reg_mem.rrmt_opt1); 569 + &misc_pkt.wait_reg_mem.rrmt_opt1, 570 + &misc_pkt.wait_reg_mem.reg_offset1); 587 571 mes_v12_1_get_rrmt(input->wrm_reg.reg1, 588 572 GET_INST(GC, input->xcc_id), 589 - &misc_pkt.wait_reg_mem.rrmt_opt2); 590 - 591 - if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) { 592 - misc_pkt.wait_reg_mem.reg_offset1 = 593 - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1); 594 - } 595 - if (misc_pkt.wait_reg_mem.rrmt_opt2.mode != MES_RRMT_MODE_REMOTE_MID) { 596 - misc_pkt.wait_reg_mem.reg_offset2 = 597 - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset2); 598 - } 573 + &misc_pkt.wait_reg_mem.rrmt_opt2, 574 + &misc_pkt.wait_reg_mem.reg_offset2); 599 575 break; 600 576 case MES_MISC_OP_SET_SHADER_DEBUGGER: 601 577 pipe = AMDGPU_MES_SCHED_PIPE; ··· 1931 1949 return 0; 1932 1950 } 1933 1951 1952 + static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block) 1953 + { 
1954 + struct amdgpu_device *adev = ip_block->adev; 1955 + int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1956 + 1957 + /* TODO: remove it if issue fixed. */ 1958 + if (adev->mes.enable_coop_mode) 1959 + return 0; 1960 + 1961 + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1962 + /* for COOP mode, only test master xcc. */ 1963 + if (adev->mes.enable_coop_mode && 1964 + adev->mes.master_xcc_ids[xcc_id] != xcc_id) 1965 + continue; 1966 + 1967 + mes_v12_1_self_test(adev, xcc_id); 1968 + } 1969 + 1970 + return 0; 1971 + } 1972 + 1934 1973 static const struct amd_ip_funcs mes_v12_1_ip_funcs = { 1935 1974 .name = "mes_v12_1", 1936 1975 .early_init = mes_v12_1_early_init, 1937 - .late_init = NULL, 1976 + .late_init = mes_v12_1_late_init, 1938 1977 .sw_init = mes_v12_1_sw_init, 1939 1978 .sw_fini = mes_v12_1_sw_fini, 1940 1979 .hw_init = mes_v12_1_hw_init, ··· 1971 1968 .rev = 0, 1972 1969 .funcs = &mes_v12_1_ip_funcs, 1973 1970 }; 1971 + 1972 + static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev, 1973 + struct amdgpu_bo **bo, uint64_t *addr, 1974 + void **ptr, int size) 1975 + { 1976 + amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1977 + bo, addr, ptr); 1978 + if (!*bo) { 1979 + dev_err(adev->dev, "failed to allocate test buffer bo\n"); 1980 + return -ENOMEM; 1981 + } 1982 + memset(*ptr, 0, size); 1983 + return 0; 1984 + } 1985 + 1986 + static int mes_v12_1_map_test_bo(struct amdgpu_device *adev, 1987 + struct amdgpu_bo *bo, struct amdgpu_vm *vm, 1988 + struct amdgpu_bo_va **bo_va, u64 va, int size) 1989 + { 1990 + struct amdgpu_sync sync; 1991 + int r; 1992 + 1993 + r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size); 1994 + if (r) 1995 + return r; 1996 + 1997 + amdgpu_sync_create(&sync); 1998 + 1999 + r = amdgpu_vm_bo_update(adev, *bo_va, false); 2000 + if (r) { 2001 + dev_err(adev->dev, "failed to do vm_bo_update on meta data\n"); 2002 + goto error; 2003 + } 2004 + amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, 
GFP_KERNEL); 2005 + 2006 + r = amdgpu_vm_update_pdes(adev, vm, false); 2007 + if (r) { 2008 + dev_err(adev->dev, "failed to update pdes on meta data\n"); 2009 + goto error; 2010 + } 2011 + amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); 2012 + amdgpu_sync_wait(&sync, false); 2013 + 2014 + error: 2015 + amdgpu_sync_free(&sync); 2016 + return 0; 2017 + } 2018 + 2019 + static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id, 2020 + u32 *queue_ptr, u64 fence_gpu_addr, 2021 + void *fence_cpu_ptr, void *wptr_cpu_addr, 2022 + u64 doorbell_idx, int queue_type) 2023 + { 2024 + volatile uint32_t *cpu_ptr = fence_cpu_ptr; 2025 + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2026 + int sdma_ring_align = 0x10, compute_ring_align = 0x100; 2027 + uint32_t tmp, xcc_offset; 2028 + int r = 0, i, wptr = 0; 2029 + 2030 + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2031 + if (!adev->mes.enable_coop_mode) { 2032 + WREG32_SOC15(GC, GET_INST(GC, xcc_id), 2033 + regSCRATCH_REG0, 0xCAFEDEAD); 2034 + } else { 2035 + for (i = 0; i < num_xcc; i++) { 2036 + if (adev->mes.master_xcc_ids[i] == xcc_id) 2037 + WREG32_SOC15(GC, GET_INST(GC, i), 2038 + regSCRATCH_REG0, 0xCAFEDEAD); 2039 + } 2040 + } 2041 + 2042 + xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 2043 + queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2044 + queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START; 2045 + queue_ptr[wptr++] = 0xDEADBEEF; 2046 + 2047 + for (i = wptr; i < compute_ring_align; i++) 2048 + queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF); 2049 + 2050 + } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2051 + *cpu_ptr = 0xCAFEDEAD; 2052 + 2053 + queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | 2054 + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 2055 + queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr); 2056 + queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr); 2057 + queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); 2058 + 
queue_ptr[wptr++] = 0xDEADBEEF; 2059 + 2060 + for (i = wptr; i < sdma_ring_align; i++) 2061 + queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 2062 + 2063 + wptr <<= 2; 2064 + } 2065 + 2066 + atomic64_set((atomic64_t *)wptr_cpu_addr, wptr); 2067 + WDOORBELL64(doorbell_idx, wptr); 2068 + 2069 + for (i = 0; i < adev->usec_timeout; i++) { 2070 + if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2071 + tmp = le32_to_cpu(*cpu_ptr); 2072 + } else { 2073 + if (!adev->mes.enable_coop_mode) { 2074 + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 2075 + regSCRATCH_REG0); 2076 + } else { 2077 + for (i = 0; i < num_xcc; i++) { 2078 + if (xcc_id != adev->mes.master_xcc_ids[i]) 2079 + continue; 2080 + 2081 + tmp = RREG32_SOC15(GC, GET_INST(GC, i), 2082 + regSCRATCH_REG0); 2083 + if (tmp != 0xDEADBEEF) 2084 + break; 2085 + } 2086 + } 2087 + } 2088 + 2089 + if (tmp == 0xDEADBEEF) 2090 + break; 2091 + 2092 + if (amdgpu_emu_mode == 1) 2093 + msleep(1); 2094 + else 2095 + udelay(1); 2096 + } 2097 + 2098 + if (i >= adev->usec_timeout) { 2099 + dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id, 2100 + queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); 2101 + 2102 + while (halt_if_hws_hang) 2103 + schedule(); 2104 + 2105 + r = -ETIMEDOUT; 2106 + } else { 2107 + dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id, 2108 + queue_type == AMDGPU_RING_TYPE_SDMA ? 
"sdma" : "compute"); 2109 + } 2110 + 2111 + return r; 2112 + } 2113 + 2114 + #define USER_CTX_SIZE (PAGE_SIZE * 2) 2115 + #define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM 2116 + #define RING_OFFSET(addr) ((addr)) 2117 + #define EOP_OFFSET(addr) ((addr) + PAGE_SIZE) 2118 + #define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64)) 2119 + #define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2) 2120 + #define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3) 2121 + 2122 + static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id, 2123 + int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr, 2124 + u64 queue_gpu_addr, void *ctx_ptr, int queue_type) 2125 + { 2126 + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; 2127 + struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type]; 2128 + struct amdgpu_mqd_prop mqd_prop = {0}; 2129 + struct mes_add_queue_input add_queue = {0}; 2130 + struct mes_remove_queue_input remove_queue = {0}; 2131 + struct amdgpu_bo *mqd_bo = NULL; 2132 + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2133 + int i, r, off, mqd_size, mqd_count = 1; 2134 + void *mqd_ptr = NULL; 2135 + u64 mqd_gpu_addr, doorbell_idx; 2136 + 2137 + /* extra one page size padding for mes fw */ 2138 + mqd_size = mqd_mgr->mqd_size + PAGE_SIZE; 2139 + 2140 + if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2141 + doorbell_idx = adev->mes.db_start_dw_offset + \ 2142 + adev->doorbell_index.sdma_engine[0]; 2143 + } else { 2144 + doorbell_idx = adev->mes.db_start_dw_offset + \ 2145 + adev->doorbell_index.userqueue_start; 2146 + } 2147 + 2148 + if (adev->mes.enable_coop_mode && 2149 + queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2150 + for (i = 0, mqd_count = 0; i < num_xcc; i++) { 2151 + if (adev->mes.master_xcc_ids[i] == xcc_id) 2152 + mqd_count++; 2153 + } 2154 + mqd_size *= mqd_count; 2155 + } 2156 + 2157 + r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr, 2158 + &mqd_ptr, mqd_size * mqd_count); 2159 + if (r < 0) 2160 + return r; 2161 + 
2162 + mqd_prop.mqd_gpu_addr = mqd_gpu_addr; 2163 + mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA); 2164 + mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA); 2165 + mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA); 2166 + mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA); 2167 + mqd_prop.doorbell_index = doorbell_idx; 2168 + mqd_prop.queue_size = PAGE_SIZE; 2169 + mqd_prop.mqd_stride_size = mqd_size; 2170 + mqd_prop.use_doorbell = true; 2171 + mqd_prop.hqd_active = false; 2172 + 2173 + mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop); 2174 + if (mqd_count > 1) { 2175 + for (i = 1; i < mqd_count; i++) { 2176 + off = mqd_size * i; 2177 + mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off; 2178 + mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off, 2179 + &mqd_prop); 2180 + } 2181 + } 2182 + 2183 + add_queue.xcc_id = xcc_id; 2184 + add_queue.process_id = pasid; 2185 + add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset + 2186 + amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start; 2187 + add_queue.process_va_start = 0; 2188 + add_queue.process_va_end = adev->vm_manager.max_pfn - 1; 2189 + add_queue.process_context_addr = meta_gpu_addr; 2190 + add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE; 2191 + add_queue.doorbell_offset = doorbell_idx; 2192 + add_queue.mqd_addr = mqd_gpu_addr; 2193 + add_queue.wptr_addr = mqd_prop.wptr_gpu_addr; 2194 + add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr); 2195 + add_queue.queue_type = queue_type; 2196 + add_queue.vm_cntx_cntl = hub->vm_cntx_cntl; 2197 + 2198 + r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue); 2199 + if (r) 2200 + goto error; 2201 + 2202 + mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr), 2203 + FENCE_OFFSET(USER_CTX_VA), 2204 + FENCE_OFFSET((char *)ctx_ptr), 2205 + WPTR_OFFSET((char *)ctx_ptr), 2206 + doorbell_idx, queue_type); 2207 + 2208 + remove_queue.xcc_id = xcc_id; 2209 + remove_queue.doorbell_offset = doorbell_idx; 2210 + remove_queue.gang_context_addr = 
add_queue.gang_context_addr; 2211 + r = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue); 2212 + 2213 + error: 2214 + amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr); 2215 + return r; 2216 + } 2217 + 2218 + static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id) 2219 + { 2220 + int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, 2221 + /* AMDGPU_RING_TYPE_SDMA */ }; 2222 + struct amdgpu_bo_va *bo_va = NULL; 2223 + struct amdgpu_vm *vm = NULL; 2224 + struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL; 2225 + void *meta_ptr = NULL, *ctx_ptr = NULL; 2226 + u64 meta_gpu_addr, ctx_gpu_addr; 2227 + int size, i, r, pasid;; 2228 + 2229 + pasid = amdgpu_pasid_alloc(16); 2230 + if (pasid < 0) 2231 + pasid = 0; 2232 + 2233 + size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE; 2234 + r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr, 2235 + &meta_ptr, size); 2236 + if (r < 0) 2237 + goto err2; 2238 + 2239 + r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr, 2240 + &ctx_ptr, USER_CTX_SIZE); 2241 + if (r < 0) 2242 + goto err2; 2243 + 2244 + vm = kzalloc(sizeof(*vm), GFP_KERNEL); 2245 + if (!vm) { 2246 + r = -ENOMEM; 2247 + goto err2; 2248 + } 2249 + 2250 + r = amdgpu_vm_init(adev, vm, -1, pasid); 2251 + if (r) 2252 + goto err1; 2253 + 2254 + r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va, 2255 + USER_CTX_VA, USER_CTX_SIZE); 2256 + if (r) 2257 + goto err0; 2258 + 2259 + for (i = 0; i < ARRAY_SIZE(queue_types); i++) { 2260 + memset(ctx_ptr, 0, USER_CTX_SIZE); 2261 + 2262 + r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr, 2263 + ctx_gpu_addr, ctx_ptr, queue_types[i]); 2264 + if (r) 2265 + break; 2266 + } 2267 + 2268 + amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA); 2269 + err0: 2270 + amdgpu_vm_fini(adev, vm); 2271 + err1: 2272 + kfree(vm); 2273 + err2: 2274 + amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr); 2275 + amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr); 2276 + 
amdgpu_pasid_free(pasid); 2277 + return r; 2278 + } 2279 +
+29 -17
drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
··· 141 141 uint32_t status) 142 142 { 143 143 uint32_t cid, rw; 144 - const char *mmhub_cid = NULL; 144 + const char *mmhub_cid; 145 145 146 146 cid = REG_GET_FIELD(status, 147 147 MMVM_L2_PROTECTION_FAULT_STATUS, CID); ··· 151 151 dev_err(adev->dev, 152 152 "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", 153 153 status); 154 - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 155 - case IP_VERSION(2, 0, 0): 156 - case IP_VERSION(2, 0, 2): 157 - mmhub_cid = mmhub_client_ids_navi1x[cid][rw]; 158 - break; 159 - case IP_VERSION(2, 1, 0): 160 - case IP_VERSION(2, 1, 1): 161 - mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw]; 162 - break; 163 - case IP_VERSION(2, 1, 2): 164 - mmhub_cid = mmhub_client_ids_beige_goby[cid][rw]; 165 - break; 166 - default: 167 - mmhub_cid = NULL; 168 - break; 169 - } 154 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 170 155 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 171 156 mmhub_cid ? mmhub_cid : "unknown", cid); 172 157 dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", ··· 503 518 .get_invalidate_req = mmhub_v2_0_get_invalidate_req, 504 519 }; 505 520 521 + static void mmhub_v2_0_init_client_info(struct amdgpu_device *adev) 522 + { 523 + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 524 + case IP_VERSION(2, 0, 0): 525 + case IP_VERSION(2, 0, 2): 526 + amdgpu_mmhub_init_client_info(&adev->mmhub, 527 + mmhub_client_ids_navi1x, 528 + ARRAY_SIZE(mmhub_client_ids_navi1x)); 529 + break; 530 + case IP_VERSION(2, 1, 0): 531 + case IP_VERSION(2, 1, 1): 532 + amdgpu_mmhub_init_client_info(&adev->mmhub, 533 + mmhub_client_ids_sienna_cichlid, 534 + ARRAY_SIZE(mmhub_client_ids_sienna_cichlid)); 535 + break; 536 + case IP_VERSION(2, 1, 2): 537 + amdgpu_mmhub_init_client_info(&adev->mmhub, 538 + mmhub_client_ids_beige_goby, 539 + ARRAY_SIZE(mmhub_client_ids_beige_goby)); 540 + break; 541 + default: 542 + break; 543 + } 544 + } 545 + 506 546 static void mmhub_v2_0_init(struct amdgpu_device *adev) 507 547 { 508 548 
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; ··· 568 558 MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; 569 559 570 560 hub->vmhub_funcs = &mmhub_v2_0_vmhub_funcs; 561 + 562 + mmhub_v2_0_init_client_info(adev); 571 563 } 572 564 573 565 static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+6 -11
drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
··· 80 80 uint32_t status) 81 81 { 82 82 uint32_t cid, rw; 83 - const char *mmhub_cid = NULL; 83 + const char *mmhub_cid; 84 84 85 85 cid = REG_GET_FIELD(status, 86 86 MMVM_L2_PROTECTION_FAULT_STATUS, CID); ··· 90 90 dev_err(adev->dev, 91 91 "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", 92 92 status); 93 - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 94 - case IP_VERSION(2, 3, 0): 95 - case IP_VERSION(2, 4, 0): 96 - case IP_VERSION(2, 4, 1): 97 - mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; 98 - break; 99 - default: 100 - mmhub_cid = NULL; 101 - break; 102 - } 93 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 103 94 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 104 95 mmhub_cid ? mmhub_cid : "unknown", cid); 105 96 dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", ··· 477 486 MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; 478 487 479 488 hub->vmhub_funcs = &mmhub_v2_3_vmhub_funcs; 489 + 490 + amdgpu_mmhub_init_client_info(&adev->mmhub, 491 + mmhub_client_ids_vangogh, 492 + ARRAY_SIZE(mmhub_client_ids_vangogh)); 480 493 } 481 494 482 495 static void
+6 -10
drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
··· 97 97 uint32_t status) 98 98 { 99 99 uint32_t cid, rw; 100 - const char *mmhub_cid = NULL; 100 + const char *mmhub_cid; 101 101 102 102 cid = REG_GET_FIELD(status, 103 103 MMVM_L2_PROTECTION_FAULT_STATUS, CID); ··· 107 107 dev_err(adev->dev, 108 108 "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", 109 109 status); 110 - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 111 - case IP_VERSION(3, 0, 0): 112 - case IP_VERSION(3, 0, 1): 113 - mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw]; 114 - break; 115 - default: 116 - mmhub_cid = NULL; 117 - break; 118 - } 110 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 119 111 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 120 112 mmhub_cid ? mmhub_cid : "unknown", cid); 121 113 dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", ··· 512 520 SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE); 513 521 514 522 hub->vmhub_funcs = &mmhub_v3_0_vmhub_funcs; 523 + 524 + amdgpu_mmhub_init_client_info(&adev->mmhub, 525 + mmhub_client_ids_v3_0_0, 526 + ARRAY_SIZE(mmhub_client_ids_v3_0_0)); 515 527 } 516 528 517 529 static u64 mmhub_v3_0_get_fb_location(struct amdgpu_device *adev)
+6 -11
drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
··· 104 104 uint32_t status) 105 105 { 106 106 uint32_t cid, rw; 107 - const char *mmhub_cid = NULL; 107 + const char *mmhub_cid; 108 108 109 109 cid = REG_GET_FIELD(status, 110 110 MMVM_L2_PROTECTION_FAULT_STATUS, CID); ··· 114 114 dev_err(adev->dev, 115 115 "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", 116 116 status); 117 - 118 - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 119 - case IP_VERSION(3, 0, 1): 120 - mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw]; 121 - break; 122 - default: 123 - mmhub_cid = NULL; 124 - break; 125 - } 126 - 117 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 127 118 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 128 119 mmhub_cid ? mmhub_cid : "unknown", cid); 129 120 dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", ··· 494 503 MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; 495 504 496 505 hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs; 506 + 507 + amdgpu_mmhub_init_client_info(&adev->mmhub, 508 + mmhub_client_ids_v3_0_1, 509 + ARRAY_SIZE(mmhub_client_ids_v3_0_1)); 497 510 } 498 511 499 512 static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev)
+6 -3
drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
··· 97 97 uint32_t status) 98 98 { 99 99 uint32_t cid, rw; 100 - const char *mmhub_cid = NULL; 100 + const char *mmhub_cid; 101 101 102 102 cid = REG_GET_FIELD(status, 103 103 MMVM_L2_PROTECTION_FAULT_STATUS, CID); ··· 107 107 dev_err(adev->dev, 108 108 "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", 109 109 status); 110 - 111 - mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw]; 110 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 112 111 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 113 112 mmhub_cid ? mmhub_cid : "unknown", cid); 114 113 dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", ··· 508 509 SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2); 509 510 510 511 hub->vmhub_funcs = &mmhub_v3_0_2_vmhub_funcs; 512 + 513 + amdgpu_mmhub_init_client_info(&adev->mmhub, 514 + mmhub_client_ids_v3_0_2, 515 + ARRAY_SIZE(mmhub_client_ids_v3_0_2)); 511 516 } 512 517 513 518 static u64 mmhub_v3_0_2_get_fb_location(struct amdgpu_device *adev)
+31 -24
drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
··· 217 217 uint32_t status) 218 218 { 219 219 uint32_t cid, rw; 220 - const char *mmhub_cid = NULL; 220 + const char *mmhub_cid; 221 221 222 222 cid = REG_GET_FIELD(status, 223 223 MMVM_L2_PROTECTION_FAULT_STATUS, CID); ··· 227 227 dev_err(adev->dev, 228 228 "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", 229 229 status); 230 - 231 - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 232 - case IP_VERSION(3, 3, 0): 233 - case IP_VERSION(3, 3, 2): 234 - mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ? 235 - mmhub_client_ids_v3_3[cid][rw] : 236 - cid == 0x140 ? "UMSCH" : NULL; 237 - break; 238 - case IP_VERSION(3, 3, 1): 239 - mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3_1) ? 240 - mmhub_client_ids_v3_3_1[cid][rw] : 241 - cid == 0x140 ? "UMSCH" : NULL; 242 - break; 243 - case IP_VERSION(3, 4, 0): 244 - mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_4) ? 245 - mmhub_client_ids_v3_4[cid][rw] : 246 - cid == 0x140 ? "UMSCH" : NULL; 247 - break; 248 - default: 249 - mmhub_cid = NULL; 250 - break; 251 - } 252 - 230 + if (cid == 0x140) 231 + mmhub_cid = "UMSCH"; 232 + else 233 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 253 234 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 254 235 mmhub_cid ? 
mmhub_cid : "unknown", cid); 255 236 dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", ··· 621 640 .get_invalidate_req = mmhub_v3_3_get_invalidate_req, 622 641 }; 623 642 643 + static void mmhub_v3_3_init_client_info(struct amdgpu_device *adev) 644 + { 645 + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 646 + case IP_VERSION(3, 3, 0): 647 + case IP_VERSION(3, 3, 2): 648 + amdgpu_mmhub_init_client_info(&adev->mmhub, 649 + mmhub_client_ids_v3_3, 650 + ARRAY_SIZE(mmhub_client_ids_v3_3)); 651 + break; 652 + case IP_VERSION(3, 3, 1): 653 + amdgpu_mmhub_init_client_info(&adev->mmhub, 654 + mmhub_client_ids_v3_3_1, 655 + ARRAY_SIZE(mmhub_client_ids_v3_3_1)); 656 + break; 657 + case IP_VERSION(3, 4, 0): 658 + amdgpu_mmhub_init_client_info(&adev->mmhub, 659 + mmhub_client_ids_v3_4, 660 + ARRAY_SIZE(mmhub_client_ids_v3_4)); 661 + break; 662 + default: 663 + break; 664 + } 665 + } 666 + 624 667 static void mmhub_v3_3_init(struct amdgpu_device *adev) 625 668 { 626 669 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; ··· 685 680 MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; 686 681 687 682 hub->vmhub_funcs = &mmhub_v3_3_vmhub_funcs; 683 + 684 + mmhub_v3_3_init_client_info(adev); 688 685 } 689 686 690 687 static u64 mmhub_v3_3_get_fb_location(struct amdgpu_device *adev)
+6 -9
drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
··· 90 90 uint32_t status) 91 91 { 92 92 uint32_t cid, rw; 93 - const char *mmhub_cid = NULL; 93 + const char *mmhub_cid; 94 94 95 95 cid = REG_GET_FIELD(status, 96 96 MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID); ··· 100 100 dev_err(adev->dev, 101 101 "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", 102 102 status); 103 - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 104 - case IP_VERSION(4, 1, 0): 105 - mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw]; 106 - break; 107 - default: 108 - mmhub_cid = NULL; 109 - break; 110 - } 103 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 111 104 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 112 105 mmhub_cid ? mmhub_cid : "unknown", cid); 113 106 dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", ··· 507 514 SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE); 508 515 509 516 hub->vmhub_funcs = &mmhub_v4_1_0_vmhub_funcs; 517 + 518 + amdgpu_mmhub_init_client_info(&adev->mmhub, 519 + mmhub_client_ids_v4_1_0, 520 + ARRAY_SIZE(mmhub_client_ids_v4_1_0)); 510 521 } 511 522 512 523 static u64 mmhub_v4_1_0_get_fb_location(struct amdgpu_device *adev)
+116 -78
drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
··· 72 72 [23][1] = "VCN1", 73 73 }; 74 74 75 + static int mmhub_v4_2_0_get_xgmi_info(struct amdgpu_device *adev) 76 + { 77 + u32 max_num_physical_nodes; 78 + u32 max_physical_node_id; 79 + u32 xgmi_lfb_cntl; 80 + u32 max_region; 81 + u64 seg_size; 82 + 83 + /* limit this callback to A + A configuration only */ 84 + if (!adev->gmc.xgmi.connected_to_cpu) 85 + return 0; 86 + 87 + xgmi_lfb_cntl = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), 88 + regMMMC_VM_XGMI_LFB_CNTL); 89 + seg_size = REG_GET_FIELD( 90 + RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMMMC_VM_XGMI_LFB_SIZE), 91 + MMMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; 92 + max_region = 93 + REG_GET_FIELD(xgmi_lfb_cntl, MMMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); 94 + 95 + max_num_physical_nodes = 4; 96 + max_physical_node_id = 3; 97 + 98 + adev->gmc.xgmi.num_physical_nodes = max_region + 1; 99 + 100 + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) 101 + return -EINVAL; 102 + 103 + adev->gmc.xgmi.physical_node_id = 104 + REG_GET_FIELD(xgmi_lfb_cntl, MMMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); 105 + 106 + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) 107 + return -EINVAL; 108 + 109 + adev->gmc.xgmi.node_segment_size = seg_size; 110 + 111 + return 0; 112 + } 113 + 75 114 static u64 mmhub_v4_2_0_get_fb_location(struct amdgpu_device *adev) 76 115 { 77 116 u64 base; ··· 170 131 static void mmhub_v4_2_0_mid_init_gart_aperture_regs(struct amdgpu_device *adev, 171 132 uint32_t mid_mask) 172 133 { 173 - uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); 134 + uint64_t pt_base; 174 135 int i; 175 136 176 137 if (adev->gmc.pdb0_bo) ··· 191 152 192 153 WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 193 154 regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, 194 - (u32)(adev->gmc.fb_end >> 12)); 155 + (u32)(adev->gmc.gart_end >> 12)); 195 156 WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 196 157 regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, 197 - (u32)(adev->gmc.fb_end >> 44)); 158 + (u32)(adev->gmc.gart_end >> 44)); 198 159 } else { 
199 160 WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 200 161 regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, ··· 229 190 return; 230 191 231 192 for_each_inst(i, mid_mask) { 232 - /* Program the AGP BAR */ 233 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 234 - regMMMC_VM_AGP_BASE_LO32, 0); 235 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 236 - regMMMC_VM_AGP_BASE_HI32, 0); 237 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 238 - regMMMC_VM_AGP_BOT_LO32, 239 - lower_32_bits(adev->gmc.agp_start >> 24)); 240 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 241 - regMMMC_VM_AGP_BOT_HI32, 242 - upper_32_bits(adev->gmc.agp_start >> 24)); 243 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 244 - regMMMC_VM_AGP_TOP_LO32, 245 - lower_32_bits(adev->gmc.agp_end >> 24)); 246 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 247 - regMMMC_VM_AGP_TOP_HI32, 248 - upper_32_bits(adev->gmc.agp_end >> 24)); 193 + if (adev->gmc.pdb0_bo) { 194 + /* Disable agp and system aperture 195 + * when vmid0 page table is enabled */ 196 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 197 + regMMMC_VM_FB_LOCATION_TOP_LO32, 0); 198 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 199 + regMMMC_VM_FB_LOCATION_TOP_HI32, 0); 200 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 201 + regMMMC_VM_FB_LOCATION_BASE_LO32, 202 + 0xFFFFFFFF); 203 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 204 + regMMMC_VM_FB_LOCATION_BASE_HI32, 1); 205 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 206 + regMMMC_VM_AGP_TOP_LO32, 0); 207 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 208 + regMMMC_VM_AGP_TOP_HI32, 0); 209 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 210 + regMMMC_VM_AGP_BOT_LO32, 211 + 0xFFFFFFFF); 212 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 213 + regMMMC_VM_AGP_BOT_HI32, 1); 214 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 215 + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, 216 + 0xFFFFFFFF); 217 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 218 + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, 219 + 0x7F); 220 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 221 + 
regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0); 222 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 223 + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0); 224 + } else { 225 + /* Program the AGP BAR */ 226 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 227 + regMMMC_VM_AGP_BASE_LO32, 0); 228 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 229 + regMMMC_VM_AGP_BASE_HI32, 0); 230 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 231 + regMMMC_VM_AGP_BOT_LO32, 232 + lower_32_bits(adev->gmc.agp_start >> 24)); 233 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 234 + regMMMC_VM_AGP_BOT_HI32, 235 + upper_32_bits(adev->gmc.agp_start >> 24)); 236 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 237 + regMMMC_VM_AGP_TOP_LO32, 238 + lower_32_bits(adev->gmc.agp_end >> 24)); 239 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 240 + regMMMC_VM_AGP_TOP_HI32, 241 + upper_32_bits(adev->gmc.agp_end >> 24)); 249 242 250 - /* Program the system aperture low logical page number. */ 251 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 252 - regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, 253 - lower_32_bits(min(adev->gmc.fb_start, 254 - adev->gmc.agp_start) >> 18)); 255 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 256 - regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, 257 - upper_32_bits(min(adev->gmc.fb_start, 258 - adev->gmc.agp_start) >> 18)); 259 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 260 - regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 261 - lower_32_bits(max(adev->gmc.fb_end, 262 - adev->gmc.agp_end) >> 18)); 263 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 264 - regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 265 - upper_32_bits(max(adev->gmc.fb_end, 266 - adev->gmc.agp_end) >> 18)); 243 + /* Program the system aperture low logical page number. 
*/ 244 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 245 + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, 246 + lower_32_bits(min(adev->gmc.fb_start, 247 + adev->gmc.agp_start) >> 18)); 248 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 249 + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, 250 + upper_32_bits(min(adev->gmc.fb_start, 251 + adev->gmc.agp_start) >> 18)); 252 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 253 + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 254 + lower_32_bits(max(adev->gmc.fb_end, 255 + adev->gmc.agp_end) >> 18)); 256 + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 257 + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 258 + upper_32_bits(max(adev->gmc.fb_end, 259 + adev->gmc.agp_end) >> 18)); 260 + } 267 261 268 262 /* Set default page address. */ 269 263 value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); ··· 323 251 ENABLE_RETRY_FAULT_INTERRUPT, 0x1); 324 252 WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 325 253 regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); 326 - } 327 - 328 - /* In the case squeezing vram into GART aperture, we don't use 329 - * FB aperture and AGP aperture. Disable them. 
330 - */ 331 - if (adev->gmc.pdb0_bo) { 332 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 333 - regMMMC_VM_FB_LOCATION_TOP_LO32, 0); 334 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 335 - regMMMC_VM_FB_LOCATION_TOP_HI32, 0); 336 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 337 - regMMMC_VM_FB_LOCATION_BASE_LO32, 0xFFFFFFFF); 338 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 339 - regMMMC_VM_FB_LOCATION_BASE_HI32, 1); 340 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 341 - regMMMC_VM_AGP_TOP_LO32, 0); 342 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 343 - regMMMC_VM_AGP_TOP_HI32, 0); 344 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 345 - regMMMC_VM_AGP_BOT_LO32, 0xFFFFFFFF); 346 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 347 - regMMMC_VM_AGP_BOT_HI32, 1); 348 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 349 - regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, 350 - 0xFFFFFFFF); 351 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 352 - regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, 353 - 0x7F); 354 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 355 - regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0); 356 - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), 357 - regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0); 358 254 } 359 255 } 360 256 ··· 716 676 uint32_t status) 717 677 { 718 678 uint32_t cid, rw; 719 - const char *mmhub_cid = NULL; 679 + const char *mmhub_cid; 720 680 721 681 cid = REG_GET_FIELD(status, 722 682 MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID); ··· 726 686 dev_err(adev->dev, 727 687 "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", 728 688 status); 729 - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { 730 - case IP_VERSION(4, 2, 0): 731 - mmhub_cid = mmhub_client_ids_v4_2_0[cid][rw]; 732 - break; 733 - default: 734 - mmhub_cid = NULL; 735 - break; 736 - } 689 + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); 737 690 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", 738 691 mmhub_cid ? 
mmhub_cid : "unknown", cid); 739 692 dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", ··· 817 784 818 785 mid_mask = adev->aid_mask; 819 786 mmhub_v4_2_0_mid_init(adev, mid_mask); 787 + 788 + amdgpu_mmhub_init_client_info(&adev->mmhub, 789 + mmhub_client_ids_v4_2_0, 790 + ARRAY_SIZE(mmhub_client_ids_v4_2_0)); 820 791 } 821 792 822 793 static void ··· 920 883 .set_fault_enable_default = mmhub_v4_2_0_set_fault_enable_default, 921 884 .set_clockgating = mmhub_v4_2_0_set_clockgating, 922 885 .get_clockgating = mmhub_v4_2_0_get_clockgating, 886 + .get_xgmi_info = mmhub_v4_2_0_get_xgmi_info, 923 887 }; 924 888 925 889 static int mmhub_v4_2_0_xcp_resume(void *handle, uint32_t inst_mask)
+15 -6
drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
··· 1129 1129 /* for physically contiguous pages (vram) */ 1130 1130 u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE); 1131 1131 1132 - if (amdgpu_mtype_local) 1133 - header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3); 1134 - else 1135 - header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2) | 1136 - SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) | 1137 - SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3)); 1132 + /* TODO: 1133 + * When VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is enabled, change below MTYPE 1134 + * to RW for AID A1 and UC for AID A0. NC needs additional GCR flush and need not 1135 + * be supported. Also, honour amdgpu_mtype_local override. RW would additionally 1136 + * require setting SCOPE bits in the header. 1137 + * 1138 + * header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2:RW) | 1139 + * SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) | 1140 + * SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3:SYS_SCOPE)); 1141 + */ 1142 + 1143 + /* VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is 0 which defaults to UC. So, 1144 + * use MTYPE_UC (0x3). For ref. MTYPE_RW=0x2 MTYPE_NC=0x0 1145 + */ 1146 + header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3) | SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1); 1138 1147 1139 1148 ib->ptr[ib->length_dw++] = header; 1140 1149 ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+68 -1
drivers/gpu/drm/amd/amdgpu/soc_v1_0.c
··· 41 41 #define NORMALIZE_XCC_REG_OFFSET(offset) \ 42 42 (offset & 0xFFFF) 43 43 44 + #define MID1_REG_RANGE_0_LOW 0x40000 45 + #define MID1_REG_RANGE_0_HIGH 0x80000 46 + #define NORMALIZE_MID_REG_OFFSET(offset) \ 47 + (offset & 0x3FFFF) 48 + 49 + static const struct amdgpu_video_codecs vcn_5_0_2_video_codecs_encode_vcn0 = { 50 + .codec_count = 0, 51 + .codec_array = NULL, 52 + }; 53 + 54 + static const struct amdgpu_video_codec_info vcn_5_0_2_video_codecs_decode_array_vcn0[] = { 55 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 56 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, 57 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, 58 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, 59 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, 60 + }; 61 + 62 + static const struct amdgpu_video_codecs vcn_5_0_2_video_codecs_decode_vcn0 = { 63 + .codec_count = ARRAY_SIZE(vcn_5_0_2_video_codecs_decode_array_vcn0), 64 + .codec_array = vcn_5_0_2_video_codecs_decode_array_vcn0, 65 + }; 66 + 67 + static int soc_v1_0_query_video_codecs(struct amdgpu_device *adev, bool encode, 68 + const struct amdgpu_video_codecs **codecs) 69 + { 70 + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { 71 + case IP_VERSION(5, 0, 2): 72 + if (encode) 73 + *codecs = &vcn_5_0_2_video_codecs_encode_vcn0; 74 + else 75 + *codecs = &vcn_5_0_2_video_codecs_decode_vcn0; 76 + return 0; 77 + default: 78 + return -EINVAL; 79 + } 80 + } 81 + 44 82 /* Initialized doorbells for amdgpu including multimedia 45 83 * KFD can use all the rest in 2M doorbell bar */ 46 84 static void soc_v1_0_doorbell_index_init(struct amdgpu_device *adev) ··· 295 257 .encode_ext_smn_addressing = &soc_v1_0_encode_ext_smn_addressing, 296 258 .reset = soc_v1_0_asic_reset, 297 259 .reset_method = &soc_v1_0_asic_reset_method, 260 + .query_video_codecs = &soc_v1_0_query_video_codecs, 298 261 }; 299 262 
300 263 static int soc_v1_0_common_early_init(struct amdgpu_ip_block *ip_block) ··· 322 283 case IP_VERSION(12, 1, 0): 323 284 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | 324 285 AMD_CG_SUPPORT_GFX_CGLS; 325 - adev->pg_flags = 0; 286 + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG; 326 287 adev->external_rev_id = adev->rev_id + 0x50; 327 288 break; 328 289 default: ··· 909 870 else 910 871 return reg; 911 872 } 873 + 874 + bool soc_v1_0_mid1_reg_range(uint32_t reg) 875 + { 876 + uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); 877 + 878 + if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) 879 + return false; 880 + 881 + if ((reg >= MID1_REG_RANGE_0_LOW) && (reg < MID1_REG_RANGE_0_HIGH)) 882 + return true; 883 + else 884 + return false; 885 + } 886 + 887 + uint32_t soc_v1_0_normalize_reg_offset(uint32_t reg) 888 + { 889 + uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); 890 + 891 + if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) 892 + return soc_v1_0_normalize_xcc_reg_offset(reg); 893 + 894 + /* check if the reg offset is inside MID1. */ 895 + if (soc_v1_0_mid1_reg_range(reg)) 896 + return NORMALIZE_MID_REG_OFFSET(reg); 897 + 898 + return reg; 899 + } 900 +
+2
drivers/gpu/drm/amd/amdgpu/soc_v1_0.h
··· 31 31 int xcc_id); 32 32 int soc_v1_0_init_soc_config(struct amdgpu_device *adev); 33 33 bool soc_v1_0_normalize_xcc_reg_range(uint32_t reg); 34 + bool soc_v1_0_mid1_reg_range(uint32_t reg); 34 35 uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg); 36 + uint32_t soc_v1_0_normalize_reg_offset(uint32_t reg); 35 37 u64 soc_v1_0_encode_ext_smn_addressing(int ext_id); 36 38 37 39 #endif
+82 -35
drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
··· 183 183 if (adev->gmc.gmc_funcs->query_mem_partition_mode) 184 184 nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); 185 185 186 - /* default setting */ 187 - flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT; 188 - flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT; 189 - flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT; 190 - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT; 191 - flip_bits->flip_row_bit = 13; 192 - flip_bits->bit_num = 4; 193 - flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT; 186 + if (adev->gmc.num_umc == 16) { 187 + /* default setting */ 188 + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT; 189 + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT; 190 + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT; 191 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT; 192 + flip_bits->flip_row_bit = 13; 193 + flip_bits->bit_num = 4; 194 + flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT; 194 195 195 - if (nps == AMDGPU_NPS2_PARTITION_MODE) { 196 + if (nps == AMDGPU_NPS2_PARTITION_MODE) { 197 + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT; 198 + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT; 199 + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT; 200 + flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT; 201 + } else if (nps == AMDGPU_NPS4_PARTITION_MODE) { 202 + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; 203 + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; 204 + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; 205 + flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; 206 + } 207 + 208 + switch (vram_type) { 209 + case AMDGPU_VRAM_TYPE_HBM: 210 + /* other nps modes are taken as nps1 */ 211 + if (nps == AMDGPU_NPS2_PARTITION_MODE) 212 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; 213 + else if (nps == AMDGPU_NPS4_PARTITION_MODE) 214 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; 215 + 216 + break; 217 + case AMDGPU_VRAM_TYPE_HBM3E: 218 + flip_bits->flip_bits_in_pa[3] = 
UMC_V12_0_PA_R12_BIT; 219 + flip_bits->flip_row_bit = 12; 220 + 221 + if (nps == AMDGPU_NPS2_PARTITION_MODE) 222 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; 223 + else if (nps == AMDGPU_NPS4_PARTITION_MODE) 224 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; 225 + 226 + break; 227 + default: 228 + dev_warn(adev->dev, 229 + "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); 230 + break; 231 + } 232 + } else if (adev->gmc.num_umc == 8) { 233 + /* default setting */ 196 234 flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT; 197 235 flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT; 198 236 flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT; 199 - flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT; 200 - } else if (nps == AMDGPU_NPS4_PARTITION_MODE) { 201 - flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; 202 - flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; 203 - flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; 204 - flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; 205 - } 206 - 207 - switch (vram_type) { 208 - case AMDGPU_VRAM_TYPE_HBM: 209 - /* other nps modes are taken as nps1 */ 210 - if (nps == AMDGPU_NPS2_PARTITION_MODE) 211 - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; 212 - else if (nps == AMDGPU_NPS4_PARTITION_MODE) 213 - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; 214 - 215 - break; 216 - case AMDGPU_VRAM_TYPE_HBM3E: 217 - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; 237 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; 218 238 flip_bits->flip_row_bit = 12; 239 + flip_bits->bit_num = 4; 240 + flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT; 219 241 220 - if (nps == AMDGPU_NPS2_PARTITION_MODE) 242 + if (nps == AMDGPU_NPS2_PARTITION_MODE) { 243 + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; 244 + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; 245 + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; 246 + flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; 247 + } 
248 + 249 + switch (vram_type) { 250 + case AMDGPU_VRAM_TYPE_HBM: 251 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; 252 + 253 + /* other nps modes are taken as nps1 */ 254 + if (nps == AMDGPU_NPS2_PARTITION_MODE) 255 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; 256 + 257 + break; 258 + case AMDGPU_VRAM_TYPE_HBM3E: 221 259 flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; 222 - else if (nps == AMDGPU_NPS4_PARTITION_MODE) 223 - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; 260 + flip_bits->flip_row_bit = 12; 224 261 225 - break; 226 - default: 262 + if (nps == AMDGPU_NPS2_PARTITION_MODE) 263 + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; 264 + 265 + break; 266 + default: 267 + dev_warn(adev->dev, 268 + "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); 269 + break; 270 + } 271 + } else { 227 272 dev_warn(adev->dev, 228 - "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); 229 - break; 273 + "Unsupported UMC number(%d), failed to set RAS flip bits.\n", 274 + adev->gmc.num_umc); 275 + 276 + return; 230 277 } 231 278 232 279 adev->umc.retire_unit = 0x1 << flip_bits->bit_num;
+1219
drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c
··· 1 + /* 2 + * Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 
21 + * 22 + */ 23 + 24 + #include <linux/firmware.h> 25 + #include "amdgpu.h" 26 + #include "amdgpu_vcn.h" 27 + #include "amdgpu_pm.h" 28 + #include "soc15.h" 29 + #include "soc15d.h" 30 + #include "soc15_hw_ip.h" 31 + #include "vcn_v2_0.h" 32 + #include "vcn_v4_0_3.h" 33 + 34 + #include "vcn/vcn_5_0_0_offset.h" 35 + #include "vcn/vcn_5_0_0_sh_mask.h" 36 + #include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" 37 + #include "vcn_v5_0_0.h" 38 + #include "vcn_v5_0_1.h" 39 + #include "vcn_v5_0_2.h" 40 + 41 + #include <drm/drm_drv.h> 42 + 43 + static void vcn_v5_0_2_set_unified_ring_funcs(struct amdgpu_device *adev); 44 + static void vcn_v5_0_2_set_irq_funcs(struct amdgpu_device *adev); 45 + static int vcn_v5_0_2_set_pg_state(struct amdgpu_vcn_inst *vinst, 46 + enum amd_powergating_state state); 47 + static void vcn_v5_0_2_unified_ring_set_wptr(struct amdgpu_ring *ring); 48 + 49 + /** 50 + * vcn_v5_0_2_early_init - set function pointers and load microcode 51 + * 52 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
53 + * 54 + * Set ring and irq function pointers 55 + * Load microcode from filesystem 56 + */ 57 + static int vcn_v5_0_2_early_init(struct amdgpu_ip_block *ip_block) 58 + { 59 + struct amdgpu_device *adev = ip_block->adev; 60 + int i, r; 61 + 62 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) 63 + /* re-use enc ring as unified ring */ 64 + adev->vcn.inst[i].num_enc_rings = 1; 65 + 66 + vcn_v5_0_2_set_unified_ring_funcs(adev); 67 + vcn_v5_0_2_set_irq_funcs(adev); 68 + 69 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 70 + adev->vcn.inst[i].set_pg_state = vcn_v5_0_2_set_pg_state; 71 + 72 + r = amdgpu_vcn_early_init(adev, i); 73 + if (r) 74 + return r; 75 + } 76 + 77 + return 0; 78 + } 79 + 80 + static void vcn_v5_0_2_fw_shared_init(struct amdgpu_device *adev, int inst_idx) 81 + { 82 + struct amdgpu_vcn5_fw_shared *fw_shared; 83 + 84 + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; 85 + 86 + if (fw_shared->sq.is_enabled) 87 + return; 88 + fw_shared->present_flag_0 = 89 + cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); 90 + fw_shared->sq.is_enabled = 1; 91 + 92 + if (amdgpu_vcnfw_log) 93 + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); 94 + } 95 + 96 + /** 97 + * vcn_v5_0_2_sw_init - sw init for VCN block 98 + * 99 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
100 + * 101 + * Load firmware and sw initialization 102 + */ 103 + static int vcn_v5_0_2_sw_init(struct amdgpu_ip_block *ip_block) 104 + { 105 + struct amdgpu_device *adev = ip_block->adev; 106 + struct amdgpu_ring *ring; 107 + int i, r, vcn_inst; 108 + 109 + /* VCN UNIFIED TRAP */ 110 + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_VCN, 111 + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); 112 + if (r) 113 + return r; 114 + 115 + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 116 + vcn_inst = GET_INST(VCN, i); 117 + 118 + r = amdgpu_vcn_sw_init(adev, i); 119 + if (r) 120 + return r; 121 + 122 + amdgpu_vcn_setup_ucode(adev, i); 123 + 124 + r = amdgpu_vcn_resume(adev, i); 125 + if (r) 126 + return r; 127 + 128 + ring = &adev->vcn.inst[i].ring_enc[0]; 129 + ring->use_doorbell = true; 130 + 131 + ring->doorbell_index = 132 + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 32 * vcn_inst; 133 + 134 + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); 135 + sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); 136 + 137 + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, 138 + AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); 139 + if (r) 140 + return r; 141 + 142 + vcn_v5_0_2_fw_shared_init(adev, i); 143 + } 144 + 145 + /* TODO: Add queue reset mask when FW fully supports it */ 146 + adev->vcn.supported_reset = 147 + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); 148 + 149 + return amdgpu_vcn_sysfs_reset_mask_init(adev); 150 + } 151 + 152 + /** 153 + * vcn_v5_0_2_sw_fini - sw fini for VCN block 154 + * 155 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
156 + * 157 + * VCN suspend and free up sw allocation 158 + */ 159 + static int vcn_v5_0_2_sw_fini(struct amdgpu_ip_block *ip_block) 160 + { 161 + struct amdgpu_device *adev = ip_block->adev; 162 + int i, r, idx; 163 + 164 + if (drm_dev_enter(adev_to_drm(adev), &idx)) { 165 + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 166 + struct amdgpu_vcn5_fw_shared *fw_shared; 167 + 168 + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 169 + fw_shared->present_flag_0 = 0; 170 + fw_shared->sq.is_enabled = 0; 171 + } 172 + 173 + drm_dev_exit(idx); 174 + } 175 + 176 + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 177 + r = amdgpu_vcn_suspend(adev, i); 178 + if (r) 179 + return r; 180 + } 181 + 182 + for (i = 0; i < adev->vcn.num_vcn_inst; i++) 183 + amdgpu_vcn_sw_fini(adev, i); 184 + 185 + amdgpu_vcn_sysfs_reset_mask_fini(adev); 186 + 187 + kfree(adev->vcn.ip_dump); //TODO check 188 + 189 + return 0; 190 + } 191 + 192 + /** 193 + * vcn_v5_0_2_hw_init - start and test VCN block 194 + * 195 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
196 + * 197 + * Initialize the hardware, boot up the VCPU and do some testing 198 + */ 199 + static int vcn_v5_0_2_hw_init(struct amdgpu_ip_block *ip_block) 200 + { 201 + struct amdgpu_device *adev = ip_block->adev; 202 + struct amdgpu_ring *ring; 203 + int i, r, vcn_inst; 204 + uint32_t tmp; 205 + 206 + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x200) 207 + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); 208 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 209 + vcn_inst = GET_INST(VCN, i); 210 + ring = &adev->vcn.inst[i].ring_enc[0]; 211 + 212 + /* Remove Video Tiles antihang mechanism */ 213 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); 214 + tmp &= (~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); 215 + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); 216 + 217 + if (ring->use_doorbell) 218 + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, 219 + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 220 + 11 * vcn_inst), 221 + adev->vcn.inst[i].aid_id); 222 + 223 + /* Re-init fw_shared, if required */ 224 + vcn_v5_0_2_fw_shared_init(adev, i); 225 + 226 + r = amdgpu_ring_test_helper(ring); 227 + if (r) 228 + return r; 229 + } 230 + 231 + return 0; 232 + } 233 + 234 + /** 235 + * vcn_v5_0_2_hw_fini - stop the hardware block 236 + * 237 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
238 + * 239 + * Stop the VCN block, mark ring as not ready any more 240 + */ 241 + static int vcn_v5_0_2_hw_fini(struct amdgpu_ip_block *ip_block) 242 + { 243 + struct amdgpu_device *adev = ip_block->adev; 244 + int i; 245 + 246 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 247 + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; 248 + 249 + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); 250 + if (vinst->cur_state != AMD_PG_STATE_GATE) 251 + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); 252 + } 253 + 254 + return 0; 255 + } 256 + 257 + /** 258 + * vcn_v5_0_2_suspend - suspend VCN block 259 + * 260 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 261 + * 262 + * HW fini and suspend VCN block 263 + */ 264 + static int vcn_v5_0_2_suspend(struct amdgpu_ip_block *ip_block) 265 + { 266 + struct amdgpu_device *adev = ip_block->adev; 267 + int r, i; 268 + 269 + r = vcn_v5_0_2_hw_fini(ip_block); 270 + if (r) 271 + return r; 272 + 273 + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 274 + r = amdgpu_vcn_suspend(ip_block->adev, i); 275 + if (r) 276 + return r; 277 + } 278 + 279 + return r; 280 + } 281 + 282 + /** 283 + * vcn_v5_0_2_resume - resume VCN block 284 + * 285 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
286 + * 287 + * Resume firmware and hw init VCN block 288 + */ 289 + static int vcn_v5_0_2_resume(struct amdgpu_ip_block *ip_block) 290 + { 291 + struct amdgpu_device *adev = ip_block->adev; 292 + int r, i; 293 + 294 + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 295 + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; 296 + 297 + if (amdgpu_in_reset(adev)) 298 + vinst->cur_state = AMD_PG_STATE_GATE; 299 + 300 + r = amdgpu_vcn_resume(ip_block->adev, i); 301 + if (r) 302 + return r; 303 + } 304 + 305 + r = vcn_v5_0_2_hw_init(ip_block); 306 + 307 + return r; 308 + } 309 + 310 + /** 311 + * vcn_v5_0_2_mc_resume - memory controller programming 312 + * 313 + * @vinst: VCN instance 314 + * 315 + * Let the VCN memory controller know it's offsets 316 + */ 317 + static void vcn_v5_0_2_mc_resume(struct amdgpu_vcn_inst *vinst) 318 + { 319 + struct amdgpu_device *adev = vinst->adev; 320 + int inst = vinst->inst; 321 + uint32_t offset, size, vcn_inst; 322 + const struct common_firmware_header *hdr; 323 + 324 + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; 325 + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); 326 + 327 + vcn_inst = GET_INST(VCN, inst); 328 + /* cache window 0: fw */ 329 + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 330 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, 331 + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); 332 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, 333 + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); 334 + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0); 335 + offset = 0; 336 + } else { 337 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, 338 + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); 339 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, 340 + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); 341 + offset = size; 342 + WREG32_SOC15(VCN, vcn_inst, 
regUVD_VCPU_CACHE_OFFSET0, 343 + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); 344 + } 345 + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size); 346 + 347 + /* cache window 1: stack */ 348 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, 349 + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); 350 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, 351 + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); 352 + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0); 353 + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); 354 + 355 + /* cache window 2: context */ 356 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, 357 + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); 358 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, 359 + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); 360 + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0); 361 + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); 362 + 363 + /* non-cache window */ 364 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, 365 + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); 366 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, 367 + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); 368 + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); 369 + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0, 370 + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared))); 371 + } 372 + 373 + /** 374 + * vcn_v5_0_2_mc_resume_dpg_mode - memory controller programming for dpg mode 375 + * 376 + * @vinst: VCN instance 377 + * @indirect: indirectly write sram 378 + * 379 + * Let the VCN memory controller know it's offsets with dpg mode 380 + */ 381 + static void vcn_v5_0_2_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, 382 + bool indirect) 383 + { 384 + struct 
amdgpu_device *adev = vinst->adev; 385 + int inst_idx = vinst->inst; 386 + uint32_t offset, size; 387 + const struct common_firmware_header *hdr; 388 + 389 + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; 390 + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); 391 + 392 + /* cache window 0: fw */ 393 + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 394 + if (!indirect) { 395 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 396 + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 397 + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + 398 + inst_idx].tmr_mc_addr_lo), 0, indirect); 399 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 400 + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 401 + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + 402 + inst_idx].tmr_mc_addr_hi), 0, indirect); 403 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 404 + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); 405 + } else { 406 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 407 + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); 408 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 409 + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); 410 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 411 + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); 412 + } 413 + offset = 0; 414 + } else { 415 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 416 + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 417 + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); 418 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 419 + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 420 + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); 421 + offset = size; 422 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 423 + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 424 + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); 425 + } 426 + 427 + if (!indirect) 428 + 
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 429 + VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); 430 + else 431 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 432 + VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); 433 + 434 + /* cache window 1: stack */ 435 + if (!indirect) { 436 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 437 + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 438 + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); 439 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 440 + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 441 + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); 442 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 443 + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); 444 + } else { 445 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 446 + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); 447 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 448 + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); 449 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 450 + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); 451 + } 452 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 453 + VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); 454 + 455 + /* cache window 2: context */ 456 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 457 + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), 458 + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + 459 + AMDGPU_VCN_STACK_SIZE), 0, indirect); 460 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 461 + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), 462 + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + 463 + AMDGPU_VCN_STACK_SIZE), 0, indirect); 464 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 465 + VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); 466 + 
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 467 + VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); 468 + 469 + /* non-cache window */ 470 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 471 + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 472 + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); 473 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 474 + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 475 + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); 476 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 477 + VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); 478 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 479 + VCN, 0, regUVD_VCPU_NONCACHE_SIZE0), 480 + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect); 481 + 482 + /* VCN global tiling registers */ 483 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 484 + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); 485 + } 486 + 487 + /** 488 + * vcn_v5_0_2_disable_clock_gating - disable VCN clock gating 489 + * 490 + * @vinst: VCN instance 491 + * 492 + * Disable clock gating for VCN block 493 + */ 494 + static void vcn_v5_0_2_disable_clock_gating(struct amdgpu_vcn_inst *vinst) 495 + { 496 + } 497 + 498 + /** 499 + * vcn_v5_0_2_enable_clock_gating - enable VCN clock gating 500 + * 501 + * @vinst: VCN instance 502 + * 503 + * Enable clock gating for VCN block 504 + */ 505 + static void vcn_v5_0_2_enable_clock_gating(struct amdgpu_vcn_inst *vinst) 506 + { 507 + } 508 + 509 + /** 510 + * vcn_v5_0_2_pause_dpg_mode - VCN pause with dpg mode 511 + * 512 + * @vinst: VCN instance 513 + * @new_state: pause state 514 + * 515 + * Pause dpg mode for VCN block 516 + */ 517 + static int vcn_v5_0_2_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, 518 + struct dpg_pause_state *new_state) 519 + { 520 + struct amdgpu_device *adev = vinst->adev; 521 + uint32_t 
reg_data = 0; 522 + int vcn_inst; 523 + 524 + vcn_inst = GET_INST(VCN, vinst->inst); 525 + 526 + /* pause/unpause if state is changed */ 527 + if (vinst->pause_state.fw_based != new_state->fw_based) { 528 + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", 529 + vinst->pause_state.fw_based, new_state->fw_based, 530 + new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); 531 + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & 532 + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); 533 + 534 + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { 535 + /* pause DPG */ 536 + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; 537 + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); 538 + 539 + /* wait for ACK */ 540 + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, 541 + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, 542 + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); 543 + } else { 544 + /* unpause DPG, no need to wait */ 545 + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; 546 + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); 547 + } 548 + vinst->pause_state.fw_based = new_state->fw_based; 549 + } 550 + 551 + return 0; 552 + } 553 + 554 + /** 555 + * vcn_v5_0_2_start_dpg_mode - VCN start with dpg mode 556 + * 557 + * @vinst: VCN instance 558 + * @indirect: indirectly write sram 559 + * 560 + * Start VCN block with dpg mode 561 + */ 562 + static int vcn_v5_0_2_start_dpg_mode(struct amdgpu_vcn_inst *vinst, 563 + bool indirect) 564 + { 565 + struct amdgpu_device *adev = vinst->adev; 566 + int inst_idx = vinst->inst; 567 + struct amdgpu_vcn5_fw_shared *fw_shared = 568 + adev->vcn.inst[inst_idx].fw_shared.cpu_addr; 569 + struct amdgpu_ring *ring; 570 + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; 571 + int vcn_inst, ret; 572 + uint32_t tmp; 573 + 574 + vcn_inst = GET_INST(VCN, inst_idx); 575 + 576 + /* disable register anti-hang mechanism */ 577 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1, 
578 + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); 579 + 580 + /* enable dynamic power gating mode */ 581 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); 582 + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; 583 + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); 584 + 585 + if (indirect) { 586 + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = 587 + (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; 588 + /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */ 589 + WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF, 590 + adev->vcn.inst[inst_idx].aid_id, 0, true); 591 + } 592 + 593 + /* enable VCPU clock */ 594 + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); 595 + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; 596 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 597 + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); 598 + 599 + /* disable master interrupt */ 600 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 601 + VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect); 602 + 603 + /* setup regUVD_LMI_CTRL */ 604 + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | 605 + UVD_LMI_CTRL__REQ_MODE_MASK | 606 + UVD_LMI_CTRL__CRC_RESET_MASK | 607 + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | 608 + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | 609 + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | 610 + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 611 + 0x00100000L); 612 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 613 + VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect); 614 + 615 + vcn_v5_0_2_mc_resume_dpg_mode(vinst, indirect); 616 + 617 + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); 618 + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; 619 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 620 + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); 621 + 622 + /* enable LMI MC and UMC channels */ 623 + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; 624 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 625 + VCN, 0, 
regUVD_LMI_CTRL2), tmp, 0, indirect); 626 + 627 + /* enable master interrupt */ 628 + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( 629 + VCN, 0, regUVD_MASTINT_EN), 630 + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); 631 + 632 + if (indirect) { 633 + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); 634 + if (ret) { 635 + dev_err(adev->dev, "vcn sram load failed %d\n", ret); 636 + return ret; 637 + } 638 + } 639 + 640 + /* resetting ring, fw should not check RB ring */ 641 + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; 642 + 643 + /* Pause dpg */ 644 + vcn_v5_0_2_pause_dpg_mode(vinst, &state); 645 + 646 + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; 647 + 648 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); 649 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); 650 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t)); 651 + 652 + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); 653 + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); 654 + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); 655 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); 656 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); 657 + 658 + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); 659 + 660 + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); 661 + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; 662 + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); 663 + /* resetting done, fw can check RB ring */ 664 + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); 665 + 666 + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, 667 + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | 668 + VCN_RB1_DB_CTRL__EN_MASK); 669 + /* Read DB_CTRL to flush the write DB_CTRL command. 
*/ 670 + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); 671 + 672 + return 0; 673 + } 674 + 675 + /** 676 + * vcn_v5_0_2_start - VCN start 677 + * 678 + * @vinst: VCN instance 679 + * 680 + * Start VCN block 681 + */ 682 + static int vcn_v5_0_2_start(struct amdgpu_vcn_inst *vinst) 683 + { 684 + struct amdgpu_device *adev = vinst->adev; 685 + int i = vinst->inst; 686 + struct amdgpu_vcn5_fw_shared *fw_shared; 687 + struct amdgpu_ring *ring; 688 + uint32_t tmp; 689 + int j, k, r, vcn_inst; 690 + 691 + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 692 + 693 + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) 694 + return vcn_v5_0_2_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); 695 + 696 + vcn_inst = GET_INST(VCN, i); 697 + 698 + /* set VCN status busy */ 699 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; 700 + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); 701 + 702 + /* enable VCPU clock */ 703 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 704 + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); 705 + 706 + /* disable master interrupt */ 707 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, 708 + ~UVD_MASTINT_EN__VCPU_EN_MASK); 709 + 710 + /* enable LMI MC and UMC channels */ 711 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, 712 + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); 713 + 714 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); 715 + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; 716 + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; 717 + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); 718 + 719 + /* setup regUVD_LMI_CTRL */ 720 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); 721 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp | 722 + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | 723 + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | 724 + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | 725 + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); 726 + 727 + vcn_v5_0_2_mc_resume(vinst); 
728 + 729 + /* VCN global tiling registers */ 730 + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, 731 + adev->gfx.config.gb_addr_config); 732 + 733 + /* unblock VCPU register access */ 734 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, 735 + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); 736 + 737 + /* release VCPU reset to boot */ 738 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, 739 + ~UVD_VCPU_CNTL__BLK_RST_MASK); 740 + 741 + for (j = 0; j < 10; ++j) { 742 + uint32_t status; 743 + 744 + for (k = 0; k < 100; ++k) { 745 + status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); 746 + if (status & 2) 747 + break; 748 + mdelay(1000); 749 + if (amdgpu_emu_mode == 1) 750 + msleep(520); 751 + } 752 + 753 + if (amdgpu_emu_mode == 1) { 754 + r = -1; 755 + if (status & 2) { 756 + r = 0; 757 + break; 758 + } 759 + } else { 760 + r = 0; 761 + if (status & 2) 762 + break; 763 + 764 + dev_err(adev->dev, 765 + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); 766 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 767 + UVD_VCPU_CNTL__BLK_RST_MASK, 768 + ~UVD_VCPU_CNTL__BLK_RST_MASK); 769 + mdelay(10); 770 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, 771 + ~UVD_VCPU_CNTL__BLK_RST_MASK); 772 + 773 + mdelay(10); 774 + r = -1; 775 + } 776 + } 777 + 778 + if (r) { 779 + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); 780 + return r; 781 + } 782 + 783 + /* enable master interrupt */ 784 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 785 + UVD_MASTINT_EN__VCPU_EN_MASK, 786 + ~UVD_MASTINT_EN__VCPU_EN_MASK); 787 + 788 + /* clear the busy bit of VCN_STATUS */ 789 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, 790 + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); 791 + 792 + ring = &adev->vcn.inst[i].ring_enc[0]; 793 + 794 + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, 795 + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | 796 + VCN_RB1_DB_CTRL__EN_MASK); 797 
+ 798 + /* Read DB_CTRL to flush the write DB_CTRL command. */ 799 + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); 800 + 801 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr); 802 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); 803 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4); 804 + 805 + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); 806 + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); 807 + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); 808 + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; 809 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); 810 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); 811 + 812 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); 813 + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); 814 + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); 815 + 816 + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); 817 + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; 818 + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); 819 + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); 820 + 821 + return 0; 822 + } 823 + 824 + /** 825 + * vcn_v5_0_2_stop_dpg_mode - VCN stop with dpg mode 826 + * 827 + * @vinst: VCN instance 828 + * 829 + * Stop VCN block with dpg mode 830 + */ 831 + static void vcn_v5_0_2_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) 832 + { 833 + struct amdgpu_device *adev = vinst->adev; 834 + int inst_idx = vinst->inst; 835 + uint32_t tmp; 836 + int vcn_inst; 837 + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; 838 + 839 + vcn_inst = GET_INST(VCN, inst_idx); 840 + 841 + /* Unpause dpg */ 842 + vcn_v5_0_2_pause_dpg_mode(vinst, &state); 843 + 844 + /* Wait for power status to be 1 */ 845 + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, 846 + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); 847 + 848 + /* wait for read ptr to be equal to write ptr */ 849 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); 850 + 
SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); 851 + 852 + /* disable dynamic power gating mode */ 853 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, 854 + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); 855 + } 856 + 857 + /** 858 + * vcn_v5_0_2_stop - VCN stop 859 + * 860 + * @vinst: VCN instance 861 + * 862 + * Stop VCN block 863 + */ 864 + static int vcn_v5_0_2_stop(struct amdgpu_vcn_inst *vinst) 865 + { 866 + struct amdgpu_device *adev = vinst->adev; 867 + int i = vinst->inst; 868 + struct amdgpu_vcn5_fw_shared *fw_shared; 869 + uint32_t tmp; 870 + int r = 0, vcn_inst; 871 + 872 + vcn_inst = GET_INST(VCN, i); 873 + 874 + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 875 + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; 876 + 877 + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { 878 + vcn_v5_0_2_stop_dpg_mode(vinst); 879 + return 0; 880 + } 881 + 882 + /* wait for vcn idle */ 883 + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); 884 + if (r) 885 + return r; 886 + 887 + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | 888 + UVD_LMI_STATUS__READ_CLEAN_MASK | 889 + UVD_LMI_STATUS__WRITE_CLEAN_MASK | 890 + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; 891 + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); 892 + if (r) 893 + return r; 894 + 895 + /* disable LMI UMC channel */ 896 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); 897 + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; 898 + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); 899 + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | 900 + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; 901 + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); 902 + if (r) 903 + return r; 904 + 905 + /* block VCPU register access */ 906 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 907 + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, 908 + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); 909 + 910 + /* reset VCPU */ 911 + 
WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 912 + UVD_VCPU_CNTL__BLK_RST_MASK, 913 + ~UVD_VCPU_CNTL__BLK_RST_MASK); 914 + 915 + /* disable VCPU clock */ 916 + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, 917 + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); 918 + 919 + /* apply soft reset */ 920 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); 921 + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; 922 + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); 923 + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); 924 + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; 925 + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); 926 + 927 + /* clear status */ 928 + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); 929 + 930 + return 0; 931 + } 932 + 933 + /** 934 + * vcn_v5_0_2_unified_ring_get_rptr - get unified read pointer 935 + * 936 + * @ring: amdgpu_ring pointer 937 + * 938 + * Returns the current hardware unified read pointer 939 + */ 940 + static uint64_t vcn_v5_0_2_unified_ring_get_rptr(struct amdgpu_ring *ring) 941 + { 942 + struct amdgpu_device *adev = ring->adev; 943 + 944 + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) 945 + DRM_ERROR("wrong ring id is identified in %s", __func__); 946 + 947 + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR); 948 + } 949 + 950 + /** 951 + * vcn_v5_0_2_unified_ring_get_wptr - get unified write pointer 952 + * 953 + * @ring: amdgpu_ring pointer 954 + * 955 + * Returns the current hardware unified write pointer 956 + */ 957 + static uint64_t vcn_v5_0_2_unified_ring_get_wptr(struct amdgpu_ring *ring) 958 + { 959 + struct amdgpu_device *adev = ring->adev; 960 + 961 + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) 962 + DRM_ERROR("wrong ring id is identified in %s", __func__); 963 + 964 + if (ring->use_doorbell) 965 + return *ring->wptr_cpu_addr; 966 + else 967 + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR); 968 + } 969 + 970 + /** 971 + * 
vcn_v5_0_2_unified_ring_set_wptr - set enc write pointer 972 + * 973 + * @ring: amdgpu_ring pointer 974 + * 975 + * Commits the enc write pointer to the hardware 976 + */ 977 + static void vcn_v5_0_2_unified_ring_set_wptr(struct amdgpu_ring *ring) 978 + { 979 + struct amdgpu_device *adev = ring->adev; 980 + 981 + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) 982 + DRM_ERROR("wrong ring id is identified in %s", __func__); 983 + 984 + if (ring->use_doorbell) { 985 + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); 986 + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 987 + } else { 988 + WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR, 989 + lower_32_bits(ring->wptr)); 990 + } 991 + } 992 + 993 + static const struct amdgpu_ring_funcs vcn_v5_0_2_unified_ring_vm_funcs = { 994 + .type = AMDGPU_RING_TYPE_VCN_ENC, 995 + .align_mask = 0x3f, 996 + .nop = VCN_ENC_CMD_NO_OP, 997 + .get_rptr = vcn_v5_0_2_unified_ring_get_rptr, 998 + .get_wptr = vcn_v5_0_2_unified_ring_get_wptr, 999 + .set_wptr = vcn_v5_0_2_unified_ring_set_wptr, 1000 + .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 1001 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + 1002 + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ 1003 + 5 + 1004 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ 1005 + 1, /* vcn_v2_0_enc_ring_insert_end */ 1006 + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ 1007 + .emit_ib = vcn_v2_0_enc_ring_emit_ib, 1008 + .emit_fence = vcn_v2_0_enc_ring_emit_fence, 1009 + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, 1010 + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, 1011 + .test_ring = amdgpu_vcn_enc_ring_test_ring, 1012 + .test_ib = amdgpu_vcn_unified_ring_test_ib, 1013 + .insert_nop = amdgpu_ring_insert_nop, 1014 + .insert_end = vcn_v2_0_enc_ring_insert_end, 1015 + .pad_ib = amdgpu_ring_generic_pad_ib, 1016 + .begin_use = amdgpu_vcn_ring_begin_use, 1017 + .end_use = amdgpu_vcn_ring_end_use, 1018 + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, 1019 + 
.emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, 1020 + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, 1021 + }; 1022 + 1023 + /** 1024 + * vcn_v5_0_2_set_unified_ring_funcs - set unified ring functions 1025 + * 1026 + * @adev: amdgpu_device pointer 1027 + * 1028 + * Set unified ring functions 1029 + */ 1030 + static void vcn_v5_0_2_set_unified_ring_funcs(struct amdgpu_device *adev) 1031 + { 1032 + int i, vcn_inst; 1033 + 1034 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1035 + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_2_unified_ring_vm_funcs; 1036 + adev->vcn.inst[i].ring_enc[0].me = i; 1037 + vcn_inst = GET_INST(VCN, i); 1038 + adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid; 1039 + } 1040 + } 1041 + 1042 + /** 1043 + * vcn_v5_0_2_is_idle - check VCN block is idle 1044 + * 1045 + * @ip_block: Pointer to the amdgpu_ip_block structure 1046 + * 1047 + * Check whether VCN block is idle 1048 + */ 1049 + static bool vcn_v5_0_2_is_idle(struct amdgpu_ip_block *ip_block) 1050 + { 1051 + struct amdgpu_device *adev = ip_block->adev; 1052 + int i, ret = 1; 1053 + 1054 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) 1055 + ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE); 1056 + 1057 + return ret; 1058 + } 1059 + 1060 + /** 1061 + * vcn_v5_0_2_wait_for_idle - wait for VCN block idle 1062 + * 1063 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
1064 + * 1065 + * Wait for VCN block idle 1066 + */ 1067 + static int vcn_v5_0_2_wait_for_idle(struct amdgpu_ip_block *ip_block) 1068 + { 1069 + struct amdgpu_device *adev = ip_block->adev; 1070 + int i, ret = 0; 1071 + 1072 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1073 + ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE, 1074 + UVD_STATUS__IDLE); 1075 + if (ret) 1076 + return ret; 1077 + } 1078 + 1079 + return ret; 1080 + } 1081 + 1082 + /** 1083 + * vcn_v5_0_2_set_clockgating_state - set VCN block clockgating state 1084 + * 1085 + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 1086 + * @state: clock gating state 1087 + * 1088 + * Set VCN block clockgating state 1089 + */ 1090 + static int vcn_v5_0_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, 1091 + enum amd_clockgating_state state) 1092 + { 1093 + struct amdgpu_device *adev = ip_block->adev; 1094 + bool enable = state == AMD_CG_STATE_GATE; 1095 + int i; 1096 + 1097 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1098 + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; 1099 + 1100 + if (enable) { 1101 + if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE) 1102 + return -EBUSY; 1103 + vcn_v5_0_2_enable_clock_gating(vinst); 1104 + } else { 1105 + vcn_v5_0_2_disable_clock_gating(vinst); 1106 + } 1107 + } 1108 + 1109 + return 0; 1110 + } 1111 + 1112 + static int vcn_v5_0_2_set_pg_state(struct amdgpu_vcn_inst *vinst, 1113 + enum amd_powergating_state state) 1114 + { 1115 + int ret = 0; 1116 + 1117 + if (state == vinst->cur_state) 1118 + return 0; 1119 + 1120 + if (state == AMD_PG_STATE_GATE) 1121 + ret = vcn_v5_0_2_stop(vinst); 1122 + else 1123 + ret = vcn_v5_0_2_start(vinst); 1124 + 1125 + if (!ret) 1126 + vinst->cur_state = state; 1127 + 1128 + return ret; 1129 + } 1130 + 1131 + /** 1132 + * vcn_v5_0_2_process_interrupt - process VCN block interrupt 1133 + * 1134 + * @adev: amdgpu_device pointer 1135 + * @source: interrupt 
sources 1136 + * @entry: interrupt entry from clients and sources 1137 + * 1138 + * Process VCN block interrupt 1139 + */ 1140 + static int vcn_v5_0_2_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, 1141 + struct amdgpu_iv_entry *entry) 1142 + { 1143 + uint32_t i, inst; 1144 + 1145 + i = node_id_to_phys_map[entry->node_id]; 1146 + 1147 + DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n"); 1148 + 1149 + for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) 1150 + if (adev->vcn.inst[inst].aid_id == i) 1151 + break; 1152 + 1153 + if (inst >= adev->vcn.num_vcn_inst) { 1154 + dev_WARN_ONCE(adev->dev, 1, 1155 + "Interrupt received for unknown VCN instance %d", 1156 + entry->node_id); 1157 + return 0; 1158 + } 1159 + 1160 + switch (entry->src_id) { 1161 + case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE: 1162 + amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]); 1163 + break; 1164 + default: 1165 + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", 1166 + entry->src_id, entry->src_data[0]); 1167 + break; 1168 + } 1169 + 1170 + return 0; 1171 + } 1172 + 1173 + static const struct amdgpu_irq_src_funcs vcn_v5_0_2_irq_funcs = { 1174 + .process = vcn_v5_0_2_process_interrupt, 1175 + }; 1176 + 1177 + /** 1178 + * vcn_v5_0_2_set_irq_funcs - set VCN block interrupt irq functions 1179 + * 1180 + * @adev: amdgpu_device pointer 1181 + * 1182 + * Set VCN block interrupt irq functions 1183 + */ 1184 + static void vcn_v5_0_2_set_irq_funcs(struct amdgpu_device *adev) 1185 + { 1186 + int i; 1187 + 1188 + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) 1189 + adev->vcn.inst->irq.num_types++; 1190 + adev->vcn.inst->irq.funcs = &vcn_v5_0_2_irq_funcs; 1191 + } 1192 + 1193 + static const struct amd_ip_funcs vcn_v5_0_2_ip_funcs = { 1194 + .name = "vcn_v5_0_2", 1195 + .early_init = vcn_v5_0_2_early_init, 1196 + .late_init = NULL, 1197 + .sw_init = vcn_v5_0_2_sw_init, 1198 + .sw_fini = vcn_v5_0_2_sw_fini, 1199 + .hw_init = vcn_v5_0_2_hw_init, 1200 + .hw_fini = 
vcn_v5_0_2_hw_fini, 1201 + .suspend = vcn_v5_0_2_suspend, 1202 + .resume = vcn_v5_0_2_resume, 1203 + .is_idle = vcn_v5_0_2_is_idle, 1204 + .wait_for_idle = vcn_v5_0_2_wait_for_idle, 1205 + .check_soft_reset = NULL, 1206 + .pre_soft_reset = NULL, 1207 + .soft_reset = NULL, 1208 + .post_soft_reset = NULL, 1209 + .set_clockgating_state = vcn_v5_0_2_set_clockgating_state, 1210 + .set_powergating_state = vcn_set_powergating_state, 1211 + }; 1212 + 1213 + const struct amdgpu_ip_block_version vcn_v5_0_2_ip_block = { 1214 + .type = AMD_IP_BLOCK_TYPE_VCN, 1215 + .major = 5, 1216 + .minor = 0, 1217 + .rev = 2, 1218 + .funcs = &vcn_v5_0_2_ip_funcs, 1219 + };
+29
drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.h
··· 1 + /* 2 + * Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + 24 + #ifndef __VCN_v5_0_2_H__ 25 + #define __VCN_v5_0_2_H__ 26 + 27 + /* VCN 5.0.2 IP block version descriptor (type AMD_IP_BLOCK_TYPE_VCN, major 5, minor 0, rev 2). */ extern const struct amdgpu_ip_block_version vcn_v5_0_2_ip_block; 28 + 29 + #endif /* __VCN_v5_0_2_H__ */
+3 -3
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
··· 97 97 return; 98 98 99 99 if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) 100 - kfd_signal_event_interrupt(pasid, context_id, 28); 100 + kfd_signal_event_interrupt(pasid, context_id, 28, true); 101 101 else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) 102 - kfd_signal_event_interrupt(pasid, context_id, 28); 102 + kfd_signal_event_interrupt(pasid, context_id, 28, true); 103 103 else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) 104 - kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); 104 + kfd_signal_event_interrupt(pasid, context_id & 0xff, 8, true); 105 105 else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) 106 106 kfd_signal_hw_exception_event(pasid); 107 107 else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+8 -4
drivers/gpu/drm/amd/amdkfd/kfd_events.c
··· 142 142 * @p: Pointer to struct kfd_process 143 143 * @id: ID to look up 144 144 * @bits: Number of valid bits in @id 145 + * @signal_mailbox_updated: flag indicates if FW updates signal mailbox entry 145 146 * 146 147 * Finds the first signaled event with a matching partial ID. If no 147 148 * matching signaled event is found, returns NULL. In that case the ··· 156 155 * driver. 157 156 */ 158 157 static struct kfd_event *lookup_signaled_event_by_partial_id( 159 - struct kfd_process *p, uint32_t id, uint32_t bits) 158 + struct kfd_process *p, uint32_t id, uint32_t bits, 159 + bool signal_mailbox_updated) 160 160 { 161 161 struct kfd_event *ev; 162 162 ··· 168 166 * and we only need a single lookup. 169 167 */ 170 168 if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { 171 - if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) 169 + if (signal_mailbox_updated && 170 + page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) 172 171 return NULL; 173 172 174 173 return idr_find(&p->event_idr, id); ··· 727 724 } 728 725 729 726 void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, 730 - uint32_t valid_id_bits) 727 + uint32_t valid_id_bits, bool signal_mailbox_updated) 731 728 { 732 729 struct kfd_event *ev = NULL; 733 730 ··· 745 742 746 743 if (valid_id_bits) 747 744 ev = lookup_signaled_event_by_partial_id(p, partial_id, 748 - valid_id_bits); 745 + valid_id_bits, 746 + signal_mailbox_updated); 749 747 if (ev) { 750 748 set_event_from_interrupt(p, ev); 751 749 } else if (p->signal_page) {
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_events.h
··· 85 85 #define KFD_EVENT_TYPE_MEMORY 8 86 86 87 87 extern void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, 88 - uint32_t valid_id_bits); 88 + uint32_t valid_id_bits, 89 + bool signal_mailbox_updated); 89 90 90 91 #endif
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
··· 211 211 client_id == SOC15_IH_CLIENTID_SE2SH || 212 212 client_id == SOC15_IH_CLIENTID_SE3SH) { 213 213 if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) 214 - kfd_signal_event_interrupt(pasid, context_id0, 32); 214 + kfd_signal_event_interrupt(pasid, context_id0, 32, true); 215 215 else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) { 216 216 encoding = REG_GET_FIELD(context_id1, 217 217 SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); ··· 324 324 default: 325 325 break; 326 326 } 327 - kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23); 327 + kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23, true); 328 328 } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && 329 329 KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { 330 330 kfd_set_dbg_ev_from_interrupt(dev, pasid, ··· 344 344 client_id == SOC15_IH_CLIENTID_SDMA6 || 345 345 client_id == SOC15_IH_CLIENTID_SDMA7) { 346 346 if (source_id == SOC15_INTSRC_SDMA_TRAP) { 347 - kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); 347 + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28, true); 348 348 } 349 349 } else if (client_id == SOC15_IH_CLIENTID_VMC || 350 350 client_id == SOC15_IH_CLIENTID_VMC1 ||
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
··· 353 353 354 354 /* CP */ 355 355 if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) 356 - kfd_signal_event_interrupt(pasid, context_id0, 32); 356 + kfd_signal_event_interrupt(pasid, context_id0, 32, true); 357 357 else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && 358 358 KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { 359 359 u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); ··· 366 366 367 367 /* SDMA */ 368 368 else if (source_id == SOC21_INTSRC_SDMA_TRAP) 369 - kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); 369 + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28, true); 370 370 else if (source_id == SOC21_INTSRC_SDMA_ECC) { 371 371 event_interrupt_poison_consumption_v11(dev, pasid, source_id); 372 372 return; ··· 404 404 default: 405 405 break; 406 406 } 407 - kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); 407 + kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24, true); 408 408 } 409 409 410 410 } else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+14 -4
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v12_1.c
··· 28 28 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" 29 29 #include "kfd_smi_events.h" 30 30 #include "kfd_debug.h" 31 + #include "amdgpu_ras_mgr.h" 31 32 32 33 /* 33 34 * GFX12.1 SQ Interrupts ··· 186 185 enum amdgpu_ras_block block = 0; 187 186 int ret = -EINVAL; 188 187 uint32_t reset = 0; 188 + u64 event_id = RAS_EVENT_INVALID_ID; 189 189 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL); 190 190 191 191 if (!p) ··· 222 220 * resetting queue passes, do page retirement without gpu reset 223 221 * resetting queue fails, fallback to gpu reset solution 224 222 */ 225 - amdgpu_amdkfd_ras_poison_consumption_handler(node->adev, block, reset); 223 + if (amdgpu_uniras_enabled(node->adev)) 224 + event_id = amdgpu_ras_mgr_gen_ras_event_seqno(node->adev, 225 + RAS_SEQNO_TYPE_POISON_CONSUMPTION); 226 + 227 + RAS_EVENT_LOG(node->adev, event_id, 228 + "poison is consumed by source %d, kick off gpu reset flow\n", source_id); 229 + 230 + amdgpu_amdkfd_ras_pasid_poison_consumption_handler(node->adev, 231 + block, pasid, NULL, NULL, reset); 226 232 } 227 233 228 234 static bool event_interrupt_isr_v12_1(struct kfd_node *node, ··· 336 326 337 327 /* CP */ 338 328 if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) 339 - kfd_signal_event_interrupt(pasid, context_id0, 32); 329 + kfd_signal_event_interrupt(pasid, context_id0, 32, false); 340 330 else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && 341 331 KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { 342 332 u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); ··· 349 339 350 340 /* SDMA */ 351 341 else if (source_id == SOC21_INTSRC_SDMA_TRAP) 352 - kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); 342 + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28, true); 353 343 else if (source_id == SOC21_INTSRC_SDMA_ECC) { 354 344 event_interrupt_poison_consumption_v12_1(node, pasid, source_id); 355 345 return; ··· 387 377 default: 388 378 break; 389 379 } 390 - 
kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); 380 + kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24, true); 391 381 } 392 382 393 383 } else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
··· 379 379 client_id == SOC15_IH_CLIENTID_SE2SH || 380 380 client_id == SOC15_IH_CLIENTID_SE3SH) { 381 381 if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) 382 - kfd_signal_event_interrupt(pasid, context_id0, 32); 382 + kfd_signal_event_interrupt(pasid, context_id0, 32, true); 383 383 else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) { 384 384 sq_int_data = KFD_CONTEXT_ID_GET_SQ_INT_DATA(context_id0, context_id1); 385 385 encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); ··· 513 513 default: 514 514 break; 515 515 } 516 - kfd_signal_event_interrupt(pasid, sq_int_data, 24); 516 + kfd_signal_event_interrupt(pasid, sq_int_data, 24, true); 517 517 } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && 518 518 KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { 519 519 kfd_set_dbg_ev_from_interrupt(dev, pasid, ··· 530 530 client_id == SOC15_IH_CLIENTID_SDMA6 || 531 531 client_id == SOC15_IH_CLIENTID_SDMA7) { 532 532 if (source_id == SOC15_INTSRC_SDMA_TRAP) { 533 - kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); 533 + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28, true); 534 534 } else if (source_id == SOC15_INTSRC_SDMA_ECC) { 535 535 event_interrupt_poison_consumption_v9(dev, pasid, client_id); 536 536 return;
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 1525 1525 bool all, uint32_t *user_timeout_ms, 1526 1526 uint32_t *wait_result); 1527 1527 void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, 1528 - uint32_t valid_id_bits); 1528 + uint32_t valid_id_bits, bool signal_mailbox_updated); 1529 1529 void kfd_signal_hw_exception_event(u32 pasid); 1530 1530 int kfd_set_event(struct kfd_process *p, uint32_t event_id); 1531 1531 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 590 590 return err; 591 591 592 592 if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo, 593 - p->queue_size)) { 593 + p->queue_size + 594 + pqn->q->properties.metadata_queue_size)) { 594 595 pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", 595 596 p->queue_address, p->queue_size); 596 597 amdgpu_bo_unreserve(vm->root.bo);
+17 -13
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
··· 1219 1219 bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); 1220 1220 bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT); 1221 1221 bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT; 1222 - unsigned int mtype_local; 1222 + unsigned int mtype_local, mtype_remote; 1223 + bool is_aid_a1, is_local; 1223 1224 1224 1225 if (domain == SVM_RANGE_VRAM_DOMAIN) 1225 1226 bo_node = prange->svm_bo->node; ··· 1308 1307 mapping_flags |= AMDGPU_VM_MTYPE_NC; 1309 1308 break; 1310 1309 case IP_VERSION(12, 1, 0): 1310 + is_aid_a1 = (node->adev->rev_id & 0x10); 1311 + is_local = (domain == SVM_RANGE_VRAM_DOMAIN) && 1312 + (bo_node->adev == node->adev); 1313 + 1314 + mtype_local = amdgpu_mtype_local == 0 ? AMDGPU_VM_MTYPE_RW : 1315 + amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : 1316 + is_aid_a1 ? AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_NC; 1317 + mtype_remote = is_aid_a1 ? AMDGPU_VM_MTYPE_NC : AMDGPU_VM_MTYPE_UC; 1311 1318 snoop = true; 1312 - if (domain == SVM_RANGE_VRAM_DOMAIN) { 1313 - mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : 1314 - AMDGPU_VM_MTYPE_RW; 1315 - /* local HBM */ 1316 - if (bo_node->adev == node->adev) 1317 - mapping_flags |= mtype_local; 1318 - /* Remote GPU memory */ 1319 - else 1320 - mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : 1321 - AMDGPU_VM_MTYPE_NC; 1322 - /* system memory accessed by the dGPU */ 1319 + 1320 + if (is_local) /* local HBM */ { 1321 + mapping_flags |= mtype_local; 1322 + } else if (ext_coherent) { 1323 + mapping_flags |= AMDGPU_VM_MTYPE_UC; 1323 1324 } else { 1324 - mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; 1325 + /* system memory or remote VRAM */ 1326 + mapping_flags |= mtype_remote; 1325 1327 } 1326 1328 break; 1327 1329 default:
+61 -3
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
··· 2290 2290 &adev->dm.dmub_bo_gpu_addr, 2291 2291 &adev->dm.dmub_bo_cpu_addr); 2292 2292 2293 + if (adev->dm.boot_time_crc_info.bo_ptr) 2294 + amdgpu_bo_free_kernel(&adev->dm.boot_time_crc_info.bo_ptr, 2295 + &adev->dm.boot_time_crc_info.gpu_addr, 2296 + &adev->dm.boot_time_crc_info.cpu_addr); 2297 + 2293 2298 if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) { 2294 2299 for (i = 0; i < adev->dm.dc->caps.max_links; i++) { 2295 2300 if (adev->dm.hpd_rx_offload_wq[i].wq) { ··· 2582 2577 fw_meta_info_params.fw_inst_const = adev->dm.dmub_fw->data + 2583 2578 le32_to_cpu(hdr->header.ucode_array_offset_bytes) + 2584 2579 PSP_HEADER_BYTES_256; 2585 - fw_meta_info_params.fw_bss_data = region_params.bss_data_size ? adev->dm.dmub_fw->data + 2580 + fw_meta_info_params.fw_bss_data = fw_meta_info_params.bss_data_size ? adev->dm.dmub_fw->data + 2586 2581 le32_to_cpu(hdr->header.ucode_array_offset_bytes) + 2587 2582 le32_to_cpu(hdr->inst_const_bytes) : NULL; 2588 2583 fw_meta_info_params.custom_psp_footer_size = 0; ··· 2743 2738 return ret; 2744 2739 } 2745 2740 2741 + static void amdgpu_dm_boot_time_crc_init(struct amdgpu_device *adev) 2742 + { 2743 + struct dm_boot_time_crc_info *bootcrc_info = NULL; 2744 + struct dmub_srv *dmub = NULL; 2745 + union dmub_fw_boot_options option = {0}; 2746 + int ret = 0; 2747 + const uint32_t fb_size = 3 * 1024 * 1024; /* 3MB for DCC pattern */ 2748 + 2749 + if (!adev || !adev->dm.dc || !adev->dm.dc->ctx || 2750 + !adev->dm.dc->ctx->dmub_srv) { 2751 + return; 2752 + } 2753 + 2754 + dmub = adev->dm.dc->ctx->dmub_srv->dmub; 2755 + bootcrc_info = &adev->dm.boot_time_crc_info; 2756 + 2757 + if (!dmub || !dmub->hw_funcs.get_fw_boot_option) { 2758 + drm_dbg(adev_to_drm(adev), "failed to init boot time crc buffer\n"); 2759 + return; 2760 + } 2761 + 2762 + option = dmub->hw_funcs.get_fw_boot_option(dmub); 2763 + 2764 + /* Return if boot time CRC is not enabled */ 2765 + if (option.bits.bootcrc_en_at_S0i3 == 0) 2766 + return; 2767 + 2768 + /* Create a 
buffer for boot time CRC */ 2769 + ret = amdgpu_bo_create_kernel(adev, fb_size, PAGE_SIZE, 2770 + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, 2771 + &bootcrc_info->bo_ptr, 2772 + &bootcrc_info->gpu_addr, 2773 + &bootcrc_info->cpu_addr); 2774 + 2775 + if (ret) { 2776 + drm_dbg(adev_to_drm(adev), "failed to create boot time crc buffer\n"); 2777 + } else { 2778 + bootcrc_info->size = fb_size; 2779 + 2780 + drm_dbg(adev_to_drm(adev), "boot time crc buffer created addr 0x%llx, size %u\n", 2781 + bootcrc_info->gpu_addr, bootcrc_info->size); 2782 + 2783 + /* Send the buffer info to DMUB */ 2784 + dc_dmub_srv_boot_time_crc_init(adev->dm.dc, 2785 + bootcrc_info->gpu_addr, bootcrc_info->size); 2786 + } 2787 + } 2788 + 2746 2789 static int dm_late_init(struct amdgpu_ip_block *ip_block) 2747 2790 { 2748 2791 struct amdgpu_device *adev = ip_block->adev; ··· 2801 2748 struct dmcu *dmcu = NULL; 2802 2749 2803 2750 dmcu = adev->dm.dc->res_pool->dmcu; 2751 + 2752 + /* Init the boot time CRC (skip in resume) */ 2753 + if ((adev->in_suspend == 0) && 2754 + (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 6, 0))) 2755 + amdgpu_dm_boot_time_crc_init(adev); 2804 2756 2805 2757 for (i = 0; i < 16; i++) 2806 2758 linear_lut[i] = 0xFFFF * i / 15; ··· 13125 13067 u16 min_vfreq; 13126 13068 u16 max_vfreq; 13127 13069 13128 - if (edid == NULL || edid->extensions == 0) 13070 + if (!edid || !edid->extensions) 13129 13071 return; 13130 13072 13131 13073 /* Find DisplayID extension */ ··· 13135 13077 break; 13136 13078 } 13137 13079 13138 - if (edid_ext == NULL) 13080 + if (i == edid->extensions) 13139 13081 return; 13140 13082 13141 13083 while (j < EDID_LENGTH) {
+21
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
··· 123 123 uint64_t gpu_addr; 124 124 }; 125 125 126 + /** 127 + * struct dm_boot_time_crc_info - Buffer info used by boot time CRC 128 + * @cpu_addr: MMIO cpu addr 129 + * @bo_ptr: Pointer to the buffer object 130 + * @gpu_addr: MMIO gpu addr 131 + * @size: Size of the buffer 132 + */ 133 + struct dm_boot_time_crc_info { 134 + void *cpu_addr; 135 + struct amdgpu_bo *bo_ptr; 136 + uint64_t gpu_addr; 137 + uint32_t size; 138 + }; 139 + 126 140 typedef void (*dmub_notify_interrupt_callback_t)(struct amdgpu_device *adev, struct dmub_notification *notify); 127 141 128 142 /** ··· 712 698 struct completion replied; 713 699 char reply_data[0x40]; // Cannot include dmub_cmd here 714 700 } fused_io[8]; 701 + 702 + /** 703 + * @boot_time_crc_info: 704 + * 705 + * Buffer info for the boot time crc. 706 + */ 707 + struct dm_boot_time_crc_info boot_time_crc_info; 708 }; 709 710 enum dsc_clock_force_state {
+3 -3
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
··· 37 37 BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | 38 38 BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | 39 39 BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | 40 - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); 40 + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); 41 41 42 42 const u64 amdgpu_dm_supported_shaper_tfs = 43 43 BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) | 44 44 BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF) | 45 45 BIT(DRM_COLOROP_1D_CURVE_BT2020_OETF) | 46 - BIT(DRM_COLOROP_1D_CURVE_GAMMA22); 46 + BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); 47 47 48 48 const u64 amdgpu_dm_supported_blnd_tfs = 49 49 BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | 50 50 BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | 51 51 BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | 52 - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); 52 + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); 53 53 54 54 #define MAX_COLOR_PIPELINE_OPS 10 55 55
+52
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
··· 3305 3305 return 0; 3306 3306 } 3307 3307 3308 + /* check if kernel disallow eDP enter replay state 3309 + * cat /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_replay 3310 + * 0: allow edp enter replay; 1: disallow 3311 + */ 3312 + static int disallow_edp_enter_replay_get(void *data, u64 *val) 3313 + { 3314 + struct amdgpu_dm_connector *aconnector = data; 3315 + 3316 + *val = (u64) aconnector->disallow_edp_enter_replay; 3317 + return 0; 3318 + } 3319 + 3320 + /* set kernel disallow eDP enter replay state 3321 + * echo 0x0 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_replay 3322 + * 0: allow edp enter replay; 1: disallow 3323 + * 3324 + * usage: test app read crc from PSR eDP rx. 3325 + * 3326 + * during kernel boot up, kernel write dpcd 0x37b to 3327 + * notify eDP rx replay enable. 3328 + * rx fw will start checking crc for rx internal logic. 3329 + * crc read count within dpcd 0x246 is not updated and 3330 + * value is 0. when eDP tx driver wants to read rx crc 3331 + * from dpcd 0x246, 0x270, read count 0 lead tx driver 3332 + * timeout. 3333 + * 3334 + * to avoid this, we add this debugfs to let the test app disable 3335 + * rx replay. then test app can read non-zero crc read count. 3336 + * 3337 + * expected app sequence is as below: 3338 + * 1. disable eDP PHY and notify eDP rx with dpcd 0x600 = 2. 3339 + * 2. echo 0x1 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_replay 3340 + * 3. enable eDP PHY and notify eDP rx with dpcd 0x600 = 1 but 3341 + * without programming dpcd 0x37b. 3342 + * 4. read crc from rx dpcd 0x270, 0x246, etc. 3343 + * 5. echo 0x0 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_replay. 3344 + * this will let eDP back to normal with replay setup dpcd 0x37b. 3345 + */ 3346 + static int disallow_edp_enter_replay_set(void *data, u64 val) 3347 + { 3348 + struct amdgpu_dm_connector *aconnector = data; 3349 + 3350 + aconnector->disallow_edp_enter_replay = val ? 
true : false; 3351 + return 0; 3352 + } 3353 + 3308 3354 static int dmub_trace_mask_set(void *data, u64 val) 3309 3355 { 3310 3356 struct amdgpu_device *adev = data; ··· 3478 3432 DEFINE_DEBUGFS_ATTRIBUTE(disallow_edp_enter_psr_fops, 3479 3433 disallow_edp_enter_psr_get, 3480 3434 disallow_edp_enter_psr_set, "%llu\n"); 3435 + 3436 + DEFINE_DEBUGFS_ATTRIBUTE(disallow_edp_enter_replay_fops, 3437 + disallow_edp_enter_replay_get, 3438 + disallow_edp_enter_replay_set, "%llu\n"); 3481 3439 3482 3440 DEFINE_DEBUGFS_ATTRIBUTE(ips_residency_cntl_fops, ips_residency_cntl_get, 3483 3441 ips_residency_cntl_set, "%llu\n"); ··· 3685 3635 &allow_edp_hotplug_detection_fops); 3686 3636 debugfs_create_file("disallow_edp_enter_psr", 0644, dir, connector, 3687 3637 &disallow_edp_enter_psr_fops); 3638 + debugfs_create_file("disallow_edp_enter_replay", 0644, dir, connector, 3639 + &disallow_edp_enter_replay_fops); 3688 3640 } 3689 3641 3690 3642 for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) {
+4 -7
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
··· 62 62 DRM_FORMAT_XBGR8888, 63 63 DRM_FORMAT_ABGR8888, 64 64 DRM_FORMAT_RGB565, 65 - DRM_FORMAT_NV21, 66 - DRM_FORMAT_NV12, 67 - DRM_FORMAT_P010 68 65 }; 69 66 70 67 static const uint32_t overlay_formats[] = { ··· 704 707 uint8_t max_comp_block[] = {2, 1, 0}; 705 708 uint64_t max_comp_block_mod[ARRAY_SIZE(max_comp_block)] = {0}; 706 709 uint8_t i = 0, j = 0; 707 - uint64_t gfx12_modifiers[] = {mod_256k, mod_64k, mod_4k, mod_256b, DRM_FORMAT_MOD_LINEAR}; 710 + /* Note, linear (no DCC) gets added to the modifier list for all chips by the caller. */ 711 + uint64_t gfx12_modifiers[] = {mod_256k, mod_64k, mod_4k, mod_256b}; 708 712 709 713 for (i = 0; i < ARRAY_SIZE(max_comp_block); i++) 710 714 max_comp_block_mod[i] = AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block[i]); 711 715 712 716 /* With DCC: Best choice should be kept first. Hence, add all 256k modifiers of different 713 717 * max compressed blocks first and then move on to the next smaller sized layouts. 714 - * Do not add the linear modifier here, and hence the condition of size-1 for the loop 715 718 */ 716 - for (j = 0; j < ARRAY_SIZE(gfx12_modifiers) - 1; j++) 719 + for (j = 0; j < ARRAY_SIZE(gfx12_modifiers); j++) 717 720 for (i = 0; i < ARRAY_SIZE(max_comp_block); i++) 718 721 amdgpu_dm_plane_add_modifier(mods, size, capacity, 719 722 ver | dcc | max_comp_block_mod[i] | gfx12_modifiers[j]); 720 723 721 - /* Without DCC. Add all modifiers including linear at the end */ 724 + /* Without DCC. */ 722 725 for (i = 0; i < ARRAY_SIZE(gfx12_modifiers); i++) 723 726 amdgpu_dm_plane_add_modifier(mods, size, capacity, gfx12_modifiers[i]); 724 727
+4 -4
drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
··· 244 244 BREAK_TO_DEBUGGER(); 245 245 return NULL; 246 246 } 247 + if (ctx->dce_version == DCN_VERSION_2_01) { 248 + dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); 249 + return &clk_mgr->base; 250 + } 247 251 if (ASICREV_IS_SIENNA_CICHLID_P(asic_id.hw_internal_rev)) { 248 252 dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); 249 253 return &clk_mgr->base; ··· 258 254 } 259 255 if (ASICREV_IS_BEIGE_GOBY_P(asic_id.hw_internal_rev)) { 260 256 dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); 261 - return &clk_mgr->base; 262 - } 263 - if (ctx->dce_version == DCN_VERSION_2_01) { 264 - dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); 265 257 return &clk_mgr->base; 266 258 } 267 259 dcn20_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+10
drivers/gpu/drm/amd/display/dc/core/dc.c
··· 2617 2617 dc->optimized_required = false; 2618 2618 } 2619 2619 2620 + void dc_get_default_tiling_info(const struct dc *dc, struct dc_tiling_info *tiling_info) 2621 + { 2622 + if (!dc || !tiling_info) 2623 + return; 2624 + if (dc->res_pool && dc->res_pool->funcs && dc->res_pool->funcs->get_default_tiling_info) { 2625 + dc->res_pool->funcs->get_default_tiling_info(tiling_info); 2626 + return; 2627 + } 2628 + } 2629 + 2620 2630 bool dc_set_generic_gpio_for_stereo(bool enable, 2621 2631 struct gpio_service *gpio_service) 2622 2632 {
+10 -1
drivers/gpu/drm/amd/display/dc/dc.h
··· 63 63 struct dcn_optc_reg_state; 64 64 struct dcn_dccg_reg_state; 65 65 66 - #define DC_VER "3.2.373" 66 + #define DC_VER "3.2.374" 67 67 68 68 /** 69 69 * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC ··· 1969 1969 1970 1970 void dc_post_update_surfaces_to_stream( 1971 1971 struct dc *dc); 1972 + 1973 + /* 1974 + * dc_get_default_tiling_info() - Retrieve an ASIC-appropriate default tiling 1975 + * description for (typically) linear surfaces. 1976 + * 1977 + * This is used by OS/DM paths that need a valid, fully-initialized tiling 1978 + * description without hardcoding gfx-version specifics in the caller. 1979 + */ 1980 + void dc_get_default_tiling_info(const struct dc *dc, struct dc_tiling_info *tiling_info); 1972 1981 1973 1982 /** 1974 1983 * struct dc_validation_set - Struct to store surface/stream associations for validation
+27
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
··· 2349 2349 return dc->ctx->dmub_srv && dc->ctx->dmub_srv->cursor_offload_enabled; 2350 2350 } 2351 2351 2352 + void dc_dmub_srv_boot_time_crc_init(const struct dc *dc, uint64_t gpu_addr, uint32_t size) 2353 + { 2354 + struct dc_dmub_srv *dc_dmub_srv; 2355 + struct dc_context *dc_ctx; 2356 + union dmub_rb_cmd cmd = {0}; 2357 + bool result = false; 2358 + 2359 + if (!dc || !dc->ctx || !dc->ctx->dmub_srv || size == 0) 2360 + return; 2361 + 2362 + dc_dmub_srv = dc->ctx->dmub_srv; 2363 + dc_ctx = dc_dmub_srv->ctx; 2364 + 2365 + memset(&cmd, 0, sizeof(cmd)); 2366 + cmd.boot_time_crc_init.header.type = DMUB_CMD__BOOT_TIME_CRC; 2367 + cmd.boot_time_crc_init.header.sub_type = DMUB_CMD__BOOT_TIME_CRC_INIT_MEM; 2368 + cmd.boot_time_crc_init.header.payload_bytes = 2369 + sizeof(struct dmub_rb_cmd_boot_time_crc_init); 2370 + cmd.boot_time_crc_init.data.buffer_addr.quad_part = gpu_addr; 2371 + cmd.boot_time_crc_init.data.buffer_size = size; 2372 + 2373 + result = dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); 2374 + 2375 + if (!result) 2376 + DC_ERROR("Boot time crc init failed in DMUB"); 2377 + } 2378 + 2352 2379 void dc_dmub_srv_release_hw(const struct dc *dc) 2353 2380 { 2354 2381 struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+9
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
··· 362 362 bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc); 363 363 364 364 /** 365 + * dc_dmub_srv_boot_time_crc_init() - Initializes DMUB boot time CRC. 366 + * 367 + * @dc - pointer to DC object 368 + * @gpu_addr - address for the boot time CRC buffer 369 + * @size - size of the boot time CRC buffer 370 + */ 371 + void dc_dmub_srv_boot_time_crc_init(const struct dc *dc, uint64_t gpu_addr, uint32_t size); 372 + 373 + /** 365 374 * dc_dmub_srv_release_hw() - Notifies DMUB service that HW access is no longer required. 366 375 * 367 376 * @dc - pointer to DC object
+1 -1
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
··· 5304 5304 double LinesInDETC; 5305 5305 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5306 5306 unsigned int LinesInDETCRoundedDownToSwath; 5307 - double FullDETBufferingTimeY[DC__NUM_DPP__MAX]; 5307 + double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 }; 5308 5308 double FullDETBufferingTimeC; 5309 5309 double ActiveDRAMClockChangeLatencyMarginY; 5310 5310 double ActiveDRAMClockChangeLatencyMarginC;
+3 -3
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
··· 459 459 { 460 460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 461 461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 462 - unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 463 - unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 462 + unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX] = { 0 }; 463 + unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX] = { 0 }; 464 464 unsigned int RoundedUpSwathSizeBytesY; 465 465 unsigned int RoundedUpSwathSizeBytesC; 466 466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; ··· 2988 2988 for (j = 0; j <= 1; ++j) { 2989 2989 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2990 2990 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2991 - double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2991 + double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX] = { 0 }; 2992 2992 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2993 2993 double MinimumTWait = 0.0; 2994 2994 double DPTEBandwidth;
+1
drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h
··· 192 192 unsigned int max_flip_time_us; 193 193 unsigned int max_flip_time_lines; 194 194 unsigned int hostvm_mode; 195 + bool dcn_mrq_present; 195 196 unsigned int subvp_drr_scheduling_margin_us; 196 197 unsigned int subvp_prefetch_end_to_mall_start_us; 197 198 unsigned int subvp_fw_processing_delay;
+1
drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c
··· 187 187 { 188 188 ip_params->max_num_dpp = ip_caps->pipe_count; 189 189 ip_params->max_num_otg = ip_caps->otg_count; 190 + ip_params->max_num_opp = ip_caps->otg_count; 190 191 ip_params->num_dsc = ip_caps->num_dsc; 191 192 ip_params->max_num_dp2p0_streams = ip_caps->max_num_dp2p0_streams; 192 193 ip_params->max_num_dp2p0_outputs = ip_caps->max_num_dp2p0_outputs;
+22
drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
··· 12262 12262 12263 12263 unsigned int pixel_chunk_bytes = 0; 12264 12264 unsigned int min_pixel_chunk_bytes = 0; 12265 + unsigned int meta_chunk_bytes = 0; 12266 + unsigned int min_meta_chunk_bytes = 0; 12265 12267 unsigned int dpte_group_bytes = 0; 12266 12268 unsigned int mpte_group_bytes = 0; 12267 12269 12268 12270 unsigned int p1_pixel_chunk_bytes = 0; 12269 12271 unsigned int p1_min_pixel_chunk_bytes = 0; 12272 + unsigned int p1_meta_chunk_bytes = 0; 12273 + unsigned int p1_min_meta_chunk_bytes = 0; 12270 12274 unsigned int p1_dpte_group_bytes = 0; 12271 12275 unsigned int p1_mpte_group_bytes = 0; 12272 12276 ··· 12291 12287 dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); 12292 12288 mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); 12293 12289 12290 + meta_chunk_bytes = (unsigned int)(mode_lib->ip.meta_chunk_size_kbytes * 1024); 12291 + min_meta_chunk_bytes = (unsigned int)(mode_lib->ip.min_meta_chunk_size_bytes); 12292 + 12294 12293 p1_pixel_chunk_bytes = pixel_chunk_bytes; 12295 12294 p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; 12295 + p1_meta_chunk_bytes = meta_chunk_bytes; 12296 + p1_min_meta_chunk_bytes = min_meta_chunk_bytes; 12296 12297 p1_dpte_group_bytes = dpte_group_bytes; 12297 12298 p1_mpte_group_bytes = mpte_group_bytes; 12298 12299 ··· 12317 12308 rq_regs->rq_regs_c.min_chunk_size = 0; 12318 12309 else 12319 12310 rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1); 12311 + 12312 + rq_regs->rq_regs_l.meta_chunk_size = log_and_substract_if_non_zero(meta_chunk_bytes, 10); 12313 + rq_regs->rq_regs_c.meta_chunk_size = log_and_substract_if_non_zero(p1_meta_chunk_bytes, 10); 12314 + 12315 + if (min_meta_chunk_bytes == 0) 12316 + rq_regs->rq_regs_l.min_meta_chunk_size = 0; 12317 + else 12318 + rq_regs->rq_regs_l.min_meta_chunk_size = log_and_substract_if_non_zero(min_meta_chunk_bytes, 6 - 1); 12319 + 12320 + if 
(min_meta_chunk_bytes == 0) 12321 + rq_regs->rq_regs_c.min_meta_chunk_size = 0; 12322 + else 12323 + rq_regs->rq_regs_c.min_meta_chunk_size = log_and_substract_if_non_zero(p1_min_meta_chunk_bytes, 6 - 1); 12320 12324 12321 12325 rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6); 12322 12326 rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6);
+4 -3
drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
··· 812 812 int x_pos_viewport = 0; 813 813 int x_hot_viewport = 0; 814 814 uint32_t cur_en = pos->enable ? 1 : 0; 815 - 815 + uint32_t x_hotspot_clamped = pos->x_hotspot; 816 816 hubp->curs_pos = *pos; 817 - 818 817 /* Recout is zero for pipes if the entire dst_rect is contained 819 818 * within preceding ODM slices. 820 819 */ ··· 844 845 845 846 ASSERT(param->h_scale_ratio.value); 846 847 848 + if (x_hotspot_clamped > 0xFF) 849 + x_hotspot_clamped = 0xFF; 847 850 if (param->h_scale_ratio.value) 848 851 dst_x_offset = dc_fixpt_floor(dc_fixpt_div( 849 852 dc_fixpt_from_int(dst_x_offset), ··· 866 865 CURSOR_Y_POSITION, pos->y); 867 866 868 867 REG_SET_2(CURSOR_HOT_SPOT, 0, 869 - CURSOR_HOT_SPOT_X, pos->x_hotspot, 868 + CURSOR_HOT_SPOT_X, x_hotspot_clamped, 870 869 CURSOR_HOT_SPOT_Y, pos->y_hotspot); 871 870 872 871 REG_SET(CURSOR_DST_OFFSET, 0,
+1
drivers/gpu/drm/amd/display/dc/inc/core_types.h
··· 214 214 unsigned int index); 215 215 216 216 void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); 217 + void (*get_default_tiling_info)(struct dc_tiling_info *tiling_info); 217 218 void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx); 218 219 /* 219 220 * Get indicator of power from a context that went through full validation
+8 -1
drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
··· 1273 1273 .get_dcc_compression_cap = dcn10_get_dcc_compression_cap 1274 1274 }; 1275 1275 1276 + void dcn10_get_default_tiling_info(struct dc_tiling_info *tiling_info) 1277 + { 1278 + tiling_info->gfxversion = DcGfxVersion9; 1279 + tiling_info->gfx9.swizzle = DC_SW_LINEAR; 1280 + } 1281 + 1276 1282 static const struct resource_funcs dcn10_res_pool_funcs = { 1277 1283 .destroy = dcn10_destroy_resource_pool, 1278 1284 .link_enc_create = dcn10_link_encoder_create, ··· 1290 1284 .add_stream_to_ctx = dcn10_add_stream_to_ctx, 1291 1285 .patch_unknown_plane_state = dcn10_patch_unknown_plane_state, 1292 1286 .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, 1293 - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1287 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1288 + .get_default_tiling_info = dcn10_get_default_tiling_info 1294 1289 }; 1295 1290 1296 1291 static uint32_t read_pipe_fuses(struct dc_context *ctx)
+2
drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
··· 53 53 54 54 unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx); 55 55 56 + void dcn10_get_default_tiling_info(struct dc_tiling_info *tiling_info); 57 + 56 58 #endif /* __DC_RESOURCE_DCN10_H__ */ 57 59
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
··· 2232 2232 .set_mcif_arb_params = dcn20_set_mcif_arb_params, 2233 2233 .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, 2234 2234 .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, 2235 - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 2235 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 2236 + .get_default_tiling_info = dcn10_get_default_tiling_info 2236 2237 }; 2237 2238 2238 2239 bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
··· 1081 1081 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1082 1082 .set_mcif_arb_params = dcn20_set_mcif_arb_params, 1083 1083 .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, 1084 - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1084 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1085 + .get_default_tiling_info = dcn10_get_default_tiling_info 1085 1086 }; 1086 1087 1087 1088 static bool dcn201_resource_construct(
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
··· 1378 1378 .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, 1379 1379 .update_bw_bounding_box = dcn21_update_bw_bounding_box, 1380 1380 .get_panel_config_defaults = dcn21_get_panel_config_defaults, 1381 - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1381 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1382 + .get_default_tiling_info = dcn10_get_default_tiling_info 1382 1383 }; 1383 1384 1384 1385 static bool dcn21_resource_construct(
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
··· 2248 2248 .update_bw_bounding_box = dcn30_update_bw_bounding_box, 2249 2249 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 2250 2250 .get_panel_config_defaults = dcn30_get_panel_config_defaults, 2251 - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 2251 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 2252 + .get_default_tiling_info = dcn10_get_default_tiling_info 2252 2253 }; 2253 2254 2254 2255 #define CTX ctx
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
··· 1400 1400 .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, 1401 1401 .update_bw_bounding_box = dcn301_update_bw_bounding_box, 1402 1402 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1403 - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1403 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1404 + .get_default_tiling_info = dcn10_get_default_tiling_info, 1404 1405 }; 1405 1406 1406 1407 static bool dcn301_resource_construct(
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
··· 1155 1155 .update_bw_bounding_box = dcn302_update_bw_bounding_box, 1156 1156 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1157 1157 .get_panel_config_defaults = dcn302_get_panel_config_defaults, 1158 - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1158 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1159 + .get_default_tiling_info = dcn10_get_default_tiling_info 1159 1160 }; 1160 1161 1161 1162 static struct dc_cap_funcs cap_funcs = {
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
··· 1099 1099 .update_bw_bounding_box = dcn303_update_bw_bounding_box, 1100 1100 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 1101 1101 .get_panel_config_defaults = dcn303_get_panel_config_defaults, 1102 - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe 1102 + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1103 + .get_default_tiling_info = dcn10_get_default_tiling_info 1103 1104 }; 1104 1105 1105 1106 static struct dc_cap_funcs cap_funcs = {
+1
drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
··· 1851 1851 .get_det_buffer_size = dcn31_get_det_buffer_size, 1852 1852 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1853 1853 .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, 1854 + .get_default_tiling_info = dcn10_get_default_tiling_info, 1854 1855 .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params 1855 1856 }; 1856 1857
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
··· 1782 1782 .get_det_buffer_size = dcn31_get_det_buffer_size, 1783 1783 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1784 1784 .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, 1785 - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params 1785 + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 1786 + .get_default_tiling_info = dcn10_get_default_tiling_info 1786 1787 }; 1787 1788 1788 1789 static struct clock_source *dcn30_clock_source_create(
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
··· 1846 1846 .get_det_buffer_size = dcn31_get_det_buffer_size, 1847 1847 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1848 1848 .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, 1849 - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params 1849 + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 1850 + .get_default_tiling_info = dcn10_get_default_tiling_info 1850 1851 }; 1851 1852 1852 1853 static bool dcn315_resource_construct(
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
··· 1721 1721 .get_det_buffer_size = dcn31_get_det_buffer_size, 1722 1722 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1723 1723 .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, 1724 - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params 1724 + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 1725 + .get_default_tiling_info = dcn10_get_default_tiling_info 1725 1726 }; 1726 1727 1727 1728 static bool dcn316_resource_construct(
+4
drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
··· 1785 1785 1786 1786 dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); 1787 1787 1788 + DC_FP_START(); 1788 1789 dcn32_override_min_req_memclk(dc, context); 1790 + DC_FP_END(); 1791 + 1789 1792 dcn32_override_min_req_dcfclk(dc, context); 1790 1793 1791 1794 BW_VAL_TRACE_END_WATERMARKS(); ··· 2112 2109 .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, 2113 2110 .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, 2114 2111 .add_phantom_pipes = dcn32_add_phantom_pipes, 2112 + .get_default_tiling_info = dcn10_get_default_tiling_info, 2115 2113 .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 2116 2114 .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, 2117 2115 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+1
drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
··· 1619 1619 .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, 1620 1620 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1621 1621 .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size, 1622 + .get_default_tiling_info = dcn10_get_default_tiling_info, 1622 1623 }; 1623 1624 1624 1625 static uint32_t read_pipe_fuses(struct dc_context *ctx)
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
··· 1802 1802 .get_det_buffer_size = dcn31_get_det_buffer_size, 1803 1803 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1804 1804 .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, 1805 - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params 1805 + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 1806 + .get_default_tiling_info = dcn10_get_default_tiling_info 1806 1807 }; 1807 1808 1808 1809 static bool dcn35_resource_construct(
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
··· 1775 1775 .get_det_buffer_size = dcn31_get_det_buffer_size, 1776 1776 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1777 1777 .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, 1778 - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params 1778 + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 1779 + .get_default_tiling_info = dcn10_get_default_tiling_info 1779 1780 }; 1780 1781 1781 1782 static bool dcn351_resource_construct(
+2 -1
drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
··· 1781 1781 .get_preferred_eng_id_dpia = dcn36_get_preferred_eng_id_dpia, 1782 1782 .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 1783 1783 .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, 1784 - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params 1784 + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, 1785 + .get_default_tiling_info = dcn10_get_default_tiling_info, 1785 1786 }; 1786 1787 1787 1788 static bool dcn36_resource_construct(
+8 -1
drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
··· 1839 1839 .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, 1840 1840 .get_power_profile = dcn401_get_power_profile, 1841 1841 .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe, 1842 - .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size 1842 + .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size, 1843 + .get_default_tiling_info = dcn401_get_default_tiling_info 1843 1844 }; 1845 + 1846 + void dcn401_get_default_tiling_info(struct dc_tiling_info *tiling_info) 1847 + { 1848 + tiling_info->gfxversion = DcGfxAddr3; 1849 + tiling_info->gfx_addr3.swizzle = DC_ADDR3_SW_LINEAR; 1850 + } 1844 1851 1845 1852 static uint32_t read_pipe_fuses(struct dc_context *ctx) 1846 1853 {
+2
drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
··· 28 28 29 29 void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); 30 30 31 + void dcn401_get_default_tiling_info(struct dc_tiling_info *tiling_info); 32 + 31 33 unsigned int dcn401_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx); 32 34 33 35 /* Following are definitions for run time init of reg offsets */
+4 -1
drivers/gpu/drm/amd/include/atomfirmware.h
··· 1695 1695 uint8_t gpu_package_id; 1696 1696 struct edp_info_table edp1_info; 1697 1697 struct edp_info_table edp2_info; 1698 - uint32_t reserved2[8]; 1698 + uint32_t cpuid; 1699 + uint32_t vram_bit_width; 1700 + uint32_t reserved2[6]; 1699 1701 struct atom_external_display_connection_info extdispconninfo; 1700 1702 uint8_t UMACarveoutVersion; 1701 1703 uint8_t UMACarveoutIndexMax; ··· 1772 1770 Hbm2MemType, ///< Assign 33 to HBM2 1773 1771 Ddr5MemType, ///< Assign 34 to DDR5 1774 1772 LpDdr5MemType, ///< Assign 35 to LPDDR5 1773 + LpDdr5xMemType, ///< Assign 36 to LPDDR5x 1775 1774 }; 1776 1775 1777 1776
+10 -26
drivers/gpu/drm/amd/pm/amdgpu_pm.c
··· 1910 1910 static int pp_od_clk_voltage_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, 1911 1911 uint32_t mask, enum amdgpu_device_attr_states *states) 1912 1912 { 1913 - uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); 1914 - 1915 1913 *states = ATTR_STATE_SUPPORTED; 1916 1914 1917 1915 if (!amdgpu_dpm_is_overdrive_supported(adev)) { ··· 1917 1919 return 0; 1918 1920 } 1919 1921 1920 - /* Enable pp_od_clk_voltage node for gc 9.4.3, 9.4.4, 9.5.0 SRIOV/BM support */ 1921 - if (gc_ver == IP_VERSION(9, 4, 3) || 1922 - gc_ver == IP_VERSION(9, 4, 4) || 1923 - gc_ver == IP_VERSION(9, 5, 0)) { 1922 + /* Enable pp_od_clk_voltage node for gc 9.4.3, 9.4.4, 9.5.0, 12.1.0 SRIOV/BM support */ 1923 + if (amdgpu_is_multi_aid(adev)) { 1924 1924 if (amdgpu_sriov_multi_vf_mode(adev)) 1925 1925 *states = ATTR_STATE_UNSUPPORTED; 1926 1926 return 0; ··· 1996 2000 gc_ver == IP_VERSION(11, 5, 0) || 1997 2001 gc_ver == IP_VERSION(11, 0, 2) || 1998 2002 gc_ver == IP_VERSION(11, 0, 3) || 1999 - gc_ver == IP_VERSION(9, 4, 3) || 2000 - gc_ver == IP_VERSION(9, 4, 4) || 2001 - gc_ver == IP_VERSION(9, 5, 0))) 2003 + amdgpu_is_multi_aid(adev))) 2002 2004 *states = ATTR_STATE_UNSUPPORTED; 2003 2005 } else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) { 2004 2006 if (!((gc_ver == IP_VERSION(10, 3, 1) || ··· 2017 2023 gc_ver == IP_VERSION(11, 5, 0) || 2018 2024 gc_ver == IP_VERSION(11, 0, 2) || 2019 2025 gc_ver == IP_VERSION(11, 0, 3) || 2020 - gc_ver == IP_VERSION(9, 4, 3) || 2021 - gc_ver == IP_VERSION(9, 4, 4) || 2022 - gc_ver == IP_VERSION(9, 5, 0))) 2026 + amdgpu_is_multi_aid(adev))) 2023 2027 *states = ATTR_STATE_UNSUPPORTED; 2024 2028 } else if (DEVICE_ATTR_IS(pp_dpm_dclk1)) { 2025 2029 if (!((gc_ver == IP_VERSION(10, 3, 1) || ··· 2027 2035 *states = ATTR_STATE_UNSUPPORTED; 2028 2036 } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { 2029 2037 if (gc_ver == IP_VERSION(9, 4, 2) || 2030 - gc_ver == IP_VERSION(9, 4, 3) || 2031 - gc_ver == IP_VERSION(9, 4, 4) || 2032 - gc_ver == 
IP_VERSION(9, 5, 0)) 2038 + amdgpu_is_multi_aid(adev)) 2033 2039 *states = ATTR_STATE_UNSUPPORTED; 2034 2040 } 2035 2041 ··· 2641 2651 case IP_VERSION(11, 0, 3): 2642 2652 case IP_VERSION(12, 0, 0): 2643 2653 case IP_VERSION(12, 0, 1): 2654 + case IP_VERSION(12, 1, 0): 2644 2655 *states = ATTR_STATE_SUPPORTED; 2645 2656 break; 2646 2657 default: ··· 3723 3732 3724 3733 /* Skip crit temp on APU */ 3725 3734 if ((((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ)) || 3726 - (gc_ver == IP_VERSION(9, 4, 3) || gc_ver == IP_VERSION(9, 4, 4) || 3727 - gc_ver == IP_VERSION(9, 5, 0))) && 3735 + amdgpu_is_multi_aid(adev)) && 3728 3736 (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || 3729 3737 attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) 3730 3738 return 0; ··· 3805 3815 3806 3816 if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ 3807 3817 adev->family == AMDGPU_FAMILY_KV || /* not implemented yet */ 3808 - (gc_ver == IP_VERSION(9, 4, 3) || 3809 - gc_ver == IP_VERSION(9, 4, 4) || 3810 - gc_ver == IP_VERSION(9, 5, 0))) && 3818 + amdgpu_is_multi_aid(adev)) && 3811 3819 (attr == &sensor_dev_attr_in0_input.dev_attr.attr || 3812 3820 attr == &sensor_dev_attr_in0_label.dev_attr.attr)) 3813 3821 return 0; 3814 3822 3815 3823 /* only APUs other than gc 9,4,3 have vddnb */ 3816 3824 if ((!(adev->flags & AMD_IS_APU) || 3817 - (gc_ver == IP_VERSION(9, 4, 3) || 3818 - gc_ver == IP_VERSION(9, 4, 4) || 3819 - gc_ver == IP_VERSION(9, 5, 0))) && 3825 + amdgpu_is_multi_aid(adev)) && 3820 3826 (attr == &sensor_dev_attr_in1_input.dev_attr.attr || 3821 3827 attr == &sensor_dev_attr_in1_label.dev_attr.attr)) 3822 3828 return 0; ··· 3841 3855 return 0; 3842 3856 3843 3857 /* hotspot temperature for gc 9,4,3*/ 3844 - if (gc_ver == IP_VERSION(9, 4, 3) || 3845 - gc_ver == IP_VERSION(9, 4, 4) || 3846 - gc_ver == IP_VERSION(9, 5, 0)) { 3858 + if (amdgpu_is_multi_aid(adev)) { 3847 3859 if (attr == &sensor_dev_attr_temp1_input.dev_attr.attr || 3848 3860 
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || 3849 3861 attr == &sensor_dev_attr_temp1_label.dev_attr.attr)
+3 -1
drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
··· 3454 3454 if (adev->asic_type == CHIP_HAINAN) { 3455 3455 if ((adev->pdev->revision == 0x81) || 3456 3456 (adev->pdev->revision == 0xC3) || 3457 + (adev->pdev->device == 0x6660) || 3457 3458 (adev->pdev->device == 0x6664) || 3458 3459 (adev->pdev->device == 0x6665) || 3459 - (adev->pdev->device == 0x6667)) { 3460 + (adev->pdev->device == 0x6667) || 3461 + (adev->pdev->device == 0x666F)) { 3460 3462 max_sclk = 75000; 3461 3463 } 3462 3464 if ((adev->pdev->revision == 0xC3) ||
+1
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
··· 389 389 void *metrics_table; 390 390 void *clocks_table; 391 391 void *watermarks_table; 392 + struct mutex metrics_lock; 392 393 393 394 void *max_sustainable_clocks; 394 395 struct smu_bios_boot_up_values boot_values;
-2
drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
··· 132 132 133 133 int smu_v13_0_get_vbios_bootup_values(struct smu_context *smu); 134 134 135 - int smu_v13_0_check_fw_version(struct smu_context *smu); 136 - 137 135 int smu_v13_0_set_driver_table_location(struct smu_context *smu); 138 136 139 137 int smu_v13_0_set_tool_table_location(struct smu_context *smu);
-2
drivers/gpu/drm/amd/pm/swsmu/inc/smu_v15_0.h
··· 142 142 143 143 int smu_v15_0_get_vbios_bootup_values(struct smu_context *smu); 144 144 145 - int smu_v15_0_check_fw_version(struct smu_context *smu); 146 - 147 145 int smu_v15_0_set_driver_table_location(struct smu_context *smu); 148 146 149 147 int smu_v15_0_set_tool_table_location(struct smu_context *smu);
+1 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
··· 1988 1988 /* pptable related */ 1989 1989 .setup_pptable = aldebaran_setup_pptable, 1990 1990 .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values, 1991 - .check_fw_version = smu_v13_0_check_fw_version, 1991 + .check_fw_version = smu_cmn_check_fw_version, 1992 1992 .write_pptable = smu_cmn_write_pptable, 1993 1993 .set_driver_table_location = smu_v13_0_set_driver_table_location, 1994 1994 .set_tool_table_location = smu_v13_0_set_tool_table_location,
-43
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
··· 258 258 return -EIO; 259 259 } 260 260 261 - int smu_v13_0_check_fw_version(struct smu_context *smu) 262 - { 263 - struct amdgpu_device *adev = smu->adev; 264 - uint32_t if_version = 0xff, smu_version = 0xff; 265 - uint8_t smu_program, smu_major, smu_minor, smu_debug; 266 - int ret = 0; 267 - 268 - ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); 269 - if (ret) 270 - return ret; 271 - 272 - smu_program = (smu_version >> 24) & 0xff; 273 - smu_major = (smu_version >> 16) & 0xff; 274 - smu_minor = (smu_version >> 8) & 0xff; 275 - smu_debug = (smu_version >> 0) & 0xff; 276 - adev->pm.fw_version = smu_version; 277 - 278 - /* only for dGPU w/ SMU13*/ 279 - if (adev->pm.fw) 280 - dev_dbg(smu->adev->dev, "smu fw reported program %d, version = 0x%08x (%d.%d.%d)\n", 281 - smu_program, smu_version, smu_major, smu_minor, smu_debug); 282 - 283 - /* 284 - * 1. if_version mismatch is not critical as our fw is designed 285 - * to be backward compatible. 286 - * 2. New fw usually brings some optimizations. But that's visible 287 - * only on the paired driver. 288 - * Considering above, we just leave user a verbal message instead 289 - * of halt driver loading. 290 - */ 291 - dev_info_once(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " 292 - "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n", 293 - smu->smc_driver_if_version, if_version, 294 - smu_program, smu_version, smu_major, smu_minor, smu_debug); 295 - 296 - if (smu->smc_driver_if_version != SMU_IGNORE_IF_VERSION && 297 - if_version != smu->smc_driver_if_version) { 298 - dev_info(adev->dev, "SMU driver if version not matched\n"); 299 - } 300 - 301 - return ret; 302 - } 303 - 304 261 static int smu_v13_0_set_pptable_v2_0(struct smu_context *smu, void **table, uint32_t *size) 305 262 { 306 263 struct amdgpu_device *adev = smu->adev;
+1 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
··· 3133 3133 .fini_power = smu_v13_0_fini_power, 3134 3134 .check_fw_status = smu_v13_0_check_fw_status, 3135 3135 .setup_pptable = smu_v13_0_0_setup_pptable, 3136 - .check_fw_version = smu_v13_0_check_fw_version, 3136 + .check_fw_version = smu_cmn_check_fw_version, 3137 3137 .write_pptable = smu_cmn_write_pptable, 3138 3138 .set_driver_table_location = smu_v13_0_set_driver_table_location, 3139 3139 .system_features_control = smu_v13_0_0_system_features_control,
+1 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
··· 1098 1098 1099 1099 static const struct pptable_funcs smu_v13_0_4_ppt_funcs = { 1100 1100 .check_fw_status = smu_v13_0_check_fw_status, 1101 - .check_fw_version = smu_v13_0_check_fw_version, 1101 + .check_fw_version = smu_cmn_check_fw_version, 1102 1102 .init_smc_tables = smu_v13_0_4_init_smc_tables, 1103 1103 .fini_smc_tables = smu_v13_0_4_fini_smc_tables, 1104 1104 .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
+1 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
··· 1102 1102 1103 1103 static const struct pptable_funcs smu_v13_0_5_ppt_funcs = { 1104 1104 .check_fw_status = smu_v13_0_check_fw_status, 1105 - .check_fw_version = smu_v13_0_check_fw_version, 1105 + .check_fw_version = smu_cmn_check_fw_version, 1106 1106 .init_smc_tables = smu_v13_0_5_init_smc_tables, 1107 1107 .fini_smc_tables = smu_v13_0_5_fini_smc_tables, 1108 1108 .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
+1 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
··· 481 481 { 482 482 int r; 483 483 484 - r = smu_v13_0_check_fw_version(smu); 484 + r = smu_cmn_check_fw_version(smu); 485 485 /* Initialize caps flags once fw version is fetched */ 486 486 if (!r) 487 487 smu_v13_0_x_init_caps(smu);
+1 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
··· 2788 2788 .fini_power = smu_v13_0_fini_power, 2789 2789 .check_fw_status = smu_v13_0_7_check_fw_status, 2790 2790 .setup_pptable = smu_v13_0_7_setup_pptable, 2791 - .check_fw_version = smu_v13_0_check_fw_version, 2791 + .check_fw_version = smu_cmn_check_fw_version, 2792 2792 .write_pptable = smu_cmn_write_pptable, 2793 2793 .set_driver_table_location = smu_v13_0_set_driver_table_location, 2794 2794 .system_features_control = smu_v13_0_system_features_control,
+1 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
··· 1331 1331 1332 1332 static const struct pptable_funcs yellow_carp_ppt_funcs = { 1333 1333 .check_fw_status = smu_v13_0_check_fw_status, 1334 - .check_fw_version = smu_v13_0_check_fw_version, 1334 + .check_fw_version = smu_cmn_check_fw_version, 1335 1335 .init_smc_tables = yellow_carp_init_smc_tables, 1336 1336 .fini_smc_tables = yellow_carp_fini_smc_tables, 1337 1337 .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
-52
drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c
··· 207 207 return -EIO; 208 208 } 209 209 210 - int smu_v15_0_check_fw_version(struct smu_context *smu) 211 - { 212 - struct amdgpu_device *adev = smu->adev; 213 - uint32_t if_version = 0xff, smu_version = 0xff; 214 - uint8_t smu_program, smu_major, smu_minor, smu_debug; 215 - int ret = 0; 216 - 217 - ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); 218 - if (ret) 219 - return ret; 220 - 221 - smu_program = (smu_version >> 24) & 0xff; 222 - smu_major = (smu_version >> 16) & 0xff; 223 - smu_minor = (smu_version >> 8) & 0xff; 224 - smu_debug = (smu_version >> 0) & 0xff; 225 - if (smu->is_apu) 226 - adev->pm.fw_version = smu_version; 227 - 228 - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { 229 - case IP_VERSION(15, 0, 0): 230 - smu->smc_driver_if_version = SMU15_DRIVER_IF_VERSION_SMU_V15_0; 231 - break; 232 - default: 233 - dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n", 234 - amdgpu_ip_version(adev, MP1_HWIP, 0)); 235 - smu->smc_driver_if_version = SMU15_DRIVER_IF_VERSION_INV; 236 - break; 237 - } 238 - 239 - if (adev->pm.fw) 240 - dev_dbg(smu->adev->dev, "smu fw reported program %d, version = 0x%08x (%d.%d.%d)\n", 241 - smu_program, smu_version, smu_major, smu_minor, smu_debug); 242 - 243 - /* 244 - * 1. if_version mismatch is not critical as our fw is designed 245 - * to be backward compatible. 246 - * 2. New fw usually brings some optimizations. But that's visible 247 - * only on the paired driver. 248 - * Considering above, we just leave user a verbal message instead 249 - * of halt driver loading. 
250 - */ 251 - if (if_version != smu->smc_driver_if_version) { 252 - dev_info(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " 253 - "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n", 254 - smu->smc_driver_if_version, if_version, 255 - smu_program, smu_version, smu_major, smu_minor, smu_debug); 256 - dev_info(adev->dev, "SMU driver if version not matched\n"); 257 - } 258 - 259 - return ret; 260 - } 261 - 262 210 static int smu_v15_0_set_pptable_v2_0(struct smu_context *smu, void **table, uint32_t *size) 263 211 { 264 212 struct amdgpu_device *adev = smu->adev;
+2 -1
drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c
··· 1416 1416 1417 1417 static const struct pptable_funcs smu_v15_0_0_ppt_funcs = { 1418 1418 .check_fw_status = smu_v15_0_check_fw_status, 1419 - .check_fw_version = smu_v15_0_check_fw_version, 1419 + .check_fw_version = smu_cmn_check_fw_version, 1420 1420 .init_smc_tables = smu_v15_0_0_init_smc_tables, 1421 1421 .fini_smc_tables = smu_v15_0_0_fini_smc_tables, 1422 1422 .get_vbios_bootup_values = smu_v15_0_get_vbios_bootup_values, ··· 1468 1468 smu->feature_map = smu_v15_0_0_feature_mask_map; 1469 1469 smu->table_map = smu_v15_0_0_table_map; 1470 1470 smu->is_apu = true; 1471 + smu->smc_driver_if_version = SMU15_DRIVER_IF_VERSION_SMU_V15_0; 1471 1472 1472 1473 smu_v15_0_0_init_msg_ctl(smu); 1473 1474 }
+29
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
··· 1035 1035 return ret; 1036 1036 } 1037 1037 1038 + int smu_cmn_check_fw_version(struct smu_context *smu) 1039 + { 1040 + struct amdgpu_device *adev = smu->adev; 1041 + uint32_t if_version = 0xff, smu_version = 0xff; 1042 + uint8_t smu_program, smu_major, smu_minor, smu_debug; 1043 + int ret; 1044 + 1045 + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); 1046 + if (ret) 1047 + return ret; 1048 + 1049 + smu_program = (smu_version >> 24) & 0xff; 1050 + smu_major = (smu_version >> 16) & 0xff; 1051 + smu_minor = (smu_version >> 8) & 0xff; 1052 + smu_debug = (smu_version >> 0) & 0xff; 1053 + adev->pm.fw_version = smu_version; 1054 + 1055 + dev_info_once(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " 1056 + "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n", 1057 + smu->smc_driver_if_version, if_version, 1058 + smu_program, smu_version, smu_major, smu_minor, smu_debug); 1059 + 1060 + if (smu->smc_driver_if_version != SMU_IGNORE_IF_VERSION && 1061 + if_version != smu->smc_driver_if_version) 1062 + dev_info(adev->dev, "SMU driver if version not matched\n"); 1063 + 1064 + return 0; 1065 + } 1066 + 1038 1067 int smu_cmn_update_table(struct smu_context *smu, 1039 1068 enum smu_table_id table_index, 1040 1069 int argument,
+1
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
··· 207 207 208 208 int smu_cmn_dpm_pcie_gen_idx(int gen); 209 209 int smu_cmn_dpm_pcie_width_idx(int width); 210 + int smu_cmn_check_fw_version(struct smu_context *smu); 210 211 211 212 /*SMU gpu metrics */ 212 213
+31
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c
··· 535 535 return ret; 536 536 } 537 537 538 + int amdgpu_ras_mgr_dispatch_interrupt(struct amdgpu_device *adev, struct ras_ih_info *ih_info) 539 + { 540 + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 541 + uint64_t seq_no = 0; 542 + int ret = 0; 543 + 544 + if (!amdgpu_ras_mgr_is_ready(adev)) 545 + return -EPERM; 546 + 547 + if (!ih_info) 548 + return 0; 549 + 550 + if (ih_info->block == RAS_BLOCK_ID__UMC) { 551 + if (ras_mgr->ras_core->poison_supported) { 552 + seq_no = amdgpu_ras_mgr_gen_ras_event_seqno(adev, RAS_SEQNO_TYPE_DE); 553 + RAS_DEV_INFO(adev, 554 + "{%llu} RAS poison is created, no user action is needed.\n", 555 + seq_no); 556 + } 557 + 558 + ret = amdgpu_ras_process_handle_umc_interrupt(adev, ih_info); 559 + } else if (ras_mgr->ras_core->poison_supported) { 560 + ret = amdgpu_ras_process_handle_consumption_interrupt(adev, ih_info); 561 + } else { 562 + RAS_DEV_WARN(adev, 563 + "No RAS interrupt handler for non-UMC block with poison disabled.\n"); 564 + } 565 + 566 + return ret; 567 + } 568 + 538 569 int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data) 539 570 { 540 571 if (!amdgpu_ras_mgr_is_ready(adev))
+1
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h
··· 67 67 int amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data); 68 68 int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data); 69 69 int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data); 70 + int amdgpu_ras_mgr_dispatch_interrupt(struct amdgpu_device *adev, struct ras_ih_info *ih_info); 70 71 int amdgpu_ras_mgr_update_ras_ecc(struct amdgpu_device *adev); 71 72 int amdgpu_ras_mgr_reset_gpu(struct amdgpu_device *adev, uint32_t flags); 72 73 uint64_t amdgpu_ras_mgr_gen_ras_event_seqno(struct amdgpu_device *adev,
+10
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c
··· 103 103 if (!ih_info) 104 104 return -EINVAL; 105 105 106 + if (amdgpu_sriov_vf(adev)) { 107 + if (adev->virt.ops && adev->virt.ops->ras_poison_handler) 108 + adev->virt.ops->ras_poison_handler(adev, ih_info->block); 109 + else 110 + dev_warn(adev->dev, 111 + "No ras_poison_handler interface in SRIOV for block[%d]!\n", 112 + ih_info->block); 113 + return 0; 114 + } 115 + 106 116 memset(&req, 0, sizeof(req)); 107 117 req.block = ih_info->block; 108 118 req.data = ih_info->data;
+8 -3
drivers/gpu/drm/amd/ras/rascore/ras_core.c
··· 527 527 528 528 uint64_t ras_core_get_utc_second_timestamp(struct ras_core_context *ras_core) 529 529 { 530 - if (ras_core && ras_core->sys_fn && 531 - ras_core->sys_fn->get_utc_second_timestamp) 530 + if (!ras_core) 531 + return 0; 532 + 533 + if (ras_core->sys_fn && 534 + ras_core->sys_fn->get_utc_second_timestamp) 532 535 return ras_core->sys_fn->get_utc_second_timestamp(ras_core); 533 536 534 537 RAS_DEV_ERR(ras_core->dev, "Failed to get system timestamp!\n"); ··· 553 550 ras_core->sys_fn->detect_ras_interrupt) 554 551 return ras_core->sys_fn->detect_ras_interrupt(ras_core); 555 552 556 - RAS_DEV_ERR(ras_core->dev, "Failed to detect ras interrupt!\n"); 553 + if (ras_core && ras_core->dev) 554 + RAS_DEV_ERR(ras_core->dev, "Failed to detect ras interrupt!\n"); 555 + 557 556 return false; 558 557 } 559 558
+3 -1
drivers/gpu/drm/radeon/si_dpm.c
··· 2915 2915 if (rdev->family == CHIP_HAINAN) { 2916 2916 if ((rdev->pdev->revision == 0x81) || 2917 2917 (rdev->pdev->revision == 0xC3) || 2918 + (rdev->pdev->device == 0x6660) || 2918 2919 (rdev->pdev->device == 0x6664) || 2919 2920 (rdev->pdev->device == 0x6665) || 2920 - (rdev->pdev->device == 0x6667)) { 2921 + (rdev->pdev->device == 0x6667) || 2922 + (rdev->pdev->device == 0x666F)) { 2921 2923 max_sclk = 75000; 2922 2924 } 2923 2925 if ((rdev->pdev->revision == 0xC3) ||