Merge tag 'drm-next-2022-12-23' of git://anongit.freedesktop.org/drm/drm

+12 -5

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

··· 29 29 #include <linux/mm.h> 30 30 #include <linux/kthread.h> 31 31 #include <linux/workqueue.h> 32 + #include <linux/mmu_notifier.h> 32 33 #include <kgd_kfd_interface.h> 33 34 #include <drm/ttm/ttm_execbuf_util.h> 34 35 #include "amdgpu_sync.h" ··· 66 65 struct mutex lock; 67 66 struct amdgpu_bo *bo; 68 67 struct dma_buf *dmabuf; 68 + struct hmm_range *range; 69 69 struct list_head attachments; 70 70 /* protected by amdkfd_process_info.lock */ 71 71 struct ttm_validate_buffer validate_list; ··· 77 75 78 76 uint32_t alloc_flags; 79 77 80 - atomic_t invalid; 78 + uint32_t invalid; 81 79 struct amdkfd_process_info *process_info; 82 80 83 81 struct amdgpu_sync sync; ··· 133 131 struct amdgpu_amdkfd_fence *eviction_fence; 134 132 135 133 /* MMU-notifier related fields */ 136 - atomic_t evicted_bos; 134 + struct mutex notifier_lock; 135 + uint32_t evicted_bos; 137 136 struct delayed_work restore_userptr_work; 138 137 struct pid *pid; 139 138 bool block_mmu_notifications; ··· 183 180 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); 184 181 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); 185 182 int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); 186 - int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); 183 + int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, 184 + unsigned long cur_seq, struct kgd_mem *mem); 187 185 #else 188 186 static inline 189 187 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) ··· 205 201 } 206 202 207 203 static inline 208 - int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) 204 + int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, 205 + unsigned long cur_seq, struct kgd_mem *mem) 209 206 { 210 207 return 0; 211 208 } ··· 270 265 (&((struct amdgpu_fpriv *) \ 271 266 ((struct drm_file *)(drm_priv))->driver_priv)->vm) 272 267 268 + int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, 269 + struct file *filp, u32 pasid); 273 270 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, 274 - struct file *filp, u32 pasid, 271 + struct file *filp, 275 272 void **process_info, 276 273 struct dma_fence **ef); 277 274 void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,

+169 -84

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

··· 964 964 * later stage when it is scheduled by another ioctl called by 965 965 * CRIU master process for the target pid for restore. 966 966 */ 967 - atomic_inc(&mem->invalid); 967 + mutex_lock(&process_info->notifier_lock); 968 + mem->invalid++; 969 + mutex_unlock(&process_info->notifier_lock); 968 970 mutex_unlock(&process_info->lock); 969 971 return 0; 970 972 } ··· 1303 1301 return -ENOMEM; 1304 1302 1305 1303 mutex_init(&info->lock); 1304 + mutex_init(&info->notifier_lock); 1306 1305 INIT_LIST_HEAD(&info->vm_list_head); 1307 1306 INIT_LIST_HEAD(&info->kfd_bo_list); 1308 1307 INIT_LIST_HEAD(&info->userptr_valid_list); ··· 1320 1317 } 1321 1318 1322 1319 info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); 1323 - atomic_set(&info->evicted_bos, 0); 1324 1320 INIT_DELAYED_WORK(&info->restore_userptr_work, 1325 1321 amdgpu_amdkfd_restore_userptr_worker); 1326 1322 ··· 1374 1372 put_pid(info->pid); 1375 1373 create_evict_fence_fail: 1376 1374 mutex_destroy(&info->lock); 1375 + mutex_destroy(&info->notifier_lock); 1377 1376 kfree(info); 1378 1377 } 1379 1378 return ret; ··· 1429 1426 amdgpu_bo_unreserve(bo); 1430 1427 } 1431 1428 1429 + int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, 1430 + struct file *filp, u32 pasid) 1431 + 1432 + { 1433 + struct amdgpu_fpriv *drv_priv; 1434 + struct amdgpu_vm *avm; 1435 + int ret; 1436 + 1437 + ret = amdgpu_file_to_fpriv(filp, &drv_priv); 1438 + if (ret) 1439 + return ret; 1440 + avm = &drv_priv->vm; 1441 + 1442 + /* Free the original amdgpu allocated pasid, 1443 + * will be replaced with kfd allocated pasid. 1444 + */ 1445 + if (avm->pasid) { 1446 + amdgpu_pasid_free(avm->pasid); 1447 + amdgpu_vm_set_pasid(adev, avm, 0); 1448 + } 1449 + 1450 + ret = amdgpu_vm_set_pasid(adev, avm, pasid); 1451 + if (ret) 1452 + return ret; 1453 + 1454 + return 0; 1455 + } 1456 + 1432 1457 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, 1433 - struct file *filp, u32 pasid, 1458 + struct file *filp, 1434 1459 void **process_info, 1435 1460 struct dma_fence **ef) 1436 1461 { ··· 1475 1444 if (avm->process_info) 1476 1445 return -EINVAL; 1477 1446 1478 - /* Free the original amdgpu allocated pasid, 1479 - * will be replaced with kfd allocated pasid. 1480 - */ 1481 - if (avm->pasid) { 1482 - amdgpu_pasid_free(avm->pasid); 1483 - amdgpu_vm_set_pasid(adev, avm, 0); 1484 - } 1485 - 1486 1447 /* Convert VM into a compute VM */ 1487 1448 ret = amdgpu_vm_make_compute(adev, avm); 1488 1449 if (ret) 1489 1450 return ret; 1490 1451 1491 - ret = amdgpu_vm_set_pasid(adev, avm, pasid); 1492 - if (ret) 1493 - return ret; 1494 1452 /* Initialize KFD part of the VM and process info */ 1495 1453 ret = init_kfd_vm(avm, process_info, ef); 1496 1454 if (ret) ··· 1516 1496 cancel_delayed_work_sync(&process_info->restore_userptr_work); 1517 1497 put_pid(process_info->pid); 1518 1498 mutex_destroy(&process_info->lock); 1499 + mutex_destroy(&process_info->notifier_lock); 1519 1500 kfree(process_info); 1520 1501 } 1521 1502 } ··· 1569 1548 1570 1549 mutex_lock(&pinfo->lock); 1571 1550 pr_debug("scheduling work\n"); 1572 - atomic_inc(&pinfo->evicted_bos); 1551 + mutex_lock(&pinfo->notifier_lock); 1552 + pinfo->evicted_bos++; 1553 + mutex_unlock(&pinfo->notifier_lock); 1573 1554 if (!READ_ONCE(pinfo->block_mmu_notifications)) { 1574 1555 ret = -EINVAL; 1575 1556 goto out_unlock; ··· 1796 1773 list_del(&bo_list_entry->head); 1797 1774 mutex_unlock(&process_info->lock); 1798 1775 1799 - /* No more MMU notifiers */ 1800 - amdgpu_hmm_unregister(mem->bo); 1776 + /* Cleanup user pages and MMU notifiers */ 1777 + if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { 1778 + amdgpu_hmm_unregister(mem->bo); 1779 + mutex_lock(&process_info->notifier_lock); 1780 + amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range); 1781 + mutex_unlock(&process_info->notifier_lock); 1782 + } 1801 1783 1802 1784 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); 1803 1785 if (unlikely(ret)) ··· 1891 1863 * worker to do the mapping 1892 1864 */ 1893 1865 mutex_lock(&mem->process_info->lock); 1866 + 1867 + /* Lock notifier lock. If we find an invalid userptr BO, we can be 1868 + * sure that the MMU notifier is no longer running 1869 + * concurrently and the queues are actually stopped 1870 + */ 1871 + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { 1872 + mutex_lock(&mem->process_info->notifier_lock); 1873 + is_invalid_userptr = !!mem->invalid; 1874 + mutex_unlock(&mem->process_info->notifier_lock); 1875 + } 1894 1876 1895 1877 mutex_lock(&mem->lock); 1896 1878 ··· 2279 2241 * 2280 2242 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it 2281 2243 * cannot do any memory allocations, and cannot take any locks that 2282 - * are held elsewhere while allocating memory. Therefore this is as 2283 - * simple as possible, using atomic counters. 2244 + * are held elsewhere while allocating memory. 2284 2245 * 2285 2246 * It doesn't do anything to the BO itself. The real work happens in 2286 2247 * restore, where we get updated page addresses. This function only 2287 2248 * ensures that GPU access to the BO is stopped. 2288 2249 */ 2289 - int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, 2290 - struct mm_struct *mm) 2250 + int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, 2251 + unsigned long cur_seq, struct kgd_mem *mem) 2291 2252 { 2292 2253 struct amdkfd_process_info *process_info = mem->process_info; 2293 - int evicted_bos; 2294 2254 int r = 0; 2295 2255 2296 - /* Do not process MMU notifications until stage-4 IOCTL is received */ 2256 + /* Do not process MMU notifications during CRIU restore until 2257 + * KFD_CRIU_OP_RESUME IOCTL is received 2258 + */ 2297 2259 if (READ_ONCE(process_info->block_mmu_notifications)) 2298 2260 return 0; 2299 2261 2300 - atomic_inc(&mem->invalid); 2301 - evicted_bos = atomic_inc_return(&process_info->evicted_bos); 2302 - if (evicted_bos == 1) { 2262 + mutex_lock(&process_info->notifier_lock); 2263 + mmu_interval_set_seq(mni, cur_seq); 2264 + 2265 + mem->invalid++; 2266 + if (++process_info->evicted_bos == 1) { 2303 2267 /* First eviction, stop the queues */ 2304 - r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); 2268 + r = kgd2kfd_quiesce_mm(mni->mm, 2269 + KFD_QUEUE_EVICTION_TRIGGER_USERPTR); 2305 2270 if (r) 2306 2271 pr_err("Failed to quiesce KFD\n"); 2307 2272 schedule_delayed_work(&process_info->restore_userptr_work, 2308 2273 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); 2309 2274 } 2275 + mutex_unlock(&process_info->notifier_lock); 2310 2276 2311 2277 return r; 2312 2278 } ··· 2327 2285 struct kgd_mem *mem, *tmp_mem; 2328 2286 struct amdgpu_bo *bo; 2329 2287 struct ttm_operation_ctx ctx = { false, false }; 2330 - int invalid, ret; 2288 + uint32_t invalid; 2289 + int ret = 0; 2331 2290 2332 - /* Move all invalidated BOs to the userptr_inval_list and 2333 - * release their user pages by migration to the CPU domain 2334 - */ 2291 + mutex_lock(&process_info->notifier_lock); 2292 + 2293 + /* Move all invalidated BOs to the userptr_inval_list */ 2335 2294 list_for_each_entry_safe(mem, tmp_mem, 2336 2295 &process_info->userptr_valid_list, 2337 - validate_list.head) { 2338 - if (!atomic_read(&mem->invalid)) 2339 - continue; /* BO is still valid */ 2340 - 2341 - bo = mem->bo; 2342 - 2343 - if (amdgpu_bo_reserve(bo, true)) 2344 - return -EAGAIN; 2345 - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 2346 - ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 2347 - amdgpu_bo_unreserve(bo); 2348 - if (ret) { 2349 - pr_err("%s: Failed to invalidate userptr BO\n", 2350 - __func__); 2351 - return -EAGAIN; 2352 - } 2353 - 2354 - list_move_tail(&mem->validate_list.head, 2355 - &process_info->userptr_inval_list); 2356 - } 2357 - 2358 - if (list_empty(&process_info->userptr_inval_list)) 2359 - return 0; /* All evicted userptr BOs were freed */ 2296 + validate_list.head) 2297 + if (mem->invalid) 2298 + list_move_tail(&mem->validate_list.head, 2299 + &process_info->userptr_inval_list); 2360 2300 2361 2301 /* Go through userptr_inval_list and update any invalid user_pages */ 2362 2302 list_for_each_entry(mem, &process_info->userptr_inval_list, 2363 2303 validate_list.head) { 2364 - struct hmm_range *range; 2365 - 2366 - invalid = atomic_read(&mem->invalid); 2304 + invalid = mem->invalid; 2367 2305 if (!invalid) 2368 2306 /* BO hasn't been invalidated since the last 2369 - * revalidation attempt. Keep its BO list. 2307 + * revalidation attempt. Keep its page list. 2370 2308 */ 2371 2309 continue; 2372 2310 2373 2311 bo = mem->bo; 2374 2312 2313 + amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range); 2314 + mem->range = NULL; 2315 + 2316 + /* BO reservations and getting user pages (hmm_range_fault) 2317 + * must happen outside the notifier lock 2318 + */ 2319 + mutex_unlock(&process_info->notifier_lock); 2320 + 2321 + /* Move the BO to system (CPU) domain if necessary to unmap 2322 + * and free the SG table 2323 + */ 2324 + if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) { 2325 + if (amdgpu_bo_reserve(bo, true)) 2326 + return -EAGAIN; 2327 + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 2328 + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 2329 + amdgpu_bo_unreserve(bo); 2330 + if (ret) { 2331 + pr_err("%s: Failed to invalidate userptr BO\n", 2332 + __func__); 2333 + return -EAGAIN; 2334 + } 2335 + } 2336 + 2375 2337 /* Get updated user pages */ 2376 2338 ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, 2377 - &range); 2339 + &mem->range); 2378 2340 if (ret) { 2379 2341 pr_debug("Failed %d to get user pages\n", ret); 2380 2342 ··· 2391 2345 */ 2392 2346 if (ret != -EFAULT) 2393 2347 return ret; 2394 - } else { 2395 2348 2396 - /* 2397 - * FIXME: Cannot ignore the return code, must hold 2398 - * notifier_lock 2399 - */ 2400 - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); 2349 + ret = 0; 2401 2350 } 2351 + 2352 + mutex_lock(&process_info->notifier_lock); 2402 2353 2403 2354 /* Mark the BO as valid unless it was invalidated 2404 2355 * again concurrently. 2405 2356 */ 2406 - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) 2407 - return -EAGAIN; 2357 + if (mem->invalid != invalid) { 2358 + ret = -EAGAIN; 2359 + goto unlock_out; 2360 + } 2361 + mem->invalid = 0; 2408 2362 } 2409 2363 2410 - return 0; 2364 + unlock_out: 2365 + mutex_unlock(&process_info->notifier_lock); 2366 + 2367 + return ret; 2411 2368 } 2412 2369 2413 2370 /* Validate invalid userptr BOs 2414 2371 * 2415 - * Validates BOs on the userptr_inval_list, and moves them back to the 2416 - * userptr_valid_list. Also updates GPUVM page tables with new page 2417 - * addresses and waits for the page table updates to complete. 2372 + * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables 2373 + * with new page addresses and waits for the page table updates to complete. 2418 2374 */ 2419 2375 static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) 2420 2376 { ··· 2487 2439 } 2488 2440 } 2489 2441 2490 - list_move_tail(&mem->validate_list.head, 2491 - &process_info->userptr_valid_list); 2492 - 2493 2442 /* Update mapping. If the BO was not validated 2494 2443 * (because we couldn't get user pages), this will 2495 2444 * clear the page table entries, which will result in ··· 2502 2457 if (ret) { 2503 2458 pr_err("%s: update PTE failed\n", __func__); 2504 2459 /* make sure this gets validated again */ 2505 - atomic_inc(&mem->invalid); 2460 + mutex_lock(&process_info->notifier_lock); 2461 + mem->invalid++; 2462 + mutex_unlock(&process_info->notifier_lock); 2506 2463 goto unreserve_out; 2507 2464 } 2508 2465 } ··· 2524 2477 return ret; 2525 2478 } 2526 2479 2480 + /* Confirm that all user pages are valid while holding the notifier lock 2481 + * 2482 + * Moves valid BOs from the userptr_inval_list back to userptr_val_list. 2483 + */ 2484 + static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info) 2485 + { 2486 + struct kgd_mem *mem, *tmp_mem; 2487 + int ret = 0; 2488 + 2489 + list_for_each_entry_safe(mem, tmp_mem, 2490 + &process_info->userptr_inval_list, 2491 + validate_list.head) { 2492 + bool valid = amdgpu_ttm_tt_get_user_pages_done( 2493 + mem->bo->tbo.ttm, mem->range); 2494 + 2495 + mem->range = NULL; 2496 + if (!valid) { 2497 + WARN(!mem->invalid, "Invalid BO not marked invalid"); 2498 + ret = -EAGAIN; 2499 + continue; 2500 + } 2501 + WARN(mem->invalid, "Valid BO is marked invalid"); 2502 + 2503 + list_move_tail(&mem->validate_list.head, 2504 + &process_info->userptr_valid_list); 2505 + } 2506 + 2507 + return ret; 2508 + } 2509 + 2527 2510 /* Worker callback to restore evicted userptr BOs 2528 2511 * 2529 2512 * Tries to update and validate all userptr BOs. If successful and no ··· 2568 2491 restore_userptr_work); 2569 2492 struct task_struct *usertask; 2570 2493 struct mm_struct *mm; 2571 - int evicted_bos; 2494 + uint32_t evicted_bos; 2572 2495 2573 - evicted_bos = atomic_read(&process_info->evicted_bos); 2496 + mutex_lock(&process_info->notifier_lock); 2497 + evicted_bos = process_info->evicted_bos; 2498 + mutex_unlock(&process_info->notifier_lock); 2574 2499 if (!evicted_bos) 2575 2500 return; 2576 2501 ··· 2595 2516 * and we can just restart the queues. 2596 2517 */ 2597 2518 if (!list_empty(&process_info->userptr_inval_list)) { 2598 - if (atomic_read(&process_info->evicted_bos) != evicted_bos) 2599 - goto unlock_out; /* Concurrent eviction, try again */ 2600 - 2601 2519 if (validate_invalid_user_pages(process_info)) 2602 2520 goto unlock_out; 2603 2521 } ··· 2603 2527 * be a first eviction that calls quiesce_mm. The eviction 2604 2528 * reference counting inside KFD will handle this case. 2605 2529 */ 2606 - if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != 2607 - evicted_bos) 2608 - goto unlock_out; 2609 - evicted_bos = 0; 2530 + mutex_lock(&process_info->notifier_lock); 2531 + if (process_info->evicted_bos != evicted_bos) 2532 + goto unlock_notifier_out; 2533 + 2534 + if (confirm_valid_user_pages_locked(process_info)) { 2535 + WARN(1, "User pages unexpectedly invalid"); 2536 + goto unlock_notifier_out; 2537 + } 2538 + 2539 + process_info->evicted_bos = evicted_bos = 0; 2540 + 2610 2541 if (kgd2kfd_resume_mm(mm)) { 2611 2542 pr_err("%s: Failed to resume KFD\n", __func__); 2612 2543 /* No recovery from this failure. Probably the CP is ··· 2621 2538 */ 2622 2539 } 2623 2540 2541 + unlock_notifier_out: 2542 + mutex_unlock(&process_info->notifier_lock); 2624 2543 unlock_out: 2625 2544 mutex_unlock(&process_info->lock); 2626 2545

+8 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

··· 3016 3016 continue; 3017 3017 } 3018 3018 3019 - /* skip suspend of gfx and psp for S0ix 3019 + /* skip suspend of gfx/mes and psp for S0ix 3020 3020 * gfx is in gfxoff state, so on resume it will exit gfxoff just 3021 3021 * like at runtime. PSP is also part of the always on hardware 3022 3022 * so no need to suspend it. 3023 3023 */ 3024 3024 if (adev->in_s0ix && 3025 3025 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || 3026 - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)) 3026 + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 3027 + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES)) 3027 3028 continue; 3028 3029 3029 3030 /* XXX handle errors */ ··· 4112 4111 return 0; 4113 4112 4114 4113 adev->in_suspend = true; 4114 + 4115 + /* Evict the majority of BOs before grabbing the full access */ 4116 + r = amdgpu_device_evict_resources(adev); 4117 + if (r) 4118 + return r; 4115 4119 4116 4120 if (amdgpu_sriov_vf(adev)) { 4117 4121 amdgpu_virt_fini_data_exchange(adev);

+11 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

··· 2039 2039 "See modparam exp_hw_support\n"); 2040 2040 return -ENODEV; 2041 2041 } 2042 + /* differentiate between P10 and P11 asics with the same DID */ 2043 + if (pdev->device == 0x67FF && 2044 + (pdev->revision == 0xE3 || 2045 + pdev->revision == 0xE7 || 2046 + pdev->revision == 0xF3 || 2047 + pdev->revision == 0xF7)) { 2048 + flags &= ~AMD_ASIC_MASK; 2049 + flags |= CHIP_POLARIS10; 2050 + } 2042 2051 2043 2052 /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, 2044 2053 * however, SME requires an indirect IOMMU mapping because the encryption ··· 2117 2108 2118 2109 pci_set_drvdata(pdev, ddev); 2119 2110 2120 - ret = amdgpu_driver_load_kms(adev, ent->driver_data); 2111 + ret = amdgpu_driver_load_kms(adev, flags); 2121 2112 if (ret) 2122 2113 goto err_pci; 2123 2114 2124 2115 retry_init: 2125 - ret = drm_dev_register(ddev, ent->driver_data); 2116 + ret = drm_dev_register(ddev, flags); 2126 2117 if (ret == -EAGAIN && ++retry <= 3) { 2127 2118 DRM_INFO("retry init %d\n", retry); 2128 2119 /* Don't request EX mode too frequently which is attacking */

+4 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c

··· 64 64 sizeof(atom_ctx->vbios_version)) || 65 65 strnstr(atom_ctx->vbios_version, "D163", 66 66 sizeof(atom_ctx->vbios_version))) { 67 - *fru_addr = FRU_EEPROM_MADDR_6; 67 + if (fru_addr) 68 + *fru_addr = FRU_EEPROM_MADDR_6; 68 69 return true; 69 70 } else { 70 71 return false; ··· 84 83 sizeof(atom_ctx->vbios_version))) { 85 84 return false; 86 85 } else { 87 - *fru_addr = FRU_EEPROM_MADDR_6; 86 + if (fru_addr) 87 + *fru_addr = FRU_EEPROM_MADDR_6; 88 88 return true; 89 89 } 90 90 } else {

+13 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

··· 113 113 bp.resv = resv; 114 114 bp.preferred_domain = initial_domain; 115 115 bp.flags = flags; 116 - bp.domain = initial_domain | AMDGPU_GEM_DOMAIN_CPU; 116 + bp.domain = initial_domain; 117 117 bp.bo_ptr_size = sizeof(struct amdgpu_bo); 118 118 119 119 r = amdgpu_bo_create_user(adev, &bp, &ubo); ··· 332 332 } 333 333 334 334 initial_domain = (u32)(0xffffffff & args->in.domains); 335 + retry: 335 336 r = amdgpu_gem_object_create(adev, size, args->in.alignment, 336 - initial_domain, flags, ttm_bo_type_device, 337 - resv, &gobj); 337 + initial_domain, 338 + flags, ttm_bo_type_device, resv, &gobj); 338 339 if (r && r != -ERESTARTSYS) { 340 + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { 341 + flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 342 + goto retry; 343 + } 344 + 345 + if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { 346 + initial_domain |= AMDGPU_GEM_DOMAIN_GTT; 347 + goto retry; 348 + } 339 349 DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", 340 350 size, initial_domain, args->in.alignment, r); 341 351 }

+3 -9

drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c

··· 105 105 unsigned long cur_seq) 106 106 { 107 107 struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); 108 - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 109 108 110 109 if (!mmu_notifier_range_blockable(range)) 111 110 return false; 112 111 113 - mutex_lock(&adev->notifier_lock); 114 - 115 - mmu_interval_set_seq(mni, cur_seq); 116 - 117 - amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); 118 - mutex_unlock(&adev->notifier_lock); 112 + amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo); 119 113 120 114 return true; 121 115 } ··· 238 244 return r; 239 245 } 240 246 241 - int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) 247 + bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) 242 248 { 243 - int r; 249 + bool r; 244 250 245 251 r = mmu_interval_read_retry(hmm_range->notifier, 246 252 hmm_range->notifier_seq);

+2 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h

··· 29 29 #include <linux/rwsem.h> 30 30 #include <linux/workqueue.h> 31 31 #include <linux/interval_tree.h> 32 + #include <linux/mmu_notifier.h> 32 33 33 34 int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, 34 35 uint64_t start, uint64_t npages, bool readonly, 35 36 void *owner, struct page **pages, 36 37 struct hmm_range **phmm_range); 37 - int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); 38 + bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); 38 39 39 40 #if defined(CONFIG_HMM_MIRROR) 40 41 int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);

+57 -33

drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

··· 165 165 atomic_read(&adev->gpu_reset_counter); 166 166 } 167 167 168 + /* Check if we need to switch to another set of resources */ 169 + static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id, 170 + struct amdgpu_job *job) 171 + { 172 + return id->gds_base != job->gds_base || 173 + id->gds_size != job->gds_size || 174 + id->gws_base != job->gws_base || 175 + id->gws_size != job->gws_size || 176 + id->oa_base != job->oa_base || 177 + id->oa_size != job->oa_size; 178 + } 179 + 180 + /* Check if the id is compatible with the job */ 181 + static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id, 182 + struct amdgpu_job *job) 183 + { 184 + return id->pd_gpu_addr == job->vm_pd_addr && 185 + !amdgpu_vmid_gds_switch_needed(id, job); 186 + } 187 + 168 188 /** 169 189 * amdgpu_vmid_grab_idle - grab idle VMID 170 190 * ··· 278 258 { 279 259 struct amdgpu_device *adev = ring->adev; 280 260 unsigned vmhub = ring->funcs->vmhub; 261 + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 281 262 uint64_t fence_context = adev->fence_context + ring->idx; 282 263 bool needs_flush = vm->use_cpu_for_update; 283 264 uint64_t updates = amdgpu_vm_tlb_seq(vm); 284 265 int r; 285 266 286 - *id = vm->reserved_vmid[vmhub]; 267 + *id = id_mgr->reserved; 287 268 if ((*id)->owner != vm->immediate.fence_context || 288 - (*id)->pd_gpu_addr != job->vm_pd_addr || 269 + !amdgpu_vmid_compatible(*id, job) || 289 270 (*id)->flushed_updates < updates || 290 271 !(*id)->last_flush || 291 272 ((*id)->last_flush->context != fence_context && ··· 315 294 if (r) 316 295 return r; 317 296 318 - (*id)->flushed_updates = updates; 319 297 job->vm_needs_flush = needs_flush; 298 + job->spm_update_needed = true; 320 299 return 0; 321 300 } 322 301 ··· 354 333 if ((*id)->owner != vm->immediate.fence_context) 355 334 continue; 356 335 357 - if ((*id)->pd_gpu_addr != job->vm_pd_addr) 336 + if (!amdgpu_vmid_compatible(*id, job)) 358 337 continue; 359 338 360 339 if (!(*id)->last_flush || ··· 376 355 if (r) 377 356 return r; 378 357 379 - (*id)->flushed_updates = updates; 380 358 job->vm_needs_flush |= needs_flush; 381 359 return 0; 382 360 } ··· 428 408 if (r) 429 409 goto error; 430 410 431 - id->flushed_updates = amdgpu_vm_tlb_seq(vm); 432 411 job->vm_needs_flush = true; 433 412 } 434 413 435 414 list_move_tail(&id->list, &id_mgr->ids_lru); 436 415 } 437 416 438 - id->pd_gpu_addr = job->vm_pd_addr; 439 - id->owner = vm->immediate.fence_context; 440 - 417 + job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job); 441 418 if (job->vm_needs_flush) { 419 + id->flushed_updates = amdgpu_vm_tlb_seq(vm); 442 420 dma_fence_put(id->last_flush); 443 421 id->last_flush = NULL; 444 422 } 445 423 job->vmid = id - id_mgr->ids; 446 424 job->pasid = vm->pasid; 425 + 426 + id->gds_base = job->gds_base; 427 + id->gds_size = job->gds_size; 428 + id->gws_base = job->gws_base; 429 + id->gws_size = job->gws_size; 430 + id->oa_base = job->oa_base; 431 + id->oa_size = job->oa_size; 432 + id->pd_gpu_addr = job->vm_pd_addr; 433 + id->owner = vm->immediate.fence_context; 434 + 447 435 trace_amdgpu_vm_grab_id(vm, ring, job); 448 436 449 437 error: ··· 463 435 struct amdgpu_vm *vm, 464 436 unsigned vmhub) 465 437 { 466 - struct amdgpu_vmid_mgr *id_mgr; 467 - struct amdgpu_vmid *idle; 468 - int r = 0; 438 + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 469 439 470 - id_mgr = &adev->vm_manager.id_mgr[vmhub]; 471 440 mutex_lock(&id_mgr->lock); 472 441 if (vm->reserved_vmid[vmhub]) 473 442 goto unlock; 474 - if (atomic_inc_return(&id_mgr->reserved_vmid_num) > 475 - AMDGPU_VM_MAX_RESERVED_VMID) { 476 - DRM_ERROR("Over limitation of reserved vmid\n"); 477 - atomic_dec(&id_mgr->reserved_vmid_num); 478 - r = -EINVAL; 479 - goto unlock; 480 - } 481 - /* Select the first entry VMID */ 482 - idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); 483 - list_del_init(&idle->list); 484 - vm->reserved_vmid[vmhub] = idle; 485 - mutex_unlock(&id_mgr->lock); 486 443 487 - return 0; 444 + ++id_mgr->reserved_use_count; 445 + if (!id_mgr->reserved) { 446 + struct amdgpu_vmid *id; 447 + 448 + id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, 449 + list); 450 + /* Remove from normal round robin handling */ 451 + list_del_init(&id->list); 452 + id_mgr->reserved = id; 453 + } 454 + vm->reserved_vmid[vmhub] = true; 455 + 488 456 unlock: 489 457 mutex_unlock(&id_mgr->lock); 490 - return r; 458 + return 0; 491 459 } 492 460 493 461 void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, ··· 493 469 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 494 470 495 471 mutex_lock(&id_mgr->lock); 496 - if (vm->reserved_vmid[vmhub]) { 497 - list_add(&vm->reserved_vmid[vmhub]->list, 498 - &id_mgr->ids_lru); 499 - vm->reserved_vmid[vmhub] = NULL; 500 - atomic_dec(&id_mgr->reserved_vmid_num); 472 + if (vm->reserved_vmid[vmhub] && 473 + !--id_mgr->reserved_use_count) { 474 + /* give the reserved ID back to normal round robin */ 475 + list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); 501 476 } 477 + vm->reserved_vmid[vmhub] = false; 502 478 mutex_unlock(&id_mgr->lock); 503 479 } 504 480 ··· 565 541 566 542 mutex_init(&id_mgr->lock); 567 543 INIT_LIST_HEAD(&id_mgr->ids_lru); 568 - atomic_set(&id_mgr->reserved_vmid_num, 0); 544 + id_mgr->reserved_use_count = 0; 569 545 570 546 /* manage only VMIDs not used by KFD */ 571 547 id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;

+2 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

··· 67 67 unsigned num_ids; 68 68 struct list_head ids_lru; 69 69 struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; 70 - atomic_t reserved_vmid_num; 70 + struct amdgpu_vmid *reserved; 71 + unsigned int reserved_use_count; 71 72 }; 72 73 73 74 int amdgpu_pasid_alloc(unsigned int bits);

+2

drivers/gpu/drm/amd/amdgpu/amdgpu_job.h

··· 53 53 uint32_t preamble_status; 54 54 uint32_t preemption_status; 55 55 bool vm_needs_flush; 56 + bool gds_switch_needed; 57 + bool spm_update_needed; 56 58 uint64_t vm_pd_addr; 57 59 unsigned vmid; 58 60 unsigned pasid;

+22 -18

drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

··· 346 346 * @adev: amdgpu device object 347 347 * @offset: offset of the BO 348 348 * @size: size of the BO 349 - * @domain: where to place it 350 349 * @bo_ptr: used to initialize BOs in structures 351 350 * @cpu_addr: optional CPU address mapping 352 351 * 353 - * Creates a kernel BO at a specific offset in the address space of the domain. 352 + * Creates a kernel BO at a specific offset in VRAM. 354 353 * 355 354 * Returns: 356 355 * 0 on success, negative error code otherwise. 357 356 */ 358 357 int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, 359 - uint64_t offset, uint64_t size, uint32_t domain, 358 + uint64_t offset, uint64_t size, 360 359 struct amdgpu_bo **bo_ptr, void **cpu_addr) 361 360 { 362 361 struct ttm_operation_ctx ctx = { false, false }; ··· 365 366 offset &= PAGE_MASK; 366 367 size = ALIGN(size, PAGE_SIZE); 367 368 368 - r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr, 369 - NULL, cpu_addr); 369 + r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, 370 + AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL, 371 + cpu_addr); 370 372 if (r) 371 373 return r; 372 374 ··· 422 422 if (*bo == NULL) 423 423 return; 424 424 425 + WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend); 426 + 425 427 if (likely(amdgpu_bo_reserve(*bo, true) == 0)) { 426 428 if (cpu_addr) 427 429 amdgpu_bo_kunmap(*bo); ··· 448 446 449 447 /* 450 448 * If GTT is part of requested domains the check must succeed to 451 - * allow fall back to GTT 449 + * allow fall back to GTT. 452 450 */ 453 451 if (domain & AMDGPU_GEM_DOMAIN_GTT) { 454 452 man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); 455 453 456 - if (size < man->size) 454 + if (man && size < man->size) 457 455 return true; 458 - else 459 - goto fail; 460 - } 461 - 462 - if (domain & AMDGPU_GEM_DOMAIN_VRAM) { 456 + else if (!man) 457 + WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); 458 + goto fail; 459 + } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { 463 460 man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); 464 461 465 - if (size < man->size) 462 + if (man && size < man->size) 466 463 return true; 467 - else 468 - goto fail; 464 + goto fail; 469 465 } 470 - 471 466 472 467 /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ 473 468 return true; ··· 580 581 bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; 581 582 582 583 bo->tbo.bdev = &adev->mman.bdev; 583 - amdgpu_bo_placement_from_domain(bo, bp->domain); 584 + if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | 585 + AMDGPU_GEM_DOMAIN_GDS)) 586 + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 587 + else 588 + amdgpu_bo_placement_from_domain(bo, bp->domain); 584 589 if (bp->type == ttm_bo_type_kernel) 585 590 bo->tbo.priority = 1; 586 591 ··· 1509 1506 uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, 1510 1507 uint32_t domain) 1511 1508 { 1512 - if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { 1509 + if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) && 1510 + ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) { 1513 1511 domain = AMDGPU_GEM_DOMAIN_VRAM; 1514 1512 if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD) 1515 1513 domain = AMDGPU_GEM_DOMAIN_GTT;

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_object.h

··· 284 284 u32 domain, struct amdgpu_bo **bo_ptr, 285 285 u64 *gpu_addr, void **cpu_addr); 286 286 int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, 287 - uint64_t offset, uint64_t size, uint32_t domain, 287 + uint64_t offset, uint64_t size, 288 288 struct amdgpu_bo **bo_ptr, void **cpu_addr); 289 289 int amdgpu_bo_create_user(struct amdgpu_device *adev, 290 290 struct amdgpu_bo_param *bp,

+12 -12

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

··· 695 695 return r; 696 696 } 697 697 698 + /* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations 699 + */ 700 + void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, 701 + struct hmm_range *range) 702 + { 703 + struct amdgpu_ttm_tt *gtt = (void *)ttm; 704 + 705 + if (gtt && gtt->userptr && range) 706 + amdgpu_hmm_range_get_pages_done(range); 707 + } 708 + 698 709 /* 699 - * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change 710 + * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change 700 711 * Check if the pages backing this ttm range have been invalidated 701 712 * 702 713 * Returns: true if pages are still valid ··· 725 714 726 715 WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); 727 716 728 - /* 729 - * FIXME: Must always hold notifier_lock for this, and must 730 - * not ignore the return code. 731 - */ 732 717 return !amdgpu_hmm_range_get_pages_done(range); 733 718 } 734 719 #endif ··· 1576 1569 return amdgpu_bo_create_kernel_at(adev, 1577 1570 adev->mman.fw_vram_usage_start_offset, 1578 1571 adev->mman.fw_vram_usage_size, 1579 - AMDGPU_GEM_DOMAIN_VRAM, 1580 1572 &adev->mman.fw_vram_usage_reserved_bo, 1581 1573 &adev->mman.fw_vram_usage_va); 1582 1574 } ··· 1601 1595 return amdgpu_bo_create_kernel_at(adev, 1602 1596 adev->mman.drv_vram_usage_start_offset, 1603 1597 adev->mman.drv_vram_usage_size, 1604 - AMDGPU_GEM_DOMAIN_VRAM, 1605 1598 &adev->mman.drv_vram_usage_reserved_bo, 1606 1599 &adev->mman.drv_vram_usage_va); 1607 1600 } ··· 1681 1676 ret = amdgpu_bo_create_kernel_at(adev, 1682 1677 ctx->c2p_train_data_offset, 1683 1678 ctx->train_data_size, 1684 - AMDGPU_GEM_DOMAIN_VRAM, 1685 1679 &ctx->c2p_bo, 1686 1680 NULL); 1687 1681 if (ret) { ··· 1694 1690 ret = amdgpu_bo_create_kernel_at(adev, 1695 1691 adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, 1696 1692 adev->mman.discovery_tmr_size, 1697 - AMDGPU_GEM_DOMAIN_VRAM, 1698 1693 &adev->mman.discovery_memory, 1699 1694 NULL); 1700 1695 if (ret) { ··· 1794 1791 * avoid display artifacts while transitioning between pre-OS 1795 1792 * and driver. */ 1796 1793 r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, 1797 - AMDGPU_GEM_DOMAIN_VRAM, 1798 1794 &adev->mman.stolen_vga_memory, 1799 1795 NULL); 1800 1796 if (r) 1801 1797 return r; 1802 1798 r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, 1803 1799 adev->mman.stolen_extended_size, 1804 - AMDGPU_GEM_DOMAIN_VRAM, 1805 1800 &adev->mman.stolen_extended_memory, 1806 1801 NULL); 1807 1802 if (r) 1808 1803 return r; 1809 1804 r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, 1810 1805 adev->mman.stolen_reserved_size, 1811 - AMDGPU_GEM_DOMAIN_VRAM, 1812 1806 &adev->mman.stolen_reserved_memory, 1813 1807 NULL); 1814 1808 if (r)

+6

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

··· 159 159 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) 160 160 int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, 161 161 struct hmm_range **range); 162 + void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, 163 + struct hmm_range *range); 162 164 bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, 163 165 struct hmm_range *range); 164 166 #else ··· 169 167 struct hmm_range **range) 170 168 { 171 169 return -EPERM; 170 + } 171 + static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, 172 + struct hmm_range *range) 173 + { 172 174 } 173 175 static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, 174 176 struct hmm_range *range)

-1

drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

··· 395 395 */ 396 396 if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, 397 397 AMDGPU_GPU_PAGE_SIZE, 398 - AMDGPU_GEM_DOMAIN_VRAM, 399 398 &bo, NULL)) 400 399 DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); 401 400

+23 -54

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

··· 484 484 struct amdgpu_device *adev = ring->adev; 485 485 unsigned vmhub = ring->funcs->vmhub; 486 486 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 487 - struct amdgpu_vmid *id; 488 - bool gds_switch_needed; 489 - bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; 490 487 491 488 if (job->vmid == 0) 492 489 return false; 493 - id = &id_mgr->ids[job->vmid]; 494 - gds_switch_needed = ring->funcs->emit_gds_switch && ( 495 - id->gds_base != job->gds_base || 496 - id->gds_size != job->gds_size || 497 - id->gws_base != job->gws_base || 498 - id->gws_size != job->gws_size || 499 - id->oa_base != job->oa_base || 500 - id->oa_size != job->oa_size); 501 490 502 - if (amdgpu_vmid_had_gpu_reset(adev, id)) 491 + if (job->vm_needs_flush || ring->has_compute_vm_bug) 503 492 return true; 504 493 505 - return vm_flush_needed || gds_switch_needed; 494 + if (ring->funcs->emit_gds_switch && job->gds_switch_needed) 495 + return true; 496 + 497 + if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid])) 498 + return true; 499 + 500 + return false; 506 501 } 507 502 508 503 /** ··· 519 524 unsigned vmhub = ring->funcs->vmhub; 520 525 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 521 526 struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; 522 - bool gds_switch_needed = ring->funcs->emit_gds_switch && ( 523 - id->gds_base != job->gds_base || 524 - id->gds_size != job->gds_size || 525 - id->gws_base != job->gws_base || 526 - id->gws_size != job->gws_size || 527 - id->oa_base != job->oa_base || 528 - id->oa_size != job->oa_size); 527 + bool spm_update_needed = job->spm_update_needed; 528 + bool gds_switch_needed = ring->funcs->emit_gds_switch && 529 + job->gds_switch_needed; 529 530 bool vm_flush_needed = job->vm_needs_flush; 530 531 struct dma_fence *fence = NULL; 531 532 bool pasid_mapping_needed = false; 532 533 unsigned patch_offset = 0; 533 - bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL)); 534 534 int r; 535 - 536 - if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid) 537 - adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); 538 535 539 536 if (amdgpu_vmid_had_gpu_reset(adev, id)) { 540 537 gds_switch_needed = true; 541 538 vm_flush_needed = true; 542 539 pasid_mapping_needed = true; 540 + spm_update_needed = true; 543 541 } 544 542 545 543 mutex_lock(&id_mgr->lock); ··· 565 577 if (pasid_mapping_needed) 566 578 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); 567 579 580 + if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) 581 + adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); 582 + 583 + if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && 584 + gds_switch_needed) { 585 + amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, 586 + job->gds_size, job->gws_base, 587 + job->gws_size, job->oa_base, 588 + job->oa_size); 589 + } 590 + 568 591 if (vm_flush_needed || pasid_mapping_needed) { 569 592 r = amdgpu_fence_emit(ring, &fence, NULL, 0); 570 593 if (r) ··· 599 600 mutex_unlock(&id_mgr->lock); 600 601 } 601 602 dma_fence_put(fence); 602 - 603 - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && 604 - gds_switch_needed) { 605 - id->gds_base = job->gds_base; 606 - id->gds_size = job->gds_size; 607 - id->gws_base = job->gws_base; 608 - id->gws_size = job->gws_size; 609 - id->oa_base = job->oa_base; 610 - id->oa_size = job->oa_size; 611 - amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, 612 - job->gds_size, job->gws_base, 613 - job->gws_size, job->oa_base, 614 - job->oa_size); 615 - } 616 603 617 604 if (ring->funcs->patch_cond_exec) 618 605 amdgpu_ring_patch_cond_exec(ring, patch_offset); ··· 2368 2383 union drm_amdgpu_vm *args = data; 2369 2384 struct amdgpu_device *adev = drm_to_adev(dev); 2370 2385 struct amdgpu_fpriv *fpriv = filp->driver_priv; 2371 - long timeout = msecs_to_jiffies(2000); 2372 2386 int r; 2373 2387 2374 2388 switch (args->in.op) { ··· 2379 2395 return r; 2380 2396 break; 2381 2397 case AMDGPU_VM_OP_UNRESERVE_VMID: 2382 - if (amdgpu_sriov_runtime(adev)) 2383 - timeout = 8 * timeout; 2384 - 2385 - /* Wait vm idle to make sure the vmid set in SPM_VMID is 2386 - * not referenced anymore. 2387 - */ 2388 - r = amdgpu_bo_reserve(fpriv->vm.root.bo, true); 2389 - if (r) 2390 - return r; 2391 - 2392 - r = amdgpu_vm_wait_idle(&fpriv->vm, timeout); 2393 - if (r < 0) 2394 - return r; 2395 - 2396 - amdgpu_bo_unreserve(fpriv->vm.root.bo); 2397 2398 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); 2398 2399 break; 2399 2400 default:

+1 -5

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

··· 119 119 /* Reserve 2MB at top/bottom of address space for kernel use */ 120 120 #define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) 121 121 122 - /* max vmids dedicated for process */ 123 - #define AMDGPU_VM_MAX_RESERVED_VMID 1 124 - 125 122 /* See vm_update_mode */ 126 123 #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) 127 124 #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) ··· 295 298 struct dma_fence *last_unlocked; 296 299 297 300 unsigned int pasid; 298 - /* dedicated to vm */ 299 - struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; 301 + bool reserved_vmid[AMDGPU_MAX_VMHUBS]; 300 302 301 303 /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ 302 304 bool use_cpu_for_update;

+2

drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c

··· 238 238 /* Wait for PD/PT moves to be completed */ 239 239 dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL); 240 240 dma_resv_for_each_fence_unlocked(&cursor, fence) { 241 + dma_fence_get(fence); 241 242 r = drm_sched_job_add_dependency(&p->job->base, fence); 242 243 if (r) { 244 + dma_fence_put(fence); 243 245 dma_resv_iter_end(&cursor); 244 246 return r; 245 247 }

+3 -1

drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

··· 1185 1185 struct amdgpu_bo_va_mapping *mapping, 1186 1186 uint64_t *flags) 1187 1187 { 1188 + struct amdgpu_bo *bo = mapping->bo_va->base.bo; 1189 + 1188 1190 *flags &= ~AMDGPU_PTE_EXECUTABLE; 1189 1191 *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; 1190 1192 ··· 1198 1196 *flags &= ~AMDGPU_PTE_VALID; 1199 1197 } 1200 1198 1201 - if (mapping->bo_va->base.bo) 1199 + if (bo && bo->tbo.resource) 1202 1200 gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo, 1203 1201 mapping, flags); 1204 1202 }

+2 -1

drivers/gpu/drm/amd/amdgpu/mes_v11_0.c

··· 1342 1342 { 1343 1343 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1344 1344 1345 - if (!amdgpu_in_reset(adev) && 1345 + /* it's only intended for use in mes_self_test case, not for s0ix and reset */ 1346 + if (!amdgpu_in_reset(adev) && !adev->in_s0ix && 1346 1347 (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))) 1347 1348 amdgpu_mes_self_test(adev); 1348 1349

+1

drivers/gpu/drm/amd/amdgpu/soc21.c

··· 666 666 AMD_CG_SUPPORT_VCN_MGCG | 667 667 AMD_CG_SUPPORT_JPEG_MGCG; 668 668 adev->pg_flags = AMD_PG_SUPPORT_VCN | 669 + AMD_PG_SUPPORT_VCN_DPG | 669 670 AMD_PG_SUPPORT_GFX_PG | 670 671 AMD_PG_SUPPORT_JPEG; 671 672 adev->external_rev_id = adev->rev_id + 0x1;

+15 -9

drivers/gpu/drm/amd/amdkfd/kfd_process.c

··· 689 689 } 690 690 691 691 static void kfd_process_free_gpuvm(struct kgd_mem *mem, 692 - struct kfd_process_device *pdd, void *kptr) 692 + struct kfd_process_device *pdd, void **kptr) 693 693 { 694 694 struct kfd_dev *dev = pdd->dev; 695 695 696 - if (kptr) { 696 + if (kptr && *kptr) { 697 697 amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); 698 - kptr = NULL; 698 + *kptr = NULL; 699 699 } 700 700 701 701 amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); ··· 795 795 if (!qpd->ib_kaddr || !qpd->ib_base) 796 796 return; 797 797 798 - kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr); 798 + kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr); 799 799 } 800 800 801 801 struct kfd_process *kfd_create_process(struct file *filep) ··· 1277 1277 if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) 1278 1278 return; 1279 1279 1280 - kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr); 1280 + kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr); 1281 1281 } 1282 1282 1283 1283 void kfd_process_set_trap_handler(struct qcm_process_device *qpd, ··· 1576 1576 p = pdd->process; 1577 1577 dev = pdd->dev; 1578 1578 1579 - ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( 1580 - dev->adev, drm_file, p->pasid, 1581 - &p->kgd_process_info, &p->ef); 1579 + ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file, 1580 + &p->kgd_process_info, 1581 + &p->ef); 1582 1582 if (ret) { 1583 1583 pr_err("Failed to create process VM object\n"); 1584 1584 return ret; ··· 1593 1593 if (ret) 1594 1594 goto err_init_cwsr; 1595 1595 1596 + ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid); 1597 + if (ret) 1598 + goto err_set_pasid; 1599 + 1596 1600 pdd->drm_file = drm_file; 1597 1601 1598 1602 return 0; 1599 1603 1604 + err_set_pasid: 1605 + kfd_process_device_destroy_cwsr_dgpu(pdd); 1600 1606 err_init_cwsr: 1607 + kfd_process_device_destroy_ib_mem(pdd); 1601 1608 err_reserve_ib_mem: 1602 - kfd_process_device_free_bos(pdd); 1603 1609 pdd->drm_priv = NULL; 1604 1610 1605 1611 return ret;

+1

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

··· 1503 1503 case IP_VERSION(3, 0, 1): 1504 1504 case IP_VERSION(3, 1, 2): 1505 1505 case IP_VERSION(3, 1, 3): 1506 + case IP_VERSION(3, 1, 4): 1506 1507 case IP_VERSION(3, 1, 5): 1507 1508 case IP_VERSION(3, 1, 6): 1508 1509 init_data.flags.gpu_vm_support = true;

+4 -4

drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c

··· 55 55 s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A); 56 56 s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A); 57 57 } 58 - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); 58 + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); 59 59 60 60 s = &wm->sets[1]; 61 61 s->wm_set = 1; ··· 65 65 s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B); 66 66 s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B); 67 67 } 68 - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B); 68 + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B); 69 69 70 70 s = &wm->sets[2]; 71 71 s->wm_set = 2; ··· 75 75 s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C); 76 76 s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C); 77 77 } 78 - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C); 78 + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C); 79 79 80 80 s = &wm->sets[3]; 81 81 s->wm_set = 3; ··· 85 85 s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D); 86 86 s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D); 87 87 } 88 - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); 88 + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); 89 89 } 90 90 91 91 void hubbub1_allow_self_refresh_control(struct hubbub *hubbub, bool allow)

+1 -1

drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c

··· 159 159 DTN_INFO_MICRO_SEC(s->pte_meta_urgent); 160 160 DTN_INFO_MICRO_SEC(s->sr_enter); 161 161 DTN_INFO_MICRO_SEC(s->sr_exit); 162 - DTN_INFO_MICRO_SEC(s->dram_clk_chanage); 162 + DTN_INFO_MICRO_SEC(s->dram_clk_change); 163 163 DTN_INFO("\n"); 164 164 } 165 165

+2 -2

drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c

··· 83 83 memset(&wm, 0, sizeof(struct dcn_hubbub_wm)); 84 84 dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm); 85 85 86 - chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n"); 86 + chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_change\n"); 87 87 remaining_buffer -= chars_printed; 88 88 pBuf += chars_printed; 89 89 ··· 98 98 (s->pte_meta_urgent * frac) / ref_clk_mhz / frac, (s->pte_meta_urgent * frac) / ref_clk_mhz % frac, 99 99 (s->sr_enter * frac) / ref_clk_mhz / frac, (s->sr_enter * frac) / ref_clk_mhz % frac, 100 100 (s->sr_exit * frac) / ref_clk_mhz / frac, (s->sr_exit * frac) / ref_clk_mhz % frac, 101 - (s->dram_clk_chanage * frac) / ref_clk_mhz / frac, (s->dram_clk_chanage * frac) / ref_clk_mhz % frac); 101 + (s->dram_clk_change * frac) / ref_clk_mhz / frac, (s->dram_clk_change * frac) / ref_clk_mhz % frac); 102 102 remaining_buffer -= chars_printed; 103 103 pBuf += chars_printed; 104 104 }

+4 -4

drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c

··· 500 500 s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A); 501 501 s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A); 502 502 } 503 - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); 503 + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); 504 504 505 505 s = &wm->sets[1]; 506 506 s->wm_set = 1; ··· 511 511 s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B); 512 512 s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B); 513 513 } 514 - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B); 514 + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B); 515 515 516 516 s = &wm->sets[2]; 517 517 s->wm_set = 2; ··· 522 522 s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C); 523 523 s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C); 524 524 } 525 - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C); 525 + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C); 526 526 527 527 s = &wm->sets[3]; 528 528 s->wm_set = 3; ··· 533 533 s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D); 534 534 s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D); 535 535 } 536 - s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); 536 + s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); 537 537 } 538 538 539 539 void hubbub2_get_dchub_ref_freq(struct hubbub *hubbub,

+4 -4

drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c

··· 635 635 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit); 636 636 637 637 REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 638 - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, &s->dram_clk_chanage); 638 + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, &s->dram_clk_change); 639 639 640 640 s = &wm->sets[1]; 641 641 s->wm_set = 1; ··· 649 649 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit); 650 650 651 651 REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 652 - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, &s->dram_clk_chanage); 652 + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, &s->dram_clk_change); 653 653 654 654 s = &wm->sets[2]; 655 655 s->wm_set = 2; ··· 663 663 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit); 664 664 665 665 REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 666 - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, &s->dram_clk_chanage); 666 + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, &s->dram_clk_change); 667 667 668 668 s = &wm->sets[3]; 669 669 s->wm_set = 3; ··· 677 677 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit); 678 678 679 679 REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, 680 - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_chanage); 680 + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_change); 681 681 } 682 682 683 683 static void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub)

+4 -4

drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c

··· 865 865 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit); 866 866 867 867 REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, 868 - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_chanage); 868 + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_change); 869 869 870 870 REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, 871 871 DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, &s->usr_retrain); ··· 885 885 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit); 886 886 887 887 REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, 888 - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_chanage); 888 + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_change); 889 889 890 890 REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, 891 891 DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, &s->usr_retrain); ··· 905 905 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit); 906 906 907 907 REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, 908 - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_chanage); 908 + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_change); 909 909 910 910 REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C, 911 911 DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C, &s->usr_retrain); ··· 925 925 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit); 926 926 927 927 REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, 928 - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_chanage); 928 + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_change); 929 929 930 930 REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D, 931 931 DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D, &s->usr_retrain);

+1 -1

drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h

··· 46 46 uint32_t pte_meta_urgent; 47 47 uint32_t sr_enter; 48 48 uint32_t sr_exit; 49 - uint32_t dram_clk_chanage; 49 + uint32_t dram_clk_change; 50 50 uint32_t usr_retrain; 51 51 uint32_t fclk_pstate_change; 52 52 };

+1 -1

drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h

··· 522 522 TEMP_HOTSPOT_M, 523 523 TEMP_MEM, 524 524 TEMP_VR_GFX, 525 + TEMP_VR_SOC, 525 526 TEMP_VR_MEM0, 526 527 TEMP_VR_MEM1, 527 - TEMP_VR_SOC, 528 528 TEMP_VR_U, 529 529 TEMP_LIQUID0, 530 530 TEMP_LIQUID1,

+2 -1

drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h

··· 241 241 __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ 242 242 __SMU_DUMMY_MAP(LogGfxOffResidency), \ 243 243 __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired), \ 244 - __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), 244 + __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), \ 245 + __SMU_DUMMY_MAP(AllowGpo), 245 246 246 247 #undef __SMU_DUMMY_MAP 247 248 #define __SMU_DUMMY_MAP(type) SMU_MSG_##type

+4

drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h

··· 28 28 #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF 29 29 #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 30 30 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 31 + #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34 31 32 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 32 33 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 33 34 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 ··· 272 271 int smu_v13_0_init_pptable_microcode(struct smu_context *smu); 273 272 274 273 int smu_v13_0_run_btc(struct smu_context *smu); 274 + 275 + int smu_v13_0_gpo_control(struct smu_context *smu, 276 + bool enablement); 275 277 276 278 int smu_v13_0_deep_sleep_control(struct smu_context *smu, 277 279 bool enablement);

+17

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c

··· 290 290 smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_ALDE; 291 291 break; 292 292 case IP_VERSION(13, 0, 0): 293 + smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0; 294 + break; 293 295 case IP_VERSION(13, 0, 10): 294 296 smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10; 295 297 break; ··· 2178 2176 res = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); 2179 2177 if (res) 2180 2178 dev_err(smu->adev->dev, "RunDcBtc failed!\n"); 2179 + 2180 + return res; 2181 + } 2182 + 2183 + int smu_v13_0_gpo_control(struct smu_context *smu, 2184 + bool enablement) 2185 + { 2186 + int res; 2187 + 2188 + res = smu_cmn_send_smc_msg_with_param(smu, 2189 + SMU_MSG_AllowGpo, 2190 + enablement ? 1 : 0, 2191 + NULL); 2192 + if (res) 2193 + dev_err(smu->adev->dev, "SetGpoAllow %d failed!\n", enablement); 2181 2194 2182 2195 return res; 2183 2196 }

+102 -11

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c

··· 144 144 MSG_MAP(SetNumBadMemoryPagesRetired, PPSMC_MSG_SetNumBadMemoryPagesRetired, 0), 145 145 MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel, 146 146 PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0), 147 + MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), 147 148 }; 148 149 149 150 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { ··· 211 210 FEA_MAP(MEM_TEMP_READ), 212 211 FEA_MAP(ATHUB_MMHUB_PG), 213 212 FEA_MAP(SOC_PCC), 213 + [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, 214 + [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, 214 215 }; 215 216 216 217 static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { ··· 543 540 dpm_table); 544 541 if (ret) 545 542 return ret; 543 + 544 + /* 545 + * Update the reported maximum shader clock to the value 546 + * which can be guarded to be achieved on all cards. This 547 + * is aligned with Window setting. And considering that value 548 + * might be not the peak frequency the card can achieve, it 549 + * is normal some real-time clock frequency can overtake this 550 + * labelled maximum clock frequency(for example in pp_dpm_sclk 551 + * sysfs output). 552 + */ 553 + if (skutable->DriverReportedClocks.GameClockAc && 554 + (dpm_table->dpm_levels[dpm_table->count - 1].value > 555 + skutable->DriverReportedClocks.GameClockAc)) { 556 + dpm_table->dpm_levels[dpm_table->count - 1].value = 557 + skutable->DriverReportedClocks.GameClockAc; 558 + dpm_table->max = skutable->DriverReportedClocks.GameClockAc; 559 + } 546 560 } else { 547 561 dpm_table->count = 1; 548 562 dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; ··· 820 800 } 821 801 822 802 return ret; 803 + } 804 + 805 + static int smu_v13_0_0_get_dpm_ultimate_freq(struct smu_context *smu, 806 + enum smu_clk_type clk_type, 807 + uint32_t *min, 808 + uint32_t *max) 809 + { 810 + struct smu_13_0_dpm_context *dpm_context = 811 + smu->smu_dpm.dpm_context; 812 + struct smu_13_0_dpm_table *dpm_table; 813 + 814 + switch (clk_type) { 815 + case SMU_MCLK: 816 + case SMU_UCLK: 817 + /* uclk dpm table */ 818 + dpm_table = &dpm_context->dpm_tables.uclk_table; 819 + break; 820 + case SMU_GFXCLK: 821 + case SMU_SCLK: 822 + /* gfxclk dpm table */ 823 + dpm_table = &dpm_context->dpm_tables.gfx_table; 824 + break; 825 + case SMU_SOCCLK: 826 + /* socclk dpm table */ 827 + dpm_table = &dpm_context->dpm_tables.soc_table; 828 + break; 829 + case SMU_FCLK: 830 + /* fclk dpm table */ 831 + dpm_table = &dpm_context->dpm_tables.fclk_table; 832 + break; 833 + case SMU_VCLK: 834 + case SMU_VCLK1: 835 + /* vclk dpm table */ 836 + dpm_table = &dpm_context->dpm_tables.vclk_table; 837 + break; 838 + case SMU_DCLK: 839 + case SMU_DCLK1: 840 + /* dclk dpm table */ 841 + dpm_table = &dpm_context->dpm_tables.dclk_table; 842 + break; 843 + default: 844 + dev_err(smu->adev->dev, "Unsupported clock type!\n"); 845 + return -EINVAL; 846 + } 847 + 848 + if (min) 849 + *min = dpm_table->min; 850 + if (max) 851 + *max = dpm_table->max; 852 + 853 + return 0; 823 854 } 824 855 825 856 static int smu_v13_0_0_read_sensor(struct smu_context *smu, ··· 1375 1304 &dpm_context->dpm_tables.fclk_table; 1376 1305 struct smu_umd_pstate_table *pstate_table = 1377 1306 &smu->pstate_table; 1307 + struct smu_table_context *table_context = &smu->smu_table; 1308 + PPTable_t *pptable = table_context->driver_pptable; 1309 + DriverReportedClocks_t driver_clocks = 1310 + pptable->SkuTable.DriverReportedClocks; 1378 1311 1379 1312 pstate_table->gfxclk_pstate.min = gfx_table->min; 1380 - pstate_table->gfxclk_pstate.peak = gfx_table->max; 1313 + if (driver_clocks.GameClockAc && 1314 + (driver_clocks.GameClockAc < gfx_table->max)) 1315 + pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc; 1316 + else 1317 + pstate_table->gfxclk_pstate.peak = gfx_table->max; 1381 1318 1382 1319 pstate_table->uclk_pstate.min = mem_table->min; 1383 1320 pstate_table->uclk_pstate.peak = mem_table->max; ··· 1402 1323 pstate_table->fclk_pstate.min = fclk_table->min; 1403 1324 pstate_table->fclk_pstate.peak = fclk_table->max; 1404 1325 1405 - /* 1406 - * For now, just use the mininum clock frequency. 1407 - * TODO: update them when the real pstate settings available 1408 - */ 1409 - pstate_table->gfxclk_pstate.standard = gfx_table->min; 1410 - pstate_table->uclk_pstate.standard = mem_table->min; 1326 + if (driver_clocks.BaseClockAc && 1327 + driver_clocks.BaseClockAc < gfx_table->max) 1328 + pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc; 1329 + else 1330 + pstate_table->gfxclk_pstate.standard = gfx_table->max; 1331 + pstate_table->uclk_pstate.standard = mem_table->max; 1411 1332 pstate_table->socclk_pstate.standard = soc_table->min; 1412 1333 pstate_table->vclk_pstate.standard = vclk_table->min; 1413 1334 pstate_table->dclk_pstate.standard = dclk_table->min; ··· 1441 1362 static int smu_v13_0_0_get_fan_speed_pwm(struct smu_context *smu, 1442 1363 uint32_t *speed) 1443 1364 { 1365 + int ret; 1366 + 1444 1367 if (!speed) 1445 1368 return -EINVAL; 1446 1369 1447 - return smu_v13_0_0_get_smu_metrics_data(smu, 1448 - METRICS_CURR_FANPWM, 1449 - speed); 1370 + ret = smu_v13_0_0_get_smu_metrics_data(smu, 1371 + METRICS_CURR_FANPWM, 1372 + speed); 1373 + if (ret) { 1374 + dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!"); 1375 + return ret; 1376 + } 1377 + 1378 + /* Convert the PMFW output which is in percent to pwm(255) based */ 1379 + *speed = MIN(*speed * 255 / 100, 255); 1380 + 1381 + return 0; 1450 1382 } 1451 1383 1452 1384 static int smu_v13_0_0_get_fan_speed_rpm(struct smu_context *smu, ··· 1989 1899 .get_enabled_mask = smu_cmn_get_enabled_mask, 1990 1900 .dpm_set_vcn_enable = smu_v13_0_set_vcn_enable, 1991 1901 .dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable, 1992 - .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq, 1902 + .get_dpm_ultimate_freq = smu_v13_0_0_get_dpm_ultimate_freq, 1993 1903 .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values, 1994 1904 .read_sensor = smu_v13_0_0_read_sensor, 1995 1905 .feature_is_enabled = smu_cmn_feature_is_enabled, ··· 2037 1947 .set_df_cstate = smu_v13_0_0_set_df_cstate, 2038 1948 .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, 2039 1949 .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, 1950 + .gpo_control = smu_v13_0_gpo_control, 2040 1951 }; 2041 1952 2042 1953 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)

+34 -8

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c

··· 123 123 MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), 124 124 MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), 125 125 MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), 126 + MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), 126 127 }; 127 128 128 129 static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { ··· 190 189 FEA_MAP(MEM_TEMP_READ), 191 190 FEA_MAP(ATHUB_MMHUB_PG), 192 191 FEA_MAP(SOC_PCC), 192 + [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, 193 + [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, 193 194 }; 194 195 195 196 static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { ··· 1363 1360 static int smu_v13_0_7_get_fan_speed_pwm(struct smu_context *smu, 1364 1361 uint32_t *speed) 1365 1362 { 1363 + int ret; 1364 + 1366 1365 if (!speed) 1367 1366 return -EINVAL; 1368 1367 1369 - return smu_v13_0_7_get_smu_metrics_data(smu, 1370 - METRICS_CURR_FANPWM, 1371 - speed); 1368 + ret = smu_v13_0_7_get_smu_metrics_data(smu, 1369 + METRICS_CURR_FANPWM, 1370 + speed); 1371 + if (ret) { 1372 + dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!"); 1373 + return ret; 1374 + } 1375 + 1376 + /* Convert the PMFW output which is in percent to pwm(255) based */ 1377 + *speed = MIN(*speed * 255 / 100, 255); 1378 + 1379 + return 0; 1372 1380 } 1373 1381 1374 1382 static int smu_v13_0_7_get_fan_speed_rpm(struct smu_context *smu, ··· 1451 1437 1452 1438 static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf) 1453 1439 { 1454 - DpmActivityMonitorCoeffIntExternal_t activity_monitor_external[PP_SMC_POWER_PROFILE_COUNT]; 1440 + DpmActivityMonitorCoeffIntExternal_t *activity_monitor_external; 1455 1441 uint32_t i, j, size = 0; 1456 1442 int16_t workload_type = 0; 1457 1443 int result = 0; 1458 1444 1459 1445 if (!buf) 1460 1446 return -EINVAL; 1447 + 1448 + activity_monitor_external = kcalloc(PP_SMC_POWER_PROFILE_COUNT, 1449 + sizeof(*activity_monitor_external), 1450 + GFP_KERNEL); 1451 + if (!activity_monitor_external) 1452 + return -ENOMEM; 1461 1453 1462 1454 size += sysfs_emit_at(buf, size, " "); 1463 1455 for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++) ··· 1477 1457 workload_type = smu_cmn_to_asic_specific_index(smu, 1478 1458 CMN2ASIC_MAPPING_WORKLOAD, 1479 1459 i); 1480 - if (workload_type < 0) 1481 - return -EINVAL; 1460 + if (workload_type < 0) { 1461 + result = -EINVAL; 1462 + goto out; 1463 + } 1482 1464 1483 1465 result = smu_cmn_update_table(smu, 1484 1466 SMU_TABLE_ACTIVITY_MONITOR_COEFF, workload_type, 1485 1467 (void *)(&activity_monitor_external[i]), false); 1486 1468 if (result) { 1487 1469 dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); 1488 - return result; 1470 + goto out; 1489 1471 } 1490 1472 } 1491 1473 ··· 1515 1493 PRINT_DPM_MONITOR(Fclk_BoosterFreq); 1516 1494 #undef PRINT_DPM_MONITOR 1517 1495 1518 - return size; 1496 + result = size; 1497 + out: 1498 + kfree(activity_monitor_external); 1499 + return result; 1519 1500 } 1520 1501 1521 1502 static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) ··· 1713 1688 .mode1_reset = smu_v13_0_mode1_reset, 1714 1689 .set_mp1_state = smu_v13_0_7_set_mp1_state, 1715 1690 .set_df_cstate = smu_v13_0_7_set_df_cstate, 1691 + .gpo_control = smu_v13_0_gpo_control, 1716 1692 }; 1717 1693 1718 1694 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)

+2 -2

drivers/gpu/drm/i915/display/g4x_dp.c

··· 673 673 intel_dp_pcon_dsc_configure(intel_dp, pipe_config); 674 674 intel_dp_start_link_train(intel_dp, pipe_config); 675 675 intel_dp_stop_link_train(intel_dp, pipe_config); 676 - 677 - intel_audio_codec_enable(encoder, pipe_config, conn_state); 678 676 } 679 677 680 678 static void g4x_enable_dp(struct intel_atomic_state *state, ··· 681 683 const struct drm_connector_state *conn_state) 682 684 { 683 685 intel_enable_dp(state, encoder, pipe_config, conn_state); 686 + intel_audio_codec_enable(encoder, pipe_config, conn_state); 684 687 intel_edp_backlight_on(pipe_config, conn_state); 685 688 } 686 689 ··· 690 691 const struct intel_crtc_state *pipe_config, 691 692 const struct drm_connector_state *conn_state) 692 693 { 694 + intel_audio_codec_enable(encoder, pipe_config, conn_state); 693 695 intel_edp_backlight_on(pipe_config, conn_state); 694 696 } 695 697

+19 -6

drivers/gpu/drm/i915/display/g4x_hdmi.c

··· 157 157 &pipe_config->infoframes.hdmi); 158 158 } 159 159 160 - static void g4x_enable_hdmi(struct intel_atomic_state *state, 161 - struct intel_encoder *encoder, 162 - const struct intel_crtc_state *pipe_config, 163 - const struct drm_connector_state *conn_state) 160 + static void g4x_hdmi_enable_port(struct intel_encoder *encoder, 161 + const struct intel_crtc_state *pipe_config) 164 162 { 165 163 struct drm_device *dev = encoder->base.dev; 166 164 struct drm_i915_private *dev_priv = to_i915(dev); ··· 173 175 174 176 intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp); 175 177 intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg); 178 + } 179 + 180 + static void g4x_enable_hdmi(struct intel_atomic_state *state, 181 + struct intel_encoder *encoder, 182 + const struct intel_crtc_state *pipe_config, 183 + const struct drm_connector_state *conn_state) 184 + { 185 + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 186 + 187 + g4x_hdmi_enable_port(encoder, pipe_config); 176 188 177 189 drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio && 178 190 !pipe_config->has_hdmi_sink); ··· 302 294 const struct intel_crtc_state *pipe_config, 303 295 const struct drm_connector_state *conn_state) 304 296 { 297 + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 298 + 299 + drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio && 300 + !pipe_config->has_hdmi_sink); 301 + intel_audio_codec_enable(encoder, pipe_config, conn_state); 305 302 } 306 303 307 304 static void intel_disable_hdmi(struct intel_atomic_state *state, ··· 428 415 pipe_config->has_infoframe, 429 416 pipe_config, conn_state); 430 417 431 - g4x_enable_hdmi(state, encoder, pipe_config, conn_state); 418 + g4x_hdmi_enable_port(encoder, pipe_config); 432 419 433 420 vlv_wait_port_ready(dev_priv, dig_port, 0x0); 434 421 } ··· 505 492 pipe_config->has_infoframe, 506 493 pipe_config, conn_state); 507 494 508 - g4x_enable_hdmi(state, encoder, pipe_config, conn_state); 495 + g4x_hdmi_enable_port(encoder, pipe_config); 509 496 510 497 vlv_wait_port_ready(dev_priv, dig_port, 0x0); 511 498

-59

drivers/gpu/drm/i915/display/intel_dp.c

··· 3679 3679 } 3680 3680 } 3681 3681 3682 - static void 3683 - intel_dp_autotest_phy_ddi_disable(struct intel_dp *intel_dp, 3684 - const struct intel_crtc_state *crtc_state) 3685 - { 3686 - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); 3687 - struct drm_device *dev = dig_port->base.base.dev; 3688 - struct drm_i915_private *dev_priv = to_i915(dev); 3689 - struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); 3690 - enum pipe pipe = crtc->pipe; 3691 - u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value; 3692 - 3693 - trans_ddi_func_ctl_value = intel_de_read(dev_priv, 3694 - TRANS_DDI_FUNC_CTL(pipe)); 3695 - trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe)); 3696 - dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe)); 3697 - 3698 - trans_ddi_func_ctl_value &= ~(TRANS_DDI_FUNC_ENABLE | 3699 - TGL_TRANS_DDI_PORT_MASK); 3700 - trans_conf_value &= ~PIPECONF_ENABLE; 3701 - dp_tp_ctl_value &= ~DP_TP_CTL_ENABLE; 3702 - 3703 - intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value); 3704 - intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe), 3705 - trans_ddi_func_ctl_value); 3706 - intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value); 3707 - } 3708 - 3709 - static void 3710 - intel_dp_autotest_phy_ddi_enable(struct intel_dp *intel_dp, 3711 - const struct intel_crtc_state *crtc_state) 3712 - { 3713 - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); 3714 - struct drm_device *dev = dig_port->base.base.dev; 3715 - struct drm_i915_private *dev_priv = to_i915(dev); 3716 - enum port port = dig_port->base.port; 3717 - struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); 3718 - enum pipe pipe = crtc->pipe; 3719 - u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value; 3720 - 3721 - trans_ddi_func_ctl_value = intel_de_read(dev_priv, 3722 - TRANS_DDI_FUNC_CTL(pipe)); 3723 - trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe)); 3724 - dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe)); 3725 - 3726 - trans_ddi_func_ctl_value |= TRANS_DDI_FUNC_ENABLE | 3727 - TGL_TRANS_DDI_SELECT_PORT(port); 3728 - trans_conf_value |= PIPECONF_ENABLE; 3729 - dp_tp_ctl_value |= DP_TP_CTL_ENABLE; 3730 - 3731 - intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value); 3732 - intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value); 3733 - intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe), 3734 - trans_ddi_func_ctl_value); 3735 - } 3736 - 3737 3682 static void intel_dp_process_phy_request(struct intel_dp *intel_dp, 3738 3683 const struct intel_crtc_state *crtc_state) 3739 3684 { ··· 3697 3752 intel_dp_get_adjust_train(intel_dp, crtc_state, DP_PHY_DPRX, 3698 3753 link_status); 3699 3754 3700 - intel_dp_autotest_phy_ddi_disable(intel_dp, crtc_state); 3701 - 3702 3755 intel_dp_set_signal_levels(intel_dp, crtc_state, DP_PHY_DPRX); 3703 3756 3704 3757 intel_dp_phy_pattern_update(intel_dp, crtc_state); 3705 - 3706 - intel_dp_autotest_phy_ddi_enable(intel_dp, crtc_state); 3707 3758 3708 3759 drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET, 3709 3760 intel_dp->train_set, crtc_state->lane_count);

+3

drivers/gpu/drm/i915/gem/i915_gem_object.c

··· 785 785 if (!HAS_FLAT_CCS(to_i915(obj->base.dev))) 786 786 return false; 787 787 788 + if (obj->flags & I915_BO_ALLOC_CCS_AUX) 789 + return true; 790 + 788 791 for (i = 0; i < obj->mm.n_placements; i++) { 789 792 /* Compression is not allowed for the objects with smem placement */ 790 793 if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM)

+6 -4

drivers/gpu/drm/i915/gem/i915_gem_object_types.h

··· 327 327 * dealing with userspace objects the CPU fault handler is free to ignore this. 328 328 */ 329 329 #define I915_BO_ALLOC_GPU_ONLY BIT(6) 330 + #define I915_BO_ALLOC_CCS_AUX BIT(7) 330 331 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ 331 332 I915_BO_ALLOC_VOLATILE | \ 332 333 I915_BO_ALLOC_CPU_CLEAR | \ 333 334 I915_BO_ALLOC_USER | \ 334 335 I915_BO_ALLOC_PM_VOLATILE | \ 335 336 I915_BO_ALLOC_PM_EARLY | \ 336 - I915_BO_ALLOC_GPU_ONLY) 337 - #define I915_BO_READONLY BIT(7) 338 - #define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ 339 - #define I915_BO_PROTECTED BIT(9) 337 + I915_BO_ALLOC_GPU_ONLY | \ 338 + I915_BO_ALLOC_CCS_AUX) 339 + #define I915_BO_READONLY BIT(8) 340 + #define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ 341 + #define I915_BO_PROTECTED BIT(10) 340 342 /** 341 343 * @mem_flags - Mutable placement-related flags 342 344 *

+17 -1

drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c

··· 50 50 container_of(bo->bdev, typeof(*i915), bdev); 51 51 struct drm_i915_gem_object *backup; 52 52 struct ttm_operation_ctx ctx = {}; 53 + unsigned int flags; 53 54 int err = 0; 54 55 55 56 if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup) ··· 66 65 if (obj->flags & I915_BO_ALLOC_PM_VOLATILE) 67 66 return 0; 68 67 69 - backup = i915_gem_object_create_shmem(i915, obj->base.size); 68 + /* 69 + * It seems that we might have some framebuffers still pinned at this 70 + * stage, but for such objects we might also need to deal with the CCS 71 + * aux state. Make sure we force the save/restore of the CCS state, 72 + * otherwise we might observe display corruption, when returning from 73 + * suspend. 74 + */ 75 + flags = 0; 76 + if (i915_gem_object_needs_ccs_pages(obj)) { 77 + WARN_ON_ONCE(!i915_gem_object_is_framebuffer(obj)); 78 + WARN_ON_ONCE(!pm_apply->allow_gpu); 79 + 80 + flags = I915_BO_ALLOC_CCS_AUX; 81 + } 82 + backup = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], 83 + obj->base.size, 0, flags); 70 84 if (IS_ERR(backup)) 71 85 return PTR_ERR(backup); 72 86

+1 -1

drivers/gpu/drm/i915/gt/intel_gt_mcr.c

··· 702 702 } 703 703 704 704 /** 705 - * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state 705 + * intel_gt_mcr_wait_for_reg - wait until MCR register matches expected state 706 706 * @gt: GT structure 707 707 * @reg: the register to read 708 708 * @mask: mask to apply to register value

+41 -12

drivers/gpu/drm/i915/gt/intel_migrate.c

··· 342 342 return 0; 343 343 } 344 344 345 + static int max_pte_pkt_size(struct i915_request *rq, int pkt) 346 + { 347 + struct intel_ring *ring = rq->ring; 348 + 349 + pkt = min_t(int, pkt, (ring->space - rq->reserved_space) / sizeof(u32) + 5); 350 + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); 351 + 352 + return pkt; 353 + } 354 + 345 355 static int emit_pte(struct i915_request *rq, 346 356 struct sgt_dma *it, 347 357 enum i915_cache_level cache_level, ··· 398 388 return PTR_ERR(cs); 399 389 400 390 /* Pack as many PTE updates as possible into a single MI command */ 401 - pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5); 402 - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); 391 + pkt = max_pte_pkt_size(rq, dword_length); 403 392 404 393 hdr = cs; 405 394 *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ ··· 431 422 } 432 423 } 433 424 434 - pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5); 435 - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); 425 + pkt = max_pte_pkt_size(rq, dword_rem); 436 426 437 427 hdr = cs; 438 428 *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); ··· 837 829 if (err) 838 830 goto out_rq; 839 831 840 - /* 841 - * While we can't always restore/manage the CCS state, 842 - * we still need to ensure we don't leak the CCS state 843 - * from the previous user, so make sure we overwrite it 844 - * with something. 845 - */ 846 - err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS, 847 - dst_offset, DIRECT_ACCESS, len); 832 + if (src_is_lmem) { 833 + /* 834 + * If the src is already in lmem, then we must 835 + * be doing an lmem -> lmem transfer, and so 836 + * should be safe to directly copy the CCS 837 + * state. In this case we have either 838 + * initialised the CCS aux state when first 839 + * clearing the pages (since it is already 840 + * allocated in lmem), or the user has 841 + * potentially populated it, in which case we 842 + * need to copy the CCS state as-is. 843 + */ 844 + err = emit_copy_ccs(rq, 845 + dst_offset, INDIRECT_ACCESS, 846 + src_offset, INDIRECT_ACCESS, 847 + len); 848 + } else { 849 + /* 850 + * While we can't always restore/manage the CCS 851 + * state, we still need to ensure we don't leak 852 + * the CCS state from the previous user, so make 853 + * sure we overwrite it with something. 854 + */ 855 + err = emit_copy_ccs(rq, 856 + dst_offset, INDIRECT_ACCESS, 857 + dst_offset, DIRECT_ACCESS, 858 + len); 859 + } 860 + 848 861 if (err) 849 862 goto out_rq; 850 863

+5 -1

drivers/gpu/drm/i915/i915_perf.c

··· 1383 1383 u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4; 1384 1384 u32 *state = ce->lrc_reg_state; 1385 1385 1386 + if (drm_WARN_ON(&ce->engine->i915->drm, !state)) 1387 + return U32_MAX; 1388 + 1386 1389 for (offset = 0; offset < len; ) { 1387 1390 if (IS_MI_LRI_CMD(state[offset])) { 1388 1391 /* ··· 1450 1447 if (IS_ERR(ce)) 1451 1448 return PTR_ERR(ce); 1452 1449 1453 - if (engine_supports_mi_query(stream->engine)) { 1450 + if (engine_supports_mi_query(stream->engine) && 1451 + HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) { 1454 1452 /* 1455 1453 * We are enabling perf query here. If we don't find the context 1456 1454 * offset here, just return an error.

+2 -2

drivers/gpu/drm/i915/intel_uncore.c

··· 824 824 } 825 825 826 826 /** 827 - * intel_uncore_forcewake_put__locked - grab forcewake domain references 827 + * intel_uncore_forcewake_put__locked - release forcewake domain references 828 828 * @uncore: the intel_uncore structure 829 - * @fw_domains: forcewake domains to get reference on 829 + * @fw_domains: forcewake domains to put references 830 830 * 831 831 * See intel_uncore_forcewake_put(). This variant places the onus 832 832 * on the caller to explicitly handle the dev_priv->uncore.lock spinlock.

Configure Feed

Configure Feed