Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-fixes-2021-12-31' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
"This is a bit bigger than I'd like, however it has two weeks of amdgpu
fixes in it, since they missed last week, which was very small.

The nouveau regression is probably the biggest fix in here, and it
needs to go into 5.15 as well, two i915 fixes, and then a scattering
of amdgpu fixes. The biggest fix in there is for a fencing NULL
pointer dereference, the rest are pretty minor.

For the misc team, I've pulled the two misc fixes manually since I'm
not sure what is happening at this time of year!

The amdgpu maintainers have the outstanding runpm regression to fix
still, they are just working through the last bits of it now.

Summary:

nouveau:
- fencing regression fix

i915:
- Fix possible uninitialized variable
- Fix composite fence seqno increment on each fence creation

amdgpu:
- Fencing fix
- XGMI fix
- VCN regression fix
- IP discovery regression fixes
- Fix runpm documentation
- Suspend/resume fixes
- Yellow Carp display fixes
- MCLK power management fix
- dma-buf fix"

* tag 'drm-fixes-2021-12-31' of git://anongit.freedesktop.org/drm/drm:
drm/amd/display: Changed pipe split policy to allow for multi-display pipe split
drm/amd/display: Fix USB4 null pointer dereference in update_psp_stream_config
drm/amd/display: Set optimize_pwr_state for DCN31
drm/amd/display: Send s0i2_rdy in stream_count == 0 optimization
drm/amd/display: Added power down for DCN10
drm/amd/display: fix B0 TMDS deepcolor no dislay issue
drm/amdgpu: no DC support for headless chips
drm/amdgpu: put SMU into proper state on runpm suspending for BOCO capable platform
drm/amdgpu: always reset the asic in suspend (v2)
drm/amd/pm: skip setting gfx cgpg in the s0ix suspend-resume
drm/i915: Increment composite fence seqno
drm/i915: Fix possible uninitialized variable in parallel extension
drm/amdgpu: fix runpm documentation
drm/nouveau: wait for the exclusive fence after the shared ones v2
drm/amdgpu: add support for IP discovery gc_info table v2
drm/amdgpu: When the VCN(1.0) block is suspended, powergating is explicitly enabled
drm/amd/pm: Fix xgmi link control on aldebaran
drm/amdgpu: introduce new amdgpu_fence object to indicate the job embedded fence
drm/amdgpu: fix dropped backing store handling in amdgpu_dma_buf_move_notify

+331 -127
+8 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 3166 3166 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 3167 3167 { 3168 3168 switch (asic_type) { 3169 + #ifdef CONFIG_DRM_AMDGPU_SI 3170 + case CHIP_HAINAN: 3171 + #endif 3172 + case CHIP_TOPAZ: 3173 + /* chips with no display hardware */ 3174 + return false; 3169 3175 #if defined(CONFIG_DRM_AMD_DC) 3170 3176 case CHIP_TAHITI: 3171 3177 case CHIP_PITCAIRN: ··· 4467 4461 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, 4468 4462 struct amdgpu_reset_context *reset_context) 4469 4463 { 4470 - int i, j, r = 0; 4464 + int i, r = 0; 4471 4465 struct amdgpu_job *job = NULL; 4472 4466 bool need_full_reset = 4473 4467 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); ··· 4489 4483 4490 4484 /*clear job fence from fence drv to avoid force_completion 4491 4485 *leave NULL and vm flush fence in fence drv */ 4492 - for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) { 4493 - struct dma_fence *old, **ptr; 4486 + amdgpu_fence_driver_clear_job_fences(ring); 4494 4487 4495 - ptr = &ring->fence_drv.fences[j]; 4496 - old = rcu_dereference_protected(*ptr, 1); 4497 - if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) { 4498 - RCU_INIT_POINTER(*ptr, NULL); 4499 - } 4500 - } 4501 4488 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 4502 4489 amdgpu_fence_driver_force_completion(ring); 4503 4490 }
+54 -22
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
··· 526 526 } 527 527 } 528 528 529 + union gc_info { 530 + struct gc_info_v1_0 v1; 531 + struct gc_info_v2_0 v2; 532 + }; 533 + 529 534 int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) 530 535 { 531 536 struct binary_header *bhdr; 532 - struct gc_info_v1_0 *gc_info; 537 + union gc_info *gc_info; 533 538 534 539 if (!adev->mman.discovery_bin) { 535 540 DRM_ERROR("ip discovery uninitialized\n"); ··· 542 537 } 543 538 544 539 bhdr = (struct binary_header *)adev->mman.discovery_bin; 545 - gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin + 540 + gc_info = (union gc_info *)(adev->mman.discovery_bin + 546 541 le16_to_cpu(bhdr->table_list[GC].offset)); 547 - 548 - adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); 549 - adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + 550 - le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); 551 - adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); 552 - adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); 553 - adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); 554 - adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); 555 - adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); 556 - adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); 557 - adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); 558 - adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); 559 - adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); 560 - adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); 561 - adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); 562 - adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); 563 - adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / 564 - 
le32_to_cpu(gc_info->gc_num_sa_per_se); 565 - adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); 566 - 542 + switch (gc_info->v1.header.version_major) { 543 + case 1: 544 + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se); 545 + adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) + 546 + le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa)); 547 + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se); 548 + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se); 549 + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c); 550 + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs); 551 + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds); 552 + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth); 553 + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth); 554 + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer); 555 + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size); 556 + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd); 557 + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu); 558 + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size); 559 + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) / 560 + le32_to_cpu(gc_info->v1.gc_num_sa_per_se); 561 + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc); 562 + break; 563 + case 2: 564 + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); 565 + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh); 566 + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se); 567 + 
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se); 568 + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs); 569 + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs); 570 + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds); 571 + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth); 572 + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth); 573 + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer); 574 + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size); 575 + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd); 576 + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu); 577 + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size); 578 + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / 579 + le32_to_cpu(gc_info->v2.gc_num_sh_per_se); 580 + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); 581 + break; 582 + default: 583 + dev_err(adev->dev, 584 + "Unhandled GC info table %d.%d\n", 585 + gc_info->v1.header.version_major, 586 + gc_info->v1.header.version_minor); 587 + return -EINVAL; 588 + } 567 589 return 0; 568 590 } 569 591
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
··· 384 384 struct amdgpu_vm_bo_base *bo_base; 385 385 int r; 386 386 387 - if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM) 387 + if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) 388 388 return; 389 389 390 390 r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+23 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 328 328 329 329 /** 330 330 * DOC: runpm (int) 331 - * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down 332 - * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. 331 + * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down 332 + * the dGPUs when they are idle if supported. The default is -1 (auto enable). 333 + * Setting the value to 0 disables this functionality. 333 334 */ 334 - MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)"); 335 + MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)"); 335 336 module_param_named(runpm, amdgpu_runtime_pm, int, 0444); 336 337 337 338 /** ··· 2154 2153 adev->in_s3 = true; 2155 2154 r = amdgpu_device_suspend(drm_dev, true); 2156 2155 adev->in_s3 = false; 2157 - 2156 + if (r) 2157 + return r; 2158 + if (!adev->in_s0ix) 2159 + r = amdgpu_asic_reset(adev); 2158 2160 return r; 2159 2161 } 2160 2162 ··· 2238 2234 if (amdgpu_device_supports_px(drm_dev)) 2239 2235 drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 2240 2236 2237 + /* 2238 + * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some 2239 + * proper cleanups and put itself into a state ready for PNP. That 2240 + * can address some random resuming failure observed on BOCO capable 2241 + * platforms. 2242 + * TODO: this may be also needed for PX capable platform. 
2243 + */ 2244 + if (amdgpu_device_supports_boco(drm_dev)) 2245 + adev->mp1_state = PP_MP1_STATE_UNLOAD; 2246 + 2241 2247 ret = amdgpu_device_suspend(drm_dev, false); 2242 2248 if (ret) { 2243 2249 adev->in_runpm = false; 2250 + if (amdgpu_device_supports_boco(drm_dev)) 2251 + adev->mp1_state = PP_MP1_STATE_NONE; 2244 2252 return ret; 2245 2253 } 2254 + 2255 + if (amdgpu_device_supports_boco(drm_dev)) 2256 + adev->mp1_state = PP_MP1_STATE_NONE; 2246 2257 2247 2258 if (amdgpu_device_supports_px(drm_dev)) { 2248 2259 /* Only need to handle PCI state in the driver for ATPX
+87 -39
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
··· 77 77 * Cast helper 78 78 */ 79 79 static const struct dma_fence_ops amdgpu_fence_ops; 80 + static const struct dma_fence_ops amdgpu_job_fence_ops; 80 81 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) 81 82 { 82 83 struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); 83 84 84 - if (__f->base.ops == &amdgpu_fence_ops) 85 + if (__f->base.ops == &amdgpu_fence_ops || 86 + __f->base.ops == &amdgpu_job_fence_ops) 85 87 return __f; 86 88 87 89 return NULL; ··· 160 158 } 161 159 162 160 seq = ++ring->fence_drv.sync_seq; 163 - if (job != NULL && job->job_run_counter) { 161 + if (job && job->job_run_counter) { 164 162 /* reinit seq for resubmitted jobs */ 165 163 fence->seqno = seq; 166 164 } else { 167 - dma_fence_init(fence, &amdgpu_fence_ops, 168 - &ring->fence_drv.lock, 169 - adev->fence_context + ring->idx, 170 - seq); 171 - } 172 - 173 - if (job != NULL) { 174 - /* mark this fence has a parent job */ 175 - set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags); 165 + if (job) 166 + dma_fence_init(fence, &amdgpu_job_fence_ops, 167 + &ring->fence_drv.lock, 168 + adev->fence_context + ring->idx, seq); 169 + else 170 + dma_fence_init(fence, &amdgpu_fence_ops, 171 + &ring->fence_drv.lock, 172 + adev->fence_context + ring->idx, seq); 176 173 } 177 174 178 175 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, ··· 622 621 } 623 622 624 623 /** 624 + * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring 625 + * 626 + * @ring: fence of the ring to be cleared 627 + * 628 + */ 629 + void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) 630 + { 631 + int i; 632 + struct dma_fence *old, **ptr; 633 + 634 + for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { 635 + ptr = &ring->fence_drv.fences[i]; 636 + old = rcu_dereference_protected(*ptr, 1); 637 + if (old && old->ops == &amdgpu_job_fence_ops) 638 + RCU_INIT_POINTER(*ptr, NULL); 639 + } 640 + } 641 + 642 + /** 625 643 * 
amdgpu_fence_driver_force_completion - force signal latest fence of ring 626 644 * 627 645 * @ring: fence of the ring to signal ··· 663 643 664 644 static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) 665 645 { 666 - struct amdgpu_ring *ring; 646 + return (const char *)to_amdgpu_fence(f)->ring->name; 647 + } 667 648 668 - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { 669 - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); 649 + static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) 650 + { 651 + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); 670 652 671 - ring = to_amdgpu_ring(job->base.sched); 672 - } else { 673 - ring = to_amdgpu_fence(f)->ring; 674 - } 675 - return (const char *)ring->name; 653 + return (const char *)to_amdgpu_ring(job->base.sched)->name; 676 654 } 677 655 678 656 /** ··· 683 665 */ 684 666 static bool amdgpu_fence_enable_signaling(struct dma_fence *f) 685 667 { 686 - struct amdgpu_ring *ring; 668 + if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer)) 669 + amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring); 687 670 688 - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { 689 - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); 671 + return true; 672 + } 690 673 691 - ring = to_amdgpu_ring(job->base.sched); 692 - } else { 693 - ring = to_amdgpu_fence(f)->ring; 694 - } 674 + /** 675 + * amdgpu_job_fence_enable_signaling - enable signalling on job fence 676 + * @f: fence 677 + * 678 + * This is the simliar function with amdgpu_fence_enable_signaling above, it 679 + * only handles the job embedded fence. 
680 + */ 681 + static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) 682 + { 683 + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); 695 684 696 - if (!timer_pending(&ring->fence_drv.fallback_timer)) 697 - amdgpu_fence_schedule_fallback(ring); 685 + if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) 686 + amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); 698 687 699 688 return true; 700 689 } ··· 717 692 { 718 693 struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); 719 694 720 - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { 721 - /* free job if fence has a parent job */ 722 - struct amdgpu_job *job; 723 - 724 - job = container_of(f, struct amdgpu_job, hw_fence); 725 - kfree(job); 726 - } else { 727 695 /* free fence_slab if it's separated fence*/ 728 - struct amdgpu_fence *fence; 696 + kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f)); 697 + } 729 698 730 - fence = to_amdgpu_fence(f); 731 - kmem_cache_free(amdgpu_fence_slab, fence); 732 - } 699 + /** 700 + * amdgpu_job_fence_free - free up the job with embedded fence 701 + * 702 + * @rcu: RCU callback head 703 + * 704 + * Free up the job with embedded fence after the RCU grace period. 705 + */ 706 + static void amdgpu_job_fence_free(struct rcu_head *rcu) 707 + { 708 + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); 709 + 710 + /* free job if fence has a parent job */ 711 + kfree(container_of(f, struct amdgpu_job, hw_fence)); 733 712 } 734 713 735 714 /** ··· 749 720 call_rcu(&f->rcu, amdgpu_fence_free); 750 721 } 751 722 723 + /** 724 + * amdgpu_job_fence_release - callback that job embedded fence can be freed 725 + * 726 + * @f: fence 727 + * 728 + * This is the simliar function with amdgpu_fence_release above, it 729 + * only handles the job embedded fence. 
730 + */ 731 + static void amdgpu_job_fence_release(struct dma_fence *f) 732 + { 733 + call_rcu(&f->rcu, amdgpu_job_fence_free); 734 + } 735 + 752 736 static const struct dma_fence_ops amdgpu_fence_ops = { 753 737 .get_driver_name = amdgpu_fence_get_driver_name, 754 738 .get_timeline_name = amdgpu_fence_get_timeline_name, ··· 769 727 .release = amdgpu_fence_release, 770 728 }; 771 729 730 + static const struct dma_fence_ops amdgpu_job_fence_ops = { 731 + .get_driver_name = amdgpu_fence_get_driver_name, 732 + .get_timeline_name = amdgpu_job_fence_get_timeline_name, 733 + .enable_signaling = amdgpu_job_fence_enable_signaling, 734 + .release = amdgpu_job_fence_release, 735 + }; 772 736 773 737 /* 774 738 * Fence debugfs
+1 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
··· 53 53 #define AMDGPU_FENCE_FLAG_INT (1 << 1) 54 54 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) 55 55 56 - /* fence flag bit to indicate the face is embedded in job*/ 57 - #define AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT (DMA_FENCE_FLAG_USER_BITS + 1) 58 - 59 56 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) 60 57 61 58 #define AMDGPU_IB_POOL_SIZE (1024 * 1024) ··· 111 114 struct dma_fence **fences; 112 115 }; 113 116 117 + void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); 114 118 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); 115 119 116 120 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+7
drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
··· 246 246 { 247 247 int r; 248 248 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 249 + bool idle_work_unexecuted; 250 + 251 + idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); 252 + if (idle_work_unexecuted) { 253 + if (adev->pm.dpm_enabled) 254 + amdgpu_dpm_enable_uvd(adev, false); 255 + } 249 256 250 257 r = vcn_v1_0_hw_fini(adev); 251 258 if (r)
+1
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
··· 158 158 union display_idle_optimization_u idle_info = { 0 }; 159 159 idle_info.idle_info.df_request_disabled = 1; 160 160 idle_info.idle_info.phy_ref_clk_off = 1; 161 + idle_info.idle_info.s0i2_rdy = 1; 161 162 dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data); 162 163 /* update power state */ 163 164 clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+1 -4
drivers/gpu/drm/amd/display/dc/core/dc_link.c
··· 3945 3945 config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst; 3946 3946 #if defined(CONFIG_DRM_AMD_DC_DCN) 3947 3947 config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA; 3948 - 3948 + 3949 3949 if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY || 3950 3950 pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { 3951 - link_enc = pipe_ctx->stream->link->link_enc; 3952 - config.dio_output_type = pipe_ctx->stream->link->ep_type; 3953 - config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; 3954 3951 if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY) 3955 3952 link_enc = pipe_ctx->stream->link->link_enc; 3956 3953 else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+1
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
··· 78 78 .get_clock = dcn10_get_clock, 79 79 .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, 80 80 .calc_vupdate_position = dcn10_calc_vupdate_position, 81 + .power_down = dce110_power_down, 81 82 .set_backlight_level = dce110_set_backlight_level, 82 83 .set_abm_immediate_disable = dce110_set_abm_immediate_disable, 83 84 .set_pipe = dce110_set_pipe,
+1 -1
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
··· 1069 1069 .timing_trace = false, 1070 1070 .clock_trace = true, 1071 1071 .disable_pplib_clock_request = true, 1072 - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, 1072 + .pipe_split_policy = MPC_SPLIT_DYNAMIC, 1073 1073 .force_single_disp_pipe_split = false, 1074 1074 .disable_dcc = DCC_ENABLE, 1075 1075 .vsr_support = true,
+1 -1
drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
··· 603 603 .timing_trace = false, 604 604 .clock_trace = true, 605 605 .disable_pplib_clock_request = true, 606 - .pipe_split_policy = MPC_SPLIT_AVOID, 606 + .pipe_split_policy = MPC_SPLIT_DYNAMIC, 607 607 .force_single_disp_pipe_split = false, 608 608 .disable_dcc = DCC_ENABLE, 609 609 .vsr_support = true,
+1 -1
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
··· 874 874 .clock_trace = true, 875 875 .disable_pplib_clock_request = true, 876 876 .min_disp_clk_khz = 100000, 877 - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, 877 + .pipe_split_policy = MPC_SPLIT_DYNAMIC, 878 878 .force_single_disp_pipe_split = false, 879 879 .disable_dcc = DCC_ENABLE, 880 880 .vsr_support = true,
+1 -1
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
··· 840 840 .timing_trace = false, 841 841 .clock_trace = true, 842 842 .disable_pplib_clock_request = true, 843 - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, 843 + .pipe_split_policy = MPC_SPLIT_DYNAMIC, 844 844 .force_single_disp_pipe_split = false, 845 845 .disable_dcc = DCC_ENABLE, 846 846 .vsr_support = true,
+1 -1
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
··· 686 686 .disable_clock_gate = true, 687 687 .disable_pplib_clock_request = true, 688 688 .disable_pplib_wm_range = true, 689 - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, 689 + .pipe_split_policy = MPC_SPLIT_DYNAMIC, 690 690 .force_single_disp_pipe_split = false, 691 691 .disable_dcc = DCC_ENABLE, 692 692 .vsr_support = true,
+1 -1
drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
··· 211 211 .timing_trace = false, 212 212 .clock_trace = true, 213 213 .disable_pplib_clock_request = true, 214 - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, 214 + .pipe_split_policy = MPC_SPLIT_DYNAMIC, 215 215 .force_single_disp_pipe_split = false, 216 216 .disable_dcc = DCC_ENABLE, 217 217 .vsr_support = true,
+1 -1
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
··· 193 193 .timing_trace = false, 194 194 .clock_trace = true, 195 195 .disable_pplib_clock_request = true, 196 - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, 196 + .pipe_split_policy = MPC_SPLIT_DYNAMIC, 197 197 .force_single_disp_pipe_split = false, 198 198 .disable_dcc = DCC_ENABLE, 199 199 .vsr_support = true,
+1
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
··· 101 101 .z10_restore = dcn31_z10_restore, 102 102 .z10_save_init = dcn31_z10_save_init, 103 103 .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, 104 + .optimize_pwr_state = dcn21_optimize_pwr_state, 104 105 .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, 105 106 .update_visual_confirm_color = dcn20_update_visual_confirm_color, 106 107 };
+24 -3
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
··· 355 355 clk_src_regs(3, D), 356 356 clk_src_regs(4, E) 357 357 }; 358 + /*pll_id being rempped in dmub, in driver it is logical instance*/ 359 + static const struct dce110_clk_src_regs clk_src_regs_b0[] = { 360 + clk_src_regs(0, A), 361 + clk_src_regs(1, B), 362 + clk_src_regs(2, F), 363 + clk_src_regs(3, G), 364 + clk_src_regs(4, E) 365 + }; 358 366 359 367 static const struct dce110_clk_src_shift cs_shift = { 360 368 CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) ··· 1002 994 .timing_trace = false, 1003 995 .clock_trace = true, 1004 996 .disable_pplib_clock_request = false, 1005 - .pipe_split_policy = MPC_SPLIT_AVOID, 997 + .pipe_split_policy = MPC_SPLIT_DYNAMIC, 1006 998 .force_single_disp_pipe_split = false, 1007 999 .disable_dcc = DCC_ENABLE, 1008 1000 .vsr_support = true, ··· 2284 2276 dcn30_clock_source_create(ctx, ctx->dc_bios, 2285 2277 CLOCK_SOURCE_COMBO_PHY_PLL1, 2286 2278 &clk_src_regs[1], false); 2287 - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = 2279 + /*move phypllx_pixclk_resync to dmub next*/ 2280 + if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { 2281 + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = 2282 + dcn30_clock_source_create(ctx, ctx->dc_bios, 2283 + CLOCK_SOURCE_COMBO_PHY_PLL2, 2284 + &clk_src_regs_b0[2], false); 2285 + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = 2286 + dcn30_clock_source_create(ctx, ctx->dc_bios, 2287 + CLOCK_SOURCE_COMBO_PHY_PLL3, 2288 + &clk_src_regs_b0[3], false); 2289 + } else { 2290 + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = 2288 2291 dcn30_clock_source_create(ctx, ctx->dc_bios, 2289 2292 CLOCK_SOURCE_COMBO_PHY_PLL2, 2290 2293 &clk_src_regs[2], false); 2291 - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = 2294 + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = 2292 2295 dcn30_clock_source_create(ctx, ctx->dc_bios, 2293 2296 CLOCK_SOURCE_COMBO_PHY_PLL3, 2294 2297 &clk_src_regs[3], false); 2298 + } 2299 + 2295 2300 pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = 2296 2301 dcn30_clock_source_create(ctx, 
ctx->dc_bios, 2297 2302 CLOCK_SOURCE_COMBO_PHY_PLL4,
+31
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
··· 49 49 const struct dc_init_data *init_data, 50 50 struct dc *dc); 51 51 52 + /*temp: B0 specific before switch to dcn313 headers*/ 53 + #ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL 54 + #define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e 55 + #define regPHYPLLF_PIXCLK_RESYNC_CNTL_BASE_IDX 1 56 + #define regPHYPLLG_PIXCLK_RESYNC_CNTL 0x005f 57 + #define regPHYPLLG_PIXCLK_RESYNC_CNTL_BASE_IDX 1 58 + 59 + //PHYPLLF_PIXCLK_RESYNC_CNTL 60 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 61 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 62 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 63 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE__SHIFT 0x8 64 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 65 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L 66 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L 67 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L 68 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE_MASK 0x00000100L 69 + #define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L 70 + 71 + //PHYPLLG_PIXCLK_RESYNC_CNTL 72 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 73 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 74 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 75 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE__SHIFT 0x8 76 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 77 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L 78 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L 79 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L 80 
+ #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE_MASK 0x00000100L 81 + #define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L 82 + #endif 52 83 #endif /* _DCN31_RESOURCE_H_ */
+49
drivers/gpu/drm/amd/include/discovery.h
··· 143 143 uint32_t gc_num_gl2a; 144 144 }; 145 145 146 + struct gc_info_v1_1 { 147 + struct gpu_info_header header; 148 + 149 + uint32_t gc_num_se; 150 + uint32_t gc_num_wgp0_per_sa; 151 + uint32_t gc_num_wgp1_per_sa; 152 + uint32_t gc_num_rb_per_se; 153 + uint32_t gc_num_gl2c; 154 + uint32_t gc_num_gprs; 155 + uint32_t gc_num_max_gs_thds; 156 + uint32_t gc_gs_table_depth; 157 + uint32_t gc_gsprim_buff_depth; 158 + uint32_t gc_parameter_cache_depth; 159 + uint32_t gc_double_offchip_lds_buffer; 160 + uint32_t gc_wave_size; 161 + uint32_t gc_max_waves_per_simd; 162 + uint32_t gc_max_scratch_slots_per_cu; 163 + uint32_t gc_lds_size; 164 + uint32_t gc_num_sc_per_se; 165 + uint32_t gc_num_sa_per_se; 166 + uint32_t gc_num_packer_per_sc; 167 + uint32_t gc_num_gl2a; 168 + uint32_t gc_num_tcp_per_sa; 169 + uint32_t gc_num_sdp_interface; 170 + uint32_t gc_num_tcps; 171 + }; 172 + 173 + struct gc_info_v2_0 { 174 + struct gpu_info_header header; 175 + 176 + uint32_t gc_num_se; 177 + uint32_t gc_num_cu_per_sh; 178 + uint32_t gc_num_sh_per_se; 179 + uint32_t gc_num_rb_per_se; 180 + uint32_t gc_num_tccs; 181 + uint32_t gc_num_gprs; 182 + uint32_t gc_num_max_gs_thds; 183 + uint32_t gc_gs_table_depth; 184 + uint32_t gc_gsprim_buff_depth; 185 + uint32_t gc_parameter_cache_depth; 186 + uint32_t gc_double_offchip_lds_buffer; 187 + uint32_t gc_wave_size; 188 + uint32_t gc_max_waves_per_simd; 189 + uint32_t gc_max_scratch_slots_per_cu; 190 + uint32_t gc_lds_size; 191 + uint32_t gc_num_sc_per_se; 192 + uint32_t gc_num_packer_per_sc; 193 + }; 194 + 146 195 typedef struct harvest_info_header { 147 196 uint32_t signature; /* Table Signature */ 148 197 uint32_t version; /* Table Version */
+2 -5
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
··· 1568 1568 1569 1569 smu->watermarks_bitmap &= ~(WATERMARKS_LOADED); 1570 1570 1571 - /* skip CGPG when in S0ix */ 1572 - if (smu->is_apu && !adev->in_s0ix) 1573 - smu_set_gfx_cgpg(&adev->smu, false); 1571 + smu_set_gfx_cgpg(&adev->smu, false); 1574 1572 1575 1573 return 0; 1576 1574 } ··· 1599 1601 return ret; 1600 1602 } 1601 1603 1602 - if (smu->is_apu) 1603 - smu_set_gfx_cgpg(&adev->smu, true); 1604 + smu_set_gfx_cgpg(&adev->smu, true); 1604 1605 1605 1606 smu->disable_uclk_switch = 0; 1606 1607
+2 -1
drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
··· 120 120 121 121 int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) 122 122 { 123 - if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 123 + /* Until now the SMU12 only implemented for Renoir series so here neen't do APU check. */ 124 + if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) || smu->adev->in_s0ix) 124 125 return 0; 125 126 126 127 return smu_cmn_send_smc_msg_with_param(smu,
+1 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
··· 1621 1621 { 1622 1622 return smu_cmn_send_smc_msg_with_param(smu, 1623 1623 SMU_MSG_GmiPwrDnControl, 1624 - en ? 1 : 0, 1624 + en ? 0 : 1, 1625 1625 NULL); 1626 1626 } 1627 1627
+1 -1
drivers/gpu/drm/i915/gem/i915_gem_context.c
··· 564 564 container_of_user(base, typeof(*ext), base); 565 565 const struct set_proto_ctx_engines *set = data; 566 566 struct drm_i915_private *i915 = set->i915; 567 + struct i915_engine_class_instance prev_engine; 567 568 u64 flags; 568 569 int err = 0, n, i, j; 569 570 u16 slot, width, num_siblings; ··· 630 629 /* Create contexts / engines */ 631 630 for (i = 0; i < width; ++i) { 632 631 intel_engine_mask_t current_mask = 0; 633 - struct i915_engine_class_instance prev_engine; 634 632 635 633 for (j = 0; j < num_siblings; ++j) { 636 634 struct i915_engine_class_instance ci;
+1 -1
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
··· 3017 3017 fence_array = dma_fence_array_create(eb->num_batches, 3018 3018 fences, 3019 3019 eb->context->parallel.fence_context, 3020 - eb->context->parallel.seqno, 3020 + eb->context->parallel.seqno++, 3021 3021 false); 3022 3022 if (!fence_array) { 3023 3023 kfree(fences);
+28 -26
drivers/gpu/drm/nouveau/nouveau_fence.c
··· 353 353 354 354 if (ret) 355 355 return ret; 356 + 357 + fobj = NULL; 358 + } else { 359 + fobj = dma_resv_shared_list(resv); 356 360 } 357 361 358 - fobj = dma_resv_shared_list(resv); 359 - fence = dma_resv_excl_fence(resv); 360 - 361 - if (fence) { 362 - struct nouveau_channel *prev = NULL; 363 - bool must_wait = true; 364 - 365 - f = nouveau_local_fence(fence, chan->drm); 366 - if (f) { 367 - rcu_read_lock(); 368 - prev = rcu_dereference(f->channel); 369 - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) 370 - must_wait = false; 371 - rcu_read_unlock(); 372 - } 373 - 374 - if (must_wait) 375 - ret = dma_fence_wait(fence, intr); 376 - 377 - return ret; 378 - } 379 - 380 - if (!exclusive || !fobj) 381 - return ret; 382 - 383 - for (i = 0; i < fobj->shared_count && !ret; ++i) { 362 + /* Waiting for the exclusive fence first causes performance regressions 363 + * under some circumstances. So manually wait for the shared ones first. 364 + */ 365 + for (i = 0; i < (fobj ? fobj->shared_count : 0) && !ret; ++i) { 384 366 struct nouveau_channel *prev = NULL; 385 367 bool must_wait = true; 386 368 ··· 380 398 381 399 if (must_wait) 382 400 ret = dma_fence_wait(fence, intr); 401 + } 402 + 403 + fence = dma_resv_excl_fence(resv); 404 + if (fence) { 405 + struct nouveau_channel *prev = NULL; 406 + bool must_wait = true; 407 + 408 + f = nouveau_local_fence(fence, chan->drm); 409 + if (f) { 410 + rcu_read_lock(); 411 + prev = rcu_dereference(f->channel); 412 + if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) 413 + must_wait = false; 414 + rcu_read_unlock(); 415 + } 416 + 417 + if (must_wait) 418 + ret = dma_fence_wait(fence, intr); 419 + 420 + return ret; 383 421 } 384 422 385 423 return ret;