Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-fixes-2025-09-19' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
"Weekly fixes for drm. It's a bit busier than I'd like on the xe side
this week, but otherwise there are amdgpu fixes, some smaller fixes for
i915/bridge, and a revert on docs.

docs:
- fix docs build regression

i915:
- Honor VESA eDP backlight luminance control capability

bridge:
- anx7625: Fix NULL pointer dereference with early IRQ
- cdns-mhdp8546: Fix missing mutex unlock on error path

xe:
- Release kobject for the failure path
- SRIOV PF: Drop rounddown_pow_of_two fair LMEM limitation
- Remove type casting on hwmon
- Defer free of NVM auxiliary container to device release
- Fix a NULL vs IS_ERR
- Add cleanup action in xe_device_sysfs_init
- Fix error handling if PXP fails to start
- Set GuC RCS/CCS yield policy

amdgpu:
- GC 11.0.1/4 cleaner shader support
- DC irq fix
- OD fix

amdkfd:
- S0ix fix"

* tag 'drm-fixes-2025-09-19' of https://gitlab.freedesktop.org/drm/kernel:
drm/amdgpu: suspend KFD and KGD user queues for S0ix
drm/amdkfd: add proper handling for S0ix
drm/xe/guc: Set RCS/CCS yield policy
drm/xe: Fix error handling if PXP fails to start
drm/xe/sysfs: Add cleanup action in xe_device_sysfs_init
drm/amd: Only restore cached manual clock settings in restore if OD enabled
drm/xe: Fix a NULL vs IS_ERR() in xe_vm_add_compute_exec_queue()
drm: bridge: cdns-mhdp8546: Fix missing mutex unlock on error path
drm/i915/backlight: Honor VESA eDP backlight luminance control capability
drm/amd/display: Allow RX6xxx & RX7700 to invoke amdgpu_irq_get/put
drm/amdgpu/gfx11: Add Cleaner Shader Support for GFX11.0.1/11.0.4 GPUs
drm: bridge: anx7625: Fix NULL pointer dereference with early IRQ
drm/xe: defer free of NVM auxiliary container to device release callback
drm/xe/hwmon: Remove type casting
drm/xe/pf: Drop rounddown_pow_of_two fair LMEM limitation
drm/xe/tile: Release kobject for the failure path
Revert "drm: Add directive to format code in comment"

+342 -101
+12 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
··· 250 250 251 251 void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc) 252 252 { 253 - if (adev->kfd.dev) 254 - kgd2kfd_suspend(adev->kfd.dev, suspend_proc); 253 + if (adev->kfd.dev) { 254 + if (adev->in_s0ix) 255 + kgd2kfd_stop_sched_all_nodes(adev->kfd.dev); 256 + else 257 + kgd2kfd_suspend(adev->kfd.dev, suspend_proc); 258 + } 255 259 } 256 260 257 261 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc) 258 262 { 259 263 int r = 0; 260 264 261 - if (adev->kfd.dev) 262 - r = kgd2kfd_resume(adev->kfd.dev, resume_proc); 265 + if (adev->kfd.dev) { 266 + if (adev->in_s0ix) 267 + r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev); 268 + else 269 + r = kgd2kfd_resume(adev->kfd.dev, resume_proc); 270 + } 263 271 264 272 return r; 265 273 }
+12
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 426 426 int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd); 427 427 void kgd2kfd_unlock_kfd(struct kfd_dev *kfd); 428 428 int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); 429 + int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd); 429 430 int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); 431 + int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd); 430 432 bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); 431 433 bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, 432 434 bool retry_fault); ··· 518 516 return 0; 519 517 } 520 518 519 + static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) 520 + { 521 + return 0; 522 + } 523 + 521 524 static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) 525 + { 526 + return 0; 527 + } 528 + 529 + static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) 522 530 { 523 531 return 0; 524 532 }
+10 -14
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 5136 5136 adev->in_suspend = true; 5137 5137 5138 5138 if (amdgpu_sriov_vf(adev)) { 5139 - if (!adev->in_s0ix && !adev->in_runpm) 5139 + if (!adev->in_runpm) 5140 5140 amdgpu_amdkfd_suspend_process(adev); 5141 5141 amdgpu_virt_fini_data_exchange(adev); 5142 5142 r = amdgpu_virt_request_full_gpu(adev, false); ··· 5156 5156 5157 5157 amdgpu_device_ip_suspend_phase1(adev); 5158 5158 5159 - if (!adev->in_s0ix) { 5160 - amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); 5161 - amdgpu_userq_suspend(adev); 5162 - } 5159 + amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); 5160 + amdgpu_userq_suspend(adev); 5163 5161 5164 5162 r = amdgpu_device_evict_resources(adev); 5165 5163 if (r) ··· 5252 5254 goto exit; 5253 5255 } 5254 5256 5255 - if (!adev->in_s0ix) { 5256 - r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); 5257 - if (r) 5258 - goto exit; 5257 + r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); 5258 + if (r) 5259 + goto exit; 5259 5260 5260 - r = amdgpu_userq_resume(adev); 5261 - if (r) 5262 - goto exit; 5263 - } 5261 + r = amdgpu_userq_resume(adev); 5262 + if (r) 5263 + goto exit; 5264 5264 5265 5265 r = amdgpu_device_ip_late_init(adev); 5266 5266 if (r) ··· 5271 5275 amdgpu_virt_init_data_exchange(adev); 5272 5276 amdgpu_virt_release_full_gpu(adev, true); 5273 5277 5274 - if (!adev->in_s0ix && !r && !adev->in_runpm) 5278 + if (!r && !adev->in_runpm) 5275 5279 r = amdgpu_amdkfd_resume_process(adev); 5276 5280 } 5277 5281
+15
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
··· 1654 1654 } 1655 1655 } 1656 1656 break; 1657 + case IP_VERSION(11, 0, 1): 1658 + case IP_VERSION(11, 0, 4): 1659 + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1660 + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1661 + if (adev->gfx.pfp_fw_version >= 102 && 1662 + adev->gfx.mec_fw_version >= 66 && 1663 + adev->mes.fw_version[0] >= 128) { 1664 + adev->gfx.enable_cleaner_shader = true; 1665 + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1666 + if (r) { 1667 + adev->gfx.enable_cleaner_shader = false; 1668 + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1669 + } 1670 + } 1671 + break; 1657 1672 case IP_VERSION(11, 5, 0): 1658 1673 case IP_VERSION(11, 5, 1): 1659 1674 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+36
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 1550 1550 return ret; 1551 1551 } 1552 1552 1553 + int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) 1554 + { 1555 + struct kfd_node *node; 1556 + int i, r; 1557 + 1558 + if (!kfd->init_complete) 1559 + return 0; 1560 + 1561 + for (i = 0; i < kfd->num_nodes; i++) { 1562 + node = kfd->nodes[i]; 1563 + r = node->dqm->ops.unhalt(node->dqm); 1564 + if (r) { 1565 + dev_err(kfd_device, "Error in starting scheduler\n"); 1566 + return r; 1567 + } 1568 + } 1569 + return 0; 1570 + } 1571 + 1553 1572 int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) 1554 1573 { 1555 1574 struct kfd_node *node; ··· 1584 1565 1585 1566 node = kfd->nodes[node_id]; 1586 1567 return node->dqm->ops.halt(node->dqm); 1568 + } 1569 + 1570 + int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) 1571 + { 1572 + struct kfd_node *node; 1573 + int i, r; 1574 + 1575 + if (!kfd->init_complete) 1576 + return 0; 1577 + 1578 + for (i = 0; i < kfd->num_nodes; i++) { 1579 + node = kfd->nodes[i]; 1580 + r = node->dqm->ops.halt(node->dqm); 1581 + if (r) 1582 + return r; 1583 + } 1584 + return 0; 1587 1585 } 1588 1586 1589 1587 bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+38 -1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
··· 8717 8717 static void manage_dm_interrupts(struct amdgpu_device *adev, 8718 8718 struct amdgpu_crtc *acrtc, 8719 8719 struct dm_crtc_state *acrtc_state) 8720 - { 8720 + { /* 8721 + * We cannot be sure that the frontend index maps to the same 8722 + * backend index - some even map to more than one. 8723 + * So we have to go through the CRTC to find the right IRQ. 8724 + */ 8725 + int irq_type = amdgpu_display_crtc_idx_to_irq_type( 8726 + adev, 8727 + acrtc->crtc_id); 8728 + struct drm_device *dev = adev_to_drm(adev); 8729 + 8721 8730 struct drm_vblank_crtc_config config = {0}; 8722 8731 struct dc_crtc_timing *timing; 8723 8732 int offdelay; ··· 8779 8770 8780 8771 drm_crtc_vblank_on_config(&acrtc->base, 8781 8772 &config); 8773 + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get.*/ 8774 + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { 8775 + case IP_VERSION(3, 0, 0): 8776 + case IP_VERSION(3, 0, 2): 8777 + case IP_VERSION(3, 0, 3): 8778 + case IP_VERSION(3, 2, 0): 8779 + if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type)) 8780 + drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n"); 8781 + #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) 8782 + if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type)) 8783 + drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n"); 8784 + #endif 8785 + } 8786 + 8782 8787 } else { 8788 + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put.*/ 8789 + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { 8790 + case IP_VERSION(3, 0, 0): 8791 + case IP_VERSION(3, 0, 2): 8792 + case IP_VERSION(3, 0, 3): 8793 + case IP_VERSION(3, 2, 0): 8794 + #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) 8795 + if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type)) 8796 + drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n"); 8797 + #endif 8798 + if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type)) 8799 + drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n"); 8800 + } 8801 + 8783 8802 drm_crtc_vblank_off(&acrtc->base); 8784 8803 } 8785 8804 }
+1 -1
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
··· 2236 2236 return ret; 2237 2237 } 2238 2238 2239 - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { 2239 + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL && smu->od_enabled) { 2240 2240 ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0); 2241 2241 if (ret) 2242 2242 return ret;
+4 -2
drivers/gpu/drm/bridge/analogix/anx7625.c
··· 2677 2677 ret = devm_request_threaded_irq(dev, platform->pdata.intp_irq, 2678 2678 NULL, anx7625_intr_hpd_isr, 2679 2679 IRQF_TRIGGER_FALLING | 2680 - IRQF_ONESHOT, 2680 + IRQF_ONESHOT | IRQF_NO_AUTOEN, 2681 2681 "anx7625-intp", platform); 2682 2682 if (ret) { 2683 2683 DRM_DEV_ERROR(dev, "fail to request irq\n"); ··· 2746 2746 } 2747 2747 2748 2748 /* Add work function */ 2749 - if (platform->pdata.intp_irq) 2749 + if (platform->pdata.intp_irq) { 2750 + enable_irq(platform->pdata.intp_irq); 2750 2751 queue_work(platform->workqueue, &platform->work); 2752 + } 2751 2753 2752 2754 if (platform->pdata.audio_en) 2753 2755 anx7625_register_audio(dev, platform);
+4 -2
drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
··· 1984 1984 mhdp_state = to_cdns_mhdp_bridge_state(new_state); 1985 1985 1986 1986 mhdp_state->current_mode = drm_mode_duplicate(bridge->dev, mode); 1987 - if (!mhdp_state->current_mode) 1988 - return; 1987 + if (!mhdp_state->current_mode) { 1988 + ret = -EINVAL; 1989 + goto out; 1990 + } 1989 1991 1990 1992 drm_mode_set_name(mhdp_state->current_mode); 1991 1993
-2
drivers/gpu/drm/drm_gpuvm.c
··· 2432 2432 * 2433 2433 * The expected usage is:: 2434 2434 * 2435 - * .. code-block:: c 2436 - * 2437 2435 * vm_bind { 2438 2436 * struct drm_exec exec; 2439 2437 *
+1 -1
drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
··· 546 546 luminance_range->max_luminance, 547 547 panel->vbt.backlight.pwm_freq_hz, 548 548 intel_dp->edp_dpcd, &current_level, &current_mode, 549 - false); 549 + panel->backlight.edp.vesa.luminance_control_support); 550 550 if (ret < 0) 551 551 return ret; 552 552
+1
drivers/gpu/drm/xe/abi/guc_actions_abi.h
··· 117 117 XE_GUC_ACTION_ENTER_S_STATE = 0x501, 118 118 XE_GUC_ACTION_EXIT_S_STATE = 0x502, 119 119 XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, 120 + XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509, 120 121 XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, 121 122 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, 122 123 XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
+25
drivers/gpu/drm/xe/abi/guc_klvs_abi.h
··· 17 17 * | 0 | 31:16 | **KEY** - KLV key identifier | 18 18 * | | | - `GuC Self Config KLVs`_ | 19 19 * | | | - `GuC Opt In Feature KLVs`_ | 20 + * | | | - `GuC Scheduling Policies KLVs`_ | 20 21 * | | | - `GuC VGT Policy KLVs`_ | 21 22 * | | | - `GuC VF Configuration KLVs`_ | 22 23 * | | | | ··· 152 151 153 152 #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003 154 153 #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u 154 + 155 + /** 156 + * DOC: GuC Scheduling Policies KLVs 157 + * 158 + * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV. 159 + * 160 + * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001 161 + * Some platforms do not allow concurrent execution of RCS and CCS 162 + * workloads from different address spaces. By default, the GuC prioritizes 163 + * RCS submissions over CCS ones, which can lead to CCS workloads being 164 + * significantly (or completely) starved of execution time. This KLV allows 165 + * the driver to specify a quantum (in ms) and a ratio (percentage value 166 + * between 0 and 100), and the GuC will prioritize the CCS for that 167 + * percentage of each quantum. For example, specifying 100ms and 30% will 168 + * make the GuC prioritize the CCS for 30ms of every 100ms. 169 + * Note that this does not necessarily mean that RCS and CCS engines will 170 + * only be active for their percentage of the quantum, as the restriction 171 + * only kicks in if both classes are fully busy with non-compatible address 172 + * spaces; i.e., if one engine is idle or running the same address space, 173 + * a pending job on the other engine will still be submitted to the HW no 174 + * matter what the ratio is. 175 + */ 176 + #define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001 177 + #define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u 155 178 156 179 /** 157 180 * DOC: GuC VGT Policy KLVs
+6 -2
drivers/gpu/drm/xe/xe_device_sysfs.c
··· 311 311 if (xe->info.platform == XE_BATTLEMAGE) { 312 312 ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); 313 313 if (ret) 314 - return ret; 314 + goto cleanup; 315 315 316 316 ret = late_bind_create_files(dev); 317 317 if (ret) 318 - return ret; 318 + goto cleanup; 319 319 } 320 320 321 321 return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); 322 + 323 + cleanup: 324 + xe_device_sysfs_fini(xe); 325 + return ret; 322 326 }
+15 -7
drivers/gpu/drm/xe/xe_exec_queue.c
··· 151 151 return err; 152 152 } 153 153 154 + static void __xe_exec_queue_fini(struct xe_exec_queue *q) 155 + { 156 + int i; 157 + 158 + q->ops->fini(q); 159 + 160 + for (i = 0; i < q->width; ++i) 161 + xe_lrc_put(q->lrc[i]); 162 + } 163 + 154 164 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, 155 165 u32 logical_mask, u16 width, 156 166 struct xe_hw_engine *hwe, u32 flags, ··· 191 181 if (xe_exec_queue_uses_pxp(q)) { 192 182 err = xe_pxp_exec_queue_add(xe->pxp, q); 193 183 if (err) 194 - goto err_post_alloc; 184 + goto err_post_init; 195 185 } 196 186 197 187 return q; 198 188 189 + err_post_init: 190 + __xe_exec_queue_fini(q); 199 191 err_post_alloc: 200 192 __xe_exec_queue_free(q); 201 193 return ERR_PTR(err); ··· 295 283 xe_exec_queue_put(eq); 296 284 } 297 285 298 - q->ops->fini(q); 286 + q->ops->destroy(q); 299 287 } 300 288 301 289 void xe_exec_queue_fini(struct xe_exec_queue *q) 302 290 { 303 - int i; 304 - 305 291 /* 306 292 * Before releasing our ref to lrc and xef, accumulate our run ticks 307 293 * and wakeup any waiters. ··· 308 298 if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) 309 299 wake_up_var(&q->xef->exec_queue.pending_removal); 310 300 311 - for (i = 0; i < q->width; ++i) 312 - xe_lrc_put(q->lrc[i]); 313 - 301 + __xe_exec_queue_fini(q); 314 302 __xe_exec_queue_free(q); 315 303 } 316 304
+7 -1
drivers/gpu/drm/xe/xe_exec_queue_types.h
··· 166 166 int (*init)(struct xe_exec_queue *q); 167 167 /** @kill: Kill inflight submissions for backend */ 168 168 void (*kill)(struct xe_exec_queue *q); 169 - /** @fini: Fini exec queue for submission backend */ 169 + /** @fini: Undoes the init() for submission backend */ 170 170 void (*fini)(struct xe_exec_queue *q); 171 + /** 172 + * @destroy: Destroy exec queue for submission backend. The backend 173 + * function must call xe_exec_queue_fini() (which will in turn call the 174 + * fini() backend function) to ensure the queue is properly cleaned up. 175 + */ 176 + void (*destroy)(struct xe_exec_queue *q); 171 177 /** @set_priority: Set priority for exec queue */ 172 178 int (*set_priority)(struct xe_exec_queue *q, 173 179 enum xe_exec_queue_priority priority);
+16 -9
drivers/gpu/drm/xe/xe_execlist.c
··· 385 385 return err; 386 386 } 387 387 388 - static void execlist_exec_queue_fini_async(struct work_struct *w) 388 + static void execlist_exec_queue_fini(struct xe_exec_queue *q) 389 + { 390 + struct xe_execlist_exec_queue *exl = q->execlist; 391 + 392 + drm_sched_entity_fini(&exl->entity); 393 + drm_sched_fini(&exl->sched); 394 + 395 + kfree(exl); 396 + } 397 + 398 + static void execlist_exec_queue_destroy_async(struct work_struct *w) 389 399 { 390 400 struct xe_execlist_exec_queue *ee = 391 - container_of(w, struct xe_execlist_exec_queue, fini_async); 401 + container_of(w, struct xe_execlist_exec_queue, destroy_async); 392 402 struct xe_exec_queue *q = ee->q; 393 403 struct xe_execlist_exec_queue *exl = q->execlist; 394 404 struct xe_device *xe = gt_to_xe(q->gt); ··· 411 401 list_del(&exl->active_link); 412 402 spin_unlock_irqrestore(&exl->port->lock, flags); 413 403 414 - drm_sched_entity_fini(&exl->entity); 415 - drm_sched_fini(&exl->sched); 416 - kfree(exl); 417 - 418 404 xe_exec_queue_fini(q); 419 405 } 420 406 ··· 419 413 /* NIY */ 420 414 } 421 415 422 - static void execlist_exec_queue_fini(struct xe_exec_queue *q) 416 + static void execlist_exec_queue_destroy(struct xe_exec_queue *q) 423 417 { 424 - INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); 425 - queue_work(system_unbound_wq, &q->execlist->fini_async); 418 + INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); 419 + queue_work(system_unbound_wq, &q->execlist->destroy_async); 426 420 } 427 421 428 422 static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, ··· 473 467 .init = execlist_exec_queue_init, 474 468 .kill = execlist_exec_queue_kill, 475 469 .fini = execlist_exec_queue_fini, 470 + .destroy = execlist_exec_queue_destroy, 476 471 .set_priority = execlist_exec_queue_set_priority, 477 472 .set_timeslice = execlist_exec_queue_set_timeslice, 478 473 .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
+1 -1
drivers/gpu/drm/xe/xe_execlist_types.h
··· 42 42 43 43 bool has_run; 44 44 45 - struct work_struct fini_async; 45 + struct work_struct destroy_async; 46 46 47 47 enum xe_exec_queue_priority active_priority; 48 48 struct list_head active_link;
+2 -1
drivers/gpu/drm/xe/xe_gt.c
··· 41 41 #include "xe_gt_topology.h" 42 42 #include "xe_guc_exec_queue_types.h" 43 43 #include "xe_guc_pc.h" 44 + #include "xe_guc_submit.h" 44 45 #include "xe_hw_fence.h" 45 46 #include "xe_hw_engine_class_sysfs.h" 46 47 #include "xe_irq.h" ··· 98 97 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not 99 98 * reload 100 99 */ 101 - gt->uc.guc.submission_state.enabled = false; 100 + xe_guc_submit_disable(&gt->uc.guc); 102 101 } 103 102 104 103 static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
-1
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
··· 1632 1632 u64 fair; 1633 1633 1634 1634 fair = div_u64(available, num_vfs); 1635 - fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ 1636 1635 fair = ALIGN_DOWN(fair, alignment); 1637 1636 #ifdef MAX_FAIR_LMEM 1638 1637 fair = min_t(u64, MAX_FAIR_LMEM, fair);
+2 -4
drivers/gpu/drm/xe/xe_guc.c
··· 880 880 return ret; 881 881 } 882 882 883 - guc->submission_state.enabled = true; 884 - 885 - return 0; 883 + return xe_guc_submit_enable(guc); 886 884 } 887 885 888 886 int xe_guc_reset(struct xe_guc *guc) ··· 1577 1579 { 1578 1580 xe_uc_fw_sanitize(&guc->fw); 1579 1581 xe_guc_ct_disable(&guc->ct); 1580 - guc->submission_state.enabled = false; 1582 + xe_guc_submit_disable(guc); 1581 1583 } 1582 1584 1583 1585 int xe_guc_reset_prepare(struct xe_guc *guc)
+2 -2
drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
··· 35 35 struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; 36 36 /** @lr_tdr: long running TDR worker */ 37 37 struct work_struct lr_tdr; 38 - /** @fini_async: do final fini async from this worker */ 39 - struct work_struct fini_async; 38 + /** @destroy_async: do final destroy async from this worker */ 39 + struct work_struct destroy_async; 40 40 /** @resume_time: time of last resume */ 41 41 u64 resume_time; 42 42 /** @state: GuC specific state for this xe_exec_queue */
+98 -22
drivers/gpu/drm/xe/xe_guc_submit.c
··· 32 32 #include "xe_guc_ct.h" 33 33 #include "xe_guc_exec_queue_types.h" 34 34 #include "xe_guc_id_mgr.h" 35 + #include "xe_guc_klv_helpers.h" 35 36 #include "xe_guc_submit_types.h" 36 37 #include "xe_hw_engine.h" 37 38 #include "xe_hw_fence.h" ··· 315 314 guc->submission_state.initialized = true; 316 315 317 316 return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); 317 + } 318 + 319 + /* 320 + * Given that we want to guarantee enough RCS throughput to avoid missing 321 + * frames, we set the yield policy to 20% of each 80ms interval. 322 + */ 323 + #define RC_YIELD_DURATION 80 /* in ms */ 324 + #define RC_YIELD_RATIO 20 /* in percent */ 325 + static u32 *emit_render_compute_yield_klv(u32 *emit) 326 + { 327 + *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); 328 + *emit++ = RC_YIELD_DURATION; 329 + *emit++ = RC_YIELD_RATIO; 330 + 331 + return emit; 332 + } 333 + 334 + #define SCHEDULING_POLICY_MAX_DWORDS 16 335 + static int guc_init_global_schedule_policy(struct xe_guc *guc) 336 + { 337 + u32 data[SCHEDULING_POLICY_MAX_DWORDS]; 338 + u32 *emit = data; 339 + u32 count = 0; 340 + int ret; 341 + 342 + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 343 + return 0; 344 + 345 + *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 346 + 347 + if (CCS_MASK(guc_to_gt(guc))) 348 + emit = emit_render_compute_yield_klv(emit); 349 + 350 + count = emit - data; 351 + if (count > 1) { 352 + xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); 353 + 354 + ret = xe_guc_ct_send_block(&guc->ct, data, count); 355 + if (ret < 0) { 356 + xe_gt_err(guc_to_gt(guc), 357 + "failed to enable GuC sheduling policies: %pe\n", 358 + ERR_PTR(ret)); 359 + return ret; 360 + } 361 + } 362 + 363 + return 0; 364 + } 365 + 366 + int xe_guc_submit_enable(struct xe_guc *guc) 367 + { 368 + int ret; 369 + 370 + ret = guc_init_global_schedule_policy(guc); 371 + if (ret) 372 + return ret; 373 + 374 + guc->submission_state.enabled = true; 375 + 376 + return 
0; 377 + } 378 + 379 + void xe_guc_submit_disable(struct xe_guc *guc) 380 + { 381 + guc->submission_state.enabled = false; 318 382 } 319 383 320 384 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) ··· 1343 1277 return DRM_GPU_SCHED_STAT_NO_HANG; 1344 1278 } 1345 1279 1346 - static void __guc_exec_queue_fini_async(struct work_struct *w) 1280 + static void guc_exec_queue_fini(struct xe_exec_queue *q) 1347 1281 { 1348 - struct xe_guc_exec_queue *ge = 1349 - container_of(w, struct xe_guc_exec_queue, fini_async); 1350 - struct xe_exec_queue *q = ge->q; 1282 + struct xe_guc_exec_queue *ge = q->guc; 1351 1283 struct xe_guc *guc = exec_queue_to_guc(q); 1352 1284 1353 - xe_pm_runtime_get(guc_to_xe(guc)); 1354 - trace_xe_exec_queue_destroy(q); 1355 - 1356 1285 release_guc_id(guc, q); 1357 - if (xe_exec_queue_is_lr(q)) 1358 - cancel_work_sync(&ge->lr_tdr); 1359 - /* Confirm no work left behind accessing device structures */ 1360 - cancel_delayed_work_sync(&ge->sched.base.work_tdr); 1361 1286 xe_sched_entity_fini(&ge->entity); 1362 1287 xe_sched_fini(&ge->sched); 1363 1288 ··· 1357 1300 * (timeline name). 
1358 1301 */ 1359 1302 kfree_rcu(ge, rcu); 1303 + } 1304 + 1305 + static void __guc_exec_queue_destroy_async(struct work_struct *w) 1306 + { 1307 + struct xe_guc_exec_queue *ge = 1308 + container_of(w, struct xe_guc_exec_queue, destroy_async); 1309 + struct xe_exec_queue *q = ge->q; 1310 + struct xe_guc *guc = exec_queue_to_guc(q); 1311 + 1312 + xe_pm_runtime_get(guc_to_xe(guc)); 1313 + trace_xe_exec_queue_destroy(q); 1314 + 1315 + if (xe_exec_queue_is_lr(q)) 1316 + cancel_work_sync(&ge->lr_tdr); 1317 + /* Confirm no work left behind accessing device structures */ 1318 + cancel_delayed_work_sync(&ge->sched.base.work_tdr); 1319 + 1360 1320 xe_exec_queue_fini(q); 1321 + 1361 1322 xe_pm_runtime_put(guc_to_xe(guc)); 1362 1323 } 1363 1324 1364 - static void guc_exec_queue_fini_async(struct xe_exec_queue *q) 1325 + static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) 1365 1326 { 1366 1327 struct xe_guc *guc = exec_queue_to_guc(q); 1367 1328 struct xe_device *xe = guc_to_xe(guc); 1368 1329 1369 - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); 1330 + INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); 1370 1331 1371 1332 /* We must block on kernel engines so slabs are empty on driver unload */ 1372 1333 if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) 1373 - __guc_exec_queue_fini_async(&q->guc->fini_async); 1334 + __guc_exec_queue_destroy_async(&q->guc->destroy_async); 1374 1335 else 1375 - queue_work(xe->destroy_wq, &q->guc->fini_async); 1336 + queue_work(xe->destroy_wq, &q->guc->destroy_async); 1376 1337 } 1377 1338 1378 - static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) 1339 + static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q) 1379 1340 { 1380 1341 /* 1381 1342 * Might be done from within the GPU scheduler, need to do async as we ··· 1402 1327 * this we and don't really care when everything is fini'd, just that it 1403 1328 * is. 
1404 1329 */ 1405 - guc_exec_queue_fini_async(q); 1330 + guc_exec_queue_destroy_async(q); 1406 1331 } 1407 1332 1408 1333 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) ··· 1416 1341 if (exec_queue_registered(q)) 1417 1342 disable_scheduling_deregister(guc, q); 1418 1343 else 1419 - __guc_exec_queue_fini(guc, q); 1344 + __guc_exec_queue_destroy(guc, q); 1420 1345 } 1421 1346 1422 1347 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) ··· 1649 1574 #define STATIC_MSG_CLEANUP 0 1650 1575 #define STATIC_MSG_SUSPEND 1 1651 1576 #define STATIC_MSG_RESUME 2 1652 - static void guc_exec_queue_fini(struct xe_exec_queue *q) 1577 + static void guc_exec_queue_destroy(struct xe_exec_queue *q) 1653 1578 { 1654 1579 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 1655 1580 1656 1581 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) 1657 1582 guc_exec_queue_add_msg(q, msg, CLEANUP); 1658 1583 else 1659 - __guc_exec_queue_fini(exec_queue_to_guc(q), q); 1584 + __guc_exec_queue_destroy(exec_queue_to_guc(q), q); 1660 1585 } 1661 1586 1662 1587 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, ··· 1786 1711 .init = guc_exec_queue_init, 1787 1712 .kill = guc_exec_queue_kill, 1788 1713 .fini = guc_exec_queue_fini, 1714 + .destroy = guc_exec_queue_destroy, 1789 1715 .set_priority = guc_exec_queue_set_priority, 1790 1716 .set_timeslice = guc_exec_queue_set_timeslice, 1791 1717 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, ··· 1808 1732 if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) 1809 1733 xe_exec_queue_put(q); 1810 1734 else if (exec_queue_destroyed(q)) 1811 - __guc_exec_queue_fini(guc, q); 1735 + __guc_exec_queue_destroy(guc, q); 1812 1736 } 1813 1737 if (q->guc->suspend_pending) { 1814 1738 set_exec_queue_suspended(q); ··· 2065 1989 if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) 2066 1990 xe_exec_queue_put(q); 2067 1991 else 2068 - 
__guc_exec_queue_fini(guc, q); 1992 + __guc_exec_queue_destroy(guc, q); 2069 1993 } 2070 1994 2071 1995 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+2
drivers/gpu/drm/xe/xe_guc_submit.h
··· 13 13 struct xe_guc; 14 14 15 15 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids); 16 + int xe_guc_submit_enable(struct xe_guc *guc); 17 + void xe_guc_submit_disable(struct xe_guc *guc); 16 18 17 19 int xe_guc_submit_reset_prepare(struct xe_guc *guc); 18 20 void xe_guc_submit_reset_wait(struct xe_guc *guc);
+19 -16
drivers/gpu/drm/xe/xe_hwmon.c
··· 286 286 */ 287 287 static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) 288 288 { 289 - u64 reg_val = 0, min, max; 289 + u32 reg_val = 0; 290 290 struct xe_device *xe = hwmon->xe; 291 291 struct xe_reg rapl_limit, pkg_power_sku; 292 292 struct xe_mmio *mmio = xe_root_tile_mmio(xe); ··· 294 294 mutex_lock(&hwmon->hwmon_lock); 295 295 296 296 if (hwmon->xe->info.has_mbx_power_limits) { 297 - xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)&reg_val); 297 + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val); 298 298 } else { 299 299 rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 300 300 pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); ··· 304 304 /* Check if PL limits are disabled. */ 305 305 if (!(reg_val & PWR_LIM_EN)) { 306 306 *value = PL_DISABLE; 307 - drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n", 307 + drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n", 308 308 PWR_ATTR_TO_STR(attr), channel, reg_val); 309 309 goto unlock; 310 310 } 311 311 312 312 reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); 313 - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); 313 + *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power; 314 314 315 315 /* For platforms with mailbox power limit support clamping would be done by pcode. 
*/ 316 316 if (!hwmon->xe->info.has_mbx_power_limits) { 317 - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); 318 - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); 319 - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); 317 + u64 pkg_pwr, min, max; 318 + 319 + pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku); 320 + min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr); 321 + max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr); 320 322 min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); 321 323 max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); 322 324 if (min && max) ··· 495 493 { 496 494 struct xe_hwmon *hwmon = dev_get_drvdata(dev); 497 495 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 498 - u32 x, y, x_w = 2; /* 2 bits */ 499 - u64 r, tau4, out; 496 + u32 reg_val, x, y, x_w = 2; /* 2 bits */ 497 + u64 tau4, out; 500 498 int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; 501 499 u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; 502 500 ··· 507 505 mutex_lock(&hwmon->hwmon_lock); 508 506 509 507 if (hwmon->xe->info.has_mbx_power_limits) { 510 - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); 508 + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &reg_val); 511 509 if (ret) { 512 510 drm_err(&hwmon->xe->drm, 513 - "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n", 514 - channel, power_attr, r, ret); 515 - r = 0; 511 + "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n", 512 + channel, power_attr, reg_val, ret); 513 + reg_val = 0; 516 514 } 517 515 } else { 518 - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel)); 516 + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, 517 + channel)); 519 518 } 520 519 521 520 mutex_unlock(&hwmon->hwmon_lock); 522 521 523 522 xe_pm_runtime_put(hwmon->xe); 524 523 525 - x = REG_FIELD_GET(PWR_LIM_TIME_X, r); 526 - y = 
REG_FIELD_GET(PWR_LIM_TIME_Y, r); 524 + x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val); 525 + y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val); 527 526 528 527 /* 529 528 * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17)
+4 -1
drivers/gpu/drm/xe/xe_nvm.c
··· 35 35 36 36 static void xe_nvm_release_dev(struct device *dev) 37 37 { 38 + struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev); 39 + struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev); 40 + 41 + kfree(nvm); 38 42 } 39 43 40 44 static bool xe_nvm_non_posted_erase(struct xe_device *xe) ··· 166 162 167 163 auxiliary_device_delete(&nvm->aux_dev); 168 164 auxiliary_device_uninit(&nvm->aux_dev); 169 - kfree(nvm); 170 165 xe->nvm = NULL; 171 166 }
+7 -5
drivers/gpu/drm/xe/xe_tile_sysfs.c
··· 44 44 kt->tile = tile; 45 45 46 46 err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); 47 - if (err) { 48 - kobject_put(&kt->base); 49 - return err; 50 - } 47 + if (err) 48 + goto err_object; 51 49 52 50 tile->sysfs = &kt->base; 53 51 54 52 err = xe_vram_freq_sysfs_init(tile); 55 53 if (err) 56 - return err; 54 + goto err_object; 57 55 58 56 return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile); 57 + 58 + err_object: 59 + kobject_put(&kt->base); 60 + return err; 59 61 }
+2 -2
drivers/gpu/drm/xe/xe_vm.c
··· 240 240 241 241 pfence = xe_preempt_fence_create(q, q->lr.context, 242 242 ++q->lr.seqno); 243 - if (!pfence) { 244 - err = -ENOMEM; 243 + if (IS_ERR(pfence)) { 244 + err = PTR_ERR(pfence); 245 245 goto out_fini; 246 246 } 247 247