Merge tag 'drm-next-2024-07-26' of https://gitlab.freedesktop.org/drm/kernel

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Pull drm fixes from Dave Airlie:
"Fixes for rc1, mostly amdgpu, i915 and xe, with some other misc ones,
and there doesn't seem to be anything too serious.

amdgpu:
- Bump driver version for GFX12 DCC
- DC documentation warning fixes
- VCN unified queue power fix
- SMU fix
- RAS fix
- Display corruption fix
- SDMA 5.2 workaround
- GFX12 fixes
- Uninitialized variable fix
- VCN/JPEG 4.0.3 fixes
- Misc display fixes
- RAS fixes
- VCN4/5 harvest fix
- GPU reset fix

i915:
- Reset intel_dp->link_trained before retraining the link
- Don't switch the LTTPR mode on an active link
- Do not consider preemption during execlists_dequeue for gen8
- Allow NULL memory region

xe:
- xe_exec ioctl minor fix on sync entry cleanup upon error
- SRIOV: limit VF LMEM provisioning
- Wedge mode fixes

v3d:
- fix indirect dispatch on newer v3d revs

panel:
- fix panel backlight bindings"

* tag 'drm-next-2024-07-26' of https://gitlab.freedesktop.org/drm/kernel: (39 commits)
drm/amdgpu: reset vm state machine after gpu reset(vram lost)
drm/amdgpu: add missed harvest check for VCN IP v4/v5
drm/amdgpu: Fix eeprom max record count
drm/amdgpu: fix ras UE error injection failure issue
drm/amd/display: Remove ASSERT if significance is zero in math_ceil2
drm/amd/display: Check for NULL pointer
drm/amdgpu/vcn: Use offsets local to VCN/JPEG in VF
drm/amdgpu: Add empty HDP flush function to VCN v4.0.3
drm/amdgpu: Add empty HDP flush function to JPEG v4.0.3
drm/amd/amdgpu: Fix uninitialized variable warnings
drm/amdgpu: Fix atomics on GFX12
drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell
drm/i915: Allow NULL memory region
drm/i915/gt: Do not consider preemption during execlists_dequeue for gen8
dt-bindings: display: panel: samsung,atna33xc20: Document ATNA45AF01
drm/xe: Don't suspend device upon wedge
drm/xe: Wedge the entire device
drm/xe/pf: Limit fair VF LMEM provisioning
drm/xe/exec: Fix minor bug related to xe_sync_entry_cleanup
drm/amd/display: fix corruption with high refresh rates on DCN 3.0
...

Linus Torvalds 2 years ago 0ba9b155 65ad409e

+695 -202

56 changed files

expand all

Documentation

devicetree

bindings

display

panel

samsung,atna33xc20.yaml

gpu

amdgpu

display

dcn-blocks.rst

display-manager.rst

drivers

gpu

drm

amd

amdgpu

Makefile

amdgpu_df.h

amdgpu_discovery.c

amdgpu_drv.c

amdgpu_psp.c

amdgpu_psp.h

amdgpu_psp_ta.c

amdgpu_ras_eeprom.c

amdgpu_vcn.c

amdgpu_vcn.h

amdgpu_vm.c

df_v4_15.c

df_v4_15.h

jpeg_v4_0_3.c

sdma_v5_2.c

smu_v13_0_10.c

soc24.c

vcn_v4_0.c

vcn_v4_0_3.c

vcn_v4_0_5.c

vcn_v5_0_0.c

display

amdgpu_dm

amdgpu_dm.h

core

dc_surface.c

dml2

dml21

src

dml2_standalone_libraries

lib_float_math.c

inc

dpp.h

mpc.h

opp.h

link

hwss

link_hwss_dio.c

link_hwss_dio.h

optc

dcn10

dcn10_optc.c

dcn20

dcn20_optc.c

include

asic_reg

df_4_15_offset.h

df_4_15_sh_mask.h

swsmu

amdgpu_smu.c

smu14

smu_v14_0_0_ppt.c

i915

display

intel_dp.c

intel_dp_link_training.c

intel_execlists_submission.c

intel_memory_region.c

v3d

v3d_drv.c

v3d_drv.h

v3d_sched.c

xe_device.c

xe_exec.c

xe_gt.c

xe_gt.h

xe_gt_sriov_pf_config.c

xe_guc.c

xe_guc.h

xe_guc_submit.c

xe_guc_submit.h

xe_uc.c

xe_uc.h

+7 -1

Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml

··· 14 14 15 15 properties: 16 16 compatible: 17 - const: samsung,atna33xc20 17 + oneOf: 18 + # Samsung 13.3" FHD (1920x1080 pixels) eDP AMOLED panel 19 + - const: samsung,atna33xc20 20 + # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel 21 + - items: 22 + - const: samsung,atna45af01 23 + - const: samsung,atna33xc20 18 24 19 25 enable-gpios: true 20 26 port: true

+6 -29

Documentation/gpu/amdgpu/display/dcn-blocks.rst

··· 8 8 DCHUBBUB 9 9 -------- 10 10 11 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 11 + .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h 12 12 :doc: overview 13 - 14 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 15 - :export: 16 - 17 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 18 - :internal: 19 13 20 14 HUBP 21 15 ---- ··· 17 23 .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 18 24 :doc: overview 19 25 20 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 21 - :export: 22 - 23 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 24 - :internal: 25 - 26 26 DPP 27 27 --- 28 28 29 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 29 + .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h 30 30 :doc: overview 31 31 32 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 33 - :export: 34 - 35 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h 32 + .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h 36 33 :internal: 37 34 38 35 MPC ··· 33 48 :doc: overview 34 49 35 50 .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h 36 - :export: 37 - 38 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h 39 51 :internal: 52 + :no-identifiers: mpcc_blnd_cfg mpcc_alpha_blend_mode 40 53 41 54 OPP 42 55 --- ··· 43 60 :doc: overview 44 61 45 62 .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h 46 - :export: 47 - 48 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h 49 63 :internal: 50 64 51 65 DIO 52 66 --- 53 67 54 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h 68 + .. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c 55 69 :doc: overview 56 70 57 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h 58 - :export: 59 - 60 - .. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h 71 + .. 
kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c 61 72 :internal:

+2 -2

Documentation/gpu/amdgpu/display/display-manager.rst

··· 132 132 (MPC), as follows: 133 133 134 134 .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h 135 - :functions: mpcc_blnd_cfg 135 + :identifiers: mpcc_blnd_cfg 136 136 137 137 Therefore, the blending configuration for a single MPCC instance on the MPC 138 138 tree is defined by :c:type:`mpcc_blnd_cfg`, where ··· 144 144 :c:type:`MPCC_ALPHA_BLND_MODE`, as described below. 145 145 146 146 .. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h 147 - :functions: mpcc_alpha_blend_mode 147 + :identifiers: mpcc_alpha_blend_mode 148 148 149 149 DM then maps the elements of `enum mpcc_alpha_blend_mode` to those in the DRM 150 150 blend formula, as follows:

+2 -1

drivers/gpu/drm/amd/amdgpu/Makefile

··· 106 106 df_v1_7.o \ 107 107 df_v3_6.o \ 108 108 df_v4_3.o \ 109 - df_v4_6_2.o 109 + df_v4_6_2.o \ 110 + df_v4_15.o 110 111 111 112 # add GMC block 112 113 amdgpu-y += \

drivers/gpu/drm/amd/amdgpu/amdgpu_df.h

··· 33 33 struct amdgpu_df_funcs { 34 34 void (*sw_init)(struct amdgpu_device *adev); 35 35 void (*sw_fini)(struct amdgpu_device *adev); 36 + void (*hw_init)(struct amdgpu_device *adev); 36 37 void (*enable_broadcast_mode)(struct amdgpu_device *adev, 37 38 bool enable); 38 39 u32 (*get_fb_channel_number)(struct amdgpu_device *adev);

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

··· 37 37 #include "df_v3_6.h" 38 38 #include "df_v4_3.h" 39 39 #include "df_v4_6_2.h" 40 + #include "df_v4_15.h" 40 41 #include "nbio_v6_1.h" 41 42 #include "nbio_v7_0.h" 42 43 #include "nbio_v7_4.h" ··· 2803 2802 break; 2804 2803 case IP_VERSION(4, 6, 2): 2805 2804 adev->df.funcs = &df_v4_6_2_funcs; 2805 + break; 2806 + case IP_VERSION(4, 15, 0): 2807 + case IP_VERSION(4, 15, 1): 2808 + adev->df.funcs = &df_v4_15_funcs; 2806 2809 break; 2807 2810 default: 2808 2811 break;

+2 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

··· 116 116 * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query 117 117 * - 3.56.0 - Update IB start address and size alignment for decode and encode 118 118 * - 3.57.0 - Compute tunneling on GFX10+ 119 + * - 3.58.0 - Add GFX12 DCC support 119 120 */ 120 121 #define KMS_DRIVER_MAJOR 3 121 - #define KMS_DRIVER_MINOR 57 122 + #define KMS_DRIVER_MINOR 58 122 123 #define KMS_DRIVER_PATCHLEVEL 0 123 124 124 125 /*

+81 -40

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

··· 1591 1591 } 1592 1592 } 1593 1593 1594 + static int psp_ras_send_cmd(struct psp_context *psp, 1595 + enum ras_command cmd_id, void *in, void *out) 1596 + { 1597 + struct ta_ras_shared_memory *ras_cmd; 1598 + uint32_t cmd = cmd_id; 1599 + int ret = 0; 1600 + 1601 + if (!in) 1602 + return -EINVAL; 1603 + 1604 + mutex_lock(&psp->ras_context.mutex); 1605 + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; 1606 + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); 1607 + 1608 + switch (cmd) { 1609 + case TA_RAS_COMMAND__ENABLE_FEATURES: 1610 + case TA_RAS_COMMAND__DISABLE_FEATURES: 1611 + memcpy(&ras_cmd->ras_in_message, 1612 + in, sizeof(ras_cmd->ras_in_message)); 1613 + break; 1614 + case TA_RAS_COMMAND__TRIGGER_ERROR: 1615 + memcpy(&ras_cmd->ras_in_message.trigger_error, 1616 + in, sizeof(ras_cmd->ras_in_message.trigger_error)); 1617 + break; 1618 + case TA_RAS_COMMAND__QUERY_ADDRESS: 1619 + memcpy(&ras_cmd->ras_in_message.address, 1620 + in, sizeof(ras_cmd->ras_in_message.address)); 1621 + break; 1622 + default: 1623 + dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd); 1624 + ret = -EINVAL; 1625 + goto err_out; 1626 + } 1627 + 1628 + ras_cmd->cmd_id = cmd; 1629 + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); 1630 + 1631 + switch (cmd) { 1632 + case TA_RAS_COMMAND__TRIGGER_ERROR: 1633 + if (!ret && out) 1634 + memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status)); 1635 + break; 1636 + case TA_RAS_COMMAND__QUERY_ADDRESS: 1637 + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) 1638 + ret = -EINVAL; 1639 + else if (out) 1640 + memcpy(out, 1641 + &ras_cmd->ras_out_message.address, 1642 + sizeof(ras_cmd->ras_out_message.address)); 1643 + break; 1644 + default: 1645 + break; 1646 + } 1647 + 1648 + err_out: 1649 + mutex_unlock(&psp->ras_context.mutex); 1650 + 1651 + return ret; 1652 + } 1653 + 1594 1654 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) 1595 1655 { 1596 1656 struct 
ta_ras_shared_memory *ras_cmd; ··· 1692 1632 int psp_ras_enable_features(struct psp_context *psp, 1693 1633 union ta_ras_cmd_input *info, bool enable) 1694 1634 { 1695 - struct ta_ras_shared_memory *ras_cmd; 1635 + enum ras_command cmd_id; 1696 1636 int ret; 1697 1637 1698 - if (!psp->ras_context.context.initialized) 1638 + if (!psp->ras_context.context.initialized || !info) 1699 1639 return -EINVAL; 1700 1640 1701 - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; 1702 - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); 1703 - 1704 - if (enable) 1705 - ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES; 1706 - else 1707 - ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES; 1708 - 1709 - ras_cmd->ras_in_message = *info; 1710 - 1711 - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); 1641 + cmd_id = enable ? 1642 + TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES; 1643 + ret = psp_ras_send_cmd(psp, cmd_id, info, NULL); 1712 1644 if (ret) 1713 1645 return -EINVAL; 1714 1646 ··· 1723 1671 ret = psp_ta_unload(psp, &psp->ras_context.context); 1724 1672 1725 1673 psp->ras_context.context.initialized = false; 1674 + 1675 + mutex_destroy(&psp->ras_context.mutex); 1726 1676 1727 1677 return ret; 1728 1678 } ··· 1810 1756 1811 1757 ret = psp_ta_load(psp, &psp->ras_context.context); 1812 1758 1813 - if (!ret && !ras_cmd->ras_status) 1759 + if (!ret && !ras_cmd->ras_status) { 1814 1760 psp->ras_context.context.initialized = true; 1815 - else { 1761 + mutex_init(&psp->ras_context.mutex); 1762 + } else { 1816 1763 if (ras_cmd->ras_status) 1817 1764 dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); 1818 1765 ··· 1827 1772 int psp_ras_trigger_error(struct psp_context *psp, 1828 1773 struct ta_ras_trigger_error_input *info, uint32_t instance_mask) 1829 1774 { 1830 - struct ta_ras_shared_memory *ras_cmd; 1831 1775 struct amdgpu_device *adev = psp->adev; 1832 1776 int ret; 1833 1777 uint32_t dev_mask; 1778 
+ uint32_t ras_status = 0; 1834 1779 1835 - if (!psp->ras_context.context.initialized) 1780 + if (!psp->ras_context.context.initialized || !info) 1836 1781 return -EINVAL; 1837 1782 1838 1783 switch (info->block_id) { ··· 1856 1801 dev_mask &= AMDGPU_RAS_INST_MASK; 1857 1802 info->sub_block_index |= dev_mask; 1858 1803 1859 - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; 1860 - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); 1861 - 1862 - ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; 1863 - ras_cmd->ras_in_message.trigger_error = *info; 1864 - 1865 - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); 1804 + ret = psp_ras_send_cmd(psp, 1805 + TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status); 1866 1806 if (ret) 1867 1807 return -EINVAL; 1868 1808 ··· 1867 1817 if (amdgpu_ras_intr_triggered()) 1868 1818 return 0; 1869 1819 1870 - if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) 1820 + if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) 1871 1821 return -EACCES; 1872 - else if (ras_cmd->ras_status) 1822 + else if (ras_status) 1873 1823 return -EINVAL; 1874 1824 1875 1825 return 0; ··· 1879 1829 struct ta_ras_query_address_input *addr_in, 1880 1830 struct ta_ras_query_address_output *addr_out) 1881 1831 { 1882 - struct ta_ras_shared_memory *ras_cmd; 1883 1832 int ret; 1884 1833 1885 - if (!psp->ras_context.context.initialized) 1834 + if (!psp->ras_context.context.initialized || 1835 + !addr_in || !addr_out) 1886 1836 return -EINVAL; 1887 1837 1888 - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; 1889 - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); 1838 + ret = psp_ras_send_cmd(psp, 1839 + TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out); 1890 1840 1891 - ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS; 1892 - ras_cmd->ras_in_message.address = *addr_in; 1893 - 1894 - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); 1895 - if (ret || 
ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) 1896 - return -EINVAL; 1897 - 1898 - *addr_out = ras_cmd->ras_out_message.address; 1899 - 1900 - return 0; 1841 + return ret; 1901 1842 } 1902 1843 // ras end 1903 1844

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h

··· 200 200 struct psp_ras_context { 201 201 struct ta_context context; 202 202 struct amdgpu_ras *ras; 203 + struct mutex mutex; 203 204 }; 204 205 205 206 #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942

drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c

··· 348 348 349 349 context->session_id = ta_id; 350 350 351 + mutex_lock(&psp->ras_context.mutex); 351 352 ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); 352 353 if (ret) 353 354 goto err_free_shared_buf; ··· 367 366 ret = -EFAULT; 368 367 369 368 err_free_shared_buf: 369 + mutex_unlock(&psp->ras_context.mutex); 370 370 kfree(shared_buf); 371 371 372 372 return ret;

drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

··· 1011 1011 1012 1012 uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control) 1013 1013 { 1014 + /* get available eeprom table version first before eeprom table init */ 1015 + amdgpu_ras_set_eeprom_table_version(control); 1016 + 1014 1017 if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) 1015 1018 return RAS_MAX_RECORD_COUNT_V2_1; 1016 1019 else

+26 -27

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

··· 147 147 } 148 148 } 149 149 150 + /* from vcn4 and above, only unified queue is used */ 151 + adev->vcn.using_unified_queue = 152 + amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); 153 + 150 154 hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; 151 155 adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); 152 156 ··· 279 275 return 0; 280 276 } 281 277 282 - /* from vcn4 and above, only unified queue is used */ 283 - static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) 284 - { 285 - struct amdgpu_device *adev = ring->adev; 286 - bool ret = false; 287 - 288 - if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) 289 - ret = true; 290 - 291 - return ret; 292 - } 293 - 294 278 bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) 295 279 { 296 280 bool ret = false; ··· 389 397 for (i = 0; i < adev->vcn.num_enc_rings; ++i) 390 398 fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); 391 399 392 - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { 400 + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ 401 + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && 402 + !adev->vcn.using_unified_queue) { 393 403 struct dpg_pause_state new_state; 394 404 395 405 if (fence[j] || ··· 437 443 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, 438 444 AMD_PG_STATE_UNGATE); 439 445 440 - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { 446 + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ 447 + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && 448 + !adev->vcn.using_unified_queue) { 441 449 struct dpg_pause_state new_state; 442 450 443 451 if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { ··· 465 469 466 470 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) 467 471 { 472 + struct amdgpu_device *adev = ring->adev; 473 + 474 + /* Only set DPG pause for VCN3 or below, VCN4 and 
above will be handled by FW */ 468 475 if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && 469 - ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) 476 + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && 477 + !adev->vcn.using_unified_queue) 470 478 atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); 471 479 472 480 atomic_dec(&ring->adev->vcn.total_submission_cnt); ··· 724 724 struct amdgpu_job *job; 725 725 struct amdgpu_ib *ib; 726 726 uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); 727 - bool sq = amdgpu_vcn_using_unified_queue(ring); 728 727 uint32_t *ib_checksum; 729 728 uint32_t ib_pack_in_dw; 730 729 int i, r; 731 730 732 - if (sq) 731 + if (adev->vcn.using_unified_queue) 733 732 ib_size_dw += 8; 734 733 735 734 r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ··· 741 742 ib->length_dw = 0; 742 743 743 744 /* single queue headers */ 744 - if (sq) { 745 + if (adev->vcn.using_unified_queue) { 745 746 ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) 746 747 + 4 + 2; /* engine info + decoding ib in dw */ 747 748 ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); ··· 760 761 for (i = ib->length_dw; i < ib_size_dw; ++i) 761 762 ib->ptr[i] = 0x0; 762 763 763 - if (sq) 764 + if (adev->vcn.using_unified_queue) 764 765 amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); 765 766 766 767 r = amdgpu_job_submit_direct(job, ring, &f); ··· 850 851 struct dma_fence **fence) 851 852 { 852 853 unsigned int ib_size_dw = 16; 854 + struct amdgpu_device *adev = ring->adev; 853 855 struct amdgpu_job *job; 854 856 struct amdgpu_ib *ib; 855 857 struct dma_fence *f = NULL; 856 858 uint32_t *ib_checksum = NULL; 857 859 uint64_t addr; 858 - bool sq = amdgpu_vcn_using_unified_queue(ring); 859 860 int i, r; 860 861 861 - if (sq) 862 + if (adev->vcn.using_unified_queue) 862 863 ib_size_dw += 8; 863 864 864 865 r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ··· 872 873 873 874 ib->length_dw = 0; 
874 875 875 - if (sq) 876 + if (adev->vcn.using_unified_queue) 876 877 ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); 877 878 878 879 ib->ptr[ib->length_dw++] = 0x00000018; ··· 894 895 for (i = ib->length_dw; i < ib_size_dw; ++i) 895 896 ib->ptr[i] = 0x0; 896 897 897 - if (sq) 898 + if (adev->vcn.using_unified_queue) 898 899 amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); 899 900 900 901 r = amdgpu_job_submit_direct(job, ring, &f); ··· 917 918 struct dma_fence **fence) 918 919 { 919 920 unsigned int ib_size_dw = 16; 921 + struct amdgpu_device *adev = ring->adev; 920 922 struct amdgpu_job *job; 921 923 struct amdgpu_ib *ib; 922 924 struct dma_fence *f = NULL; 923 925 uint32_t *ib_checksum = NULL; 924 926 uint64_t addr; 925 - bool sq = amdgpu_vcn_using_unified_queue(ring); 926 927 int i, r; 927 928 928 - if (sq) 929 + if (adev->vcn.using_unified_queue) 929 930 ib_size_dw += 8; 930 931 931 932 r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ··· 939 940 940 941 ib->length_dw = 0; 941 942 942 - if (sq) 943 + if (adev->vcn.using_unified_queue) 943 944 ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); 944 945 945 946 ib->ptr[ib->length_dw++] = 0x00000018; ··· 961 962 for (i = ib->length_dw; i < ib_size_dw; ++i) 962 963 ib->ptr[i] = 0x0; 963 964 964 - if (sq) 965 + if (adev->vcn.using_unified_queue) 965 966 amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); 966 967 967 968 r = amdgpu_job_submit_direct(job, ring, &f);

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

··· 329 329 330 330 uint16_t inst_mask; 331 331 uint8_t num_inst_per_aid; 332 + bool using_unified_queue; 332 333 }; 333 334 334 335 struct amdgpu_fw_shared_rb_ptrs_struct {

+5 -4

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

··· 434 434 if (!vm) 435 435 return result; 436 436 437 - result += vm->generation; 437 + result += lower_32_bits(vm->generation); 438 438 /* Add one if the page tables will be re-generated on next CS */ 439 439 if (drm_sched_entity_error(&vm->delayed)) 440 440 ++result; ··· 463 463 int (*validate)(void *p, struct amdgpu_bo *bo), 464 464 void *param) 465 465 { 466 + uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); 466 467 struct amdgpu_vm_bo_base *bo_base; 467 468 struct amdgpu_bo *shadow; 468 469 struct amdgpu_bo *bo; 469 470 int r; 470 471 471 - if (drm_sched_entity_error(&vm->delayed)) { 472 - ++vm->generation; 472 + if (vm->generation != new_vm_generation) { 473 + vm->generation = new_vm_generation; 473 474 amdgpu_vm_bo_reset_state_machine(vm); 474 475 amdgpu_vm_fini_entities(vm); 475 476 r = amdgpu_vm_init_entities(adev, vm); ··· 2440 2439 vm->last_update = dma_fence_get_stub(); 2441 2440 vm->last_unlocked = dma_fence_get_stub(); 2442 2441 vm->last_tlb_flush = dma_fence_get_stub(); 2443 - vm->generation = 0; 2442 + vm->generation = amdgpu_vm_generation(adev, NULL); 2444 2443 2445 2444 mutex_init(&vm->eviction_lock); 2446 2445 vm->evicting = false;

+45

drivers/gpu/drm/amd/amdgpu/df_v4_15.c

··· 1 + /* 2 + * Copyright 2024 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + #include "amdgpu.h" 24 + #include "df_v4_15.h" 25 + 26 + #include "df/df_4_15_offset.h" 27 + #include "df/df_4_15_sh_mask.h" 28 + 29 + static void df_v4_15_hw_init(struct amdgpu_device *adev) 30 + { 31 + if (adev->have_atomics_support) { 32 + uint32_t tmp; 33 + uint32_t dis_lcl_proc = (1 << 1 | 34 + 1 << 2 | 35 + 1 << 13); 36 + 37 + tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1); 38 + tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT); 39 + WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp); 40 + } 41 + } 42 + 43 + const struct amdgpu_df_funcs df_v4_15_funcs = { 44 + .hw_init = df_v4_15_hw_init 45 + };

+30

drivers/gpu/drm/amd/amdgpu/df_v4_15.h

··· 1 + /* 2 + * Copyright 2024 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + 24 + #ifndef __DF_V4_15_H__ 25 + #define __DF_V4_15_H__ 26 + 27 + extern const struct amdgpu_df_funcs df_v4_15_funcs; 28 + 29 + #endif /* __DF_V4_15_H__ */ 30 +

+25 -2

drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c

··· 32 32 #include "vcn/vcn_4_0_3_sh_mask.h" 33 33 #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" 34 34 35 + #define NORMALIZE_JPEG_REG_OFFSET(offset) \ 36 + (offset & 0x1FFFF) 37 + 35 38 enum jpeg_engin_status { 36 39 UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, 37 40 UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, ··· 624 621 ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); 625 622 } 626 623 624 + static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) 625 + { 626 + /* JPEG engine access for HDP flush doesn't work when RRMT is enabled. 627 + * This is a workaround to avoid any HDP flush through JPEG ring. 628 + */ 629 + } 630 + 627 631 /** 628 632 * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer 629 633 * ··· 827 817 void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 828 818 uint32_t val, uint32_t mask) 829 819 { 830 - uint32_t reg_offset = (reg << 2); 820 + uint32_t reg_offset; 821 + 822 + /* For VF, only local offsets should be used */ 823 + if (amdgpu_sriov_vf(ring->adev)) 824 + reg = NORMALIZE_JPEG_REG_OFFSET(reg); 825 + 826 + reg_offset = (reg << 2); 831 827 832 828 amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 833 829 0, 0, PACKETJ_TYPE0)); ··· 874 858 875 859 void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) 876 860 { 877 - uint32_t reg_offset = (reg << 2); 861 + uint32_t reg_offset; 862 + 863 + /* For VF, only local offsets should be used */ 864 + if (amdgpu_sriov_vf(ring->adev)) 865 + reg = NORMALIZE_JPEG_REG_OFFSET(reg); 866 + 867 + reg_offset = (reg << 2); 878 868 879 869 amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 880 870 0, 0, PACKETJ_TYPE0)); ··· 1094 1072 .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, 1095 1073 .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, 1096 1074 .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, 1075 + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, 1097 1076 .test_ring = amdgpu_jpeg_dec_ring_test_ring, 1098 
1077 .test_ib = amdgpu_jpeg_dec_ring_test_ib, 1099 1078 .insert_nop = jpeg_v4_0_3_dec_ring_nop,

+12

drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c

··· 176 176 DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", 177 177 ring->doorbell_index, ring->wptr << 2); 178 178 WDOORBELL64(ring->doorbell_index, ring->wptr << 2); 179 + /* SDMA seems to miss doorbells sometimes when powergating kicks in. 180 + * Updating the wptr directly will wake it. This is only safe because 181 + * we disallow gfxoff in begin_use() and then allow it again in end_use(). 182 + */ 183 + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), 184 + lower_32_bits(ring->wptr << 2)); 185 + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), 186 + upper_32_bits(ring->wptr << 2)); 179 187 } else { 180 188 DRM_DEBUG("Not using doorbell -- " 181 189 "mmSDMA%i_GFX_RB_WPTR == 0x%08x " ··· 1655 1647 * but it shouldn't hurt for other parts since 1656 1648 * this GFXOFF will be disallowed anyway when SDMA is 1657 1649 * active, this just makes it explicit. 1650 + * sdma_v5_2_ring_set_wptr() takes advantage of this 1651 + * to update the wptr because sometimes SDMA seems to miss 1652 + * doorbells when entering PG. If you remove this, update 1653 + * sdma_v5_2_ring_set_wptr() as well! 1658 1654 */ 1659 1655 amdgpu_gfx_off_ctrl(adev, false); 1660 1656 }

+1 -1

drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c

··· 91 91 adev->ip_blocks[i].status.hw = false; 92 92 } 93 93 94 - return r; 94 + return 0; 95 95 } 96 96 97 97 static int

drivers/gpu/drm/amd/amdgpu/soc24.c

··· 484 484 */ 485 485 if (adev->nbio.funcs->remap_hdp_registers) 486 486 adev->nbio.funcs->remap_hdp_registers(adev); 487 + 488 + if (adev->df.funcs->hw_init) 489 + adev->df.funcs->hw_init(adev); 490 + 487 491 /* enable the doorbell aperture */ 488 492 soc24_enable_doorbell_aperture(adev, true); 489 493

drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c

··· 1045 1045 amdgpu_dpm_enable_uvd(adev, true); 1046 1046 1047 1047 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1048 + if (adev->vcn.harvest_config & (1 << i)) 1049 + continue; 1050 + 1048 1051 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 1049 1052 1050 1053 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { ··· 1501 1498 int i, r = 0; 1502 1499 1503 1500 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1501 + if (adev->vcn.harvest_config & (1 << i)) 1502 + continue; 1503 + 1504 1504 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 1505 1505 fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; 1506 1506

+51 -3

drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c

··· 45 45 #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 46 46 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 47 47 48 + #define NORMALIZE_VCN_REG_OFFSET(offset) \ 49 + (offset & 0x1FFFF) 50 + 48 51 static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); 49 52 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); 50 53 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); ··· 1378 1375 regUVD_RB_WPTR); 1379 1376 } 1380 1377 1378 + static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 1379 + uint32_t val, uint32_t mask) 1380 + { 1381 + /* For VF, only local offsets should be used */ 1382 + if (amdgpu_sriov_vf(ring->adev)) 1383 + reg = NORMALIZE_VCN_REG_OFFSET(reg); 1384 + 1385 + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); 1386 + amdgpu_ring_write(ring, reg << 2); 1387 + amdgpu_ring_write(ring, mask); 1388 + amdgpu_ring_write(ring, val); 1389 + } 1390 + 1391 + static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) 1392 + { 1393 + /* For VF, only local offsets should be used */ 1394 + if (amdgpu_sriov_vf(ring->adev)) 1395 + reg = NORMALIZE_VCN_REG_OFFSET(reg); 1396 + 1397 + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); 1398 + amdgpu_ring_write(ring, reg << 2); 1399 + amdgpu_ring_write(ring, val); 1400 + } 1401 + 1402 + static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, 1403 + unsigned int vmid, uint64_t pd_addr) 1404 + { 1405 + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; 1406 + 1407 + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 1408 + 1409 + /* wait for reg writes */ 1410 + vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + 1411 + vmid * hub->ctx_addr_distance, 1412 + lower_32_bits(pd_addr), 0xffffffff); 1413 + } 1414 + 1415 + static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) 1416 + { 1417 + /* VCN engine access for HDP flush doesn't work when RRMT is enabled. 
1418 + * This is a workaround to avoid any HDP flush through VCN ring. 1419 + */ 1420 + } 1421 + 1381 1422 /** 1382 1423 * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer 1383 1424 * ··· 1461 1414 .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ 1462 1415 .emit_ib = vcn_v2_0_enc_ring_emit_ib, 1463 1416 .emit_fence = vcn_v2_0_enc_ring_emit_fence, 1464 - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, 1417 + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, 1418 + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, 1465 1419 .test_ring = amdgpu_vcn_enc_ring_test_ring, 1466 1420 .test_ib = amdgpu_vcn_unified_ring_test_ib, 1467 1421 .insert_nop = amdgpu_ring_insert_nop, ··· 1470 1422 .pad_ib = amdgpu_ring_generic_pad_ib, 1471 1423 .begin_use = amdgpu_vcn_ring_begin_use, 1472 1424 .end_use = amdgpu_vcn_ring_end_use, 1473 - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, 1474 - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, 1425 + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, 1426 + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, 1475 1427 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, 1476 1428 }; 1477 1429

drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c

··· 958 958 amdgpu_dpm_enable_uvd(adev, true); 959 959 960 960 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 961 + if (adev->vcn.harvest_config & (1 << i)) 962 + continue; 963 + 961 964 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 962 965 963 966 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { ··· 1165 1162 int i, r = 0; 1166 1163 1167 1164 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1165 + if (adev->vcn.harvest_config & (1 << i)) 1166 + continue; 1167 + 1168 1168 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 1169 1169 fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; 1170 1170

drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c

··· 721 721 amdgpu_dpm_enable_uvd(adev, true); 722 722 723 723 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 724 + if (adev->vcn.harvest_config & (1 << i)) 725 + continue; 726 + 724 727 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 725 728 726 729 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { ··· 901 898 int i, r = 0; 902 899 903 900 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 901 + if (adev->vcn.harvest_config & (1 << i)) 902 + continue; 903 + 904 904 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; 905 905 fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; 906 906

+16 -1

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

··· 137 137 bool enable; 138 138 }; 139 139 140 + /** 141 + * struct idle_workqueue - Work data for periodic action in idle 142 + * @work: Kernel work data for the work event 143 + * @dm: amdgpu display manager device 144 + * @enable: true if idle worker is enabled 145 + * @running: true if idle worker is running 146 + */ 140 147 struct idle_workqueue { 141 148 struct work_struct work; 142 149 struct amdgpu_display_manager *dm; ··· 509 502 * Deferred work for vblank control events. 510 503 */ 511 504 struct workqueue_struct *vblank_control_workqueue; 505 + 506 + /** 507 + * @idle_workqueue: 508 + * 509 + * Periodic work for idle events. 510 + */ 512 511 struct idle_workqueue *idle_workqueue; 513 512 514 513 struct drm_atomic_state *cached_state; ··· 600 587 */ 601 588 struct mutex dpia_aux_lock; 602 589 603 - /* 590 + /** 591 + * @bb_from_dmub: 592 + * 604 593 * Bounding box data read from dmub during early initialization for DCN4+ 605 594 */ 606 595 struct dml2_soc_bb *bb_from_dmub;

+2 -1

drivers/gpu/drm/amd/display/dc/core/dc_surface.c

··· 143 143 if (pipe_ctx->plane_state != plane_state) 144 144 continue; 145 145 146 - pipe_ctx->plane_state->status.is_flip_pending = false; 146 + if (pipe_ctx->plane_state) 147 + pipe_ctx->plane_state->status.is_flip_pending = false; 147 148 148 149 break; 149 150 }

-2

drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c

··· 64 64 65 65 double math_ceil2(const double arg, const double significance) 66 66 { 67 - ASSERT(significance != 0); 68 - 69 67 return ((int)(arg / significance + 0.99999)) * significance; 70 68 } 71 69

+17 -5

drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h

··· 147 147 int color_keyer_blue_high; 148 148 }; 149 149 150 - /* new for dcn2: set the 8bit alpha values based on the 2 bit alpha 151 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT0 default: 0b00000000 152 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT1 default: 0b01010101 153 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT2 default: 0b10101010 154 - *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT3 default: 0b11111111 150 + /** 151 + * struct cnv_alpha_2bit_lut - Set the 8bit alpha values based on the 2 bit alpha 155 152 */ 156 153 struct cnv_alpha_2bit_lut { 154 + /** 155 + * @lut0: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT0. Default: 0b00000000 156 + */ 157 157 int lut0; 158 + 159 + /** 160 + * @lut1: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT1. Default: 0b01010101 161 + */ 158 162 int lut1; 163 + 164 + /** 165 + * @lut2: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT2. Default: 0b10101010 166 + */ 159 167 int lut2; 168 + 169 + /** 170 + * @lut3: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT3. Default: 0b11111111 171 + */ 160 172 int lut3; 161 173 }; 162 174

+14

drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h

··· 1039 1039 */ 1040 1040 void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_ID id, const enum MCM_LUT_XABLE xable, 1041 1041 bool lut_bank_a, int mpcc_id); 1042 + /** 1043 + * @program_3dlut_size: 1044 + * 1045 + * Program 3D LUT size. 1046 + * 1047 + * Parameters: 1048 + * - [in/out] mpc - MPC context. 1049 + * - [in] is_17x17x17 - is 3dlut 17x17x17 1050 + * - [in] mpcc_id 1051 + * 1052 + * Return: 1053 + * 1054 + * void 1055 + */ 1042 1056 void (*program_3dlut_size)(struct mpc *mpc, bool is_17x17x17, int mpcc_id); 1043 1057 }; 1044 1058

+15

drivers/gpu/drm/amd/display/dc/inc/hw/opp.h

··· 205 205 struct fixed31_32 user_brightness; 206 206 }; 207 207 208 + /** 209 + * struct pwl_float_data - Fixed point RGB color 210 + */ 208 211 struct pwl_float_data { 212 + /** 213 + * @r: Component Red. 214 + */ 209 215 struct fixed31_32 r; 216 + 217 + /** 218 + * @g: Component Green. 219 + */ 220 + 210 221 struct fixed31_32 g; 222 + 223 + /** 224 + * @b: Component Blue. 225 + */ 211 226 struct fixed31_32 b; 212 227 }; 213 228

+29

drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c

··· 26 26 #include "core_types.h" 27 27 #include "link_enc_cfg.h" 28 28 29 + /** 30 + * DOC: overview 31 + * 32 + * Display Input Output (DIO), is the display input and output unit in DCN. It 33 + * includes output encoders to support different display output, like 34 + * DisplayPort, HDMI, DVI interface, and others. It also includes the control 35 + * and status channels for these interfaces. 36 + */ 37 + 38 + 29 39 void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx, 30 40 struct fixed31_32 throttled_vcp_size) 31 41 { ··· 264 254 }, 265 255 }; 266 256 257 + /** 258 + * can_use_dio_link_hwss - Check if the link_hwss is accessible 259 + * 260 + * @link: Reference a link struct containing one or more sinks and the 261 + * connective status. 262 + * @link_res: Mappable hardware resource used to enable a link. 263 + * 264 + * Returns: 265 + * Return true if the link encoder is accessible from link. 266 + */ 267 267 bool can_use_dio_link_hwss(const struct dc_link *link, 268 268 const struct link_resource *link_res) 269 269 { 270 270 return link->link_enc != NULL; 271 271 } 272 272 273 + /** 274 + * get_dio_link_hwss - Return link_hwss reference 275 + * 276 + * This function behaves like a get function to return the link_hwss populated 277 + * in the link_hwss_dio.c file. 278 + * 279 + * Returns: 280 + * Return the reference to the filled struct of link_hwss. 281 + */ 273 282 const struct link_hwss *get_dio_link_hwss(void) 274 283 { 275 284 return &dio_link_hwss;

-9

drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h

··· 23 23 * 24 24 */ 25 25 26 - /** 27 - * DOC: overview 28 - * 29 - * Display Input Output (DIO), is the display input and output unit in DCN. It 30 - * includes output encoders to support different display output, like 31 - * DisplayPort, HDMI, DVI interface, and others. It also includes the control 32 - * and status channels for these interfaces. 33 - */ 34 - 35 26 #ifndef __LINK_HWSS_DIO_H__ 36 27 #define __LINK_HWSS_DIO_H__ 37 28

+3 -12

drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c

··· 945 945 OTG_FORCE_LOCK_ON_EVENT, 0, 946 946 OTG_SET_V_TOTAL_MIN_MASK_EN, 0, 947 947 OTG_SET_V_TOTAL_MIN_MASK, 0); 948 - 949 - // Setup manual flow control for EOF via TRIG_A 950 - optc->funcs->setup_manual_trigger(optc); 951 - 952 - } else { 953 - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, 954 - OTG_SET_V_TOTAL_MIN_MASK, 0, 955 - OTG_V_TOTAL_MIN_SEL, 0, 956 - OTG_V_TOTAL_MAX_SEL, 0, 957 - OTG_FORCE_LOCK_ON_EVENT, 0); 958 - 959 - optc->funcs->set_vtotal_min_max(optc, 0, 0); 960 948 } 949 + 950 + // Setup manual flow control for EOF via TRIG_A 951 + optc->funcs->setup_manual_trigger(optc); 961 952 } 962 953 963 954 void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max)

+10

drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c

··· 453 453 { 454 454 struct optc *optc1 = DCN10TG_FROM_TG(optc); 455 455 456 + /* Set the min/max selectors unconditionally so that 457 + * DMCUB fw may change OTG timings when necessary 458 + * TODO: Remove the w/a after fixing the issue in DMCUB firmware 459 + */ 460 + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, 461 + OTG_V_TOTAL_MIN_SEL, 1, 462 + OTG_V_TOTAL_MAX_SEL, 1, 463 + OTG_FORCE_LOCK_ON_EVENT, 0, 464 + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ 465 + 456 466 REG_SET_8(OTG_TRIGA_CNTL, 0, 457 467 OTG_TRIGA_SOURCE_SELECT, 21, 458 468 OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst,

+28

drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h

··· 1 + /* 2 + * Copyright (C) 2024 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included 12 + * in all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 18 + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 + */ 21 + 22 + #ifndef _df_4_15_OFFSET_HEADER 23 + #define _df_4_15_OFFSET_HEADER 24 + 25 + #define regNCSConfigurationRegister1 0x0901 26 + #define regNCSConfigurationRegister1_BASE_IDX 4 27 + 28 + #endif

+28

drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h

··· 1 + /* 2 + * Copyright (C) 2024 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included 12 + * in all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 18 + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 + */ 21 + 22 + #ifndef _df_4_15_SH_MASK_HEADER 23 + #define _df_4_15_SH_MASK_HEADER 24 + 25 + #define NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT 0x3 26 + #define NCSConfigurationRegister1__DisIntAtomicsLclProcessing_MASK 0x0003FFF8L 27 + 28 + #endif

+4 -12

drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

··· 1924 1924 } 1925 1925 1926 1926 /* 1927 - * For SMU 13.0.4/11 and 14.0.0, PMFW will handle the features disablement properly 1927 + * For GFX11 and subsequent APUs, PMFW will handle the features disablement properly 1928 1928 * for gpu reset and S0i3 cases. Driver involvement is unnecessary. 1929 1929 */ 1930 - if (amdgpu_in_reset(adev) || adev->in_s0ix) { 1931 - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { 1932 - case IP_VERSION(13, 0, 4): 1933 - case IP_VERSION(13, 0, 11): 1934 - case IP_VERSION(14, 0, 0): 1935 - case IP_VERSION(14, 0, 1): 1936 - return 0; 1937 - default: 1938 - break; 1939 - } 1940 - } 1930 + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) >= 11 && 1931 + smu->is_apu && (amdgpu_in_reset(adev) || adev->in_s0ix)) 1932 + return 0; 1941 1933 1942 1934 /* 1943 1935 * For gpu reset, runpm and hibernation through BACO,

+15 -3

drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c

··· 69 69 #define SMU_14_0_0_UMD_PSTATE_SOCCLK 678 70 70 #define SMU_14_0_0_UMD_PSTATE_FCLK 1800 71 71 72 + #define SMU_14_0_4_UMD_PSTATE_GFXCLK 938 73 + #define SMU_14_0_4_UMD_PSTATE_SOCCLK 938 74 + 72 75 #define FEATURE_MASK(feature) (1ULL << feature) 73 76 #define SMC_DPM_FEATURE ( \ 74 77 FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \ ··· 1299 1296 switch (clk_type) { 1300 1297 case SMU_GFXCLK: 1301 1298 case SMU_SCLK: 1302 - clk_limit = SMU_14_0_0_UMD_PSTATE_GFXCLK; 1299 + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) 1300 + clk_limit = SMU_14_0_4_UMD_PSTATE_GFXCLK; 1301 + else 1302 + clk_limit = SMU_14_0_0_UMD_PSTATE_GFXCLK; 1303 1303 if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) 1304 1304 smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &clk_limit); 1305 1305 else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) 1306 1306 smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &clk_limit, NULL); 1307 1307 break; 1308 1308 case SMU_SOCCLK: 1309 - clk_limit = SMU_14_0_0_UMD_PSTATE_SOCCLK; 1309 + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) 1310 + clk_limit = SMU_14_0_4_UMD_PSTATE_SOCCLK; 1311 + else 1312 + clk_limit = SMU_14_0_0_UMD_PSTATE_SOCCLK; 1310 1313 if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) 1311 1314 smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &clk_limit); 1312 1315 break; 1313 1316 case SMU_FCLK: 1314 - clk_limit = SMU_14_0_0_UMD_PSTATE_FCLK; 1317 + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 4)) 1318 + smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &clk_limit); 1319 + else 1320 + clk_limit = SMU_14_0_0_UMD_PSTATE_FCLK; 1315 1321 if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) 1316 1322 smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &clk_limit); 1317 1323 else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK)

drivers/gpu/drm/i915/display/intel_dp.c

··· 5314 5314 const struct intel_crtc_state *crtc_state = 5315 5315 to_intel_crtc_state(crtc->base.state); 5316 5316 5317 + intel_dp->link_trained = false; 5318 + 5317 5319 intel_dp_check_frl_training(intel_dp); 5318 5320 intel_dp_pcon_dsc_configure(intel_dp, crtc_state); 5319 5321 intel_dp_start_link_train(NULL, intel_dp, crtc_state);

+48 -7

drivers/gpu/drm/i915/display/intel_dp_link_training.c

··· 117 117 return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; 118 118 } 119 119 120 - static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) 120 + static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp) 121 + { 122 + return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE - 123 + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] == 124 + DP_PHY_REPEATER_MODE_TRANSPARENT; 125 + } 126 + 127 + /* 128 + * Read the LTTPR common capabilities and switch the LTTPR PHYs to 129 + * non-transparent mode if this is supported. Preserve the 130 + * transparent/non-transparent mode on an active link. 131 + * 132 + * Return the number of detected LTTPRs in non-transparent mode or 0 if the 133 + * LTTPRs are in transparent mode or the detection failed. 134 + */ 135 + static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) 121 136 { 122 137 int lttpr_count; 123 - int i; 124 138 125 139 if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd)) 126 140 return 0; ··· 147 133 */ 148 134 if (lttpr_count == 0) 149 135 return 0; 136 + 137 + /* 138 + * Don't change the mode on an active link, to prevent a loss of link 139 + * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR 140 + * resetting its internal state when the mode is changed from 141 + * non-transparent to transparent. 142 + */ 143 + if (intel_dp->link_trained) { 144 + if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp)) 145 + goto out_reset_lttpr_count; 146 + 147 + return lttpr_count; 148 + } 150 149 151 150 /* 152 151 * See DP Standard v2.0 3.6.6.1. 
about the explicit disabling of ··· 181 154 "Switching to LTTPR non-transparent LT mode failed, fall-back to transparent mode\n"); 182 155 183 156 intel_dp_set_lttpr_transparent_mode(intel_dp, true); 184 - intel_dp_reset_lttpr_count(intel_dp); 185 157 186 - return 0; 158 + goto out_reset_lttpr_count; 187 159 } 160 + 161 + return lttpr_count; 162 + 163 + out_reset_lttpr_count: 164 + intel_dp_reset_lttpr_count(intel_dp); 165 + 166 + return 0; 167 + } 168 + 169 + static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) 170 + { 171 + int lttpr_count; 172 + int i; 173 + 174 + lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd); 188 175 189 176 for (i = 0; i < lttpr_count; i++) 190 177 intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i)); ··· 1523 1482 struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); 1524 1483 struct intel_encoder *encoder = &dig_port->base; 1525 1484 bool passed; 1526 - 1527 1485 /* 1528 - * TODO: Reiniting LTTPRs here won't be needed once proper connector 1529 - * HW state readout is added. 1486 + * Reinit the LTTPRs here to ensure that they are switched to 1487 + * non-transparent mode. During an earlier LTTPR detection this 1488 + * could've been prevented by an active link. 1530 1489 */ 1531 1490 int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); 1532 1491

+1 -5

drivers/gpu/drm/i915/gt/intel_execlists_submission.c

··· 3315 3315 3316 3316 static bool can_preempt(struct intel_engine_cs *engine) 3317 3317 { 3318 - if (GRAPHICS_VER(engine->i915) > 8) 3319 - return true; 3320 - 3321 - /* GPGPU on bdw requires extra w/a; not implemented */ 3322 - return engine->class != RENDER_CLASS; 3318 + return GRAPHICS_VER(engine->i915) > 8; 3323 3319 } 3324 3320 3325 3321 static void kick_execlists(const struct i915_request *rq, int prio)

+4 -2

drivers/gpu/drm/i915/intel_memory_region.c

··· 368 368 goto out_cleanup; 369 369 } 370 370 371 - mem->id = i; 372 - i915->mm.regions[i] = mem; 371 + if (mem) { /* Skip on non-fatal errors */ 372 + mem->id = i; 373 + i915->mm.regions[i] = mem; 374 + } 373 375 } 374 376 375 377 for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) {

+4 -1

drivers/gpu/drm/v3d/v3d_drv.c

··· 265 265 struct v3d_dev *v3d; 266 266 int ret; 267 267 u32 mmu_debug; 268 - u32 ident1; 268 + u32 ident1, ident3; 269 269 u64 mask; 270 270 271 271 v3d = devm_drm_dev_alloc(dev, &v3d_drm_driver, struct v3d_dev, drm); ··· 297 297 V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV)); 298 298 v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); 299 299 WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ 300 + 301 + ident3 = V3D_READ(V3D_HUB_IDENT3); 302 + v3d->rev = V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV); 300 303 301 304 if (v3d->ver >= 71) 302 305 v3d->max_counters = V3D_V71_NUM_PERFCOUNTERS;

+5 -3

drivers/gpu/drm/v3d/v3d_drv.h

··· 98 98 struct v3d_dev { 99 99 struct drm_device drm; 100 100 101 - /* Short representation (e.g. 33, 41) of the V3D tech version 102 - * and revision. 103 - */ 101 + /* Short representation (e.g. 33, 41) of the V3D tech version */ 104 102 int ver; 103 + 104 + /* Short representation (e.g. 5, 6) of the V3D tech revision */ 105 + int rev; 106 + 105 107 bool single_irq_line; 106 108 107 109 /* Different revisions of V3D have different total number of performance

+13 -3

drivers/gpu/drm/v3d/v3d_sched.c

··· 331 331 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 332 332 struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); 333 333 struct drm_v3d_submit_csd *args = &indirect_csd->job->args; 334 - u32 *wg_counts; 334 + struct v3d_dev *v3d = job->base.v3d; 335 + u32 num_batches, *wg_counts; 335 336 336 337 v3d_get_bo_vaddr(bo); 337 338 v3d_get_bo_vaddr(indirect); ··· 345 344 args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 346 345 args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 347 346 args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 348 - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 349 - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; 347 + 348 + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 349 + (wg_counts[0] * wg_counts[1] * wg_counts[2]); 350 + 351 + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ 352 + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) 353 + args->cfg[4] = num_batches - 1; 354 + else 355 + args->cfg[4] = num_batches; 356 + 357 + WARN_ON(args->cfg[4] == ~0); 350 358 351 359 for (int i = 0; i < 3; i++) { 352 360 /* 0xffffffff indicates that the uniform rewrite is not needed */

+20

drivers/gpu/drm/xe/xe_device.c

··· 854 854 return address & GENMASK_ULL(xe->info.va_bits - 1, 0); 855 855 } 856 856 857 + static void xe_device_wedged_fini(struct drm_device *drm, void *arg) 858 + { 859 + struct xe_device *xe = arg; 860 + 861 + xe_pm_runtime_put(xe); 862 + } 863 + 857 864 /** 858 865 * xe_device_declare_wedged - Declare device wedged 859 866 * @xe: xe device instance ··· 877 870 */ 878 871 void xe_device_declare_wedged(struct xe_device *xe) 879 872 { 873 + struct xe_gt *gt; 874 + u8 id; 875 + 880 876 if (xe->wedged.mode == 0) { 881 877 drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n"); 882 878 return; 883 879 } 880 + 881 + if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) { 882 + drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n"); 883 + return; 884 + } 885 + 886 + xe_pm_runtime_get_noresume(xe); 884 887 885 888 if (!atomic_xchg(&xe->wedged.flag, 1)) { 886 889 xe->needs_flr_on_fini = true; ··· 900 883 "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", 901 884 dev_name(xe->drm.dev)); 902 885 } 886 + 887 + for_each_gt(gt, xe, id) 888 + xe_gt_declare_wedged(gt); 903 889 }

+7 -7

drivers/gpu/drm/xe/xe_exec.c

··· 118 118 u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; 119 119 struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; 120 120 struct drm_exec *exec = &vm_exec.exec; 121 - u32 i, num_syncs = 0, num_ufence = 0; 121 + u32 i, num_syncs, num_ufence = 0; 122 122 struct xe_sched_job *job; 123 123 struct xe_vm *vm; 124 124 bool write_locked, skip_retry = false; ··· 156 156 157 157 vm = q->vm; 158 158 159 - for (i = 0; i < args->num_syncs; i++) { 160 - err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], 161 - &syncs_user[i], SYNC_PARSE_FLAG_EXEC | 159 + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 160 + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 161 + &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | 162 162 (xe_vm_in_lr_mode(vm) ? 163 163 SYNC_PARSE_FLAG_LR_MODE : 0)); 164 164 if (err) 165 165 goto err_syncs; 166 166 167 - if (xe_sync_is_ufence(&syncs[i])) 167 + if (xe_sync_is_ufence(&syncs[num_syncs])) 168 168 num_ufence++; 169 169 } 170 170 ··· 325 325 if (err == -EAGAIN && !skip_retry) 326 326 goto retry; 327 327 err_syncs: 328 - for (i = 0; i < num_syncs; i++) 329 - xe_sync_entry_cleanup(&syncs[i]); 328 + while (num_syncs--) 329 + xe_sync_entry_cleanup(&syncs[num_syncs]); 330 330 kfree(syncs); 331 331 err_exec_queue: 332 332 xe_exec_queue_put(q);

+15

drivers/gpu/drm/xe/xe_gt.c

··· 904 904 905 905 return NULL; 906 906 } 907 + 908 + /** 909 + * xe_gt_declare_wedged() - Declare GT wedged 910 + * @gt: the GT object 911 + * 912 + * Wedge the GT which stops all submission, saves desired debug state, and 913 + * cleans up anything which could timeout. 914 + */ 915 + void xe_gt_declare_wedged(struct xe_gt *gt) 916 + { 917 + xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); 918 + 919 + xe_uc_declare_wedged(&gt->uc); 920 + xe_gt_tlb_invalidation_reset(gt); 921 + }

drivers/gpu/drm/xe/xe_gt.h

··· 37 37 int xe_gt_init_hwconfig(struct xe_gt *gt); 38 38 int xe_gt_init_early(struct xe_gt *gt); 39 39 int xe_gt_init(struct xe_gt *gt); 40 + void xe_gt_declare_wedged(struct xe_gt *gt); 40 41 int xe_gt_record_default_lrcs(struct xe_gt *gt); 41 42 42 43 /**

drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c

··· 1543 1543 u64 fair; 1544 1544 1545 1545 fair = div_u64(available, num_vfs); 1546 + fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ 1546 1547 fair = ALIGN_DOWN(fair, alignment); 1547 1548 #ifdef MAX_FAIR_LMEM 1548 1549 fair = min_t(u64, MAX_FAIR_LMEM, fair);

+16

drivers/gpu/drm/xe/xe_guc.c

··· 1178 1178 xe_guc_ct_print(&guc->ct, p, false); 1179 1179 xe_guc_submit_print(guc, p); 1180 1180 } 1181 + 1182 + /** 1183 + * xe_guc_declare_wedged() - Declare GuC wedged 1184 + * @guc: the GuC object 1185 + * 1186 + * Wedge the GuC which stops all submission, saves desired debug state, and 1187 + * cleans up anything which could timeout. 1188 + */ 1189 + void xe_guc_declare_wedged(struct xe_guc *guc) 1190 + { 1191 + xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); 1192 + 1193 + xe_guc_reset_prepare(guc); 1194 + xe_guc_ct_stop(&guc->ct); 1195 + xe_guc_submit_wedge(guc); 1196 + }

drivers/gpu/drm/xe/xe_guc.h

··· 37 37 void xe_guc_stop_prepare(struct xe_guc *guc); 38 38 void xe_guc_stop(struct xe_guc *guc); 39 39 int xe_guc_start(struct xe_guc *guc); 40 + void xe_guc_declare_wedged(struct xe_guc *guc); 40 41 41 42 static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) 42 43 {

+30 -18

drivers/gpu/drm/xe/xe_guc_submit.c

··· 861 861 xe_sched_tdr_queue_imm(&q->guc->sched); 862 862 } 863 863 864 - static bool guc_submit_hint_wedged(struct xe_guc *guc) 864 + /** 865 + * xe_guc_submit_wedge() - Wedge GuC submission 866 + * @guc: the GuC object 867 + * 868 + * Save exec queue's registered with GuC state by taking a ref to each queue. 869 + * Register a DRMM handler to drop refs upon driver unload. 870 + */ 871 + void xe_guc_submit_wedge(struct xe_guc *guc) 865 872 { 866 873 struct xe_device *xe = guc_to_xe(guc); 867 874 struct xe_exec_queue *q; 868 875 unsigned long index; 869 876 int err; 877 + 878 + xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); 879 + 880 + err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, 881 + guc_submit_wedged_fini, guc); 882 + if (err) { 883 + drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); 884 + return; 885 + } 886 + 887 + mutex_lock(&guc->submission_state.lock); 888 + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 889 + if (xe_exec_queue_get_unless_zero(q)) 890 + set_exec_queue_wedged(q); 891 + mutex_unlock(&guc->submission_state.lock); 892 + } 893 + 894 + static bool guc_submit_hint_wedged(struct xe_guc *guc) 895 + { 896 + struct xe_device *xe = guc_to_xe(guc); 870 897 871 898 if (xe->wedged.mode != 2) 872 899 return false; ··· 902 875 return true; 903 876 904 877 xe_device_declare_wedged(xe); 905 - 906 - xe_guc_submit_reset_prepare(guc); 907 - xe_guc_ct_stop(&guc->ct); 908 - 909 - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, 910 - guc_submit_wedged_fini, guc); 911 - if (err) { 912 - drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. 
Although device is wedged.\n"); 913 - return true; /* Device is wedged anyway */ 914 - } 915 - 916 - mutex_lock(&guc->submission_state.lock); 917 - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 918 - if (xe_exec_queue_get_unless_zero(q)) 919 - set_exec_queue_wedged(q); 920 - mutex_unlock(&guc->submission_state.lock); 921 878 922 879 return true; 923 880 } ··· 1688 1677 1689 1678 void xe_guc_submit_reset_wait(struct xe_guc *guc) 1690 1679 { 1691 - wait_event(guc->ct.wq, !guc_read_stopped(guc)); 1680 + wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || 1681 + !guc_read_stopped(guc)); 1692 1682 } 1693 1683 1694 1684 void xe_guc_submit_stop(struct xe_guc *guc)

drivers/gpu/drm/xe/xe_guc_submit.h

··· 18 18 void xe_guc_submit_reset_wait(struct xe_guc *guc); 19 19 void xe_guc_submit_stop(struct xe_guc *guc); 20 20 int xe_guc_submit_start(struct xe_guc *guc); 21 + void xe_guc_submit_wedge(struct xe_guc *guc); 21 22 22 23 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); 23 24 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);

+14

drivers/gpu/drm/xe/xe_uc.c

··· 300 300 { 301 301 xe_gsc_remove(&uc->gsc); 302 302 } 303 + 304 + /** 305 + * xe_uc_declare_wedged() - Declare UC wedged 306 + * @uc: the UC object 307 + * 308 + * Wedge the UC which stops all submission, saves desired debug state, and 309 + * cleans up anything which could timeout. 310 + */ 311 + void xe_uc_declare_wedged(struct xe_uc *uc) 312 + { 313 + xe_gt_assert(uc_to_gt(uc), uc_to_xe(uc)->wedged.mode); 314 + 315 + xe_guc_declare_wedged(&uc->guc); 316 + }

drivers/gpu/drm/xe/xe_uc.h

··· 21 21 int xe_uc_suspend(struct xe_uc *uc); 22 22 int xe_uc_sanitize_reset(struct xe_uc *uc); 23 23 void xe_uc_remove(struct xe_uc *uc); 24 + void xe_uc_declare_wedged(struct xe_uc *uc); 24 25 25 26 #endif

Configure Feed

Configure Feed