Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: Add sdma instance specific functions

SDMA 4.4.2 supports multiple instances. Add functions to support
handling of each SDMA instance separately.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Lijo Lazar and committed by Alex Deucher
527c670e dd1a02e2

+91 -71
+91 -71
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
··· 94 94 } 95 95 } 96 96 97 - static void sdma_v4_4_2_init_golden_registers(struct amdgpu_device *adev) 97 + static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev, 98 + uint32_t inst_mask) 98 99 { 99 100 u32 val; 100 101 int i; ··· 419 418 * 420 419 * Stop the gfx async dma ring buffers. 421 420 */ 422 - static void sdma_v4_4_2_gfx_stop(struct amdgpu_device *adev) 421 + static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, 422 + uint32_t inst_mask) 423 423 { 424 424 struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; 425 425 u32 rb_cntl, ib_cntl; 426 426 int i, unset = 0; 427 427 428 - for (i = 0; i < adev->sdma.num_instances; i++) { 428 + for_each_inst(i, inst_mask) { 429 429 sdma[i] = &adev->sdma.instance[i].ring; 430 430 431 431 if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { ··· 450 448 * 451 449 * Stop the compute async dma queues. 452 450 */ 453 - static void sdma_v4_4_2_rlc_stop(struct amdgpu_device *adev) 451 + static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev, 452 + uint32_t inst_mask) 454 453 { 455 454 /* XXX todo */ 456 455 } ··· 463 460 * 464 461 * Stop the page async dma ring buffers. 465 462 */ 466 - static void sdma_v4_4_2_page_stop(struct amdgpu_device *adev) 463 + static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev, 464 + uint32_t inst_mask) 467 465 { 468 466 struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; 469 467 u32 rb_cntl, ib_cntl; 470 468 int i; 471 469 bool unset = false; 472 470 473 - for (i = 0; i < adev->sdma.num_instances; i++) { 471 + for_each_inst(i, inst_mask) { 474 472 sdma[i] = &adev->sdma.instance[i].page; 475 473 476 474 if ((adev->mman.buffer_funcs_ring == sdma[i]) && ··· 499 495 * 500 496 * Halt or unhalt the async dma engines context switch. 
501 497 */ 502 - static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) 498 + static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev, 499 + bool enable, uint32_t inst_mask) 503 500 { 504 501 u32 f32_cntl, phase_quantum = 0; 505 502 int i; ··· 529 524 unit << SDMA_PHASE0_QUANTUM__UNIT__SHIFT; 530 525 } 531 526 532 - for (i = 0; i < adev->sdma.num_instances; i++) { 527 + for_each_inst(i, inst_mask) { 533 528 f32_cntl = RREG32_SDMA(i, regSDMA_CNTL); 534 529 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL, 535 530 AUTO_CTXSW_ENABLE, enable ? 1 : 0); ··· 543 538 /* Extend page fault timeout to avoid interrupt storm */ 544 539 WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080); 545 540 } 546 - 547 541 } 548 542 549 543 /** ··· 550 546 * 551 547 * @adev: amdgpu_device pointer 552 548 * @enable: enable/disable the DMA MEs. 549 + * @inst_mask: mask of dma engine instances to be enabled 553 550 * 554 551 * Halt or unhalt the async dma engines. 555 552 */ 556 - static void sdma_v4_4_2_enable(struct amdgpu_device *adev, bool enable) 553 + static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable, 554 + uint32_t inst_mask) 557 555 { 558 556 u32 f32_cntl; 559 557 int i; 560 558 561 559 if (!enable) { 562 - sdma_v4_4_2_gfx_stop(adev); 563 - sdma_v4_4_2_rlc_stop(adev); 560 + sdma_v4_4_2_inst_gfx_stop(adev, inst_mask); 561 + sdma_v4_4_2_inst_rlc_stop(adev, inst_mask); 564 562 if (adev->sdma.has_page_queue) 565 - sdma_v4_4_2_page_stop(adev); 563 + sdma_v4_4_2_inst_page_stop(adev, inst_mask); 566 564 } 567 565 568 - for (i = 0; i < adev->sdma.num_instances; i++) { 566 + for_each_inst(i, inst_mask) { 569 567 f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL); 570 568 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1); 571 569 WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl); ··· 786 780 * Set up the compute DMA queues and enable them. 787 781 * Returns 0 for success, error for failure. 
788 782 */ 789 - static int sdma_v4_4_2_rlc_resume(struct amdgpu_device *adev) 783 + static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev, 784 + uint32_t inst_mask) 790 785 { 791 786 sdma_v4_4_2_init_pg(adev); 792 787 ··· 802 795 * Loads the sDMA0/1 ucode. 803 796 * Returns 0 for success, -EINVAL if the ucode is not available. 804 797 */ 805 - static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev) 798 + static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev, 799 + uint32_t inst_mask) 806 800 { 807 801 const struct sdma_firmware_header_v1_0 *hdr; 808 802 const __le32 *fw_data; ··· 811 803 int i, j; 812 804 813 805 /* halt the MEs */ 814 - sdma_v4_4_2_enable(adev, false); 806 + sdma_v4_4_2_inst_enable(adev, false, inst_mask); 815 807 816 - for (i = 0; i < adev->sdma.num_instances; i++) { 808 + for_each_inst(i, inst_mask) { 817 809 if (!adev->sdma.instance[i].fw) 818 810 return -EINVAL; 819 811 ··· 839 831 } 840 832 841 833 /** 842 - * sdma_v4_4_2_start - setup and start the async dma engines 834 + * sdma_v4_4_2_inst_start - setup and start the async dma engines 843 835 * 844 836 * @adev: amdgpu_device pointer 845 837 * 846 838 * Set up the DMA engines and enable them. 847 839 * Returns 0 for success, error for failure. 
848 840 */ 849 - static int sdma_v4_4_2_start(struct amdgpu_device *adev) 841 + static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, 842 + uint32_t inst_mask) 850 843 { 851 844 struct amdgpu_ring *ring; 845 + uint32_t tmp_mask; 852 846 int i, r = 0; 853 847 854 848 if (amdgpu_sriov_vf(adev)) { 855 - sdma_v4_4_2_ctx_switch_enable(adev, false); 856 - sdma_v4_4_2_enable(adev, false); 849 + sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask); 850 + sdma_v4_4_2_inst_enable(adev, false, inst_mask); 857 851 } else { 858 852 /* bypass sdma microcode loading on Gopher */ 859 853 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && 860 - !(adev->pdev->device == 0x49) && !(adev->pdev->device == 0x50)) { 861 - r = sdma_v4_4_2_load_microcode(adev); 854 + adev->sdma.instance[0].fw) { 855 + r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask); 862 856 if (r) 863 857 return r; 864 858 } 865 859 866 860 /* unhalt the MEs */ 867 - sdma_v4_4_2_enable(adev, true); 861 + sdma_v4_4_2_inst_enable(adev, true, inst_mask); 868 862 /* enable sdma ring preemption */ 869 - sdma_v4_4_2_ctx_switch_enable(adev, true); 863 + sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask); 870 864 } 871 865 872 866 /* start the gfx rings and rlc compute queues */ 873 - for (i = 0; i < adev->sdma.num_instances; i++) { 867 + tmp_mask = inst_mask; 868 + for_each_inst(i, tmp_mask) { 874 869 uint32_t temp; 875 870 876 871 WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); ··· 900 889 } 901 890 902 891 if (amdgpu_sriov_vf(adev)) { 903 - sdma_v4_4_2_ctx_switch_enable(adev, true); 904 - sdma_v4_4_2_enable(adev, true); 892 + sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask); 893 + sdma_v4_4_2_inst_enable(adev, true, inst_mask); 905 894 } else { 906 - r = sdma_v4_4_2_rlc_resume(adev); 895 + r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask); 907 896 if (r) 908 897 return r; 909 898 } 910 899 911 - for (i = 0; i < adev->sdma.num_instances; i++) { 900 + tmp_mask = inst_mask; 901 + 
for_each_inst(i, tmp_mask) { 912 902 ring = &adev->sdma.instance[i].ring; 913 903 914 904 r = amdgpu_ring_test_helper(ring); ··· 1395 1383 { 1396 1384 int r; 1397 1385 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1386 + uint32_t inst_mask; 1398 1387 1388 + /* TODO: Check if this is needed */ 1399 1389 if (adev->flags & AMD_IS_APU) 1400 1390 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); 1401 1391 1392 + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); 1402 1393 if (!amdgpu_sriov_vf(adev)) 1403 - sdma_v4_4_2_init_golden_registers(adev); 1394 + sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); 1404 1395 1405 - r = sdma_v4_4_2_start(adev); 1396 + r = sdma_v4_4_2_inst_start(adev, inst_mask); 1406 1397 1407 1398 return r; 1408 1399 } ··· 1413 1398 static int sdma_v4_4_2_hw_fini(void *handle) 1414 1399 { 1415 1400 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1401 + uint32_t inst_mask; 1416 1402 int i; 1417 1403 1418 1404 if (amdgpu_sriov_vf(adev)) 1419 1405 return 0; 1420 1406 1407 + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); 1421 1408 for (i = 0; i < adev->sdma.num_instances; i++) { 1422 1409 amdgpu_irq_put(adev, &adev->sdma.ecc_irq, 1423 1410 AMDGPU_SDMA_IRQ_INSTANCE0 + i); 1424 1411 } 1425 1412 1426 - sdma_v4_4_2_ctx_switch_enable(adev, false); 1427 - sdma_v4_4_2_enable(adev, false); 1413 + sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask); 1414 + sdma_v4_4_2_inst_enable(adev, false, inst_mask); 1428 1415 1429 1416 return 0; 1430 1417 } 1418 + 1419 + static int sdma_v4_4_2_set_clockgating_state(void *handle, 1420 + enum amd_clockgating_state state); 1431 1421 1432 1422 static int sdma_v4_4_2_suspend(void *handle) 1433 1423 { ··· 1670 1650 return 0; 1671 1651 } 1672 1652 1673 - static void sdma_v4_4_2_update_medium_grain_clock_gating( 1674 - struct amdgpu_device *adev, 1675 - bool enable) 1653 + static void sdma_v4_4_2_inst_update_medium_grain_light_sleep( 1654 + struct 
amdgpu_device *adev, bool enable, uint32_t inst_mask) 1655 + { 1656 + uint32_t data, def; 1657 + int i; 1658 + 1659 + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { 1660 + for_each_inst(i, inst_mask) { 1661 + /* 1-not override: enable sdma mem light sleep */ 1662 + def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL); 1663 + data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1664 + if (def != data) 1665 + WREG32_SDMA(i, regSDMA_POWER_CNTL, data); 1666 + } 1667 + } else { 1668 + for_each_inst(i, inst_mask) { 1669 + /* 0-override:disable sdma mem light sleep */ 1670 + def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL); 1671 + data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1672 + if (def != data) 1673 + WREG32_SDMA(i, regSDMA_POWER_CNTL, data); 1674 + } 1675 + } 1676 + } 1677 + 1678 + static void sdma_v4_4_2_inst_update_medium_grain_clock_gating( 1679 + struct amdgpu_device *adev, bool enable, uint32_t inst_mask) 1676 1680 { 1677 1681 uint32_t data, def; 1678 1682 int i; 1679 1683 1680 1684 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { 1681 - for (i = 0; i < adev->sdma.num_instances; i++) { 1685 + for_each_inst(i, inst_mask) { 1682 1686 def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL); 1683 1687 data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1684 1688 SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK | ··· 1716 1672 WREG32_SDMA(i, regSDMA_CLK_CTRL, data); 1717 1673 } 1718 1674 } else { 1719 - for (i = 0; i < adev->sdma.num_instances; i++) { 1675 + for_each_inst(i, inst_mask) { 1720 1676 def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL); 1721 1677 data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1722 1678 SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK | ··· 1732 1688 } 1733 1689 } 1734 1690 1735 - 1736 - static void sdma_v4_4_2_update_medium_grain_light_sleep( 1737 - struct amdgpu_device *adev, 1738 - bool enable) 1739 - { 1740 - uint32_t data, def; 1741 - int i; 1742 - 1743 - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { 1744 - for (i = 0; i < adev->sdma.num_instances; 
i++) { 1745 - /* 1-not override: enable sdma mem light sleep */ 1746 - def = data = RREG32_SDMA(0, regSDMA_POWER_CNTL); 1747 - data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1748 - if (def != data) 1749 - WREG32_SDMA(0, regSDMA_POWER_CNTL, data); 1750 - } 1751 - } else { 1752 - for (i = 0; i < adev->sdma.num_instances; i++) { 1753 - /* 0-override:disable sdma mem light sleep */ 1754 - def = data = RREG32_SDMA(0, regSDMA_POWER_CNTL); 1755 - data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1756 - if (def != data) 1757 - WREG32_SDMA(0, regSDMA_POWER_CNTL, data); 1758 - } 1759 - } 1760 - } 1761 - 1762 1691 static int sdma_v4_4_2_set_clockgating_state(void *handle, 1763 1692 enum amd_clockgating_state state) 1764 1693 { 1765 1694 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1695 + uint32_t inst_mask; 1766 1696 1767 1697 if (amdgpu_sriov_vf(adev)) 1768 1698 return 0; 1769 1699 1770 - sdma_v4_4_2_update_medium_grain_clock_gating(adev, 1771 - state == AMD_CG_STATE_GATE); 1772 - sdma_v4_4_2_update_medium_grain_light_sleep(adev, 1773 - state == AMD_CG_STATE_GATE); 1700 + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); 1701 + 1702 + sdma_v4_4_2_inst_update_medium_grain_clock_gating( 1703 + adev, state == AMD_CG_STATE_GATE, inst_mask); 1704 + sdma_v4_4_2_inst_update_medium_grain_light_sleep( 1705 + adev, state == AMD_CG_STATE_GATE, inst_mask); 1774 1706 return 0; 1775 1707 } 1776 1708