Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdkfd: Set per-process flags only once for cik/vi

Set the per-process static sh_mem config only once, during process
initialization. Move all static changes from update_qpd(), which is
called each time a queue is created, to set_cache_memory_policy(), which
is called once during process initialization.

set_cache_memory_policy() is currently defined only for the cik and vi
families, so this commit focuses only on these two. A separate commit will
address other ASICs.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Harish Kasiviswanathan and committed by
Alex Deucher
289e6850 68bfdc8d

+97 -88
+3 -36
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 2591 2591 return retval; 2592 2592 } 2593 2593 2594 - /* 2595 - * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 2596 - * stay in user mode. 2597 - */ 2598 - #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 2599 - /* APE1 limit is inclusive and 64K aligned. */ 2600 - #define APE1_LIMIT_ALIGNMENT 0xFFFF 2601 - 2602 2594 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 2603 2595 struct qcm_process_device *qpd, 2604 2596 enum cache_policy default_policy, ··· 2605 2613 2606 2614 dqm_lock(dqm); 2607 2615 2608 - if (alternate_aperture_size == 0) { 2609 - /* base > limit disables APE1 */ 2610 - qpd->sh_mem_ape1_base = 1; 2611 - qpd->sh_mem_ape1_limit = 0; 2612 - } else { 2613 - /* 2614 - * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 2615 - * SH_MEM_APE1_BASE[31:0], 0x0000 } 2616 - * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 2617 - * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 2618 - * Verify that the base and size parameters can be 2619 - * represented in this format and convert them. 2620 - * Additionally restrict APE1 to user-mode addresses. 2621 - */ 2622 - 2623 - uint64_t base = (uintptr_t)alternate_aperture_base; 2624 - uint64_t limit = base + alternate_aperture_size - 1; 2625 - 2626 - if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 2627 - (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 2628 - retval = false; 2629 - goto out; 2630 - } 2631 - 2632 - qpd->sh_mem_ape1_base = base >> 16; 2633 - qpd->sh_mem_ape1_limit = limit >> 16; 2634 - } 2635 - 2636 2616 retval = dqm->asic_ops.set_cache_memory_policy( 2637 2617 dqm, 2638 2618 qpd, ··· 2612 2648 alternate_policy, 2613 2649 alternate_aperture_base, 2614 2650 alternate_aperture_size); 2651 + 2652 + if (retval) 2653 + goto out; 2615 2654 2616 2655 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 2617 2656 program_sh_mem_settings(dqm, qpd);
+46 -23
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
··· 27 27 #include "oss/oss_2_4_sh_mask.h" 28 28 #include "gca/gfx_7_2_sh_mask.h" 29 29 30 + /* 31 + * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 32 + * stay in user mode. 33 + */ 34 + #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 35 + /* APE1 limit is inclusive and 64K aligned. */ 36 + #define APE1_LIMIT_ALIGNMENT 0xFFFF 37 + 30 38 static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, 31 39 struct qcm_process_device *qpd, 32 40 enum cache_policy default_policy, ··· 92 84 { 93 85 uint32_t default_mtype; 94 86 uint32_t ape1_mtype; 87 + unsigned int temp; 88 + bool retval = true; 89 + 90 + if (alternate_aperture_size == 0) { 91 + /* base > limit disables APE1 */ 92 + qpd->sh_mem_ape1_base = 1; 93 + qpd->sh_mem_ape1_limit = 0; 94 + } else { 95 + /* 96 + * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 97 + * SH_MEM_APE1_BASE[31:0], 0x0000 } 98 + * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 99 + * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 100 + * Verify that the base and size parameters can be 101 + * represented in this format and convert them. 102 + * Additionally restrict APE1 to user-mode addresses. 103 + */ 104 + 105 + uint64_t base = (uintptr_t)alternate_aperture_base; 106 + uint64_t limit = base + alternate_aperture_size - 1; 107 + 108 + if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 109 + (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 110 + retval = false; 111 + goto out; 112 + } 113 + 114 + qpd->sh_mem_ape1_base = base >> 16; 115 + qpd->sh_mem_ape1_limit = limit >> 16; 116 + } 95 117 96 118 default_mtype = (default_policy == cache_policy_coherent) ? 
97 119 MTYPE_NONCACHED : ··· 135 97 | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) 136 98 | DEFAULT_MTYPE(default_mtype) 137 99 | APE1_MTYPE(ape1_mtype); 138 - 139 - return true; 140 - } 141 - 142 - static int update_qpd_cik(struct device_queue_manager *dqm, 143 - struct qcm_process_device *qpd) 144 - { 145 - struct kfd_process_device *pdd; 146 - unsigned int temp; 147 - 148 - pdd = qpd_to_pdd(qpd); 149 - 150 - /* check if sh_mem_config register already configured */ 151 - if (qpd->sh_mem_config == 0) { 152 - qpd->sh_mem_config = 153 - ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | 154 - DEFAULT_MTYPE(MTYPE_NONCACHED) | 155 - APE1_MTYPE(MTYPE_NONCACHED); 156 - qpd->sh_mem_ape1_limit = 0; 157 - qpd->sh_mem_ape1_base = 0; 158 - } 159 - 160 100 /* On dGPU we're always in GPUVM64 addressing mode with 64-bit 161 101 * aperture addresses. 162 102 */ 163 - temp = get_sh_mem_bases_nybble_64(pdd); 103 + temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd)); 164 104 qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); 165 105 166 106 pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", 167 107 qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); 168 108 109 + out: 110 + return retval; 111 + } 112 + 113 + static int update_qpd_cik(struct device_queue_manager *dqm, 114 + struct qcm_process_device *qpd) 115 + { 169 116 return 0; 170 117 } 171 118
+48 -29
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
··· 27 27 #include "gca/gfx_8_0_sh_mask.h" 28 28 #include "oss/oss_3_0_sh_mask.h" 29 29 30 + /* 31 + * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 32 + * stay in user mode. 33 + */ 34 + #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 35 + /* APE1 limit is inclusive and 64K aligned. */ 36 + #define APE1_LIMIT_ALIGNMENT 0xFFFF 37 + 30 38 static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, 31 39 struct qcm_process_device *qpd, 32 40 enum cache_policy default_policy, ··· 93 85 { 94 86 uint32_t default_mtype; 95 87 uint32_t ape1_mtype; 88 + unsigned int temp; 89 + bool retval = true; 90 + 91 + if (alternate_aperture_size == 0) { 92 + /* base > limit disables APE1 */ 93 + qpd->sh_mem_ape1_base = 1; 94 + qpd->sh_mem_ape1_limit = 0; 95 + } else { 96 + /* 97 + * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 98 + * SH_MEM_APE1_BASE[31:0], 0x0000 } 99 + * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 100 + * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 101 + * Verify that the base and size parameters can be 102 + * represented in this format and convert them. 103 + * Additionally restrict APE1 to user-mode addresses. 104 + */ 105 + 106 + uint64_t base = (uintptr_t)alternate_aperture_base; 107 + uint64_t limit = base + alternate_aperture_size - 1; 108 + 109 + if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 110 + (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 111 + retval = false; 112 + goto out; 113 + } 114 + 115 + qpd->sh_mem_ape1_base = base >> 16; 116 + qpd->sh_mem_ape1_limit = limit >> 16; 117 + } 96 118 97 119 default_mtype = (default_policy == cache_policy_coherent) ? 98 120 MTYPE_UC : ··· 138 100 default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 139 101 ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT; 140 102 141 - return true; 103 + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit 104 + * aperture addresses. 
105 + */ 106 + temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd)); 107 + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); 108 + 109 + pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n", 110 + temp, qpd->sh_mem_bases); 111 + out: 112 + return retval; 142 113 } 143 114 144 115 static int update_qpd_vi(struct device_queue_manager *dqm, 145 116 struct qcm_process_device *qpd) 146 117 { 147 - struct kfd_process_device *pdd; 148 - unsigned int temp; 149 - 150 - pdd = qpd_to_pdd(qpd); 151 - 152 - /* check if sh_mem_config register already configured */ 153 - if (qpd->sh_mem_config == 0) { 154 - qpd->sh_mem_config = 155 - SH_MEM_ALIGNMENT_MODE_UNALIGNED << 156 - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 157 - MTYPE_UC << 158 - SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 159 - MTYPE_UC << 160 - SH_MEM_CONFIG__APE1_MTYPE__SHIFT; 161 - 162 - qpd->sh_mem_ape1_limit = 0; 163 - qpd->sh_mem_ape1_base = 0; 164 - } 165 - 166 - /* On dGPU we're always in GPUVM64 addressing mode with 64-bit 167 - * aperture addresses. 168 - */ 169 - temp = get_sh_mem_bases_nybble_64(pdd); 170 - qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); 171 - 172 - pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n", 173 - temp, qpd->sh_mem_bases); 174 - 175 118 return 0; 176 119 } 177 120