Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: adjust enforce_isolation handling

Switch enforce_isolation from a bool to an enum to allow more options
for enforcing isolation. There are now 3 modes of operation:
- Disabled (0)
- Enabled (serialization and cleaner shader) (1)
- Enabled in legacy mode (no serialization or cleaner shader) (2)
This provides better flexibility for more use cases.

Acked-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+93 -30
+9 -2
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 230 230 extern int amdgpu_smartshift_bias; 231 231 extern int amdgpu_use_xgmi_p2p; 232 232 extern int amdgpu_mtype_local; 233 - extern bool enforce_isolation; 233 + extern int amdgpu_enforce_isolation; 234 234 #ifdef CONFIG_HSA_AMD 235 235 extern int sched_policy; 236 236 extern bool debug_evictions; ··· 873 873 struct amdgpu_reset_domain; 874 874 struct amdgpu_fru_info; 875 875 876 + enum amdgpu_enforce_isolation_mode { 877 + AMDGPU_ENFORCE_ISOLATION_DISABLE = 0, 878 + AMDGPU_ENFORCE_ISOLATION_ENABLE = 1, 879 + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2, 880 + }; 881 + 882 + 876 883 /* 877 884 * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. 878 885 */ ··· 1232 1225 1233 1226 /* Protection for the following isolation structure */ 1234 1227 struct mutex enforce_isolation_mutex; 1235 - bool enforce_isolation[MAX_XCP]; 1228 + enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP]; 1236 1229 struct amdgpu_isolation { 1237 1230 void *owner; 1238 1231 struct dma_fence *spearhead;
+15 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 296 296 num_ibs[i], &p->jobs[i]); 297 297 if (ret) 298 298 goto free_all_kdata; 299 - p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; 299 + switch (p->adev->enforce_isolation[fpriv->xcp_id]) { 300 + case AMDGPU_ENFORCE_ISOLATION_DISABLE: 301 + default: 302 + p->jobs[i]->enforce_isolation = false; 303 + p->jobs[i]->run_cleaner_shader = false; 304 + break; 305 + case AMDGPU_ENFORCE_ISOLATION_ENABLE: 306 + p->jobs[i]->enforce_isolation = true; 307 + p->jobs[i]->run_cleaner_shader = true; 308 + break; 309 + case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY: 310 + p->jobs[i]->enforce_isolation = true; 311 + p->jobs[i]->run_cleaner_shader = false; 312 + break; 313 + } 300 314 } 301 315 p->gang_leader = p->jobs[p->gang_leader_idx]; 302 316
+20 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 2145 2145 2146 2146 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); 2147 2147 2148 - for (i = 0; i < MAX_XCP; i++) 2149 - adev->enforce_isolation[i] = !!enforce_isolation; 2148 + for (i = 0; i < MAX_XCP; i++) { 2149 + switch (amdgpu_enforce_isolation) { 2150 + case -1: 2151 + case 0: 2152 + default: 2153 + /* disable */ 2154 + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; 2155 + break; 2156 + case 1: 2157 + /* enable */ 2158 + adev->enforce_isolation[i] = 2159 + AMDGPU_ENFORCE_ISOLATION_ENABLE; 2160 + break; 2161 + case 2: 2162 + /* enable legacy mode */ 2163 + adev->enforce_isolation[i] = 2164 + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; 2165 + break; 2166 + } 2167 + } 2150 2168 2151 2169 return 0; 2152 2170 }
+7 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 179 179 uint amdgpu_sdma_phase_quantum = 32; 180 180 char *amdgpu_disable_cu; 181 181 char *amdgpu_virtual_display; 182 - bool enforce_isolation; 182 + int amdgpu_enforce_isolation = -1; 183 183 int amdgpu_modeset = -1; 184 184 185 185 /* Specifies the default granularity for SVM, used in buffer ··· 1038 1038 1039 1039 1040 1040 /** 1041 - * DOC: enforce_isolation (bool) 1042 - * enforce process isolation between graphics and compute via using the same reserved vmid. 1041 + * DOC: enforce_isolation (int) 1042 + * enforce process isolation between graphics and compute. 1043 + * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode) 1043 1044 */ 1044 - module_param(enforce_isolation, bool, 0444); 1045 - MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); 1045 + module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444); 1046 + MODULE_PARM_DESC(enforce_isolation, 1047 + "enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode)"); 1046 1048 1047 1049 /** 1048 1050 * DOC: modeset (int)
+29 -10
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 1468 1468 goto err; 1469 1469 1470 1470 job->enforce_isolation = true; 1471 + /* always run the cleaner shader */ 1472 + job->run_cleaner_shader = true; 1471 1473 1472 1474 ib = &job->ibs[0]; 1473 1475 for (i = 0; i <= ring->funcs->align_mask; ++i) ··· 1601 1599 * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' 1602 1600 * feature for each GPU partition. Reading from the 'enforce_isolation' 1603 1601 * sysfs file returns the isolation settings for all partitions, where '0' 1604 - * indicates disabled and '1' indicates enabled. 1602 + * indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode. 1605 1603 * 1606 1604 * Return: The number of bytes read from the sysfs file. 1607 1605 */ ··· 1636 1634 * @count: The size of the input data 1637 1635 * 1638 1636 * This function allows control over the 'enforce_isolation' feature, which 1639 - * serializes access to the graphics engine. Writing '1' or '0' to the 1640 - * 'enforce_isolation' sysfs file enables or disables process isolation for 1641 - * each partition. The input should specify the setting for all partitions. 1637 + * serializes access to the graphics engine. Writing '1', '2', or '0' to the 1638 + * 'enforce_isolation' sysfs file enables (full or legacy) or disables process 1639 + * isolation for each partition. The input should specify the setting for all 1640 + * partitions. 1642 1641 * 1643 1642 * Return: The number of bytes written to the sysfs file. 
1644 1643 */ ··· 1676 1673 return -EINVAL; 1677 1674 1678 1675 for (i = 0; i < num_partitions; i++) { 1679 - if (partition_values[i] != 0 && partition_values[i] != 1) 1676 + if (partition_values[i] != 0 && 1677 + partition_values[i] != 1 && 1678 + partition_values[i] != 2) 1680 1679 return -EINVAL; 1681 1680 } 1682 1681 1683 1682 mutex_lock(&adev->enforce_isolation_mutex); 1684 - for (i = 0; i < num_partitions; i++) 1685 - adev->enforce_isolation[i] = partition_values[i]; 1683 + for (i = 0; i < num_partitions; i++) { 1684 + switch (partition_values[i]) { 1685 + case 0: 1686 + default: 1687 + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; 1688 + break; 1689 + case 1: 1690 + adev->enforce_isolation[i] = 1691 + AMDGPU_ENFORCE_ISOLATION_ENABLE; 1692 + break; 1693 + case 2: 1694 + adev->enforce_isolation[i] = 1695 + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; 1696 + break; 1697 + } 1698 + } 1686 1699 mutex_unlock(&adev->enforce_isolation_mutex); 1687 1700 1688 1701 amdgpu_mes_update_enforce_isolation(adev); ··· 2053 2034 bool wait = false; 2054 2035 2055 2036 mutex_lock(&adev->enforce_isolation_mutex); 2056 - if (adev->enforce_isolation[idx]) { 2037 + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { 2057 2038 /* set the initial values if nothing is set */ 2058 2039 if (!adev->gfx.enforce_isolation_jiffies[idx]) { 2059 2040 adev->gfx.enforce_isolation_jiffies[idx] = jiffies; ··· 2120 2101 amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); 2121 2102 2122 2103 mutex_lock(&adev->enforce_isolation_mutex); 2123 - if (adev->enforce_isolation[idx]) { 2104 + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { 2124 2105 if (adev->kfd.init_complete) 2125 2106 sched_work = true; 2126 2107 } ··· 2157 2138 return; 2158 2139 2159 2140 mutex_lock(&adev->enforce_isolation_mutex); 2160 - if (adev->enforce_isolation[idx]) { 2141 + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { 2161 2142 if 
(adev->kfd.init_complete) 2162 2143 sched_work = true; 2163 2144 }
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
··· 588 588 } 589 589 /* alloc a default reserved vmid to enforce isolation */ 590 590 for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { 591 - if (adev->enforce_isolation[i]) 591 + if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) 592 592 amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); 593 593 } 594 594 }
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
··· 78 78 79 79 /* enforce isolation */ 80 80 bool enforce_isolation; 81 + bool run_cleaner_shader; 81 82 82 83 uint32_t num_ibs; 83 84 struct amdgpu_ib ibs[];
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
··· 768 768 if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { 769 769 mutex_lock(&adev->enforce_isolation_mutex); 770 770 for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { 771 - if (adev->enforce_isolation[i]) 771 + if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 772 772 r |= amdgpu_mes_set_enforce_isolation(adev, i, true); 773 773 else 774 774 r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 787 787 pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && 788 788 ring->funcs->emit_wreg; 789 789 790 - cleaner_shader_needed = adev->gfx.enable_cleaner_shader && 790 + cleaner_shader_needed = job->run_cleaner_shader && 791 + adev->gfx.enable_cleaner_shader && 791 792 ring->funcs->emit_cleaner_shader && job->base.s_fence && 792 793 &job->base.s_fence->scheduled == isolation->spearhead; 793 794
+1 -1
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
··· 724 724 mes->event_log_gpu_addr; 725 725 } 726 726 727 - if (adev->enforce_isolation[0]) 727 + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 728 728 mes_set_hw_res_pkt.limit_single_process = 1; 729 729 730 730 return mes_v11_0_submit_pkt_and_poll_completion(mes,
+1 -1
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
··· 762 762 pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); 763 763 } 764 764 765 - if (adev->enforce_isolation[0]) 765 + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 766 766 mes_set_hw_res_pkt.limit_single_process = 1; 767 767 768 768 return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+6 -5
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
··· 43 43 memset(buffer, 0, sizeof(struct pm4_mes_map_process)); 44 44 packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, 45 45 sizeof(struct pm4_mes_map_process)); 46 - if (adev->enforce_isolation[kfd->node_id]) 46 + if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 47 47 packet->bitfields2.exec_cleaner_shader = 1; 48 48 packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; 49 49 packet->bitfields2.process_quantum = 10; ··· 102 102 memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); 103 103 packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, 104 104 sizeof(struct pm4_mes_map_process_aldebaran)); 105 - if (adev->enforce_isolation[knode->node_id]) 105 + if (adev->enforce_isolation[knode->node_id] == 106 + AMDGPU_ENFORCE_ISOLATION_ENABLE) 106 107 packet->bitfields2.exec_cleaner_shader = 1; 107 108 packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; 108 109 packet->bitfields2.process_quantum = 10; ··· 166 165 * hws_max_conc_proc has been done in 167 166 * kgd2kfd_device_init(). 168 167 */ 169 - concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? 170 - 1 : min(pm->dqm->processes_count, 171 - kfd->max_proc_per_quantum); 168 + concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] == 169 + AMDGPU_ENFORCE_ISOLATION_ENABLE) ? 170 + 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); 172 171 173 172 packet = (struct pm4_mes_runlist *)buffer; 174 173