Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

amd/amdkfd: Trigger segfault for early userptr unmapping

If an application unmaps the memory before destroying the userptr, the
driver needs to trigger a segfault to notify user space to correct the
free sequence in VM debug mode.

v2: Send GPU access fault to user space
v3: Report GPU address to user space, remove unnecessary params
v4: Collapse pr_err into one line, remove userptr log info

Signed-off-by: Shane Xiao <shane.xiao@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Shane Xiao and committed by
Alex Deucher
2d274bf7 8e320f67

+33
+12
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 2559 2559 if (ret != -EFAULT) 2560 2560 return ret; 2561 2561 2562 + /* If applications unmap memory before destroying the userptr 2563 + * from the KFD, trigger a segmentation fault in VM debug mode. 2564 + */ 2565 + if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) { 2566 + pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n", 2567 + pid_nr(process_info->pid), mem->va); 2568 + 2569 + // Send GPU VM fault to user space 2570 + kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid), 2571 + mem->va); 2572 + } 2573 + 2562 2574 ret = 0; 2563 2575 } 2564 2576
+19
drivers/gpu/drm/amd/amdkfd/kfd_events.c
··· 1177 1177 kfd_unref_process(p); 1178 1178 } 1179 1179 1180 + void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va) 1181 + { 1182 + struct kfd_process_device *pdd; 1183 + struct kfd_hsa_memory_exception_data exception_data; 1184 + int i; 1185 + 1186 + memset(&exception_data, 0, sizeof(exception_data)); 1187 + exception_data.va = gpu_va; 1188 + exception_data.failure.NotPresent = 1; 1189 + 1190 + // Send VM seg fault to all kfd process device 1191 + for (i = 0; i < p->n_pdds; i++) { 1192 + pdd = p->pdds[i]; 1193 + exception_data.gpu_id = pdd->user_gpu_id; 1194 + kfd_evict_process_device(pdd); 1195 + kfd_signal_vm_fault_event(pdd, NULL, &exception_data); 1196 + } 1197 + } 1198 + 1180 1199 void kfd_signal_vm_fault_event(struct kfd_process_device *pdd, 1181 1200 struct kfd_vm_fault_info *info, 1182 1201 struct kfd_hsa_memory_exception_data *data)
+2
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 1507 1507 int kfd_get_num_events(struct kfd_process *p); 1508 1508 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); 1509 1509 1510 + void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va); 1511 + 1510 1512 void kfd_signal_vm_fault_event(struct kfd_process_device *pdd, 1511 1513 struct kfd_vm_fault_info *info, 1512 1514 struct kfd_hsa_memory_exception_data *data);