Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: Add enforce_isolation sysfs attribute

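This commit adds a new sysfs attribute 'enforce_isolation' to control
the 'enforce_isolation' setting of a GPU. The attribute can be read and
written, and accepts the values 0 (disabled) and 1 (enabled). On a
partitioned device the attribute carries one value per partition (XCP),
separated by spaces, and a write must supply exactly that many values;
on an unpartitioned device it carries a single value. Any other input
is rejected with -EINVAL.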

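When 'enforce_isolation' is switched from disabled to enabled for a
partition, amdgpu_vmid_alloc_reserved() is called for that partition's
GFX hub. When it is switched from enabled to disabled,
amdgpu_vmid_free_reserved() is called for that hub. Writing the value
that is already set changes nothing.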

The set function validates the input first, then takes
'enforce_isolation_mutex' while it updates the 'enforce_isolation'
flags and the reserved VMIDs, and releases it afterwards. This
serializes concurrent writers, so that the flags and the VMID
reservations cannot get out of step with each other.

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Srinivasan Shanmugam and committed by Alex Deucher
e189be9b dba1a6cf

+107
+2
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 1164 1164 bool debug_enable_ras_aca; 1165 1165 1166 1166 bool enforce_isolation[MAX_XCP]; 1167 + /* Added this mutex for cleaner shader isolation between GFX and compute processes */ 1168 + struct mutex enforce_isolation_mutex; 1167 1169 }; 1168 1170 1169 1171 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 4065 4065 mutex_init(&adev->pm.stable_pstate_ctx_lock); 4066 4066 mutex_init(&adev->benchmark_mutex); 4067 4067 mutex_init(&adev->gfx.reset_sem_mutex); 4068 + /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ 4069 + mutex_init(&adev->enforce_isolation_mutex); 4068 4070 4069 4071 amdgpu_device_init_apu_flags(adev); 4070 4072
+101
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 1391 1391 return sysfs_emit(buf, "%s\n", supported_partition); 1392 1392 } 1393 1393 1394 + static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, 1395 + struct device_attribute *attr, 1396 + char *buf) 1397 + { 1398 + struct drm_device *ddev = dev_get_drvdata(dev); 1399 + struct amdgpu_device *adev = drm_to_adev(ddev); 1400 + int i; 1401 + ssize_t size = 0; 1402 + 1403 + if (adev->xcp_mgr) { 1404 + for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { 1405 + size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]); 1406 + if (i < (adev->xcp_mgr->num_xcps - 1)) 1407 + size += sysfs_emit_at(buf, size, " "); 1408 + } 1409 + buf[size++] = '\n'; 1410 + } else { 1411 + size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]); 1412 + } 1413 + 1414 + return size; 1415 + } 1416 + 1417 + static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, 1418 + struct device_attribute *attr, 1419 + const char *buf, size_t count) 1420 + { 1421 + struct drm_device *ddev = dev_get_drvdata(dev); 1422 + struct amdgpu_device *adev = drm_to_adev(ddev); 1423 + long partition_values[MAX_XCP] = {0}; 1424 + int ret, i, num_partitions; 1425 + const char *input_buf = buf; 1426 + 1427 + for (i = 0; i < (adev->xcp_mgr ? 
adev->xcp_mgr->num_xcps : 1); i++) { 1428 + ret = sscanf(input_buf, "%ld", &partition_values[i]); 1429 + if (ret <= 0) 1430 + break; 1431 + 1432 + /* Move the pointer to the next value in the string */ 1433 + input_buf = strchr(input_buf, ' '); 1434 + if (input_buf) { 1435 + input_buf++; 1436 + } else { 1437 + i++; 1438 + break; 1439 + } 1440 + } 1441 + num_partitions = i; 1442 + 1443 + if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps) 1444 + return -EINVAL; 1445 + 1446 + if (!adev->xcp_mgr && num_partitions != 1) 1447 + return -EINVAL; 1448 + 1449 + for (i = 0; i < num_partitions; i++) { 1450 + if (partition_values[i] != 0 && partition_values[i] != 1) 1451 + return -EINVAL; 1452 + } 1453 + 1454 + mutex_lock(&adev->enforce_isolation_mutex); 1455 + 1456 + for (i = 0; i < num_partitions; i++) { 1457 + if (adev->enforce_isolation[i] && !partition_values[i]) { 1458 + /* Going from enabled to disabled */ 1459 + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); 1460 + } else if (!adev->enforce_isolation[i] && partition_values[i]) { 1461 + /* Going from disabled to enabled */ 1462 + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); 1463 + } 1464 + adev->enforce_isolation[i] = partition_values[i]; 1465 + } 1466 + 1467 + mutex_unlock(&adev->enforce_isolation_mutex); 1468 + 1469 + return count; 1470 + } 1471 + 1472 + static DEVICE_ATTR(enforce_isolation, 0644, 1473 + amdgpu_gfx_get_enforce_isolation, 1474 + amdgpu_gfx_set_enforce_isolation); 1475 + 1394 1476 static DEVICE_ATTR(current_compute_partition, 0644, 1395 1477 amdgpu_gfx_get_current_compute_partition, 1396 1478 amdgpu_gfx_set_compute_partition); ··· 1497 1415 { 1498 1416 device_remove_file(adev->dev, &dev_attr_current_compute_partition); 1499 1417 device_remove_file(adev->dev, &dev_attr_available_compute_partition); 1418 + } 1419 + 1420 + int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) 1421 + { 1422 + int r; 1423 + 1424 + if (!amdgpu_sriov_vf(adev)) { 1425 + r = 
device_create_file(adev->dev, &dev_attr_enforce_isolation); 1426 + if (r) 1427 + return r; 1428 + } 1429 + 1430 + return 0; 1431 + } 1432 + 1433 + void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) 1434 + { 1435 + if (!amdgpu_sriov_vf(adev)) 1436 + device_remove_file(adev->dev, &dev_attr_enforce_isolation); 1500 1437 } 1501 1438 1502 1439 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
··· 561 561 void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, 562 562 unsigned int cleaner_shader_size, 563 563 const void *cleaner_shader_ptr); 564 + int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); 565 + void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); 564 566 565 567 static inline const char *amdgpu_gfx_compute_mode_desc(int mode) 566 568 {