Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'drm-fixes-2024-12-14' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
"This is the weekly fixes pull for drm. Just has i915, xe and amdgpu
changes in it. Nothing too major in here:

i915:
- Don't use indexed register writes needlessly [dsb]
- Stop using non-posted DSB writes for legacy LUT [color]
- Fix NULL pointer dereference in capture_engine
- Fix memory leak by correcting cache object name in error handler

xe:
- Fix a KUNIT test error message (Mirsad Todorovac)
- Fix an invalidation fence PM ref leak (Daniele)
- Fix a register pool UAF (Lucas)

amdgpu:
- ISP hw init fix
- SR-IOV fixes
- Fix contiguous VRAM mapping for UVD on older GPUs
- Fix some regressions due to drm scheduler changes
- Workload profile fixes
- Cleaner shader fix

amdkfd:
- Fix DMA map direction for migration
- Fix a potential null pointer dereference
- Cacheline size fixes
- Runtime PM fix"

* tag 'drm-fixes-2024-12-14' of https://gitlab.freedesktop.org/drm/kernel:
drm/xe/reg_sr: Remove register pool
drm/xe: Call invalidation_fence_fini for PT inval fences in error state
drm/xe: fix the ERR_PTR() returned on failure to allocate tiny pt
drm/amdkfd: pause autosuspend when creating pdd
drm/amdgpu: fix when the cleaner shader is emitted
drm/amdgpu: Fix ISP HW init issue
drm/amdkfd: hard-code MALL cacheline size for gfx11, gfx12
drm/amdkfd: hard-code cacheline size for gfx11
drm/amdkfd: Dereference null return value
drm/i915: Fix memory leak by correcting cache object name in error handler
drm/i915: Fix NULL pointer dereference in capture_engine
drm/i915/color: Stop using non-posted DSB writes for legacy LUT
drm/i915/dsb: Don't use indexed register writes needlessly
drm/amdkfd: Correct the migration DMA map direction
drm/amd/pm: Set SMU v13.0.7 default workload type
drm/amd/pm: Initialize power profile mode
amdgpu/uvd: get ring reference from rq scheduler
drm/amdgpu: fix UVD contiguous CS mapping problem
drm/amdgpu: use sjt mec fw on gfx943 for sriov
Revert "drm/amdgpu: Fix ISP hw init issue"

+200 -125
+11 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 1801 1801 if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) 1802 1802 return -EINVAL; 1803 1803 1804 + /* Make sure VRAM is allocated contiguously */ 1804 1805 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 1805 - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); 1806 - for (i = 0; i < (*bo)->placement.num_placement; i++) 1807 - (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; 1808 - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); 1809 - if (r) 1810 - return r; 1806 + if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM && 1807 + !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) { 1808 + 1809 + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); 1810 + for (i = 0; i < (*bo)->placement.num_placement; i++) 1811 + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; 1812 + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); 1813 + if (r) 1814 + return r; 1815 + } 1811 1816 1812 1817 return amdgpu_ttm_alloc_gart(&(*bo)->tbo); 1813 1818 }
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 145 145 "LAST", 146 146 }; 147 147 148 - #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0) 148 + #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0) 149 149 /* 150 150 * Default init level where all blocks are expected to be initialized. This is 151 151 * the level of initialization expected by default and also after a full reset
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
··· 551 551 for (i = 0; i < abo->placement.num_placement; ++i) { 552 552 abo->placements[i].fpfn = 0 >> PAGE_SHIFT; 553 553 abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 554 + if (abo->placements[i].mem_type == TTM_PL_VRAM) 555 + abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; 554 556 } 555 557 } 556 558
+7 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 674 674 pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && 675 675 ring->funcs->emit_wreg; 676 676 677 - if (adev->gfx.enable_cleaner_shader && 678 - ring->funcs->emit_cleaner_shader && 679 - job->enforce_isolation) 680 - ring->funcs->emit_cleaner_shader(ring); 681 - 682 - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) 677 + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && 678 + !(job->enforce_isolation && !job->vmid)) 683 679 return 0; 684 680 685 681 amdgpu_ring_ib_begin(ring); ··· 685 689 686 690 if (need_pipe_sync) 687 691 amdgpu_ring_emit_pipeline_sync(ring); 692 + 693 + if (adev->gfx.enable_cleaner_shader && 694 + ring->funcs->emit_cleaner_shader && 695 + job->enforce_isolation) 696 + ring->funcs->emit_cleaner_shader(ring); 688 697 689 698 if (vm_flush_needed) { 690 699 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
+8 -2
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
··· 45 45 MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin"); 46 46 MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); 47 47 MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); 48 + MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin"); 49 + MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin"); 48 50 49 51 #define GFX9_MEC_HPD_SIZE 4096 50 52 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L ··· 576 574 { 577 575 int err; 578 576 579 - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 580 - "amdgpu/%s_mec.bin", chip_name); 577 + if (amdgpu_sriov_vf(adev)) 578 + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 579 + "amdgpu/%s_sjt_mec.bin", chip_name); 580 + else 581 + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 582 + "amdgpu/%s_mec.bin", chip_name); 581 583 if (err) 582 584 goto out; 583 585 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+1 -1
drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
··· 1288 1288 struct amdgpu_job *job, 1289 1289 struct amdgpu_ib *ib) 1290 1290 { 1291 - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); 1291 + struct amdgpu_ring *ring = amdgpu_job_ring(job); 1292 1292 unsigned i; 1293 1293 1294 1294 /* No patching necessary for the first instance */
+21 -3
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
··· 1423 1423 1424 1424 1425 1425 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, 1426 + bool cache_line_size_missing, 1426 1427 struct kfd_gpu_cache_info *pcache_info) 1427 1428 { 1428 1429 struct amdgpu_device *adev = kdev->adev; ··· 1438 1437 CRAT_CACHE_FLAGS_SIMD_CACHE); 1439 1438 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; 1440 1439 pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; 1440 + if (cache_line_size_missing && !pcache_info[i].cache_line_size) 1441 + pcache_info[i].cache_line_size = 128; 1441 1442 i++; 1442 1443 } 1443 1444 /* Scalar L1 Instruction Cache per SQC */ ··· 1452 1449 CRAT_CACHE_FLAGS_SIMD_CACHE); 1453 1450 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; 1454 1451 pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; 1452 + if (cache_line_size_missing && !pcache_info[i].cache_line_size) 1453 + pcache_info[i].cache_line_size = 128; 1455 1454 i++; 1456 1455 } 1457 1456 /* Scalar L1 Data Cache per SQC */ ··· 1465 1460 CRAT_CACHE_FLAGS_SIMD_CACHE); 1466 1461 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; 1467 1462 pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; 1463 + if (cache_line_size_missing && !pcache_info[i].cache_line_size) 1464 + pcache_info[i].cache_line_size = 64; 1468 1465 i++; 1469 1466 } 1470 1467 /* GL1 Data Cache per SA */ ··· 1479 1472 CRAT_CACHE_FLAGS_DATA_CACHE | 1480 1473 CRAT_CACHE_FLAGS_SIMD_CACHE); 1481 1474 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1482 - pcache_info[i].cache_line_size = 0; 1475 + if (cache_line_size_missing) 1476 + pcache_info[i].cache_line_size = 128; 1483 1477 i++; 1484 1478 } 1485 1479 /* L2 Data Cache per GPU (Total Tex Cache) */ ··· 1492 1484 CRAT_CACHE_FLAGS_SIMD_CACHE); 1493 1485 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1494 1486 pcache_info[i].cache_line_size = 
adev->gfx.config.gc_tcc_cache_line_size; 1487 + if (cache_line_size_missing && !pcache_info[i].cache_line_size) 1488 + pcache_info[i].cache_line_size = 128; 1495 1489 i++; 1496 1490 } 1497 1491 /* L3 Data Cache per GPU */ ··· 1504 1494 CRAT_CACHE_FLAGS_DATA_CACHE | 1505 1495 CRAT_CACHE_FLAGS_SIMD_CACHE); 1506 1496 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1507 - pcache_info[i].cache_line_size = 0; 1497 + pcache_info[i].cache_line_size = 64; 1508 1498 i++; 1509 1499 } 1510 1500 return i; ··· 1579 1569 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) 1580 1570 { 1581 1571 int num_of_cache_types = 0; 1572 + bool cache_line_size_missing = false; 1582 1573 1583 1574 switch (kdev->adev->asic_type) { 1584 1575 case CHIP_KAVERI: ··· 1703 1692 case IP_VERSION(11, 5, 0): 1704 1693 case IP_VERSION(11, 5, 1): 1705 1694 case IP_VERSION(11, 5, 2): 1695 + /* Cacheline size not available in IP discovery for gc11. 1696 + * kfd_fill_gpu_cache_info_from_gfx_config to hard code it 1697 + */ 1698 + cache_line_size_missing = true; 1699 + fallthrough; 1706 1700 case IP_VERSION(12, 0, 0): 1707 1701 case IP_VERSION(12, 0, 1): 1708 1702 num_of_cache_types = 1709 - kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); 1703 + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, 1704 + cache_line_size_missing, 1705 + *pcache_info); 1710 1706 break; 1711 1707 default: 1712 1708 *pcache_info = dummy_cache_info;
+15
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 207 207 if (!down_read_trylock(&adev->reset_domain->sem)) 208 208 return -EIO; 209 209 210 + if (!pdd->proc_ctx_cpu_ptr) { 211 + r = amdgpu_amdkfd_alloc_gtt_mem(adev, 212 + AMDGPU_MES_PROC_CTX_SIZE, 213 + &pdd->proc_ctx_bo, 214 + &pdd->proc_ctx_gpu_addr, 215 + &pdd->proc_ctx_cpu_ptr, 216 + false); 217 + if (r) { 218 + dev_err(adev->dev, 219 + "failed to allocate process context bo\n"); 220 + return r; 221 + } 222 + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); 223 + } 224 + 210 225 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 211 226 queue_input.process_id = qpd->pqm->process->pasid; 212 227 queue_input.page_table_base_addr = qpd->page_table_base;
+2 -2
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
··· 306 306 spage = migrate_pfn_to_page(migrate->src[i]); 307 307 if (spage && !is_zone_device_page(spage)) { 308 308 src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, 309 - DMA_TO_DEVICE); 309 + DMA_BIDIRECTIONAL); 310 310 r = dma_mapping_error(dev, src[i]); 311 311 if (r) { 312 312 dev_err(dev, "%s: fail %d dma_map_page\n", ··· 629 629 goto out_oom; 630 630 } 631 631 632 - dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); 632 + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); 633 633 r = dma_mapping_error(dev, dst[i]); 634 634 if (r) { 635 635 dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
+2 -21
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 1076 1076 1077 1077 kfd_free_process_doorbells(pdd->dev->kfd, pdd); 1078 1078 1079 - if (pdd->dev->kfd->shared_resources.enable_mes) 1079 + if (pdd->dev->kfd->shared_resources.enable_mes && 1080 + pdd->proc_ctx_cpu_ptr) 1080 1081 amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, 1081 1082 &pdd->proc_ctx_bo); 1082 1083 /* ··· 1609 1608 struct kfd_process *p) 1610 1609 { 1611 1610 struct kfd_process_device *pdd = NULL; 1612 - int retval = 0; 1613 1611 1614 1612 if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) 1615 1613 return NULL; ··· 1632 1632 pdd->user_gpu_id = dev->id; 1633 1633 atomic64_set(&pdd->evict_duration_counter, 0); 1634 1634 1635 - if (dev->kfd->shared_resources.enable_mes) { 1636 - retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, 1637 - AMDGPU_MES_PROC_CTX_SIZE, 1638 - &pdd->proc_ctx_bo, 1639 - &pdd->proc_ctx_gpu_addr, 1640 - &pdd->proc_ctx_cpu_ptr, 1641 - false); 1642 - if (retval) { 1643 - dev_err(dev->adev->dev, 1644 - "failed to allocate process context bo\n"); 1645 - goto err_free_pdd; 1646 - } 1647 - memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); 1648 - } 1649 - 1650 1635 p->pdds[p->n_pdds++] = pdd; 1651 1636 if (kfd_dbg_is_per_vmid_supported(pdd->dev)) 1652 1637 pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( ··· 1643 1658 idr_init(&pdd->alloc_idr); 1644 1659 1645 1660 return pdd; 1646 - 1647 - err_free_pdd: 1648 - kfree(pdd); 1649 - return NULL; 1650 1661 } 1651 1662 1652 1663 /**
+8 -4
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 212 212 void pqm_uninit(struct process_queue_manager *pqm) 213 213 { 214 214 struct process_queue_node *pqn, *next; 215 - struct kfd_process_device *pdd; 216 215 217 216 list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { 218 217 if (pqn->q) { 219 - pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); 220 - kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); 221 - kfd_queue_release_buffers(pdd, &pqn->q->properties); 218 + struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device, 219 + pqm->process); 220 + if (pdd) { 221 + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); 222 + kfd_queue_release_buffers(pdd, &pqn->q->properties); 223 + } else { 224 + WARN_ON(!pdd); 225 + } 222 226 pqm_clean_queue_resource(pqm, pqn); 223 227 } 224 228
+1
drivers/gpu/drm/amd/include/kgd_pp_interface.h
··· 164 164 }; 165 165 166 166 enum PP_SMC_POWER_PROFILE { 167 + PP_SMC_POWER_PROFILE_UNKNOWN = -1, 167 168 PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0, 168 169 PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1, 169 170 PP_SMC_POWER_PROFILE_POWERSAVING = 0x2,
+17 -7
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
··· 764 764 smu->smu_baco.platform_support = false; 765 765 smu->smu_baco.maco_support = false; 766 766 smu->user_dpm_profile.fan_mode = -1; 767 + smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN; 767 768 768 769 mutex_init(&smu->message_lock); 769 770 ··· 1249 1248 return smu->workload_map && smu->workload_map[profile].valid_mapping; 1250 1249 } 1251 1250 1251 + static void smu_init_power_profile(struct smu_context *smu) 1252 + { 1253 + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) { 1254 + if (smu->is_apu || 1255 + !smu_is_workload_profile_available( 1256 + smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) 1257 + smu->power_profile_mode = 1258 + PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; 1259 + else 1260 + smu->power_profile_mode = 1261 + PP_SMC_POWER_PROFILE_FULLSCREEN3D; 1262 + } 1263 + smu_power_profile_mode_get(smu, smu->power_profile_mode); 1264 + } 1265 + 1252 1266 static int smu_sw_init(struct amdgpu_ip_block *ip_block) 1253 1267 { 1254 1268 struct amdgpu_device *adev = ip_block->adev; ··· 1285 1269 atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); 1286 1270 atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); 1287 1271 1288 - if (smu->is_apu || 1289 - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) 1290 - smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; 1291 - else 1292 - smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; 1293 - smu_power_profile_mode_get(smu, smu->power_profile_mode); 1294 - 1272 + smu_init_power_profile(smu); 1295 1273 smu->display_config = &adev->pm.pm_display_cfg; 1296 1274 1297 1275 smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
+1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
··· 2810 2810 smu->workload_map = smu_v13_0_7_workload_map; 2811 2811 smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION; 2812 2812 smu_v13_0_set_smu_mailbox_registers(smu); 2813 + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; 2813 2814 }
+51 -30
drivers/gpu/drm/i915/display/intel_color.c
··· 1343 1343 intel_de_write_fw(display, reg, val); 1344 1344 } 1345 1345 1346 + static void ilk_lut_write_indexed(const struct intel_crtc_state *crtc_state, 1347 + i915_reg_t reg, u32 val) 1348 + { 1349 + struct intel_display *display = to_intel_display(crtc_state); 1350 + 1351 + if (crtc_state->dsb_color_vblank) 1352 + intel_dsb_reg_write_indexed(crtc_state->dsb_color_vblank, reg, val); 1353 + else 1354 + intel_de_write_fw(display, reg, val); 1355 + } 1356 + 1346 1357 static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state, 1347 1358 const struct drm_property_blob *blob) 1348 1359 { ··· 1368 1357 lut = blob->data; 1369 1358 1370 1359 /* 1371 - * DSB fails to correctly load the legacy LUT 1372 - * unless we either write each entry twice, 1373 - * or use non-posted writes 1360 + * DSB fails to correctly load the legacy LUT unless 1361 + * we either write each entry twice when using posted 1362 + * writes, or we use non-posted writes. 1363 + * 1364 + * If palette anti-collision is active during LUT 1365 + * register writes: 1366 + * - posted writes simply get dropped and thus the LUT 1367 + * contents may not be correctly updated 1368 + * - non-posted writes are blocked and thus the LUT 1369 + * contents are always correct, but simultaneous CPU 1370 + * MMIO access will start to fail 1371 + * 1372 + * Choose the lesser of two evils and use posted writes. 1373 + * Using posted writes is also faster, even when having 1374 + * to write each register twice. 
1374 1375 */ 1375 - if (crtc_state->dsb_color_vblank) 1376 - intel_dsb_nonpost_start(crtc_state->dsb_color_vblank); 1377 - 1378 - for (i = 0; i < 256; i++) 1376 + for (i = 0; i < 256; i++) { 1379 1377 ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), 1380 1378 i9xx_lut_8(&lut[i])); 1381 - 1382 - if (crtc_state->dsb_color_vblank) 1383 - intel_dsb_nonpost_end(crtc_state->dsb_color_vblank); 1379 + if (crtc_state->dsb_color_vblank) 1380 + ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), 1381 + i9xx_lut_8(&lut[i])); 1382 + } 1384 1383 } 1385 1384 1386 1385 static void ilk_load_lut_10(const struct intel_crtc_state *crtc_state, ··· 1479 1458 prec_index); 1480 1459 1481 1460 for (i = 0; i < lut_size; i++) 1482 - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), 1483 - ilk_lut_10(&lut[i])); 1461 + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), 1462 + ilk_lut_10(&lut[i])); 1484 1463 1485 1464 /* 1486 1465 * Reset the index, otherwise it prevents the legacy palette to be ··· 1633 1612 * ToDo: Extend to max 7.0. Enable 32 bit input value 1634 1613 * as compared to just 16 to achieve this. 1635 1614 */ 1636 - ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), 1637 - DISPLAY_VER(display) >= 14 ? 1638 - mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i])); 1615 + ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe), 1616 + DISPLAY_VER(display) >= 14 ? 1617 + mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i])); 1639 1618 } 1640 1619 1641 1620 /* Clamp values > 1.0. */ 1642 1621 while (i++ < glk_degamma_lut_size(display)) 1643 - ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), 1644 - DISPLAY_VER(display) >= 14 ? 1645 - 1 << 24 : 1 << 16); 1622 + ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe), 1623 + DISPLAY_VER(display) >= 14 ? 
1624 + 1 << 24 : 1 << 16); 1646 1625 1647 1626 ilk_lut_write(crtc_state, PRE_CSC_GAMC_INDEX(pipe), 0); 1648 1627 } ··· 1708 1687 for (i = 0; i < 9; i++) { 1709 1688 const struct drm_color_lut *entry = &lut[i]; 1710 1689 1711 - ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), 1712 - ilk_lut_12p4_ldw(entry)); 1713 - ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), 1714 - ilk_lut_12p4_udw(entry)); 1690 + ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), 1691 + ilk_lut_12p4_ldw(entry)); 1692 + ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), 1693 + ilk_lut_12p4_udw(entry)); 1715 1694 } 1716 1695 1717 1696 ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_INDEX(pipe), ··· 1747 1726 for (i = 1; i < 257; i++) { 1748 1727 entry = &lut[i * 8]; 1749 1728 1750 - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), 1751 - ilk_lut_12p4_ldw(entry)); 1752 - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), 1753 - ilk_lut_12p4_udw(entry)); 1729 + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), 1730 + ilk_lut_12p4_ldw(entry)); 1731 + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), 1732 + ilk_lut_12p4_udw(entry)); 1754 1733 } 1755 1734 1756 1735 /* ··· 1768 1747 for (i = 0; i < 256; i++) { 1769 1748 entry = &lut[i * 8 * 128]; 1770 1749 1771 - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), 1772 - ilk_lut_12p4_ldw(entry)); 1773 - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), 1774 - ilk_lut_12p4_udw(entry)); 1750 + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), 1751 + ilk_lut_12p4_ldw(entry)); 1752 + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), 1753 + ilk_lut_12p4_udw(entry)); 1775 1754 } 1776 1755 1777 1756 ilk_lut_write(crtc_state, PREC_PAL_INDEX(pipe),
+16 -3
drivers/gpu/drm/i915/display/intel_dsb.c
··· 273 273 } 274 274 275 275 /** 276 - * intel_dsb_reg_write() - Emit register wriite to the DSB context 276 + * intel_dsb_reg_write_indexed() - Emit register write to the DSB context 277 277 * @dsb: DSB context 278 278 * @reg: register address. 279 279 * @val: value. 280 280 * 281 281 * This function is used for writing register-value pair in command 282 282 * buffer of DSB. 283 + * 284 + * Note that indexed writes are slower than normal MMIO writes 285 + * for a small number (less than 5 or so) of writes to the same 286 + * register. 283 287 */ 284 - void intel_dsb_reg_write(struct intel_dsb *dsb, 285 - i915_reg_t reg, u32 val) 288 + void intel_dsb_reg_write_indexed(struct intel_dsb *dsb, 289 + i915_reg_t reg, u32 val) 286 290 { 287 291 /* 288 292 * For example the buffer will look like below for 3 dwords for auto ··· 342 338 if (dsb->free_pos & 0x1) 343 339 intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos, 0); 344 340 } 341 + } 342 + 343 + void intel_dsb_reg_write(struct intel_dsb *dsb, 344 + i915_reg_t reg, u32 val) 345 + { 346 + intel_dsb_emit(dsb, val, 347 + (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) | 348 + (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | 349 + i915_mmio_reg_offset(reg)); 345 350 } 346 351 347 352 static u32 intel_dsb_mask_to_byte_en(u32 mask)
+2
drivers/gpu/drm/i915/display/intel_dsb.h
··· 34 34 void intel_dsb_cleanup(struct intel_dsb *dsb); 35 35 void intel_dsb_reg_write(struct intel_dsb *dsb, 36 36 i915_reg_t reg, u32 val); 37 + void intel_dsb_reg_write_indexed(struct intel_dsb *dsb, 38 + i915_reg_t reg, u32 val); 37 39 void intel_dsb_reg_write_masked(struct intel_dsb *dsb, 38 40 i915_reg_t reg, u32 mask, u32 val); 39 41 void intel_dsb_noop(struct intel_dsb *dsb, int count);
+15 -3
drivers/gpu/drm/i915/i915_gpu_error.c
··· 1643 1643 return NULL; 1644 1644 1645 1645 intel_engine_get_hung_entity(engine, &ce, &rq); 1646 - if (rq && !i915_request_started(rq)) 1647 - drm_info(&engine->gt->i915->drm, "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n", 1648 - engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id); 1646 + if (rq && !i915_request_started(rq)) { 1647 + /* 1648 + * We want to know also what is the guc_id of the context, 1649 + * but if we don't have the context reference, then skip 1650 + * printing it. 1651 + */ 1652 + if (ce) 1653 + drm_info(&engine->gt->i915->drm, 1654 + "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n", 1655 + engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id); 1656 + else 1657 + drm_info(&engine->gt->i915->drm, 1658 + "Got hung context on %s with active request %lld:%lld not yet started\n", 1659 + engine->name, rq->fence.context, rq->fence.seqno); 1660 + } 1649 1661 1650 1662 if (rq) { 1651 1663 capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
+1 -1
drivers/gpu/drm/i915/i915_scheduler.c
··· 506 506 return 0; 507 507 508 508 err_priorities: 509 - kmem_cache_destroy(slab_priorities); 509 + kmem_cache_destroy(slab_dependencies); 510 510 return -ENOMEM; 511 511 }
+2 -2
drivers/gpu/drm/xe/tests/xe_migrate.c
··· 224 224 XE_BO_FLAG_VRAM_IF_DGFX(tile) | 225 225 XE_BO_FLAG_PINNED); 226 226 if (IS_ERR(tiny)) { 227 - KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", 228 - PTR_ERR(pt)); 227 + KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", 228 + PTR_ERR(tiny)); 229 229 goto free_pt; 230 230 } 231 231
+8
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
··· 65 65 __invalidation_fence_signal(xe, fence); 66 66 } 67 67 68 + void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) 69 + { 70 + if (WARN_ON_ONCE(!fence->gt)) 71 + return; 72 + 73 + __invalidation_fence_signal(gt_to_xe(fence->gt), fence); 74 + } 75 + 68 76 static void xe_gt_tlb_fence_timeout(struct work_struct *work) 69 77 { 70 78 struct xe_gt *gt = container_of(work, struct xe_gt,
+1
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
··· 28 28 void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, 29 29 struct xe_gt_tlb_invalidation_fence *fence, 30 30 bool stack); 31 + void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence); 31 32 32 33 static inline void 33 34 xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
+1 -2
drivers/gpu/drm/xe/xe_pt.c
··· 1333 1333 queue_work(system_wq, &ifence->work); 1334 1334 } else { 1335 1335 ifence->base.base.error = ifence->fence->error; 1336 - dma_fence_signal(&ifence->base.base); 1337 - dma_fence_put(&ifence->base.base); 1336 + xe_gt_tlb_invalidation_fence_signal(&ifence->base); 1338 1337 } 1339 1338 dma_fence_put(ifence->fence); 1340 1339 }
+6 -25
drivers/gpu/drm/xe/xe_reg_sr.c
··· 27 27 #include "xe_reg_whitelist.h" 28 28 #include "xe_rtp_types.h" 29 29 30 - #define XE_REG_SR_GROW_STEP_DEFAULT 16 31 - 32 30 static void reg_sr_fini(struct drm_device *drm, void *arg) 33 31 { 34 32 struct xe_reg_sr *sr = arg; 33 + struct xe_reg_sr_entry *entry; 34 + unsigned long reg; 35 + 36 + xa_for_each(&sr->xa, reg, entry) 37 + kfree(entry); 35 38 36 39 xa_destroy(&sr->xa); 37 - kfree(sr->pool.arr); 38 - memset(&sr->pool, 0, sizeof(sr->pool)); 39 40 } 40 41 41 42 int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) 42 43 { 43 44 xa_init(&sr->xa); 44 - memset(&sr->pool, 0, sizeof(sr->pool)); 45 - sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT; 46 45 sr->name = name; 47 46 48 47 return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); 49 48 } 50 49 EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init); 51 - 52 - static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) 53 - { 54 - if (sr->pool.used == sr->pool.allocated) { 55 - struct xe_reg_sr_entry *arr; 56 - 57 - arr = krealloc_array(sr->pool.arr, 58 - ALIGN(sr->pool.allocated + 1, sr->pool.grow_step), 59 - sizeof(*arr), GFP_KERNEL); 60 - if (!arr) 61 - return NULL; 62 - 63 - sr->pool.arr = arr; 64 - sr->pool.allocated += sr->pool.grow_step; 65 - } 66 - 67 - return &sr->pool.arr[sr->pool.used++]; 68 - } 69 50 70 51 static bool compatible_entries(const struct xe_reg_sr_entry *e1, 71 52 const struct xe_reg_sr_entry *e2) ··· 93 112 return 0; 94 113 } 95 114 96 - pentry = alloc_entry(sr); 115 + pentry = kmalloc(sizeof(*pentry), GFP_KERNEL); 97 116 if (!pentry) { 98 117 ret = -ENOMEM; 99 118 goto fail;
-6
drivers/gpu/drm/xe/xe_reg_sr_types.h
··· 20 20 }; 21 21 22 22 struct xe_reg_sr { 23 - struct { 24 - struct xe_reg_sr_entry *arr; 25 - unsigned int used; 26 - unsigned int allocated; 27 - unsigned int grow_step; 28 - } pool; 29 23 struct xarray xa; 30 24 const char *name; 31 25