drm/msm/adreno: Introduce A8x GPU Support

+1

drivers/gpu/drm/msm/Makefile

··· 24 24 adreno/a6xx_gmu.o \ 25 25 adreno/a6xx_hfi.o \ 26 26 adreno/a6xx_preempt.o \ 27 + adreno/a8xx_gpu.o \ 27 28 28 29 adreno-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o \ 29 30

+3

drivers/gpu/drm/msm/adreno/a6xx_gmu.c

··· 1174 1174 return ret; 1175 1175 } 1176 1176 1177 + /* Read the slice info on A8x GPUs */ 1178 + a8xx_gpu_get_slice_info(gpu); 1179 + 1177 1180 /* Set the bus quota to a reasonable value for boot */ 1178 1181 a6xx_gmu_set_initial_bw(gpu, gmu); 1179 1182

+85 -33

drivers/gpu/drm/msm/adreno/a6xx_gpu.c

··· 157 157 } 158 158 } 159 159 160 - static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 160 + void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 161 161 { 162 162 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 163 163 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); ··· 245 245 } 246 246 247 247 if (!sysprof) { 248 - if (!adreno_is_a7xx(adreno_gpu)) { 248 + if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) { 249 249 /* Turn off protected mode to write to special registers */ 250 250 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 251 251 OUT_RING(ring, 0); 252 252 } 253 253 254 - OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); 255 - OUT_RING(ring, 1); 254 + if (adreno_is_a8xx(adreno_gpu)) { 255 + OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); 256 + OUT_RING(ring, 1); 257 + OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1); 258 + OUT_RING(ring, 1); 259 + } else { 260 + OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); 261 + OUT_RING(ring, 1); 262 + } 256 263 } 257 264 258 265 /* Execute the table update */ ··· 288 281 * to make sure BV doesn't race ahead while BR is still switching 289 282 * pagetables. 290 283 */ 291 - if (adreno_is_a7xx(&a6xx_gpu->base)) { 284 + if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) { 292 285 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 293 286 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR); 294 287 } ··· 302 295 OUT_RING(ring, CACHE_INVALIDATE); 303 296 304 297 if (!sysprof) { 298 + u32 reg_status = adreno_is_a8xx(adreno_gpu) ? 
299 + REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS : 300 + REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS; 305 301 /* 306 302 * Wait for SRAM clear after the pgtable update, so the 307 303 * two can happen in parallel: 308 304 */ 309 305 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); 310 306 OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ)); 311 - OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO( 312 - REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS)); 307 + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status)); 313 308 OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0)); 314 309 OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1)); 315 310 OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1)); 316 311 OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0)); 317 312 318 - if (!adreno_is_a7xx(adreno_gpu)) { 313 + if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) { 319 314 /* Re-enable protected mode: */ 320 315 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 321 316 OUT_RING(ring, 1); ··· 455 446 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 456 447 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 457 448 struct msm_ringbuffer *ring = submit->ring; 449 + u32 rbbm_perfctr_cp0, cp_always_on_counter; 458 450 unsigned int i, ibs = 0; 459 451 460 452 adreno_check_and_reenable_stall(adreno_gpu); ··· 476 466 if (gpu->nr_rings > 1) 477 467 a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue); 478 468 479 - get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), 480 - rbmemptr_stats(ring, index, cpcycles_start)); 481 - get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, 482 - rbmemptr_stats(ring, index, alwayson_start)); 469 + if (adreno_is_a8xx(adreno_gpu)) { 470 + rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0); 471 + cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER; 472 + } else { 473 + rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0); 474 + cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER; 475 + } 476 + 477 + get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start)); 478 + 
get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_start)); 483 479 484 480 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 485 481 OUT_RING(ring, CP_SET_THREAD_BOTH); ··· 532 516 OUT_RING(ring, 0x00e); /* IB1LIST end */ 533 517 } 534 518 535 - get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), 536 - rbmemptr_stats(ring, index, cpcycles_end)); 537 - get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, 538 - rbmemptr_stats(ring, index, alwayson_end)); 519 + get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end)); 520 + get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_end)); 539 521 540 522 /* Write the fence to the scratch register */ 541 - OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1); 542 - OUT_RING(ring, submit->seqno); 523 + if (adreno_is_a8xx(adreno_gpu)) { 524 + OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1); 525 + OUT_RING(ring, submit->seqno); 526 + } else { 527 + OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1); 528 + OUT_RING(ring, submit->seqno); 529 + } 543 530 544 531 OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 545 532 OUT_RING(ring, CP_SET_THREAD_BR); ··· 742 723 /* Copy the data into the internal struct to drop the const qualifier (temporarily) */ 743 724 *cfg = *common_cfg; 744 725 745 - cfg->ubwc_swizzle = 0x6; 746 - cfg->highest_bank_bit = 15; 726 + /* Use common config as is for A8x */ 727 + if (!adreno_is_a8xx(gpu)) { 728 + cfg->ubwc_swizzle = 0x6; 729 + cfg->highest_bank_bit = 15; 730 + } 747 731 748 732 if (adreno_is_a610(gpu)) { 749 733 cfg->highest_bank_bit = 13; ··· 1035 1013 return false; 1036 1014 1037 1015 /* A7xx is safe! 
*/ 1038 - if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu)) 1016 + if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) 1039 1017 return true; 1040 1018 1041 1019 /* ··· 1149 1127 return 0; 1150 1128 } 1151 1129 1152 - static int a6xx_zap_shader_init(struct msm_gpu *gpu) 1130 + int a6xx_zap_shader_init(struct msm_gpu *gpu) 1153 1131 { 1154 1132 static bool loaded; 1155 1133 int ret; ··· 2111 2089 u32 fuse_val; 2112 2090 int ret; 2113 2091 2114 - if (adreno_is_a750(adreno_gpu)) { 2092 + if (adreno_is_a750(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) { 2115 2093 /* 2116 2094 * Assume that if qcom scm isn't available, that whatever 2117 2095 * replacement allows writing the fuse register ourselves. ··· 2137 2115 return ret; 2138 2116 2139 2117 /* 2140 - * On a750 raytracing may be disabled by the firmware, find out 2141 - * whether that's the case. The scm call above sets the fuse 2142 - * register. 2118 + * On A7XX_GEN3 and newer, raytracing may be disabled by the 2119 + * firmware, find out whether that's the case. The scm call 2120 + * above sets the fuse register. 2143 2121 */ 2144 2122 fuse_val = a6xx_llc_read(a6xx_gpu, 2145 2123 REG_A7XX_CX_MISC_SW_FUSE_VALUE); ··· 2200 2178 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert) 2201 2179 { 2202 2180 /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */ 2203 - if (adreno_is_a610(to_adreno_gpu(gpu))) 2181 + if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu))) 2204 2182 return; 2205 2183 2206 2184 gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert); ··· 2231 2209 2232 2210 msm_devfreq_resume(gpu); 2233 2211 2234 - adreno_is_a7xx(adreno_gpu) ? 
a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu); 2212 + if (adreno_is_a8xx(adreno_gpu)) 2213 + a8xx_llc_activate(a6xx_gpu); 2214 + else if (adreno_is_a7xx(adreno_gpu)) 2215 + a7xx_llc_activate(a6xx_gpu); 2216 + else 2217 + a6xx_llc_activate(a6xx_gpu); 2235 2218 2236 2219 return ret; 2237 2220 } ··· 2616 2589 adreno_gpu->base.hw_apriv = 2617 2590 !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV); 2618 2591 2619 - /* gpu->info only gets assigned in adreno_gpu_init() */ 2620 - is_a7xx = config->info->family == ADRENO_7XX_GEN1 || 2621 - config->info->family == ADRENO_7XX_GEN2 || 2622 - config->info->family == ADRENO_7XX_GEN3; 2592 + /* gpu->info only gets assigned in adreno_gpu_init(). A8x is included intentionally */ 2593 + is_a7xx = config->info->family >= ADRENO_7XX_GEN1; 2623 2594 2624 2595 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); 2625 2596 ··· 2655 2630 return ERR_PTR(ret); 2656 2631 } 2657 2632 2658 - if (adreno_is_a7xx(adreno_gpu)) { 2633 + if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) { 2659 2634 ret = a7xx_cx_mem_init(a6xx_gpu); 2660 2635 if (ret) { 2661 2636 a6xx_destroy(&(a6xx_gpu->base.base)); ··· 2778 2753 .get_timestamp = a6xx_gmu_get_timestamp, 2779 2754 .bus_halt = a6xx_bus_clear_pending_transactions, 2780 2755 .mmu_fault_handler = a6xx_fault_handler, 2756 + }; 2757 + 2758 + const struct adreno_gpu_funcs a8xx_gpu_funcs = { 2759 + .base = { 2760 + .get_param = adreno_get_param, 2761 + .set_param = adreno_set_param, 2762 + .hw_init = a8xx_hw_init, 2763 + .ucode_load = a6xx_ucode_load, 2764 + .pm_suspend = a6xx_gmu_pm_suspend, 2765 + .pm_resume = a6xx_gmu_pm_resume, 2766 + .recover = a8xx_recover, 2767 + .submit = a7xx_submit, 2768 + .active_ring = a6xx_active_ring, 2769 + .irq = a8xx_irq, 2770 + .destroy = a6xx_destroy, 2771 + .gpu_busy = a8xx_gpu_busy, 2772 + .gpu_get_freq = a6xx_gmu_get_freq, 2773 + .gpu_set_freq = a6xx_gpu_set_freq, 2774 + .create_vm = a6xx_create_vm, 2775 + .create_private_vm = 
a6xx_create_private_vm, 2776 + .get_rptr = a6xx_get_rptr, 2777 + .progress = a8xx_progress, 2778 + }, 2779 + .init = a6xx_gpu_init, 2780 + .get_timestamp = a8xx_gmu_get_timestamp, 2781 + .bus_halt = a8xx_bus_clear_pending_transactions, 2782 + .mmu_fault_handler = a8xx_fault_handler, 2781 2783 };

+21

drivers/gpu/drm/msm/adreno/a6xx_gpu.h

··· 46 46 const struct adreno_protect *protect; 47 47 const struct adreno_reglist_list *pwrup_reglist; 48 48 const struct adreno_reglist_list *ifpc_reglist; 49 + const struct adreno_reglist_pipe *nonctxt_reglist; 50 + u32 max_slices; 49 51 u32 gmu_chipid; 50 52 u32 gmu_cgc_mode; 51 53 u32 prim_fifo_threshold; ··· 103 101 void *htw_llc_slice; 104 102 bool have_mmu500; 105 103 bool hung; 104 + 105 + u32 cached_aperture; 106 + spinlock_t aperture_lock; 107 + 108 + u32 slice_mask; 106 109 }; 107 110 108 111 #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) ··· 309 302 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off); 310 303 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert); 311 304 int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b); 305 + void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); 306 + int a6xx_zap_shader_init(struct msm_gpu *gpu); 312 307 308 + void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off); 309 + int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data); 310 + void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); 311 + int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value); 312 + u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate); 313 + int a8xx_gpu_feature_probe(struct msm_gpu *gpu); 314 + void a8xx_gpu_get_slice_info(struct msm_gpu *gpu); 315 + int a8xx_hw_init(struct msm_gpu *gpu); 316 + irqreturn_t a8xx_irq(struct msm_gpu *gpu); 317 + void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu); 318 + bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring); 319 + void a8xx_recover(struct msm_gpu *gpu); 313 320 #endif /* __A6XX_GPU_H__ */

+1202

drivers/gpu/drm/msm/adreno/a8xx_gpu.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ 3 + 4 + 5 + #include "msm_gem.h" 6 + #include "msm_mmu.h" 7 + #include "msm_gpu_trace.h" 8 + #include "a6xx_gpu.h" 9 + #include "a6xx_gmu.xml.h" 10 + 11 + #include <linux/bitfield.h> 12 + #include <linux/devfreq.h> 13 + #include <linux/firmware/qcom/qcom_scm.h> 14 + #include <linux/pm_domain.h> 15 + #include <linux/soc/qcom/llcc-qcom.h> 16 + 17 + #define GPU_PAS_ID 13 18 + 19 + static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice) 20 + { 21 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 22 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 23 + u32 val; 24 + 25 + val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice); 26 + 27 + if (a6xx_gpu->cached_aperture == val) 28 + return; 29 + 30 + gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val); 31 + 32 + a6xx_gpu->cached_aperture = val; 33 + } 34 + 35 + static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags) 36 + { 37 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 38 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 39 + 40 + spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags); 41 + 42 + a8xx_aperture_slice_set(gpu, pipe, 0); 43 + } 44 + 45 + static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags) 46 + { 47 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 48 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 49 + 50 + spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags); 51 + } 52 + 53 + static void a8xx_aperture_clear(struct msm_gpu *gpu) 54 + { 55 + unsigned long flags; 56 + 57 + a8xx_aperture_acquire(gpu, PIPE_NONE, &flags); 58 + a8xx_aperture_release(gpu, flags); 59 + } 60 + 61 + static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data) 62 + { 63 + unsigned long flags; 64 + 65 + 
a8xx_aperture_acquire(gpu, pipe, &flags); 66 + gpu_write(gpu, offset, data); 67 + a8xx_aperture_release(gpu, flags); 68 + } 69 + 70 + static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset) 71 + { 72 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 73 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 74 + unsigned long flags; 75 + u32 val; 76 + 77 + spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags); 78 + a8xx_aperture_slice_set(gpu, pipe, slice); 79 + val = gpu_read(gpu, offset); 80 + spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags); 81 + 82 + return val; 83 + } 84 + 85 + void a8xx_gpu_get_slice_info(struct msm_gpu *gpu) 86 + { 87 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 88 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 89 + const struct a6xx_info *info = adreno_gpu->info->a6xx; 90 + u32 slice_mask; 91 + 92 + if (adreno_gpu->info->family < ADRENO_8XX_GEN1) 93 + return; 94 + 95 + if (a6xx_gpu->slice_mask) 96 + return; 97 + 98 + slice_mask = GENMASK(info->max_slices - 1, 0); 99 + 100 + /* GEN1 doesn't support partial slice configurations */ 101 + if (adreno_gpu->info->family == ADRENO_8XX_GEN1) { 102 + a6xx_gpu->slice_mask = slice_mask; 103 + return; 104 + } 105 + 106 + slice_mask &= a6xx_llc_read(a6xx_gpu, 107 + REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL); 108 + 109 + a6xx_gpu->slice_mask = slice_mask; 110 + 111 + /* Chip ID depends on the number of slices available. 
So update it */ 112 + adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask)); 113 + } 114 + 115 + static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu) 116 + { 117 + return ffs(a6xx_gpu->slice_mask) - 1; 118 + } 119 + 120 + static inline bool _a8xx_check_idle(struct msm_gpu *gpu) 121 + { 122 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 123 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 124 + 125 + /* Check that the GMU is idle */ 126 + if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) 127 + return false; 128 + 129 + /* Check that the CX master is idle */ 130 + if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) & 131 + ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER) 132 + return false; 133 + 134 + return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) & 135 + A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT); 136 + } 137 + 138 + static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 139 + { 140 + /* wait for CP to drain ringbuffer: */ 141 + if (!adreno_idle(gpu, ring)) 142 + return false; 143 + 144 + if (spin_until(_a8xx_check_idle(gpu))) { 145 + DRM_ERROR( 146 + "%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", 147 + gpu->name, __builtin_return_address(0), 148 + gpu_read(gpu, REG_A8XX_RBBM_STATUS), 149 + gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS), 150 + gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 151 + gpu_read(gpu, REG_A6XX_CP_RB_WPTR)); 152 + return false; 153 + } 154 + 155 + return true; 156 + } 157 + 158 + void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 159 + { 160 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 161 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 162 + uint32_t wptr; 163 + unsigned long flags; 164 + 165 + spin_lock_irqsave(&ring->preempt_lock, flags); 166 + 167 + /* Copy the shadow to the actual register */ 168 + ring->cur = ring->next; 169 + 170 + /* Make sure to wrap wptr if we need to */ 171 + wptr = get_wptr(ring); 172 + 173 + /* Update HW if this is the current ring and 
we are not in preempt*/ 174 + if (!a6xx_in_preempt(a6xx_gpu)) { 175 + if (a6xx_gpu->cur_ring == ring) 176 + gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); 177 + else 178 + ring->restore_wptr = true; 179 + } else { 180 + ring->restore_wptr = true; 181 + } 182 + 183 + spin_unlock_irqrestore(&ring->preempt_lock, flags); 184 + } 185 + 186 + static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state) 187 + { 188 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 189 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 190 + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 191 + u32 val; 192 + 193 + gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 194 + state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0); 195 + gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 196 + state ? 0x110111 : 0); 197 + gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 198 + state ? 0x55555 : 0); 199 + 200 + gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1); 201 + gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state); 202 + 203 + if (state) { 204 + gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1); 205 + 206 + if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val, 207 + val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) { 208 + dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n"); 209 + return; 210 + } 211 + 212 + gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0); 213 + } else { 214 + /* 215 + * GMU enables clk gating in GBIF during boot up. 
So, 216 + * override that here when hwcg feature is disabled 217 + */ 218 + gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0); 219 + } 220 + } 221 + 222 + static void a8xx_set_cp_protect(struct msm_gpu *gpu) 223 + { 224 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 225 + const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect; 226 + u32 cntl, final_cfg; 227 + unsigned int i; 228 + 229 + cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN | 230 + A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN | 231 + A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE | 232 + A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK; 233 + /* 234 + * Enable access protection to privileged registers, fault on an access 235 + * protect violation and select the last span to protect from the start 236 + * address all the way to the end of the register address space 237 + */ 238 + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 239 + a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl); 240 + 241 + a8xx_aperture_clear(gpu); 242 + 243 + for (i = 0; i < protect->count; i++) { 244 + /* Intentionally skip writing to some registers */ 245 + if (protect->regs[i]) { 246 + gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]); 247 + final_cfg = protect->regs[i]; 248 + } 249 + } 250 + 251 + /* 252 + * Last span feature is only supported on PIPE specific register. 
253 + * So update those here 254 + */ 255 + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); 256 + a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); 257 + 258 + a8xx_aperture_clear(gpu); 259 + } 260 + 261 + static void a8xx_set_ubwc_config(struct msm_gpu *gpu) 262 + { 263 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 264 + const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; 265 + u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); 266 + u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3); 267 + bool rgba8888_lossless = false, fp16compoptdis = false; 268 + bool yuvnotcomptofc = false, min_acc_len_64b = false; 269 + bool rgb565_predicator = false, amsbc = false; 270 + bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); 271 + u32 ubwc_version = cfg->ubwc_enc_version; 272 + u32 hbb, hbb_hi, hbb_lo, mode = 1; 273 + u8 uavflagprd_inv = 2; 274 + 275 + switch (ubwc_version) { 276 + case UBWC_5_0: 277 + amsbc = true; 278 + rgb565_predicator = true; 279 + mode = 4; 280 + break; 281 + case UBWC_4_0: 282 + amsbc = true; 283 + rgb565_predicator = true; 284 + fp16compoptdis = true; 285 + rgba8888_lossless = true; 286 + mode = 2; 287 + break; 288 + case UBWC_3_0: 289 + amsbc = true; 290 + mode = 1; 291 + break; 292 + default: 293 + dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version); 294 + break; 295 + } 296 + 297 + /* 298 + * We subtract 13 from the highest bank bit (13 is the minimum value 299 + * allowed by hw) and write the lowest two bits of the remaining value 300 + * as hbb_lo and the one above it as hbb_hi to the hardware. 
301 + */ 302 + WARN_ON(cfg->highest_bank_bit < 13); 303 + hbb = cfg->highest_bank_bit - 13; 304 + hbb_hi = hbb >> 2; 305 + hbb_lo = hbb & 3; 306 + a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); 307 + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); 308 + 309 + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL, 310 + yuvnotcomptofc << 6 | 311 + hbb_hi << 3 | 312 + hbb_lo << 1); 313 + 314 + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL, 315 + mode << 15 | 316 + yuvnotcomptofc << 6 | 317 + rgba8888_lossless << 4 | 318 + fp16compoptdis << 3 | 319 + rgb565_predicator << 2 | 320 + amsbc << 1 | 321 + min_acc_len_64b); 322 + 323 + a8xx_aperture_clear(gpu); 324 + 325 + gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 326 + level3_swizzling_dis << 13 | 327 + level2_swizzling_dis << 12 | 328 + hbb_hi << 10 | 329 + uavflagprd_inv << 4 | 330 + min_acc_len_64b << 3 | 331 + hbb_lo << 1 | ubwc_mode); 332 + 333 + gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 334 + level3_swizzling_dis << 7 | 335 + level2_swizzling_dis << 6 | 336 + hbb_hi << 4 | 337 + min_acc_len_64b << 3 | 338 + hbb_lo << 1 | ubwc_mode); 339 + } 340 + 341 + static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect) 342 + { 343 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 344 + const struct a6xx_info *info = adreno_gpu->info->a6xx; 345 + const struct adreno_reglist_pipe *regs = info->nonctxt_reglist; 346 + unsigned int pipe_id, i; 347 + unsigned long flags; 348 + 349 + for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 350 + /* We don't have support for LPAC yet */ 351 + if (pipe_id == PIPE_LPAC) 352 + continue; 353 + 354 + a8xx_aperture_acquire(gpu, pipe_id, &flags); 355 + 356 + for (i = 0; regs[i].offset; i++) { 357 + if (!(BIT(pipe_id) & regs[i].pipe)) 358 + continue; 359 + 360 + if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT) 361 + *gmem_protect = regs[i].value; 362 + 363 + gpu_write(gpu, regs[i].offset, regs[i].value); 364 
+ } 365 + 366 + a8xx_aperture_release(gpu, flags); 367 + } 368 + 369 + a8xx_aperture_clear(gpu); 370 + } 371 + 372 + static int a8xx_cp_init(struct msm_gpu *gpu) 373 + { 374 + struct msm_ringbuffer *ring = gpu->rb[0]; 375 + u32 mask; 376 + 377 + /* Disable concurrent binning before sending CP init */ 378 + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); 379 + OUT_RING(ring, BIT(27)); 380 + 381 + OUT_PKT7(ring, CP_ME_INIT, 4); 382 + 383 + /* Use multiple HW contexts */ 384 + mask = BIT(0); 385 + 386 + /* Enable error detection */ 387 + mask |= BIT(1); 388 + 389 + /* Set default reset state */ 390 + mask |= BIT(3); 391 + 392 + /* Disable save/restore of performance counters across preemption */ 393 + mask |= BIT(6); 394 + 395 + OUT_RING(ring, mask); 396 + 397 + /* Enable multiple hardware contexts */ 398 + OUT_RING(ring, 0x00000003); 399 + 400 + /* Enable error detection */ 401 + OUT_RING(ring, 0x20000000); 402 + 403 + /* Operation mode mask */ 404 + OUT_RING(ring, 0x00000002); 405 + 406 + a6xx_flush(gpu, ring); 407 + return a8xx_idle(gpu, ring) ? 
0 : -EINVAL; 408 + } 409 + 410 + #define A8XX_INT_MASK \ 411 + (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ 412 + A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ 413 + A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \ 414 + A6XX_RBBM_INT_0_MASK_CP_SW | \ 415 + A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 416 + A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \ 417 + A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \ 418 + A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 419 + A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ 420 + A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ 421 + A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 422 + A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ 423 + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ 424 + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 425 + 426 + #define A8XX_APRIV_MASK \ 427 + (A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \ 428 + A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \ 429 + A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \ 430 + A8XX_CP_APRIV_CNTL_PIPE_RBRPWB) 431 + 432 + #define A8XX_BR_APRIV_MASK \ 433 + (A8XX_APRIV_MASK | \ 434 + A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \ 435 + A8XX_CP_APRIV_CNTL_PIPE_CDWRITE) 436 + 437 + #define A8XX_CP_GLOBAL_INT_MASK \ 438 + (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \ 439 + A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \ 440 + A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \ 441 + A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \ 442 + A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \ 443 + A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \ 444 + A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \ 445 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \ 446 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \ 447 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \ 448 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \ 449 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \ 450 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \ 451 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV) 452 + 453 + #define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \ 454 + (A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \ 455 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \ 456 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \ 457 + 
A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \ 458 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \ 459 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \ 460 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \ 461 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \ 462 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \ 463 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \ 464 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \ 465 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \ 466 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT| \ 467 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \ 468 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \ 469 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \ 470 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \ 471 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \ 472 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \ 473 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS) 474 + 475 + #define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \ 476 + (A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \ 477 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \ 478 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \ 479 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \ 480 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \ 481 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \ 482 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \ 483 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \ 484 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \ 485 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR) 486 + 487 + static int hw_init(struct msm_gpu *gpu) 488 + { 489 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 490 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 491 + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 492 + unsigned int pipe_id, i; 493 + u32 gmem_protect = 0; 494 + u64 gmem_range_min; 495 + int ret; 496 + 497 + ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 498 + if 
(ret) 499 + return ret; 500 + 501 + /* Clear the cached value to force aperture configuration next time */ 502 + a6xx_gpu->cached_aperture = UINT_MAX; 503 + a8xx_aperture_clear(gpu); 504 + 505 + /* Clear GBIF halt in case GX domain was not collapsed */ 506 + gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); 507 + gpu_read(gpu, REG_A6XX_GBIF_HALT); 508 + 509 + gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0); 510 + gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT); 511 + 512 + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); 513 + 514 + /* 515 + * Disable the trusted memory range - we don't actually support secure 516 + * memory rendering at this point in time and we don't want to block off 517 + * part of the virtual memory space. 518 + */ 519 + gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000); 520 + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 521 + 522 + gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620); 523 + gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620); 524 + gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620); 525 + gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620); 526 + gpu_write(gpu, REG_A8XX_GBIF_CX_CONFIG, 0x20023000); 527 + gmu_write(gmu, REG_A6XX_GMU_MRC_GBIF_QOS_CTRL, 0x33); 528 + 529 + /* Make all blocks contribute to the GPU BUSY perf counter */ 530 + gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); 531 + 532 + /* Setup GMEM Range in UCHE */ 533 + gmem_range_min = SZ_64M; 534 + /* Set the start of the GMEM VA range (SZ_64M) */ 535 + gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min); 536 + gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min); 537 + 538 + /* Setup UCHE Trap region */ 539 + gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 540 + gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 541 + gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base); 542 + gpu_write64(gpu, 
REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); 543 + 544 + /* Turn on performance counters */ 545 + gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1); 546 + gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1); 547 + 548 + /* Turn on the IFPC counter (countable 4 on XOCLK1) */ 549 + gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1, 550 + FIELD_PREP(GENMASK(7, 0), 0x4)); 551 + 552 + /* Select CP0 to always count cycles */ 553 + gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1); 554 + 555 + a8xx_set_ubwc_config(gpu); 556 + 557 + /* Set weights for bicubic filtering */ 558 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); 559 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); 560 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); 561 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); 562 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); 563 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000); 564 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8); 565 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc); 566 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb); 567 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0); 568 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b); 569 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d); 570 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412); 571 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a); 572 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05); 573 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e); 574 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001); 575 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa); 576 + gpu_write(gpu, 
REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7); 577 + gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7); 578 + 579 + gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1); 580 + 581 + a8xx_nonctxt_config(gpu, &gmem_protect); 582 + 583 + /* Enable fault detection */ 584 + gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff); 585 + gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30)); 586 + 587 + /* Set up the CX GMU counter 0 to count busy ticks */ 588 + gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); 589 + 590 + /* Enable the power counter */ 591 + gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5)); 592 + gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); 593 + 594 + /* Protect registers from the CP */ 595 + a8xx_set_cp_protect(gpu); 596 + 597 + /* Enable the GMEM save/restore feature for preemption */ 598 + a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1); 599 + 600 + for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) { 601 + u32 apriv_mask = A8XX_APRIV_MASK; 602 + unsigned long flags; 603 + 604 + if (pipe_id == PIPE_LPAC) 605 + continue; 606 + 607 + if (pipe_id == PIPE_BR) 608 + apriv_mask = A8XX_BR_APRIV_MASK; 609 + 610 + a8xx_aperture_acquire(gpu, pipe_id, &flags); 611 + gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask); 612 + gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE, 613 + A8XX_CP_INTERRUPT_STATUS_MASK_PIPE); 614 + gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE, 615 + A8XX_CP_HW_FAULT_STATUS_MASK_PIPE); 616 + a8xx_aperture_release(gpu, flags); 617 + } 618 + 619 + a8xx_aperture_clear(gpu); 620 + 621 + /* Enable interrupts */ 622 + gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK); 623 + gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK); 624 + 625 + ret = adreno_hw_init(gpu); 626 + if (ret) 627 + goto out; 628 + 629 + gpu_write64(gpu, 
REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); 630 + /* Set the ringbuffer address */ 631 + gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); 632 + gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); 633 + 634 + /* Configure the RPTR shadow if needed: */ 635 + gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0])); 636 + gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr)); 637 + 638 + for (i = 0; i < gpu->nr_rings; i++) 639 + a6xx_gpu->shadow[i] = 0; 640 + 641 + /* Always come up on rb 0 */ 642 + a6xx_gpu->cur_ring = gpu->rb[0]; 643 + 644 + for (i = 0; i < gpu->nr_rings; i++) 645 + gpu->rb[i]->cur_ctx_seqno = 0; 646 + 647 + /* Enable the SQE_to start the CP engine */ 648 + gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1); 649 + 650 + ret = a8xx_cp_init(gpu); 651 + if (ret) 652 + goto out; 653 + 654 + /* 655 + * Try to load a zap shader into the secure world. If successful 656 + * we can use the CP to switch out of secure mode. If not then we 657 + * have no resource but to try to switch ourselves out manually. If we 658 + * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will 659 + * be blocked and a permissions violation will soon follow. 660 + */ 661 + ret = a6xx_zap_shader_init(gpu); 662 + if (!ret) { 663 + OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); 664 + OUT_RING(gpu->rb[0], 0x00000000); 665 + 666 + a6xx_flush(gpu, gpu->rb[0]); 667 + if (!a8xx_idle(gpu, gpu->rb[0])) 668 + return -EINVAL; 669 + } else if (ret == -ENODEV) { 670 + /* 671 + * This device does not use zap shader (but print a warning 672 + * just in case someone got their dt wrong.. hopefully they 673 + * have a debug UART to realize the error of their ways... 
674 + * if you mess this up you are about to crash horribly) 675 + */ 676 + dev_warn_once(gpu->dev->dev, 677 + "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 678 + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); 679 + ret = 0; 680 + } else { 681 + return ret; 682 + } 683 + 684 + /* 685 + * GMEM_PROTECT register should be programmed after GPU is transitioned to 686 + * non-secure mode 687 + */ 688 + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect); 689 + WARN_ON(!gmem_protect); 690 + a8xx_aperture_clear(gpu); 691 + 692 + /* Enable hardware clockgating */ 693 + a8xx_set_hwcg(gpu, true); 694 + out: 695 + /* 696 + * Tell the GMU that we are done touching the GPU and it can start power 697 + * management 698 + */ 699 + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); 700 + 701 + return ret; 702 + } 703 + /* Public init entry point: calls hw_init() with the GMU mutex held and returns its result. */ 704 + int a8xx_hw_init(struct msm_gpu *gpu) 705 + { 706 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 707 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 708 + int ret; 709 + 710 + mutex_lock(&a6xx_gpu->gmu.lock); 711 + ret = hw_init(gpu); 712 + mutex_unlock(&a6xx_gpu->gmu.lock); 713 + 714 + return ret; 715 + } 716 + /* Log RBBM_STATUS, then run the generic adreno_dump(). */ 717 + static void a8xx_dump(struct msm_gpu *gpu) 718 + { 719 + DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS)); 720 + adreno_dump(gpu); 721 + } 722 + /* Hang recovery: dump state, mark the GPU hung, halt the SQE, drop the runtime PM references while waiting for the pd_gate completion, then retake them and call msm_gpu_hw_init(). */ 723 + void a8xx_recover(struct msm_gpu *gpu) 724 + { 725 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 726 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 727 + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 728 + int active_submits; 729 + 730 + adreno_dump_info(gpu); 731 + 732 + if (hang_debug) 733 + a8xx_dump(gpu); 734 + 735 + /* 736 + * To handle recovery specific sequences during the rpm suspend we are 737 + * about to trigger 738 + */ 739 + a6xx_gpu->hung = true; 740 + 741 + /* Halt SQE first */ 742 + gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3); 743 + 744 + 
pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); 745 + 746 + /* active_submit won't change until we make a submission */ 747 + mutex_lock(&gpu->active_lock); 748 + active_submits = gpu->active_submits; 749 + 750 + /* 751 + * Temporarily clear active_submits count to silence a WARN() in the 752 + * runtime suspend cb 753 + */ 754 + gpu->active_submits = 0; 755 + 756 + reinit_completion(&gmu->pd_gate); 757 + dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb); 758 + dev_pm_genpd_synced_poweroff(gmu->cxpd); 759 + 760 + /* Drop the rpm refcount from active submits */ 761 + if (active_submits) 762 + pm_runtime_put(&gpu->pdev->dev); 763 + 764 + /* And the final one from recover worker */ 765 + pm_runtime_put_sync(&gpu->pdev->dev); 766 + /* Wait up to 1000 ms for the pd_gate completion re-armed above */ 767 + if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000))) 768 + DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n"); 769 + 770 + dev_pm_genpd_remove_notifier(gmu->cxpd); 771 + 772 + pm_runtime_use_autosuspend(&gpu->pdev->dev); 773 + /* Retake the runtime PM references dropped above */ 774 + if (active_submits) 775 + pm_runtime_get(&gpu->pdev->dev); 776 + 777 + pm_runtime_get_sync(&gpu->pdev->dev); 778 + 779 + gpu->active_submits = active_submits; 780 + mutex_unlock(&gpu->active_lock); 781 + /* Bring the hardware back up, then leave the hung state */ 782 + msm_gpu_hw_init(gpu); 783 + a6xx_gpu->hung = false; 784 + } 785 + /* Name the UCHE client behind a fault: the client ID comes from bits 6:0 of UCHE_CLIENT_PF and indexes the BR/BV table (mid 3) or the LPAC table (mid 8). */ 786 + static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) 787 + { 788 + static const char * const uche_clients[] = { 789 + "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", 790 + "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", 791 + "STCHE", 792 + }; 793 + static const char * const uche_clients_lpac[] = { 794 + "-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC", 795 + }; 796 + u32 val; 797 + 798 + /* 799 + * The source of the data depends on the mid ID read from FSYNR1.
800 + * and the client ID read from the UCHE block 801 + */ 802 + val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF); 803 + /* Bits 6:0 carry the client ID used to index the tables above */ 804 + val &= GENMASK(6, 0); 805 + 806 + /* mid=3 refers to BR or BV */ 807 + if (mid == 3) { 808 + if (val < ARRAY_SIZE(uche_clients)) 809 + return uche_clients[val]; 810 + else 811 + return "UCHE"; 812 + } 813 + 814 + /* mid=8 refers to LPAC */ 815 + if (mid == 8) { 816 + if (val < ARRAY_SIZE(uche_clients_lpac)) 817 + return uche_clients_lpac[val]; 818 + else 819 + return "UCHE_LPAC"; 820 + } 821 + 822 + return "Unknown"; 823 + } 824 + /* Map a fault source ID to a block name; IDs 0x3 and 0x8 are refined via the UCHE client lookup. */ 825 + static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id) 826 + { 827 + switch (id) { 828 + case 0x0: 829 + return "CP"; 830 + case 0x1: 831 + return "UCHE: Unknown"; 832 + case 0x2: 833 + return "UCHE_LPAC: Unknown"; 834 + case 0x3: 835 + case 0x8: 836 + return a8xx_uche_fault_block(gpu, id); 837 + case 0x4: 838 + return "CCU"; 839 + case 0x5: 840 + return "Flag cache"; 841 + case 0x6: 842 + return "PREFETCH"; 843 + case 0x7: 844 + return "GMU"; 845 + case 0x9: 846 + return "UCHE_HPAC"; 847 + } 848 + 849 + return "Unknown"; 850 + } 851 + /* SMMU fault callback: capture CP_SCRATCH_GLOBAL(0..3), resolve the block name from the low byte of FSYNR1 when fault info is present, and defer to adreno_fault_handler(). */ 852 + int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data) 853 + { 854 + struct msm_gpu *gpu = arg; 855 + struct adreno_smmu_fault_info *info = data; 856 + const char *block = "unknown"; 857 + 858 + u32 scratch[] = { 859 + gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)), 860 + gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)), 861 + gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)), 862 + gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)), 863 + }; 864 + 865 + if (info) 866 + block = a8xx_fault_block(gpu, info->fsynr1 & 0xff); 867 + 868 + return adreno_fault_handler(gpu, iova, flags, info, block, scratch); 869 + } 870 + /* CP error IRQ: report the global CP interrupt status plus per-pipe fault details for BR and BV, then mask interrupts and queue recover_work. */ 871 + static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu) 872 + { 873 + u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL); 874 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 875 + struct a6xx_gpu *a6xx_gpu = 
to_a6xx_gpu(adreno_gpu); 876 + u32 slice = a8xx_get_first_slice(a6xx_gpu); 877 + u32 hw_fault_mask = GENMASK(6, 0); 878 + u32 sw_fault_mask = GENMASK(22, 16); 879 + u32 pipe = 0; 880 + 881 + dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status); 882 + 883 + if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | 884 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR)) 885 + pipe |= BIT(PIPE_BR); 886 + 887 + if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | 888 + A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV)) 889 + pipe |= BIT(PIPE_BV); 890 + 891 + if (!pipe) { 892 + dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n"); 893 + goto out; 894 + } 895 + 896 + for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) { 897 + if (!(BIT(pipe_id) & pipe)) 898 + continue; 899 + 900 + if (hw_fault_mask & status) { 901 + status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 902 + REG_A8XX_CP_HW_FAULT_STATUS_PIPE); 903 + dev_err_ratelimited(&gpu->pdev->dev, 904 + "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 905 + } 906 + 907 + if (sw_fault_mask & status) { 908 + status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 909 + REG_A8XX_CP_INTERRUPT_STATUS_PIPE); 910 + dev_err_ratelimited(&gpu->pdev->dev, 911 + "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status); 912 + 913 + if (status & BIT(8)) { 914 + a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1); 915 + status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 916 + REG_A8XX_CP_SQE_STAT_DATA_PIPE); 917 + dev_err_ratelimited(&gpu->pdev->dev, 918 + "CP Opcode error, opcode=0x%x\n", status); 919 + } 920 + 921 + if (status & BIT(10)) { 922 + status = a8xx_read_pipe_slice(gpu, pipe_id, slice, 923 + REG_A8XX_CP_PROTECT_STATUS_PIPE); 924 + dev_err_ratelimited(&gpu->pdev->dev, 925 + "CP REG PROTECT error, status=0x%x\n", status); 926 + } 927 + } 928 + } 929 + 930 + out: 931 + /* Turn off interrupts to avoid triggering recovery again */ 932 + a8xx_aperture_clear(gpu); 933 + gpu_write(gpu, 
REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0); 934 + gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0); 935 + 936 + kthread_queue_work(gpu->worker, &gpu->recover_work); 937 + } 938 + /* Indirect read: write dbg_offset to CP_SQE_UCODE_DBG_ADDR_PIPE, then return CP_SQE_UCODE_DBG_DATA_PIPE. The caller in this file brackets it with aperture acquire/release. */ 939 + static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset) 940 + { 941 + gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset); 942 + 943 + return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE); 944 + } 945 + /* 64-bit read built from two 32-bit reads: low word at dbg_offset, high word at dbg_offset + 1. */ 946 + static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset) 947 + { 948 + u64 lo, hi; 949 + 950 + lo = gpu_periph_read(gpu, dbg_offset); 951 + hi = gpu_periph_read(gpu, dbg_offset + 1); 952 + 953 + return (hi << 32) | lo; 954 + } 955 + /* Offsets for the gpu_periph_read helpers; the hang dump below uses the BASE_LO and OFFSET entries */ 956 + #define CP_PERIPH_IB1_BASE_LO 0x7005 957 + #define CP_PERIPH_IB1_BASE_HI 0x7006 958 + #define CP_PERIPH_IB1_SIZE 0x7007 959 + #define CP_PERIPH_IB1_OFFSET 0x7008 960 + #define CP_PERIPH_IB2_BASE_LO 0x7009 961 + #define CP_PERIPH_IB2_BASE_HI 0x700a 962 + #define CP_PERIPH_IB2_SIZE 0x700b 963 + #define CP_PERIPH_IB2_OFFSET 0x700c 964 + #define CP_PERIPH_IB3_BASE_LO 0x700d 965 + #define CP_PERIPH_IB3_BASE_HI 0x700e 966 + #define CP_PERIPH_IB3_SIZE 0x700f 967 + #define CP_PERIPH_IB3_OFFSET 0x7010 968 + /* Hang-detect IRQ: unless the SMMU is stalled on a fault, log ring/fence and BR/BV CP state, stop the hangcheck timer and queue recover_work. */ 969 + static void a8xx_fault_detect_irq(struct msm_gpu *gpu) 970 + { 971 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 972 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 973 + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 974 + unsigned long flags; 975 + 976 + /* 977 + * If stalled on SMMU fault, we could trip the GPU's hang detection, 978 + * but the fault handler will trigger the devcore dump, and we want 979 + * to otherwise resume normally rather than killing the submit, so 980 + * just bail. 
981 + */ 982 + if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT) 983 + return; 984 + 985 + /* 986 + * Force the GPU to stay on until after we finish 987 + * collecting information 988 + */ 989 + if (!adreno_has_gmu_wrapper(adreno_gpu)) 990 + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); 991 + /* The ring pointer is NULL-checked: -1 and 0 are printed when there is no active ring */ 992 + DRM_DEV_ERROR(&gpu->pdev->dev, 993 + "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n", 994 + ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, 995 + gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS)); 996 + /* BR dump: the reads below run between aperture acquire and release for PIPE_BR */ 997 + a8xx_aperture_acquire(gpu, PIPE_BR, &flags); 998 + 999 + DRM_DEV_ERROR(&gpu->pdev->dev, 1000 + "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", 1001 + gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS), 1002 + gpu_read(gpu, REG_A6XX_CP_RB_RPTR), 1003 + gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1004 + gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO), 1005 + gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET), 1006 + gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO), 1007 + gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET), 1008 + gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO), 1009 + gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET)); 1010 + 1011 + a8xx_aperture_release(gpu, flags); 1012 + a8xx_aperture_acquire(gpu, PIPE_BV, &flags); 1013 + /* BV dump: same layout, but the read pointer comes from CP_RB_RPTR_BV */ 1014 + DRM_DEV_ERROR(&gpu->pdev->dev, 1015 + "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", 1016 + gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS), 1017 + gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV), 1018 + gpu_read(gpu, REG_A6XX_CP_RB_WPTR), 1019 + gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO), 1020 + gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET), 1021 + gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO), 1022 + gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET), 1023 + gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO), 1024 + gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET)); 1025 + 1026 + 
a8xx_aperture_release(gpu, flags); 1027 + a8xx_aperture_clear(gpu); 1028 + 1029 + /* Turn off the hangcheck timer to keep it from bothering us */ 1030 + timer_delete(&gpu->hangcheck_timer); 1031 + 1032 + kthread_queue_work(gpu->worker, &gpu->recover_work); 1033 + } 1034 + /* SW fuse violation IRQ: read the status, mask the SW fuse interrupt, and queue recover_work only for RAYTRACING or LPAC violations. */ 1035 + static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu) 1036 + { 1037 + u32 status; 1038 + 1039 + status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS); 1040 + gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0); 1041 + 1042 + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); 1043 + 1044 + /* 1045 + * Ignore FASTBLEND violations, because the HW will silently fall back 1046 + * to legacy blending. 1047 + */ 1048 + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1049 + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { 1050 + timer_delete(&gpu->hangcheck_timer); 1051 + 1052 + kthread_queue_work(gpu->worker, &gpu->recover_work); 1053 + } 1054 + } 1055 + /* Top-level IRQ handler: read RBBM_INT_0_STATUS, write it back to INT_CLEAR_CMD, then handle each set bit. Always returns IRQ_HANDLED. */ 1056 + irqreturn_t a8xx_irq(struct msm_gpu *gpu) 1057 + { 1058 + struct msm_drm_private *priv = gpu->dev->dev_private; 1059 + u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS); 1060 + 1061 + gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status); 1062 + /* With disable_err_irq set, every bit except CP_CACHE_FLUSH_TS is ignored */ 1063 + if (priv->disable_err_irq) 1064 + status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS; 1065 + 1066 + if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) 1067 + a8xx_fault_detect_irq(gpu); 1068 + 1069 + if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) { 1070 + u32 rl0, rl1; 1071 + 1072 + rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0); 1073 + rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1); 1074 + dev_err_ratelimited(&gpu->pdev->dev, 1075 + "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1); 1076 + } 1077 + 1078 + if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) 1079 + a8xx_cp_hw_err_irq(gpu); 1080 + 1081 + if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) 1082 + dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); 
1083 + 1084 + if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 1085 + dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); 1086 + 1087 + if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 1088 + dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); 1089 + 1090 + if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) 1091 + dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n"); 1092 + 1093 + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) 1094 + a8xx_sw_fuse_violation_irq(gpu); 1095 + /* A flush timestamp retires finished submits, then gives preemption a chance to trigger */ 1096 + if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 1097 + msm_gpu_retire(gpu); 1098 + a6xx_preempt_trigger(gpu); 1099 + } 1100 + 1101 + if (status & A6XX_RBBM_INT_0_MASK_CP_SW) 1102 + a6xx_preempt_irq(gpu); 1103 + 1104 + return IRQ_HANDLED; 1105 + } 1106 + /* Activate the GPU LLC slice; if that returns 0, program the slice ID into the GBIF_SCACHE_CNTL1 and GBIF_SCACHE_CNTL0 fields. The HTW slice is activated regardless. */ 1107 + void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1108 + { 1109 + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1110 + struct msm_gpu *gpu = &adreno_gpu->base; 1111 + 1112 + if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1113 + u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1114 + /* The register fields written below are 6 bits wide */ 1115 + gpu_scid &= GENMASK(5, 0); 1116 + 1117 + gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 1118 + FIELD_PREP(GENMASK(29, 24), gpu_scid) | 1119 + FIELD_PREP(GENMASK(23, 18), gpu_scid) | 1120 + FIELD_PREP(GENMASK(17, 12), gpu_scid) | 1121 + FIELD_PREP(GENMASK(11, 6), gpu_scid) | 1122 + FIELD_PREP(GENMASK(5, 0), gpu_scid)); 1123 + 1124 + gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 1125 + FIELD_PREP(GENMASK(27, 22), gpu_scid) | 1126 + FIELD_PREP(GENMASK(21, 16), gpu_scid) | 1127 + FIELD_PREP(GENMASK(15, 10), gpu_scid) | 1128 + BIT(8)); 1129 + } 1130 + 1131 + llcc_slice_activate(a6xx_gpu->htw_llc_slice); 1132 + } 1133 + /* Halt-related bit definitions; only the two GBIF masks are used by the code visible here */ 1134 + #define GBIF_CLIENT_HALT_MASK BIT(0) 1135 + #define GBIF_ARB_HALT_MASK BIT(1) 1136 + #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 1137 + #define VBIF_RESET_ACK_MASK 0xF0 1138 + #define GPR0_GBIF_HALT_REQUEST 0x1E0 1139 + /* Halt GBIF traffic and wait for each ack (GX side first when gx_off is set), then clear the halt request. */ 1140 + void 
a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 1141 + { 1142 + struct msm_gpu *gpu = &adreno_gpu->base; 1143 + 1144 + if (gx_off) { 1145 + /* Halt the gx side of GBIF */ 1146 + gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1); 1147 + spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1); 1148 + } 1149 + 1150 + /* Halt new client requests on GBIF */ 1151 + gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 1152 + spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1153 + (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 1154 + 1155 + /* Halt all AXI requests on GBIF */ 1156 + gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 1157 + spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 1158 + (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 1159 + 1160 + /* The GBIF halt needs to be explicitly cleared */ 1161 + gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 1162 + } 1163 + /* Store CP_ALWAYS_ON_COUNTER through the value pointer, with the GMU lock held and the PERFCOUNTER OOB request set around the read. Always returns 0. */ 1164 + int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 1165 + { 1166 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1167 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1168 + 1169 + mutex_lock(&a6xx_gpu->gmu.lock); 1170 + 1171 + /* Force the GPU power on so we can read this register */ 1172 + a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 1173 + 1174 + *value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER); 1175 + 1176 + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 1177 + 1178 + mutex_unlock(&a6xx_gpu->gmu.lock); 1179 + 1180 + return 0; 1181 + } 1182 + /* Return the XOCLK_0 power counter (L/H register pair) as busy cycles and report a fixed 19.2 MHz sample rate through out_sample_rate. */ 1183 + u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) 1184 + { 1185 + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1186 + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1187 + u64 busy_cycles; 1188 + 1189 + /* 19.2MHz */ 1190 + *out_sample_rate = 19200000; 1191 + 1192 + busy_cycles = gmu_read64(&a6xx_gpu->gmu, 1193 + REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 1194 + REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 1195 
+ 1196 + return busy_cycles; 1197 + } 1198 + /* Progress check stub: unconditionally returns true; neither argument is read. */ 1199 + bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 1200 + { 1201 + return true; 1202 + }

+7

drivers/gpu/drm/msm/adreno/adreno_gpu.h

··· 88 88 u32 value; 89 89 }; 90 90 91 + /* Reglist with pipe information */ 92 + struct adreno_reglist_pipe { 93 + u32 offset; 94 + u32 value; 95 + u32 pipe; 96 + }; 97 + 91 98 struct adreno_speedbin { 92 99 uint16_t fuse; 93 100 uint16_t speedbin;

+1

drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml

··· 60 60 <reg32 offset="0x1f400" name="GMU_ICACHE_CONFIG"/> 61 61 <reg32 offset="0x1f401" name="GMU_DCACHE_CONFIG"/> 62 62 <reg32 offset="0x1f40f" name="GMU_SYS_BUS_CONFIG"/> 63 + <reg32 offset="0x1f50b" name="GMU_MRC_GBIF_QOS_CTRL"/> 63 64 <reg32 offset="0x1f800" name="GMU_CM3_SYSRESET"/> 64 65 <reg32 offset="0x1f801" name="GMU_CM3_BOOT_CONFIG"/> 65 66 <reg32 offset="0x1f81a" name="GMU_CM3_FW_BUSY"/>

Configure Feed

Configure Feed