Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

crypto: qat - add command queue telemetry counters for GEN6

Add slice-specific command queue counters for QAT GEN6 devices to monitor
utilization metrics, including wait time, execution duration, and release
events.

Update the documentation to reflect the new command queue counter
functionality.

Co-developed-by: George Abraham P <george.abraham.p@intel.com>
Signed-off-by: George Abraham P <george.abraham.p@intel.com>
Signed-off-by: Vijay Sundar Selvamani <vijay.sundar.selvamani@intel.com>
Signed-off-by: Suman Kumar Chakraborty <suman.kumar.chakraborty@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

authored by

Vijay Sundar Selvamani and committed by
Herbert Xu
3ed63344 9ea349e4

+210
+26
Documentation/ABI/testing/debugfs-driver-qat_telemetry
··· 86 86 exec_cph<N> execution count of Cipher slice N 87 87 util_ath<N> utilization of Authentication slice N [%] 88 88 exec_ath<N> execution count of Authentication slice N 89 + cmdq_wait_cnv<N> wait time for cmdq N to get Compression and verify 90 + slice ownership 91 + cmdq_exec_cnv<N> Compression and verify slice execution time while 92 + owned by cmdq N 93 + cmdq_drain_cnv<N> time taken for cmdq N to release Compression and 94 + verify slice ownership 95 + cmdq_wait_dcprz<N> wait time for cmdq N to get Decompression 96 + slice ownership 97 + cmdq_exec_dcprz<N> Decompression slice execution time while 98 + owned by cmdq N 99 + cmdq_drain_dcprz<N> time taken for cmdq N to release Decompression 100 + slice ownership 101 + cmdq_wait_pke<N> wait time for cmdq N to get PKE slice ownership 102 + cmdq_exec_pke<N> PKE slice execution time while owned by cmdq N 103 + cmdq_drain_pke<N> time taken for cmdq N to release PKE slice 104 + ownership 105 + cmdq_wait_ucs<N> wait time for cmdq N to get UCS slice ownership 106 + cmdq_exec_ucs<N> UCS slice execution time while owned by cmdq N 107 + cmdq_drain_ucs<N> time taken for cmdq N to release UCS slice 108 + ownership 109 + cmdq_wait_ath<N> wait time for cmdq N to get Authentication slice 110 + ownership 111 + cmdq_exec_ath<N> Authentication slice execution time while owned 112 + by cmdq N 113 + cmdq_drain_ath<N> time taken for cmdq N to release Authentication 114 + slice ownership 89 115 ======================= ======================================== 90 116 91 117 The telemetry report file can be read with the following command::
+104
drivers/crypto/intel/qat/qat_common/adf_gen6_tl.c
··· 21 21 22 22 #define SLICE_IDX(sl) offsetof(struct icp_qat_fw_init_admin_slice_cnt, sl##_cnt) 23 23 24 + #define ADF_GEN6_TL_CMDQ_WAIT_COUNTER(_name) \ 25 + ADF_TL_COUNTER("cmdq_wait_" #_name, ADF_TL_SIMPLE_COUNT, \ 26 + ADF_TL_CMDQ_REG_OFF(_name, reg_tm_cmdq_wait_cnt, gen6)) 27 + #define ADF_GEN6_TL_CMDQ_EXEC_COUNTER(_name) \ 28 + ADF_TL_COUNTER("cmdq_exec_" #_name, ADF_TL_SIMPLE_COUNT, \ 29 + ADF_TL_CMDQ_REG_OFF(_name, reg_tm_cmdq_exec_cnt, gen6)) 30 + #define ADF_GEN6_TL_CMDQ_DRAIN_COUNTER(_name) \ 31 + ADF_TL_COUNTER("cmdq_drain_" #_name, ADF_TL_SIMPLE_COUNT, \ 32 + ADF_TL_CMDQ_REG_OFF(_name, reg_tm_cmdq_drain_cnt, \ 33 + gen6)) 34 + 35 + #define CPR_QUEUE_COUNT 5 36 + #define DCPR_QUEUE_COUNT 3 37 + #define PKE_QUEUE_COUNT 1 38 + #define WAT_QUEUE_COUNT 7 39 + #define WCP_QUEUE_COUNT 7 40 + #define USC_QUEUE_COUNT 3 41 + #define ATH_QUEUE_COUNT 2 42 + 24 43 /* Device level counters. */ 25 44 static const struct adf_tl_dbg_counter dev_counters[] = { 26 45 /* PCIe partial transactions. 
*/ ··· 118 99 [SLICE_IDX(ath)] = ADF_GEN6_TL_SL_EXEC_COUNTER(ath), 119 100 }; 120 101 102 + static const struct adf_tl_dbg_counter cnv_cmdq_counters[] = { 103 + ADF_GEN6_TL_CMDQ_WAIT_COUNTER(cnv), 104 + ADF_GEN6_TL_CMDQ_EXEC_COUNTER(cnv), 105 + ADF_GEN6_TL_CMDQ_DRAIN_COUNTER(cnv) 106 + }; 107 + 108 + #define NUM_CMDQ_COUNTERS ARRAY_SIZE(cnv_cmdq_counters) 109 + 110 + static const struct adf_tl_dbg_counter dcprz_cmdq_counters[] = { 111 + ADF_GEN6_TL_CMDQ_WAIT_COUNTER(dcprz), 112 + ADF_GEN6_TL_CMDQ_EXEC_COUNTER(dcprz), 113 + ADF_GEN6_TL_CMDQ_DRAIN_COUNTER(dcprz) 114 + }; 115 + 116 + static_assert(ARRAY_SIZE(dcprz_cmdq_counters) == NUM_CMDQ_COUNTERS); 117 + 118 + static const struct adf_tl_dbg_counter pke_cmdq_counters[] = { 119 + ADF_GEN6_TL_CMDQ_WAIT_COUNTER(pke), 120 + ADF_GEN6_TL_CMDQ_EXEC_COUNTER(pke), 121 + ADF_GEN6_TL_CMDQ_DRAIN_COUNTER(pke) 122 + }; 123 + 124 + static_assert(ARRAY_SIZE(pke_cmdq_counters) == NUM_CMDQ_COUNTERS); 125 + 126 + static const struct adf_tl_dbg_counter wat_cmdq_counters[] = { 127 + ADF_GEN6_TL_CMDQ_WAIT_COUNTER(wat), 128 + ADF_GEN6_TL_CMDQ_EXEC_COUNTER(wat), 129 + ADF_GEN6_TL_CMDQ_DRAIN_COUNTER(wat) 130 + }; 131 + 132 + static_assert(ARRAY_SIZE(wat_cmdq_counters) == NUM_CMDQ_COUNTERS); 133 + 134 + static const struct adf_tl_dbg_counter wcp_cmdq_counters[] = { 135 + ADF_GEN6_TL_CMDQ_WAIT_COUNTER(wcp), 136 + ADF_GEN6_TL_CMDQ_EXEC_COUNTER(wcp), 137 + ADF_GEN6_TL_CMDQ_DRAIN_COUNTER(wcp) 138 + }; 139 + 140 + static_assert(ARRAY_SIZE(wcp_cmdq_counters) == NUM_CMDQ_COUNTERS); 141 + 142 + static const struct adf_tl_dbg_counter ucs_cmdq_counters[] = { 143 + ADF_GEN6_TL_CMDQ_WAIT_COUNTER(ucs), 144 + ADF_GEN6_TL_CMDQ_EXEC_COUNTER(ucs), 145 + ADF_GEN6_TL_CMDQ_DRAIN_COUNTER(ucs) 146 + }; 147 + 148 + static_assert(ARRAY_SIZE(ucs_cmdq_counters) == NUM_CMDQ_COUNTERS); 149 + 150 + static const struct adf_tl_dbg_counter ath_cmdq_counters[] = { 151 + ADF_GEN6_TL_CMDQ_WAIT_COUNTER(ath), 152 + ADF_GEN6_TL_CMDQ_EXEC_COUNTER(ath), 153 + 
ADF_GEN6_TL_CMDQ_DRAIN_COUNTER(ath) 154 + }; 155 + 156 + static_assert(ARRAY_SIZE(ath_cmdq_counters) == NUM_CMDQ_COUNTERS); 157 + 158 + /* CMDQ drain counters. */ 159 + static const struct adf_tl_dbg_counter *cmdq_counters[ADF_TL_SL_CNT_COUNT] = { 160 + /* Compression accelerator execution count. */ 161 + [SLICE_IDX(cpr)] = cnv_cmdq_counters, 162 + /* Decompression accelerator execution count. */ 163 + [SLICE_IDX(dcpr)] = dcprz_cmdq_counters, 164 + /* PKE execution count. */ 165 + [SLICE_IDX(pke)] = pke_cmdq_counters, 166 + /* Wireless Authentication accelerator execution count. */ 167 + [SLICE_IDX(wat)] = wat_cmdq_counters, 168 + /* Wireless Cipher accelerator execution count. */ 169 + [SLICE_IDX(wcp)] = wcp_cmdq_counters, 170 + /* UCS accelerator execution count. */ 171 + [SLICE_IDX(ucs)] = ucs_cmdq_counters, 172 + /* Authentication accelerator execution count. */ 173 + [SLICE_IDX(ath)] = ath_cmdq_counters, 174 + }; 175 + 121 176 /* Ring pair counters. */ 122 177 static const struct adf_tl_dbg_counter rp_counters[] = { 123 178 /* PCIe partial transactions. 
*/ ··· 229 136 { 230 137 tl_data->layout_sz = ADF_GEN6_TL_LAYOUT_SZ; 231 138 tl_data->slice_reg_sz = ADF_GEN6_TL_SLICE_REG_SZ; 139 + tl_data->cmdq_reg_sz = ADF_GEN6_TL_CMDQ_REG_SZ; 232 140 tl_data->rp_reg_sz = ADF_GEN6_TL_RP_REG_SZ; 233 141 tl_data->num_hbuff = ADF_GEN6_TL_NUM_HIST_BUFFS; 234 142 tl_data->max_rp = ADF_GEN6_TL_MAX_RP_NUM; ··· 241 147 tl_data->num_dev_counters = ARRAY_SIZE(dev_counters); 242 148 tl_data->sl_util_counters = sl_util_counters; 243 149 tl_data->sl_exec_counters = sl_exec_counters; 150 + tl_data->cmdq_counters = cmdq_counters; 151 + tl_data->num_cmdq_counters = NUM_CMDQ_COUNTERS; 244 152 tl_data->rp_counters = rp_counters; 245 153 tl_data->num_rp_counters = ARRAY_SIZE(rp_counters); 246 154 tl_data->max_sl_cnt = ADF_GEN6_TL_MAX_SLICES_PER_TYPE; 155 + 156 + tl_data->multiplier.cpr_cnt = CPR_QUEUE_COUNT; 157 + tl_data->multiplier.dcpr_cnt = DCPR_QUEUE_COUNT; 158 + tl_data->multiplier.pke_cnt = PKE_QUEUE_COUNT; 159 + tl_data->multiplier.wat_cnt = WAT_QUEUE_COUNT; 160 + tl_data->multiplier.wcp_cnt = WCP_QUEUE_COUNT; 161 + tl_data->multiplier.ucs_cnt = USC_QUEUE_COUNT; 162 + tl_data->multiplier.ath_cnt = ATH_QUEUE_COUNT; 247 163 } 248 164 EXPORT_SYMBOL_GPL(adf_gen6_init_tl_data);
+19
drivers/crypto/intel/qat/qat_common/adf_telemetry.c
··· 212 212 return ret; 213 213 } 214 214 215 + static void adf_set_cmdq_cnt(struct adf_accel_dev *accel_dev, 216 + struct adf_tl_hw_data *tl_data) 217 + { 218 + struct icp_qat_fw_init_admin_slice_cnt *slice_cnt, *cmdq_cnt; 219 + 220 + slice_cnt = &accel_dev->telemetry->slice_cnt; 221 + cmdq_cnt = &accel_dev->telemetry->cmdq_cnt; 222 + 223 + cmdq_cnt->cpr_cnt = slice_cnt->cpr_cnt * tl_data->multiplier.cpr_cnt; 224 + cmdq_cnt->dcpr_cnt = slice_cnt->dcpr_cnt * tl_data->multiplier.dcpr_cnt; 225 + cmdq_cnt->pke_cnt = slice_cnt->pke_cnt * tl_data->multiplier.pke_cnt; 226 + cmdq_cnt->wat_cnt = slice_cnt->wat_cnt * tl_data->multiplier.wat_cnt; 227 + cmdq_cnt->wcp_cnt = slice_cnt->wcp_cnt * tl_data->multiplier.wcp_cnt; 228 + cmdq_cnt->ucs_cnt = slice_cnt->ucs_cnt * tl_data->multiplier.ucs_cnt; 229 + cmdq_cnt->ath_cnt = slice_cnt->ath_cnt * tl_data->multiplier.ath_cnt; 230 + } 231 + 215 232 int adf_tl_run(struct adf_accel_dev *accel_dev, int state) 216 233 { 217 234 struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); ··· 251 234 adf_send_admin_tl_stop(accel_dev); 252 235 return ret; 253 236 } 237 + 238 + adf_set_cmdq_cnt(accel_dev, tl_data); 254 239 255 240 telemetry->hbuffs = state; 256 241 atomic_set(&telemetry->state, state);
+5
drivers/crypto/intel/qat/qat_common/adf_telemetry.h
··· 28 28 struct adf_tl_hw_data { 29 29 size_t layout_sz; 30 30 size_t slice_reg_sz; 31 + size_t cmdq_reg_sz; 31 32 size_t rp_reg_sz; 32 33 size_t msg_cnt_off; 33 34 const struct adf_tl_dbg_counter *dev_counters; 34 35 const struct adf_tl_dbg_counter *sl_util_counters; 35 36 const struct adf_tl_dbg_counter *sl_exec_counters; 37 + const struct adf_tl_dbg_counter **cmdq_counters; 36 38 const struct adf_tl_dbg_counter *rp_counters; 37 39 u8 num_hbuff; 38 40 u8 cpp_ns_per_cycle; 39 41 u8 bw_units_to_bytes; 40 42 u8 num_dev_counters; 41 43 u8 num_rp_counters; 44 + u8 num_cmdq_counters; 42 45 u8 max_rp; 43 46 u8 max_sl_cnt; 47 + struct icp_qat_fw_init_admin_slice_cnt multiplier; 44 48 }; 45 49 46 50 struct adf_telemetry { ··· 73 69 struct mutex wr_lock; 74 70 struct delayed_work work_ctx; 75 71 struct icp_qat_fw_init_admin_slice_cnt slice_cnt; 72 + struct icp_qat_fw_init_admin_slice_cnt cmdq_cnt; 76 73 }; 77 74 78 75 #ifdef CONFIG_DEBUG_FS
+52
drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c
··· 339 339 return 0; 340 340 } 341 341 342 + static int tl_print_cmdq_counter(struct adf_telemetry *telemetry, 343 + const struct adf_tl_dbg_counter *ctr, 344 + struct seq_file *s, u8 cnt_id, u8 counter) 345 + { 346 + size_t cmdq_regs_sz = GET_TL_DATA(telemetry->accel_dev).cmdq_reg_sz; 347 + size_t offset_inc = cnt_id * cmdq_regs_sz; 348 + struct adf_tl_dbg_counter slice_ctr; 349 + char cnt_name[MAX_COUNT_NAME_SIZE]; 350 + 351 + slice_ctr = *(ctr + counter); 352 + slice_ctr.offset1 += offset_inc; 353 + snprintf(cnt_name, MAX_COUNT_NAME_SIZE, "%s%d", slice_ctr.name, cnt_id); 354 + 355 + return tl_calc_and_print_counter(telemetry, s, &slice_ctr, cnt_name); 356 + } 357 + 358 + static int tl_calc_and_print_cmdq_counters(struct adf_accel_dev *accel_dev, 359 + struct seq_file *s, u8 cnt_type, 360 + u8 cnt_id) 361 + { 362 + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); 363 + struct adf_telemetry *telemetry = accel_dev->telemetry; 364 + const struct adf_tl_dbg_counter **cmdq_tl_counters; 365 + const struct adf_tl_dbg_counter *ctr; 366 + u8 counter; 367 + int ret; 368 + 369 + cmdq_tl_counters = tl_data->cmdq_counters; 370 + ctr = cmdq_tl_counters[cnt_type]; 371 + 372 + for (counter = 0; counter < tl_data->num_cmdq_counters; counter++) { 373 + ret = tl_print_cmdq_counter(telemetry, ctr, s, cnt_id, counter); 374 + if (ret) { 375 + dev_notice(&GET_DEV(accel_dev), 376 + "invalid slice utilization counter type\n"); 377 + return ret; 378 + } 379 + } 380 + 381 + return 0; 382 + } 383 + 342 384 static void tl_print_msg_cnt(struct seq_file *s, u32 msg_cnt) 343 385 { 344 386 seq_printf(s, "%-*s", TL_KEY_MIN_PADDING, SNAPSHOT_CNT_MSG); ··· 394 352 struct adf_telemetry *telemetry = accel_dev->telemetry; 395 353 const struct adf_tl_dbg_counter *dev_tl_counters; 396 354 u8 num_dev_counters = tl_data->num_dev_counters; 355 + u8 *cmdq_cnt = (u8 *)&telemetry->cmdq_cnt; 397 356 u8 *sl_cnt = (u8 *)&telemetry->slice_cnt; 398 357 const struct adf_tl_dbg_counter *ctr; 399 358 
unsigned int i; ··· 425 382 for (i = 0; i < ADF_TL_SL_CNT_COUNT; i++) { 426 383 for (j = 0; j < sl_cnt[i]; j++) { 427 384 ret = tl_calc_and_print_sl_counters(accel_dev, s, i, j); 385 + if (ret) 386 + return ret; 387 + } 388 + } 389 + 390 + /* Print per command queue telemetry. */ 391 + for (i = 0; i < ADF_TL_SL_CNT_COUNT; i++) { 392 + for (j = 0; j < cmdq_cnt[i]; j++) { 393 + ret = tl_calc_and_print_cmdq_counters(accel_dev, s, i, j); 428 394 if (ret) 429 395 return ret; 430 396 }
+4
drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.h
··· 44 44 (ADF_TL_DEV_REG_OFF(slice##_slices[0], qat_gen) + \ 45 45 offsetof(struct adf_##qat_gen##_tl_slice_data_regs, reg)) 46 46 47 + #define ADF_TL_CMDQ_REG_OFF(slice, reg, qat_gen) \ 48 + (ADF_TL_DEV_REG_OFF(slice##_cmdq[0], qat_gen) + \ 49 + offsetof(struct adf_##qat_gen##_tl_cmdq_data_regs, reg)) 50 + 47 51 #define ADF_TL_RP_REG_OFF(reg, qat_gen) \ 48 52 (ADF_TL_DATA_REG_OFF(tl_ring_pairs_data_regs[0], qat_gen) + \ 49 53 offsetof(struct adf_##qat_gen##_tl_ring_pair_data_regs, reg))