Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

crypto: qat - add support for zstd

Add support for the ZSTD algorithm for QAT GEN4, GEN5 and GEN6 via the
acomp API.

For GEN4 and GEN5, compression is performed in hardware using LZ4s, a
QAT-specific variant of LZ4. The compressed output is post-processed to
generate ZSTD sequences, and the ZSTD library is then used to produce
the final ZSTD stream via zstd_compress_sequences_and_literals(). Only
inputs between 8 KB and 512 KB are offloaded to the device. The minimum
size restriction will be relaxed once polling support is added. The
maximum size is limited by the use of pre-allocated per-CPU scratch
buffers. On these generations, only compression is offloaded to hardware;
decompression always falls back to software.

For GEN6, both compression and decompression are offloaded to the
accelerator, which natively supports the ZSTD algorithm. There is no
limit on the input buffer size supported. However, since GEN6 is limited
to a history size of 64 KB, decompression of frames compressed with a
larger history falls back to software.

Since GEN2 devices do not support ZSTD or LZ4s, add a mechanism that
prevents selecting GEN2 compression instances for ZSTD or LZ4s when a
GEN2 plug-in card is present on a system with an embedded GEN4, GEN5 or
GEN6 device.

In addition, modify the algorithm registration logic to allow
registering the correct implementation, i.e. LZ4s based for GEN4 and
GEN5 or native ZSTD for GEN6.

Co-developed-by: Suman Kumar Chakraborty <suman.kumar.chakraborty@intel.com>
Signed-off-by: Suman Kumar Chakraborty <suman.kumar.chakraborty@intel.com>
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Laurent M Coquerel <laurent.m.coquerel@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

authored by

Giovanni Cabiddu and committed by
Herbert Xu
879a4f78 35ecb77a

+773 -30
+1
drivers/crypto/intel/qat/Kconfig
··· 12 12 select CRYPTO_LIB_SHA1 13 13 select CRYPTO_LIB_SHA256 14 14 select CRYPTO_LIB_SHA512 15 + select CRYPTO_ZSTD 15 16 select FW_LOADER 16 17 select CRC8 17 18
+1
drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
··· 488 488 hw_data->clock_frequency = ADF_420XX_AE_FREQ; 489 489 hw_data->services_supported = adf_gen4_services_supported; 490 490 hw_data->get_svc_slice_cnt = adf_gen4_get_svc_slice_cnt; 491 + hw_data->accel_capabilities_ext_mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S; 491 492 492 493 adf_gen4_set_err_mask(&hw_data->dev_err_mask); 493 494 adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
+1
drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
··· 473 473 hw_data->clock_frequency = ADF_4XXX_AE_FREQ; 474 474 hw_data->services_supported = adf_gen4_services_supported; 475 475 hw_data->get_svc_slice_cnt = adf_gen4_get_svc_slice_cnt; 476 + hw_data->accel_capabilities_ext_mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S; 476 477 477 478 adf_gen4_set_err_mask(&hw_data->dev_err_mask); 478 479 adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
+17
drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
··· 33 33 #define ADF_AE_GROUP_1 GENMASK(7, 4) 34 34 #define ADF_AE_GROUP_2 BIT(8) 35 35 36 + #define ASB_MULTIPLIER 9 37 + 36 38 struct adf_ring_config { 37 39 u32 ring_mask; 38 40 enum adf_cfg_service_type ring_type; ··· 511 509 case QAT_DEFLATE: 512 510 header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DYNAMIC; 513 511 break; 512 + case QAT_ZSTD: 513 + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_ZSTD_COMPRESS; 514 + break; 514 515 default: 515 516 return -EINVAL; 516 517 } ··· 523 518 lower_val = ICP_QAT_FW_COMP_51_BUILD_CONFIG_LOWER(hw_comp_lower_csr); 524 519 cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val; 525 520 cd_pars->u.sl.comp_slice_cfg_word[1] = 0; 521 + 522 + /* 523 + * Store Auto Select Best (ASB) multiplier in the request template. 524 + * This will be used in the data path to set the actual threshold 525 + * value based on the input data size. 526 + */ 527 + req_tmpl->u3.asb_threshold.asb_value = ASB_MULTIPLIER; 526 528 527 529 return 0; 528 530 } ··· 544 532 case QAT_DEFLATE: 545 533 header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS; 546 534 break; 535 + case QAT_ZSTD: 536 + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_ZSTD_DECOMPRESS; 537 + break; 547 538 default: 548 539 return -EINVAL; 549 540 } 550 541 551 542 cd_pars->u.sl.comp_slice_cfg_word[0] = 0; 552 543 cd_pars->u.sl.comp_slice_cfg_word[1] = 0; 544 + req_tmpl->u3.asb_threshold.asb_value = 0; 553 545 554 546 return 0; 555 547 } ··· 1046 1030 hw_data->num_rps = ADF_GEN6_ETR_MAX_BANKS; 1047 1031 hw_data->clock_frequency = ADF_6XXX_AE_FREQ; 1048 1032 hw_data->get_svc_slice_cnt = adf_gen6_get_svc_slice_cnt; 1033 + hw_data->accel_capabilities_ext_mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD; 1049 1034 1050 1035 adf_gen6_init_services_supported(hw_data); 1051 1036 adf_gen6_init_hw_csr_ops(&hw_data->csr_ops);
+1
drivers/crypto/intel/qat/qat_common/Makefile
··· 41 41 qat_bl.o \ 42 42 qat_comp_algs.o \ 43 43 qat_compression.o \ 44 + qat_comp_zstd_utils.o \ 44 45 qat_crypto.o \ 45 46 qat_hal.o \ 46 47 qat_mig_dev.o \
+6
drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
··· 59 59 ADF_ACCEL_CAPABILITIES_RANDOM_NUMBER = 128 60 60 }; 61 61 62 + enum adf_accel_capabilities_ext { 63 + ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S = BIT(0), 64 + ADF_ACCEL_CAPABILITIES_EXT_ZSTD = BIT(1), 65 + }; 66 + 62 67 enum adf_fuses { 63 68 ADF_FUSECTL0, 64 69 ADF_FUSECTL1, ··· 341 336 u32 fuses[ADF_MAX_FUSES]; 342 337 u32 straps; 343 338 u32 accel_capabilities_mask; 339 + u32 accel_capabilities_ext_mask; 344 340 u32 extended_dc_capabilities; 345 341 u16 fw_capabilities; 346 342 u32 clock_frequency;
+3 -3
drivers/crypto/intel/qat/qat_common/adf_common_drv.h
··· 111 111 int qat_asym_algs_register(void); 112 112 void qat_asym_algs_unregister(void); 113 113 114 - struct qat_compression_instance *qat_compression_get_instance_node(int node); 114 + struct qat_compression_instance *qat_compression_get_instance_node(int node, int alg); 115 115 void qat_compression_put_instance(struct qat_compression_instance *inst); 116 116 int qat_compression_register(void); 117 117 int qat_compression_unregister(void); 118 - int qat_comp_algs_register(void); 119 - void qat_comp_algs_unregister(void); 118 + int qat_comp_algs_register(u32 caps); 119 + void qat_comp_algs_unregister(u32 caps); 120 120 void qat_comp_alg_callback(void *resp); 121 121 122 122 int adf_isr_resource_alloc(struct adf_accel_dev *accel_dev);
+14 -4
drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
··· 504 504 switch (algo) { 505 505 case QAT_DEFLATE: 506 506 header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DYNAMIC; 507 + hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77; 508 + hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED; 509 + hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL; 510 + break; 511 + case QAT_LZ4S: 512 + header->service_cmd_id = ICP_QAT_FW_COMP_20_CMD_LZ4S_COMPRESS; 513 + hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_LZ4S; 514 + hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_DISABLED; 515 + hw_comp_lower_csr.abd = ICP_QAT_HW_COMP_20_ABD_ABD_DISABLED; 507 516 break; 508 517 default: 509 518 return -EINVAL; 510 519 } 511 520 512 - hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL; 513 - hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77; 514 - hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED; 515 521 hw_comp_lower_csr.sd = ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1; 516 522 hw_comp_lower_csr.hash_update = ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW; 517 523 hw_comp_lower_csr.edmm = ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED; ··· 544 538 switch (algo) { 545 539 case QAT_DEFLATE: 546 540 header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS; 541 + hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE; 542 + break; 543 + case QAT_LZ4S: 544 + header->service_cmd_id = ICP_QAT_FW_COMP_20_CMD_LZ4S_DECOMPRESS; 545 + hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_LZ4S; 547 546 break; 548 547 default: 549 548 return -EINVAL; 550 549 } 551 550 552 - hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE; 553 551 lower_val = ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(hw_decomp_lower_csr); 554 552 555 553 cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
+4 -2
drivers/crypto/intel/qat/qat_common/adf_init.c
··· 180 180 { 181 181 struct adf_hw_device_data *hw_data = accel_dev->hw_device; 182 182 struct service_hndl *service; 183 + u32 caps; 183 184 int ret; 184 185 185 186 set_bit(ADF_STATUS_STARTING, &accel_dev->status); ··· 254 253 } 255 254 set_bit(ADF_STATUS_CRYPTO_ALGS_REGISTERED, &accel_dev->status); 256 255 257 - if (!list_empty(&accel_dev->compression_list) && qat_comp_algs_register()) { 256 + caps = hw_data->accel_capabilities_ext_mask; 257 + if (!list_empty(&accel_dev->compression_list) && qat_comp_algs_register(caps)) { 258 258 dev_err(&GET_DEV(accel_dev), 259 259 "Failed to register compression algs\n"); 260 260 set_bit(ADF_STATUS_STARTING, &accel_dev->status); ··· 310 308 311 309 if (!list_empty(&accel_dev->compression_list) && 312 310 test_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status)) 313 - qat_comp_algs_unregister(); 311 + qat_comp_algs_unregister(hw_data->accel_capabilities_ext_mask); 314 312 clear_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status); 315 313 316 314 list_for_each_entry(service, &service_table, list) {
+7
drivers/crypto/intel/qat/qat_common/icp_qat_fw.h
··· 151 151 ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \ 152 152 ICP_QAT_FW_COMN_CNV_FLAG_MASK) 153 153 154 + #define ICP_QAT_FW_COMN_ST_BLK_FLAG_BITPOS 4 155 + #define ICP_QAT_FW_COMN_ST_BLK_FLAG_MASK 0x1 156 + #define ICP_QAT_FW_COMN_HDR_ST_BLK_FLAG_GET(hdr_flags) \ 157 + QAT_FIELD_GET(hdr_flags, \ 158 + ICP_QAT_FW_COMN_ST_BLK_FLAG_BITPOS, \ 159 + ICP_QAT_FW_COMN_ST_BLK_FLAG_MASK) 160 + 154 161 #define ICP_QAT_FW_COMN_HDR_CNV_FLAG_SET(hdr_t, val) \ 155 162 QAT_FIELD_SET((hdr_t.hdr_flags), (val), \ 156 163 ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \
+2
drivers/crypto/intel/qat/qat_common/icp_qat_fw_comp.h
··· 8 8 ICP_QAT_FW_COMP_CMD_STATIC = 0, 9 9 ICP_QAT_FW_COMP_CMD_DYNAMIC = 1, 10 10 ICP_QAT_FW_COMP_CMD_DECOMPRESS = 2, 11 + ICP_QAT_FW_COMP_CMD_ZSTD_COMPRESS = 10, 12 + ICP_QAT_FW_COMP_CMD_ZSTD_DECOMPRESS = 11, 11 13 ICP_QAT_FW_COMP_CMD_DELIMITER 12 14 }; 13 15
+2 -1
drivers/crypto/intel/qat/qat_common/icp_qat_hw.h
··· 336 336 enum icp_qat_hw_compression_algo { 337 337 ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE = 0, 338 338 ICP_QAT_HW_COMPRESSION_ALGO_LZS = 1, 339 - ICP_QAT_HW_COMPRESSION_ALGO_DELIMITER = 2 339 + ICP_QAT_HW_COMPRESSION_ALGO_ZSTD = 2, 340 + ICP_QAT_HW_COMPRESSION_ALGO_DELIMITER 340 341 }; 341 342 342 343 enum icp_qat_hw_compression_depth {
+505 -19
drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
··· 6 6 #include <crypto/scatterwalk.h> 7 7 #include <linux/dma-mapping.h> 8 8 #include <linux/workqueue.h> 9 + #include <linux/zstd.h> 9 10 #include "adf_accel_devices.h" 10 11 #include "adf_common_drv.h" 11 12 #include "adf_dc.h" ··· 14 13 #include "qat_comp_req.h" 15 14 #include "qat_compression.h" 16 15 #include "qat_algs_send.h" 16 + #include "qat_comp_zstd_utils.h" 17 + 18 + #define QAT_ZSTD_SCRATCH_SIZE 524288 19 + #define QAT_ZSTD_MAX_BLOCK_SIZE 65535 20 + #define QAT_ZSTD_MAX_CONTENT_SIZE 4096 21 + #define QAT_LZ4S_MIN_INPUT_SIZE 8192 22 + #define QAT_LZ4S_MAX_OUTPUT_SIZE QAT_ZSTD_SCRATCH_SIZE 23 + #define QAT_MAX_SEQUENCES (128 * 1024) 17 24 18 25 static DEFINE_MUTEX(algs_lock); 19 - static unsigned int active_devs; 26 + static unsigned int active_devs_deflate; 27 + static unsigned int active_devs_lz4s; 28 + static unsigned int active_devs_zstd; 29 + 30 + struct qat_zstd_scratch { 31 + size_t cctx_buffer_size; 32 + void *lz4s; 33 + void *literals; 34 + void *out_seqs; 35 + void *workspace; 36 + ZSTD_CCtx *ctx; 37 + }; 38 + 39 + static void *qat_zstd_alloc_scratch(void) 40 + { 41 + struct qat_zstd_scratch *scratch; 42 + ZSTD_parameters params; 43 + size_t cctx_size; 44 + ZSTD_CCtx *ctx; 45 + size_t zret; 46 + int ret; 47 + 48 + ret = -ENOMEM; 49 + scratch = kzalloc_obj(*scratch); 50 + if (!scratch) 51 + return ERR_PTR(ret); 52 + 53 + scratch->lz4s = kvmalloc(QAT_ZSTD_SCRATCH_SIZE, GFP_KERNEL); 54 + if (!scratch->lz4s) 55 + goto error; 56 + 57 + scratch->literals = kvmalloc(QAT_ZSTD_SCRATCH_SIZE, GFP_KERNEL); 58 + if (!scratch->literals) 59 + goto error; 60 + 61 + scratch->out_seqs = kvcalloc(QAT_MAX_SEQUENCES, sizeof(ZSTD_Sequence), 62 + GFP_KERNEL); 63 + if (!scratch->out_seqs) 64 + goto error; 65 + 66 + params = zstd_get_params(zstd_max_clevel(), QAT_ZSTD_SCRATCH_SIZE); 67 + cctx_size = zstd_cctx_workspace_bound(&params.cParams); 68 + 69 + scratch->workspace = kvmalloc(cctx_size, GFP_KERNEL | __GFP_ZERO); 70 + if (!scratch->workspace) 71 + goto error; 72 + 73 + ret = -EINVAL; 74 + ctx = zstd_init_cctx(scratch->workspace, cctx_size); 75 + if (!ctx) 76 + goto error; 77 + 78 + scratch->ctx = ctx; 79 + scratch->cctx_buffer_size = cctx_size; 80 + 81 + zret = zstd_cctx_set_param(ctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters); 82 + if (zstd_is_error(zret)) 83 + goto error; 84 + 85 + return scratch; 86 + 87 + error: 88 + kvfree(scratch->lz4s); 89 + kvfree(scratch->literals); 90 + kvfree(scratch->out_seqs); 91 + kvfree(scratch->workspace); 92 + kfree(scratch); 93 + return ERR_PTR(ret); 94 + } 95 + 96 + static void qat_zstd_free_scratch(void *ctx) 97 + { 98 + struct qat_zstd_scratch *scratch = ctx; 99 + 100 + if (!scratch) 101 + return; 102 + 103 + kvfree(scratch->lz4s); 104 + kvfree(scratch->literals); 105 + kvfree(scratch->out_seqs); 106 + kvfree(scratch->workspace); 107 + kfree(scratch); 108 + } 109 + 110 + static struct crypto_acomp_streams qat_zstd_streams = { 111 + .alloc_ctx = qat_zstd_alloc_scratch, 112 + .free_ctx = qat_zstd_free_scratch, 113 + }; 20 114 21 115 enum direction { 22 116 DECOMPRESSION = 0, ··· 120 24 121 25 struct qat_compression_req; 122 26 27 + struct qat_callback_params { 28 + unsigned int produced; 29 + unsigned int dlen; 30 + bool plain; 31 + }; 32 + 123 33 struct qat_compression_ctx { 124 34 u8 comp_ctx[QAT_COMP_CTX_SIZE]; 125 35 struct qat_compression_instance *inst; 126 - int (*qat_comp_callback)(struct qat_compression_req *qat_req, void *resp); 36 + int (*qat_comp_callback)(struct qat_compression_req *qat_req, void *resp, 37 + struct qat_callback_params *params); 38 + struct crypto_acomp *ftfm; 127 39 }; 128 40 129 41 struct qat_compression_req { ··· 166 62 struct adf_accel_dev *accel_dev = ctx->inst->accel_dev; 167 63 struct crypto_acomp *tfm = crypto_acomp_reqtfm(areq); 168 64 struct qat_compression_instance *inst = ctx->inst; 65 + struct qat_callback_params params = { }; 169 66 int consumed, produced; 170 67 s8 cmp_err, xlt_err; 171 68 int res = -EBADMSG; ··· 181 76 consumed = qat_comp_get_consumed_ctr(resp); 182 77 produced = qat_comp_get_produced_ctr(resp); 183 78 79 + /* Cache parameters for algorithm specific callback */ 80 + params.produced = produced; 81 + params.dlen = areq->dlen; 82 + 184 83 dev_dbg(&GET_DEV(accel_dev), 185 84 "[%s][%s][%s] slen = %8d dlen = %8d consumed = %8d produced = %8d cmp_err = %3d xlt_err = %3d", 186 85 crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)), ··· 192 83 status ? "ERR" : "OK ", 193 84 areq->slen, areq->dlen, consumed, produced, cmp_err, xlt_err); 194 85 195 - areq->dlen = 0; 86 + if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) { 87 + if (cmp_err == ERR_CODE_OVERFLOW_ERROR || xlt_err == ERR_CODE_OVERFLOW_ERROR) 88 + res = -E2BIG; 196 89 197 - if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) 90 + areq->dlen = 0; 198 91 goto end; 92 + } 199 93 200 94 if (qat_req->dir == COMPRESSION) { 201 95 cnv = qat_comp_get_cmp_cnv_flag(resp); 202 96 if (unlikely(!cnv)) { 203 97 dev_err(&GET_DEV(accel_dev), 204 98 "Verified compression not supported\n"); 99 + areq->dlen = 0; 205 100 goto end; 206 101 } 207 102 ··· 215 102 dev_dbg(&GET_DEV(accel_dev), 216 103 "Actual buffer overflow: produced=%d, dlen=%d\n", 217 104 produced, qat_req->actual_dlen); 105 + 106 + res = -E2BIG; 107 + areq->dlen = 0; 218 108 goto end; 219 109 } 110 + 111 + params.plain = !!qat_comp_get_cmp_uncomp_flag(resp); 220 112 } 221 113 222 114 res = 0; 223 115 areq->dlen = produced; 224 116 225 117 if (ctx->qat_comp_callback) 226 - res = ctx->qat_comp_callback(qat_req, resp); 118 + res = ctx->qat_comp_callback(qat_req, resp, &params); 227 119 228 120 end: 229 121 qat_bl_free_bufl(accel_dev, &qat_req->buf); 230 122 acomp_request_complete(areq, res); 123 + qat_alg_send_backlog(qat_req->alg_req.backlog); 231 124 } 232 125 233 126 void qat_comp_alg_callback(void *resp) 234 127 { 235 128 struct qat_compression_req *qat_req = 236 129 (void *)(__force long)qat_comp_get_opaque(resp); 237 - struct qat_instance_backlog *backlog = qat_req->alg_req.backlog; 238 130 239 131 qat_comp_generic_callback(qat_req, resp); 240 - 241 - qat_alg_send_backlog(backlog); 242 132 } 243 133 244 - static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm) 134 + static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm, int alg) 245 135 { 246 136 struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm); 247 137 struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); ··· 257 141 node = tfm->node; 258 142 259 143 memset(ctx, 0, sizeof(*ctx)); 260 - inst = qat_compression_get_instance_node(node); 144 + inst = qat_compression_get_instance_node(node, alg); 261 145 if (!inst) 262 146 return -EINVAL; 263 147 ctx->inst = inst; 264 148 265 - ret = qat_comp_build_ctx(inst->accel_dev, ctx->comp_ctx, QAT_DEFLATE); 149 + ret = qat_comp_build_ctx(inst->accel_dev, ctx->comp_ctx, alg); 266 150 if (ret) { 267 151 qat_compression_put_instance(inst); 268 152 memset(ctx, 0, sizeof(*ctx)); 269 153 } 270 154 271 155 return ret; 156 + } 157 + 158 + static int qat_comp_alg_deflate_init_tfm(struct crypto_acomp *acomp_tfm) 159 + { 160 + return qat_comp_alg_init_tfm(acomp_tfm, QAT_DEFLATE); 272 161 } 273 162 274 163 static void qat_comp_alg_exit_tfm(struct crypto_acomp *acomp_tfm) ··· 358 237 return qat_comp_alg_compress_decompress(req, DECOMPRESSION, 0, 0, 0, 0); 359 238 } 360 239 361 - static struct acomp_alg qat_acomp[] = { { 240 + static int qat_comp_alg_zstd_decompress(struct acomp_req *req) 241 + { 242 + struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(req); 243 + struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm); 244 + struct acomp_req *nreq = acomp_request_ctx(req); 245 + zstd_frame_header header; 246 + void *buffer; 247 + size_t zret; 248 + int ret; 249 + 250 + buffer = kmap_local_page(sg_page(req->src)) + req->src->offset; 251 + zret = zstd_get_frame_header(&header, buffer, req->src->length); 252 + kunmap_local(buffer); 253 + if (zret) { 254 + dev_err(&GET_DEV(ctx->inst->accel_dev), 255 + "ZSTD-compressed data has an incomplete frame header\n"); 256 + return -EINVAL; 257 + } 258 + 259 + if (header.windowSize > QAT_ZSTD_MAX_BLOCK_SIZE || 260 + header.frameContentSize >= QAT_ZSTD_MAX_CONTENT_SIZE) { 261 + dev_dbg(&GET_DEV(ctx->inst->accel_dev), "Window size=0x%llx\n", 262 + header.windowSize); 263 + 264 + memcpy(nreq, req, sizeof(*req)); 265 + acomp_request_set_tfm(nreq, ctx->ftfm); 266 + 267 + ret = crypto_acomp_decompress(nreq); 268 + req->dlen = nreq->dlen; 269 + 270 + return ret; 271 + } 272 + 273 + return qat_comp_alg_compress_decompress(req, DECOMPRESSION, 0, 0, 0, 0); 274 + } 275 + 276 + static int qat_comp_lz4s_zstd_callback(struct qat_compression_req *qat_req, void *resp, 277 + struct qat_callback_params *params) 278 + { 279 + struct qat_compression_ctx *qat_ctx = qat_req->qat_compression_ctx; 280 + struct acomp_req *areq = qat_req->acompress_req; 281 + struct qat_zstd_scratch *scratch; 282 + struct crypto_acomp_stream *s; 283 + unsigned int lit_len = 0; 284 + ZSTD_Sequence *out_seqs; 285 + void *lz4s, *zstd; 286 + size_t comp_size; 287 + ZSTD_CCtx *ctx; 288 + void *literals; 289 + int seq_count; 290 + int ret = 0; 291 + 292 + if (params->produced + QAT_ZSTD_LIT_COPY_LEN > QAT_ZSTD_SCRATCH_SIZE) { 293 + dev_dbg(&GET_DEV(qat_ctx->inst->accel_dev), 294 + "LZ4s-ZSTD: produced size (%u) + COPY_SIZE > QAT_ZSTD_SCRATCH_SIZE (%u)\n", 295 + params->produced, QAT_ZSTD_SCRATCH_SIZE); 296 + areq->dlen = 0; 297 + return -E2BIG; 298 + } 299 + 300 + s = crypto_acomp_lock_stream_bh(&qat_zstd_streams); 301 + scratch = s->ctx; 302 + 303 + lz4s = scratch->lz4s; 304 + zstd = lz4s; /* Output buffer is same as lz4s */ 305 + out_seqs = scratch->out_seqs; 306 + ctx = scratch->ctx; 307 + literals = scratch->literals; 308 + 309 + if (likely(!params->plain)) { 310 + if (likely(sg_nents(areq->dst) == 1)) { 311 + zstd = sg_virt(areq->dst); 312 + lz4s = zstd; 313 + } else { 314 + memcpy_from_sglist(lz4s, areq->dst, 0, params->produced); 315 + } 316 + 317 + seq_count = qat_alg_dec_lz4s(out_seqs, QAT_MAX_SEQUENCES, lz4s, 318 + params->produced, literals, &lit_len); 319 + if (seq_count < 0) { 320 + ret = seq_count; 321 + comp_size = 0; 322 + goto out; 323 + } 324 + } else { 325 + out_seqs[0].litLength = areq->slen; 326 + out_seqs[0].offset = 0; 327 + out_seqs[0].matchLength = 0; 328 + 329 + seq_count = 1; 330 + } 331 + 332 + comp_size = zstd_compress_sequences_and_literals(ctx, zstd, params->dlen, 333 + out_seqs, seq_count, 334 + literals, lit_len, 335 + QAT_ZSTD_SCRATCH_SIZE, 336 + areq->slen); 337 + if (zstd_is_error(comp_size)) { 338 + if (comp_size == ZSTD_error_cannotProduce_uncompressedBlock) 339 + ret = -E2BIG; 340 + else 341 + ret = -EOPNOTSUPP; 342 + 343 + comp_size = 0; 344 + goto out; 345 + } 346 + 347 + if (comp_size > params->dlen) { 348 + dev_dbg(&GET_DEV(qat_ctx->inst->accel_dev), 349 + "LZ4s-ZSTD: compressed_size (%u) > output buffer size (%u)\n", 350 + (unsigned int)comp_size, params->dlen); 351 + ret = -EOVERFLOW; 352 + goto out; 353 + } 354 + 355 + if (unlikely(sg_nents(areq->dst) != 1)) 356 + memcpy_to_sglist(areq->dst, 0, zstd, comp_size); 357 + 358 + out: 359 + areq->dlen = comp_size; 360 + crypto_acomp_unlock_stream_bh(s); 361 + 362 + return ret; 363 + } 364 + 365 + static int qat_comp_alg_lz4s_zstd_init_tfm(struct crypto_acomp *acomp_tfm) 366 + { 367 + struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm); 368 + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); 369 + int reqsize; 370 + int ret; 371 + 372 + /* qat_comp_alg_init_tfm() wipes out the ctx */ 373 + ret = qat_comp_alg_init_tfm(acomp_tfm, QAT_LZ4S); 374 + if (ret) 375 + return ret; 376 + 377 + ctx->ftfm = crypto_alloc_acomp_node("zstd", 0, CRYPTO_ALG_NEED_FALLBACK, 378 + tfm->node); 379 + if (IS_ERR(ctx->ftfm)) { 380 + qat_comp_alg_exit_tfm(acomp_tfm); 381 + return PTR_ERR(ctx->ftfm); 382 + } 383 + 384 + reqsize = max(sizeof(struct qat_compression_req), 385 + sizeof(struct acomp_req) + crypto_acomp_reqsize(ctx->ftfm)); 386 + 387 + acomp_tfm->reqsize = reqsize; 388 + 389 + ctx->qat_comp_callback = qat_comp_lz4s_zstd_callback; 390 + 391 + return 0; 392 + } 393 + 394 + static int qat_comp_alg_zstd_init_tfm(struct crypto_acomp *acomp_tfm) 395 + { 396 + struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm); 397 + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); 398 + int reqsize; 399 + int ret; 400 + 401 + /* qat_comp_alg_init_tfm() wipes out the ctx */ 402 + ret = qat_comp_alg_init_tfm(acomp_tfm, QAT_ZSTD); 403 + if (ret) 404 + return ret; 405 + 406 + ctx->ftfm = crypto_alloc_acomp_node("zstd", 0, CRYPTO_ALG_NEED_FALLBACK, 407 + tfm->node); 408 + if (IS_ERR(ctx->ftfm)) { 409 + qat_comp_alg_exit_tfm(acomp_tfm); 410 + return PTR_ERR(ctx->ftfm); 411 + } 412 + 413 + reqsize = max(sizeof(struct qat_compression_req), 414 + sizeof(struct acomp_req) + crypto_acomp_reqsize(ctx->ftfm)); 415 + 416 + acomp_tfm->reqsize = reqsize; 417 + 418 + return 0; 419 + } 420 + 421 + static void qat_comp_alg_zstd_exit_tfm(struct crypto_acomp *acomp_tfm) 422 + { 423 + struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm); 424 + 425 + if (ctx->ftfm) 426 + crypto_free_acomp(ctx->ftfm); 427 + 428 + qat_comp_alg_exit_tfm(acomp_tfm); 429 + } 430 + 431 + static int qat_comp_alg_lz4s_zstd_compress(struct acomp_req *req) 432 + { 433 + struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(req); 434 + struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm); 435 + struct acomp_req *nreq = acomp_request_ctx(req); 436 + int ret; 437 + 438 + if (req->slen >= QAT_LZ4S_MIN_INPUT_SIZE && req->dlen >= QAT_LZ4S_MIN_INPUT_SIZE && 439 + req->slen <= QAT_LZ4S_MAX_OUTPUT_SIZE && req->dlen <= QAT_LZ4S_MAX_OUTPUT_SIZE) 440 + return qat_comp_alg_compress(req); 441 + 442 + memcpy(nreq, req, sizeof(*req)); 443 + acomp_request_set_tfm(nreq, ctx->ftfm); 444 + 445 + ret = crypto_acomp_compress(nreq); 446 + req->dlen = nreq->dlen; 447 + 448 + return ret; 449 + } 450 + 451 + static int qat_comp_alg_sw_decompress(struct acomp_req *req) 452 + { 453 + struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(req); 454 + struct qat_compression_ctx *ctx = acomp_tfm_ctx(acomp_tfm); 455 + struct acomp_req *nreq = acomp_request_ctx(req); 456 + int ret; 457 + 458 + memcpy(nreq, req, sizeof(*req)); 459 + acomp_request_set_tfm(nreq, ctx->ftfm); 460 + 461 + ret = crypto_acomp_decompress(nreq); 462 + req->dlen = nreq->dlen; 463 + 464 + return ret; 465 + } 466 + 467 + static struct acomp_alg qat_acomp_deflate[] = { { 362 468 .base = { 363 469 .cra_name = "deflate", 364 470 .cra_driver_name = "qat_deflate", ··· 595 247 .cra_reqsize = sizeof(struct qat_compression_req), 596 248 .cra_module = THIS_MODULE, 597 249 }, 598 - .init = qat_comp_alg_init_tfm, 250 + .init = qat_comp_alg_deflate_init_tfm, 599 251 .exit = qat_comp_alg_exit_tfm, 600 252 .compress = qat_comp_alg_compress, 601 253 .decompress = qat_comp_alg_decompress, 602 254 }}; 603 255 604 - int qat_comp_algs_register(void) 256 + static struct acomp_alg qat_acomp_zstd_lz4s = { 257 + .base = { 258 + .cra_name = "zstd", 259 + .cra_driver_name = "qat_zstd", 260 + .cra_priority = 4001, 261 + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | 262 + CRYPTO_ALG_NEED_FALLBACK, 263 + .cra_reqsize = sizeof(struct qat_compression_req), 264 + .cra_ctxsize = sizeof(struct qat_compression_ctx), 265 + .cra_module = THIS_MODULE, 266 + }, 267 + .init = qat_comp_alg_lz4s_zstd_init_tfm, 268 + .exit = qat_comp_alg_zstd_exit_tfm, 269 + .compress = qat_comp_alg_lz4s_zstd_compress, 270 + .decompress = qat_comp_alg_sw_decompress, 271 + }; 272 + 273 + static struct acomp_alg qat_acomp_zstd_native = { 274 + .base = { 275 + .cra_name = "zstd", 276 + .cra_driver_name = "qat_zstd", 277 + .cra_priority = 4001, 278 + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | 279 + CRYPTO_ALG_NEED_FALLBACK, 280 + .cra_reqsize = sizeof(struct qat_compression_req), 281 + .cra_ctxsize = sizeof(struct qat_compression_ctx), 282 + .cra_module = THIS_MODULE, 283 + }, 284 + .init = qat_comp_alg_zstd_init_tfm, 285 + .exit = qat_comp_alg_zstd_exit_tfm, 286 + .compress = qat_comp_alg_compress, 287 + .decompress = qat_comp_alg_zstd_decompress, 288 + }; 289 + 290 + static int qat_comp_algs_register_deflate(void) 605 291 { 606 292 int ret = 0; 607 293 608 294 mutex_lock(&algs_lock); 609 - if (++active_devs == 1) 610 - ret = crypto_register_acomps(qat_acomp, ARRAY_SIZE(qat_acomp)); 295 + if (++active_devs_deflate == 1) { 296 + ret = crypto_register_acomps(qat_acomp_deflate, 297 + ARRAY_SIZE(qat_acomp_deflate)); 298 + if (ret) 299 + active_devs_deflate--; 300 + } 611 301 mutex_unlock(&algs_lock); 302 + 612 303 return ret; 613 304 } 614 305 615 - void qat_comp_algs_unregister(void) 306 + static void qat_comp_algs_unregister_deflate(void) 616 307 { 617 308 mutex_lock(&algs_lock); 618 - if (--active_devs == 0) 619 - crypto_unregister_acomps(qat_acomp, ARRAY_SIZE(qat_acomp)); 309 + if (--active_devs_deflate == 0) 310 + crypto_unregister_acomps(qat_acomp_deflate, ARRAY_SIZE(qat_acomp_deflate)); 620 311 mutex_unlock(&algs_lock); 312 + } 313 + 314 + static int qat_comp_algs_register_lz4s(void) 315 + { 316 + int ret = 0; 317 + 318 + mutex_lock(&algs_lock); 319 + if (++active_devs_lz4s == 1) { 320 + ret = crypto_acomp_alloc_streams(&qat_zstd_streams); 321 + if (ret) { 322 + active_devs_lz4s--; 323 + goto unlock; 324 + } 325 + 326 + ret = crypto_register_acomp(&qat_acomp_zstd_lz4s); 327 + if (ret) { 328 + crypto_acomp_free_streams(&qat_zstd_streams); 329 + active_devs_lz4s--; 330 + } 331 + } 332 + unlock: 333 + mutex_unlock(&algs_lock); 334 + 335 + return ret; 336 + } 337 + 338 + static void qat_comp_algs_unregister_lz4s(void) 339 + { 340 + mutex_lock(&algs_lock); 341 + if (--active_devs_lz4s == 0) { 342 + crypto_unregister_acomp(&qat_acomp_zstd_lz4s); 343 + crypto_acomp_free_streams(&qat_zstd_streams); 344 + } 345 + mutex_unlock(&algs_lock); 346 + } 347 + 348 + static int qat_comp_algs_register_zstd(void) 349 + { 350 + int ret = 0; 351 + 352 + mutex_lock(&algs_lock); 353 + if (++active_devs_zstd == 1) { 354 + ret = crypto_register_acomp(&qat_acomp_zstd_native); 355 + if (ret) 356 + active_devs_zstd--; 357 + } 358 + mutex_unlock(&algs_lock); 359 + 360 + return ret; 361 + } 362 + 363 + static void qat_comp_algs_unregister_zstd(void) 364 + { 365 + mutex_lock(&algs_lock); 366 + if (--active_devs_zstd == 0) 367 + crypto_unregister_acomp(&qat_acomp_zstd_native); 368 + mutex_unlock(&algs_lock); 369 + } 370 + 371 + int qat_comp_algs_register(u32 caps) 372 + { 373 + int ret; 374 + 375 + ret = qat_comp_algs_register_deflate(); 376 + if (ret) 377 + return ret; 378 + 379 + if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S) { 380 + ret = qat_comp_algs_register_lz4s(); 381 + if (ret) 382 + goto err_unregister_deflate; 383 + } 384 + 385 + if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD) { 386 + ret = qat_comp_algs_register_zstd(); 387 + if (ret) 388 + goto err_unregister_lz4s; 389 + } 390 + 391 + return ret; 392 + 393 + err_unregister_lz4s: 394 + if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S) 395 + qat_comp_algs_unregister_lz4s(); 396 + err_unregister_deflate: 397 + qat_comp_algs_unregister_deflate(); 398 + 399 + return ret; 400 + } 401 + 402 + void qat_comp_algs_unregister(u32 caps) 403 + { 404 + qat_comp_algs_unregister_deflate(); 405 + 406 + if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S) 407 + qat_comp_algs_unregister_lz4s(); 408 + 409 + if (caps & ADF_ACCEL_CAPABILITIES_EXT_ZSTD) 410 + qat_comp_algs_unregister_zstd(); 621 411 }
+9
drivers/crypto/intel/qat/qat_common/qat_comp_req.h
··· 23 23 fw_req->comn_mid.opaque_data = opaque; 24 24 req_pars->comp_len = slen; 25 25 req_pars->out_buffer_sz = dlen; 26 + fw_req->u3.asb_threshold.asb_value *= slen >> 4; 26 27 } 27 28 28 29 static inline void qat_comp_create_compression_req(void *ctx, void *req, ··· 109 108 u8 flags = qat_resp->comn_resp.hdr_flags; 110 109 111 110 return ICP_QAT_FW_COMN_HDR_CNV_FLAG_GET(flags); 111 + } 112 + 113 + static inline u8 qat_comp_get_cmp_uncomp_flag(void *resp) 114 + { 115 + struct icp_qat_fw_comp_resp *qat_resp = resp; 116 + u8 flags = qat_resp->comn_resp.hdr_flags; 117 + 118 + return ICP_QAT_FW_COMN_HDR_ST_BLK_FLAG_GET(flags); 112 119 } 113 120 114 121 #endif
+165
drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright(c) 2026 Intel Corporation */ 3 + #include <linux/errno.h> 4 + #include <linux/printk.h> 5 + #include <linux/string.h> 6 + #include <linux/unaligned.h> 7 + #include <linux/zstd.h> 8 + 9 + #include "qat_comp_zstd_utils.h" 10 + 11 + #define ML_BITS 4 12 + #define ML_MASK ((1U << ML_BITS) - 1) 13 + #define RUN_BITS (8 - ML_BITS) 14 + #define RUN_MASK ((1U << RUN_BITS) - 1) 15 + #define LZ4S_MINMATCH 2 16 + 17 + /* 18 + * ZSTD blocks can decompress to at most min(windowSize, 128KB) bytes. 19 + * Insert explicit block delimiters to keep blocks within this limit. 20 + */ 21 + #define QAT_ZSTD_BLOCK_MAX ZSTD_BLOCKSIZE_MAX 22 + 23 + static int emit_delimiter(ZSTD_Sequence *out_seqs, size_t *seqs_idx, 24 + size_t out_seqs_capacity, unsigned int lz4s_buff_size) 25 + { 26 + if (*seqs_idx >= out_seqs_capacity - 1) { 27 + pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n", 28 + *seqs_idx, out_seqs_capacity, lz4s_buff_size); 29 + return -EOVERFLOW; 30 + } 31 + 32 + out_seqs[*seqs_idx].offset = 0; 33 + out_seqs[*seqs_idx].litLength = 0; 34 + out_seqs[*seqs_idx].matchLength = 0; 35 + (*seqs_idx)++; 36 + 37 + return 0; 38 + } 39 + 40 + int qat_alg_dec_lz4s(ZSTD_Sequence *out_seqs, size_t out_seqs_capacity, 41 + unsigned char *lz4s_buff, unsigned int lz4s_buff_size, 42 + unsigned char *literals, unsigned int *lit_len) 43 + { 44 + unsigned char *end_ip = lz4s_buff + lz4s_buff_size; 45 + unsigned char *start, *dest, *dest_end; 46 + unsigned int hist_literal_len = 0; 47 + unsigned char *ip = lz4s_buff; 48 + size_t block_decomp_size = 0; 49 + size_t seqs_idx = 0; 50 + int ret; 51 + 52 + *lit_len = 0; 53 + 54 + if (!lz4s_buff_size) 55 + return 0; 56 + 57 + while (ip < end_ip) { 58 + size_t literal_len = 0, match_len = 0; 59 + const unsigned int token = *ip++; 60 + size_t length = 0; 61 + size_t offset = 0; 62 + 63 + /* Get literal length */ 64 + length = token >> ML_BITS; 65 + if (length == RUN_MASK) { 66 + unsigned int s; 67 + 68 + do { 69 + s = *ip++; 70 + length += s; 71 + } while (s == 255); 72 + } 73 + 74 + literal_len = length; 75 + 76 + start = ip; 77 + dest = literals; 78 + dest_end = literals + length; 79 + 80 + do { 81 + memcpy(dest, start, QAT_ZSTD_LIT_COPY_LEN); 82 + dest += QAT_ZSTD_LIT_COPY_LEN; 83 + start += QAT_ZSTD_LIT_COPY_LEN; 84 + } while (dest < dest_end); 85 + 86 + literals += length; 87 + *lit_len += length; 88 + 89 + ip += length; 90 + if (ip == end_ip) { 91 + literal_len += hist_literal_len; 92 + /* 93 + * If adding trailing literals would overflow the 94 + * current block, close it first. 95 + */ 96 + if (block_decomp_size + literal_len > QAT_ZSTD_BLOCK_MAX) { 97 + ret = emit_delimiter(out_seqs, &seqs_idx, 98 + out_seqs_capacity, 99 + lz4s_buff_size); 100 + if (ret) 101 + return ret; 102 + } 103 + out_seqs[seqs_idx].litLength = literal_len; 104 + out_seqs[seqs_idx].offset = offset; 105 + out_seqs[seqs_idx].matchLength = match_len; 106 + break; 107 + } 108 + 109 + offset = get_unaligned_le16(ip); 110 + ip += 2; 111 + 112 + length = token & ML_MASK; 113 + if (length == ML_MASK) { 114 + unsigned int s; 115 + 116 + do { 117 + s = *ip++; 118 + length += s; 119 + } while (s == 255); 120 + } 121 + if (length != 0) { 122 + length += LZ4S_MINMATCH; 123 + match_len = (unsigned short)length; 124 + literal_len += hist_literal_len; 125 + 126 + /* 127 + * If this sequence would push the current block past 128 + * the ZSTD maximum, close the block first. 129 + */ 130 + if (block_decomp_size + literal_len + match_len > QAT_ZSTD_BLOCK_MAX) { 131 + ret = emit_delimiter(out_seqs, &seqs_idx, 132 + out_seqs_capacity, 133 + lz4s_buff_size); 134 + if (ret) 135 + return ret; 136 + 137 + block_decomp_size = 0; 138 + } 139 + 140 + out_seqs[seqs_idx].offset = offset; 141 + out_seqs[seqs_idx].litLength = literal_len; 142 + out_seqs[seqs_idx].matchLength = match_len; 143 + hist_literal_len = 0; 144 + seqs_idx++; 145 + if (seqs_idx >= out_seqs_capacity - 1) { 146 + pr_debug("QAT ZSTD: sequence overflow (seqs_idx:%zu, capacity:%zu, lz4s_size:%u)\n", 147 + seqs_idx, out_seqs_capacity, lz4s_buff_size); 148 + return -EOVERFLOW; 149 + } 150 + 151 + block_decomp_size += literal_len + match_len; 152 + } else { 153 + if (literal_len > 0) { 154 + /* 155 + * When match length is 0, the literal length needs 156 + * to be temporarily stored and processed together 157 + * with the next data block. 158 + */ 159 + hist_literal_len += literal_len; 160 + } 161 + } 162 + } 163 + 164 + return seqs_idx + 1; 165 + }
+13
drivers/crypto/intel/qat/qat_common/qat_comp_zstd_utils.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* Copyright(c) 2026 Intel Corporation */ 3 + #ifndef QAT_COMP_ZSTD_UTILS_H_ 4 + #define QAT_COMP_ZSTD_UTILS_H_ 5 + #include <linux/zstd_lib.h> 6 + 7 + #define QAT_ZSTD_LIT_COPY_LEN 8 8 + 9 + int qat_alg_dec_lz4s(ZSTD_Sequence *out_seqs, size_t out_seqs_capacity, 10 + unsigned char *lz4s_buff, unsigned int lz4s_buff_size, 11 + unsigned char *literals, unsigned int *lit_len); 12 + 13 + #endif
+22 -1
drivers/crypto/intel/qat/qat_common/qat_compression.c
··· 46 46 return 0; 47 47 } 48 48 49 - struct qat_compression_instance *qat_compression_get_instance_node(int node) 49 + struct qat_compression_instance *qat_compression_get_instance_node(int node, int alg) 50 50 { 51 51 struct qat_compression_instance *inst = NULL; 52 + struct adf_hw_device_data *hw_data = NULL; 52 53 struct adf_accel_dev *accel_dev = NULL; 53 54 unsigned long best = ~0; 54 55 struct list_head *itr; 56 + u32 caps, mask; 55 57 56 58 list_for_each(itr, adf_devmgr_get_head()) { 57 59 struct adf_accel_dev *tmp_dev; ··· 62 60 63 61 tmp_dev = list_entry(itr, struct adf_accel_dev, list); 64 62 tmp_dev_node = dev_to_node(&GET_DEV(tmp_dev)); 63 + 64 + if (alg == QAT_ZSTD || alg == QAT_LZ4S) { 65 + hw_data = tmp_dev->hw_device; 66 + caps = hw_data->accel_capabilities_ext_mask; 67 + mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD | 68 + ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S; 69 + if (!(caps & mask)) 70 + continue; 71 + } 65 72 66 73 if ((node == tmp_dev_node || tmp_dev_node < 0) && 67 74 adf_dev_started(tmp_dev) && !list_empty(&tmp_dev->compression_list)) { ··· 89 78 struct adf_accel_dev *tmp_dev; 90 79 91 80 tmp_dev = list_entry(itr, struct adf_accel_dev, list); 81 + 82 + if (alg == QAT_ZSTD || alg == QAT_LZ4S) { 83 + hw_data = tmp_dev->hw_device; 84 + caps = hw_data->accel_capabilities_ext_mask; 85 + mask = ADF_ACCEL_CAPABILITIES_EXT_ZSTD | 86 + ADF_ACCEL_CAPABILITIES_EXT_ZSTD_LZ4S; 87 + if (!(caps & mask)) 88 + continue; 89 + } 90 + 92 91 if (adf_dev_started(tmp_dev) && 93 92 !list_empty(&tmp_dev->compression_list)) { 94 93 accel_dev = tmp_dev;