Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

RDMA/irdma: Add GEN3 CQP support with deferred completions

GEN3 introduces asynchronous handling of Control QP (CQP) operations to
minimize head-of-line blocking. Create the CQP using the updated GEN3-
specific descriptor fields and implement the necessary support for this
deferred completion mechanism.

Signed-off-by: Krzysztof Czurylo <krzysztof.czurylo@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Link: https://patch.msgid.link/20250827152545.2056-5-tatyana.e.nikolova@intel.com
Tested-by: Jacob Moroni <jmoroni@google.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>

authored by

Krzysztof Czurylo and committed by
Leon Romanovsky
c7db0abe 7d5a7cc7

+438 -15
+250 -3
drivers/infiniband/hw/irdma/ctrl.c
··· 2742 2742 } 2743 2743 2744 2744 /** 2745 + * irdma_sc_cqp_def_cmpl_ae_handler - remove completed requests from pending list 2746 + * @dev: sc device struct 2747 + * @info: AE entry info 2748 + * @first: true if this is the first call to this handler for given AEQE 2749 + * @scratch: (out) scratch entry pointer 2750 + * @sw_def_info: (in/out) SW ticket value for this AE 2751 + * 2752 + * In case of AE_DEF_CMPL event, this function should be called in a loop 2753 + * until it returns NULL-ptr via scratch. 2754 + * For each call, it looks for a matching CQP request on pending list, 2755 + * removes it from the list and returns the pointer to the associated scratch 2756 + * entry. 2757 + * If this is the first call to this function for given AEQE, sw_def_info 2758 + * value is not used to find matching requests. Instead, it is populated 2759 + * with the value from the first matching cqp_request on the list. 2760 + * For subsequent calls, ooo_op->sw_def_info needs to match the value passed 2761 + * by the caller. 2762 + * 2763 + * Return: none; the scratch entry pointer for the cqp_request to be 2764 + * released is stored in *scratch, or 0 if no matching request is found. 
2765 + */ 2766 + void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev, 2767 + struct irdma_aeqe_info *info, 2768 + bool first, u64 *scratch, 2769 + u32 *sw_def_info) 2770 + { 2771 + struct irdma_ooo_cqp_op *ooo_op; 2772 + unsigned long flags; 2773 + 2774 + *scratch = 0; 2775 + 2776 + spin_lock_irqsave(&dev->cqp->ooo_list_lock, flags); 2777 + list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) { 2778 + if (ooo_op->deferred && 2779 + ((first && ooo_op->def_info == info->def_info) || 2780 + (!first && ooo_op->sw_def_info == *sw_def_info))) { 2781 + *sw_def_info = ooo_op->sw_def_info; 2782 + *scratch = ooo_op->scratch; 2783 + 2784 + list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail); 2785 + atomic64_inc(&dev->cqp->completed_ops); 2786 + 2787 + break; 2788 + } 2789 + } 2790 + spin_unlock_irqrestore(&dev->cqp->ooo_list_lock, flags); 2791 + 2792 + if (first && !*scratch) 2793 + ibdev_dbg(to_ibdev(dev), 2794 + "AEQ: deferred completion with unknown ticket: def_info 0x%x\n", 2795 + info->def_info); 2796 + } 2797 + 2798 + /** 2799 + * irdma_sc_cqp_cleanup_handler - remove requests from pending list 2800 + * @dev: sc device struct 2801 + * 2802 + * This function should be called in a loop from irdma_cleanup_pending_cqp_op. 2803 + * For each call, it returns first CQP request on pending list, removes it 2804 + * from the list and returns the pointer to the associated scratch entry. 2805 + * 2806 + * Return: scratch entry pointer for cqp_request to be released or NULL 2807 + * if pending list is empty. 
2808 + */ 2809 + u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev) 2810 + { 2811 + struct irdma_ooo_cqp_op *ooo_op; 2812 + u64 scratch = 0; 2813 + 2814 + list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) { 2815 + scratch = ooo_op->scratch; 2816 + 2817 + list_del(&ooo_op->list_entry); 2818 + list_add(&ooo_op->list_entry, &dev->cqp->ooo_avail); 2819 + atomic64_inc(&dev->cqp->completed_ops); 2820 + 2821 + break; 2822 + } 2823 + 2824 + return scratch; 2825 + } 2826 + 2827 + /** 2745 2828 * irdma_cqp_poll_registers - poll cqp registers 2746 2829 * @cqp: struct for cqp hw 2747 2830 * @tail: wqtail register value ··· 3209 3126 int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, 3210 3127 struct irdma_cqp_init_info *info) 3211 3128 { 3129 + struct irdma_ooo_cqp_op *ooo_op; 3130 + u32 num_ooo_ops; 3212 3131 u8 hw_sq_size; 3213 3132 3214 3133 if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 || ··· 3241 3156 cqp->rocev2_rto_policy = info->rocev2_rto_policy; 3242 3157 cqp->protocol_used = info->protocol_used; 3243 3158 memcpy(&cqp->dcqcn_params, &info->dcqcn_params, sizeof(cqp->dcqcn_params)); 3159 + if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { 3160 + cqp->ooisc_blksize = info->ooisc_blksize; 3161 + cqp->rrsp_blksize = info->rrsp_blksize; 3162 + cqp->q1_blksize = info->q1_blksize; 3163 + cqp->xmit_blksize = info->xmit_blksize; 3164 + cqp->blksizes_valid = info->blksizes_valid; 3165 + cqp->ts_shift = info->ts_shift; 3166 + cqp->ts_override = info->ts_override; 3167 + cqp->en_fine_grained_timers = info->en_fine_grained_timers; 3168 + cqp->pe_en_vf_cnt = info->pe_en_vf_cnt; 3169 + cqp->ooo_op_array = info->ooo_op_array; 3170 + /* initialize the OOO lists */ 3171 + INIT_LIST_HEAD(&cqp->ooo_avail); 3172 + INIT_LIST_HEAD(&cqp->ooo_pnd); 3173 + if (cqp->ooo_op_array) { 3174 + /* Populate avail list entries */ 3175 + for (num_ooo_ops = 0, ooo_op = info->ooo_op_array; 3176 + num_ooo_ops < cqp->sq_size; 3177 + num_ooo_ops++, ooo_op++) 3178 + 
list_add(&ooo_op->list_entry, &cqp->ooo_avail); 3179 + } 3180 + } 3244 3181 info->dev->cqp = cqp; 3245 3182 3246 3183 IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size); 3184 + cqp->last_def_cmpl_ticket = 0; 3185 + cqp->sw_def_cmpl_ticket = 0; 3247 3186 cqp->requested_ops = 0; 3248 3187 atomic64_set(&cqp->completed_ops, 0); 3249 3188 /* for the cqp commands backlog. */ 3250 3189 INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head); 3251 3190 3252 3191 writel(0, cqp->dev->hw_regs[IRDMA_CQPTAIL]); 3253 - writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]); 3254 - writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]); 3192 + if (cqp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) { 3193 + writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]); 3194 + writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]); 3195 + } 3255 3196 3256 3197 ibdev_dbg(to_ibdev(cqp->dev), 3257 3198 "WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%p] cqp[%p] polarity[x%04x]\n", ··· 3309 3198 return -ENOMEM; 3310 3199 3311 3200 spin_lock_init(&cqp->dev->cqp_lock); 3201 + spin_lock_init(&cqp->ooo_list_lock); 3312 3202 3313 3203 temp = FIELD_PREP(IRDMA_CQPHC_SQSIZE, cqp->hw_sq_size) | 3314 3204 FIELD_PREP(IRDMA_CQPHC_SVER, cqp->struct_ver) | ··· 3321 3209 FIELD_PREP(IRDMA_CQPHC_PROTOCOL_USED, 3322 3210 cqp->protocol_used); 3323 3211 } 3212 + if (hw_rev >= IRDMA_GEN_3) 3213 + temp |= FIELD_PREP(IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS, 3214 + cqp->en_fine_grained_timers); 3324 3215 3325 3216 set_64bit_val(cqp->host_ctx, 0, temp); 3326 3217 set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa); 3327 3218 3328 3219 temp = FIELD_PREP(IRDMA_CQPHC_ENABLED_VFS, cqp->ena_vf_count) | 3329 3220 FIELD_PREP(IRDMA_CQPHC_HMC_PROFILE, cqp->hmc_profile); 3221 + 3222 + if (hw_rev >= IRDMA_GEN_3) 3223 + temp |= FIELD_PREP(IRDMA_CQPHC_OOISC_BLKSIZE, 3224 + cqp->ooisc_blksize) | 3225 + FIELD_PREP(IRDMA_CQPHC_RRSP_BLKSIZE, 3226 + cqp->rrsp_blksize) | 3227 + FIELD_PREP(IRDMA_CQPHC_Q1_BLKSIZE, cqp->q1_blksize) | 3228 + FIELD_PREP(IRDMA_CQPHC_XMIT_BLKSIZE, 3229 + cqp->xmit_blksize) | 
3230 + FIELD_PREP(IRDMA_CQPHC_BLKSIZES_VALID, 3231 + cqp->blksizes_valid) | 3232 + FIELD_PREP(IRDMA_CQPHC_TIMESTAMP_OVERRIDE, 3233 + cqp->ts_override) | 3234 + FIELD_PREP(IRDMA_CQPHC_TS_SHIFT, cqp->ts_shift); 3330 3235 set_64bit_val(cqp->host_ctx, 16, temp); 3331 3236 set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp); 3332 3237 temp = FIELD_PREP(IRDMA_CQPHC_HW_MAJVER, cqp->hw_maj_ver) | ··· 3505 3376 } 3506 3377 3507 3378 /** 3379 + * irdma_sc_process_def_cmpl - process deferred or pending completion 3380 + * @cqp: CQP sc struct 3381 + * @info: CQP CQE info 3382 + * @wqe_idx: CQP WQE descriptor index 3383 + * @def_info: deferred op ticket value or out-of-order completion id 3384 + * @def_cmpl: true for deferred completion, false for pending (RCA) 3385 + */ 3386 + static void irdma_sc_process_def_cmpl(struct irdma_sc_cqp *cqp, 3387 + struct irdma_ccq_cqe_info *info, 3388 + u32 wqe_idx, u32 def_info, bool def_cmpl) 3389 + { 3390 + struct irdma_ooo_cqp_op *ooo_op; 3391 + unsigned long flags; 3392 + 3393 + /* Deferred and out-of-order completions share the same list of pending 3394 + * completions. Since the list can be also accessed from AE handler, 3395 + * it must be protected by a lock. 3396 + */ 3397 + spin_lock_irqsave(&cqp->ooo_list_lock, flags); 3398 + 3399 + /* For deferred completions bump up SW completion ticket value. */ 3400 + if (def_cmpl) { 3401 + cqp->last_def_cmpl_ticket = def_info; 3402 + cqp->sw_def_cmpl_ticket++; 3403 + } 3404 + if (!list_empty(&cqp->ooo_avail)) { 3405 + ooo_op = (struct irdma_ooo_cqp_op *) 3406 + list_entry(cqp->ooo_avail.next, 3407 + struct irdma_ooo_cqp_op, list_entry); 3408 + 3409 + list_del(&ooo_op->list_entry); 3410 + ooo_op->scratch = info->scratch; 3411 + ooo_op->def_info = def_info; 3412 + ooo_op->sw_def_info = cqp->sw_def_cmpl_ticket; 3413 + ooo_op->deferred = def_cmpl; 3414 + ooo_op->wqe_idx = wqe_idx; 3415 + /* Pending completions must be chronologically ordered, 3416 + * so adding at the end of list. 
3417 + */ 3418 + list_add_tail(&ooo_op->list_entry, &cqp->ooo_pnd); 3419 + } 3420 + spin_unlock_irqrestore(&cqp->ooo_list_lock, flags); 3421 + 3422 + info->pending = true; 3423 + } 3424 + 3425 + /** 3426 + * irdma_sc_process_ooo_cmpl - process out-of-order (final) completion 3427 + * @cqp: CQP sc struct 3428 + * @info: CQP CQE info 3429 + * @def_info: out-of-order completion id 3430 + */ 3431 + static void irdma_sc_process_ooo_cmpl(struct irdma_sc_cqp *cqp, 3432 + struct irdma_ccq_cqe_info *info, 3433 + u32 def_info) 3434 + { 3435 + struct irdma_ooo_cqp_op *ooo_op_tmp; 3436 + struct irdma_ooo_cqp_op *ooo_op; 3437 + unsigned long flags; 3438 + 3439 + info->scratch = 0; 3440 + 3441 + spin_lock_irqsave(&cqp->ooo_list_lock, flags); 3442 + list_for_each_entry_safe(ooo_op, ooo_op_tmp, &cqp->ooo_pnd, 3443 + list_entry) { 3444 + if (!ooo_op->deferred && ooo_op->def_info == def_info) { 3445 + list_del(&ooo_op->list_entry); 3446 + info->scratch = ooo_op->scratch; 3447 + list_add(&ooo_op->list_entry, &cqp->ooo_avail); 3448 + break; 3449 + } 3450 + } 3451 + spin_unlock_irqrestore(&cqp->ooo_list_lock, flags); 3452 + 3453 + if (!info->scratch) 3454 + ibdev_dbg(to_ibdev(cqp->dev), 3455 + "CQP: DEBUG_FW_OOO out-of-order completion with unknown def_info = 0x%x\n", 3456 + def_info); 3457 + } 3458 + 3459 + /** 3508 3460 * irdma_sc_ccq_get_cqe_info - get ccq's cq entry 3509 3461 * @ccq: ccq sc struct 3510 3462 * @info: completion q entry to return ··· 3593 3383 int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, 3594 3384 struct irdma_ccq_cqe_info *info) 3595 3385 { 3386 + u32 def_info; 3387 + bool def_cmpl = false; 3388 + bool pend_cmpl = false; 3389 + bool ooo_final_cmpl = false; 3596 3390 u64 qp_ctx, temp, temp1; 3597 3391 __le64 *cqe; 3598 3392 struct irdma_sc_cqp *cqp; ··· 3604 3390 u32 error; 3605 3391 u8 polarity; 3606 3392 int ret_code = 0; 3393 + unsigned long flags; 3607 3394 3608 3395 if (ccq->cq_uk.avoid_mem_cflct) 3609 3396 cqe = 
IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk); ··· 3636 3421 3637 3422 get_64bit_val(cqe, 16, &temp1); 3638 3423 info->op_ret_val = (u32)FIELD_GET(IRDMA_CCQ_OPRETVAL, temp1); 3424 + if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { 3425 + def_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR && 3426 + info->min_err_code == IRDMA_CQPSQ_MIN_DEF_CMPL; 3427 + def_info = (u32)FIELD_GET(IRDMA_CCQ_DEFINFO, temp1); 3428 + 3429 + pend_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR && 3430 + info->min_err_code == IRDMA_CQPSQ_MIN_OOO_CMPL; 3431 + 3432 + ooo_final_cmpl = (bool)FIELD_GET(IRDMA_OOO_CMPL, temp); 3433 + 3434 + if (def_cmpl || pend_cmpl || ooo_final_cmpl) { 3435 + if (ooo_final_cmpl) 3436 + irdma_sc_process_ooo_cmpl(cqp, info, def_info); 3437 + else 3438 + irdma_sc_process_def_cmpl(cqp, info, wqe_idx, 3439 + def_info, def_cmpl); 3440 + } 3441 + } 3442 + 3639 3443 get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1); 3640 3444 info->op_code = (u8)FIELD_GET(IRDMA_CQPSQ_OPCODE, temp1); 3641 3445 info->cqp = cqp; ··· 3671 3437 3672 3438 dma_wmb(); /* make sure shadow area is updated before moving tail */ 3673 3439 3674 - IRDMA_RING_MOVE_TAIL(cqp->sq_ring); 3440 + spin_lock_irqsave(&cqp->dev->cqp_lock, flags); 3441 + if (!ooo_final_cmpl) 3442 + IRDMA_RING_MOVE_TAIL(cqp->sq_ring); 3443 + spin_unlock_irqrestore(&cqp->dev->cqp_lock, flags); 3444 + 3445 + /* Do not increment completed_ops counter on pending or deferred 3446 + * completions. 3447 + */ 3448 + if (pend_cmpl || def_cmpl) 3449 + return ret_code; 3675 3450 atomic64_inc(&cqp->completed_ops); 3676 3451 3677 3452 return ret_code; ··· 4364 4121 case IRDMA_AE_LCE_CQ_CATASTROPHIC: 4365 4122 info->cq = true; 4366 4123 info->compl_ctx = compl_ctx << 1; 4124 + ae_src = IRDMA_AE_SOURCE_RSVD; 4125 + break; 4126 + case IRDMA_AE_CQP_DEFERRED_COMPLETE: 4127 + info->def_info = info->wqe_idx; 4367 4128 ae_src = IRDMA_AE_SOURCE_RSVD; 4368 4129 break; 4369 4130 case IRDMA_AE_ROCE_EMPTY_MCG:
+15
drivers/infiniband/hw/irdma/defs.h
··· 367 367 #define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 368 368 #define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702 369 369 #define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900 370 + #define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901 370 371 371 372 #define FLD_LS_64(dev, val, field) \ 372 373 (((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M]) ··· 465 464 #define IRDMA_CQPHC_SVER GENMASK_ULL(31, 24) 466 465 #define IRDMA_CQPHC_SQBASE GENMASK_ULL(63, 9) 467 466 467 + #define IRDMA_CQPHC_TIMESTAMP_OVERRIDE BIT_ULL(5) 468 + #define IRDMA_CQPHC_TS_SHIFT GENMASK_ULL(12, 8) 469 + #define IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS BIT_ULL(0) 470 + 471 + #define IRDMA_CQPHC_OOISC_BLKSIZE GENMASK_ULL(63, 60) 472 + #define IRDMA_CQPHC_RRSP_BLKSIZE GENMASK_ULL(59, 56) 473 + #define IRDMA_CQPHC_Q1_BLKSIZE GENMASK_ULL(55, 52) 474 + #define IRDMA_CQPHC_XMIT_BLKSIZE GENMASK_ULL(51, 48) 475 + #define IRDMA_CQPHC_BLKSIZES_VALID BIT_ULL(4) 476 + 468 477 #define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0) 469 478 #define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0) 470 479 #define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0) ··· 487 476 #define IRDMA_CQ_QPCTX IRDMA_CQPHC_QPCTX 488 477 489 478 #define IRDMA_CCQ_OPRETVAL GENMASK_ULL(31, 0) 479 + 480 + #define IRDMA_CCQ_DEFINFO GENMASK_ULL(63, 32) 490 481 491 482 #define IRDMA_CQ_MINERR GENMASK_ULL(15, 0) 492 483 #define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16) ··· 721 708 722 709 #define IRDMA_CQPSQ_MIN_STAG_INVALID 0x0001 723 710 #define IRDMA_CQPSQ_MIN_SUSPEND_PND 0x0005 711 + #define IRDMA_CQPSQ_MIN_DEF_CMPL 0x0006 712 + #define IRDMA_CQPSQ_MIN_OOO_CMPL 0x0007 724 713 725 714 #define IRDMA_CQPSQ_MAJ_NO_ERROR 0x0000 726 715 #define IRDMA_CQPSQ_MAJ_OBJCACHE_ERROR 0xF000
+79 -10
drivers/infiniband/hw/irdma/hw.c
··· 208 208 } 209 209 210 210 /** 211 + * irdma_complete_cqp_request - perform post-completion cleanup 212 + * @cqp: device CQP 213 + * @cqp_request: CQP request 214 + * 215 + * Mark CQP request as done, wake up waiting thread or invoke 216 + * callback function and release/free CQP request. 217 + */ 218 + static void irdma_complete_cqp_request(struct irdma_cqp *cqp, 219 + struct irdma_cqp_request *cqp_request) 220 + { 221 + if (cqp_request->waiting) { 222 + WRITE_ONCE(cqp_request->request_done, true); 223 + wake_up(&cqp_request->waitq); 224 + } else if (cqp_request->callback_fcn) { 225 + cqp_request->callback_fcn(cqp_request); 226 + } 227 + irdma_put_cqp_request(cqp, cqp_request); 228 + } 229 + 230 + /** 231 + * irdma_process_ae_def_cmpl - handle IRDMA_AE_CQP_DEFERRED_COMPLETE event 232 + * @rf: RDMA PCI function 233 + * @info: AEQ entry info 234 + */ 235 + static void irdma_process_ae_def_cmpl(struct irdma_pci_f *rf, 236 + struct irdma_aeqe_info *info) 237 + { 238 + u32 sw_def_info; 239 + u64 scratch; 240 + 241 + irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); 242 + 243 + irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, true, 244 + &scratch, &sw_def_info); 245 + while (scratch) { 246 + struct irdma_cqp_request *cqp_request = 247 + (struct irdma_cqp_request *)(uintptr_t)scratch; 248 + 249 + irdma_complete_cqp_request(&rf->cqp, cqp_request); 250 + irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, false, 251 + &scratch, &sw_def_info); 252 + } 253 + } 254 + 255 + /** 211 256 * irdma_process_aeq - handle aeq events 212 257 * @rf: RDMA PCI function 213 258 */ ··· 314 269 spin_unlock_irqrestore(&iwqp->lock, flags); 315 270 ctx_info = &iwqp->ctx_info; 316 271 } else { 317 - if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR) 272 + if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR && 273 + info->ae_id != IRDMA_AE_CQP_DEFERRED_COMPLETE) 318 274 continue; 319 275 } 320 276 ··· 409 363 iwcq->ibcq.cq_context); 410 364 } 411 365 irdma_cq_rem_ref(&iwcq->ibcq); 366 + break; 367 + case 
IRDMA_AE_CQP_DEFERRED_COMPLETE: 368 + /* Remove completed CQP requests from pending list 369 + * and notify about those CQP ops completion. 370 + */ 371 + irdma_process_ae_def_cmpl(rf, info); 412 372 break; 413 373 case IRDMA_AE_RESET_NOT_SENT: 414 374 case IRDMA_AE_LLP_DOUBT_REACHABILITY: ··· 652 600 dma_free_coherent(dev->hw->device, cqp->sq.size, cqp->sq.va, 653 601 cqp->sq.pa); 654 602 cqp->sq.va = NULL; 603 + kfree(cqp->oop_op_array); 604 + cqp->oop_op_array = NULL; 655 605 kfree(cqp->scratch_array); 656 606 cqp->scratch_array = NULL; 657 607 kfree(cqp->cqp_requests); ··· 997 943 goto err_scratch; 998 944 } 999 945 946 + cqp->oop_op_array = kcalloc(sqsize, sizeof(*cqp->oop_op_array), 947 + GFP_KERNEL); 948 + if (!cqp->oop_op_array) { 949 + status = -ENOMEM; 950 + goto err_oop; 951 + } 952 + cqp_init_info.ooo_op_array = cqp->oop_op_array; 1000 953 dev->cqp = &cqp->sc_cqp; 1001 954 dev->cqp->dev = dev; 1002 955 cqp->sq.size = ALIGN(sizeof(struct irdma_cqp_sq_wqe) * sqsize, ··· 1040 979 case IRDMA_GEN_2: 1041 980 cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_2; 1042 981 break; 982 + case IRDMA_GEN_3: 983 + cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_3; 984 + cqp_init_info.ts_override = 1; 985 + break; 1043 986 } 1044 987 status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info); 1045 988 if (status) { ··· 1078 1013 cqp->sq.va, cqp->sq.pa); 1079 1014 cqp->sq.va = NULL; 1080 1015 err_sq: 1016 + kfree(cqp->oop_op_array); 1017 + cqp->oop_op_array = NULL; 1018 + err_oop: 1081 1019 kfree(cqp->scratch_array); 1082 1020 cqp->scratch_array = NULL; 1083 1021 err_scratch: ··· 2172 2104 cqp_request->compl_info.op_ret_val = info.op_ret_val; 2173 2105 cqp_request->compl_info.error = info.error; 2174 2106 2175 - if (cqp_request->waiting) { 2176 - WRITE_ONCE(cqp_request->request_done, true); 2177 - wake_up(&cqp_request->waitq); 2178 - irdma_put_cqp_request(&rf->cqp, cqp_request); 2179 - } else { 2180 - if (cqp_request->callback_fcn) 2181 - 
cqp_request->callback_fcn(cqp_request); 2182 - irdma_put_cqp_request(&rf->cqp, cqp_request); 2183 - } 2107 + /* 2108 + * If this is deferred or pending completion, then mark 2109 + * CQP request as pending to not block the CQ, but don't 2110 + * release CQP request, as it is still on the OOO list. 2111 + */ 2112 + if (info.pending) 2113 + cqp_request->pending = true; 2114 + else 2115 + irdma_complete_cqp_request(&rf->cqp, 2116 + cqp_request); 2184 2117 } 2185 2118 2186 2119 cqe_count++;
+2
drivers/infiniband/hw/irdma/main.h
··· 168 168 bool request_done; /* READ/WRITE_ONCE macros operate on it */ 169 169 bool waiting:1; 170 170 bool dynamic:1; 171 + bool pending:1; 171 172 }; 172 173 173 174 struct irdma_cqp { ··· 181 180 struct irdma_dma_mem host_ctx; 182 181 u64 *scratch_array; 183 182 struct irdma_cqp_request *cqp_requests; 183 + struct irdma_ooo_cqp_op *oop_op_array; 184 184 struct list_head cqp_avail_reqs; 185 185 struct list_head cqp_pending_reqs; 186 186 };
+1
drivers/infiniband/hw/irdma/protos.h
··· 10 10 #define ALL_TC2PFC 0xff 11 11 #define CQP_COMPL_WAIT_TIME_MS 10 12 12 #define CQP_TIMEOUT_THRESHOLD 500 13 + #define CQP_DEF_CMPL_TIMEOUT_THRESHOLD 2500 13 14 14 15 /* init operations */ 15 16 int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
+42 -1
drivers/infiniband/hw/irdma/type.h
··· 262 262 __le64 *host_ctx; 263 263 u64 *scratch_array; 264 264 u32 sq_size; 265 + struct irdma_ooo_cqp_op *ooo_op_array; 266 + u32 pe_en_vf_cnt; 265 267 u16 hw_maj_ver; 266 268 u16 hw_min_ver; 267 269 u8 struct_ver; 268 270 u8 hmc_profile; 269 271 u8 ena_vf_count; 270 272 u8 ceqs_per_vf; 273 + u8 ooisc_blksize; 274 + u8 rrsp_blksize; 275 + u8 q1_blksize; 276 + u8 xmit_blksize; 277 + u8 ts_override; 278 + u8 ts_shift; 279 + u8 en_fine_grained_timers; 280 + u8 blksizes_valid; 271 281 bool en_datacenter_tcp:1; 272 282 bool disable_packed:1; 273 283 bool rocev2_rto_policy:1; ··· 402 392 __le64 elem[IRDMA_CQP_WQE_SIZE]; 403 393 }; 404 394 395 + struct irdma_ooo_cqp_op { 396 + struct list_head list_entry; 397 + u64 scratch; 398 + u32 def_info; 399 + u32 sw_def_info; 400 + u32 wqe_idx; 401 + bool deferred:1; 402 + }; 403 + 405 404 struct irdma_sc_cqp { 405 + spinlock_t ooo_list_lock; /* protects list of pending completions */ 406 + struct list_head ooo_avail; 407 + struct list_head ooo_pnd; 408 + u32 last_def_cmpl_ticket; 409 + u32 sw_def_cmpl_ticket; 406 410 u32 size; 407 411 u64 sq_pa; 408 412 u64 host_ctx_pa; ··· 432 408 u64 *scratch_array; 433 409 u64 requested_ops; 434 410 atomic64_t completed_ops; 411 + struct irdma_ooo_cqp_op *ooo_op_array; 435 412 u32 cqp_id; 436 413 u32 sq_size; 414 + u32 pe_en_vf_cnt; 437 415 u32 hw_sq_size; 438 416 u16 hw_maj_ver; 439 417 u16 hw_min_ver; ··· 445 419 u8 ena_vf_count; 446 420 u8 timeout_count; 447 421 u8 ceqs_per_vf; 422 + u8 ooisc_blksize; 423 + u8 rrsp_blksize; 424 + u8 q1_blksize; 425 + u8 xmit_blksize; 426 + u8 ts_override; 427 + u8 ts_shift; 428 + u8 en_fine_grained_timers; 429 + u8 blksizes_valid; 448 430 bool en_datacenter_tcp:1; 449 431 bool disable_packed:1; 450 432 bool rocev2_rto_policy:1; ··· 757 723 u16 maj_err_code; 758 724 u16 min_err_code; 759 725 u8 op_code; 760 - bool error; 726 + bool error:1; 727 + bool pending:1; 761 728 }; 762 729 763 730 struct irdma_dcb_app_info { ··· 1033 998 struct irdma_aeqe_info { 
1034 999 u64 compl_ctx; 1035 1000 u32 qp_cq_id; 1001 + u32 def_info; /* only valid for DEF_CMPL */ 1036 1002 u16 ae_id; 1037 1003 u16 wqe_idx; 1038 1004 u8 tcp_state; ··· 1278 1242 void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable); 1279 1243 void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout, 1280 1244 struct irdma_sc_dev *dev); 1245 + void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev, 1246 + struct irdma_aeqe_info *info, 1247 + bool first, u64 *scratch, 1248 + u32 *sw_def_info); 1249 + u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev); 1281 1250 int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err); 1282 1251 int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp); 1283 1252 int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
+49 -1
drivers/infiniband/hw/irdma/utils.c
··· 481 481 WRITE_ONCE(cqp_request->request_done, false); 482 482 cqp_request->callback_fcn = NULL; 483 483 cqp_request->waiting = false; 484 + cqp_request->pending = false; 484 485 485 486 spin_lock_irqsave(&cqp->req_lock, flags); 486 487 list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs); ··· 522 521 } 523 522 524 523 /** 524 + * irdma_cleanup_deferred_cqp_ops - clean-up cqp with no completions 525 + * @dev: sc_dev 526 + * @cqp: cqp 527 + */ 528 + static void irdma_cleanup_deferred_cqp_ops(struct irdma_sc_dev *dev, 529 + struct irdma_cqp *cqp) 530 + { 531 + u64 scratch; 532 + 533 + /* process all CQP requests with deferred/pending completions */ 534 + while ((scratch = irdma_sc_cqp_cleanup_handler(dev))) 535 + irdma_free_pending_cqp_request(cqp, (struct irdma_cqp_request *) 536 + (uintptr_t)scratch); 537 + } 538 + 539 + /** 525 540 * irdma_cleanup_pending_cqp_op - clean-up cqp with no 526 541 * completions 527 542 * @rf: RDMA PCI function ··· 550 533 struct cqp_cmds_info *pcmdinfo = NULL; 551 534 u32 i, pending_work, wqe_idx; 552 535 536 + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) 537 + irdma_cleanup_deferred_cqp_ops(dev, cqp); 553 538 pending_work = IRDMA_RING_USED_QUANTA(cqp->sc_cqp.sq_ring); 554 539 wqe_idx = IRDMA_RING_CURRENT_TAIL(cqp->sc_cqp.sq_ring); 555 540 for (i = 0; i < pending_work; i++) { ··· 571 552 } 572 553 } 573 554 555 + static int irdma_get_timeout_threshold(struct irdma_sc_dev *dev) 556 + { 557 + u16 time_s = dev->vc_caps.cqp_timeout_s; 558 + 559 + if (!time_s) 560 + return CQP_TIMEOUT_THRESHOLD; 561 + 562 + return time_s * 1000 / dev->hw_attrs.max_cqp_compl_wait_time_ms; 563 + } 564 + 565 + static int irdma_get_def_timeout_threshold(struct irdma_sc_dev *dev) 566 + { 567 + u16 time_s = dev->vc_caps.cqp_def_timeout_s; 568 + 569 + if (!time_s) 570 + return CQP_DEF_CMPL_TIMEOUT_THRESHOLD; 571 + 572 + return time_s * 1000 / dev->hw_attrs.max_cqp_compl_wait_time_ms; 573 + } 574 + 574 575 /** 575 576 * irdma_wait_event - wait for 
completion 576 577 * @rf: RDMA PCI function ··· 600 561 struct irdma_cqp_request *cqp_request) 601 562 { 602 563 struct irdma_cqp_timeout cqp_timeout = {}; 564 + int timeout_threshold = irdma_get_timeout_threshold(&rf->sc_dev); 603 565 bool cqp_error = false; 604 566 int err_code = 0; 605 567 ··· 612 572 msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS))) 613 573 break; 614 574 575 + if (cqp_request->pending) 576 + /* There was a deferred or pending completion 577 + * received for this CQP request, so we need 578 + * to wait longer than usual. 579 + */ 580 + timeout_threshold = 581 + irdma_get_def_timeout_threshold(&rf->sc_dev); 582 + 615 583 irdma_check_cqp_progress(&cqp_timeout, &rf->sc_dev); 616 584 617 - if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD) 585 + if (cqp_timeout.count < timeout_threshold) 618 586 continue; 619 587 620 588 if (!rf->reset) {