Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull RDMA fixes from Jason Gunthorpe:
"Bug fixes for old bugs in the hns and hfi1 drivers:

- Calculate various values in hns properly to avoid over/underflows
in some cases

- Fix an oops, PCI negotiation on Gen4 systems, and bugs related to
retries"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
RDMA/hns: Correct the value of srq_desc_size
RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN
IB/hfi1: TID RDMA WRITE should not return IB_WC_RNR_RETRY_EXC_ERR
IB/hfi1: Calculate flow weight based on QP MTU for TID RDMA
IB/hfi1: Ensure r_tid_ack is valid before building TID RDMA ACK packet
IB/hfi1: Ensure full Gen3 speed in a Gen4 system

+46 -39
-1
drivers/infiniband/hw/hfi1/init.c
··· 1489 1489 goto bail_dev; 1490 1490 } 1491 1491 1492 - hfi1_compute_tid_rdma_flow_wt(); 1493 1492 /* 1494 1493 * These must be called before the driver is registered with 1495 1494 * the PCI subsystem.
+3 -1
drivers/infiniband/hw/hfi1/pcie.c
··· 319 319 /* 320 320 * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed 321 321 */ 322 - if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { 322 + if (parent && 323 + (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT || 324 + dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) { 323 325 dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); 324 326 dd->link_gen3_capable = 0; 325 327 }
+8 -8
drivers/infiniband/hw/hfi1/rc.c
··· 2209 2209 if (qp->s_flags & RVT_S_WAIT_RNR) 2210 2210 goto bail_stop; 2211 2211 rdi = ib_to_rvt(qp->ibqp.device); 2212 - if (qp->s_rnr_retry == 0 && 2213 - !((rdi->post_parms[wqe->wr.opcode].flags & 2214 - RVT_OPERATION_IGN_RNR_CNT) && 2215 - qp->s_rnr_retry_cnt == 0)) { 2216 - status = IB_WC_RNR_RETRY_EXC_ERR; 2217 - goto class_b; 2212 + if (!(rdi->post_parms[wqe->wr.opcode].flags & 2213 + RVT_OPERATION_IGN_RNR_CNT)) { 2214 + if (qp->s_rnr_retry == 0) { 2215 + status = IB_WC_RNR_RETRY_EXC_ERR; 2216 + goto class_b; 2217 + } 2218 + if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) 2219 + qp->s_rnr_retry--; 2218 2220 } 2219 - if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) 2220 - qp->s_rnr_retry--; 2221 2221 2222 2222 /* 2223 2223 * The last valid PSN is the previous PSN. For TID RDMA WRITE
+32 -25
drivers/infiniband/hw/hfi1/tid_rdma.c
··· 107 107 * C - Capcode 108 108 */ 109 109 110 - static u32 tid_rdma_flow_wt; 111 - 112 110 static void tid_rdma_trigger_resume(struct work_struct *work); 113 111 static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req); 114 112 static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req, ··· 133 135 struct hfi1_ctxtdata *rcd, 134 136 struct tid_rdma_flow *flow, 135 137 bool fecn); 138 + 139 + static void validate_r_tid_ack(struct hfi1_qp_priv *priv) 140 + { 141 + if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) 142 + priv->r_tid_ack = priv->r_tid_tail; 143 + } 144 + 145 + static void tid_rdma_schedule_ack(struct rvt_qp *qp) 146 + { 147 + struct hfi1_qp_priv *priv = qp->priv; 148 + 149 + priv->s_flags |= RVT_S_ACK_PENDING; 150 + hfi1_schedule_tid_send(qp); 151 + } 152 + 153 + static void tid_rdma_trigger_ack(struct rvt_qp *qp) 154 + { 155 + validate_r_tid_ack(qp->priv); 156 + tid_rdma_schedule_ack(qp); 157 + } 136 158 137 159 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) 138 160 { ··· 3023 3005 qpriv->s_nak_state = IB_NAK_PSN_ERROR; 3024 3006 /* We are NAK'ing the next expected PSN */ 3025 3007 qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn); 3026 - qpriv->s_flags |= RVT_S_ACK_PENDING; 3027 - if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID) 3028 - qpriv->r_tid_ack = qpriv->r_tid_tail; 3029 - hfi1_schedule_tid_send(qp); 3008 + tid_rdma_trigger_ack(qp); 3030 3009 } 3031 3010 goto unlock; 3032 3011 } ··· 3386 3371 return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32); 3387 3372 } 3388 3373 3389 - void hfi1_compute_tid_rdma_flow_wt(void) 3374 + static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp) 3390 3375 { 3391 3376 /* 3392 3377 * Heuristic for computing the RNR timeout when waiting on the flow 3393 3378 * queue. 
Rather than a computationally expensive exact estimate of when 3394 3379 * a flow will be available, we assume that if a QP is at position N in 3395 3380 * the flow queue it has to wait approximately (N + 1) * (number of 3396 - * segments between two sync points), assuming PMTU of 4K. The rationale 3397 - * for this is that flows are released and recycled at each sync point. 3381 + * segments between two sync points). The rationale for this is that 3382 + * flows are released and recycled at each sync point. 3398 3383 */ 3399 - tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) / 3400 - TID_RDMA_MAX_SEGMENT_SIZE; 3384 + return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT; 3401 3385 } 3402 3386 3403 3387 static u32 position_in_queue(struct hfi1_qp_priv *qpriv, ··· 3519 3505 if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) { 3520 3506 ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp); 3521 3507 if (ret) { 3522 - to_seg = tid_rdma_flow_wt * 3508 + to_seg = hfi1_compute_tid_rdma_flow_wt(qp) * 3523 3509 position_in_queue(qpriv, 3524 3510 &rcd->flow_queue); 3525 3511 break; ··· 3540 3526 /* 3541 3527 * If overtaking req->acked_tail, send an RNR NAK. 
Because the 3542 3528 * QP is not queued in this case, and the issue can only be 3543 - * caused due a delay in scheduling the second leg which we 3529 + * caused by a delay in scheduling the second leg which we 3544 3530 * cannot estimate, we use a rather arbitrary RNR timeout of 3545 3531 * (MAX_FLOWS / 2) segments 3546 3532 */ ··· 3548 3534 MAX_FLOWS)) { 3549 3535 ret = -EAGAIN; 3550 3536 to_seg = MAX_FLOWS >> 1; 3551 - qpriv->s_flags |= RVT_S_ACK_PENDING; 3552 - hfi1_schedule_tid_send(qp); 3537 + tid_rdma_trigger_ack(qp); 3553 3538 break; 3554 3539 } 3555 3540 ··· 4348 4335 trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn, 4349 4336 req); 4350 4337 trace_hfi1_tid_write_rsp_rcv_data(qp); 4351 - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) 4352 - priv->r_tid_ack = priv->r_tid_tail; 4338 + validate_r_tid_ack(priv); 4353 4339 4354 4340 if (opcode == TID_OP(WRITE_DATA_LAST)) { 4355 4341 release_rdma_sge_mr(e); ··· 4387 4375 } 4388 4376 4389 4377 done: 4390 - priv->s_flags |= RVT_S_ACK_PENDING; 4391 - hfi1_schedule_tid_send(qp); 4378 + tid_rdma_schedule_ack(qp); 4392 4379 exit: 4393 4380 priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; 4394 4381 if (fecn) ··· 4399 4388 if (!priv->s_nak_state) { 4400 4389 priv->s_nak_state = IB_NAK_PSN_ERROR; 4401 4390 priv->s_nak_psn = flow->flow_state.r_next_psn; 4402 - priv->s_flags |= RVT_S_ACK_PENDING; 4403 - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) 4404 - priv->r_tid_ack = priv->r_tid_tail; 4405 - hfi1_schedule_tid_send(qp); 4391 + tid_rdma_trigger_ack(qp); 4406 4392 } 4407 4393 goto done; 4408 4394 } ··· 4947 4939 qpriv->resync = true; 4948 4940 /* RESYNC request always gets a TID RDMA ACK. */ 4949 4941 qpriv->s_nak_state = 0; 4950 - qpriv->s_flags |= RVT_S_ACK_PENDING; 4951 - hfi1_schedule_tid_send(qp); 4942 + tid_rdma_trigger_ack(qp); 4952 4943 bail: 4953 4944 if (fecn) 4954 4945 qp->s_flags |= RVT_S_ECN;
+1 -2
drivers/infiniband/hw/hfi1/tid_rdma.h
··· 17 17 #define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ 18 18 #define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ 19 19 #define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT) 20 + #define TID_RDMA_SEGMENT_SHIFT 18 20 21 21 22 /* 22 23 * Bit definitions for priv->s_flags. ··· 274 273 u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, 275 274 struct ib_other_headers *ohdr, 276 275 u32 *bth1, u32 *bth2, u32 *len); 277 - 278 - void hfi1_compute_tid_rdma_flow_wt(void); 279 276 280 277 void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet); 281 278
+1 -1
drivers/infiniband/hw/hns/hns_roce_hem.h
··· 59 59 60 60 #define HNS_ROCE_HEM_CHUNK_LEN \ 61 61 ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ 62 - (sizeof(struct scatterlist))) 62 + (sizeof(struct scatterlist) + sizeof(void *))) 63 63 64 64 #define check_whether_bt_num_3(type, hop_num) \ 65 65 (type < HEM_TYPE_MTT && hop_num == 2)
+1 -1
drivers/infiniband/hw/hns/hns_roce_srq.c
··· 376 376 srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); 377 377 srq->max_gs = srq_init_attr->attr.max_sge; 378 378 379 - srq_desc_size = max(16, 16 * srq->max_gs); 379 + srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs)); 380 380 381 381 srq->wqe_shift = ilog2(srq_desc_size); 382 382