Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

RDMA/bnxt_re: Change wr posting logic to accommodate variable wqes

Modify the post-send and post-recv paths to initialize the WQEs slot by
slot, dynamically, depending on the maximum number of SGEs requested by the
consumer at the time of QP creation.

Change the QP creation logic to determine the size of the SQ and RQ in 16B
slots, based on the number of WQEs and the number of SGEs requested by the
consumer.

Link: https://lore.kernel.org/r/1594822619-4098-6-git-send-email-devesh.sharma@broadcom.com
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

authored by

Devesh Sharma and committed by
Jason Gunthorpe
2bb3c32c 54ace984

+396 -171
+126 -41
drivers/infiniband/hw/bnxt_re/ib_verbs.c
··· 842 842 } 843 843 } 844 844 845 + static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp, 846 + int rsge, int max) 847 + { 848 + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) 849 + rsge = max; 850 + return bnxt_re_get_rwqe_size(rsge); 851 + } 852 + 853 + static u16 bnxt_re_get_wqe_size(int ilsize, int nsge) 854 + { 855 + u16 wqe_size, calc_ils; 856 + 857 + wqe_size = bnxt_re_get_swqe_size(nsge); 858 + if (ilsize) { 859 + calc_ils = sizeof(struct sq_send_hdr) + ilsize; 860 + wqe_size = max_t(u16, calc_ils, wqe_size); 861 + wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr)); 862 + } 863 + return wqe_size; 864 + } 865 + 866 + static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, 867 + struct ib_qp_init_attr *init_attr) 868 + { 869 + struct bnxt_qplib_dev_attr *dev_attr; 870 + struct bnxt_qplib_qp *qplqp; 871 + struct bnxt_re_dev *rdev; 872 + struct bnxt_qplib_q *sq; 873 + int align, ilsize; 874 + 875 + rdev = qp->rdev; 876 + qplqp = &qp->qplib_qp; 877 + sq = &qplqp->sq; 878 + dev_attr = &rdev->dev_attr; 879 + 880 + align = sizeof(struct sq_send_hdr); 881 + ilsize = ALIGN(init_attr->cap.max_inline_data, align); 882 + 883 + sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge); 884 + if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges)) 885 + return -EINVAL; 886 + /* For gen p4 and gen p5 backward compatibility mode 887 + * wqe size is fixed to 128 bytes 888 + */ 889 + if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) && 890 + qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) 891 + sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges); 892 + 893 + if (init_attr->cap.max_inline_data) { 894 + qplqp->max_inline_data = sq->wqe_size - 895 + sizeof(struct sq_send_hdr); 896 + init_attr->cap.max_inline_data = qplqp->max_inline_data; 897 + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) 898 + sq->max_sge = qplqp->max_inline_data / 899 + sizeof(struct sq_sge); 900 + } 901 + 902 + return 0; 903 + } 904 + 845 905 static int 
bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, 846 906 struct bnxt_re_qp *qp, struct ib_udata *udata) 847 907 { 908 + struct bnxt_qplib_qp *qplib_qp; 909 + struct bnxt_re_ucontext *cntx; 848 910 struct bnxt_re_qp_req ureq; 849 - struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp; 850 - struct ib_umem *umem; 851 911 int bytes = 0, psn_sz; 852 - struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context( 853 - udata, struct bnxt_re_ucontext, ib_uctx); 912 + struct ib_umem *umem; 913 + int psn_nume; 854 914 915 + qplib_qp = &qp->qplib_qp; 916 + cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, 917 + ib_uctx); 855 918 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) 856 919 return -EFAULT; 857 920 ··· 922 859 /* Consider mapping PSN search memory only for RC QPs. */ 923 860 if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { 924 861 psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? 925 - sizeof(struct sq_psn_search_ext) : 926 - sizeof(struct sq_psn_search); 927 - bytes += (qplib_qp->sq.max_wqe * psn_sz); 862 + sizeof(struct sq_psn_search_ext) : 863 + sizeof(struct sq_psn_search); 864 + psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? 
865 + qplib_qp->sq.max_wqe : 866 + ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) / 867 + sizeof(struct bnxt_qplib_sge)); 868 + bytes += (psn_nume * psn_sz); 928 869 } 870 + 929 871 bytes = PAGE_ALIGN(bytes); 930 872 umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes, 931 873 IB_ACCESS_LOCAL_WRITE); ··· 1043 975 qp->qplib_qp.sig_type = true; 1044 976 1045 977 /* Shadow QP SQ depth should be same as QP1 RQ depth */ 1046 - qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(); 978 + qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6); 1047 979 qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; 1048 980 qp->qplib_qp.sq.max_sge = 2; 1049 981 /* Q full delta can be 1 since it is internal QP */ ··· 1054 986 qp->qplib_qp.scq = qp1_qp->scq; 1055 987 qp->qplib_qp.rcq = qp1_qp->rcq; 1056 988 1057 - qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(); 989 + qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6); 1058 990 qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; 1059 991 qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; 1060 992 /* Q full delta can be 1 since it is internal QP */ ··· 1109 1041 qplqp->srq = &srq->qplib_srq; 1110 1042 rq->max_wqe = 0; 1111 1043 } else { 1112 - rq->wqe_size = bnxt_re_get_rwqe_size(); 1044 + rq->max_sge = init_attr->cap.max_recv_sge; 1045 + if (rq->max_sge > dev_attr->max_qp_sges) 1046 + rq->max_sge = dev_attr->max_qp_sges; 1047 + init_attr->cap.max_recv_sge = rq->max_sge; 1048 + rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge, 1049 + dev_attr->max_qp_sges); 1113 1050 /* Allocate 1 more than what's provided so posting max doesn't 1114 1051 * mean empty. 
1115 1052 */ 1116 1053 entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); 1117 1054 rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); 1118 - rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr; 1119 - rq->max_sge = init_attr->cap.max_recv_sge; 1120 - if (rq->max_sge > dev_attr->max_qp_sges) 1121 - rq->max_sge = dev_attr->max_qp_sges; 1055 + rq->q_full_delta = 0; 1056 + rq->sg_info.pgsize = PAGE_SIZE; 1057 + rq->sg_info.pgshft = PAGE_SHIFT; 1122 1058 } 1123 - rq->sg_info.pgsize = PAGE_SIZE; 1124 - rq->sg_info.pgshft = PAGE_SHIFT; 1125 1059 1126 1060 return 0; 1127 1061 } ··· 1138 1068 qplqp = &qp->qplib_qp; 1139 1069 dev_attr = &rdev->dev_attr; 1140 1070 1141 - qplqp->rq.max_sge = dev_attr->max_qp_sges; 1142 - if (qplqp->rq.max_sge > dev_attr->max_qp_sges) 1071 + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { 1143 1072 qplqp->rq.max_sge = dev_attr->max_qp_sges; 1144 - qplqp->rq.max_sge = 6; 1073 + if (qplqp->rq.max_sge > dev_attr->max_qp_sges) 1074 + qplqp->rq.max_sge = dev_attr->max_qp_sges; 1075 + qplqp->rq.max_sge = 6; 1076 + } 1145 1077 } 1146 1078 1147 - static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, 1148 - struct ib_qp_init_attr *init_attr, 1149 - struct ib_udata *udata) 1079 + static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, 1080 + struct ib_qp_init_attr *init_attr, 1081 + struct ib_udata *udata) 1150 1082 { 1151 1083 struct bnxt_qplib_dev_attr *dev_attr; 1152 1084 struct bnxt_qplib_qp *qplqp; 1153 1085 struct bnxt_re_dev *rdev; 1154 1086 struct bnxt_qplib_q *sq; 1155 1087 int entries; 1088 + int diff; 1089 + int rc; 1156 1090 1157 1091 rdev = qp->rdev; 1158 1092 qplqp = &qp->qplib_qp; 1159 1093 sq = &qplqp->sq; 1160 1094 dev_attr = &rdev->dev_attr; 1161 1095 1162 - sq->wqe_size = bnxt_re_get_swqe_size(); 1163 1096 sq->max_sge = init_attr->cap.max_send_sge; 1164 - if (sq->max_sge > dev_attr->max_qp_sges) 1097 + if (sq->max_sge > dev_attr->max_qp_sges) { 1165 1098 sq->max_sge = dev_attr->max_qp_sges; 1166 - /* 
1167 - * Change the SQ depth if user has requested minimum using 1168 - * configfs. Only supported for kernel consumers 1169 - */ 1099 + init_attr->cap.max_send_sge = sq->max_sge; 1100 + } 1101 + 1102 + rc = bnxt_re_setup_swqe_size(qp, init_attr); 1103 + if (rc) 1104 + return rc; 1105 + 1170 1106 entries = init_attr->cap.max_send_wr; 1171 1107 /* Allocate 128 + 1 more than what's provided */ 1172 - entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); 1173 - sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1174 - BNXT_QPLIB_RESERVED_QP_WRS + 1); 1175 - sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; 1108 + diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ? 1109 + 0 : BNXT_QPLIB_RESERVED_QP_WRS; 1110 + entries = roundup_pow_of_two(entries + diff + 1); 1111 + sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); 1112 + sq->q_full_delta = diff + 1; 1176 1113 /* 1177 1114 * Reserving one slot for Phantom WQE. Application can 1178 1115 * post one extra entry in this case. 
But allowing this to avoid ··· 1188 1111 qplqp->sq.q_full_delta -= 1; 1189 1112 qplqp->sq.sg_info.pgsize = PAGE_SIZE; 1190 1113 qplqp->sq.sg_info.pgshft = PAGE_SHIFT; 1114 + 1115 + return 0; 1191 1116 } 1192 1117 1193 1118 static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, ··· 1204 1125 qplqp = &qp->qplib_qp; 1205 1126 dev_attr = &rdev->dev_attr; 1206 1127 1207 - entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); 1208 - qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); 1209 - qplqp->sq.q_full_delta = qplqp->sq.max_wqe - 1210 - init_attr->cap.max_send_wr; 1211 - qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ 1212 - if (qplqp->sq.max_sge > dev_attr->max_qp_sges) 1213 - qplqp->sq.max_sge = dev_attr->max_qp_sges; 1128 + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { 1129 + entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); 1130 + qplqp->sq.max_wqe = min_t(u32, entries, 1131 + dev_attr->max_qp_wqes + 1); 1132 + qplqp->sq.q_full_delta = qplqp->sq.max_wqe - 1133 + init_attr->cap.max_send_wr; 1134 + qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ 1135 + if (qplqp->sq.max_sge > dev_attr->max_qp_sges) 1136 + qplqp->sq.max_sge = dev_attr->max_qp_sges; 1137 + } 1214 1138 } 1215 1139 1216 1140 static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, ··· 1309 1227 bnxt_re_adjust_gsi_rq_attr(qp); 1310 1228 1311 1229 /* Setup SQ */ 1312 - bnxt_re_init_sq_attr(qp, init_attr, udata); 1230 + rc = bnxt_re_init_sq_attr(qp, init_attr, udata); 1231 + if (rc) 1232 + goto out; 1313 1233 if (init_attr->qp_type == IB_QPT_GSI) 1314 1234 bnxt_re_adjust_gsi_sq_attr(qp, init_attr); 1315 1235 ··· 1659 1575 entries = dev_attr->max_srq_wqes + 1; 1660 1576 srq->qplib_srq.max_wqe = entries; 1661 1577 1662 - srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(); 1663 1578 srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; 1579 + srq->qplib_srq.wqe_size = 1580 + bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); 1664 1581 
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; 1665 1582 srq->srq_limit = srq_init_attr->attr.srq_limit; 1666 1583 srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
+4 -4
drivers/infiniband/hw/bnxt_re/ib_verbs.h
··· 136 136 spinlock_t sh_lock; /* protect shpg */ 137 137 }; 138 138 139 - static inline u16 bnxt_re_get_swqe_size(void) 139 + static inline u16 bnxt_re_get_swqe_size(int nsge) 140 140 { 141 - return sizeof(struct sq_send); 141 + return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); 142 142 } 143 143 144 - static inline u16 bnxt_re_get_rwqe_size(void) 144 + static inline u16 bnxt_re_get_rwqe_size(int nsge) 145 145 { 146 - return sizeof(struct rq_wqe); 146 + return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); 147 147 } 148 148 149 149 int bnxt_re_query_device(struct ib_device *ibdev,
+200 -120
drivers/infiniband/hw/bnxt_re/qplib_fp.c
··· 660 660 srq->dbinfo.hwq = &srq->hwq; 661 661 srq->dbinfo.xid = srq->id; 662 662 srq->dbinfo.db = srq->dpi->dbr; 663 + srq->dbinfo.max_slot = 1; 663 664 srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; 664 665 if (srq->threshold) 665 666 bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); ··· 798 797 799 798 que->swq_start = 0; 800 799 que->swq_last = que->max_wqe - 1; 801 - for (indx = 0; indx < que->max_wqe; indx++) { 802 - que->swq[indx].slots = 1; 800 + for (indx = 0; indx < que->max_wqe; indx++) 803 801 que->swq[indx].next_idx = indx + 1; 804 - } 805 802 que->swq[que->swq_last].next_idx = 0; /* Make it circular */ 806 803 que->swq_last = 0; 807 804 out: ··· 830 831 /* SQ */ 831 832 hwq_attr.res = res; 832 833 hwq_attr.sginfo = &sq->sg_info; 833 - hwq_attr.depth = sq->max_wqe; 834 - hwq_attr.stride = sq->wqe_size; 834 + hwq_attr.stride = sizeof(struct sq_sge); 835 + hwq_attr.depth = bnxt_qplib_get_depth(sq); 835 836 hwq_attr.type = HWQ_TYPE_QUEUE; 836 837 rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); 837 838 if (rc) ··· 841 842 if (rc) 842 843 goto fail_sq; 843 844 844 - req.sq_size = cpu_to_le32(sq->max_wqe); 845 + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); 845 846 pbl = &sq->hwq.pbl[PBL_LVL_0]; 846 847 req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); 847 848 pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << ··· 857 858 if (rq->max_wqe) { 858 859 hwq_attr.res = res; 859 860 hwq_attr.sginfo = &rq->sg_info; 860 - hwq_attr.stride = rq->wqe_size; 861 - hwq_attr.depth = qp->rq.max_wqe; 861 + hwq_attr.stride = sizeof(struct sq_sge); 862 + hwq_attr.depth = bnxt_qplib_get_depth(rq); 862 863 hwq_attr.type = HWQ_TYPE_QUEUE; 863 864 rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); 864 865 if (rc) ··· 900 901 sq->dbinfo.hwq = &sq->hwq; 901 902 sq->dbinfo.xid = qp->id; 902 903 sq->dbinfo.db = qp->dpi->dbr; 904 + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); 903 905 if (rq->max_wqe) { 904 906 rq->dbinfo.hwq 
= &rq->hwq; 905 907 rq->dbinfo.xid = qp->id; 906 908 rq->dbinfo.db = qp->dpi->dbr; 909 + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); 907 910 } 908 911 rcfw->qp_tbl[qp->id].qp_id = qp->id; 909 912 rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; ··· 977 976 978 977 hwq_attr.res = res; 979 978 hwq_attr.sginfo = &sq->sg_info; 980 - hwq_attr.stride = sq->wqe_size; 981 - hwq_attr.depth = sq->max_wqe; 979 + hwq_attr.stride = sizeof(struct sq_sge); 980 + hwq_attr.depth = bnxt_qplib_get_depth(sq); 982 981 hwq_attr.aux_stride = psn_sz; 983 - hwq_attr.aux_depth = hwq_attr.depth; 982 + hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); 984 983 hwq_attr.type = HWQ_TYPE_QUEUE; 985 984 rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); 986 985 if (rc) ··· 993 992 if (psn_sz) 994 993 bnxt_qplib_init_psn_ptr(qp, psn_sz); 995 994 996 - req.sq_size = cpu_to_le32(sq->max_wqe); 995 + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); 997 996 pbl = &sq->hwq.pbl[PBL_LVL_0]; 998 997 req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); 999 998 pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << ··· 1009 1008 if (!qp->srq) { 1010 1009 hwq_attr.res = res; 1011 1010 hwq_attr.sginfo = &rq->sg_info; 1012 - hwq_attr.stride = rq->wqe_size; 1013 - hwq_attr.depth = rq->max_wqe; 1011 + hwq_attr.stride = sizeof(struct sq_sge); 1012 + hwq_attr.depth = bnxt_qplib_get_depth(rq); 1014 1013 hwq_attr.aux_stride = 0; 1015 1014 hwq_attr.aux_depth = 0; 1016 1015 hwq_attr.type = HWQ_TYPE_QUEUE; ··· 1045 1044 qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; 1046 1045 if (qp->sig_type) 1047 1046 qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; 1047 + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) 1048 + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; 1048 1049 req.qp_flags = cpu_to_le32(qp_flags); 1049 1050 1050 1051 /* ORRQ and IRRQ */ ··· 1104 1101 sq->dbinfo.hwq = &sq->hwq; 1105 1102 sq->dbinfo.xid = qp->id; 1106 1103 sq->dbinfo.db = 
qp->dpi->dbr; 1104 + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); 1107 1105 if (rq->max_wqe) { 1108 1106 rq->dbinfo.hwq = &rq->hwq; 1109 1107 rq->dbinfo.xid = qp->id; 1110 1108 rq->dbinfo.db = qp->dpi->dbr; 1109 + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); 1111 1110 } 1112 1111 rcfw->qp_tbl[qp->id].qp_id = qp->id; 1113 1112 rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; ··· 1567 1562 if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { 1568 1563 psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); 1569 1564 psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); 1565 + psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); 1570 1566 } else { 1571 1567 psns->opcode_start_psn = cpu_to_le32(op_spsn); 1572 1568 psns->flags_next_psn = cpu_to_le32(flg_npsn); 1573 1569 } 1574 1570 } 1575 1571 1572 + static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, 1573 + struct bnxt_qplib_swqe *wqe, 1574 + u16 *idx) 1575 + { 1576 + struct bnxt_qplib_hwq *hwq; 1577 + int len, t_len, offt; 1578 + bool pull_dst = true; 1579 + void *il_dst = NULL; 1580 + void *il_src = NULL; 1581 + int t_cplen, cplen; 1582 + int indx; 1583 + 1584 + hwq = &qp->sq.hwq; 1585 + t_len = 0; 1586 + for (indx = 0; indx < wqe->num_sge; indx++) { 1587 + len = wqe->sg_list[indx].size; 1588 + il_src = (void *)wqe->sg_list[indx].addr; 1589 + t_len += len; 1590 + if (t_len > qp->max_inline_data) 1591 + goto bad; 1592 + while (len) { 1593 + if (pull_dst) { 1594 + pull_dst = false; 1595 + il_dst = bnxt_qplib_get_prod_qe(hwq, *idx); 1596 + (*idx)++; 1597 + t_cplen = 0; 1598 + offt = 0; 1599 + } 1600 + cplen = min_t(int, len, sizeof(struct sq_sge)); 1601 + cplen = min_t(int, cplen, 1602 + (sizeof(struct sq_sge) - offt)); 1603 + memcpy(il_dst, il_src, cplen); 1604 + t_cplen += cplen; 1605 + il_src += cplen; 1606 + il_dst += cplen; 1607 + offt += cplen; 1608 + len -= cplen; 1609 + if (t_cplen == sizeof(struct sq_sge)) 1610 + pull_dst = true; 1611 + } 1612 + } 1613 + 1614 + return t_len; 1615 + bad: 
1616 + return -ENOMEM; 1617 + } 1618 + 1619 + static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, 1620 + struct bnxt_qplib_sge *ssge, 1621 + u16 nsge, u16 *idx) 1622 + { 1623 + struct sq_sge *dsge; 1624 + int indx, len = 0; 1625 + 1626 + for (indx = 0; indx < nsge; indx++, (*idx)++) { 1627 + dsge = bnxt_qplib_get_prod_qe(hwq, *idx); 1628 + dsge->va_or_pa = cpu_to_le64(ssge[indx].addr); 1629 + dsge->l_key = cpu_to_le32(ssge[indx].lkey); 1630 + dsge->size = cpu_to_le32(ssge[indx].size); 1631 + len += ssge[indx].size; 1632 + } 1633 + 1634 + return len; 1635 + } 1636 + 1637 + static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, 1638 + struct bnxt_qplib_swqe *wqe, 1639 + u16 *wqe_sz, u16 *qdf, u8 mode) 1640 + { 1641 + u32 ilsize, bytes; 1642 + u16 nsge; 1643 + u16 slot; 1644 + 1645 + nsge = wqe->num_sge; 1646 + /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. */ 1647 + bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); 1648 + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { 1649 + ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data); 1650 + bytes = ALIGN(ilsize, sizeof(struct sq_sge)); 1651 + bytes += sizeof(struct sq_send_hdr); 1652 + } 1653 + 1654 + *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes); 1655 + slot = bytes >> 4; 1656 + *wqe_sz = slot; 1657 + if (mode == BNXT_QPLIB_WQE_MODE_STATIC) 1658 + slot = 8; 1659 + return slot; 1660 + } 1661 + 1576 1662 static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, 1577 - struct bnxt_qplib_swq *swq, u32 tail) 1663 + struct bnxt_qplib_swq *swq) 1578 1664 { 1579 1665 struct bnxt_qplib_hwq *hwq; 1580 1666 u32 pg_num, pg_indx; 1581 1667 void *buff; 1668 + u32 tail; 1582 1669 1583 1670 hwq = &sq->hwq; 1584 1671 if (!hwq->pad_pg) 1585 1672 return; 1673 + tail = swq->slot_idx / sq->dbinfo.max_slot; 1586 1674 pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); 1587 1675 pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); 1588 1676 buff = 
(void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); ··· 1696 1598 struct bnxt_qplib_nq_work *nq_work = NULL; 1697 1599 int i, rc = 0, data_len = 0, pkt_num = 0; 1698 1600 struct bnxt_qplib_q *sq = &qp->sq; 1699 - struct sq_send *hw_sq_send_hdr; 1700 1601 struct bnxt_qplib_hwq *hwq; 1701 1602 struct bnxt_qplib_swq *swq; 1702 1603 bool sch_handler = false; 1703 - struct sq_sge *hw_sge; 1704 - u8 wqe_size16; 1604 + u16 wqe_sz, qdf = 0; 1605 + void *base_hdr; 1606 + void *ext_hdr; 1705 1607 __le32 temp32; 1706 - u32 sw_prod; 1608 + u32 wqe_idx; 1609 + u32 slots; 1610 + u16 idx; 1707 1611 1708 1612 hwq = &sq->hwq; 1709 1613 if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && ··· 1717 1617 goto done; 1718 1618 } 1719 1619 1720 - if (bnxt_qplib_queue_full(sq)) { 1620 + slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode); 1621 + if (bnxt_qplib_queue_full(sq, slots + qdf)) { 1721 1622 dev_err(&hwq->pdev->dev, 1722 1623 "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", 1723 - hwq->prod, hwq->cons, hwq->max_elements, 1724 - sq->q_full_delta); 1624 + hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta); 1725 1625 rc = -ENOMEM; 1726 1626 goto done; 1727 1627 } 1728 1628 1729 - sw_prod = sq->hwq.prod; 1730 - swq = bnxt_qplib_get_swqe(sq, NULL); 1731 - bnxt_qplib_pull_psn_buff(sq, swq, sw_prod); 1629 + swq = bnxt_qplib_get_swqe(sq, &wqe_idx); 1630 + bnxt_qplib_pull_psn_buff(sq, swq); 1631 + 1632 + idx = 0; 1633 + swq->slot_idx = hwq->prod; 1634 + swq->slots = slots; 1732 1635 swq->wr_id = wqe->wr_id; 1733 1636 swq->type = wqe->type; 1734 1637 swq->flags = wqe->flags; ··· 1739 1636 if (qp->sig_type) 1740 1637 swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; 1741 1638 1742 - hw_sq_send_hdr = bnxt_qplib_get_qe(hwq, sw_prod, NULL); 1743 - memset(hw_sq_send_hdr, 0, sq->wqe_size); 1744 1639 if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1745 1640 sch_handler = true; 1746 1641 dev_dbg(&hwq->pdev->dev, ··· 1746 1645 goto queue_err; 1747 1646 } 1748 1647 1749 - if 
(wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { 1750 - /* Copy the inline data */ 1751 - if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { 1752 - dev_warn(&hwq->pdev->dev, 1753 - "Inline data length > 96 detected\n"); 1754 - data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; 1755 - } else { 1756 - data_len = wqe->inline_len; 1757 - } 1758 - memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len); 1759 - wqe_size16 = (data_len + 15) >> 4; 1760 - } else { 1761 - for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data; 1762 - i < wqe->num_sge; i++, hw_sge++) { 1763 - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); 1764 - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); 1765 - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); 1766 - data_len += wqe->sg_list[i].size; 1767 - } 1768 - /* Each SGE entry = 1 WQE size16 */ 1769 - wqe_size16 = wqe->num_sge; 1770 - /* HW requires wqe size has room for atleast one SGE even if 1771 - * none was supplied by ULP 1772 - */ 1773 - if (!wqe->num_sge) 1774 - wqe_size16++; 1775 - } 1648 + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); 1649 + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); 1650 + memset(base_hdr, 0, sizeof(struct sq_sge)); 1651 + memset(ext_hdr, 0, sizeof(struct sq_sge)); 1776 1652 1653 + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) 1654 + /* Copy the inline data */ 1655 + data_len = bnxt_qplib_put_inline(qp, wqe, &idx); 1656 + else 1657 + data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, 1658 + &idx); 1659 + if (data_len < 0) 1660 + goto queue_err; 1777 1661 /* Specifics */ 1778 1662 switch (wqe->type) { 1779 1663 case BNXT_QPLIB_SWQE_TYPE_SEND: 1780 1664 if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) { 1665 + struct sq_send_raweth_qp1_hdr *sqe = base_hdr; 1666 + struct sq_raw_ext_hdr *ext_sqe = ext_hdr; 1781 1667 /* Assemble info for Raw Ethertype QPs */ 1782 - struct sq_send_raweth_qp1 *sqe = 1783 - (struct sq_send_raweth_qp1 *)hw_sq_send_hdr; 1784 1668 1785 1669 sqe->wqe_type = wqe->type; 1786 
1670 sqe->flags = wqe->flags; 1787 - sqe->wqe_size = wqe_size16 + 1788 - ((offsetof(typeof(*sqe), data) + 15) >> 4); 1671 + sqe->wqe_size = wqe_sz; 1789 1672 sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action); 1790 1673 sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags); 1791 1674 sqe->length = cpu_to_le32(data_len); 1792 - sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & 1675 + ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & 1793 1676 SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) << 1794 1677 SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT); 1795 1678 ··· 1783 1698 case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: 1784 1699 case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: 1785 1700 { 1786 - struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr; 1701 + struct sq_ud_ext_hdr *ext_sqe = ext_hdr; 1702 + struct sq_send_hdr *sqe = base_hdr; 1787 1703 1788 1704 sqe->wqe_type = wqe->type; 1789 1705 sqe->flags = wqe->flags; 1790 - sqe->wqe_size = wqe_size16 + 1791 - ((offsetof(typeof(*sqe), data) + 15) >> 4); 1792 - sqe->inv_key_or_imm_data = cpu_to_le32( 1793 - wqe->send.inv_key); 1706 + sqe->wqe_size = wqe_sz; 1707 + sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key); 1794 1708 if (qp->type == CMDQ_CREATE_QP_TYPE_UD || 1795 1709 qp->type == CMDQ_CREATE_QP_TYPE_GSI) { 1796 1710 sqe->q_key = cpu_to_le32(wqe->send.q_key); 1797 - sqe->dst_qp = cpu_to_le32( 1798 - wqe->send.dst_qp & SQ_SEND_DST_QP_MASK); 1799 1711 sqe->length = cpu_to_le32(data_len); 1800 - sqe->avid = cpu_to_le32(wqe->send.avid & 1801 - SQ_SEND_AVID_MASK); 1802 1712 sq->psn = (sq->psn + 1) & BTH_PSN_MASK; 1713 + ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp & 1714 + SQ_SEND_DST_QP_MASK); 1715 + ext_sqe->avid = cpu_to_le32(wqe->send.avid & 1716 + SQ_SEND_AVID_MASK); 1803 1717 } else { 1804 1718 sqe->length = cpu_to_le32(data_len); 1805 - sqe->dst_qp = 0; 1806 - sqe->avid = 0; 1807 1719 if (qp->mtu) 1808 1720 pkt_num = (data_len + qp->mtu - 1) / qp->mtu; 1809 1721 if (!pkt_num) ··· 1813 1731 case 
BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM: 1814 1732 case BNXT_QPLIB_SWQE_TYPE_RDMA_READ: 1815 1733 { 1816 - struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr; 1734 + struct sq_rdma_ext_hdr *ext_sqe = ext_hdr; 1735 + struct sq_rdma_hdr *sqe = base_hdr; 1817 1736 1818 1737 sqe->wqe_type = wqe->type; 1819 1738 sqe->flags = wqe->flags; 1820 - sqe->wqe_size = wqe_size16 + 1821 - ((offsetof(typeof(*sqe), data) + 15) >> 4); 1739 + sqe->wqe_size = wqe_sz; 1822 1740 sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key); 1823 1741 sqe->length = cpu_to_le32((u32)data_len); 1824 - sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); 1825 - sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); 1742 + ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); 1743 + ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); 1826 1744 if (qp->mtu) 1827 1745 pkt_num = (data_len + qp->mtu - 1) / qp->mtu; 1828 1746 if (!pkt_num) ··· 1833 1751 case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP: 1834 1752 case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD: 1835 1753 { 1836 - struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr; 1754 + struct sq_atomic_ext_hdr *ext_sqe = ext_hdr; 1755 + struct sq_atomic_hdr *sqe = base_hdr; 1837 1756 1838 1757 sqe->wqe_type = wqe->type; 1839 1758 sqe->flags = wqe->flags; 1840 1759 sqe->remote_key = cpu_to_le32(wqe->atomic.r_key); 1841 1760 sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va); 1842 - sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); 1843 - sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); 1761 + ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); 1762 + ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); 1844 1763 if (qp->mtu) 1845 1764 pkt_num = (data_len + qp->mtu - 1) / qp->mtu; 1846 1765 if (!pkt_num) ··· 1851 1768 } 1852 1769 case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV: 1853 1770 { 1854 - struct sq_localinvalidate *sqe = 1855 - (struct sq_localinvalidate *)hw_sq_send_hdr; 1771 + struct sq_localinvalidate *sqe = base_hdr; 1856 1772 1857 1773 
sqe->wqe_type = wqe->type; 1858 1774 sqe->flags = wqe->flags; ··· 1861 1779 } 1862 1780 case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: 1863 1781 { 1864 - struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr; 1782 + struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr; 1783 + struct sq_fr_pmr_hdr *sqe = base_hdr; 1865 1784 1866 1785 sqe->wqe_type = wqe->type; 1867 1786 sqe->flags = wqe->flags; ··· 1886 1803 wqe->frmr.pbl_ptr[i] = cpu_to_le64( 1887 1804 wqe->frmr.page_list[i] | 1888 1805 PTU_PTE_VALID); 1889 - sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); 1890 - sqe->va = cpu_to_le64(wqe->frmr.va); 1806 + ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); 1807 + ext_sqe->va = cpu_to_le64(wqe->frmr.va); 1891 1808 1892 1809 break; 1893 1810 } 1894 1811 case BNXT_QPLIB_SWQE_TYPE_BIND_MW: 1895 1812 { 1896 - struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr; 1813 + struct sq_bind_ext_hdr *ext_sqe = ext_hdr; 1814 + struct sq_bind_hdr *sqe = base_hdr; 1897 1815 1898 1816 sqe->wqe_type = wqe->type; 1899 1817 sqe->flags = wqe->flags; ··· 1903 1819 (wqe->bind.zero_based ? 
SQ_BIND_ZERO_BASED : 0); 1904 1820 sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key); 1905 1821 sqe->l_key = cpu_to_le32(wqe->bind.r_key); 1906 - sqe->va = cpu_to_le64(wqe->bind.va); 1907 - temp32 = cpu_to_le32(wqe->bind.length); 1908 - memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length)); 1822 + ext_sqe->va = cpu_to_le64(wqe->bind.va); 1823 + ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); 1909 1824 break; 1910 1825 } 1911 1826 default: ··· 1915 1832 swq->next_psn = sq->psn & BTH_PSN_MASK; 1916 1833 bnxt_qplib_fill_psn_search(qp, wqe, swq); 1917 1834 queue_err: 1918 - bnxt_qplib_swq_mod_start(sq, sw_prod); 1919 - bnxt_qplib_hwq_incr_prod(&sq->hwq, 1); 1835 + bnxt_qplib_swq_mod_start(sq, wqe_idx); 1836 + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); 1920 1837 qp->wqe_cnt++; 1921 1838 done: 1922 1839 if (sch_handler) { ··· 1947 1864 { 1948 1865 struct bnxt_qplib_nq_work *nq_work = NULL; 1949 1866 struct bnxt_qplib_q *rq = &qp->rq; 1950 - struct bnxt_qplib_swq *swq; 1867 + struct rq_wqe_hdr *base_hdr; 1868 + struct rq_ext_hdr *ext_hdr; 1951 1869 struct bnxt_qplib_hwq *hwq; 1870 + struct bnxt_qplib_swq *swq; 1952 1871 bool sch_handler = false; 1953 - struct sq_sge *hw_sge; 1954 - struct rq_wqe *rqe; 1955 - int i, rc = 0; 1956 - u32 sw_prod; 1872 + u16 wqe_sz, idx; 1873 + u32 wqe_idx; 1874 + int rc = 0; 1957 1875 1958 1876 hwq = &rq->hwq; 1959 1877 if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { ··· 1965 1881 goto done; 1966 1882 } 1967 1883 1968 - if (bnxt_qplib_queue_full(rq)) { 1884 + if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) { 1969 1885 dev_err(&hwq->pdev->dev, 1970 1886 "FP: QP (0x%x) RQ is full!\n", qp->id); 1971 1887 rc = -EINVAL; 1972 1888 goto done; 1973 1889 } 1974 1890 1975 - sw_prod = rq->hwq.prod; 1976 - swq = bnxt_qplib_get_swqe(rq, NULL); 1891 + swq = bnxt_qplib_get_swqe(rq, &wqe_idx); 1977 1892 swq->wr_id = wqe->wr_id; 1893 + swq->slots = rq->dbinfo.max_slot; 1978 1894 1979 1895 if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 
1980 1896 sch_handler = true; ··· 1983 1899 goto queue_err; 1984 1900 } 1985 1901 1986 - rqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); 1987 - memset(rqe, 0, rq->wqe_size); 1902 + idx = 0; 1903 + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); 1904 + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); 1905 + memset(base_hdr, 0, sizeof(struct sq_sge)); 1906 + memset(ext_hdr, 0, sizeof(struct sq_sge)); 1907 + wqe_sz = (sizeof(struct rq_wqe_hdr) + 1908 + wqe->num_sge * sizeof(struct sq_sge)) >> 4; 1909 + bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx); 1910 + if (!wqe->num_sge) { 1911 + struct sq_sge *sge; 1988 1912 1989 - /* Calculate wqe_size16 and data_len */ 1990 - for (i = 0, hw_sge = (struct sq_sge *)rqe->data; 1991 - i < wqe->num_sge; i++, hw_sge++) { 1992 - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); 1993 - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); 1994 - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); 1913 + sge = bnxt_qplib_get_prod_qe(hwq, idx++); 1914 + sge->size = 0; 1915 + wqe_sz++; 1995 1916 } 1996 - rqe->wqe_type = wqe->type; 1997 - rqe->flags = wqe->flags; 1998 - rqe->wqe_size = wqe->num_sge + 1999 - ((offsetof(typeof(*rqe), data) + 15) >> 4); 2000 - /* HW requires wqe size has room for atleast one SGE even if none 2001 - * was supplied by ULP 2002 - */ 2003 - if (!wqe->num_sge) 2004 - rqe->wqe_size++; 2005 - 2006 - /* Supply the rqe->wr_id index to the wr_id_tbl for now */ 2007 - rqe->wr_id[0] = cpu_to_le32(sw_prod); 2008 - 1917 + base_hdr->wqe_type = wqe->type; 1918 + base_hdr->flags = wqe->flags; 1919 + base_hdr->wqe_size = wqe_sz; 1920 + base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); 2009 1921 queue_err: 2010 - bnxt_qplib_swq_mod_start(rq, sw_prod); 2011 - bnxt_qplib_hwq_incr_prod(&rq->hwq, 1); 1922 + bnxt_qplib_swq_mod_start(rq, wqe_idx); 1923 + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); 2012 1924 done: 2013 1925 if (sch_handler) { 2014 1926 nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
+56 -4
drivers/infiniband/hw/bnxt_re/qplib_fp.h
··· 119 119 u8 flags; 120 120 u32 start_psn; 121 121 u32 next_psn; 122 + u32 slot_idx; 122 123 u8 slots; 123 124 struct sq_psn_search *psn_search; 124 125 struct sq_psn_search_ext *psn_ext; ··· 350 349 (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \ 351 350 !((raw_cons) & (cp_bit))) 352 351 353 - static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q) 352 + static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que, 353 + u8 slots) 354 354 { 355 - return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta), 356 - &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons, 357 - &qplib_q->hwq); 355 + struct bnxt_qplib_hwq *hwq; 356 + int avail; 357 + 358 + hwq = &que->hwq; 359 + /* False full is possible, retrying post-send makes sense */ 360 + avail = hwq->cons - hwq->prod; 361 + if (hwq->cons <= hwq->prod) 362 + avail += hwq->depth; 363 + return avail <= slots; 358 364 } 359 365 360 366 struct bnxt_qplib_cqe { ··· 562 554 que->swq_start = que->swq[idx].next_idx; 563 555 } 564 556 557 + static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que) 558 + { 559 + return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge); 560 + } 561 + 562 + static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode) 563 + { 564 + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? 565 + que->max_wqe : bnxt_qplib_get_depth(que); 566 + } 567 + 568 + static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode) 569 + { 570 + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? 571 + sizeof(struct sq_send) / sizeof(struct sq_sge) : 1; 572 + } 573 + 574 + static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size) 575 + { 576 + return (wqe_size / sizeof(struct sq_sge)); 577 + } 578 + 579 + static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes) 580 + { 581 + /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128 582 + * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128. 583 + * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512. 
584 + * when 8916 is disabled. 585 + */ 586 + return (delta * wqe_bytes) / sizeof(struct sq_sge); 587 + } 588 + 589 + static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) 590 + { 591 + u16 size = 0; 592 + int indx; 593 + 594 + for (indx = 0; indx < wqe->num_sge; indx++) 595 + size += wqe->sg_list[indx].size; 596 + if (size > max) 597 + size = max; 598 + 599 + return size; 600 + } 565 601 #endif /* __BNXT_QPLIB_FP_H__ */
+10 -2
drivers/infiniband/hw/bnxt_re/qplib_res.h
··· 173 173 void __iomem *priv_db; 174 174 struct bnxt_qplib_hwq *hwq; 175 175 u32 xid; 176 + u32 max_slot; 176 177 }; 177 178 178 179 /* Tables */ ··· 333 332 return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); 334 333 } 335 334 335 + static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx) 336 + { 337 + idx += hwq->prod; 338 + if (idx >= hwq->depth) 339 + idx -= hwq->depth; 340 + return bnxt_qplib_get_qe(hwq, idx, NULL); 341 + } 342 + 336 343 #define to_bnxt_qplib(ptr, type, member) \ 337 344 container_of(ptr, type, member) 338 345 ··· 418 409 419 410 key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; 420 411 key <<= 32; 421 - key |= (info->hwq->prod & (info->hwq->max_elements - 1)) & 422 - DBC_DBC_INDEX_MASK; 412 + key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK; 423 413 writeq(key, info->db); 424 414 } 425 415