Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
IB/mlx4: Fix data corruption triggered by wrong headroom marking order

+49 -13
drivers/infiniband/hw/mlx4/qp.c
··· 1211 1211 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 1212 1212 } 1213 1213 1214 - static void set_data_seg(struct mlx4_wqe_data_seg *dseg, 1215 - struct ib_sge *sg) 1214 + static void set_mlx_icrc_seg(void *dseg) 1216 1215 { 1217 - dseg->byte_count = cpu_to_be32(sg->length); 1216 + u32 *t = dseg; 1217 + struct mlx4_wqe_inline_seg *iseg = dseg; 1218 + 1219 + t[1] = 0; 1220 + 1221 + /* 1222 + * Need a barrier here before writing the byte_count field to 1223 + * make sure that all the data is visible before the 1224 + * byte_count field is set. Otherwise, if the segment begins 1225 + * a new cacheline, the HCA prefetcher could grab the 64-byte 1226 + * chunk and get a valid (!= 0xffffffff) byte count but 1227 + * stale data, and end up sending the wrong data. 1228 + */ 1229 + wmb(); 1230 + 1231 + iseg->byte_count = cpu_to_be32((1 << 31) | 4); 1232 + } 1233 + 1234 + static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) 1235 + { 1218 1236 dseg->lkey = cpu_to_be32(sg->lkey); 1219 1237 dseg->addr = cpu_to_be64(sg->addr); 1238 + 1239 + /* 1240 + * Need a barrier here before writing the byte_count field to 1241 + * make sure that all the data is visible before the 1242 + * byte_count field is set. Otherwise, if the segment begins 1243 + * a new cacheline, the HCA prefetcher could grab the 64-byte 1244 + * chunk and get a valid (!= 0xffffffff) byte count but 1245 + * stale data, and end up sending the wrong data. 
1246 + */ 1247 + wmb(); 1248 + 1249 + dseg->byte_count = cpu_to_be32(sg->length); 1220 1250 } 1221 1251 1222 1252 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ··· 1255 1225 struct mlx4_ib_qp *qp = to_mqp(ibqp); 1256 1226 void *wqe; 1257 1227 struct mlx4_wqe_ctrl_seg *ctrl; 1228 + struct mlx4_wqe_data_seg *dseg; 1258 1229 unsigned long flags; 1259 1230 int nreq; 1260 1231 int err = 0; ··· 1355 1324 break; 1356 1325 } 1357 1326 1358 - for (i = 0; i < wr->num_sge; ++i) { 1359 - set_data_seg(wqe, wr->sg_list + i); 1327 + /* 1328 + * Write data segments in reverse order, so as to 1329 + * overwrite cacheline stamp last within each 1330 + * cacheline. This avoids issues with WQE 1331 + * prefetching. 1332 + */ 1360 1333 1361 - wqe += sizeof (struct mlx4_wqe_data_seg); 1362 - size += sizeof (struct mlx4_wqe_data_seg) / 16; 1363 - } 1334 + dseg = wqe; 1335 + dseg += wr->num_sge - 1; 1336 + size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); 1364 1337 1365 1338 /* Add one more inline data segment for ICRC for MLX sends */ 1366 - if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) { 1367 - ((struct mlx4_wqe_inline_seg *) wqe)->byte_count = 1368 - cpu_to_be32((1 << 31) | 4); 1369 - ((u32 *) wqe)[1] = 0; 1370 - wqe += sizeof (struct mlx4_wqe_data_seg); 1339 + if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || 1340 + qp->ibqp.qp_type == IB_QPT_GSI)) { 1341 + set_mlx_icrc_seg(dseg + 1); 1371 1342 size += sizeof (struct mlx4_wqe_data_seg) / 16; 1372 1343 } 1344 + 1345 + for (i = wr->num_sge - 1; i >= 0; --i, --dseg) 1346 + set_data_seg(dseg, wr->sg_list + i); 1373 1347 1374 1348 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? 1375 1349 MLX4_WQE_CTRL_FENCE : 0) | size;