Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
"Round three of 4.8 rc fixes.

This is likely the last rdma pull request this cycle. The new rxe
driver had a few issues (you probably saw the boot bot bug report) and
they should be addressed now. There are a couple other fixes here,
mainly mlx4. There are still two outstanding issues that need to be
resolved, but I don't think their fixes will make this kernel cycle.

Summary:

- Various fixes to rdmavt, ipoib, mlx5, mlx4, rxe"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
IB/rdmavt: Don't vfree a kzalloc'ed memory region
IB/rxe: Fix kmem_cache leak
IB/rxe: Fix race condition between requester and completer
IB/rxe: Fix duplicate atomic request handling
IB/rxe: Fix kernel panic in udp_setup_tunnel
IB/mlx5: Set source mac address in FTE
IB/mlx5: Enable MAD_IFC commands for IB ports only
IB/mlx4: Diagnostic HW counters are not supported in slave mode
IB/mlx4: Use correct subnet-prefix in QP1 mads under SR-IOV
IB/mlx4: Fix code indentation in QP1 MAD flow
IB/mlx4: Fix incorrect MC join state bit-masking on SR-IOV
IB/ipoib: Don't allow MC joins during light MC flush
IB/rxe: fix GFP_KERNEL in spinlock context

+190 -81
+23
drivers/infiniband/hw/mlx4/mad.c
··· 1128 1128 1129 1129 /* Generate GUID changed event */ 1130 1130 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { 1131 + if (mlx4_is_master(dev->dev)) { 1132 + union ib_gid gid; 1133 + int err = 0; 1134 + 1135 + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) 1136 + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); 1137 + else 1138 + gid.global.subnet_prefix = 1139 + eqe->event.port_mgmt_change.params.port_info.gid_prefix; 1140 + if (err) { 1141 + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", 1142 + port, err); 1143 + } else { 1144 + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", 1145 + port, 1146 + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), 1147 + be64_to_cpu(gid.global.subnet_prefix)); 1148 + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, 1149 + be64_to_cpu(gid.global.subnet_prefix)); 1150 + } 1151 + } 1131 1152 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); 1132 1153 /*if master, notify all slaves*/ 1133 1154 if (mlx4_is_master(dev->dev)) ··· 2223 2202 if (err) 2224 2203 goto demux_err; 2225 2204 dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; 2205 + atomic64_set(&dev->sriov.demux[i].subnet_prefix, 2206 + be64_to_cpu(gid.global.subnet_prefix)); 2226 2207 err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, 2227 2208 &dev->sriov.sqps[i]); 2228 2209 if (err)
+3
drivers/infiniband/hw/mlx4/main.c
··· 2202 2202 bool per_port = !!(ibdev->dev->caps.flags2 & 2203 2203 MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); 2204 2204 2205 + if (mlx4_is_slave(ibdev->dev)) 2206 + return 0; 2207 + 2205 2208 for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { 2206 2209 /* i == 1 means we are building port counters */ 2207 2210 if (i && !per_port)
+7 -7
drivers/infiniband/hw/mlx4/mcg.c
··· 489 489 if (!group->members[i]) 490 490 leave_state |= (1 << i); 491 491 492 - return leave_state & (group->rec.scope_join_state & 7); 492 + return leave_state & (group->rec.scope_join_state & 0xf); 493 493 } 494 494 495 495 static int join_group(struct mcast_group *group, int slave, u8 join_mask) ··· 564 564 } else 565 565 mcg_warn_group(group, "DRIVER BUG\n"); 566 566 } else if (group->state == MCAST_LEAVE_SENT) { 567 - if (group->rec.scope_join_state & 7) 568 - group->rec.scope_join_state &= 0xf8; 567 + if (group->rec.scope_join_state & 0xf) 568 + group->rec.scope_join_state &= 0xf0; 569 569 group->state = MCAST_IDLE; 570 570 mutex_unlock(&group->lock); 571 571 if (release_group(group, 1)) ··· 605 605 static int handle_join_req(struct mcast_group *group, u8 join_mask, 606 606 struct mcast_req *req) 607 607 { 608 - u8 group_join_state = group->rec.scope_join_state & 7; 608 + u8 group_join_state = group->rec.scope_join_state & 0xf; 609 609 int ref = 0; 610 610 u16 status; 611 611 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; ··· 690 690 u8 cur_join_state; 691 691 692 692 resp_join_state = ((struct ib_sa_mcmember_data *) 693 - group->response_sa_mad.data)->scope_join_state & 7; 694 - cur_join_state = group->rec.scope_join_state & 7; 693 + group->response_sa_mad.data)->scope_join_state & 0xf; 694 + cur_join_state = group->rec.scope_join_state & 0xf; 695 695 696 696 if (method == IB_MGMT_METHOD_GET_RESP) { 697 697 /* successfull join */ ··· 710 710 req = list_first_entry(&group->pending_list, struct mcast_req, 711 711 group_list); 712 712 sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; 713 - req_join_state = sa_data->scope_join_state & 0x7; 713 + req_join_state = sa_data->scope_join_state & 0xf; 714 714 715 715 /* For a leave request, we will immediately answer the VF, and 716 716 * update our internal counters. The actual leave will be sent
+1 -1
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 448 448 struct workqueue_struct *wq; 449 449 struct workqueue_struct *ud_wq; 450 450 spinlock_t ud_lock; 451 - __be64 subnet_prefix; 451 + atomic64_t subnet_prefix; 452 452 __be64 guid_cache[128]; 453 453 struct mlx4_ib_dev *dev; 454 454 /* the following lock protects both mcg_table and mcg_mgid0_list */
+20 -17
drivers/infiniband/hw/mlx4/qp.c
··· 2493 2493 sqp->ud_header.grh.flow_label = 2494 2494 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 2495 2495 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; 2496 - if (is_eth) 2496 + if (is_eth) { 2497 2497 memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); 2498 - else { 2499 - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { 2500 - /* When multi-function is enabled, the ib_core gid 2501 - * indexes don't necessarily match the hw ones, so 2502 - * we must use our own cache */ 2503 - sqp->ud_header.grh.source_gid.global.subnet_prefix = 2504 - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 2505 - subnet_prefix; 2506 - sqp->ud_header.grh.source_gid.global.interface_id = 2507 - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 2508 - guid_cache[ah->av.ib.gid_index]; 2509 - } else 2510 - ib_get_cached_gid(ib_dev, 2511 - be32_to_cpu(ah->av.ib.port_pd) >> 24, 2512 - ah->av.ib.gid_index, 2513 - &sqp->ud_header.grh.source_gid, NULL); 2498 + } else { 2499 + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { 2500 + /* When multi-function is enabled, the ib_core gid 2501 + * indexes don't necessarily match the hw ones, so 2502 + * we must use our own cache 2503 + */ 2504 + sqp->ud_header.grh.source_gid.global.subnet_prefix = 2505 + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. 2506 + demux[sqp->qp.port - 1]. 2507 + subnet_prefix))); 2508 + sqp->ud_header.grh.source_gid.global.interface_id = 2509 + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 2510 + guid_cache[ah->av.ib.gid_index]; 2511 + } else { 2512 + ib_get_cached_gid(ib_dev, 2513 + be32_to_cpu(ah->av.ib.port_pd) >> 24, 2514 + ah->av.ib.gid_index, 2515 + &sqp->ud_header.grh.source_gid, NULL); 2516 + } 2514 2517 } 2515 2518 memcpy(sqp->ud_header.grh.destination_gid.raw, 2516 2519 ah->av.ib.dgid, 16);
+10 -1
drivers/infiniband/hw/mlx5/main.c
··· 288 288 289 289 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) 290 290 { 291 - return !MLX5_CAP_GEN(dev->mdev, ib_virt); 291 + if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) 292 + return !MLX5_CAP_GEN(dev->mdev, ib_virt); 293 + return 0; 292 294 } 293 295 294 296 enum { ··· 1429 1427 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, 1430 1428 dmac_47_16), 1431 1429 ib_spec->eth.val.dst_mac); 1430 + 1431 + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, 1432 + smac_47_16), 1433 + ib_spec->eth.mask.src_mac); 1434 + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, 1435 + smac_47_16), 1436 + ib_spec->eth.val.src_mac); 1432 1437 1433 1438 if (ib_spec->eth.mask.vlan_tag) { 1434 1439 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+1 -1
drivers/infiniband/sw/rdmavt/mr.c
··· 294 294 { 295 295 rvt_deinit_mregion(&mr->mr); 296 296 rvt_free_lkey(&mr->mr); 297 - vfree(mr); 297 + kfree(mr); 298 298 } 299 299 300 300 /**
+22 -3
drivers/infiniband/sw/rxe/rxe.c
··· 362 362 return err; 363 363 } 364 364 365 - err = rxe_net_init(); 365 + err = rxe_net_ipv4_init(); 366 366 if (err) { 367 - pr_err("rxe: unable to init\n"); 367 + pr_err("rxe: unable to init ipv4 tunnel\n"); 368 368 rxe_cache_exit(); 369 - return err; 369 + goto exit; 370 370 } 371 + 372 + err = rxe_net_ipv6_init(); 373 + if (err) { 374 + pr_err("rxe: unable to init ipv6 tunnel\n"); 375 + rxe_cache_exit(); 376 + goto exit; 377 + } 378 + 379 + err = register_netdevice_notifier(&rxe_net_notifier); 380 + if (err) { 381 + pr_err("rxe: Failed to rigister netdev notifier\n"); 382 + goto exit; 383 + } 384 + 371 385 pr_info("rxe: loaded\n"); 372 386 373 387 return 0; 388 + 389 + exit: 390 + rxe_release_udp_tunnel(recv_sockets.sk4); 391 + rxe_release_udp_tunnel(recv_sockets.sk6); 392 + return err; 374 393 } 375 394 376 395 static void __exit rxe_module_exit(void)
+13
drivers/infiniband/sw/rxe/rxe_comp.c
··· 689 689 qp->req.need_retry = 1; 690 690 rxe_run_task(&qp->req.task, 1); 691 691 } 692 + 693 + if (pkt) { 694 + rxe_drop_ref(pkt->qp); 695 + kfree_skb(skb); 696 + } 697 + 692 698 goto exit; 699 + 693 700 } else { 694 701 wqe->status = IB_WC_RETRY_EXC_ERR; 695 702 state = COMPST_ERROR; ··· 723 716 case COMPST_ERROR: 724 717 do_complete(qp, wqe); 725 718 rxe_qp_error(qp); 719 + 720 + if (pkt) { 721 + rxe_drop_ref(pkt->qp); 722 + kfree_skb(skb); 723 + } 724 + 726 725 goto exit; 727 726 } 728 727 }
+26 -31
drivers/infiniband/sw/rxe/rxe_net.c
··· 275 275 return sock; 276 276 } 277 277 278 - static void rxe_release_udp_tunnel(struct socket *sk) 278 + void rxe_release_udp_tunnel(struct socket *sk) 279 279 { 280 - udp_tunnel_sock_release(sk); 280 + if (sk) 281 + udp_tunnel_sock_release(sk); 281 282 } 282 283 283 284 static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, ··· 659 658 return NOTIFY_OK; 660 659 } 661 660 662 - static struct notifier_block rxe_net_notifier = { 661 + struct notifier_block rxe_net_notifier = { 663 662 .notifier_call = rxe_notify, 664 663 }; 665 664 666 - int rxe_net_init(void) 665 + int rxe_net_ipv4_init(void) 667 666 { 668 - int err; 667 + spin_lock_init(&dev_list_lock); 668 + 669 + recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, 670 + htons(ROCE_V2_UDP_DPORT), false); 671 + if (IS_ERR(recv_sockets.sk4)) { 672 + recv_sockets.sk4 = NULL; 673 + pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); 674 + return -1; 675 + } 676 + 677 + return 0; 678 + } 679 + 680 + int rxe_net_ipv6_init(void) 681 + { 682 + #if IS_ENABLED(CONFIG_IPV6) 669 683 670 684 spin_lock_init(&dev_list_lock); 671 685 672 686 recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, 673 - htons(ROCE_V2_UDP_DPORT), true); 687 + htons(ROCE_V2_UDP_DPORT), true); 674 688 if (IS_ERR(recv_sockets.sk6)) { 675 689 recv_sockets.sk6 = NULL; 676 690 pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); 677 691 return -1; 678 692 } 679 - 680 - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, 681 - htons(ROCE_V2_UDP_DPORT), false); 682 - if (IS_ERR(recv_sockets.sk4)) { 683 - rxe_release_udp_tunnel(recv_sockets.sk6); 684 - recv_sockets.sk4 = NULL; 685 - recv_sockets.sk6 = NULL; 686 - pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); 687 - return -1; 688 - } 689 - 690 - err = register_netdevice_notifier(&rxe_net_notifier); 691 - if (err) { 692 - rxe_release_udp_tunnel(recv_sockets.sk6); 693 - rxe_release_udp_tunnel(recv_sockets.sk4); 694 - pr_err("rxe: Failed to rigister netdev notifier\n"); 695 - } 696 - 697 - return err; 
693 + #endif 694 + return 0; 698 695 } 699 696 700 697 void rxe_net_exit(void) 701 698 { 702 - if (recv_sockets.sk6) 703 - rxe_release_udp_tunnel(recv_sockets.sk6); 704 - 705 - if (recv_sockets.sk4) 706 - rxe_release_udp_tunnel(recv_sockets.sk4); 707 - 699 + rxe_release_udp_tunnel(recv_sockets.sk6); 700 + rxe_release_udp_tunnel(recv_sockets.sk4); 708 701 unregister_netdevice_notifier(&rxe_net_notifier); 709 702 }
+4 -1
drivers/infiniband/sw/rxe/rxe_net.h
··· 44 44 }; 45 45 46 46 extern struct rxe_recv_sockets recv_sockets; 47 + extern struct notifier_block rxe_net_notifier; 48 + void rxe_release_udp_tunnel(struct socket *sk); 47 49 48 50 struct rxe_dev *rxe_net_add(struct net_device *ndev); 49 51 50 - int rxe_net_init(void); 52 + int rxe_net_ipv4_init(void); 53 + int rxe_net_ipv6_init(void); 51 54 void rxe_net_exit(void); 52 55 53 56 #endif /* RXE_NET_H */
+1 -1
drivers/infiniband/sw/rxe/rxe_recv.c
··· 312 312 * make a copy of the skb to post to the next qp 313 313 */ 314 314 skb_copy = (mce->qp_list.next != &mcg->qp_list) ? 315 - skb_clone(skb, GFP_KERNEL) : NULL; 315 + skb_clone(skb, GFP_ATOMIC) : NULL; 316 316 317 317 pkt->qp = qp; 318 318 rxe_add_ref(qp);
+44 -13
drivers/infiniband/sw/rxe/rxe_req.c
··· 511 511 } 512 512 513 513 static void update_wqe_state(struct rxe_qp *qp, 514 - struct rxe_send_wqe *wqe, 515 - struct rxe_pkt_info *pkt, 516 - enum wqe_state *prev_state) 514 + struct rxe_send_wqe *wqe, 515 + struct rxe_pkt_info *pkt) 517 516 { 518 - enum wqe_state prev_state_ = wqe->state; 519 - 520 517 if (pkt->mask & RXE_END_MASK) { 521 518 if (qp_type(qp) == IB_QPT_RC) 522 519 wqe->state = wqe_state_pending; 523 520 } else { 524 521 wqe->state = wqe_state_processing; 525 522 } 526 - 527 - *prev_state = prev_state_; 528 523 } 529 524 530 - static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, 531 - struct rxe_pkt_info *pkt, int payload) 525 + static void update_wqe_psn(struct rxe_qp *qp, 526 + struct rxe_send_wqe *wqe, 527 + struct rxe_pkt_info *pkt, 528 + int payload) 532 529 { 533 530 /* number of packets left to send including current one */ 534 531 int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; ··· 543 546 qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; 544 547 else 545 548 qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; 549 + } 546 550 551 + static void save_state(struct rxe_send_wqe *wqe, 552 + struct rxe_qp *qp, 553 + struct rxe_send_wqe *rollback_wqe, 554 + struct rxe_qp *rollback_qp) 555 + { 556 + rollback_wqe->state = wqe->state; 557 + rollback_wqe->first_psn = wqe->first_psn; 558 + rollback_wqe->last_psn = wqe->last_psn; 559 + rollback_qp->req.psn = qp->req.psn; 560 + } 561 + 562 + static void rollback_state(struct rxe_send_wqe *wqe, 563 + struct rxe_qp *qp, 564 + struct rxe_send_wqe *rollback_wqe, 565 + struct rxe_qp *rollback_qp) 566 + { 567 + wqe->state = rollback_wqe->state; 568 + wqe->first_psn = rollback_wqe->first_psn; 569 + wqe->last_psn = rollback_wqe->last_psn; 570 + qp->req.psn = rollback_qp->req.psn; 571 + } 572 + 573 + static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, 574 + struct rxe_pkt_info *pkt, int payload) 575 + { 547 576 qp->req.opcode = pkt->opcode; 548 - 549 577 
550 578 if (pkt->mask & RXE_END_MASK) 551 579 qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); ··· 593 571 int mtu; 594 572 int opcode; 595 573 int ret; 596 - enum wqe_state prev_state; 574 + struct rxe_qp rollback_qp; 575 + struct rxe_send_wqe rollback_wqe; 597 576 598 577 next_wqe: 599 578 if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) ··· 711 688 goto err; 712 689 } 713 690 714 - update_wqe_state(qp, wqe, &pkt, &prev_state); 691 + /* 692 + * To prevent a race on wqe access between requester and completer, 693 + * wqe members state and psn need to be set before calling 694 + * rxe_xmit_packet(). 695 + * Otherwise, completer might initiate an unjustified retry flow. 696 + */ 697 + save_state(wqe, qp, &rollback_wqe, &rollback_qp); 698 + update_wqe_state(qp, wqe, &pkt); 699 + update_wqe_psn(qp, wqe, &pkt, payload); 715 700 ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); 716 701 if (ret) { 717 702 qp->need_req_skb = 1; 718 703 kfree_skb(skb); 719 704 720 - wqe->state = prev_state; 705 + rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); 721 706 722 707 if (ret == -EAGAIN) { 723 708 rxe_run_task(&qp->req.task, 1);
+6 -5
drivers/infiniband/sw/rxe/rxe_resp.c
··· 972 972 free_rd_atomic_resource(qp, res); 973 973 rxe_advance_resp_resource(qp); 974 974 975 + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); 976 + 975 977 res->type = RXE_ATOMIC_MASK; 976 978 res->atomic.skb = skb; 977 - res->first_psn = qp->resp.psn; 978 - res->last_psn = qp->resp.psn; 979 - res->cur_psn = qp->resp.psn; 979 + res->first_psn = ack_pkt.psn; 980 + res->last_psn = ack_pkt.psn; 981 + res->cur_psn = ack_pkt.psn; 980 982 981 983 rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); 982 984 if (rc) { ··· 1118 1116 rc = RESPST_CLEANUP; 1119 1117 goto out; 1120 1118 } 1121 - bth_set_psn(SKB_TO_PKT(skb_copy), 1122 - qp->resp.psn - 1); 1119 + 1123 1120 /* Resend the result. */ 1124 1121 rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, 1125 1122 pkt, skb_copy);
+9
drivers/infiniband/ulp/ipoib/ipoib_ib.c
··· 1161 1161 } 1162 1162 1163 1163 if (level == IPOIB_FLUSH_LIGHT) { 1164 + int oper_up; 1164 1165 ipoib_mark_paths_invalid(dev); 1166 + /* Set IPoIB operation as down to prevent races between: 1167 + * the flush flow which leaves MCG and on the fly joins 1168 + * which can happen during that time. mcast restart task 1169 + * should deal with join requests we missed. 1170 + */ 1171 + oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); 1165 1172 ipoib_mcast_dev_flush(dev); 1173 + if (oper_up) 1174 + set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); 1166 1175 ipoib_flush_ah(dev); 1167 1176 } 1168 1177