Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
"Several fixes scattered across the drivers and a few new features:

- Minor updates and bug fixes to hfi1, efa, ipoib, bnxt, hns

- Force disassociate the userspace FD when hns does an async reset

- bnxt new features for optimized modify QP to skip certain states,
CQ coalescing, better debug dumping

- mlx5 new data placement ordering feature

- Faster destruction of mlx5 devx HW objects

- Improvements to RDMA CM mad handling"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (51 commits)
RDMA/bnxt_re: Correct the sequence of device suspend
RDMA/bnxt_re: Use the default mode of congestion control
RDMA/bnxt_re: Support different traffic class
IB/cm: Rework sending DREQ when destroying a cm_id
IB/cm: Do not hold reference on cm_id unless needed
IB/cm: Explicitly mark if a response MAD is a retransmission
RDMA/mlx5: Move events notifier registration to be after device registration
RDMA/bnxt_re: Cache MSIx info to a local structure
RDMA/bnxt_re: Refurbish CQ to NQ hash calculation
RDMA/bnxt_re: Refactor NQ allocation
RDMA/bnxt_re: Fail probe early when not enough MSI-x vectors are reserved
RDMA/hns: Fix different dgids mapping to the same dip_idx
RDMA/bnxt_re: Add set_func_resources support for P5/P7 adapters
RDMA/bnxt_re: Enhance RoCE SRIOV resource configuration design
bnxt_en: Add support for RoCE sriov configuration
RDMA/hns: Fix NULL pointer derefernce in hns_roce_map_mr_sg()
RDMA/hns: Fix out-of-order issue of requester when setting FENCE
RDMA/nldev: Add IB device and net device rename events
RDMA/mlx5: Add implementation for ufile_hw_cleanup device operation
RDMA/core: Move ib_uverbs_file struct to uverbs_types.h
...

+1989 -511
+98 -94
drivers/infiniband/core/cm.c
··· 35 35 MODULE_LICENSE("Dual BSD/GPL"); 36 36 37 37 #define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */ 38 + #define CM_DIRECT_RETRY_CTX ((void *) 1UL) 39 + 38 40 static const char * const ibcm_rej_reason_strs[] = { 39 41 [IB_CM_REJ_NO_QP] = "no QP", 40 42 [IB_CM_REJ_NO_EEC] = "no EEC", ··· 95 93 struct cm_work *work); 96 94 static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, 97 95 struct ib_cm_sidr_rep_param *param); 98 - static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, 99 - const void *private_data, u8 private_data_len); 96 + static void cm_issue_dreq(struct cm_id_private *cm_id_priv); 100 97 static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, 101 98 void *private_data, u8 private_data_len); 102 99 static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, ··· 308 307 goto out; 309 308 } 310 309 311 - /* Timeout set by caller if response is expected. */ 312 310 m->ah = ah; 313 - m->retries = cm_id_priv->max_cm_retries; 314 - 315 - refcount_inc(&cm_id_priv->refcount); 316 - m->context[0] = cm_id_priv; 317 311 318 312 out: 319 313 spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); ··· 317 321 318 322 static void cm_free_msg(struct ib_mad_send_buf *msg) 319 323 { 320 - struct cm_id_private *cm_id_priv = msg->context[0]; 321 - 322 324 if (msg->ah) 323 325 rdma_destroy_ah(msg->ah, 0); 324 - cm_deref_id(cm_id_priv); 325 326 ib_free_send_mad(msg); 326 327 } 327 328 328 329 static struct ib_mad_send_buf * 329 - cm_alloc_priv_msg(struct cm_id_private *cm_id_priv) 330 + cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state) 330 331 { 331 332 struct ib_mad_send_buf *msg; 332 333 ··· 332 339 msg = cm_alloc_msg(cm_id_priv); 333 340 if (IS_ERR(msg)) 334 341 return msg; 342 + 335 343 cm_id_priv->msg = msg; 344 + refcount_inc(&cm_id_priv->refcount); 345 + msg->context[0] = cm_id_priv; 346 + msg->context[1] = (void *) (unsigned long) state; 347 + 348 + msg->retries = cm_id_priv->max_cm_retries; 349 
+ msg->timeout_ms = cm_id_priv->timeout_ms; 350 + 336 351 return msg; 337 352 } 338 353 ··· 359 358 ib_free_send_mad(msg); 360 359 } 361 360 362 - static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port, 363 - struct ib_mad_recv_wc *mad_recv_wc) 361 + static struct ib_mad_send_buf * 362 + cm_alloc_response_msg_no_ah(struct cm_port *port, 363 + struct ib_mad_recv_wc *mad_recv_wc, 364 + bool direct_retry) 364 365 { 365 - return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 366 - 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, 367 - GFP_ATOMIC, 368 - IB_MGMT_BASE_VERSION); 366 + struct ib_mad_send_buf *m; 367 + 368 + m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 369 + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, 370 + GFP_ATOMIC, IB_MGMT_BASE_VERSION); 371 + if (!IS_ERR(m)) 372 + m->context[0] = direct_retry ? CM_DIRECT_RETRY_CTX : NULL; 373 + 374 + return m; 369 375 } 370 376 371 377 static int cm_create_response_msg_ah(struct cm_port *port, ··· 392 384 393 385 static int cm_alloc_response_msg(struct cm_port *port, 394 386 struct ib_mad_recv_wc *mad_recv_wc, 387 + bool direct_retry, 395 388 struct ib_mad_send_buf **msg) 396 389 { 397 390 struct ib_mad_send_buf *m; 398 391 int ret; 399 392 400 - m = cm_alloc_response_msg_no_ah(port, mad_recv_wc); 393 + m = cm_alloc_response_msg_no_ah(port, mad_recv_wc, direct_retry); 401 394 if (IS_ERR(m)) 402 395 return PTR_ERR(m); 403 396 ··· 410 401 411 402 *msg = m; 412 403 return 0; 413 - } 414 - 415 - static void cm_free_response_msg(struct ib_mad_send_buf *msg) 416 - { 417 - if (msg->ah) 418 - rdma_destroy_ah(msg->ah, 0); 419 - ib_free_send_mad(msg); 420 404 } 421 405 422 406 static void *cm_copy_private_data(const void *private_data, u8 private_data_len) ··· 1111 1109 cm_id->state = IB_CM_IDLE; 1112 1110 break; 1113 1111 } 1114 - cm_send_dreq_locked(cm_id_priv, NULL, 0); 1112 + cm_issue_dreq(cm_id_priv); 1113 + cm_enter_timewait(cm_id_priv); 1115 1114 goto retest; 1116 
1115 case IB_CM_DREQ_SENT: 1117 1116 ib_cancel_mad(cm_id_priv->msg); ··· 1560 1557 if (param->alternate_path) 1561 1558 cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); 1562 1559 1563 - msg = cm_alloc_priv_msg(cm_id_priv); 1560 + msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REQ_SENT); 1564 1561 if (IS_ERR(msg)) { 1565 1562 ret = PTR_ERR(msg); 1566 1563 goto out_unlock; ··· 1569 1566 req_msg = (struct cm_req_msg *)msg->mad; 1570 1567 cm_format_req(req_msg, cm_id_priv, param); 1571 1568 cm_id_priv->tid = req_msg->hdr.tid; 1572 - msg->timeout_ms = cm_id_priv->timeout_ms; 1573 - msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT; 1574 1569 1575 1570 cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); 1576 1571 cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); ··· 1599 1598 struct cm_rej_msg *rej_msg, *rcv_msg; 1600 1599 int ret; 1601 1600 1602 - ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); 1601 + ret = cm_alloc_response_msg(port, mad_recv_wc, false, &msg); 1603 1602 if (ret) 1604 1603 return ret; 1605 1604 ··· 1625 1624 IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); 1626 1625 ret = ib_post_send_mad(msg, NULL); 1627 1626 if (ret) 1628 - cm_free_response_msg(msg); 1627 + cm_free_msg(msg); 1629 1628 1630 1629 return ret; 1631 1630 } ··· 1952 1951 } 1953 1952 spin_unlock_irq(&cm_id_priv->lock); 1954 1953 1955 - ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); 1954 + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg); 1956 1955 if (ret) 1957 1956 return; 1958 1957 ··· 1981 1980 return; 1982 1981 1983 1982 unlock: spin_unlock_irq(&cm_id_priv->lock); 1984 - free: cm_free_response_msg(msg); 1983 + free: cm_free_msg(msg); 1985 1984 } 1986 1985 1987 1986 static struct cm_id_private *cm_match_req(struct cm_work *work, ··· 2295 2294 goto out; 2296 2295 } 2297 2296 2298 - msg = cm_alloc_priv_msg(cm_id_priv); 2297 + msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REP_SENT); 2299 2298 if (IS_ERR(msg)) 
{ 2300 2299 ret = PTR_ERR(msg); 2301 2300 goto out; ··· 2303 2302 2304 2303 rep_msg = (struct cm_rep_msg *) msg->mad; 2305 2304 cm_format_rep(rep_msg, cm_id_priv, param); 2306 - msg->timeout_ms = cm_id_priv->timeout_ms; 2307 - msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; 2308 2305 2309 2306 trace_icm_send_rep(cm_id); 2310 2307 ret = ib_post_send_mad(msg, NULL); ··· 2443 2444 2444 2445 atomic_long_inc( 2445 2446 &work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]); 2446 - ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); 2447 + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg); 2447 2448 if (ret) 2448 2449 goto deref; 2449 2450 ··· 2468 2469 goto deref; 2469 2470 2470 2471 unlock: spin_unlock_irq(&cm_id_priv->lock); 2471 - free: cm_free_response_msg(msg); 2472 + free: cm_free_msg(msg); 2472 2473 deref: cm_deref_id(cm_id_priv); 2473 2474 } 2474 2475 ··· 2652 2653 private_data_len); 2653 2654 } 2654 2655 2655 - static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, 2656 - const void *private_data, u8 private_data_len) 2656 + static void cm_issue_dreq(struct cm_id_private *cm_id_priv) 2657 2657 { 2658 2658 struct ib_mad_send_buf *msg; 2659 2659 int ret; 2660 2660 2661 2661 lockdep_assert_held(&cm_id_priv->lock); 2662 2662 2663 - if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) 2664 - return -EINVAL; 2663 + msg = cm_alloc_msg(cm_id_priv); 2664 + if (IS_ERR(msg)) 2665 + return; 2665 2666 2666 - if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { 2667 - trace_icm_dreq_skipped(&cm_id_priv->id); 2668 - return -EINVAL; 2669 - } 2670 - 2671 - if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || 2672 - cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) 2673 - ib_cancel_mad(cm_id_priv->msg); 2674 - 2675 - msg = cm_alloc_priv_msg(cm_id_priv); 2676 - if (IS_ERR(msg)) { 2677 - cm_enter_timewait(cm_id_priv); 2678 - return PTR_ERR(msg); 2679 - } 2680 - 2681 - cm_format_dreq((struct cm_dreq_msg *) msg->mad, 
cm_id_priv, 2682 - private_data, private_data_len); 2683 - msg->timeout_ms = cm_id_priv->timeout_ms; 2684 - msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; 2667 + cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, NULL, 0); 2685 2668 2686 2669 trace_icm_send_dreq(&cm_id_priv->id); 2687 2670 ret = ib_post_send_mad(msg, NULL); 2688 - if (ret) { 2689 - cm_enter_timewait(cm_id_priv); 2690 - cm_free_priv_msg(msg); 2691 - return ret; 2692 - } 2693 - 2694 - cm_id_priv->id.state = IB_CM_DREQ_SENT; 2695 - return 0; 2671 + if (ret) 2672 + cm_free_msg(msg); 2696 2673 } 2697 2674 2698 2675 int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, ··· 2676 2701 { 2677 2702 struct cm_id_private *cm_id_priv = 2678 2703 container_of(cm_id, struct cm_id_private, id); 2704 + struct ib_mad_send_buf *msg; 2679 2705 unsigned long flags; 2680 2706 int ret; 2681 2707 2708 + if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) 2709 + return -EINVAL; 2710 + 2682 2711 spin_lock_irqsave(&cm_id_priv->lock, flags); 2683 - ret = cm_send_dreq_locked(cm_id_priv, private_data, private_data_len); 2712 + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { 2713 + trace_icm_dreq_skipped(&cm_id_priv->id); 2714 + ret = -EINVAL; 2715 + goto unlock; 2716 + } 2717 + 2718 + if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || 2719 + cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) 2720 + ib_cancel_mad(cm_id_priv->msg); 2721 + 2722 + msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_DREQ_SENT); 2723 + if (IS_ERR(msg)) { 2724 + cm_enter_timewait(cm_id_priv); 2725 + ret = PTR_ERR(msg); 2726 + goto unlock; 2727 + } 2728 + 2729 + cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, 2730 + private_data, private_data_len); 2731 + 2732 + trace_icm_send_dreq(&cm_id_priv->id); 2733 + ret = ib_post_send_mad(msg, NULL); 2734 + if (ret) { 2735 + cm_enter_timewait(cm_id_priv); 2736 + cm_free_priv_msg(msg); 2737 + goto unlock; 2738 + } 2739 + 2740 + cm_id_priv->id.state = 
IB_CM_DREQ_SENT; 2741 + unlock: 2684 2742 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2685 2743 return ret; 2686 2744 } ··· 2799 2791 struct cm_drep_msg *drep_msg; 2800 2792 int ret; 2801 2793 2802 - ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); 2794 + ret = cm_alloc_response_msg(port, mad_recv_wc, true, &msg); 2803 2795 if (ret) 2804 2796 return ret; 2805 2797 ··· 2817 2809 IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); 2818 2810 ret = ib_post_send_mad(msg, NULL); 2819 2811 if (ret) 2820 - cm_free_response_msg(msg); 2812 + cm_free_msg(msg); 2821 2813 2822 2814 return ret; 2823 2815 } ··· 2864 2856 case IB_CM_TIMEWAIT: 2865 2857 atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] 2866 2858 [CM_DREQ_COUNTER]); 2867 - msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc); 2859 + msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc, 2860 + true); 2868 2861 if (IS_ERR(msg)) 2869 2862 goto unlock; 2870 2863 ··· 2876 2867 2877 2868 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || 2878 2869 ib_post_send_mad(msg, NULL)) 2879 - cm_free_response_msg(msg); 2870 + cm_free_msg(msg); 2880 2871 goto deref; 2881 2872 case IB_CM_DREQ_RCVD: 2882 2873 atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] ··· 3370 3361 case IB_CM_MRA_LAP_SENT: 3371 3362 atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] 3372 3363 [CM_LAP_COUNTER]); 3373 - msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc); 3364 + msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc, 3365 + true); 3374 3366 if (IS_ERR(msg)) 3375 3367 goto unlock; 3376 3368 ··· 3384 3374 3385 3375 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || 3386 3376 ib_post_send_mad(msg, NULL)) 3387 - cm_free_response_msg(msg); 3377 + cm_free_msg(msg); 3388 3378 goto deref; 3389 3379 case IB_CM_LAP_RCVD: 3390 3380 atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] ··· 3523 3513 goto out_unlock; 3524 3514 } 3525 3515 3526 - msg 
= cm_alloc_priv_msg(cm_id_priv); 3516 + msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_SIDR_REQ_SENT); 3527 3517 if (IS_ERR(msg)) { 3528 3518 ret = PTR_ERR(msg); 3529 3519 goto out_unlock; ··· 3531 3521 3532 3522 cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv, 3533 3523 param); 3534 - msg->timeout_ms = cm_id_priv->timeout_ms; 3535 - msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT; 3536 3524 3537 3525 trace_icm_send_sidr_req(&cm_id_priv->id); 3538 3526 ret = ib_post_send_mad(msg, NULL); ··· 3776 3768 3777 3769 static void cm_process_send_error(struct cm_id_private *cm_id_priv, 3778 3770 struct ib_mad_send_buf *msg, 3779 - enum ib_cm_state state, 3780 3771 enum ib_wc_status wc_status) 3781 3772 { 3773 + enum ib_cm_state state = (unsigned long) msg->context[1]; 3782 3774 struct ib_cm_event cm_event = {}; 3783 3775 int ret; 3784 3776 3785 - /* Discard old sends or ones without a response. */ 3777 + /* Discard old sends. */ 3786 3778 spin_lock_irq(&cm_id_priv->lock); 3787 3779 if (msg != cm_id_priv->msg) { 3788 3780 spin_unlock_irq(&cm_id_priv->lock); 3789 - cm_free_msg(msg); 3781 + cm_free_priv_msg(msg); 3790 3782 return; 3791 3783 } 3792 3784 cm_free_priv_msg(msg); ··· 3834 3826 struct ib_mad_send_wc *mad_send_wc) 3835 3827 { 3836 3828 struct ib_mad_send_buf *msg = mad_send_wc->send_buf; 3837 - struct cm_id_private *cm_id_priv = msg->context[0]; 3838 - enum ib_cm_state state = 3839 - (enum ib_cm_state)(unsigned long)msg->context[1]; 3829 + struct cm_id_private *cm_id_priv; 3840 3830 struct cm_port *port; 3841 3831 u16 attr_index; 3842 3832 ··· 3842 3836 attr_index = be16_to_cpu(((struct ib_mad_hdr *) 3843 3837 msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; 3844 3838 3845 - /* 3846 - * If the send was in response to a received message (context[0] is not 3847 - * set to a cm_id), and is not a REJ, then it is a send that was 3848 - * manually retried. 
3849 - */ 3850 - if (!cm_id_priv && (attr_index != CM_REJ_COUNTER)) 3839 + if (msg->context[0] == CM_DIRECT_RETRY_CTX) { 3851 3840 msg->retries = 1; 3841 + cm_id_priv = NULL; 3842 + } else { 3843 + cm_id_priv = msg->context[0]; 3844 + } 3852 3845 3853 3846 atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]); 3854 3847 if (msg->retries) ··· 3855 3850 &port->counters[CM_XMIT_RETRIES][attr_index]); 3856 3851 3857 3852 if (cm_id_priv) 3858 - cm_process_send_error(cm_id_priv, msg, state, 3859 - mad_send_wc->status); 3853 + cm_process_send_error(cm_id_priv, msg, mad_send_wc->status); 3860 3854 else 3861 - cm_free_response_msg(msg); 3855 + cm_free_msg(msg); 3862 3856 } 3863 3857 3864 3858 static void cm_work_handler(struct work_struct *_work)
+39
drivers/infiniband/core/device.c
··· 437 437 client->rename(ibdev, client_data); 438 438 } 439 439 up_read(&ibdev->client_data_rwsem); 440 + rdma_nl_notify_event(ibdev, 0, RDMA_RENAME_EVENT); 440 441 up_read(&devices_rwsem); 441 442 return 0; 442 443 } ··· 2760 2759 SET_DEVICE_OP(dev_ops, resize_cq); 2761 2760 SET_DEVICE_OP(dev_ops, set_vf_guid); 2762 2761 SET_DEVICE_OP(dev_ops, set_vf_link_state); 2762 + SET_DEVICE_OP(dev_ops, ufile_hw_cleanup); 2763 2763 2764 2764 SET_OBJ_SIZE(dev_ops, ib_ah); 2765 2765 SET_OBJ_SIZE(dev_ops, ib_counters); ··· 2854 2852 }, 2855 2853 }; 2856 2854 2855 + static int ib_netdevice_event(struct notifier_block *this, 2856 + unsigned long event, void *ptr) 2857 + { 2858 + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 2859 + struct net_device *ib_ndev; 2860 + struct ib_device *ibdev; 2861 + u32 port; 2862 + 2863 + switch (event) { 2864 + case NETDEV_CHANGENAME: 2865 + ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN); 2866 + if (!ibdev) 2867 + return NOTIFY_DONE; 2868 + 2869 + rdma_for_each_port(ibdev, port) { 2870 + ib_ndev = ib_device_get_netdev(ibdev, port); 2871 + if (ndev == ib_ndev) 2872 + rdma_nl_notify_event(ibdev, port, 2873 + RDMA_NETDEV_RENAME_EVENT); 2874 + dev_put(ib_ndev); 2875 + } 2876 + ib_device_put(ibdev); 2877 + break; 2878 + default: 2879 + break; 2880 + } 2881 + 2882 + return NOTIFY_DONE; 2883 + } 2884 + 2885 + static struct notifier_block nb_netdevice = { 2886 + .notifier_call = ib_netdevice_event, 2887 + }; 2888 + 2857 2889 static int __init ib_core_init(void) 2858 2890 { 2859 2891 int ret = -ENOMEM; ··· 2959 2923 goto err_parent; 2960 2924 } 2961 2925 2926 + register_netdevice_notifier(&nb_netdevice); 2927 + 2962 2928 return 0; 2963 2929 2964 2930 err_parent: ··· 2990 2952 2991 2953 static void __exit ib_core_cleanup(void) 2992 2954 { 2955 + unregister_netdevice_notifier(&nb_netdevice); 2993 2956 roce_gid_mgmt_cleanup(); 2994 2957 rdma_nl_unregister(RDMA_NL_LS); 2995 2958 nldev_exit();
+38 -2
drivers/infiniband/core/nldev.c
··· 2729 2729 }, 2730 2730 }; 2731 2731 2732 + static int fill_mon_netdev_rename(struct sk_buff *msg, 2733 + struct ib_device *device, u32 port, 2734 + const struct net *net) 2735 + { 2736 + struct net_device *netdev = ib_device_get_netdev(device, port); 2737 + int ret = 0; 2738 + 2739 + if (!netdev || !net_eq(dev_net(netdev), net)) 2740 + goto out; 2741 + 2742 + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 2743 + if (ret) 2744 + goto out; 2745 + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); 2746 + out: 2747 + dev_put(netdev); 2748 + return ret; 2749 + } 2750 + 2732 2751 static int fill_mon_netdev_association(struct sk_buff *msg, 2733 2752 struct ib_device *device, u32 port, 2734 2753 const struct net *net) ··· 2812 2793 "Failed to send RDMA monitor netdev detach event: port %d\n", 2813 2794 port_num); 2814 2795 break; 2796 + case RDMA_RENAME_EVENT: 2797 + dev_warn_ratelimited(&device->dev, 2798 + "Failed to send RDMA monitor rename device event\n"); 2799 + break; 2800 + 2801 + case RDMA_NETDEV_RENAME_EVENT: 2802 + netdev = ib_device_get_netdev(device, port_num); 2803 + dev_warn_ratelimited(&device->dev, 2804 + "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n", 2805 + port_num, netdev->ifindex); 2806 + dev_put(netdev); 2807 + break; 2815 2808 default: 2816 2809 break; 2817 2810 } ··· 2853 2822 switch (type) { 2854 2823 case RDMA_REGISTER_EVENT: 2855 2824 case RDMA_UNREGISTER_EVENT: 2825 + case RDMA_RENAME_EVENT: 2856 2826 ret = fill_nldev_handle(skb, device); 2857 2827 if (ret) 2858 2828 goto err_free; 2859 2829 break; 2860 2830 case RDMA_NETDEV_ATTACH_EVENT: 2861 2831 case RDMA_NETDEV_DETACH_EVENT: 2862 - ret = fill_mon_netdev_association(skb, device, 2863 - port_num, net); 2832 + ret = fill_mon_netdev_association(skb, device, port_num, net); 2833 + if (ret) 2834 + goto err_free; 2835 + break; 2836 + case RDMA_NETDEV_RENAME_EVENT: 2837 + ret = fill_mon_netdev_rename(skb, device, port_num, net); 
2864 2838 if (ret) 2865 2839 goto err_free; 2866 2840 break;
+9 -3
drivers/infiniband/core/rdma_core.c
··· 58 58 } 59 59 EXPORT_SYMBOL(uverbs_uobject_put); 60 60 61 - static int uverbs_try_lock_object(struct ib_uobject *uobj, 62 - enum rdma_lookup_mode mode) 61 + int uverbs_try_lock_object(struct ib_uobject *uobj, 62 + enum rdma_lookup_mode mode) 63 63 { 64 64 /* 65 65 * When a shared access is required, we use a positive counter. Each ··· 84 84 } 85 85 return 0; 86 86 } 87 + EXPORT_SYMBOL(uverbs_try_lock_object); 87 88 88 89 static void assert_uverbs_usecnt(struct ib_uobject *uobj, 89 90 enum rdma_lookup_mode mode) ··· 881 880 static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, 882 881 enum rdma_remove_reason reason) 883 882 { 883 + struct uverbs_attr_bundle attrs = { .ufile = ufile }; 884 + struct ib_ucontext *ucontext = ufile->ucontext; 885 + struct ib_device *ib_dev = ucontext->device; 884 886 struct ib_uobject *obj, *next_obj; 885 887 int ret = -EINVAL; 886 - struct uverbs_attr_bundle attrs = { .ufile = ufile }; 888 + 889 + if (ib_dev->ops.ufile_hw_cleanup) 890 + ib_dev->ops.ufile_hw_cleanup(ufile); 887 891 888 892 /* 889 893 * This shouldn't run while executing other commands on this
+26 -4
drivers/infiniband/core/roce_gid_mgmt.c
··· 515 515 } 516 516 EXPORT_SYMBOL(rdma_roce_rescan_device); 517 517 518 + /** 519 + * rdma_roce_rescan_port - Rescan all of the network devices in the system 520 + * and add their gids if relevant to the port of the RoCE device. 521 + * 522 + * @ib_dev: IB device 523 + * @port: Port number 524 + */ 525 + void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port) 526 + { 527 + struct net_device *ndev = NULL; 528 + 529 + if (rdma_protocol_roce(ib_dev, port)) { 530 + ndev = ib_device_get_netdev(ib_dev, port); 531 + if (!ndev) 532 + return; 533 + enum_all_gids_of_dev_cb(ib_dev, port, ndev, ndev); 534 + dev_put(ndev); 535 + } 536 + } 537 + EXPORT_SYMBOL(rdma_roce_rescan_port); 538 + 518 539 static void callback_for_addr_gid_device_scan(struct ib_device *device, 519 540 u32 port, 520 541 struct net_device *rdma_ndev, ··· 596 575 } 597 576 } 598 577 599 - static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port, 600 - struct net_device *event_ndev) 578 + void roce_del_all_netdev_gids(struct ib_device *ib_dev, 579 + u32 port, struct net_device *ndev) 601 580 { 602 - ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev); 581 + ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev); 603 582 } 583 + EXPORT_SYMBOL(roce_del_all_netdev_gids); 604 584 605 585 static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port, 606 586 struct net_device *rdma_ndev, void *cookie) 607 587 { 608 - handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids); 588 + handle_netdev_upper(ib_dev, port, cookie, roce_del_all_netdev_gids); 609 589 } 610 590 611 591 static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
-29
drivers/infiniband/core/uverbs.h
··· 133 133 struct ib_uverbs_event_queue ev_queue; 134 134 }; 135 135 136 - struct ib_uverbs_file { 137 - struct kref ref; 138 - struct ib_uverbs_device *device; 139 - struct mutex ucontext_lock; 140 - /* 141 - * ucontext must be accessed via ib_uverbs_get_ucontext() or with 142 - * ucontext_lock held 143 - */ 144 - struct ib_ucontext *ucontext; 145 - struct ib_uverbs_async_event_file *default_async_file; 146 - struct list_head list; 147 - 148 - /* 149 - * To access the uobjects list hw_destroy_rwsem must be held for write 150 - * OR hw_destroy_rwsem held for read AND uobjects_lock held. 151 - * hw_destroy_rwsem should be called across any destruction of the HW 152 - * object of an associated uobject. 153 - */ 154 - struct rw_semaphore hw_destroy_rwsem; 155 - spinlock_t uobjects_lock; 156 - struct list_head uobjects; 157 - 158 - struct mutex umap_lock; 159 - struct list_head umaps; 160 - struct page *disassociate_page; 161 - 162 - struct xarray idr; 163 - }; 164 - 165 136 struct ib_uverbs_event { 166 137 union { 167 138 struct ib_uverbs_async_event_desc async;
+41 -2
drivers/infiniband/core/uverbs_main.c
··· 76 76 static DEFINE_IDA(uverbs_ida); 77 77 static int ib_uverbs_add_one(struct ib_device *device); 78 78 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); 79 + static struct ib_client uverbs_client; 79 80 80 81 static char *uverbs_devnode(const struct device *dev, umode_t *mode) 81 82 { ··· 218 217 219 218 if (file->disassociate_page) 220 219 __free_pages(file->disassociate_page, 0); 220 + mutex_destroy(&file->disassociation_lock); 221 221 mutex_destroy(&file->umap_lock); 222 222 mutex_destroy(&file->ucontext_lock); 223 223 kfree(file); ··· 700 698 ret = PTR_ERR(ucontext); 701 699 goto out; 702 700 } 701 + 702 + mutex_lock(&file->disassociation_lock); 703 + 703 704 vma->vm_ops = &rdma_umap_ops; 704 705 ret = ucontext->device->ops.mmap(ucontext, vma); 706 + 707 + mutex_unlock(&file->disassociation_lock); 705 708 out: 706 709 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 707 710 return ret; ··· 728 721 /* We are racing with disassociation */ 729 722 if (!down_read_trylock(&ufile->hw_destroy_rwsem)) 730 723 goto out_zap; 724 + mutex_lock(&ufile->disassociation_lock); 725 + 731 726 /* 732 727 * Disassociation already completed, the VMA should already be zapped. 
733 728 */ ··· 741 732 goto out_unlock; 742 733 rdma_umap_priv_init(priv, vma, opriv->entry); 743 734 735 + mutex_unlock(&ufile->disassociation_lock); 744 736 up_read(&ufile->hw_destroy_rwsem); 745 737 return; 746 738 747 739 out_unlock: 740 + mutex_unlock(&ufile->disassociation_lock); 748 741 up_read(&ufile->hw_destroy_rwsem); 749 742 out_zap: 750 743 /* ··· 830 819 { 831 820 struct rdma_umap_priv *priv, *next_priv; 832 821 833 - lockdep_assert_held(&ufile->hw_destroy_rwsem); 822 + mutex_lock(&ufile->disassociation_lock); 834 823 835 824 while (1) { 836 825 struct mm_struct *mm = NULL; ··· 856 845 break; 857 846 } 858 847 mutex_unlock(&ufile->umap_lock); 859 - if (!mm) 848 + if (!mm) { 849 + mutex_unlock(&ufile->disassociation_lock); 860 850 return; 851 + } 861 852 862 853 /* 863 854 * The umap_lock is nested under mmap_lock since it used within ··· 889 876 mmap_read_unlock(mm); 890 877 mmput(mm); 891 878 } 879 + 880 + mutex_unlock(&ufile->disassociation_lock); 892 881 } 882 + 883 + /** 884 + * rdma_user_mmap_disassociate() - Revoke mmaps for a device 885 + * @device: device to revoke 886 + * 887 + * This function should be called by drivers that need to disable mmaps for the 888 + * device, for instance because it is going to be reset. 
889 + */ 890 + void rdma_user_mmap_disassociate(struct ib_device *device) 891 + { 892 + struct ib_uverbs_device *uverbs_dev = 893 + ib_get_client_data(device, &uverbs_client); 894 + struct ib_uverbs_file *ufile; 895 + 896 + mutex_lock(&uverbs_dev->lists_mutex); 897 + list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) { 898 + if (ufile->ucontext) 899 + uverbs_user_mmap_disassociate(ufile); 900 + } 901 + mutex_unlock(&uverbs_dev->lists_mutex); 902 + } 903 + EXPORT_SYMBOL(rdma_user_mmap_disassociate); 893 904 894 905 /* 895 906 * ib_uverbs_open() does not need the BKL: ··· 983 946 init_rwsem(&file->hw_destroy_rwsem); 984 947 mutex_init(&file->umap_lock); 985 948 INIT_LIST_HEAD(&file->umaps); 949 + 950 + mutex_init(&file->disassociation_lock); 986 951 987 952 filp->private_data = file; 988 953 list_add_tail(&file->list, &dev->uverbs_file_list);
+2 -1
drivers/infiniband/hw/bnxt_re/Makefile
··· 4 4 obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o 5 5 bnxt_re-y := main.o ib_verbs.o \ 6 6 qplib_res.o qplib_rcfw.o \ 7 - qplib_sp.o qplib_fp.o hw_counters.o 7 + qplib_sp.o qplib_fp.o hw_counters.o \ 8 + debugfs.o
+41 -6
drivers/infiniband/hw/bnxt_re/bnxt_re.h
··· 154 154 155 155 #define BNXT_RE_GRC_FIFO_REG_BASE 0x2000 156 156 157 + #define BNXT_RE_MIN_MSIX 2 158 + #define BNXT_RE_MAX_MSIX BNXT_MAX_ROCE_MSIX 159 + struct bnxt_re_nq_record { 160 + struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX]; 161 + struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; 162 + int num_msix; 163 + /* serialize NQ access */ 164 + struct mutex load_lock; 165 + }; 166 + 157 167 #define MAX_CQ_HASH_BITS (16) 158 168 #define MAX_SRQ_HASH_BITS (16) 169 + 170 + static inline bool bnxt_re_chip_gen_p7(u16 chip_num) 171 + { 172 + return (chip_num == CHIP_NUM_58818 || 173 + chip_num == CHIP_NUM_57608); 174 + } 175 + 159 176 struct bnxt_re_dev { 160 177 struct ib_device ibdev; 161 178 struct list_head list; ··· 191 174 unsigned int version, major, minor; 192 175 struct bnxt_qplib_chip_ctx *chip_ctx; 193 176 struct bnxt_en_dev *en_dev; 194 - int num_msix; 195 177 196 178 int id; 197 179 198 180 struct delayed_work worker; 199 181 u8 cur_prio_map; 200 182 201 - /* FP Notification Queue (CQ & SRQ) */ 202 - struct tasklet_struct nq_task; 203 - 204 183 /* RCFW Channel */ 205 184 struct bnxt_qplib_rcfw rcfw; 206 185 207 - /* NQ */ 208 - struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX]; 186 + /* NQ record */ 187 + struct bnxt_re_nq_record *nqr; 209 188 210 189 /* Device Resources */ 211 190 struct bnxt_qplib_dev_attr dev_attr; 212 191 struct bnxt_qplib_ctx qplib_ctx; 213 192 struct bnxt_qplib_res qplib_res; 214 193 struct bnxt_qplib_dpi dpi_privileged; 194 + struct bnxt_qplib_cq_coal_param cq_coalescing; 215 195 216 196 struct mutex qp_lock; /* protect qp list */ 217 197 struct list_head qp_list; ··· 227 213 struct delayed_work dbq_pacing_work; 228 214 DECLARE_HASHTABLE(cq_hash, MAX_CQ_HASH_BITS); 229 215 DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS); 216 + struct dentry *dbg_root; 217 + struct dentry *qp_debugfs; 230 218 }; 231 219 232 220 #define to_bnxt_re_dev(ptr, member) \ ··· 255 239 rdev->qplib_res.pacing_data->dev_err_state = 256 240 
test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); 257 241 } 242 + 243 + static inline int bnxt_re_read_context_allowed(struct bnxt_re_dev *rdev) 244 + { 245 + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) || 246 + rdev->rcfw.res->cctx->hwrm_intf_ver < HWRM_VERSION_READ_CTX) 247 + return -EOPNOTSUPP; 248 + return 0; 249 + } 250 + 251 + #define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5 1088 252 + #define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5 128 253 + #define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5 128 254 + #define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5 192 255 + 256 + #define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 1088 257 + #define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 192 258 + #define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 192 259 + #define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 192 260 + 258 261 #endif
+138
drivers/infiniband/hw/bnxt_re/debugfs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause 2 + /* 3 + * Copyright (c) 2024, Broadcom. All rights reserved. The term 4 + * Broadcom refers to Broadcom Limited and/or its subsidiaries. 5 + * 6 + * Description: Debugfs component of the bnxt_re driver 7 + */ 8 + 9 + #include <linux/debugfs.h> 10 + #include <linux/pci.h> 11 + #include <rdma/ib_addr.h> 12 + 13 + #include "bnxt_ulp.h" 14 + #include "roce_hsi.h" 15 + #include "qplib_res.h" 16 + #include "qplib_sp.h" 17 + #include "qplib_fp.h" 18 + #include "qplib_rcfw.h" 19 + #include "bnxt_re.h" 20 + #include "ib_verbs.h" 21 + #include "debugfs.h" 22 + 23 + static struct dentry *bnxt_re_debugfs_root; 24 + 25 + static inline const char *bnxt_re_qp_state_str(u8 state) 26 + { 27 + switch (state) { 28 + case CMDQ_MODIFY_QP_NEW_STATE_RESET: 29 + return "RST"; 30 + case CMDQ_MODIFY_QP_NEW_STATE_INIT: 31 + return "INIT"; 32 + case CMDQ_MODIFY_QP_NEW_STATE_RTR: 33 + return "RTR"; 34 + case CMDQ_MODIFY_QP_NEW_STATE_RTS: 35 + return "RTS"; 36 + case CMDQ_MODIFY_QP_NEW_STATE_SQE: 37 + return "SQER"; 38 + case CMDQ_MODIFY_QP_NEW_STATE_SQD: 39 + return "SQD"; 40 + case CMDQ_MODIFY_QP_NEW_STATE_ERR: 41 + return "ERR"; 42 + default: 43 + return "Invalid QP state"; 44 + } 45 + } 46 + 47 + static inline const char *bnxt_re_qp_type_str(u8 type) 48 + { 49 + switch (type) { 50 + case CMDQ_CREATE_QP1_TYPE_GSI: return "QP1"; 51 + case CMDQ_CREATE_QP_TYPE_GSI: return "QP1"; 52 + case CMDQ_CREATE_QP_TYPE_RC: return "RC"; 53 + case CMDQ_CREATE_QP_TYPE_UD: return "UD"; 54 + case CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE: return "RAW_ETHERTYPE"; 55 + default: return "Invalid transport type"; 56 + } 57 + } 58 + 59 + static ssize_t qp_info_read(struct file *filep, 60 + char __user *buffer, 61 + size_t count, loff_t *ppos) 62 + { 63 + struct bnxt_re_qp *qp = filep->private_data; 64 + char *buf; 65 + int len; 66 + 67 + if (*ppos) 68 + return 0; 69 + 70 + buf = kasprintf(GFP_KERNEL, 71 + "QPN\t\t: %d\n" 72 + "transport\t: %s\n" 73 + "state\t\t: 
%s\n" 74 + "mtu\t\t: %d\n" 75 + "timeout\t\t: %d\n" 76 + "remote QPN\t: %d\n", 77 + qp->qplib_qp.id, 78 + bnxt_re_qp_type_str(qp->qplib_qp.type), 79 + bnxt_re_qp_state_str(qp->qplib_qp.state), 80 + qp->qplib_qp.mtu, 81 + qp->qplib_qp.timeout, 82 + qp->qplib_qp.dest_qpn); 83 + if (!buf) 84 + return -ENOMEM; 85 + if (count < strlen(buf)) { 86 + kfree(buf); 87 + return -ENOSPC; 88 + } 89 + len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); 90 + kfree(buf); 91 + return len; 92 + } 93 + 94 + static const struct file_operations debugfs_qp_fops = { 95 + .owner = THIS_MODULE, 96 + .open = simple_open, 97 + .read = qp_info_read, 98 + }; 99 + 100 + void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp) 101 + { 102 + char resn[32]; 103 + 104 + sprintf(resn, "0x%x", qp->qplib_qp.id); 105 + qp->dentry = debugfs_create_file(resn, 0400, rdev->qp_debugfs, qp, &debugfs_qp_fops); 106 + } 107 + 108 + void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp) 109 + { 110 + debugfs_remove(qp->dentry); 111 + } 112 + 113 + void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev) 114 + { 115 + struct pci_dev *pdev = rdev->en_dev->pdev; 116 + 117 + rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root); 118 + 119 + rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root); 120 + } 121 + 122 + void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev) 123 + { 124 + debugfs_remove_recursive(rdev->qp_debugfs); 125 + 126 + debugfs_remove_recursive(rdev->dbg_root); 127 + rdev->dbg_root = NULL; 128 + } 129 + 130 + void bnxt_re_register_debugfs(void) 131 + { 132 + bnxt_re_debugfs_root = debugfs_create_dir("bnxt_re", NULL); 133 + } 134 + 135 + void bnxt_re_unregister_debugfs(void) 136 + { 137 + debugfs_remove(bnxt_re_debugfs_root); 138 + }
+21
drivers/infiniband/hw/bnxt_re/debugfs.h
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
 * Copyright (c) 2024, Broadcom. All rights reserved. The term
 * Broadcom refers to Broadcom Limited and/or its subsidiaries.
 *
 * Description: Debugfs header
 */

#ifndef __BNXT_RE_DEBUGFS__
#define __BNXT_RE_DEBUGFS__

/*
 * Only pointers to these types appear below, so forward declarations
 * are enough and keep this header usable regardless of include order.
 */
struct bnxt_re_dev;
struct bnxt_re_qp;

/* Per-QP debugfs file: create on QP creation, remove on QP destruction. */
void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);

/* Per-device debugfs directory (and its "QPs" sub-directory). */
void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev);
void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);

/* Driver-wide "bnxt_re" debugfs directory. */
void bnxt_re_register_debugfs(void);
void bnxt_re_unregister_debugfs(void);

#endif
+105 -25
drivers/infiniband/hw/bnxt_re/ib_verbs.c
··· 62 62 63 63 #include "bnxt_re.h" 64 64 #include "ib_verbs.h" 65 + #include "debugfs.h" 65 66 66 67 #include <rdma/uverbs_types.h> 67 68 #include <rdma/uverbs_std_types.h> ··· 95 94 return qflags; 96 95 }; 97 96 98 - static enum ib_access_flags __to_ib_access_flags(int qflags) 97 + static int __to_ib_access_flags(int qflags) 99 98 { 100 - enum ib_access_flags iflags = 0; 99 + int iflags = 0; 101 100 102 101 if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE) 103 102 iflags |= IB_ACCESS_LOCAL_WRITE; ··· 114 113 if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND) 115 114 iflags |= IB_ACCESS_ON_DEMAND; 116 115 return iflags; 117 - }; 116 + } 117 + 118 + static u8 __qp_access_flags_from_ib(struct bnxt_qplib_chip_ctx *cctx, int iflags) 119 + { 120 + u8 qflags = 0; 121 + 122 + if (!bnxt_qplib_is_chip_gen_p5_p7(cctx)) 123 + /* For Wh+ */ 124 + return (u8)__from_ib_access_flags(iflags); 125 + 126 + /* For P5, P7 and later chips */ 127 + if (iflags & IB_ACCESS_LOCAL_WRITE) 128 + qflags |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE; 129 + if (iflags & IB_ACCESS_REMOTE_WRITE) 130 + qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE; 131 + if (iflags & IB_ACCESS_REMOTE_READ) 132 + qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ; 133 + if (iflags & IB_ACCESS_REMOTE_ATOMIC) 134 + qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC; 135 + 136 + return qflags; 137 + } 138 + 139 + static int __qp_access_flags_to_ib(struct bnxt_qplib_chip_ctx *cctx, u8 qflags) 140 + { 141 + int iflags = 0; 142 + 143 + if (!bnxt_qplib_is_chip_gen_p5_p7(cctx)) 144 + /* For Wh+ */ 145 + return __to_ib_access_flags(qflags); 146 + 147 + /* For P5, P7 and later chips */ 148 + if (qflags & CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE) 149 + iflags |= IB_ACCESS_LOCAL_WRITE; 150 + if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE) 151 + iflags |= IB_ACCESS_REMOTE_WRITE; 152 + if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_READ) 153 + iflags |= IB_ACCESS_REMOTE_READ; 154 + if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC) 155 + iflags |= IB_ACCESS_REMOTE_ATOMIC; 156 
+ 157 + return iflags; 158 + } 118 159 119 160 static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev, 120 161 struct bnxt_qplib_mrw *qplib_mr) ··· 251 208 252 209 ib_attr->max_pkeys = 1; 253 210 ib_attr->local_ca_ack_delay = BNXT_RE_DEFAULT_ACK_DELAY; 211 + return 0; 212 + } 213 + 214 + int bnxt_re_modify_device(struct ib_device *ibdev, 215 + int device_modify_mask, 216 + struct ib_device_modify *device_modify) 217 + { 218 + ibdev_dbg(ibdev, "Modify device with mask 0x%x", device_modify_mask); 219 + 220 + if (device_modify_mask & ~IB_DEVICE_MODIFY_NODE_DESC) 221 + return -EOPNOTSUPP; 222 + 223 + if (!(device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)) 224 + return 0; 225 + 226 + memcpy(ibdev->node_desc, device_modify->node_desc, IB_DEVICE_NODE_DESC_MAX); 254 227 return 0; 255 228 } 256 229 ··· 998 939 else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD) 999 940 atomic_dec(&rdev->stats.res.ud_qp_count); 1000 941 942 + bnxt_re_debug_rem_qpinfo(rdev, qp); 943 + 1001 944 ib_umem_release(qp->rumem); 1002 945 ib_umem_release(qp->sumem); 1003 946 ··· 1683 1622 if (active_qps > rdev->stats.res.ud_qp_watermark) 1684 1623 rdev->stats.res.ud_qp_watermark = active_qps; 1685 1624 } 1625 + bnxt_re_debug_add_qpinfo(rdev, qp); 1686 1626 1687 1627 return 0; 1688 1628 qp_destroy: ··· 1876 1814 srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); 1877 1815 srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; 1878 1816 srq->srq_limit = srq_init_attr->attr.srq_limit; 1879 - srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; 1880 - nq = &rdev->nq[0]; 1817 + srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; 1818 + nq = &rdev->nqr->nq[0]; 1881 1819 1882 1820 if (udata) { 1883 1821 rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); ··· 2103 2041 if (qp_attr_mask & IB_QP_ACCESS_FLAGS) { 2104 2042 qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS; 2105 2043 qp->qplib_qp.access = 2106 - 
__from_ib_access_flags(qp_attr->qp_access_flags); 2044 + __qp_access_flags_from_ib(qp->qplib_qp.cctx, 2045 + qp_attr->qp_access_flags); 2107 2046 /* LOCAL_WRITE access must be set to allow RC receive */ 2108 - qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE; 2109 - /* Temp: Set all params on QP as of now */ 2110 - qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE; 2111 - qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ; 2047 + qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE; 2112 2048 } 2113 2049 if (qp_attr_mask & IB_QP_PKEY_INDEX) { 2114 2050 qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY; ··· 2140 2080 qp->qplib_qp.ah.sgid_index = ctx->idx; 2141 2081 qp->qplib_qp.ah.host_sgid_index = grh->sgid_index; 2142 2082 qp->qplib_qp.ah.hop_limit = grh->hop_limit; 2143 - qp->qplib_qp.ah.traffic_class = grh->traffic_class; 2083 + qp->qplib_qp.ah.traffic_class = grh->traffic_class >> 2; 2144 2084 qp->qplib_qp.ah.sl = rdma_ah_get_sl(&qp_attr->ah_attr); 2145 2085 ether_addr_copy(qp->qplib_qp.ah.dmac, 2146 2086 qp_attr->ah_attr.roce.dmac); ··· 2311 2251 qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); 2312 2252 qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); 2313 2253 qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 
1 : 0; 2314 - qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); 2254 + qp_attr->qp_access_flags = __qp_access_flags_to_ib(qp->qplib_qp.cctx, 2255 + qplib_qp->access); 2315 2256 qp_attr->pkey_index = qplib_qp->pkey_index; 2316 2257 qp_attr->qkey = qplib_qp->qkey; 2317 2258 qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; ··· 3033 2972 return rc; 3034 2973 } 3035 2974 2975 + static struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev) 2976 + { 2977 + int min, indx; 2978 + 2979 + mutex_lock(&rdev->nqr->load_lock); 2980 + for (indx = 0, min = 0; indx < (rdev->nqr->num_msix - 1); indx++) { 2981 + if (rdev->nqr->nq[min].load > rdev->nqr->nq[indx].load) 2982 + min = indx; 2983 + } 2984 + rdev->nqr->nq[min].load++; 2985 + mutex_unlock(&rdev->nqr->load_lock); 2986 + 2987 + return &rdev->nqr->nq[min]; 2988 + } 2989 + 2990 + static void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq) 2991 + { 2992 + mutex_lock(&rdev->nqr->load_lock); 2993 + nq->load--; 2994 + mutex_unlock(&rdev->nqr->load_lock); 2995 + } 2996 + 3036 2997 /* Completion Queues */ 3037 2998 int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) 3038 2999 { ··· 3073 2990 hash_del(&cq->hash_entry); 3074 2991 } 3075 2992 bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); 2993 + 2994 + bnxt_re_put_nq(rdev, nq); 3076 2995 ib_umem_release(cq->umem); 3077 2996 3078 2997 atomic_dec(&rdev->stats.res.cq_count); ··· 3093 3008 rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); 3094 3009 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; 3095 3010 struct bnxt_qplib_chip_ctx *cctx; 3096 - struct bnxt_qplib_nq *nq = NULL; 3097 - unsigned int nq_alloc_cnt; 3098 3011 int cqe = attr->cqe; 3099 3012 int rc, entries; 3100 3013 u32 active_cqs; ··· 3143 3060 3144 3061 cq->qplib_cq.dpi = &rdev->dpi_privileged; 3145 3062 } 3146 - /* 3147 - * Allocating the NQ in a round robin fashion. nq_alloc_cnt is a 3148 - * used for getting the NQ index. 
3149 - */ 3150 - nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt); 3151 - nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)]; 3152 3063 cq->qplib_cq.max_wqe = entries; 3153 - cq->qplib_cq.cnq_hw_ring_id = nq->ring_id; 3154 - cq->qplib_cq.nq = nq; 3064 + cq->qplib_cq.coalescing = &rdev->cq_coalescing; 3065 + cq->qplib_cq.nq = bnxt_re_get_nq(rdev); 3066 + cq->qplib_cq.cnq_hw_ring_id = cq->qplib_cq.nq->ring_id; 3155 3067 3156 3068 rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq); 3157 3069 if (rc) { ··· 3156 3078 3157 3079 cq->ib_cq.cqe = entries; 3158 3080 cq->cq_period = cq->qplib_cq.period; 3159 - nq->budget++; 3160 3081 3161 3082 active_cqs = atomic_inc_return(&rdev->stats.res.cq_count); 3162 3083 if (active_cqs > rdev->stats.res.cq_watermark) ··· 3710 3633 wc->byte_len = orig_cqe->length; 3711 3634 wc->qp = &gsi_qp->ib_qp; 3712 3635 3713 - wc->ex.imm_data = cpu_to_be32(le32_to_cpu(orig_cqe->immdata)); 3636 + wc->ex.imm_data = cpu_to_be32(orig_cqe->immdata); 3714 3637 wc->src_qp = orig_cqe->src_qp; 3715 3638 memcpy(wc->smac, orig_cqe->smac, ETH_ALEN); 3716 3639 if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) { ··· 3855 3778 (unsigned long)(cqe->qp_handle), 3856 3779 struct bnxt_re_qp, qplib_qp); 3857 3780 wc->qp = &qp->ib_qp; 3858 - wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->immdata)); 3781 + if (cqe->flags & CQ_RES_RC_FLAGS_IMM) 3782 + wc->ex.imm_data = cpu_to_be32(cqe->immdata); 3783 + else 3784 + wc->ex.invalidate_rkey = cqe->invrkey; 3859 3785 wc->src_qp = cqe->src_qp; 3860 3786 memcpy(wc->smac, cqe->smac, ETH_ALEN); 3861 3787 wc->port_num = 1;
+4
drivers/infiniband/hw/bnxt_re/ib_verbs.h
··· 95 95 struct ib_ud_header qp1_hdr; 96 96 struct bnxt_re_cq *scq; 97 97 struct bnxt_re_cq *rcq; 98 + struct dentry *dentry; 98 99 }; 99 100 100 101 struct bnxt_re_cq { ··· 197 196 int bnxt_re_query_device(struct ib_device *ibdev, 198 197 struct ib_device_attr *ib_attr, 199 198 struct ib_udata *udata); 199 + int bnxt_re_modify_device(struct ib_device *ibdev, 200 + int device_modify_mask, 201 + struct ib_device_modify *device_modify); 200 202 int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, 201 203 struct ib_port_attr *port_attr); 202 204 int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+375 -78
drivers/infiniband/hw/bnxt_re/main.c
··· 67 67 #include <rdma/bnxt_re-abi.h> 68 68 #include "bnxt.h" 69 69 #include "hw_counters.h" 70 + #include "debugfs.h" 70 71 71 72 static char version[] = 72 73 BNXT_RE_DESC "\n"; ··· 184 183 rdev->rcfw.res = &rdev->qplib_res; 185 184 rdev->qplib_res.dattr = &rdev->dev_attr; 186 185 rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); 186 + rdev->qplib_res.en_dev = en_dev; 187 187 188 188 bnxt_re_set_drv_mode(rdev); 189 189 ··· 289 287 290 288 static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) 291 289 { 290 + /* 291 + * Use the total VF count since the actual VF count may not be 292 + * available at this point. 293 + */ 292 294 rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev); 293 - if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { 294 - bnxt_re_set_resource_limits(rdev); 295 - bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, 296 - &rdev->qplib_ctx); 297 - } 295 + if (!rdev->num_vfs) 296 + return; 297 + 298 + bnxt_re_set_resource_limits(rdev); 299 + bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, 300 + &rdev->qplib_ctx); 298 301 } 299 302 300 303 static void bnxt_re_shutdown(struct auxiliary_device *adev) ··· 323 316 rdev = en_info->rdev; 324 317 rcfw = &rdev->rcfw; 325 318 326 - for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) { 327 - nq = &rdev->nq[indx - 1]; 319 + for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) { 320 + nq = &rdev->nqr->nq[indx - 1]; 328 321 bnxt_qplib_nq_stop_irq(nq, false); 329 322 } 330 323 ··· 341 334 int indx, rc; 342 335 343 336 rdev = en_info->rdev; 344 - msix_ent = rdev->en_dev->msix_entries; 337 + msix_ent = rdev->nqr->msix_entries; 345 338 rcfw = &rdev->rcfw; 346 339 if (!ent) { 347 340 /* Not setting the f/w timeout bit in rcfw. ··· 356 349 /* Vectors may change after restart, so update with new vectors 357 350 * in device sctructure. 
358 351 */ 359 - for (indx = 0; indx < rdev->num_msix; indx++) 360 - rdev->en_dev->msix_entries[indx].vector = ent[indx].vector; 352 + for (indx = 0; indx < rdev->nqr->num_msix; indx++) 353 + rdev->nqr->msix_entries[indx].vector = ent[indx].vector; 361 354 362 355 rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, 363 356 false); ··· 365 358 ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n"); 366 359 return; 367 360 } 368 - for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) { 369 - nq = &rdev->nq[indx - 1]; 361 + for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) { 362 + nq = &rdev->nqr->nq[indx - 1]; 370 363 rc = bnxt_qplib_nq_start_irq(nq, indx - 1, 371 364 msix_ent[indx].vector, false); 372 365 if (rc) { ··· 880 873 .attrs = bnxt_re_attributes, 881 874 }; 882 875 876 + static int bnxt_re_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr) 877 + { 878 + struct bnxt_qplib_hwq *mr_hwq; 879 + struct nlattr *table_attr; 880 + struct bnxt_re_mr *mr; 881 + 882 + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); 883 + if (!table_attr) 884 + return -EMSGSIZE; 885 + 886 + mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr); 887 + mr_hwq = &mr->qplib_mr.hwq; 888 + 889 + if (rdma_nl_put_driver_u32(msg, "page_size", 890 + mr_hwq->qe_ppg * mr_hwq->element_size)) 891 + goto err; 892 + if (rdma_nl_put_driver_u32(msg, "max_elements", mr_hwq->max_elements)) 893 + goto err; 894 + if (rdma_nl_put_driver_u32(msg, "element_size", mr_hwq->element_size)) 895 + goto err; 896 + if (rdma_nl_put_driver_u64_hex(msg, "hwq", (unsigned long)mr_hwq)) 897 + goto err; 898 + if (rdma_nl_put_driver_u64_hex(msg, "va", mr->qplib_mr.va)) 899 + goto err; 900 + 901 + nla_nest_end(msg, table_attr); 902 + return 0; 903 + 904 + err: 905 + nla_nest_cancel(msg, table_attr); 906 + return -EMSGSIZE; 907 + } 908 + 909 + static int bnxt_re_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) 910 + { 911 + struct bnxt_re_dev *rdev; 912 + struct 
bnxt_re_mr *mr; 913 + int err, len; 914 + void *data; 915 + 916 + mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr); 917 + rdev = mr->rdev; 918 + 919 + err = bnxt_re_read_context_allowed(rdev); 920 + if (err) 921 + return err; 922 + 923 + len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 : 924 + BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5; 925 + data = kzalloc(len, GFP_KERNEL); 926 + if (!data) 927 + return -ENOMEM; 928 + 929 + err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_MRW, 930 + mr->qplib_mr.lkey, len, data); 931 + if (!err) 932 + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 933 + 934 + kfree(data); 935 + return err; 936 + } 937 + 938 + static int bnxt_re_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) 939 + { 940 + struct bnxt_qplib_hwq *cq_hwq; 941 + struct nlattr *table_attr; 942 + struct bnxt_re_cq *cq; 943 + 944 + cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); 945 + cq_hwq = &cq->qplib_cq.hwq; 946 + 947 + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); 948 + if (!table_attr) 949 + return -EMSGSIZE; 950 + 951 + if (rdma_nl_put_driver_u32(msg, "cq_depth", cq_hwq->depth)) 952 + goto err; 953 + if (rdma_nl_put_driver_u32(msg, "max_elements", cq_hwq->max_elements)) 954 + goto err; 955 + if (rdma_nl_put_driver_u32(msg, "element_size", cq_hwq->element_size)) 956 + goto err; 957 + if (rdma_nl_put_driver_u32(msg, "max_wqe", cq->qplib_cq.max_wqe)) 958 + goto err; 959 + 960 + nla_nest_end(msg, table_attr); 961 + return 0; 962 + 963 + err: 964 + nla_nest_cancel(msg, table_attr); 965 + return -EMSGSIZE; 966 + } 967 + 968 + static int bnxt_re_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) 969 + { 970 + struct bnxt_re_dev *rdev; 971 + struct bnxt_re_cq *cq; 972 + int err, len; 973 + void *data; 974 + 975 + cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); 976 + rdev = cq->rdev; 977 + 978 + err = bnxt_re_read_context_allowed(rdev); 979 + if (err) 980 + return err; 981 + 
982 + len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 : 983 + BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5; 984 + data = kzalloc(len, GFP_KERNEL); 985 + if (!data) 986 + return -ENOMEM; 987 + 988 + err = bnxt_qplib_read_context(&rdev->rcfw, 989 + CMDQ_READ_CONTEXT_TYPE_CQ, 990 + cq->qplib_cq.id, len, data); 991 + if (!err) 992 + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 993 + 994 + kfree(data); 995 + return err; 996 + } 997 + 998 + static int bnxt_re_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp) 999 + { 1000 + struct bnxt_qplib_qp *qplib_qp; 1001 + struct nlattr *table_attr; 1002 + struct bnxt_re_qp *qp; 1003 + 1004 + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); 1005 + if (!table_attr) 1006 + return -EMSGSIZE; 1007 + 1008 + qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); 1009 + qplib_qp = &qp->qplib_qp; 1010 + 1011 + if (rdma_nl_put_driver_u32(msg, "sq_max_wqe", qplib_qp->sq.max_wqe)) 1012 + goto err; 1013 + if (rdma_nl_put_driver_u32(msg, "sq_max_sge", qplib_qp->sq.max_sge)) 1014 + goto err; 1015 + if (rdma_nl_put_driver_u32(msg, "sq_wqe_size", qplib_qp->sq.wqe_size)) 1016 + goto err; 1017 + if (rdma_nl_put_driver_u32(msg, "sq_swq_start", qplib_qp->sq.swq_start)) 1018 + goto err; 1019 + if (rdma_nl_put_driver_u32(msg, "sq_swq_last", qplib_qp->sq.swq_last)) 1020 + goto err; 1021 + if (rdma_nl_put_driver_u32(msg, "rq_max_wqe", qplib_qp->rq.max_wqe)) 1022 + goto err; 1023 + if (rdma_nl_put_driver_u32(msg, "rq_max_sge", qplib_qp->rq.max_sge)) 1024 + goto err; 1025 + if (rdma_nl_put_driver_u32(msg, "rq_wqe_size", qplib_qp->rq.wqe_size)) 1026 + goto err; 1027 + if (rdma_nl_put_driver_u32(msg, "rq_swq_start", qplib_qp->rq.swq_start)) 1028 + goto err; 1029 + if (rdma_nl_put_driver_u32(msg, "rq_swq_last", qplib_qp->rq.swq_last)) 1030 + goto err; 1031 + if (rdma_nl_put_driver_u32(msg, "timeout", qplib_qp->timeout)) 1032 + goto err; 1033 + 1034 + nla_nest_end(msg, table_attr); 1035 + return 0; 1036 + 1037 + 
err: 1038 + nla_nest_cancel(msg, table_attr); 1039 + return -EMSGSIZE; 1040 + } 1041 + 1042 + static int bnxt_re_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) 1043 + { 1044 + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibqp->device, ibdev); 1045 + int err, len; 1046 + void *data; 1047 + 1048 + err = bnxt_re_read_context_allowed(rdev); 1049 + if (err) 1050 + return err; 1051 + 1052 + len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 : 1053 + BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5; 1054 + data = kzalloc(len, GFP_KERNEL); 1055 + if (!data) 1056 + return -ENOMEM; 1057 + 1058 + err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_QPC, 1059 + ibqp->qp_num, len, data); 1060 + if (!err) 1061 + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 1062 + 1063 + kfree(data); 1064 + return err; 1065 + } 1066 + 1067 + static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq) 1068 + { 1069 + struct nlattr *table_attr; 1070 + struct bnxt_re_srq *srq; 1071 + 1072 + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); 1073 + if (!table_attr) 1074 + return -EMSGSIZE; 1075 + 1076 + srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); 1077 + 1078 + if (rdma_nl_put_driver_u32_hex(msg, "wqe_size", srq->qplib_srq.wqe_size)) 1079 + goto err; 1080 + if (rdma_nl_put_driver_u32_hex(msg, "max_wqe", srq->qplib_srq.max_wqe)) 1081 + goto err; 1082 + if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge)) 1083 + goto err; 1084 + 1085 + nla_nest_end(msg, table_attr); 1086 + return 0; 1087 + 1088 + err: 1089 + nla_nest_cancel(msg, table_attr); 1090 + return -EMSGSIZE; 1091 + } 1092 + 1093 + static int bnxt_re_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq) 1094 + { 1095 + struct bnxt_re_dev *rdev; 1096 + struct bnxt_re_srq *srq; 1097 + int err, len; 1098 + void *data; 1099 + 1100 + srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); 1101 + rdev = srq->rdev; 1102 + 1103 + 
err = bnxt_re_read_context_allowed(rdev); 1104 + if (err) 1105 + return err; 1106 + 1107 + len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 : 1108 + BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5; 1109 + 1110 + data = kzalloc(len, GFP_KERNEL); 1111 + if (!data) 1112 + return -ENOMEM; 1113 + 1114 + err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_SRQ, 1115 + srq->qplib_srq.id, len, data); 1116 + if (!err) 1117 + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 1118 + 1119 + kfree(data); 1120 + return err; 1121 + } 1122 + 883 1123 static const struct ib_device_ops bnxt_re_dev_ops = { 884 1124 .owner = THIS_MODULE, 885 1125 .driver_id = RDMA_DRIVER_BNXT_RE, ··· 1168 914 .post_srq_recv = bnxt_re_post_srq_recv, 1169 915 .query_ah = bnxt_re_query_ah, 1170 916 .query_device = bnxt_re_query_device, 917 + .modify_device = bnxt_re_modify_device, 1171 918 .query_pkey = bnxt_re_query_pkey, 1172 919 .query_port = bnxt_re_query_port, 1173 920 .query_qp = bnxt_re_query_qp, ··· 1185 930 INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx), 1186 931 }; 1187 932 933 + static const struct ib_device_ops restrack_ops = { 934 + .fill_res_cq_entry = bnxt_re_fill_res_cq_entry, 935 + .fill_res_cq_entry_raw = bnxt_re_fill_res_cq_entry_raw, 936 + .fill_res_qp_entry = bnxt_re_fill_res_qp_entry, 937 + .fill_res_qp_entry_raw = bnxt_re_fill_res_qp_entry_raw, 938 + .fill_res_mr_entry = bnxt_re_fill_res_mr_entry, 939 + .fill_res_mr_entry_raw = bnxt_re_fill_res_mr_entry_raw, 940 + .fill_res_srq_entry = bnxt_re_fill_res_srq_entry, 941 + .fill_res_srq_entry_raw = bnxt_re_fill_res_srq_entry_raw, 942 + }; 943 + 1188 944 static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) 1189 945 { 1190 946 struct ib_device *ibdev = &rdev->ibdev; ··· 1209 943 1210 944 addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); 1211 945 1212 - ibdev->num_comp_vectors = rdev->num_msix - 1; 946 + ibdev->num_comp_vectors = rdev->nqr->num_msix - 1; 1213 947 
ibdev->dev.parent = &rdev->en_dev->pdev->dev; 1214 948 ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; 1215 949 ··· 1217 951 ibdev->driver_def = bnxt_re_uapi_defs; 1218 952 1219 953 ib_set_device_ops(ibdev, &bnxt_re_dev_ops); 954 + ib_set_device_ops(ibdev, &restrack_ops); 1220 955 ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); 1221 956 if (ret) 1222 957 return ret; ··· 1257 990 atomic_set(&rdev->stats.res.pd_count, 0); 1258 991 rdev->cosq[0] = 0xFFFF; 1259 992 rdev->cosq[1] = 0xFFFF; 993 + rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME; 994 + if (bnxt_re_chip_gen_p7(en_dev->chip_num)) { 995 + rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7; 996 + rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7; 997 + } else { 998 + rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5; 999 + rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5; 1000 + } 1001 + rdev->cq_coalescing.en_ring_idle_mode = BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE; 1260 1002 1261 1003 return rdev; 1262 1004 } ··· 1552 1276 { 1553 1277 int i; 1554 1278 1555 - for (i = 1; i < rdev->num_msix; i++) 1556 - bnxt_qplib_disable_nq(&rdev->nq[i - 1]); 1279 + for (i = 1; i < rdev->nqr->num_msix; i++) 1280 + bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]); 1557 1281 1558 1282 if (rdev->qplib_res.rcfw) 1559 1283 bnxt_qplib_cleanup_res(&rdev->qplib_res); ··· 1567 1291 1568 1292 bnxt_qplib_init_res(&rdev->qplib_res); 1569 1293 1570 - for (i = 1; i < rdev->num_msix ; i++) { 1571 - db_offt = rdev->en_dev->msix_entries[i].db_offset; 1572 - rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], 1573 - i - 1, rdev->en_dev->msix_entries[i].vector, 1294 + mutex_init(&rdev->nqr->load_lock); 1295 + 1296 + for (i = 1; i < rdev->nqr->num_msix ; i++) { 1297 + db_offt = rdev->nqr->msix_entries[i].db_offset; 1298 + rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1], 1299 + i - 1, 
rdev->nqr->msix_entries[i].vector, 1574 1300 db_offt, &bnxt_re_cqn_handler, 1575 1301 &bnxt_re_srqn_handler); 1576 1302 if (rc) { ··· 1585 1307 return 0; 1586 1308 fail: 1587 1309 for (i = num_vec_enabled; i >= 0; i--) 1588 - bnxt_qplib_disable_nq(&rdev->nq[i]); 1310 + bnxt_qplib_disable_nq(&rdev->nqr->nq[i]); 1589 1311 return rc; 1590 1312 } 1591 1313 1592 1314 static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) 1593 1315 { 1316 + struct bnxt_qplib_nq *nq; 1594 1317 u8 type; 1595 1318 int i; 1596 1319 1597 - for (i = 0; i < rdev->num_msix - 1; i++) { 1320 + for (i = 0; i < rdev->nqr->num_msix - 1; i++) { 1598 1321 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 1599 - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); 1600 - bnxt_qplib_free_nq(&rdev->nq[i]); 1601 - rdev->nq[i].res = NULL; 1322 + nq = &rdev->nqr->nq[i]; 1323 + bnxt_re_net_ring_free(rdev, nq->ring_id, type); 1324 + bnxt_qplib_free_nq(nq); 1325 + nq->res = NULL; 1602 1326 } 1603 1327 } 1604 1328 ··· 1642 1362 if (rc) 1643 1363 goto dealloc_res; 1644 1364 1645 - for (i = 0; i < rdev->num_msix - 1; i++) { 1365 + for (i = 0; i < rdev->nqr->num_msix - 1; i++) { 1646 1366 struct bnxt_qplib_nq *nq; 1647 1367 1648 - nq = &rdev->nq[i]; 1368 + nq = &rdev->nqr->nq[i]; 1649 1369 nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; 1650 - rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]); 1370 + rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq); 1651 1371 if (rc) { 1652 1372 ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x", 1653 1373 i, rc); ··· 1655 1375 } 1656 1376 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 1657 1377 rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr; 1658 - rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count; 1378 + rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count; 1659 1379 rattr.type = type; 1660 1380 rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; 1661 1381 rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1; 1662 - rattr.lrid = rdev->en_dev->msix_entries[i + 
1].ring_idx; 1382 + rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx; 1663 1383 rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id); 1664 1384 if (rc) { 1665 1385 ibdev_err(&rdev->ibdev, 1666 1386 "Failed to allocate NQ fw id with rc = 0x%x", 1667 1387 rc); 1668 - bnxt_qplib_free_nq(&rdev->nq[i]); 1388 + bnxt_qplib_free_nq(nq); 1669 1389 goto free_nq; 1670 1390 } 1671 1391 num_vec_created++; ··· 1674 1394 free_nq: 1675 1395 for (i = num_vec_created - 1; i >= 0; i--) { 1676 1396 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 1677 - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); 1678 - bnxt_qplib_free_nq(&rdev->nq[i]); 1397 + bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type); 1398 + bnxt_qplib_free_nq(&rdev->nqr->nq[i]); 1679 1399 } 1680 1400 bnxt_qplib_dealloc_dpi(&rdev->qplib_res, 1681 1401 &rdev->dpi_privileged); ··· 1870 1590 return rc; 1871 1591 } 1872 1592 1593 + static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev) 1594 + { 1595 + rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL); 1596 + if (!rdev->nqr) 1597 + return -ENOMEM; 1598 + 1599 + return 0; 1600 + } 1601 + 1602 + static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev) 1603 + { 1604 + kfree(rdev->nqr); 1605 + rdev->nqr = NULL; 1606 + } 1607 + 1873 1608 static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) 1874 1609 { 1875 1610 u8 type; 1876 1611 int rc; 1612 + 1613 + bnxt_re_debugfs_rem_pdev(rdev); 1877 1614 1878 1615 if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) 1879 1616 cancel_delayed_work_sync(&rdev->worker); ··· 1914 1617 bnxt_qplib_free_rcfw_channel(&rdev->rcfw); 1915 1618 } 1916 1619 1917 - rdev->num_msix = 0; 1620 + rdev->nqr->num_msix = 0; 1918 1621 1919 1622 if (rdev->pacing.dbr_pacing) 1920 1623 bnxt_re_deinitialize_dbr_pacing(rdev); 1921 1624 1625 + bnxt_re_free_nqr_mem(rdev); 1922 1626 bnxt_re_destroy_chip_ctx(rdev); 1923 1627 if (op_type == BNXT_RE_COMPLETE_REMOVE) { 1924 1628 if 
(test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) ··· 1957 1659 } 1958 1660 set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1959 1661 1662 + if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) { 1663 + ibdev_err(&rdev->ibdev, 1664 + "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", 1665 + rdev->en_dev->ulp_tbl->msix_requested); 1666 + bnxt_unregister_dev(rdev->en_dev); 1667 + clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1668 + return -EINVAL; 1669 + } 1670 + ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", 1671 + rdev->en_dev->ulp_tbl->msix_requested); 1672 + 1960 1673 rc = bnxt_re_setup_chip_ctx(rdev); 1961 1674 if (rc) { 1962 1675 bnxt_unregister_dev(rdev->en_dev); ··· 1976 1667 return -EINVAL; 1977 1668 } 1978 1669 1670 + rc = bnxt_re_alloc_nqr_mem(rdev); 1671 + if (rc) { 1672 + bnxt_re_destroy_chip_ctx(rdev); 1673 + bnxt_unregister_dev(rdev->en_dev); 1674 + clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1675 + return rc; 1676 + } 1677 + rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested; 1678 + memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries, 1679 + sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix); 1680 + 1979 1681 /* Check whether VF or PF */ 1980 1682 bnxt_re_get_sriov_func_type(rdev); 1981 - 1982 - if (!rdev->en_dev->ulp_tbl->msix_requested) { 1983 - ibdev_err(&rdev->ibdev, 1984 - "Failed to get MSI-X vectors: %#x\n", rc); 1985 - rc = -EINVAL; 1986 - goto fail; 1987 - } 1988 - ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", 1989 - rdev->en_dev->ulp_tbl->msix_requested); 1990 - rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested; 1991 1683 1992 1684 bnxt_re_query_hwrm_intf_version(rdev); 1993 1685 ··· 2011 1701 rattr.type = type; 2012 1702 rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; 2013 1703 rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1; 2014 - rattr.lrid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; 1704 + rattr.lrid = 
rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; 2015 1705 rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); 2016 1706 if (rc) { 2017 1707 ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); 2018 1708 goto free_rcfw; 2019 1709 } 2020 - db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset; 2021 - vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector; 1710 + db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset; 1711 + vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector; 2022 1712 rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, 2023 1713 vid, db_offt, 2024 1714 &bnxt_re_aeq_handler); ··· 2095 1785 INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker); 2096 1786 set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags); 2097 1787 schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); 2098 - /* 2099 - * Use the total VF count since the actual VF count may not be 2100 - * available at this point. 2101 - */ 2102 - bnxt_re_vf_res_config(rdev); 1788 + 1789 + if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)) 1790 + bnxt_re_vf_res_config(rdev); 2103 1791 } 2104 1792 hash_init(rdev->cq_hash); 2105 1793 if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) 2106 1794 hash_init(rdev->srq_hash); 1795 + 1796 + bnxt_re_debugfs_add_pdev(rdev); 2107 1797 2108 1798 return 0; 2109 1799 free_sctx: ··· 2206 1896 2207 1897 if (enable) { 2208 1898 cc_param.enable = 1; 2209 - cc_param.cc_mode = CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE; 1899 + cc_param.tos_ecn = 1; 2210 1900 } 2211 1901 2212 - cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE | 2213 - CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | 1902 + cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | 2214 1903 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); 2215 1904 2216 1905 if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param)) ··· 2342 2033 rdev = en_info->rdev; 2343 2034 en_dev = en_info->en_dev; 2344 2035 mutex_lock(&bnxt_re_mutex); 2345 - /* 
L2 driver may invoke this callback during device error/crash or device 2346 - * reset. Current RoCE driver doesn't recover the device in case of 2347 - * error. Handle the error by dispatching fatal events to all qps 2348 - * ie. by calling bnxt_re_dev_stop and release the MSIx vectors as 2349 - * L2 driver want to modify the MSIx table. 2350 - */ 2351 2036 2352 2037 ibdev_info(&rdev->ibdev, "Handle device suspend call"); 2353 2038 /* Check the current device state from bnxt_en_dev and move the ··· 2349 2046 * This prevents more commands to HW during clean-up, 2350 2047 * in case the device is already in error. 2351 2048 */ 2352 - if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) 2049 + if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) { 2353 2050 set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); 2354 - 2355 - bnxt_re_dev_stop(rdev); 2356 - bnxt_re_stop_irq(adev); 2357 - /* Move the device states to detached and avoid sending any more 2358 - * commands to HW 2359 - */ 2360 - set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); 2361 - set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); 2362 - wake_up_all(&rdev->rcfw.cmdq.waitq); 2051 + set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); 2052 + wake_up_all(&rdev->rcfw.cmdq.waitq); 2053 + bnxt_re_dev_stop(rdev); 2054 + } 2363 2055 2364 2056 if (rdev->pacing.dbr_pacing) 2365 2057 bnxt_re_set_pacing_dev_state(rdev); ··· 2373 2075 struct bnxt_re_dev *rdev; 2374 2076 2375 2077 mutex_lock(&bnxt_re_mutex); 2376 - /* L2 driver may invoke this callback during device recovery, resume. 2377 - * reset. Current RoCE driver doesn't recover the device in case of 2378 - * error. Handle the error by dispatching fatal events to all qps 2379 - * ie. by calling bnxt_re_dev_stop and release the MSIx vectors as 2380 - * L2 driver want to modify the MSIx table. 
2381 - */ 2382 - 2383 2078 bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT); 2384 2079 rdev = en_info->rdev; 2385 2080 ibdev_info(&rdev->ibdev, "Device resume completed"); ··· 2403 2112 int rc; 2404 2113 2405 2114 pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version); 2115 + bnxt_re_register_debugfs(); 2116 + 2406 2117 rc = auxiliary_driver_register(&bnxt_re_driver); 2407 2118 if (rc) { 2408 2119 pr_err("%s: Failed to register auxiliary driver\n", 2409 2120 ROCE_DRV_MODULE_NAME); 2410 - return rc; 2121 + goto err_debug; 2411 2122 } 2412 2123 return 0; 2124 + err_debug: 2125 + bnxt_re_unregister_debugfs(); 2126 + return rc; 2413 2127 } 2414 2128 2415 2129 static void __exit bnxt_re_mod_exit(void) 2416 2130 { 2417 2131 auxiliary_driver_unregister(&bnxt_re_driver); 2132 + bnxt_re_unregister_debugfs(); 2418 2133 } 2419 2134 2420 2135 module_init(bnxt_re_mod_init);
+72 -1
drivers/infiniband/hw/bnxt_re/qplib_fp.c
··· 556 556 nq->pdev = pdev; 557 557 nq->cqn_handler = cqn_handler; 558 558 nq->srqn_handler = srqn_handler; 559 + nq->load = 0; 559 560 560 561 /* Have a task to schedule CQ notifiers in post send case */ 561 562 nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq"); ··· 1283 1282 } 1284 1283 } 1285 1284 1285 + static void bnxt_set_mandatory_attributes(struct bnxt_qplib_qp *qp, 1286 + struct cmdq_modify_qp *req) 1287 + { 1288 + u32 mandatory_flags = 0; 1289 + 1290 + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC) 1291 + mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS; 1292 + 1293 + if (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT && 1294 + qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) { 1295 + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC && qp->srq) 1296 + req->flags = cpu_to_le16(CMDQ_MODIFY_QP_FLAGS_SRQ_USED); 1297 + mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY; 1298 + } 1299 + 1300 + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_UD || 1301 + qp->type == CMDQ_MODIFY_QP_QP_TYPE_GSI) 1302 + mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY; 1303 + 1304 + qp->modify_flags |= mandatory_flags; 1305 + req->qp_type = qp->type; 1306 + } 1307 + 1308 + static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp) 1309 + { 1310 + if ((qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT && 1311 + qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) || 1312 + (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR && 1313 + qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS)) 1314 + return true; 1315 + 1316 + return false; 1317 + } 1318 + 1286 1319 int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) 1287 1320 { 1288 1321 struct bnxt_qplib_rcfw *rcfw = res->rcfw; 1289 1322 struct creq_modify_qp_resp resp = {}; 1290 1323 struct bnxt_qplib_cmdqmsg msg = {}; 1291 1324 struct cmdq_modify_qp req = {}; 1325 + u16 vlan_pcp_vlan_dei_vlan_id; 1292 1326 u32 temp32[4]; 1293 1327 u32 bmask; 1294 1328 int rc; ··· 1334 1298 1335 1299 /* Filter out the qp_attr_mask based on the 
state->new transition */ 1336 1300 __filter_modify_flags(qp); 1301 + if (qp->modify_flags & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) { 1302 + /* Set mandatory attributes for INIT -> RTR and RTR -> RTS transition */ 1303 + if (_is_optimize_modify_qp_supported(res->dattr->dev_cap_flags2) && 1304 + is_optimized_state_transition(qp)) 1305 + bnxt_set_mandatory_attributes(qp, &req); 1306 + } 1337 1307 bmask = qp->modify_flags; 1338 1308 req.modify_mask = cpu_to_le32(qp->modify_flags); 1339 1309 req.qp_cid = cpu_to_le32(qp->id); ··· 1420 1378 if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID) 1421 1379 req.dest_qp_id = cpu_to_le32(qp->dest_qpn); 1422 1380 1423 - req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id); 1381 + if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID) { 1382 + vlan_pcp_vlan_dei_vlan_id = 1383 + ((res->sgid_tbl.tbl[qp->ah.sgid_index].vlan_id << 1384 + CMDQ_MODIFY_QP_VLAN_ID_SFT) & 1385 + CMDQ_MODIFY_QP_VLAN_ID_MASK); 1386 + vlan_pcp_vlan_dei_vlan_id |= 1387 + ((qp->ah.sl << CMDQ_MODIFY_QP_VLAN_PCP_SFT) & 1388 + CMDQ_MODIFY_QP_VLAN_PCP_MASK); 1389 + req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(vlan_pcp_vlan_dei_vlan_id); 1390 + } 1424 1391 1425 1392 bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); 1426 1393 rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); ··· 2202 2151 struct bnxt_qplib_cmdqmsg msg = {}; 2203 2152 struct cmdq_create_cq req = {}; 2204 2153 struct bnxt_qplib_pbl *pbl; 2154 + u32 coalescing = 0; 2205 2155 u32 pg_sz_lvl; 2206 2156 int rc; 2207 2157 ··· 2229 2177 req.dpi = cpu_to_le32(cq->dpi->dpi); 2230 2178 req.cq_handle = cpu_to_le64(cq->cq_handle); 2231 2179 req.cq_size = cpu_to_le32(cq->max_wqe); 2180 + 2181 + if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2)) { 2182 + req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID); 2183 + coalescing |= ((cq->coalescing->buf_maxtime << 2184 + CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) & 2185 + CMDQ_CREATE_CQ_BUF_MAXTIME_MASK); 2186 + coalescing |= 
((cq->coalescing->normal_maxbuf << 2187 + CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT) & 2188 + CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK); 2189 + coalescing |= ((cq->coalescing->during_maxbuf << 2190 + CMDQ_CREATE_CQ_DURING_MAXBUF_SFT) & 2191 + CMDQ_CREATE_CQ_DURING_MAXBUF_MASK); 2192 + if (cq->coalescing->en_ring_idle_mode) 2193 + coalescing |= CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE; 2194 + else 2195 + coalescing &= ~CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE; 2196 + req.coalescing = cpu_to_le32(coalescing); 2197 + } 2198 + 2232 2199 pbl = &cq->hwq.pbl[PBL_LVL_0]; 2233 2200 pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) << 2234 2201 CMDQ_CREATE_CQ_PG_SIZE_SFT);
+22 -1
drivers/infiniband/hw/bnxt_re/qplib_fp.h
··· 383 383 return avail <= slots; 384 384 } 385 385 386 + /* CQ coalescing parameters */ 387 + struct bnxt_qplib_cq_coal_param { 388 + u16 buf_maxtime; 389 + u8 normal_maxbuf; 390 + u8 during_maxbuf; 391 + u8 en_ring_idle_mode; 392 + }; 393 + 394 + #define BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME 0x1 395 + #define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7 0x8 396 + #define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7 0x8 397 + #define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5 0x1 398 + #define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5 0x1 399 + #define BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE 0x1 400 + #define BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME 0x1bf 401 + #define BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF 0x1f 402 + #define BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF 0x1f 403 + #define BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE 0x1 404 + 386 405 struct bnxt_qplib_cqe { 387 406 u8 status; 388 407 u8 type; ··· 410 391 u16 cfa_meta; 411 392 u64 wr_id; 412 393 union { 413 - __le32 immdata; 394 + u32 immdata; 414 395 u32 invrkey; 415 396 }; 416 397 u64 qp_handle; ··· 464 445 */ 465 446 spinlock_t flush_lock; /* QP flush management */ 466 447 u16 cnq_events; 448 + struct bnxt_qplib_cq_coal_param *coalescing; 467 449 }; 468 450 469 451 #define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq) ··· 519 499 struct tasklet_struct nq_tasklet; 520 500 bool requested; 521 501 int budget; 502 + u32 load; 522 503 523 504 cqn_handler_t cqn_handler; 524 505 srqn_handler_t srqn_handler;
+8 -11
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
··· 831 831 struct creq_initialize_fw_resp resp = {}; 832 832 struct cmdq_initialize_fw req = {}; 833 833 struct bnxt_qplib_cmdqmsg msg = {}; 834 + u16 flags = 0; 834 835 u8 pgsz, lvl; 835 836 int rc; 836 837 ··· 850 849 * shall setup this area for VF. Skipping the 851 850 * HW programming 852 851 */ 853 - if (is_virtfn) 852 + if (is_virtfn || bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) 854 853 goto skip_ctx_setup; 855 - if (bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) 856 - goto config_vf_res; 857 854 858 855 lvl = ctx->qpc_tbl.level; 859 856 pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl); ··· 895 896 req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements); 896 897 req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements); 897 898 898 - config_vf_res: 899 - req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf); 900 - req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf); 901 - req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf); 902 - req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf); 903 - req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf); 904 - 905 899 skip_ctx_setup: 906 900 if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags)) 907 - req.flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED); 901 + flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED; 902 + if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2)) 903 + flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED; 904 + if (rcfw->res->en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT) 905 + flags |= CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT; 906 + req.flags |= cpu_to_le16(flags); 908 907 req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id); 909 908 bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); 910 909 rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+2
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
··· 131 131 #define RCFW_CMD_IS_BLOCKING 0x8000 132 132 133 133 #define HWRM_VERSION_DEV_ATTR_MAX_DPI 0x1000A0000000DULL 134 + /* HWRM version 1.10.3.18 */ 135 + #define HWRM_VERSION_READ_CTX 0x1000A00030012 134 136 135 137 /* Crsq buf is 1024-Byte */ 136 138 struct bnxt_qplib_crsbe {
+13
drivers/infiniband/hw/bnxt_re/qplib_res.h
··· 39 39 #ifndef __BNXT_QPLIB_RES_H__ 40 40 #define __BNXT_QPLIB_RES_H__ 41 41 42 + #include "bnxt_ulp.h" 43 + 42 44 extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; 43 45 44 46 #define CHIP_NUM_57508 0x1750 ··· 304 302 struct bnxt_qplib_chip_ctx *cctx; 305 303 struct bnxt_qplib_dev_attr *dattr; 306 304 struct net_device *netdev; 305 + struct bnxt_en_dev *en_dev; 307 306 struct bnxt_qplib_rcfw *rcfw; 308 307 struct bnxt_qplib_pd_tbl pd_tbl; 309 308 /* To protect the pd table bit map */ ··· 577 574 static inline bool _is_relaxed_ordering_supported(u16 dev_cap_ext_flags2) 578 575 { 579 576 return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED; 577 + } 578 + 579 + static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2) 580 + { 581 + return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED; 582 + } 583 + 584 + static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2) 585 + { 586 + return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED; 580 587 } 581 588 582 589 #endif /* __BNXT_QPLIB_RES_H__ */
+35
drivers/infiniband/hw/bnxt_re/qplib_sp.c
··· 981 981 rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg); 982 982 return rc; 983 983 } 984 + 985 + int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 res_type, 986 + u32 xid, u32 resp_size, void *resp_va) 987 + { 988 + struct creq_read_context resp = {}; 989 + struct bnxt_qplib_cmdqmsg msg = {}; 990 + struct cmdq_read_context req = {}; 991 + struct bnxt_qplib_rcfw_sbuf sbuf; 992 + int rc; 993 + 994 + sbuf.size = resp_size; 995 + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, 996 + &sbuf.dma_addr, GFP_KERNEL); 997 + if (!sbuf.sb) 998 + return -ENOMEM; 999 + 1000 + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, 1001 + CMDQ_BASE_OPCODE_READ_CONTEXT, sizeof(req)); 1002 + req.resp_addr = cpu_to_le64(sbuf.dma_addr); 1003 + req.resp_size = resp_size / BNXT_QPLIB_CMDQE_UNITS; 1004 + 1005 + req.xid = cpu_to_le32(xid); 1006 + req.type = res_type; 1007 + 1008 + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), 1009 + sizeof(resp), 0); 1010 + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); 1011 + if (rc) 1012 + goto free_mem; 1013 + 1014 + memcpy(resp_va, sbuf.sb, resp_size); 1015 + free_mem: 1016 + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); 1017 + return rc; 1018 + }
+2
drivers/infiniband/hw/bnxt_re/qplib_sp.h
··· 353 353 struct bnxt_qplib_ext_stat *estat); 354 354 int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res, 355 355 struct bnxt_qplib_cc_param *cc_param); 356 + int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid, 357 + u32 resp_size, void *resp_va); 356 358 357 359 #define BNXT_VAR_MAX_WQE 4352 358 360 #define BNXT_VAR_MAX_SLOT_ALIGN 256
+56 -1
drivers/infiniband/hw/bnxt_re/roce_hsi.h
··· 216 216 __le16 flags; 217 217 #define CMDQ_INITIALIZE_FW_FLAGS_MRAV_RESERVATION_SPLIT 0x1UL 218 218 #define CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED 0x2UL 219 + #define CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED 0x8UL 220 + #define CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT 0x10UL 219 221 __le16 cookie; 220 222 u8 resp_size; 221 223 u8 reserved8; ··· 561 559 #define CMDQ_MODIFY_QP_OPCODE_LAST CMDQ_MODIFY_QP_OPCODE_MODIFY_QP 562 560 u8 cmd_size; 563 561 __le16 flags; 562 + #define CMDQ_MODIFY_QP_FLAGS_SRQ_USED 0x1UL 564 563 __le16 cookie; 565 564 u8 resp_size; 566 565 u8 qp_type; ··· 1140 1137 #define CMDQ_CREATE_CQ_FLAGS_DISABLE_CQ_OVERFLOW_DETECTION 0x1UL 1141 1138 #define CMDQ_CREATE_CQ_FLAGS_STEERING_TAG_VALID 0x2UL 1142 1139 #define CMDQ_CREATE_CQ_FLAGS_INFINITE_CQ_MODE 0x4UL 1140 + #define CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID 0x8UL 1143 1141 __le16 cookie; 1144 1142 u8 resp_size; 1145 1143 u8 reserved8; ··· 1173 1169 __le32 cq_size; 1174 1170 __le64 pbl; 1175 1171 __le16 steering_tag; 1176 - u8 reserved48[6]; 1172 + u8 reserved48[2]; 1173 + __le32 coalescing; 1174 + #define CMDQ_CREATE_CQ_BUF_MAXTIME_MASK 0x1ffUL 1175 + #define CMDQ_CREATE_CQ_BUF_MAXTIME_SFT 0 1176 + #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK 0x3e00UL 1177 + #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT 9 1178 + #define CMDQ_CREATE_CQ_DURING_MAXBUF_MASK 0x7c000UL 1179 + #define CMDQ_CREATE_CQ_DURING_MAXBUF_SFT 14 1180 + #define CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE 0x80000UL 1181 + #define CMDQ_CREATE_CQ_UNUSED12_MASK 0xfff00000UL 1182 + #define CMDQ_CREATE_CQ_UNUSED12_SFT 20 1183 + __le64 reserved64; 1177 1184 }; 1178 1185 1179 1186 /* creq_create_cq_resp (size:128b/16B) */ ··· 2264 2249 #define CREQ_SET_FUNC_RESOURCES_RESP_EVENT_LAST \ 2265 2250 CREQ_SET_FUNC_RESOURCES_RESP_EVENT_SET_FUNC_RESOURCES 2266 2251 u8 reserved48[6]; 2252 + }; 2253 + 2254 + /* cmdq_read_context (size:192b/24B) */ 2255 + struct cmdq_read_context { 2256 + u8 opcode; 2257 + #define 
CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 0x85UL 2258 + #define CMDQ_READ_CONTEXT_OPCODE_LAST CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 2259 + u8 cmd_size; 2260 + __le16 flags; 2261 + __le16 cookie; 2262 + u8 resp_size; 2263 + u8 reserved8; 2264 + __le64 resp_addr; 2265 + __le32 xid; 2266 + u8 type; 2267 + #define CMDQ_READ_CONTEXT_TYPE_QPC 0x0UL 2268 + #define CMDQ_READ_CONTEXT_TYPE_CQ 0x1UL 2269 + #define CMDQ_READ_CONTEXT_TYPE_MRW 0x2UL 2270 + #define CMDQ_READ_CONTEXT_TYPE_SRQ 0x3UL 2271 + #define CMDQ_READ_CONTEXT_TYPE_LAST CMDQ_READ_CONTEXT_TYPE_SRQ 2272 + u8 unused_0[3]; 2273 + }; 2274 + 2275 + /* creq_read_context (size:128b/16B) */ 2276 + struct creq_read_context { 2277 + u8 type; 2278 + #define CREQ_READ_CONTEXT_TYPE_MASK 0x3fUL 2279 + #define CREQ_READ_CONTEXT_TYPE_SFT 0 2280 + #define CREQ_READ_CONTEXT_TYPE_QP_EVENT 0x38UL 2281 + #define CREQ_READ_CONTEXT_TYPE_LAST CREQ_READ_CONTEXT_TYPE_QP_EVENT 2282 + u8 status; 2283 + __le16 cookie; 2284 + __le32 reserved32; 2285 + u8 v; 2286 + #define CREQ_READ_CONTEXT_V 0x1UL 2287 + u8 event; 2288 + #define CREQ_READ_CONTEXT_EVENT_READ_CONTEXT 0x85UL 2289 + #define CREQ_READ_CONTEXT_EVENT_LAST CREQ_READ_CONTEXT_EVENT_READ_CONTEXT 2290 + __le16 reserved16; 2291 + __le32 reserved_32; 2267 2292 }; 2268 2293 2269 2294 /* cmdq_map_tc_to_cos (size:192b/24B) */
+56 -7
drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
··· 30 30 EFA_ADMIN_DEALLOC_UAR = 17, 31 31 EFA_ADMIN_CREATE_EQ = 18, 32 32 EFA_ADMIN_DESTROY_EQ = 19, 33 - EFA_ADMIN_MAX_OPCODE = 19, 33 + EFA_ADMIN_ALLOC_MR = 20, 34 + EFA_ADMIN_MAX_OPCODE = 20, 34 35 }; 35 36 36 37 enum efa_admin_aq_feature_id { ··· 151 150 /* UAR number */ 152 151 u16 uar; 153 152 153 + /* Requested service level for the QP, 0 is the default SL */ 154 + u8 sl; 155 + 154 156 /* MBZ */ 155 - u16 reserved; 157 + u8 reserved; 156 158 157 159 /* MBZ */ 158 160 u32 reserved2; ··· 463 459 struct efa_admin_acq_common_desc acq_common_desc; 464 460 }; 465 461 462 + /* 463 + * Allocation of MemoryRegion, required for QP working with Virtual 464 + * Addresses in kernel verbs semantics, ready for fast registration use. 465 + */ 466 + struct efa_admin_alloc_mr_cmd { 467 + /* Common Admin Queue descriptor */ 468 + struct efa_admin_aq_common_desc aq_common_desc; 469 + 470 + /* Protection Domain */ 471 + u16 pd; 472 + 473 + /* MBZ */ 474 + u16 reserved1; 475 + 476 + /* Maximum number of pages this MR supports. */ 477 + u32 max_pages; 478 + }; 479 + 480 + struct efa_admin_alloc_mr_resp { 481 + /* Common Admin Queue completion descriptor */ 482 + struct efa_admin_acq_common_desc acq_common_desc; 483 + 484 + /* 485 + * L_Key, to be used in conjunction with local buffer references in 486 + * SQ and RQ WQE, or with virtual RQ/CQ rings 487 + */ 488 + u32 l_key; 489 + 490 + /* 491 + * R_Key, to be used in RDMA messages to refer to remotely accessed 492 + * memory region 493 + */ 494 + u32 r_key; 495 + }; 496 + 466 497 struct efa_admin_create_cq_cmd { 467 498 struct efa_admin_aq_common_desc aq_common_desc; 468 499 ··· 522 483 */ 523 484 u8 cq_caps_2; 524 485 525 - /* completion queue depth in # of entries. must be power of 2 */ 526 - u16 cq_depth; 486 + /* Sub completion queue depth in # of entries. 
must be power of 2 */ 487 + u16 sub_cq_depth; 527 488 528 489 /* EQ number assigned to this cq */ 529 490 u16 eqn; ··· 558 519 559 520 u16 cq_idx; 560 521 561 - /* actual cq depth in number of entries */ 562 - u16 cq_actual_depth; 522 + /* actual sub cq depth in number of entries */ 523 + u16 sub_cq_actual_depth; 563 524 564 525 /* CQ doorbell address, as offset to PCIe DB BAR */ 565 526 u32 db_offset; ··· 617 578 u64 rx_pkts; 618 579 619 580 u64 rx_drops; 581 + 582 + u64 qkey_viol; 620 583 }; 621 584 622 585 struct efa_admin_messages_stats { ··· 718 677 719 678 /* Unique global ID for an EFA device */ 720 679 u64 guid; 680 + 681 + /* The device maximum link speed in Gbit/sec */ 682 + u16 max_link_speed_gbps; 683 + 684 + /* MBZ */ 685 + u16 reserved0; 686 + 687 + /* MBZ */ 688 + u32 reserved1; 721 689 }; 722 690 723 691 struct efa_admin_feature_queue_attr_desc { ··· 1107 1057 1108 1058 /* create_eq_cmd */ 1109 1059 #define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) 1110 - #define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6) 1111 1060 #define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0) 1112 1061 1113 1062 /* host_info */
+2 -2
drivers/infiniband/hw/efa/efa_admin_defs.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 2 /* 3 - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 3 + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 6 #ifndef _EFA_ADMIN_H_ ··· 96 96 struct efa_admin_aenq_common_desc { 97 97 u16 group; 98 98 99 - u16 syndrom; 99 + u16 syndrome; 100 100 101 101 /* 102 102 * 0 : phase
+4 -2
drivers/infiniband/hw/efa/efa_com_cmd.c
··· 31 31 create_qp_cmd.qp_alloc_size.recv_queue_depth = 32 32 params->rq_depth; 33 33 create_qp_cmd.uar = params->uarn; 34 + create_qp_cmd.sl = params->sl; 34 35 35 36 if (params->unsolicited_write_recv) 36 37 EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1); ··· 164 163 EFA_SET(&create_cmd.cq_caps_2, 165 164 EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS, 166 165 params->entry_size_in_bytes / 4); 167 - create_cmd.cq_depth = params->cq_depth; 166 + create_cmd.sub_cq_depth = params->sub_cq_depth; 168 167 create_cmd.num_sub_cqs = params->num_sub_cqs; 169 168 create_cmd.uar = params->uarn; 170 169 if (params->interrupt_mode_enabled) { ··· 192 191 } 193 192 194 193 result->cq_idx = cmd_completion.cq_idx; 195 - result->actual_depth = params->cq_depth; 194 + result->actual_depth = params->sub_cq_depth; 196 195 result->db_off = cmd_completion.db_offset; 197 196 result->db_valid = EFA_GET(&cmd_completion.flags, 198 197 EFA_ADMIN_CREATE_CQ_RESP_DB_VALID); ··· 467 466 result->max_rdma_size = resp.u.device_attr.max_rdma_size; 468 467 result->device_caps = resp.u.device_attr.device_caps; 469 468 result->guid = resp.u.device_attr.guid; 469 + result->max_link_speed_gbps = resp.u.device_attr.max_link_speed_gbps; 470 470 471 471 if (result->admin_api_version < 1) { 472 472 ibdev_err_ratelimited(
+3 -1
drivers/infiniband/hw/efa/efa_com_cmd.h
··· 27 27 u16 pd; 28 28 u16 uarn; 29 29 u8 qp_type; 30 + u8 sl; 30 31 u8 unsolicited_write_recv : 1; 31 32 }; 32 33 ··· 72 71 /* cq physical base address in OS memory */ 73 72 dma_addr_t dma_addr; 74 73 /* completion queue depth in # of entries */ 75 - u16 cq_depth; 74 + u16 sub_cq_depth; 76 75 u16 num_sub_cqs; 77 76 u16 uarn; 78 77 u16 eqn; ··· 142 141 u16 max_wr_rdma_sge; 143 142 u16 max_tx_batch; 144 143 u16 min_sq_depth; 144 + u16 max_link_speed_gbps; 145 145 u8 db_bar; 146 146 }; 147 147
+96 -10
drivers/infiniband/hw/efa/efa_io_defs.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 2 /* 3 - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. 3 + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 6 #ifndef _EFA_IO_H_ ··· 10 10 #define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1 11 11 #define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32 12 12 #define EFA_IO_TX_DESC_IMM_DATA_SIZE 4 13 + #define EFA_IO_TX_DESC_INLINE_PBL_SIZE 1 13 14 14 15 enum efa_io_queue_type { 15 16 /* send queue (of a QP) */ ··· 26 25 EFA_IO_RDMA_READ = 1, 27 26 /* RDMA write */ 28 27 EFA_IO_RDMA_WRITE = 2, 28 + /* Fast MR registration */ 29 + EFA_IO_FAST_REG = 3, 30 + /* Fast MR invalidation */ 31 + EFA_IO_FAST_INV = 4, 29 32 }; 30 33 31 34 enum efa_io_comp_status { ··· 39 34 EFA_IO_COMP_STATUS_FLUSHED = 1, 40 35 /* Internal QP error */ 41 36 EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2, 42 - /* Bad operation type */ 43 - EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3, 37 + /* Unsupported operation */ 38 + EFA_IO_COMP_STATUS_LOCAL_ERROR_UNSUPPORTED_OP = 3, 44 39 /* Bad AH */ 45 40 EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4, 46 41 /* LKEY not registered or does not match IOVA */ 47 42 EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5, 48 43 /* Message too long */ 49 44 EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6, 50 - /* Destination ENI is down or does not run EFA */ 45 + /* RKEY not registered or does not match remote IOVA */ 51 46 EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7, 52 47 /* Connection was reset by remote side */ 53 48 EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8, ··· 59 54 EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11, 60 55 /* Unexpected status returned by responder */ 61 56 EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12, 62 - /* Unresponsive remote - detected locally */ 57 + /* Unresponsive remote - was previously responsive */ 63 58 EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13, 59 + /* No valid AH at remote side (required for 
RDMA operations) */ 60 + EFA_IO_COMP_STATUS_REMOTE_ERROR_UNKNOWN_PEER = 14, 61 + /* Unreachable remote - never received a response */ 62 + EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE = 15, 63 + }; 64 + 65 + enum efa_io_frwr_pbl_mode { 66 + EFA_IO_FRWR_INLINE_PBL = 0, 67 + EFA_IO_FRWR_DIRECT_PBL = 1, 64 68 }; 65 69 66 70 struct efa_io_tx_meta_desc { ··· 109 95 110 96 /* 111 97 * If inline_msg bit is set, length of inline message in bytes, 112 - * otherwise length of SGL (number of buffers). 98 + * otherwise length of SGL (number of buffers). 113 99 */ 114 100 u16 length; 115 101 116 102 /* 117 - * immediate data: if has_imm is set, then this field is included 118 - * within Tx message and reported in remote Rx completion. 103 + * immediate data: if has_imm is set, then this field is included within 104 + * Tx message and reported in remote Rx completion. 119 105 */ 120 106 u32 immediate_data; 121 107 ··· 172 158 struct efa_io_tx_buf_desc local_mem[1]; 173 159 }; 174 160 161 + struct efa_io_fast_mr_reg_req { 162 + /* Updated local key of the MR after lkey/rkey increment */ 163 + u32 lkey; 164 + 165 + /* 166 + * permissions 167 + * 0 : local_write_enable - Local write permissions: 168 + * must be set for RQ buffers and buffers posted for 169 + * RDMA Read requests 170 + * 1 : remote_write_enable - Remote write 171 + * permissions: must be set to enable RDMA write to 172 + * the region 173 + * 2 : remote_read_enable - Remote read permissions: 174 + * must be set to enable RDMA read from the region 175 + * 7:3 : reserved2 - MBZ 176 + */ 177 + u8 permissions; 178 + 179 + /* 180 + * control flags 181 + * 4:0 : phys_page_size_shift - page size is (1 << 182 + * phys_page_size_shift) 183 + * 6:5 : pbl_mode - enum efa_io_frwr_pbl_mode 184 + * 7 : reserved - MBZ 185 + */ 186 + u8 flags; 187 + 188 + /* MBZ */ 189 + u8 reserved[2]; 190 + 191 + /* IO Virtual Address associated with this MR */ 192 + u64 iova; 193 + 194 + /* Memory region length, in bytes */ 195 + u64 mr_length; 
196 + 197 + /* Physical Buffer List, each element is page-aligned. */ 198 + union { 199 + /* 200 + * Inline array of physical page addresses (optimization 201 + * for short region activation). 202 + */ 203 + u64 inline_array[1]; 204 + 205 + /* points to PBL (Currently only direct) */ 206 + u64 dma_addr; 207 + } pbl; 208 + }; 209 + 210 + struct efa_io_fast_mr_inv_req { 211 + /* Local key of the MR to invalidate */ 212 + u32 lkey; 213 + 214 + /* MBZ */ 215 + u8 reserved[28]; 216 + }; 217 + 175 218 /* 176 219 * Tx WQE, composed of tx meta descriptors followed by either tx buffer 177 220 * descriptors or inline data ··· 245 174 246 175 /* RDMA local and remote memory addresses */ 247 176 struct efa_io_rdma_req rdma_req; 177 + 178 + /* Fast registration */ 179 + struct efa_io_fast_mr_reg_req reg_mr_req; 180 + 181 + /* Fast invalidation */ 182 + struct efa_io_fast_mr_inv_req inv_mr_req; 248 183 } data; 249 184 }; 250 185 ··· 285 208 struct efa_io_cdesc_common { 286 209 /* 287 210 * verbs-generated request ID, as provided in the completed tx or rx 288 - * descriptor. 211 + * descriptor. 289 212 */ 290 213 u16 req_id; 291 214 ··· 298 221 * 3 : has_imm - indicates that immediate data is 299 222 * present - for RX completions only 300 223 * 6:4 : op_type - enum efa_io_send_op_type 301 - * 7 : reserved31 - MBZ 224 + * 7 : unsolicited - indicates that there is no 225 + * matching request - for RDMA with imm. 
RX only 302 226 */ 303 227 u8 flags; 304 228 ··· 369 291 /* tx_buf_desc */ 370 292 #define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0) 371 293 294 + /* fast_mr_reg_req */ 295 + #define EFA_IO_FAST_MR_REG_REQ_LOCAL_WRITE_ENABLE_MASK BIT(0) 296 + #define EFA_IO_FAST_MR_REG_REQ_REMOTE_WRITE_ENABLE_MASK BIT(1) 297 + #define EFA_IO_FAST_MR_REG_REQ_REMOTE_READ_ENABLE_MASK BIT(2) 298 + #define EFA_IO_FAST_MR_REG_REQ_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) 299 + #define EFA_IO_FAST_MR_REG_REQ_PBL_MODE_MASK GENMASK(6, 5) 300 + 372 301 /* rx_desc */ 373 302 #define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0) 374 303 #define EFA_IO_RX_DESC_FIRST_MASK BIT(30) ··· 386 301 #define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1) 387 302 #define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3) 388 303 #define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4) 304 + #define EFA_IO_CDESC_COMMON_UNSOLICITED_MASK BIT(7) 389 305 390 306 #endif /* _EFA_IO_H_ */
+47 -4
drivers/infiniband/hw/efa/efa_verbs.c
··· 85 85 EFA_DEFINE_PORT_STATS(EFA_STATS_STR) 86 86 }; 87 87 88 + #define EFA_DEFAULT_LINK_SPEED_GBPS 100 89 + 88 90 #define EFA_CHUNK_PAYLOAD_SHIFT 12 89 91 #define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT) 90 92 #define EFA_CHUNK_PAYLOAD_PTR_SIZE 8 ··· 279 277 return 0; 280 278 } 281 279 280 + static void efa_link_gbps_to_speed_and_width(u16 gbps, 281 + enum ib_port_speed *speed, 282 + enum ib_port_width *width) 283 + { 284 + if (gbps >= 400) { 285 + *width = IB_WIDTH_8X; 286 + *speed = IB_SPEED_HDR; 287 + } else if (gbps >= 200) { 288 + *width = IB_WIDTH_4X; 289 + *speed = IB_SPEED_HDR; 290 + } else if (gbps >= 120) { 291 + *width = IB_WIDTH_12X; 292 + *speed = IB_SPEED_FDR10; 293 + } else if (gbps >= 100) { 294 + *width = IB_WIDTH_4X; 295 + *speed = IB_SPEED_EDR; 296 + } else if (gbps >= 60) { 297 + *width = IB_WIDTH_12X; 298 + *speed = IB_SPEED_DDR; 299 + } else if (gbps >= 50) { 300 + *width = IB_WIDTH_1X; 301 + *speed = IB_SPEED_HDR; 302 + } else if (gbps >= 40) { 303 + *width = IB_WIDTH_4X; 304 + *speed = IB_SPEED_FDR10; 305 + } else if (gbps >= 30) { 306 + *width = IB_WIDTH_12X; 307 + *speed = IB_SPEED_SDR; 308 + } else { 309 + *width = IB_WIDTH_1X; 310 + *speed = IB_SPEED_EDR; 311 + } 312 + } 313 + 282 314 int efa_query_port(struct ib_device *ibdev, u32 port, 283 315 struct ib_port_attr *props) 284 316 { 285 317 struct efa_dev *dev = to_edev(ibdev); 318 + enum ib_port_speed link_speed; 319 + enum ib_port_width link_width; 320 + u16 link_gbps; 286 321 287 322 props->lmc = 1; 288 323 ··· 327 288 props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; 328 289 props->gid_tbl_len = 1; 329 290 props->pkey_tbl_len = 1; 330 - props->active_speed = IB_SPEED_EDR; 331 - props->active_width = IB_WIDTH_4X; 291 + link_gbps = dev->dev_attr.max_link_speed_gbps ?: EFA_DEFAULT_LINK_SPEED_GBPS; 292 + efa_link_gbps_to_speed_and_width(link_gbps, &link_speed, &link_width); 293 + props->active_speed = link_speed; 294 + props->active_width = link_width; 332 295 props->max_mtu 
= ib_mtu_int_to_enum(dev->dev_attr.mtu); 333 296 props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); 334 297 props->max_msg_sz = dev->dev_attr.mtu; ··· 717 676 goto err_out; 718 677 } 719 678 720 - if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_90)) { 679 + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_98)) { 721 680 ibdev_dbg(&dev->ibdev, 722 681 "Incompatible ABI params, unknown fields in udata\n"); 723 682 err = -EINVAL; ··· 772 731 qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr); 773 732 create_qp_params.rq_base_addr = qp->rq_dma_addr; 774 733 } 734 + 735 + create_qp_params.sl = cmd.sl; 775 736 776 737 if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV) 777 738 create_qp_params.unsolicited_write_recv = true; ··· 1210 1167 } 1211 1168 1212 1169 params.uarn = cq->ucontext->uarn; 1213 - params.cq_depth = entries; 1170 + params.sub_cq_depth = entries; 1214 1171 params.dma_addr = cq->dma_addr; 1215 1172 params.entry_size_in_bytes = cmd.cq_entry_size; 1216 1173 params.num_sub_cqs = cmd.num_sub_cqs;
+1 -1
drivers/infiniband/hw/hfi1/chip.c
··· 13235 13235 /* 13236 13236 * Clear all interrupt sources on the chip. 13237 13237 */ 13238 - void clear_all_interrupts(struct hfi1_devdata *dd) 13238 + static void clear_all_interrupts(struct hfi1_devdata *dd) 13239 13239 { 13240 13240 int i; 13241 13241
-1
drivers/infiniband/hw/hfi1/chip.h
··· 1404 1404 1405 1405 int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set); 1406 1406 void init_qsfp_int(struct hfi1_devdata *dd); 1407 - void clear_all_interrupts(struct hfi1_devdata *dd); 1408 1407 void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr); 1409 1408 void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr); 1410 1409 void reset_interrupts(struct hfi1_devdata *dd);
+2 -2
drivers/infiniband/hw/hns/hns_roce_cq.c
··· 179 179 ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, 180 180 hr_cq->cqn); 181 181 if (ret) 182 - dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, 183 - hr_cq->cqn); 182 + dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", 183 + ret, hr_cq->cqn); 184 184 185 185 xa_erase_irq(&cq_table->array, hr_cq->cqn); 186 186
+2 -1
drivers/infiniband/hw/hns/hns_roce_debugfs.c
··· 5 5 6 6 #include <linux/debugfs.h> 7 7 #include <linux/device.h> 8 + #include <linux/pci.h> 8 9 9 10 #include "hns_roce_device.h" 10 11 ··· 87 86 { 88 87 struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs; 89 88 90 - dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev), 89 + dbgfs->root = debugfs_create_dir(pci_name(hr_dev->pci_dev), 91 90 hns_roce_dbgfs_root); 92 91 93 92 create_sw_stat_debugfs(hr_dev, dbgfs->root);
+5 -9
drivers/infiniband/hw/hns/hns_roce_device.h
··· 489 489 u32 next; /* Next ID to allocate. */ 490 490 }; 491 491 492 - struct hns_roce_idx_table { 493 - u32 *spare_idx; 494 - u32 head; 495 - u32 tail; 496 - }; 497 - 498 492 struct hns_roce_qp_table { 499 493 struct hns_roce_hem_table qp_table; 500 494 struct hns_roce_hem_table irrl_table; ··· 497 503 struct mutex scc_mutex; 498 504 struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; 499 505 struct mutex bank_mutex; 500 - struct hns_roce_idx_table idx_table; 506 + struct xarray dip_xa; 501 507 }; 502 508 503 509 struct hns_roce_cq_table { ··· 587 593 588 594 enum { 589 595 HNS_ROCE_FLUSH_FLAG = 0, 596 + HNS_ROCE_STOP_FLUSH_FLAG = 1, 590 597 }; 591 598 592 599 struct hns_roce_work { ··· 651 656 enum hns_roce_cong_type cong_type; 652 657 u8 tc_mode; 653 658 u8 priority; 659 + spinlock_t flush_lock; 660 + struct hns_roce_dip *dip; 654 661 }; 655 662 656 663 struct hns_roce_ib_iboe { ··· 979 982 enum hns_roce_device_state state; 980 983 struct list_head qp_list; /* list of all qps on this dev */ 981 984 spinlock_t qp_list_lock; /* protect qp_list */ 982 - struct list_head dip_list; /* list of all dest ips on this dev */ 983 - spinlock_t dip_list_lock; /* protect dip_list */ 984 985 985 986 struct list_head pgdir_list; 986 987 struct mutex pgdir_mutex; ··· 1284 1289 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); 1285 1290 void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp); 1286 1291 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); 1292 + void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn); 1287 1293 void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); 1288 1294 void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); 1289 1295 int hns_roce_init(struct hns_roce_dev *hr_dev);
+24 -24
drivers/infiniband/hw/hns/hns_roce_hem.c
··· 300 300 struct hns_roce_hem_mhop *mhop, 301 301 struct hns_roce_hem_index *index) 302 302 { 303 - struct ib_device *ibdev = &hr_dev->ib_dev; 303 + struct device *dev = hr_dev->dev; 304 304 unsigned long mhop_obj = obj; 305 305 u32 l0_idx, l1_idx, l2_idx; 306 306 u32 chunk_ba_num; ··· 331 331 index->buf = l0_idx; 332 332 break; 333 333 default: 334 - ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n", 335 - table->type, mhop->hop_num); 334 + dev_err(dev, "table %u not support mhop.hop_num = %u!\n", 335 + table->type, mhop->hop_num); 336 336 return -EINVAL; 337 337 } 338 338 339 339 if (unlikely(index->buf >= table->num_hem)) { 340 - ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", 341 - table->type, index->buf, table->num_hem); 340 + dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n", 341 + table->type, index->buf, table->num_hem); 342 342 return -EINVAL; 343 343 } 344 344 ··· 448 448 struct hns_roce_hem_mhop *mhop, 449 449 struct hns_roce_hem_index *index) 450 450 { 451 - struct ib_device *ibdev = &hr_dev->ib_dev; 451 + struct device *dev = hr_dev->dev; 452 452 u32 step_idx; 453 453 int ret = 0; 454 454 455 455 if (index->inited & HEM_INDEX_L0) { 456 456 ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0); 457 457 if (ret) { 458 - ibdev_err(ibdev, "set HEM step 0 failed!\n"); 458 + dev_err(dev, "set HEM step 0 failed!\n"); 459 459 goto out; 460 460 } 461 461 } ··· 463 463 if (index->inited & HEM_INDEX_L1) { 464 464 ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1); 465 465 if (ret) { 466 - ibdev_err(ibdev, "set HEM step 1 failed!\n"); 466 + dev_err(dev, "set HEM step 1 failed!\n"); 467 467 goto out; 468 468 } 469 469 } ··· 475 475 step_idx = mhop->hop_num; 476 476 ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx); 477 477 if (ret) 478 - ibdev_err(ibdev, "set HEM step last failed!\n"); 478 + dev_err(dev, "set HEM step last failed!\n"); 479 479 } 480 480 out: 481 481 return ret; ··· 485 485 struct hns_roce_hem_table *table, 
486 486 unsigned long obj) 487 487 { 488 - struct ib_device *ibdev = &hr_dev->ib_dev; 489 488 struct hns_roce_hem_index index = {}; 490 489 struct hns_roce_hem_mhop mhop = {}; 490 + struct device *dev = hr_dev->dev; 491 491 int ret; 492 492 493 493 ret = calc_hem_config(hr_dev, table, obj, &mhop, &index); 494 494 if (ret) { 495 - ibdev_err(ibdev, "calc hem config failed!\n"); 495 + dev_err(dev, "calc hem config failed!\n"); 496 496 return ret; 497 497 } 498 498 ··· 504 504 505 505 ret = alloc_mhop_hem(hr_dev, table, &mhop, &index); 506 506 if (ret) { 507 - ibdev_err(ibdev, "alloc mhop hem failed!\n"); 507 + dev_err(dev, "alloc mhop hem failed!\n"); 508 508 goto out; 509 509 } 510 510 ··· 512 512 if (table->type < HEM_TYPE_MTT) { 513 513 ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index); 514 514 if (ret) { 515 - ibdev_err(ibdev, "set HEM address to HW failed!\n"); 515 + dev_err(dev, "set HEM address to HW failed!\n"); 516 516 goto err_alloc; 517 517 } 518 518 } ··· 575 575 struct hns_roce_hem_mhop *mhop, 576 576 struct hns_roce_hem_index *index) 577 577 { 578 - struct ib_device *ibdev = &hr_dev->ib_dev; 578 + struct device *dev = hr_dev->dev; 579 579 u32 hop_num = mhop->hop_num; 580 580 u32 chunk_ba_num; 581 581 u32 step_idx; ··· 605 605 606 606 ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx); 607 607 if (ret) 608 - ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n", 609 - hop_num, ret); 608 + dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n", 609 + hop_num, ret); 610 610 611 611 if (index->inited & HEM_INDEX_L1) { 612 612 ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1); 613 613 if (ret) 614 - ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n", 615 - ret); 614 + dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n", 615 + ret); 616 616 } 617 617 618 618 if (index->inited & HEM_INDEX_L0) { 619 619 ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0); 620 620 if (ret) 621 - ibdev_warn(ibdev, "failed to clear HEM step 0, ret = 
%d.\n", 622 - ret); 621 + dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n", 622 + ret); 623 623 } 624 624 } 625 625 } ··· 629 629 unsigned long obj, 630 630 int check_refcount) 631 631 { 632 - struct ib_device *ibdev = &hr_dev->ib_dev; 633 632 struct hns_roce_hem_index index = {}; 634 633 struct hns_roce_hem_mhop mhop = {}; 634 + struct device *dev = hr_dev->dev; 635 635 int ret; 636 636 637 637 ret = calc_hem_config(hr_dev, table, obj, &mhop, &index); 638 638 if (ret) { 639 - ibdev_err(ibdev, "calc hem config failed!\n"); 639 + dev_err(dev, "calc hem config failed!\n"); 640 640 return; 641 641 } 642 642 ··· 672 672 673 673 ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT); 674 674 if (ret) 675 - dev_warn(dev, "failed to clear HEM base address, ret = %d.\n", 676 - ret); 675 + dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n", 676 + ret); 677 677 678 678 hns_roce_free_hem(hr_dev, table->hem[i]); 679 679 table->hem[i] = NULL;
+165 -92
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 373 373 static int check_send_valid(struct hns_roce_dev *hr_dev, 374 374 struct hns_roce_qp *hr_qp) 375 375 { 376 - struct ib_device *ibdev = &hr_dev->ib_dev; 377 - 378 376 if (unlikely(hr_qp->state == IB_QPS_RESET || 379 377 hr_qp->state == IB_QPS_INIT || 380 - hr_qp->state == IB_QPS_RTR)) { 381 - ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", 382 - hr_qp->state); 378 + hr_qp->state == IB_QPS_RTR)) 383 379 return -EINVAL; 384 - } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { 385 - ibdev_err(ibdev, "failed to post WQE, dev state %d!\n", 386 - hr_dev->state); 380 + else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) 387 381 return -EIO; 388 - } 389 382 390 383 return 0; 391 384 } ··· 575 582 if (WARN_ON(ret)) 576 583 return ret; 577 584 578 - hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE, 585 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO, 579 586 (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); 580 587 581 588 hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE, ··· 2553 2560 free_link_table_buf(hr_dev, &priv->ext_llm); 2554 2561 } 2555 2562 2556 - static void free_dip_list(struct hns_roce_dev *hr_dev) 2563 + static void free_dip_entry(struct hns_roce_dev *hr_dev) 2557 2564 { 2558 2565 struct hns_roce_dip *hr_dip; 2559 - struct hns_roce_dip *tmp; 2560 - unsigned long flags; 2566 + unsigned long idx; 2561 2567 2562 - spin_lock_irqsave(&hr_dev->dip_list_lock, flags); 2568 + xa_lock(&hr_dev->qp_table.dip_xa); 2563 2569 2564 - list_for_each_entry_safe(hr_dip, tmp, &hr_dev->dip_list, node) { 2565 - list_del(&hr_dip->node); 2570 + xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) { 2571 + __xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx); 2566 2572 kfree(hr_dip); 2567 2573 } 2568 2574 2569 - spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); 2575 + xa_unlock(&hr_dev->qp_table.dip_xa); 2570 2576 } 2571 2577 2572 2578 static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev) ··· 2767 2775 ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, 
attr, mask, IB_QPS_INIT, 2768 2776 IB_QPS_INIT, NULL); 2769 2777 if (ret) { 2770 - ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n", 2771 - ret); 2778 + ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n", 2779 + ret); 2772 2780 return ret; 2773 2781 } 2774 2782 ··· 2973 2981 hns_roce_free_link_table(hr_dev); 2974 2982 2975 2983 if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09) 2976 - free_dip_list(hr_dev); 2984 + free_dip_entry(hr_dev); 2977 2985 } 2978 2986 2979 2987 static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, ··· 3413 3421 3414 3422 ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr); 3415 3423 if (ret) { 3416 - ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n", 3417 - ret); 3424 + ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n", 3425 + ret); 3418 3426 return ret; 3419 3427 } 3420 3428 ··· 3453 3461 3454 3462 ret = free_mr_post_send_lp_wqe(hr_qp); 3455 3463 if (ret) { 3456 - ibdev_err(ibdev, 3457 - "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", 3458 - hr_qp->qpn, ret); 3464 + ibdev_err_ratelimited(ibdev, 3465 + "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", 3466 + hr_qp->qpn, ret); 3459 3467 break; 3460 3468 } 3461 3469 ··· 3466 3474 while (cqe_cnt) { 3467 3475 npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc); 3468 3476 if (npolled < 0) { 3469 - ibdev_err(ibdev, 3470 - "failed to poll cqe for free mr, remain %d cqe.\n", 3471 - cqe_cnt); 3477 + ibdev_err_ratelimited(ibdev, 3478 + "failed to poll cqe for free mr, remain %d cqe.\n", 3479 + cqe_cnt); 3472 3480 goto out; 3473 3481 } 3474 3482 3475 3483 if (time_after(jiffies, end)) { 3476 - ibdev_err(ibdev, 3477 - "failed to poll cqe for free mr and timeout, remain %d cqe.\n", 3478 - cqe_cnt); 3484 + ibdev_err_ratelimited(ibdev, 3485 + "failed to poll cqe for free mr and timeout, remain %d cqe.\n", 3486 + cqe_cnt); 3479 3487 goto out; 3480 3488 } 3481 3489 cqe_cnt -= npolled; 
··· 4693 4701 return 0; 4694 4702 } 4695 4703 4704 + static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn) 4705 + { 4706 + struct hns_roce_dip *hr_dip; 4707 + int ret; 4708 + 4709 + hr_dip = xa_load(dip_xa, qpn); 4710 + if (hr_dip) 4711 + return 0; 4712 + 4713 + hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL); 4714 + if (!hr_dip) 4715 + return -ENOMEM; 4716 + 4717 + ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL)); 4718 + if (ret) 4719 + kfree(hr_dip); 4720 + 4721 + return ret; 4722 + } 4723 + 4696 4724 static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, 4697 4725 u32 *dip_idx) 4698 4726 { 4699 4727 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 4700 4728 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 4701 - u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx; 4702 - u32 *head = &hr_dev->qp_table.idx_table.head; 4703 - u32 *tail = &hr_dev->qp_table.idx_table.tail; 4729 + struct xarray *dip_xa = &hr_dev->qp_table.dip_xa; 4730 + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); 4704 4731 struct hns_roce_dip *hr_dip; 4705 - unsigned long flags; 4732 + unsigned long idx; 4706 4733 int ret = 0; 4707 4734 4708 - spin_lock_irqsave(&hr_dev->dip_list_lock, flags); 4735 + ret = alloc_dip_entry(dip_xa, ibqp->qp_num); 4736 + if (ret) 4737 + return ret; 4709 4738 4710 - spare_idx[*tail] = ibqp->qp_num; 4711 - *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1); 4739 + xa_lock(dip_xa); 4712 4740 4713 - list_for_each_entry(hr_dip, &hr_dev->dip_list, node) { 4714 - if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) { 4741 + xa_for_each(dip_xa, idx, hr_dip) { 4742 + if (hr_dip->qp_cnt && 4743 + !memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) { 4715 4744 *dip_idx = hr_dip->dip_idx; 4745 + hr_dip->qp_cnt++; 4746 + hr_qp->dip = hr_dip; 4716 4747 goto out; 4717 4748 } 4718 4749 } ··· 4743 4728 /* If no dgid is found, a new dip and a mapping between dgid and 4744 4729 * dip_idx will be created. 
4745 4730 */ 4746 - hr_dip = kzalloc(sizeof(*hr_dip), GFP_ATOMIC); 4747 - if (!hr_dip) { 4748 - ret = -ENOMEM; 4749 - goto out; 4731 + xa_for_each(dip_xa, idx, hr_dip) { 4732 + if (hr_dip->qp_cnt) 4733 + continue; 4734 + 4735 + *dip_idx = idx; 4736 + memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); 4737 + hr_dip->dip_idx = idx; 4738 + hr_dip->qp_cnt++; 4739 + hr_qp->dip = hr_dip; 4740 + break; 4750 4741 } 4751 4742 4752 - memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); 4753 - hr_dip->dip_idx = *dip_idx = spare_idx[*head]; 4754 - *head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1); 4755 - list_add_tail(&hr_dip->node, &hr_dev->dip_list); 4743 + /* This should never happen. */ 4744 + if (WARN_ON_ONCE(!hr_qp->dip)) 4745 + ret = -ENOSPC; 4756 4746 4757 4747 out: 4758 - spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); 4748 + xa_unlock(dip_xa); 4759 4749 return ret; 4760 4750 } 4761 4751 ··· 5081 5061 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 5082 5062 int ret = 0; 5083 5063 5084 - if (!check_qp_state(cur_state, new_state)) { 5085 - ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n"); 5064 + if (!check_qp_state(cur_state, new_state)) 5086 5065 return -EINVAL; 5087 - } 5088 5066 5089 5067 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 5090 5068 memset(qpc_mask, 0, hr_dev->caps.qpc_sz); ··· 5343 5325 /* SW pass context to HW */ 5344 5326 ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); 5345 5327 if (ret) { 5346 - ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); 5328 + ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret); 5347 5329 goto out; 5348 5330 } 5349 5331 ··· 5481 5463 5482 5464 ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context); 5483 5465 if (ret) { 5484 - ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); 5466 + ibdev_err_ratelimited(ibdev, 5467 + "failed to query QPC, ret = %d.\n", 5468 + ret); 5485 5469 ret = -EINVAL; 5486 5470 goto out; 
5487 5471 } ··· 5491 5471 state = hr_reg_read(&context, QPC_QP_ST); 5492 5472 tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); 5493 5473 if (tmp_qp_state == -1) { 5494 - ibdev_err(ibdev, "Illegal ib_qp_state\n"); 5474 + ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n"); 5495 5475 ret = -EINVAL; 5496 5476 goto out; 5497 5477 } ··· 5584 5564 ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, 5585 5565 hr_qp->state, IB_QPS_RESET, udata); 5586 5566 if (ret) 5587 - ibdev_err(ibdev, 5588 - "failed to modify QP to RST, ret = %d.\n", 5589 - ret); 5567 + ibdev_err_ratelimited(ibdev, 5568 + "failed to modify QP to RST, ret = %d.\n", 5569 + ret); 5590 5570 } 5591 5571 5592 5572 send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; ··· 5614 5594 return ret; 5615 5595 } 5616 5596 5597 + static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev, 5598 + struct hns_roce_qp *hr_qp) 5599 + { 5600 + struct hns_roce_dip *hr_dip = hr_qp->dip; 5601 + 5602 + xa_lock(&hr_dev->qp_table.dip_xa); 5603 + 5604 + hr_dip->qp_cnt--; 5605 + if (!hr_dip->qp_cnt) 5606 + memset(hr_dip->dgid, 0, GID_LEN_V2); 5607 + 5608 + xa_unlock(&hr_dev->qp_table.dip_xa); 5609 + } 5610 + 5617 5611 int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) 5618 5612 { 5619 5613 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 5620 5614 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); 5615 + unsigned long flags; 5621 5616 int ret; 5617 + 5618 + /* Make sure flush_cqe() is completed */ 5619 + spin_lock_irqsave(&hr_qp->flush_lock, flags); 5620 + set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag); 5621 + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); 5622 + flush_work(&hr_qp->flush_work.work); 5623 + 5624 + if (hr_qp->cong_type == CONG_TYPE_DIP) 5625 + put_dip_ctx_idx(hr_dev, hr_qp); 5622 5626 5623 5627 ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); 5624 5628 if (ret) 5625 - ibdev_err(&hr_dev->ib_dev, 5626 - "failed to destroy QP, QPN = 0x%06lx, ret = 
%d.\n", 5627 - hr_qp->qpn, ret); 5629 + ibdev_err_ratelimited(&hr_dev->ib_dev, 5630 + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", 5631 + hr_qp->qpn, ret); 5628 5632 5629 5633 hns_roce_qp_destroy(hr_dev, hr_qp, udata); 5630 5634 ··· 5942 5898 HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn); 5943 5899 hns_roce_free_cmd_mailbox(hr_dev, mailbox); 5944 5900 if (ret) 5945 - ibdev_err(&hr_dev->ib_dev, 5946 - "failed to process cmd when modifying CQ, ret = %d.\n", 5947 - ret); 5901 + ibdev_err_ratelimited(&hr_dev->ib_dev, 5902 + "failed to process cmd when modifying CQ, ret = %d.\n", 5903 + ret); 5948 5904 5949 5905 err_out: 5950 5906 if (ret) ··· 5968 5924 ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, 5969 5925 HNS_ROCE_CMD_QUERY_CQC, cqn); 5970 5926 if (ret) { 5971 - ibdev_err(&hr_dev->ib_dev, 5972 - "failed to process cmd when querying CQ, ret = %d.\n", 5973 - ret); 5927 + ibdev_err_ratelimited(&hr_dev->ib_dev, 5928 + "failed to process cmd when querying CQ, ret = %d.\n", 5929 + ret); 5974 5930 goto err_mailbox; 5975 5931 } 5976 5932 ··· 6011 5967 return ret; 6012 5968 } 6013 5969 6014 - static void hns_roce_irq_work_handle(struct work_struct *work) 5970 + static void dump_aeqe_log(struct hns_roce_work *irq_work) 6015 5971 { 6016 - struct hns_roce_work *irq_work = 6017 - container_of(work, struct hns_roce_work, work); 6018 - struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; 5972 + struct hns_roce_dev *hr_dev = irq_work->hr_dev; 5973 + struct ib_device *ibdev = &hr_dev->ib_dev; 6019 5974 6020 5975 switch (irq_work->event_type) { 6021 5976 case HNS_ROCE_EVENT_TYPE_PATH_MIG: ··· 6058 6015 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 6059 6016 ibdev_warn(ibdev, "DB overflow.\n"); 6060 6017 break; 6018 + case HNS_ROCE_EVENT_TYPE_MB: 6019 + break; 6061 6020 case HNS_ROCE_EVENT_TYPE_FLR: 6062 6021 ibdev_warn(ibdev, "function level reset.\n"); 6063 6022 break; ··· 6070 6025 ibdev_err(ibdev, "invalid xrceth error.\n"); 6071 6026 break; 6072 6027 default: 6028 + ibdev_info(ibdev, 
"Undefined event %d.\n", 6029 + irq_work->event_type); 6073 6030 break; 6074 6031 } 6032 + } 6033 + 6034 + static void hns_roce_irq_work_handle(struct work_struct *work) 6035 + { 6036 + struct hns_roce_work *irq_work = 6037 + container_of(work, struct hns_roce_work, work); 6038 + struct hns_roce_dev *hr_dev = irq_work->hr_dev; 6039 + int event_type = irq_work->event_type; 6040 + u32 queue_num = irq_work->queue_num; 6041 + 6042 + switch (event_type) { 6043 + case HNS_ROCE_EVENT_TYPE_PATH_MIG: 6044 + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: 6045 + case HNS_ROCE_EVENT_TYPE_COMM_EST: 6046 + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: 6047 + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 6048 + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: 6049 + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 6050 + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 6051 + case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: 6052 + case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: 6053 + hns_roce_qp_event(hr_dev, queue_num, event_type); 6054 + break; 6055 + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 6056 + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: 6057 + hns_roce_srq_event(hr_dev, queue_num, event_type); 6058 + break; 6059 + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 6060 + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 6061 + hns_roce_cq_event(hr_dev, queue_num, event_type); 6062 + break; 6063 + default: 6064 + break; 6065 + } 6066 + 6067 + dump_aeqe_log(irq_work); 6075 6068 6076 6069 kfree(irq_work); 6077 6070 } ··· 6170 6087 static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, 6171 6088 struct hns_roce_eq *eq) 6172 6089 { 6173 - struct device *dev = hr_dev->dev; 6174 6090 struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); 6175 6091 irqreturn_t aeqe_found = IRQ_NONE; 6092 + int num_aeqes = 0; 6176 6093 int event_type; 6177 6094 u32 queue_num; 6178 6095 int sub_type; 6179 6096 6180 - while (aeqe) { 6097 + while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) { 6181 6098 /* Make sure we read AEQ entry after we have 
checked the 6182 6099 * ownership bit 6183 6100 */ ··· 6188 6105 queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM); 6189 6106 6190 6107 switch (event_type) { 6191 - case HNS_ROCE_EVENT_TYPE_PATH_MIG: 6192 - case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: 6193 - case HNS_ROCE_EVENT_TYPE_COMM_EST: 6194 - case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: 6195 6108 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 6196 - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: 6197 6109 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 6198 6110 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 6199 6111 case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: 6200 6112 case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: 6201 - hns_roce_qp_event(hr_dev, queue_num, event_type); 6202 - break; 6203 - case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 6204 - case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: 6205 - hns_roce_srq_event(hr_dev, queue_num, event_type); 6206 - break; 6207 - case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 6208 - case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 6209 - hns_roce_cq_event(hr_dev, queue_num, event_type); 6113 + hns_roce_flush_cqe(hr_dev, queue_num); 6210 6114 break; 6211 6115 case HNS_ROCE_EVENT_TYPE_MB: 6212 6116 hns_roce_cmd_event(hr_dev, ··· 6201 6131 aeqe->event.cmd.status, 6202 6132 le64_to_cpu(aeqe->event.cmd.out_param)); 6203 6133 break; 6204 - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 6205 - case HNS_ROCE_EVENT_TYPE_FLR: 6206 - break; 6207 6134 default: 6208 - dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n", 6209 - event_type, eq->eqn, eq->cons_index); 6210 6135 break; 6211 6136 } 6212 6137 ··· 6215 6150 hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); 6216 6151 6217 6152 aeqe = next_aeqe_sw_v2(eq); 6153 + ++num_aeqes; 6218 6154 } 6219 6155 6220 6156 update_eq_db(eq); ··· 6765 6699 int ret; 6766 6700 int i; 6767 6701 6702 + if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET) 6703 + return -EINVAL; 6704 + 6768 6705 other_num = hr_dev->caps.num_other_vectors; 6769 6706 comp_num = hr_dev->caps.num_comp_vectors; 6770 6707 
aeq_num = hr_dev->caps.num_aeq_vectors; ··· 7086 7017 7087 7018 handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; 7088 7019 } 7020 + 7089 7021 static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) 7090 7022 { 7091 7023 struct hns_roce_dev *hr_dev; ··· 7105 7035 7106 7036 hr_dev->active = false; 7107 7037 hr_dev->dis_db = true; 7038 + 7039 + rdma_user_mmap_disassociate(&hr_dev->ib_dev); 7040 + 7108 7041 hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN; 7109 7042 7110 7043 return 0;
+7 -1
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
··· 85 85 86 86 #define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18) 87 87 88 + /* budget must be smaller than aeqe_depth to guarantee that we update 89 + * the ci before we polled all the entries in the EQ. 90 + */ 91 + #define HNS_AEQ_POLLING_BUDGET 64 92 + 88 93 enum { 89 94 HNS_ROCE_CMD_FLAG_IN = BIT(0), 90 95 HNS_ROCE_CMD_FLAG_OUT = BIT(1), ··· 924 919 #define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) 925 920 #define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) 926 921 #define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) 922 + #define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10) 927 923 #define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11) 928 924 #define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12) 929 925 #define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15) ··· 1348 1342 struct hns_roce_dip { 1349 1343 u8 dgid[GID_LEN_V2]; 1350 1344 u32 dip_idx; 1351 - struct list_head node; /* all dips are on a list */ 1345 + u32 qp_cnt; 1352 1346 }; 1353 1347 1354 1348 struct fmea_ram_ecc {
+5 -2
drivers/infiniband/hw/hns/hns_roce_main.c
··· 466 466 pgprot_t prot; 467 467 int ret; 468 468 469 + if (hr_dev->dis_db) { 470 + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]); 471 + return -EPERM; 472 + } 473 + 469 474 rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff); 470 475 if (!rdma_entry) { 471 476 atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]); ··· 1135 1130 1136 1131 INIT_LIST_HEAD(&hr_dev->qp_list); 1137 1132 spin_lock_init(&hr_dev->qp_list_lock); 1138 - INIT_LIST_HEAD(&hr_dev->dip_list); 1139 - spin_lock_init(&hr_dev->dip_list_lock); 1140 1133 1141 1134 ret = hns_roce_register_device(hr_dev); 1142 1135 if (ret)
+6 -5
drivers/infiniband/hw/hns/hns_roce_mr.c
··· 138 138 key_to_hw_index(mr->key) & 139 139 (hr_dev->caps.num_mtpts - 1)); 140 140 if (ret) 141 - ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n", 142 - ret); 141 + ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n", 142 + ret); 143 143 } 144 144 145 145 free_mr_pbl(hr_dev, mr); ··· 435 435 } 436 436 437 437 int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 438 - unsigned int *sg_offset) 438 + unsigned int *sg_offset_p) 439 439 { 440 + unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 440 441 struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); 441 442 struct ib_device *ibdev = &hr_dev->ib_dev; 442 443 struct hns_roce_mr *mr = to_hr_mr(ibmr); 443 444 struct hns_roce_mtr *mtr = &mr->pbl_mtr; 444 445 int ret, sg_num = 0; 445 446 446 - if (!IS_ALIGNED(*sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) || 447 + if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) || 447 448 ibmr->page_size < HNS_HW_PAGE_SIZE || 448 449 ibmr->page_size > HNS_HW_MAX_PAGE_SIZE) 449 450 return sg_num; ··· 455 454 if (!mr->page_list) 456 455 return sg_num; 457 456 458 - sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); 457 + sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page); 459 458 if (sg_num < 1) { 460 459 ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", 461 460 mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);
+51 -28
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 39 39 #include "hns_roce_device.h" 40 40 #include "hns_roce_hem.h" 41 41 42 + static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, 43 + u32 qpn) 44 + { 45 + struct device *dev = hr_dev->dev; 46 + struct hns_roce_qp *qp; 47 + unsigned long flags; 48 + 49 + xa_lock_irqsave(&hr_dev->qp_table_xa, flags); 50 + qp = __hns_roce_qp_lookup(hr_dev, qpn); 51 + if (qp) 52 + refcount_inc(&qp->refcount); 53 + xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags); 54 + 55 + if (!qp) 56 + dev_warn(dev, "async event for bogus QP %08x\n", qpn); 57 + 58 + return qp; 59 + } 60 + 42 61 static void flush_work_handle(struct work_struct *work) 43 62 { 44 63 struct hns_roce_work *flush_work = container_of(work, ··· 90 71 void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) 91 72 { 92 73 struct hns_roce_work *flush_work = &hr_qp->flush_work; 74 + unsigned long flags; 93 75 94 - flush_work->hr_dev = hr_dev; 95 - INIT_WORK(&flush_work->work, flush_work_handle); 76 + spin_lock_irqsave(&hr_qp->flush_lock, flags); 77 + /* Exit directly after destroy_qp() */ 78 + if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) { 79 + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); 80 + return; 81 + } 82 + 96 83 refcount_inc(&hr_qp->refcount); 97 84 queue_work(hr_dev->irq_workq, &flush_work->work); 85 + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); 98 86 } 99 87 100 88 void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp) ··· 121 95 122 96 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) 123 97 { 124 - struct device *dev = hr_dev->dev; 125 98 struct hns_roce_qp *qp; 126 99 127 - xa_lock(&hr_dev->qp_table_xa); 128 - qp = __hns_roce_qp_lookup(hr_dev, qpn); 129 - if (qp) 130 - refcount_inc(&qp->refcount); 131 - xa_unlock(&hr_dev->qp_table_xa); 132 - 133 - if (!qp) { 134 - dev_warn(dev, "async event for bogus QP %08x\n", qpn); 100 + qp = hns_roce_qp_lookup(hr_dev, qpn); 101 + if (!qp) 135 102 return; 136 - } 137 - 
138 - if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR || 139 - event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR || 140 - event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR || 141 - event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION || 142 - event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) { 143 - qp->state = IB_QPS_ERR; 144 - 145 - flush_cqe(hr_dev, qp); 146 - } 147 103 148 104 qp->event(qp, (enum hns_roce_event)event_type); 105 + 106 + if (refcount_dec_and_test(&qp->refcount)) 107 + complete(&qp->free); 108 + } 109 + 110 + void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn) 111 + { 112 + struct hns_roce_qp *qp; 113 + 114 + qp = hns_roce_qp_lookup(hr_dev, qpn); 115 + if (!qp) 116 + return; 117 + 118 + qp->state = IB_QPS_ERR; 119 + flush_cqe(hr_dev, qp); 149 120 150 121 if (refcount_dec_and_test(&qp->refcount)) 151 122 complete(&qp->free); ··· 1147 1124 struct ib_udata *udata, 1148 1125 struct hns_roce_qp *hr_qp) 1149 1126 { 1127 + struct hns_roce_work *flush_work = &hr_qp->flush_work; 1150 1128 struct hns_roce_ib_create_qp_resp resp = {}; 1151 1129 struct ib_device *ibdev = &hr_dev->ib_dev; 1152 1130 struct hns_roce_ib_create_qp ucmd = {}; ··· 1156 1132 mutex_init(&hr_qp->mutex); 1157 1133 spin_lock_init(&hr_qp->sq.lock); 1158 1134 spin_lock_init(&hr_qp->rq.lock); 1135 + spin_lock_init(&hr_qp->flush_lock); 1159 1136 1160 1137 hr_qp->state = IB_QPS_RESET; 1161 1138 hr_qp->flush_flag = 0; 1139 + flush_work->hr_dev = hr_dev; 1140 + INIT_WORK(&flush_work->work, flush_work_handle); 1162 1141 1163 1142 if (init_attr->create_flags) 1164 1143 return -EOPNOTSUPP; ··· 1573 1546 unsigned int reserved_from_bot; 1574 1547 unsigned int i; 1575 1548 1576 - qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps, 1577 - sizeof(u32), GFP_KERNEL); 1578 - if (!qp_table->idx_table.spare_idx) 1579 - return -ENOMEM; 1580 - 1581 1549 mutex_init(&qp_table->scc_mutex); 1582 1550 mutex_init(&qp_table->bank_mutex); 1583 1551 xa_init(&hr_dev->qp_table_xa); 1552 
+ xa_init(&qp_table->dip_xa); 1584 1553 1585 1554 reserved_from_bot = hr_dev->caps.reserved_qps; 1586 1555 ··· 1601 1578 1602 1579 for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) 1603 1580 ida_destroy(&hr_dev->qp_table.bank[i].ida); 1581 + xa_destroy(&hr_dev->qp_table.dip_xa); 1604 1582 mutex_destroy(&hr_dev->qp_table.bank_mutex); 1605 1583 mutex_destroy(&hr_dev->qp_table.scc_mutex); 1606 - kfree(hr_dev->qp_table.idx_table.spare_idx); 1607 1584 }
+2 -2
drivers/infiniband/hw/hns/hns_roce_srq.c
··· 151 151 ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, 152 152 srq->srqn); 153 153 if (ret) 154 - dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", 155 - ret, srq->srqn); 154 + dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", 155 + ret, srq->srqn); 156 156 157 157 xa_erase_irq(&srq_table->xa, srq->srqn); 158 158
+92 -1
drivers/infiniband/hw/mlx5/devx.c
··· 27 27 DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0, 28 28 DEVX_OBJ_FLAGS_DCT = 1 << 1, 29 29 DEVX_OBJ_FLAGS_CQ = 1 << 2, 30 + DEVX_OBJ_FLAGS_HW_FREED = 1 << 3, 31 + }; 32 + 33 + #define MAX_ASYNC_CMDS 8 34 + 35 + struct mlx5_async_cmd { 36 + struct ib_uobject *uobject; 37 + void *in; 38 + int in_size; 39 + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; 40 + int err; 41 + struct mlx5_async_work cb_work; 42 + struct completion comp; 30 43 }; 31 44 32 45 struct devx_async_data { ··· 1418 1405 */ 1419 1406 mlx5r_deref_wait_odp_mkey(&obj->mkey); 1420 1407 1421 - if (obj->flags & DEVX_OBJ_FLAGS_DCT) 1408 + if (obj->flags & DEVX_OBJ_FLAGS_HW_FREED) 1409 + ret = 0; 1410 + else if (obj->flags & DEVX_OBJ_FLAGS_DCT) 1422 1411 ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct); 1423 1412 else if (obj->flags & DEVX_OBJ_FLAGS_CQ) 1424 1413 ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq); ··· 2607 2592 xa_destroy(&table->event_xa); 2608 2593 2609 2594 mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); 2595 + } 2596 + } 2597 + 2598 + static void devx_async_destroy_cb(int status, struct mlx5_async_work *context) 2599 + { 2600 + struct mlx5_async_cmd *devx_out = container_of(context, 2601 + struct mlx5_async_cmd, cb_work); 2602 + struct devx_obj *obj = devx_out->uobject->object; 2603 + 2604 + if (!status) 2605 + obj->flags |= DEVX_OBJ_FLAGS_HW_FREED; 2606 + 2607 + complete(&devx_out->comp); 2608 + } 2609 + 2610 + static void devx_async_destroy(struct mlx5_ib_dev *dev, 2611 + struct mlx5_async_cmd *cmd) 2612 + { 2613 + init_completion(&cmd->comp); 2614 + cmd->err = mlx5_cmd_exec_cb(&dev->async_ctx, cmd->in, cmd->in_size, 2615 + &cmd->out, sizeof(cmd->out), 2616 + devx_async_destroy_cb, &cmd->cb_work); 2617 + } 2618 + 2619 + static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd) 2620 + { 2621 + if (!cmd->err) 2622 + wait_for_completion(&cmd->comp); 2623 + atomic_set(&cmd->uobject->usecnt, 0); 2624 + } 2625 + 2626 + void mlx5_ib_ufile_hw_cleanup(struct 
ib_uverbs_file *ufile) 2627 + { 2628 + struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS]; 2629 + struct ib_ucontext *ucontext = ufile->ucontext; 2630 + struct ib_device *device = ucontext->device; 2631 + struct mlx5_ib_dev *dev = to_mdev(device); 2632 + struct ib_uobject *uobject; 2633 + struct devx_obj *obj; 2634 + int head = 0; 2635 + int tail = 0; 2636 + 2637 + list_for_each_entry(uobject, &ufile->uobjects, list) { 2638 + WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE)); 2639 + 2640 + /* 2641 + * Currently we only support QP destruction, if other objects 2642 + * are to be destroyed need to add type synchronization to the 2643 + * cleanup algorithm and handle pre/post FW cleanup for the 2644 + * new types if needed. 2645 + */ 2646 + if (uobj_get_object_id(uobject) != MLX5_IB_OBJECT_DEVX_OBJ || 2647 + (get_dec_obj_type(uobject->object, MLX5_EVENT_TYPE_MAX) != 2648 + MLX5_OBJ_TYPE_QP)) { 2649 + atomic_set(&uobject->usecnt, 0); 2650 + continue; 2651 + } 2652 + 2653 + obj = uobject->object; 2654 + 2655 + async_cmd[tail % MAX_ASYNC_CMDS].in = obj->dinbox; 2656 + async_cmd[tail % MAX_ASYNC_CMDS].in_size = obj->dinlen; 2657 + async_cmd[tail % MAX_ASYNC_CMDS].uobject = uobject; 2658 + 2659 + devx_async_destroy(dev, &async_cmd[tail % MAX_ASYNC_CMDS]); 2660 + tail++; 2661 + 2662 + if (tail - head == MAX_ASYNC_CMDS) { 2663 + devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]); 2664 + head++; 2665 + } 2666 + } 2667 + 2668 + while (head != tail) { 2669 + devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]); 2670 + head++; 2610 2671 } 2611 2672 } 2612 2673
+4
drivers/infiniband/hw/mlx5/devx.h
··· 28 28 void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); 29 29 int mlx5_ib_devx_init(struct mlx5_ib_dev *dev); 30 30 void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev); 31 + void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile); 31 32 #else 32 33 static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) 33 34 { ··· 40 39 return 0; 41 40 } 42 41 static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) 42 + { 43 + } 44 + static inline void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) 43 45 { 44 46 } 45 47 #endif
+7 -1
drivers/infiniband/hw/mlx5/mad.c
··· 278 278 goto done; 279 279 } 280 280 281 - err = query_ib_ppcnt(mdev, mdev_port_num, 0, out_cnt, sz, 0); 281 + if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) 282 + err = query_ib_ppcnt(mdev, mdev_port_num, port_num, 283 + out_cnt, sz, 0); 284 + else 285 + err = query_ib_ppcnt(mdev, mdev_port_num, 0, 286 + out_cnt, sz, 0); 287 + 282 288 if (!err) 283 289 pma_cnt_assign(pma_cnt, out_cnt); 284 290 }
+46 -32
drivers/infiniband/hw/mlx5/main.c
··· 1182 1182 MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; 1183 1183 1184 1184 resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; 1185 + 1186 + if (MLX5_CAP_GEN_2(mdev, dp_ordering_force) && 1187 + (MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc) || 1188 + MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc) || 1189 + MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc) || 1190 + MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud) || 1191 + MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc))) 1192 + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP; 1185 1193 } 1186 1194 1187 1195 if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) { ··· 3005 2997 static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) 3006 2998 { 3007 2999 struct mlx5_ib_resources *devr = &dev->devr; 3008 - int port; 3009 3000 int ret; 3010 3001 3011 3002 if (!MLX5_CAP_GEN(dev->mdev, xrc)) ··· 3020 3013 return ret; 3021 3014 } 3022 3015 3023 - for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) 3024 - INIT_WORK(&devr->ports[port].pkey_change_work, 3025 - pkey_change_handler); 3026 - 3027 3016 mutex_init(&devr->cq_lock); 3028 3017 mutex_init(&devr->srq_lock); 3029 3018 ··· 3029 3026 static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) 3030 3027 { 3031 3028 struct mlx5_ib_resources *devr = &dev->devr; 3032 - int port; 3033 - 3034 - /* 3035 - * Make sure no change P_Key work items are still executing. 3036 - * 3037 - * At this stage, the mlx5_ib_event should be unregistered 3038 - * and it ensures that no new works are added. 3039 - */ 3040 - for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) 3041 - cancel_work_sync(&devr->ports[port].pkey_change_work); 3042 3029 3043 3030 /* After s0/s1 init, they are not unset during the device lifetime. 
*/ 3044 3031 if (devr->s1) { ··· 3204 3211 struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev, 3205 3212 lag_events); 3206 3213 struct mlx5_core_dev *mdev = dev->mdev; 3214 + struct ib_device *ibdev = &dev->ib_dev; 3215 + struct net_device *old_ndev = NULL; 3207 3216 struct mlx5_ib_port *port; 3208 3217 struct net_device *ndev; 3209 - int i, err; 3210 - int portnum; 3218 + u32 portnum = 0; 3219 + int ret = 0; 3220 + int i; 3211 3221 3212 - portnum = 0; 3213 3222 switch (event) { 3214 3223 case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE: 3215 3224 ndev = data; ··· 3227 3232 } 3228 3233 } 3229 3234 } 3230 - err = ib_device_set_netdev(&dev->ib_dev, ndev, 3231 - portnum + 1); 3232 - dev_put(ndev); 3233 - if (err) 3234 - return err; 3235 - /* Rescan gids after new netdev assignment */ 3236 - rdma_roce_rescan_device(&dev->ib_dev); 3235 + old_ndev = ib_device_get_netdev(ibdev, portnum + 1); 3236 + ret = ib_device_set_netdev(ibdev, ndev, portnum + 1); 3237 + if (ret) 3238 + goto out; 3239 + 3240 + if (old_ndev) 3241 + roce_del_all_netdev_gids(ibdev, portnum + 1, 3242 + old_ndev); 3243 + rdma_roce_rescan_port(ibdev, portnum + 1); 3237 3244 } 3238 3245 break; 3239 3246 default: 3240 3247 return NOTIFY_DONE; 3241 3248 } 3242 - return NOTIFY_OK; 3249 + 3250 + out: 3251 + dev_put(old_ndev); 3252 + return notifier_from_errno(ret); 3243 3253 } 3244 3254 3245 3255 static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev) ··· 4134 4134 .req_notify_cq = mlx5_ib_arm_cq, 4135 4135 .rereg_user_mr = mlx5_ib_rereg_user_mr, 4136 4136 .resize_cq = mlx5_ib_resize_cq, 4137 + .ufile_hw_cleanup = mlx5_ib_ufile_hw_cleanup, 4137 4138 4138 4139 INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), 4139 4140 INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), ··· 4465 4464 4466 4465 static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) 4467 4466 { 4467 + struct mlx5_ib_resources *devr = &dev->devr; 4468 + int port; 4469 + 4470 + for (port = 0; port < 
ARRAY_SIZE(devr->ports); ++port) 4471 + INIT_WORK(&devr->ports[port].pkey_change_work, 4472 + pkey_change_handler); 4473 + 4468 4474 dev->mdev_events.notifier_call = mlx5_ib_event; 4469 4475 mlx5_notifier_register(dev->mdev, &dev->mdev_events); 4470 4476 ··· 4482 4474 4483 4475 static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) 4484 4476 { 4477 + struct mlx5_ib_resources *devr = &dev->devr; 4478 + int port; 4479 + 4485 4480 mlx5r_macsec_event_unregister(dev); 4486 4481 mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); 4482 + 4483 + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) 4484 + cancel_work_sync(&devr->ports[port].pkey_change_work); 4487 4485 } 4488 4486 4489 4487 void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev, ··· 4579 4565 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, 4580 4566 mlx5_ib_dev_res_init, 4581 4567 mlx5_ib_dev_res_cleanup), 4582 - STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, 4583 - mlx5_ib_stage_dev_notifier_init, 4584 - mlx5_ib_stage_dev_notifier_cleanup), 4585 4568 STAGE_CREATE(MLX5_IB_STAGE_ODP, 4586 4569 mlx5_ib_odp_init_one, 4587 4570 mlx5_ib_odp_cleanup_one), ··· 4603 4592 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 4604 4593 mlx5_ib_stage_ib_reg_init, 4605 4594 mlx5_ib_stage_ib_reg_cleanup), 4595 + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, 4596 + mlx5_ib_stage_dev_notifier_init, 4597 + mlx5_ib_stage_dev_notifier_cleanup), 4606 4598 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 4607 4599 mlx5_ib_stage_post_ib_reg_umr_init, 4608 4600 NULL), ··· 4642 4628 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, 4643 4629 mlx5_ib_dev_res_init, 4644 4630 mlx5_ib_dev_res_cleanup), 4645 - STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, 4646 - mlx5_ib_stage_dev_notifier_init, 4647 - mlx5_ib_stage_dev_notifier_cleanup), 4648 4631 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, 4649 4632 mlx5_ib_counters_init, 4650 4633 mlx5_ib_counters_cleanup), ··· 4663 4652 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 4664 4653 mlx5_ib_stage_ib_reg_init, 4665 4654 
mlx5_ib_stage_ib_reg_cleanup), 4655 + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, 4656 + mlx5_ib_stage_dev_notifier_init, 4657 + mlx5_ib_stage_dev_notifier_cleanup), 4666 4658 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 4667 4659 mlx5_ib_stage_post_ib_reg_umr_init, 4668 4660 NULL),
+2 -1
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 521 521 struct mlx5_bf bf; 522 522 u8 has_rq:1; 523 523 u8 is_rss:1; 524 + u8 is_ooo_rq:1; 524 525 525 526 /* only for user space QPs. For kernel 526 527 * we have it from the bf object ··· 973 972 MLX5_IB_STAGE_QP, 974 973 MLX5_IB_STAGE_SRQ, 975 974 MLX5_IB_STAGE_DEVICE_RESOURCES, 976 - MLX5_IB_STAGE_DEVICE_NOTIFIER, 977 975 MLX5_IB_STAGE_ODP, 978 976 MLX5_IB_STAGE_COUNTERS, 979 977 MLX5_IB_STAGE_CONG_DEBUGFS, ··· 981 981 MLX5_IB_STAGE_PRE_IB_REG_UMR, 982 982 MLX5_IB_STAGE_WHITELIST_UID, 983 983 MLX5_IB_STAGE_IB_REG, 984 + MLX5_IB_STAGE_DEVICE_NOTIFIER, 984 985 MLX5_IB_STAGE_POST_IB_REG_UMR, 985 986 MLX5_IB_STAGE_DELAY_DROP, 986 987 MLX5_IB_STAGE_RESTRACK,
+46 -5
drivers/infiniband/hw/mlx5/qp.c
··· 1960 1960 } 1961 1961 1962 1962 static int get_atomic_mode(struct mlx5_ib_dev *dev, 1963 - enum ib_qp_type qp_type) 1963 + struct mlx5_ib_qp *qp) 1964 1964 { 1965 1965 u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); 1966 1966 u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic); ··· 1970 1970 if (!atomic) 1971 1971 return -EOPNOTSUPP; 1972 1972 1973 - if (qp_type == MLX5_IB_QPT_DCT) 1973 + if (qp->type == MLX5_IB_QPT_DCT) 1974 1974 atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); 1975 1975 else 1976 1976 atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); ··· 1983 1983 (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP && 1984 1984 atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD)) 1985 1985 atomic_mode = MLX5_ATOMIC_MODE_IB_COMP; 1986 + 1987 + /* OOO DP QPs do not support larger than 8-Bytes atomic operations */ 1988 + if (atomic_mode > MLX5_ATOMIC_MODE_8B && qp->is_ooo_rq) 1989 + atomic_mode = MLX5_ATOMIC_MODE_8B; 1986 1990 1987 1991 return atomic_mode; 1988 1992 } ··· 2843 2839 return 0; 2844 2840 } 2845 2841 2842 + static bool get_dp_ooo_cap(struct mlx5_core_dev *mdev, enum ib_qp_type qp_type) 2843 + { 2844 + if (!MLX5_CAP_GEN_2(mdev, dp_ordering_force)) 2845 + return false; 2846 + 2847 + switch (qp_type) { 2848 + case IB_QPT_RC: 2849 + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc); 2850 + case IB_QPT_XRC_INI: 2851 + case IB_QPT_XRC_TGT: 2852 + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc); 2853 + case IB_QPT_UC: 2854 + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc); 2855 + case IB_QPT_UD: 2856 + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud); 2857 + case MLX5_IB_QPT_DCI: 2858 + case MLX5_IB_QPT_DCT: 2859 + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc); 2860 + default: 2861 + return false; 2862 + } 2863 + } 2864 + 2846 2865 static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag, 2847 2866 bool cond, struct mlx5_ib_qp *qp) 2848 2867 { ··· 3392 3365 if (access_flags & 
IB_ACCESS_REMOTE_ATOMIC) { 3393 3366 int atomic_mode; 3394 3367 3395 - atomic_mode = get_atomic_mode(dev, qp->type); 3368 + atomic_mode = get_atomic_mode(dev, qp); 3396 3369 if (atomic_mode < 0) 3397 3370 return -EOPNOTSUPP; 3398 3371 ··· 4343 4316 if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) 4344 4317 MLX5_SET(qpc, qpc, deth_sqpn, 1); 4345 4318 4319 + if (qp->is_ooo_rq && cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { 4320 + MLX5_SET(qpc, qpc, dp_ordering_1, 1); 4321 + MLX5_SET(qpc, qpc, dp_ordering_force, 1); 4322 + } 4323 + 4346 4324 mlx5_cur = to_mlx5_state(cur_state); 4347 4325 mlx5_new = to_mlx5_state(new_state); 4348 4326 ··· 4563 4531 if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) { 4564 4532 int atomic_mode; 4565 4533 4566 - atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT); 4534 + atomic_mode = get_atomic_mode(dev, qp); 4567 4535 if (atomic_mode < 0) 4568 4536 return -EOPNOTSUPP; 4569 4537 ··· 4605 4573 MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); 4606 4574 if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) 4607 4575 MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7); 4576 + if (qp->is_ooo_rq) { 4577 + MLX5_SET(dctc, dctc, dp_ordering_1, 1); 4578 + MLX5_SET(dctc, dctc, dp_ordering_force, 1); 4579 + } 4608 4580 4609 4581 err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in, 4610 4582 MLX5_ST_SZ_BYTES(create_dct_in), out, ··· 4712 4676 min(udata->inlen, sizeof(ucmd)))) 4713 4677 return -EFAULT; 4714 4678 4715 - if (ucmd.comp_mask || 4679 + if (ucmd.comp_mask & ~MLX5_IB_MODIFY_QP_OOO_DP || 4716 4680 memchr_inv(&ucmd.burst_info.reserved, 0, 4717 4681 sizeof(ucmd.burst_info.reserved))) 4718 4682 return -EOPNOTSUPP; 4719 4683 4684 + if (ucmd.comp_mask & MLX5_IB_MODIFY_QP_OOO_DP) { 4685 + if (!get_dp_ooo_cap(dev->mdev, qp->type)) 4686 + return -EOPNOTSUPP; 4687 + qp->is_ooo_rq = 1; 4688 + } 4720 4689 } 4721 4690 4722 4691 if (qp->type == IB_QPT_GSI)
+1
drivers/infiniband/sw/rxe/rxe_qp.c
··· 775 775 * Yield the processor 776 776 */ 777 777 spin_lock_irqsave(&qp->state_lock, flags); 778 + attr->cur_qp_state = qp_state(qp); 778 779 if (qp->attr.sq_draining) { 779 780 spin_unlock_irqrestore(&qp->state_lock, flags); 780 781 cond_resched();
+4 -2
drivers/infiniband/sw/rxe/rxe_req.c
··· 663 663 if (unlikely(qp_state(qp) == IB_QPS_ERR)) { 664 664 wqe = __req_next_wqe(qp); 665 665 spin_unlock_irqrestore(&qp->state_lock, flags); 666 - if (wqe) 666 + if (wqe) { 667 + wqe->status = IB_WC_WR_FLUSH_ERR; 667 668 goto err; 668 - else 669 + } else { 669 670 goto exit; 671 + } 670 672 } 671 673 672 674 if (unlikely(qp_state(qp) == IB_QPS_RESET)) {
+3 -6
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
··· 128 128 static void ipoib_get_strings(struct net_device __always_unused *dev, 129 129 u32 stringset, u8 *data) 130 130 { 131 - u8 *p = data; 132 131 int i; 133 132 134 133 switch (stringset) { 135 134 case ETH_SS_STATS: 136 - for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) { 137 - memcpy(p, ipoib_gstrings_stats[i].stat_string, 138 - ETH_GSTRING_LEN); 139 - p += ETH_GSTRING_LEN; 140 - } 135 + for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) 136 + ethtool_puts(&data, 137 + ipoib_gstrings_stats[i].stat_string); 141 138 break; 142 139 default: 143 140 break;
+2 -1
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 49 49 #include <linux/jhash.h> 50 50 #include <net/arp.h> 51 51 #include <net/addrconf.h> 52 + #include <net/pkt_sched.h> 52 53 #include <linux/inetdevice.h> 53 54 #include <rdma/ib_cache.h> 54 55 ··· 2146 2145 dev->hard_header_len = IPOIB_HARD_LEN; 2147 2146 dev->addr_len = INFINIBAND_ALEN; 2148 2147 dev->type = ARPHRD_INFINIBAND; 2149 - dev->tx_queue_len = ipoib_sendq_size * 2; 2148 + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; 2150 2149 dev->features = (NETIF_F_VLAN_CHALLENGED | 2151 2150 NETIF_F_HIGHDMA); 2152 2151 netif_keep_dst(dev);
+1 -3
drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
··· 164 164 return; 165 165 166 166 for (i = 0; i < VNIC_STATS_LEN; i++) 167 - memcpy(data + i * ETH_GSTRING_LEN, 168 - vnic_gstrings_stats[i].stat_string, 169 - ETH_GSTRING_LEN); 167 + ethtool_puts(&data, vnic_gstrings_stats[i].stat_string); 170 168 } 171 169 172 170 /* ethtool ops */
+6
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 8256 8256 if (flags & FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED) 8257 8257 bp->fw_cap |= BNXT_FW_CAP_RING_MONITOR; 8258 8258 8259 + if (flags & FUNC_QCFG_RESP_FLAGS_ENABLE_RDMA_SRIOV) 8260 + bp->fw_cap |= BNXT_FW_CAP_ENABLE_RDMA_SRIOV; 8261 + 8259 8262 switch (resp->port_partition_type) { 8260 8263 case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0: 8261 8264 case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5: ··· 9425 9422 bp->flags |= BNXT_FLAG_UDP_GSO_CAP; 9426 9423 if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_TX_PKT_TS_CMPL_SUPPORTED) 9427 9424 bp->fw_cap |= BNXT_FW_CAP_TX_TS_CMP; 9425 + if (BNXT_PF(bp) && 9426 + (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_ROCE_VF_RESOURCE_MGMT_SUPPORTED)) 9427 + bp->fw_cap |= BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED; 9428 9428 9429 9429 bp->tx_push_thresh = 0; 9430 9430 if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) &&
+6
drivers/net/ethernet/broadcom/bnxt/bnxt.h
··· 2446 2446 #define BNXT_FW_CAP_DCBX_AGENT BIT_ULL(2) 2447 2447 #define BNXT_FW_CAP_NEW_RM BIT_ULL(3) 2448 2448 #define BNXT_FW_CAP_IF_CHANGE BIT_ULL(4) 2449 + #define BNXT_FW_CAP_ENABLE_RDMA_SRIOV BIT_ULL(5) 2450 + #define BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED BIT_ULL(6) 2449 2451 #define BNXT_FW_CAP_KONG_MB_CHNL BIT_ULL(7) 2450 2452 #define BNXT_FW_CAP_OVS_64BIT_HANDLE BIT_ULL(10) 2451 2453 #define BNXT_FW_CAP_TRUSTED_VF BIT_ULL(11) ··· 2494 2492 #define BNXT_SUPPORTS_QUEUE_API(bp) \ 2495 2493 (BNXT_PF(bp) && BNXT_SUPPORTS_NTUPLE_VNIC(bp) && \ 2496 2494 ((bp)->fw_cap & BNXT_FW_CAP_VNIC_RE_FLUSH)) 2495 + #define BNXT_RDMA_SRIOV_EN(bp) \ 2496 + ((bp)->fw_cap & BNXT_FW_CAP_ENABLE_RDMA_SRIOV) 2497 + #define BNXT_ROCE_VF_RESC_CAP(bp) \ 2498 + ((bp)->fw_cap & BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED) 2497 2499 2498 2500 u32 hwrm_spec_code; 2499 2501 u16 hwrm_cmd_seq;
+53
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
··· 520 520 return hwrm_req_send(bp, req); 521 521 } 522 522 523 + static void bnxt_hwrm_roce_sriov_cfg(struct bnxt *bp, int num_vfs) 524 + { 525 + struct hwrm_func_qcaps_output *resp; 526 + struct hwrm_func_cfg_input *cfg_req; 527 + struct hwrm_func_qcaps_input *req; 528 + int rc; 529 + 530 + rc = hwrm_req_init(bp, req, HWRM_FUNC_QCAPS); 531 + if (rc) 532 + return; 533 + 534 + req->fid = cpu_to_le16(0xffff); 535 + resp = hwrm_req_hold(bp, req); 536 + rc = hwrm_req_send(bp, req); 537 + if (rc) 538 + goto err; 539 + 540 + rc = hwrm_req_init(bp, cfg_req, HWRM_FUNC_CFG); 541 + if (rc) 542 + goto err; 543 + 544 + cfg_req->fid = cpu_to_le16(0xffff); 545 + cfg_req->enables2 = 546 + cpu_to_le32(FUNC_CFG_REQ_ENABLES2_ROCE_MAX_AV_PER_VF | 547 + FUNC_CFG_REQ_ENABLES2_ROCE_MAX_CQ_PER_VF | 548 + FUNC_CFG_REQ_ENABLES2_ROCE_MAX_MRW_PER_VF | 549 + FUNC_CFG_REQ_ENABLES2_ROCE_MAX_QP_PER_VF | 550 + FUNC_CFG_REQ_ENABLES2_ROCE_MAX_SRQ_PER_VF | 551 + FUNC_CFG_REQ_ENABLES2_ROCE_MAX_GID_PER_VF); 552 + cfg_req->roce_max_av_per_vf = 553 + cpu_to_le32(le32_to_cpu(resp->roce_vf_max_av) / num_vfs); 554 + cfg_req->roce_max_cq_per_vf = 555 + cpu_to_le32(le32_to_cpu(resp->roce_vf_max_cq) / num_vfs); 556 + cfg_req->roce_max_mrw_per_vf = 557 + cpu_to_le32(le32_to_cpu(resp->roce_vf_max_mrw) / num_vfs); 558 + cfg_req->roce_max_qp_per_vf = 559 + cpu_to_le32(le32_to_cpu(resp->roce_vf_max_qp) / num_vfs); 560 + cfg_req->roce_max_srq_per_vf = 561 + cpu_to_le32(le32_to_cpu(resp->roce_vf_max_srq) / num_vfs); 562 + cfg_req->roce_max_gid_per_vf = 563 + cpu_to_le32(le32_to_cpu(resp->roce_vf_max_gid) / num_vfs); 564 + 565 + rc = hwrm_req_send(bp, cfg_req); 566 + 567 + err: 568 + hwrm_req_drop(bp, req); 569 + if (rc) 570 + netdev_err(bp->dev, "RoCE sriov configuration failed\n"); 571 + } 572 + 523 573 /* Only called by PF to reserve resources for VFs, returns actual number of 524 574 * VFs configured, or < 0 on error. 
525 575 */ ··· 808 758 rc); 809 759 *num_vfs = rc; 810 760 } 761 + 762 + if (BNXT_RDMA_SRIOV_EN(bp) && BNXT_ROCE_VF_RESC_CAP(bp)) 763 + bnxt_hwrm_roce_sriov_cfg(bp, *num_vfs); 811 764 812 765 return 0; 813 766 }
+2
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
··· 414 414 edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; 415 415 if (bp->flags & BNXT_FLAG_VF) 416 416 edev->flags |= BNXT_EN_FLAG_VF; 417 + if (BNXT_ROCE_VF_RESC_CAP(bp)) 418 + edev->flags |= BNXT_EN_FLAG_ROCE_VF_RES_MGMT; 417 419 418 420 edev->chip_num = bp->chip_num; 419 421 edev->hw_ring_stats_size = bp->hw_ring_stats_size;
+1
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
··· 64 64 #define BNXT_EN_FLAG_ULP_STOPPED 0x8 65 65 #define BNXT_EN_FLAG_VF 0x10 66 66 #define BNXT_EN_VF(edev) ((edev)->flags & BNXT_EN_FLAG_VF) 67 + #define BNXT_EN_FLAG_ROCE_VF_RES_MGMT 0x20 67 68 68 69 struct bnxt_ulp *ulp_tbl; 69 70 int l2_db_size; /* Doorbell BAR size in
+12
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
··· 516 516 blocking_notifier_call_chain(&dev0->priv.lag_nh, 517 517 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE, 518 518 ndev); 519 + dev_put(ndev); 519 520 } 520 521 } 521 522 ··· 919 918 { 920 919 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 921 920 struct lag_tracker tracker = { }; 921 + struct net_device *ndev; 922 922 bool do_bond, roce_lag; 923 923 int err; 924 924 int i; ··· 982 980 mlx5_core_err(dev0, "Failed to enable lag\n"); 983 981 return; 984 982 } 983 + } 984 + if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 985 + ndev = mlx5_lag_active_backup_get_netdev(dev0); 986 + /** Only sriov and roce lag should have tracker->TX_type 987 + * set so no need to check the mode 988 + */ 989 + blocking_notifier_call_chain(&dev0->priv.lag_nh, 990 + MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE, 991 + ndev); 992 + dev_put(ndev); 985 993 } 986 994 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) { 987 995 mlx5_modify_lag(ldev, &tracker);
+17 -6
include/linux/mlx5/mlx5_ifc.h
··· 1872 1872 u8 reserved_at_328[0x2]; 1873 1873 u8 relaxed_ordering_read[0x1]; 1874 1874 u8 log_max_pd[0x5]; 1875 - u8 reserved_at_330[0x5]; 1875 + u8 dp_ordering_ooo_all_ud[0x1]; 1876 + u8 dp_ordering_ooo_all_uc[0x1]; 1877 + u8 dp_ordering_ooo_all_xrc[0x1]; 1878 + u8 dp_ordering_ooo_all_dc[0x1]; 1879 + u8 dp_ordering_ooo_all_rc[0x1]; 1876 1880 u8 pcie_reset_using_hotreset_method[0x1]; 1877 1881 u8 pci_sync_for_fw_update_with_driver_unload[0x1]; 1878 1882 u8 vnic_env_cnt_steering_fail[0x1]; ··· 2098 2094 u8 reserved_at_0[0x80]; 2099 2095 2100 2096 u8 migratable[0x1]; 2101 - u8 reserved_at_81[0x11]; 2097 + u8 reserved_at_81[0x7]; 2098 + u8 dp_ordering_force[0x1]; 2099 + u8 reserved_at_89[0x9]; 2102 2100 u8 query_vuid[0x1]; 2103 2101 u8 reserved_at_93[0x5]; 2104 2102 u8 umr_log_entity_size_5[0x1]; ··· 3530 3524 u8 latency_sensitive[0x1]; 3531 3525 u8 reserved_at_24[0x1]; 3532 3526 u8 drain_sigerr[0x1]; 3533 - u8 reserved_at_26[0x2]; 3527 + u8 reserved_at_26[0x1]; 3528 + u8 dp_ordering_force[0x1]; 3534 3529 u8 pd[0x18]; 3535 3530 3536 3531 u8 mtu[0x3]; ··· 3604 3597 u8 rae[0x1]; 3605 3598 u8 reserved_at_493[0x1]; 3606 3599 u8 page_offset[0x6]; 3607 - u8 reserved_at_49a[0x3]; 3600 + u8 reserved_at_49a[0x2]; 3601 + u8 dp_ordering_1[0x1]; 3608 3602 u8 cd_slave_receive[0x1]; 3609 3603 u8 cd_slave_send[0x1]; 3610 3604 u8 cd_master[0x1]; ··· 4551 4543 u8 state[0x4]; 4552 4544 u8 reserved_at_8[0x18]; 4553 4545 4554 - u8 reserved_at_20[0x8]; 4546 + u8 reserved_at_20[0x7]; 4547 + u8 dp_ordering_force[0x1]; 4555 4548 u8 user_index[0x18]; 4556 4549 4557 4550 u8 reserved_at_40[0x8]; ··· 4567 4558 u8 latency_sensitive[0x1]; 4568 4559 u8 rlky[0x1]; 4569 4560 u8 free_ar[0x1]; 4570 - u8 reserved_at_73[0xd]; 4561 + u8 reserved_at_73[0x1]; 4562 + u8 dp_ordering_1[0x1]; 4563 + u8 reserved_at_75[0xb]; 4571 4564 4572 4565 u8 reserved_at_80[0x8]; 4573 4566 u8 cs_res[0x8];
+17
include/rdma/ib_verbs.h
··· 2675 2675 */ 2676 2676 void (*del_sub_dev)(struct ib_device *sub_dev); 2677 2677 2678 + /** 2679 + * ufile_hw_cleanup - Attempt to cleanup uobjects HW resources inside 2680 + * the ufile. 2681 + */ 2682 + void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile); 2683 + 2678 2684 DECLARE_RDMA_OBJ_SIZE(ib_ah); 2679 2685 DECLARE_RDMA_OBJ_SIZE(ib_counters); 2680 2686 DECLARE_RDMA_OBJ_SIZE(ib_cq); ··· 2953 2947 struct rdma_user_mmap_entry *entry, 2954 2948 size_t length, u32 min_pgoff, 2955 2949 u32 max_pgoff); 2950 + 2951 + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 2952 + void rdma_user_mmap_disassociate(struct ib_device *device); 2953 + #else 2954 + static inline void rdma_user_mmap_disassociate(struct ib_device *device) 2955 + { 2956 + } 2957 + #endif 2956 2958 2957 2959 static inline int 2958 2960 rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, ··· 4740 4726 * @device: the rdma device 4741 4727 */ 4742 4728 void rdma_roce_rescan_device(struct ib_device *ibdev); 4729 + void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port); 4730 + void roce_del_all_netdev_gids(struct ib_device *ib_dev, 4731 + u32 port, struct net_device *ndev); 4743 4732 4744 4733 struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); 4745 4734
+33
include/rdma/uverbs_types.h
··· 134 134 } 135 135 void uverbs_uobject_put(struct ib_uobject *uobject); 136 136 137 + int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode); 138 + 137 139 struct uverbs_obj_fd_type { 138 140 /* 139 141 * In fd based objects, uverbs_obj_type_ops points to generic ··· 150 148 const struct file_operations *fops; 151 149 const char *name; 152 150 int flags; 151 + }; 152 + 153 + struct ib_uverbs_file { 154 + struct kref ref; 155 + struct ib_uverbs_device *device; 156 + struct mutex ucontext_lock; 157 + /* 158 + * ucontext must be accessed via ib_uverbs_get_ucontext() or with 159 + * ucontext_lock held 160 + */ 161 + struct ib_ucontext *ucontext; 162 + struct ib_uverbs_async_event_file *default_async_file; 163 + struct list_head list; 164 + 165 + /* 166 + * To access the uobjects list hw_destroy_rwsem must be held for write 167 + * OR hw_destroy_rwsem held for read AND uobjects_lock held. 168 + * hw_destroy_rwsem should be called across any destruction of the HW 169 + * object of an associated uobject. 170 + */ 171 + struct rw_semaphore hw_destroy_rwsem; 172 + spinlock_t uobjects_lock; 173 + struct list_head uobjects; 174 + 175 + struct mutex umap_lock; 176 + struct list_head umaps; 177 + struct page *disassociate_page; 178 + 179 + struct xarray idr; 180 + 181 + struct mutex disassociation_lock; 153 182 }; 154 183 155 184 extern const struct uverbs_obj_type_class uverbs_idr_class;
+2 -1
include/uapi/rdma/efa-abi.h
··· 95 95 __u32 sq_ring_size; /* bytes */ 96 96 __u32 driver_qp_type; 97 97 __u16 flags; 98 - __u8 reserved_90[6]; 98 + __u8 sl; 99 + __u8 reserved_98[5]; 99 100 }; 100 101 101 102 struct efa_ibv_create_qp_resp {
+5
include/uapi/rdma/mlx5-abi.h
··· 252 252 MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, 253 253 MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, 254 254 MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3, 255 + MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP = 1 << 4, 255 256 }; 256 257 257 258 enum mlx5_ib_tunnel_offloads { ··· 438 437 __u32 max_burst_sz; 439 438 __u16 typical_pkt_sz; 440 439 __u16 reserved; 440 + }; 441 + 442 + enum mlx5_ib_modify_qp_mask { 443 + MLX5_IB_MODIFY_QP_OOO_DP = 1 << 0, 441 444 }; 442 445 443 446 struct mlx5_ib_modify_qp {
+2
include/uapi/rdma/rdma_netlink.h
··· 638 638 RDMA_UNREGISTER_EVENT, 639 639 RDMA_NETDEV_ATTACH_EVENT, 640 640 RDMA_NETDEV_DETACH_EVENT, 641 + RDMA_RENAME_EVENT, 642 + RDMA_NETDEV_RENAME_EVENT, 641 643 }; 642 644 643 645 #endif /* _UAPI_RDMA_NETLINK_H */