Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
"A fairly modest set of bug fixes, nothing abnormal from the merge
window.

The ucma patch is a bit on the larger side, but given the regression
was recently added I've opted to forward it to the rc stream.

- Fix a ucma memory leak introduced in v5.9 while fixing the
Syzkaller bugs

- Don't fail when the xarray wraps for user verbs objects

- Fix a user-triggerable oops regression from the umem page size rework

- Fix error unwind bugs in usnic, ocrdma, mlx5 and cma"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
RDMA/cma: Fix error flow in default_roce_mode_store
RDMA/mlx5: Fix wrong free of blue flame register on error
IB/mlx5: Fix error unwinding when set_has_smi_cap fails
RDMA/umem: Avoid undefined behavior of rounddown_pow_of_two()
RDMA/ocrdma: Fix use after free in ocrdma_dealloc_ucontext_pd()
RDMA/usnic: Fix memleak in find_free_vf_and_create_qp_grp
RDMA/restrack: Don't treat as an error allocation ID wrapping
RDMA/ucma: Do not miss ctx destruction steps in some cases

+84 -69
+3 -1
drivers/infiniband/core/cma_configfs.c
··· 131 131 return ret; 132 132 133 133 gid_type = ib_cache_gid_parse_type_str(buf); 134 - if (gid_type < 0) 134 + if (gid_type < 0) { 135 + cma_configfs_params_put(cma_dev); 135 136 return -EINVAL; 137 + } 136 138 137 139 ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type); 138 140
+1
drivers/infiniband/core/restrack.c
··· 254 254 } else { 255 255 ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b, 256 256 &rt->next_id, GFP_KERNEL); 257 + ret = (ret < 0) ? ret : 0; 257 258 } 258 259 259 260 out:
+73 -64
drivers/infiniband/core/ucma.c
··· 95 95 u64 uid; 96 96 97 97 struct list_head list; 98 - /* sync between removal event and id destroy, protected by file mut */ 99 - int destroying; 100 98 struct work_struct close_work; 101 99 }; 102 100 ··· 120 122 static DEFINE_XARRAY_ALLOC(multicast_table); 121 123 122 124 static const struct file_operations ucma_fops; 123 - static int __destroy_id(struct ucma_context *ctx); 125 + static int ucma_destroy_private_ctx(struct ucma_context *ctx); 124 126 125 127 static inline struct ucma_context *_ucma_find_context(int id, 126 128 struct ucma_file *file) ··· 177 179 178 180 /* once all inflight tasks are finished, we close all underlying 179 181 * resources. The context is still alive till its explicit destryoing 180 - * by its creator. 182 + * by its creator. This puts back the xarray's reference. 181 183 */ 182 184 ucma_put_ctx(ctx); 183 185 wait_for_completion(&ctx->comp); 184 186 /* No new events will be generated after destroying the id. */ 185 187 rdma_destroy_id(ctx->cm_id); 186 188 187 - /* 188 - * At this point ctx->ref is zero so the only place the ctx can be is in 189 - * a uevent or in __destroy_id(). Since the former doesn't touch 190 - * ctx->cm_id and the latter sync cancels this, there is no races with 191 - * this store. 
192 - */ 189 + /* Reading the cm_id without holding a positive ref is not allowed */ 193 190 ctx->cm_id = NULL; 194 191 } 195 192 ··· 197 204 return NULL; 198 205 199 206 INIT_WORK(&ctx->close_work, ucma_close_id); 200 - refcount_set(&ctx->ref, 1); 201 207 init_completion(&ctx->comp); 202 208 /* So list_del() will work if we don't do ucma_finish_ctx() */ 203 209 INIT_LIST_HEAD(&ctx->list); ··· 208 216 return NULL; 209 217 } 210 218 return ctx; 219 + } 220 + 221 + static void ucma_set_ctx_cm_id(struct ucma_context *ctx, 222 + struct rdma_cm_id *cm_id) 223 + { 224 + refcount_set(&ctx->ref, 1); 225 + ctx->cm_id = cm_id; 211 226 } 212 227 213 228 static void ucma_finish_ctx(struct ucma_context *ctx) ··· 302 303 ctx = ucma_alloc_ctx(listen_ctx->file); 303 304 if (!ctx) 304 305 goto err_backlog; 305 - ctx->cm_id = cm_id; 306 + ucma_set_ctx_cm_id(ctx, cm_id); 306 307 307 308 uevent = ucma_create_uevent(listen_ctx, event); 308 309 if (!uevent) ··· 320 321 return 0; 321 322 322 323 err_alloc: 323 - xa_erase(&ctx_table, ctx->id); 324 - kfree(ctx); 324 + ucma_destroy_private_ctx(ctx); 325 325 err_backlog: 326 326 atomic_inc(&listen_ctx->backlog); 327 327 /* Returning error causes the new ID to be destroyed */ ··· 354 356 wake_up_interruptible(&ctx->file->poll_wait); 355 357 } 356 358 357 - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying) 358 - queue_work(system_unbound_wq, &ctx->close_work); 359 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { 360 + xa_lock(&ctx_table); 361 + if (xa_load(&ctx_table, ctx->id) == ctx) 362 + queue_work(system_unbound_wq, &ctx->close_work); 363 + xa_unlock(&ctx_table); 364 + } 359 365 return 0; 360 366 } 361 367 ··· 463 461 ret = PTR_ERR(cm_id); 464 462 goto err1; 465 463 } 466 - ctx->cm_id = cm_id; 464 + ucma_set_ctx_cm_id(ctx, cm_id); 467 465 468 466 resp.id = ctx->id; 469 467 if (copy_to_user(u64_to_user_ptr(cmd.response), 470 468 &resp, sizeof(resp))) { 471 - xa_erase(&ctx_table, ctx->id); 472 - __destroy_id(ctx); 
469 + ucma_destroy_private_ctx(ctx); 473 470 return -EFAULT; 474 471 } 475 472 ··· 478 477 return 0; 479 478 480 479 err1: 481 - xa_erase(&ctx_table, ctx->id); 482 - kfree(ctx); 480 + ucma_destroy_private_ctx(ctx); 483 481 return ret; 484 482 } 485 483 ··· 516 516 rdma_unlock_handler(mc->ctx->cm_id); 517 517 } 518 518 519 - /* 520 - * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At 521 - * this point, no new events will be reported from the hardware. However, we 522 - * still need to cleanup the UCMA context for this ID. Specifically, there 523 - * might be events that have not yet been consumed by the user space software. 524 - * mutex. After that we release them as needed. 525 - */ 526 - static int ucma_free_ctx(struct ucma_context *ctx) 519 + static int ucma_cleanup_ctx_events(struct ucma_context *ctx) 527 520 { 528 521 int events_reported; 529 522 struct ucma_event *uevent, *tmp; 530 523 LIST_HEAD(list); 531 524 532 - ucma_cleanup_multicast(ctx); 533 - 534 - /* Cleanup events not yet reported to the user. */ 525 + /* Cleanup events not yet reported to the user.*/ 535 526 mutex_lock(&ctx->file->mut); 536 527 list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { 537 - if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx) 528 + if (uevent->ctx != ctx) 529 + continue; 530 + 531 + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && 532 + xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id, 533 + uevent->conn_req_ctx, XA_ZERO_ENTRY, 534 + GFP_KERNEL) == uevent->conn_req_ctx) { 538 535 list_move_tail(&uevent->list, &list); 536 + continue; 537 + } 538 + list_del(&uevent->list); 539 + kfree(uevent); 539 540 } 540 541 list_del(&ctx->list); 541 542 events_reported = ctx->events_reported; 542 543 mutex_unlock(&ctx->file->mut); 543 544 544 545 /* 545 - * If this was a listening ID then any connections spawned from it 546 - * that have not been delivered to userspace are cleaned up too. 547 - * Must be done outside any locks. 
546 + * If this was a listening ID then any connections spawned from it that 547 + * have not been delivered to userspace are cleaned up too. Must be done 548 + * outside any locks. 548 549 */ 549 550 list_for_each_entry_safe(uevent, tmp, &list, list) { 550 - list_del(&uevent->list); 551 - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && 552 - uevent->conn_req_ctx != ctx) 553 - __destroy_id(uevent->conn_req_ctx); 551 + ucma_destroy_private_ctx(uevent->conn_req_ctx); 554 552 kfree(uevent); 555 553 } 556 - 557 - mutex_destroy(&ctx->mutex); 558 - kfree(ctx); 559 554 return events_reported; 560 555 } 561 556 562 - static int __destroy_id(struct ucma_context *ctx) 557 + /* 558 + * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie 559 + * the ctx is not public to the user). This either because: 560 + * - ucma_finish_ctx() hasn't been called 561 + * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed) 562 + */ 563 + static int ucma_destroy_private_ctx(struct ucma_context *ctx) 563 564 { 564 - /* 565 - * If the refcount is already 0 then ucma_close_id() has already 566 - * destroyed the cm_id, otherwise holding the refcount keeps cm_id 567 - * valid. Prevent queue_work() from being called. 568 - */ 569 - if (refcount_inc_not_zero(&ctx->ref)) { 570 - rdma_lock_handler(ctx->cm_id); 571 - ctx->destroying = 1; 572 - rdma_unlock_handler(ctx->cm_id); 573 - ucma_put_ctx(ctx); 574 - } 565 + int events_reported; 575 566 567 + /* 568 + * Destroy the underlying cm_id. New work queuing is prevented now by 569 + * the removal from the xarray. Once the work is cancled ref will either 570 + * be 0 because the work ran to completion and consumed the ref from the 571 + * xarray, or it will be positive because we still have the ref from the 572 + * xarray. 
This can also be 0 in cases where cm_id was never set 573 + */ 576 574 cancel_work_sync(&ctx->close_work); 577 - /* At this point it's guaranteed that there is no inflight closing task */ 578 - if (ctx->cm_id) 575 + if (refcount_read(&ctx->ref)) 579 576 ucma_close_id(&ctx->close_work); 580 - return ucma_free_ctx(ctx); 577 + 578 + events_reported = ucma_cleanup_ctx_events(ctx); 579 + ucma_cleanup_multicast(ctx); 580 + 581 + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL, 582 + GFP_KERNEL) != NULL); 583 + mutex_destroy(&ctx->mutex); 584 + kfree(ctx); 585 + return events_reported; 581 586 } 582 587 583 588 static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, ··· 601 596 602 597 xa_lock(&ctx_table); 603 598 ctx = _ucma_find_context(cmd.id, file); 604 - if (!IS_ERR(ctx)) 605 - __xa_erase(&ctx_table, ctx->id); 599 + if (!IS_ERR(ctx)) { 600 + if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, 601 + GFP_KERNEL) != ctx) 602 + ctx = ERR_PTR(-ENOENT); 603 + } 606 604 xa_unlock(&ctx_table); 607 605 608 606 if (IS_ERR(ctx)) 609 607 return PTR_ERR(ctx); 610 608 611 - resp.events_reported = __destroy_id(ctx); 609 + resp.events_reported = ucma_destroy_private_ctx(ctx); 612 610 if (copy_to_user(u64_to_user_ptr(cmd.response), 613 611 &resp, sizeof(resp))) 614 612 ret = -EFAULT; ··· 1785 1777 * prevented by this being a FD release function. The list_add_tail() in 1786 1778 * ucma_connect_event_handler() can run concurrently, however it only 1787 1779 * adds to the list *after* a listening ID. By only reading the first of 1788 - * the list, and relying on __destroy_id() to block 1780 + * the list, and relying on ucma_destroy_private_ctx() to block 1789 1781 * ucma_connect_event_handler(), no additional locking is needed. 
1790 1782 */ 1791 1783 while (!list_empty(&file->ctx_list)) { 1792 1784 struct ucma_context *ctx = list_first_entry( 1793 1785 &file->ctx_list, struct ucma_context, list); 1794 1786 1795 - xa_erase(&ctx_table, ctx->id); 1796 - __destroy_id(ctx); 1787 + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, 1788 + GFP_KERNEL) != ctx); 1789 + ucma_destroy_private_ctx(ctx); 1797 1790 } 1798 1791 kfree(file); 1799 1792 return 0;
+1 -1
drivers/infiniband/core/umem.c
··· 135 135 */ 136 136 if (mask) 137 137 pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0); 138 - return rounddown_pow_of_two(pgsz_bitmap); 138 + return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0; 139 139 } 140 140 EXPORT_SYMBOL(ib_umem_find_best_pgsz); 141 141
+2 -2
drivers/infiniband/hw/mlx5/main.c
··· 3956 3956 3957 3957 err = set_has_smi_cap(dev); 3958 3958 if (err) 3959 - return err; 3959 + goto err_mp; 3960 3960 3961 3961 if (!mlx5_core_mp_enabled(mdev)) { 3962 3962 for (i = 1; i <= dev->num_ports; i++) { ··· 4319 4319 4320 4320 err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); 4321 4321 if (err) 4322 - mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 4322 + mlx5_free_bfreg(dev->mdev, &dev->bfreg); 4323 4323 4324 4324 return err; 4325 4325 }
+1 -1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
··· 434 434 pr_err("%s(%d) Freeing in use pdid=0x%x.\n", 435 435 __func__, dev->id, pd->id); 436 436 } 437 - kfree(uctx->cntxt_pd); 438 437 uctx->cntxt_pd = NULL; 439 438 _ocrdma_dealloc_pd(dev, pd); 439 + kfree(pd); 440 440 } 441 441 442 442 static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
+3
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
··· 214 214 215 215 } 216 216 usnic_uiom_free_dev_list(dev_list); 217 + dev_list = NULL; 217 218 } 218 219 219 220 /* Try to find resources on an unused vf */ ··· 240 239 qp_grp_check: 241 240 if (IS_ERR_OR_NULL(qp_grp)) { 242 241 usnic_err("Failed to allocate qp_grp\n"); 242 + if (usnic_ib_share_vf) 243 + usnic_uiom_free_dev_list(dev_list); 243 244 return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); 244 245 } 245 246 return qp_grp;