Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-6.12-20241018' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

- NVMe pull request via Keith:
    - Fix target passthrough identifier (Nilay)
    - Fix tcp locking (Hannes)
    - Replace list with sbitmap for tracking RDMA rsp tags (Guixin)
    - Remove unnecessary fallthrough statements (Tokunori)
    - Remove ready-without-media support (Greg)
    - Fix multipath partition scan deadlock (Keith)
    - Fix concurrent PCI reset and remove queue mapping (Maurizio)
    - Fabrics shutdown fixes (Nilay)

- Fix for a kerneldoc warning (Keith)

- Fix a race with blk-rq-qos and wakeups (Omar)

- Cleanup of checking for always-set tag_set (SurajSonawane2415)

- Fix for a crash with CPU hotplug notifiers (Ming)

- Don't allow zero-copy ublk on unprivileged device (Ming)

- Use array_index_nospec() for CDROM (Josh)

- Remove dead code in drbd (David)

- Tweaks to elevator loading (Breno)

* tag 'block-6.12-20241018' of git://git.kernel.dk/linux:
cdrom: Avoid barrier_nospec() in cdrom_ioctl_media_changed()
nvme: use helper nvme_ctrl_state in nvme_keep_alive_finish function
nvme: make keep-alive synchronous operation
nvme-loop: flush off pending I/O while shutting down loop controller
nvme-pci: fix race condition between reset and nvme_dev_disable()
ublk: don't allow user copy for unprivileged device
blk-rq-qos: fix crash on rq_qos_wait vs. rq_qos_wake_function race
nvme-multipath: defer partition scanning
blk-mq: setup queue ->tag_set before initializing hctx
elevator: Remove argument from elevator_find_get
elevator: do not request_module if elevator exists
drbd: Remove unused conn_lowest_minor
nvme: disable CC.CRIME (NVME_CC_CRIME)
nvme: delete unnecessary fallthru comment
nvmet-rdma: use sbitmap to replace rsp free list
block: Fix elevator_get_default() checking for NULL q->tag_set
nvme: tcp: avoid race between queue_lock lock and destroy
nvmet-passthru: clear EUID/NGUID/UUID while using loop target
block: fix blk_rq_map_integrity_sg kernel-doc

+152 -98
+6 -2
block/blk-mq.c
··· 4310 4310 /* mark the queue as mq asap */ 4311 4311 q->mq_ops = set->ops; 4312 4312 4313 + /* 4314 + * ->tag_set has to be setup before initialize hctx, which cpuphp 4315 + * handler needs it for checking queue mapping 4316 + */ 4317 + q->tag_set = set; 4318 + 4313 4319 if (blk_mq_alloc_ctxs(q)) 4314 4320 goto err_exit; 4315 4321 ··· 4333 4327 4334 4328 INIT_WORK(&q->timeout_work, blk_mq_timeout_work); 4335 4329 blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); 4336 - 4337 - q->tag_set = set; 4338 4330 4339 4331 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; 4340 4332
+1 -1
block/blk-rq-qos.c
··· 219 219 220 220 data->got_token = true; 221 221 smp_wmb(); 222 - list_del_init(&curr->entry); 223 222 wake_up_process(data->task); 223 + list_del_init_careful(&curr->entry); 224 224 return 1; 225 225 } 226 226
+14 -7
block/elevator.c
··· 106 106 return NULL; 107 107 } 108 108 109 - static struct elevator_type *elevator_find_get(struct request_queue *q, 110 - const char *name) 109 + static struct elevator_type *elevator_find_get(const char *name) 111 110 { 112 111 struct elevator_type *e; 113 112 ··· 550 551 static inline bool elv_support_iosched(struct request_queue *q) 551 552 { 552 553 if (!queue_is_mq(q) || 553 - (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))) 554 + (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) 554 555 return false; 555 556 return true; 556 557 } ··· 561 562 */ 562 563 static struct elevator_type *elevator_get_default(struct request_queue *q) 563 564 { 564 - if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) 565 + if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) 565 566 return NULL; 566 567 567 568 if (q->nr_hw_queues != 1 && 568 569 !blk_mq_is_shared_tags(q->tag_set->flags)) 569 570 return NULL; 570 571 571 - return elevator_find_get(q, "mq-deadline"); 572 + return elevator_find_get("mq-deadline"); 572 573 } 573 574 574 575 /* ··· 696 697 if (q->elevator && elevator_match(q->elevator->type, elevator_name)) 697 698 return 0; 698 699 699 - e = elevator_find_get(q, elevator_name); 700 + e = elevator_find_get(elevator_name); 700 701 if (!e) 701 702 return -EINVAL; 702 703 ret = elevator_switch(q, e); ··· 708 709 size_t count) 709 710 { 710 711 char elevator_name[ELV_NAME_MAX]; 712 + struct elevator_type *found; 713 + const char *name; 711 714 712 715 if (!elv_support_iosched(disk->queue)) 713 716 return -EOPNOTSUPP; 714 717 715 718 strscpy(elevator_name, buf, sizeof(elevator_name)); 719 + name = strstrip(elevator_name); 716 720 717 - request_module("%s-iosched", strstrip(elevator_name)); 721 + spin_lock(&elv_list_lock); 722 + found = __elevator_find(name); 723 + spin_unlock(&elv_list_lock); 724 + 725 + if (!found) 726 + request_module("%s-iosched", name); 718 727 719 728 return 0; 720 729 }
-1
drivers/block/drbd/drbd_int.h
··· 1364 1364 1365 1365 extern struct mutex resources_mutex; 1366 1366 1367 - extern int conn_lowest_minor(struct drbd_connection *connection); 1368 1367 extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor); 1369 1368 extern void drbd_destroy_device(struct kref *kref); 1370 1369 extern void drbd_delete_device(struct drbd_device *device);
-14
drivers/block/drbd/drbd_main.c
··· 471 471 wait_for_completion(&thi->stop); 472 472 } 473 473 474 - int conn_lowest_minor(struct drbd_connection *connection) 475 - { 476 - struct drbd_peer_device *peer_device; 477 - int vnr = 0, minor = -1; 478 - 479 - rcu_read_lock(); 480 - peer_device = idr_get_next(&connection->peer_devices, &vnr); 481 - if (peer_device) 482 - minor = device_to_minor(peer_device->device); 483 - rcu_read_unlock(); 484 - 485 - return minor; 486 - } 487 - 488 474 #ifdef CONFIG_SMP 489 475 /* 490 476 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
+10 -1
drivers/block/ublk_drv.c
··· 2380 2380 * TODO: provide forward progress for RECOVERY handler, so that 2381 2381 * unprivileged device can benefit from it 2382 2382 */ 2383 - if (info.flags & UBLK_F_UNPRIVILEGED_DEV) 2383 + if (info.flags & UBLK_F_UNPRIVILEGED_DEV) { 2384 2384 info.flags &= ~(UBLK_F_USER_RECOVERY_REISSUE | 2385 2385 UBLK_F_USER_RECOVERY); 2386 + 2387 + /* 2388 + * For USER_COPY, we depend on userspace to fill the request 2389 + * buffer by pwrite() to ublk char device, which can't be 2390 + * used for unprivileged device 2391 + */ 2392 + if (info.flags & UBLK_F_USER_COPY) 2393 + return -EINVAL; 2394 + } 2386 2395 2387 2396 /* the created device is always owned by current user */ 2388 2397 ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid);
+1 -1
drivers/cdrom/cdrom.c
··· 2313 2313 return -EINVAL; 2314 2314 2315 2315 /* Prevent arg from speculatively bypassing the length check */ 2316 - barrier_nospec(); 2316 + arg = array_index_nospec(arg, cdi->capacity); 2317 2317 2318 2318 info = kmalloc(sizeof(*info), GFP_KERNEL); 2319 2319 if (!info)
+17 -24
drivers/nvme/host/core.c
··· 1292 1292 queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); 1293 1293 } 1294 1294 1295 - static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, 1296 - blk_status_t status) 1295 + static void nvme_keep_alive_finish(struct request *rq, 1296 + blk_status_t status, struct nvme_ctrl *ctrl) 1297 1297 { 1298 - struct nvme_ctrl *ctrl = rq->end_io_data; 1299 - unsigned long flags; 1300 - bool startka = false; 1301 1298 unsigned long rtt = jiffies - (rq->deadline - rq->timeout); 1302 1299 unsigned long delay = nvme_keep_alive_work_period(ctrl); 1300 + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); 1303 1301 1304 1302 /* 1305 1303 * Subtract off the keepalive RTT so nvme_keep_alive_work runs ··· 1311 1313 delay = 0; 1312 1314 } 1313 1315 1314 - blk_mq_free_request(rq); 1315 - 1316 1316 if (status) { 1317 1317 dev_err(ctrl->device, 1318 1318 "failed nvme_keep_alive_end_io error=%d\n", 1319 1319 status); 1320 - return RQ_END_IO_NONE; 1320 + return; 1321 1321 } 1322 1322 1323 1323 ctrl->ka_last_check_time = jiffies; 1324 1324 ctrl->comp_seen = false; 1325 - spin_lock_irqsave(&ctrl->lock, flags); 1326 - if (ctrl->state == NVME_CTRL_LIVE || 1327 - ctrl->state == NVME_CTRL_CONNECTING) 1328 - startka = true; 1329 - spin_unlock_irqrestore(&ctrl->lock, flags); 1330 - if (startka) 1325 + if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING) 1331 1326 queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); 1332 - return RQ_END_IO_NONE; 1333 1327 } 1334 1328 1335 1329 static void nvme_keep_alive_work(struct work_struct *work) ··· 1330 1340 struct nvme_ctrl, ka_work); 1331 1341 bool comp_seen = ctrl->comp_seen; 1332 1342 struct request *rq; 1343 + blk_status_t status; 1333 1344 1334 1345 ctrl->ka_last_check_time = jiffies; 1335 1346 ··· 1353 1362 nvme_init_request(rq, &ctrl->ka_cmd); 1354 1363 1355 1364 rq->timeout = ctrl->kato * HZ; 1356 - rq->end_io = nvme_keep_alive_end_io; 1357 - rq->end_io_data = ctrl; 1358 - blk_execute_rq_nowait(rq, false); 1365 + 
status = blk_execute_rq(rq, false); 1366 + nvme_keep_alive_finish(rq, status, ctrl); 1367 + blk_mq_free_request(rq); 1359 1368 } 1360 1369 1361 1370 static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) ··· 2449 2458 else 2450 2459 ctrl->ctrl_config = NVME_CC_CSS_NVM; 2451 2460 2452 - if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS) 2453 - ctrl->ctrl_config |= NVME_CC_CRIME; 2461 + /* 2462 + * Setting CRIME results in CSTS.RDY before the media is ready. This 2463 + * makes it possible for media related commands to return the error 2464 + * NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Until the driver is 2465 + * restructured to handle retries, disable CC.CRIME. 2466 + */ 2467 + ctrl->ctrl_config &= ~NVME_CC_CRIME; 2454 2468 2455 2469 ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; 2456 2470 ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ··· 2485 2489 * devices are known to get this wrong. Use the larger of the 2486 2490 * two values. 2487 2491 */ 2488 - if (ctrl->ctrl_config & NVME_CC_CRIME) 2489 - ready_timeout = NVME_CRTO_CRIMT(crto); 2490 - else 2491 - ready_timeout = NVME_CRTO_CRWMT(crto); 2492 + ready_timeout = NVME_CRTO_CRWMT(crto); 2492 2493 2493 2494 if (ready_timeout < timeout) 2494 2495 dev_warn_once(ctrl->device, "bad crto:%x cap:%llx\n",
+33 -7
drivers/nvme/host/multipath.c
··· 431 431 case NVME_CTRL_LIVE: 432 432 case NVME_CTRL_RESETTING: 433 433 case NVME_CTRL_CONNECTING: 434 - /* fallthru */ 435 434 return true; 436 435 default: 437 436 break; ··· 579 580 return ret; 580 581 } 581 582 583 + static void nvme_partition_scan_work(struct work_struct *work) 584 + { 585 + struct nvme_ns_head *head = 586 + container_of(work, struct nvme_ns_head, partition_scan_work); 587 + 588 + if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN, 589 + &head->disk->state))) 590 + return; 591 + 592 + mutex_lock(&head->disk->open_mutex); 593 + bdev_disk_changed(head->disk, false); 594 + mutex_unlock(&head->disk->open_mutex); 595 + } 596 + 582 597 static void nvme_requeue_work(struct work_struct *work) 583 598 { 584 599 struct nvme_ns_head *head = ··· 619 606 bio_list_init(&head->requeue_list); 620 607 spin_lock_init(&head->requeue_lock); 621 608 INIT_WORK(&head->requeue_work, nvme_requeue_work); 609 + INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work); 622 610 623 611 /* 624 612 * Add a multipath node if the subsystems supports multiple controllers. ··· 643 629 return PTR_ERR(head->disk); 644 630 head->disk->fops = &nvme_ns_head_ops; 645 631 head->disk->private_data = head; 632 + 633 + /* 634 + * We need to suppress the partition scan from occurring within the 635 + * controller's scan_work context. If a path error occurs here, the IO 636 + * will wait until a path becomes available or all paths are torn down, 637 + * but that action also occurs within scan_work, so it would deadlock. 638 + * Defer the partition scan to a different context that does not block 639 + * scan_work. 
640 + */ 641 + set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state); 646 642 sprintf(head->disk->disk_name, "nvme%dn%d", 647 643 ctrl->subsys->instance, head->instance); 648 644 return 0; ··· 679 655 return; 680 656 } 681 657 nvme_add_ns_head_cdev(head); 658 + kblockd_schedule_work(&head->partition_scan_work); 682 659 } 683 660 684 661 mutex_lock(&head->lock); ··· 999 974 return; 1000 975 if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { 1001 976 nvme_cdev_del(&head->cdev, &head->cdev_device); 977 + /* 978 + * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared 979 + * to allow multipath to fail all I/O. 980 + */ 981 + synchronize_srcu(&head->srcu); 982 + kblockd_schedule_work(&head->requeue_work); 1002 983 del_gendisk(head->disk); 1003 984 } 1004 - /* 1005 - * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared 1006 - * to allow multipath to fail all I/O. 1007 - */ 1008 - synchronize_srcu(&head->srcu); 1009 - kblockd_schedule_work(&head->requeue_work); 1010 985 } 1011 986 1012 987 void nvme_mpath_remove_disk(struct nvme_ns_head *head) ··· 1016 991 /* make sure all pending bios are cleaned up */ 1017 992 kblockd_schedule_work(&head->requeue_work); 1018 993 flush_work(&head->requeue_work); 994 + flush_work(&head->partition_scan_work); 1019 995 put_disk(head->disk); 1020 996 } 1021 997
+1
drivers/nvme/host/nvme.h
··· 494 494 struct bio_list requeue_list; 495 495 spinlock_t requeue_lock; 496 496 struct work_struct requeue_work; 497 + struct work_struct partition_scan_work; 497 498 struct mutex lock; 498 499 unsigned long flags; 499 500 #define NVME_NSHEAD_DISK_LIVE 0
+16 -3
drivers/nvme/host/pci.c
··· 2506 2506 return 1; 2507 2507 } 2508 2508 2509 - static void nvme_pci_update_nr_queues(struct nvme_dev *dev) 2509 + static bool nvme_pci_update_nr_queues(struct nvme_dev *dev) 2510 2510 { 2511 2511 if (!dev->ctrl.tagset) { 2512 2512 nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops, 2513 2513 nvme_pci_nr_maps(dev), sizeof(struct nvme_iod)); 2514 - return; 2514 + return true; 2515 + } 2516 + 2517 + /* Give up if we are racing with nvme_dev_disable() */ 2518 + if (!mutex_trylock(&dev->shutdown_lock)) 2519 + return false; 2520 + 2521 + /* Check if nvme_dev_disable() has been executed already */ 2522 + if (!dev->online_queues) { 2523 + mutex_unlock(&dev->shutdown_lock); 2524 + return false; 2515 2525 } 2516 2526 2517 2527 blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); 2518 2528 /* free previously allocated queues that are no longer usable */ 2519 2529 nvme_free_queues(dev, dev->online_queues); 2530 + mutex_unlock(&dev->shutdown_lock); 2531 + return true; 2520 2532 } 2521 2533 2522 2534 static int nvme_pci_enable(struct nvme_dev *dev) ··· 2809 2797 nvme_dbbuf_set(dev); 2810 2798 nvme_unquiesce_io_queues(&dev->ctrl); 2811 2799 nvme_wait_freeze(&dev->ctrl); 2812 - nvme_pci_update_nr_queues(dev); 2800 + if (!nvme_pci_update_nr_queues(dev)) 2801 + goto out; 2813 2802 nvme_unfreeze(&dev->ctrl); 2814 2803 } else { 2815 2804 dev_warn(dev->ctrl.device, "IO queues lost\n");
+4 -3
drivers/nvme/host/tcp.c
··· 2644 2644 2645 2645 len = nvmf_get_address(ctrl, buf, size); 2646 2646 2647 + if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) 2648 + return len; 2649 + 2647 2650 mutex_lock(&queue->queue_lock); 2648 2651 2649 - if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) 2650 - goto done; 2651 2652 ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); 2652 2653 if (ret > 0) { 2653 2654 if (len > 0) ··· 2656 2655 len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", 2657 2656 (len) ? "," : "", &src_addr); 2658 2657 } 2659 - done: 2658 + 2660 2659 mutex_unlock(&queue->queue_lock); 2661 2660 2662 2661 return len;
+13
drivers/nvme/target/loop.c
··· 265 265 { 266 266 if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) 267 267 return; 268 + /* 269 + * It's possible that some requests might have been added 270 + * after admin queue is stopped/quiesced. So now start the 271 + * queue to flush these requests to the completion. 272 + */ 273 + nvme_unquiesce_admin_queue(&ctrl->ctrl); 274 + 268 275 nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); 269 276 nvme_remove_admin_tag_set(&ctrl->ctrl); 270 277 } ··· 304 297 nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); 305 298 } 306 299 ctrl->ctrl.queue_count = 1; 300 + /* 301 + * It's possible that some requests might have been added 302 + * after io queue is stopped/quiesced. So now start the 303 + * queue to flush these requests to the completion. 304 + */ 305 + nvme_unquiesce_io_queues(&ctrl->ctrl); 307 306 } 308 307 309 308 static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
+2 -4
drivers/nvme/target/passthru.c
··· 535 535 break; 536 536 case nvme_admin_identify: 537 537 switch (req->cmd->identify.cns) { 538 - case NVME_ID_CNS_CTRL: 539 - req->execute = nvmet_passthru_execute_cmd; 540 - req->p.use_workqueue = true; 541 - return NVME_SC_SUCCESS; 542 538 case NVME_ID_CNS_CS_CTRL: 543 539 switch (req->cmd->identify.csi) { 544 540 case NVME_CSI_ZNS: ··· 543 547 return NVME_SC_SUCCESS; 544 548 } 545 549 return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 550 + case NVME_ID_CNS_CTRL: 546 551 case NVME_ID_CNS_NS: 552 + case NVME_ID_CNS_NS_DESC_LIST: 547 553 req->execute = nvmet_passthru_execute_cmd; 548 554 req->p.use_workqueue = true; 549 555 return NVME_SC_SUCCESS;
+27 -29
drivers/nvme/target/rdma.c
··· 39 39 40 40 #define NVMET_RDMA_BACKLOG 128 41 41 42 + #define NVMET_RDMA_DISCRETE_RSP_TAG -1 43 + 42 44 struct nvmet_rdma_srq; 43 45 44 46 struct nvmet_rdma_cmd { ··· 77 75 u32 invalidate_rkey; 78 76 79 77 struct list_head wait_list; 80 - struct list_head free_list; 78 + int tag; 81 79 }; 82 80 83 81 enum nvmet_rdma_queue_state { ··· 100 98 struct nvmet_sq nvme_sq; 101 99 102 100 struct nvmet_rdma_rsp *rsps; 103 - struct list_head free_rsps; 104 - spinlock_t rsps_lock; 101 + struct sbitmap rsp_tags; 105 102 struct nvmet_rdma_cmd *cmds; 106 103 107 104 struct work_struct release_work; ··· 173 172 static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, 174 173 struct nvmet_rdma_rsp *r); 175 174 static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, 176 - struct nvmet_rdma_rsp *r); 175 + struct nvmet_rdma_rsp *r, 176 + int tag); 177 177 178 178 static const struct nvmet_fabrics_ops nvmet_rdma_ops; 179 179 ··· 212 210 static inline struct nvmet_rdma_rsp * 213 211 nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) 214 212 { 215 - struct nvmet_rdma_rsp *rsp; 216 - unsigned long flags; 213 + struct nvmet_rdma_rsp *rsp = NULL; 214 + int tag; 217 215 218 - spin_lock_irqsave(&queue->rsps_lock, flags); 219 - rsp = list_first_entry_or_null(&queue->free_rsps, 220 - struct nvmet_rdma_rsp, free_list); 221 - if (likely(rsp)) 222 - list_del(&rsp->free_list); 223 - spin_unlock_irqrestore(&queue->rsps_lock, flags); 216 + tag = sbitmap_get(&queue->rsp_tags); 217 + if (tag >= 0) 218 + rsp = &queue->rsps[tag]; 224 219 225 220 if (unlikely(!rsp)) { 226 221 int ret; ··· 225 226 rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); 226 227 if (unlikely(!rsp)) 227 228 return NULL; 228 - ret = nvmet_rdma_alloc_rsp(queue->dev, rsp); 229 + ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, 230 + NVMET_RDMA_DISCRETE_RSP_TAG); 229 231 if (unlikely(ret)) { 230 232 kfree(rsp); 231 233 return NULL; 232 234 } 233 - 234 - rsp->allocated = true; 235 235 } 236 236 237 237 return rsp; ··· 239 241 
static inline void 240 242 nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) 241 243 { 242 - unsigned long flags; 243 - 244 - if (unlikely(rsp->allocated)) { 244 + if (unlikely(rsp->tag == NVMET_RDMA_DISCRETE_RSP_TAG)) { 245 245 nvmet_rdma_free_rsp(rsp->queue->dev, rsp); 246 246 kfree(rsp); 247 247 return; 248 248 } 249 249 250 - spin_lock_irqsave(&rsp->queue->rsps_lock, flags); 251 - list_add_tail(&rsp->free_list, &rsp->queue->free_rsps); 252 - spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags); 250 + sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag); 253 251 } 254 252 255 253 static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev, ··· 398 404 } 399 405 400 406 static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, 401 - struct nvmet_rdma_rsp *r) 407 + struct nvmet_rdma_rsp *r, int tag) 402 408 { 403 409 /* NVMe CQE / RDMA SEND */ 404 410 r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL); ··· 426 432 r->read_cqe.done = nvmet_rdma_read_data_done; 427 433 /* Data Out / RDMA WRITE */ 428 434 r->write_cqe.done = nvmet_rdma_write_data_done; 435 + r->tag = tag; 429 436 430 437 return 0; 431 438 ··· 449 454 { 450 455 struct nvmet_rdma_device *ndev = queue->dev; 451 456 int nr_rsps = queue->recv_queue_size * 2; 452 - int ret = -EINVAL, i; 457 + int ret = -ENOMEM, i; 458 + 459 + if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL, 460 + NUMA_NO_NODE, false, true)) 461 + goto out; 453 462 454 463 queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp), 455 464 GFP_KERNEL); 456 465 if (!queue->rsps) 457 - goto out; 466 + goto out_free_sbitmap; 458 467 459 468 for (i = 0; i < nr_rsps; i++) { 460 469 struct nvmet_rdma_rsp *rsp = &queue->rsps[i]; 461 470 462 - ret = nvmet_rdma_alloc_rsp(ndev, rsp); 471 + ret = nvmet_rdma_alloc_rsp(ndev, rsp, i); 463 472 if (ret) 464 473 goto out_free; 465 - 466 - list_add_tail(&rsp->free_list, &queue->free_rsps); 467 474 } 468 475 469 476 return 0; ··· 474 477 while (--i >= 0) 475 478 
nvmet_rdma_free_rsp(ndev, &queue->rsps[i]); 476 479 kfree(queue->rsps); 480 + out_free_sbitmap: 481 + sbitmap_free(&queue->rsp_tags); 477 482 out: 478 483 return ret; 479 484 } ··· 488 489 for (i = 0; i < nr_rsps; i++) 489 490 nvmet_rdma_free_rsp(ndev, &queue->rsps[i]); 490 491 kfree(queue->rsps); 492 + sbitmap_free(&queue->rsp_tags); 491 493 } 492 494 493 495 static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, ··· 1447 1447 INIT_LIST_HEAD(&queue->rsp_wait_list); 1448 1448 INIT_LIST_HEAD(&queue->rsp_wr_wait_list); 1449 1449 spin_lock_init(&queue->rsp_wr_wait_lock); 1450 - INIT_LIST_HEAD(&queue->free_rsps); 1451 - spin_lock_init(&queue->rsps_lock); 1452 1450 INIT_LIST_HEAD(&queue->queue_list); 1453 1451 1454 1452 queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
+7 -1
include/uapi/linux/ublk_cmd.h
··· 175 175 /* use ioctl encoding for uring command */ 176 176 #define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) 177 177 178 - /* Copy between request and user buffer by pread()/pwrite() */ 178 + /* 179 + * Copy between request and user buffer by pread()/pwrite() 180 + * 181 + * Not available for UBLK_F_UNPRIVILEGED_DEV, otherwise userspace may 182 + * deceive us by not filling request buffer, then kernel uninitialized 183 + * data may be leaked. 184 + */ 179 185 #define UBLK_F_USER_COPY (1UL << 7) 180 186 181 187 /*