Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus-20180616' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"A collection of fixes that should go into -rc1. This contains:

- bsg_open vs bsg_unregister race fix (Anatoliy)

- NVMe pull request from Christoph, with fixes for regressions in
this window, FC connect/reconnect path code unification, and a
trace point addition.

- timeout fix (Christoph)

- remove a few unused functions (Christoph)

- blk-mq tag_set reinit fix (Roman)"

* tag 'for-linus-20180616' of git://git.kernel.dk/linux-block:
bsg: fix race of bsg_open and bsg_unregister
block: remove blk_queue_invalidate_tags
nvme-fabrics: fix and refine state checks in __nvmf_check_ready
nvme-fabrics: handle the admin-only case properly in nvmf_check_ready
nvme-fabrics: refactor queue ready check
blk-mq: remove blk_mq_tagset_iter
nvme: remove nvme_reinit_tagset
nvme-fc: fix nulling of queue data on reconnect
nvme-fc: remove reinit_request routine
blk-mq: don't time out requests again that are in the timeout handler
nvme-fc: change controllers first connect to use reconnect path
nvme: don't rely on the changed namespace list log
nvmet: free smart-log buffer after use
nvme-rdma: fix error flow during mapping request data
nvme: add bio remapping tracepoint
nvme: fix NULL pointer dereference in nvme_init_subsystem
blk-mq: reinit q->tag_set_list entry only after grace period

+182 -283
+1 -14
Documentation/block/biodoc.txt
··· 752 752 operations before calling end_that_request_last()! For an example of a user 753 753 of these helpers, see the IDE tagged command queueing support. 754 754 755 - Certain hardware conditions may dictate a need to invalidate the block tag 756 - queue. For instance, on IDE any tagged request error needs to clear both 757 - the hardware and software block queue and enable the driver to sanely restart 758 - all the outstanding requests. There's a third helper to do that: 759 - 760 - blk_queue_invalidate_tags(struct request_queue *q) 761 - 762 - Clear the internal block tag queue and re-add all the pending requests 763 - to the request queue. The driver will receive them again on the 764 - next request_fn run, just like it did the first time it encountered 765 - them. 766 - 767 755 3.2.5.2 Tag info 768 756 769 757 Some block functions exist to query current tag status or to go from a ··· 793 805 Most of the above is simple and straight forward, however busy_list may need 794 806 a bit of explaining. Normally we don't care too much about request ordering, 795 807 but in the event of any barrier requests in the tag queue we need to ensure 796 - that requests are restarted in the order they were queue. This may happen 797 - if the driver needs to use blk_queue_invalidate_tags(). 808 + that requests are restarted in the order they were queue. 798 809 799 810 3.3 I/O Submission 800 811
-29
block/blk-mq-tag.c
··· 311 311 } 312 312 EXPORT_SYMBOL(blk_mq_tagset_busy_iter); 313 313 314 - int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data, 315 - int (fn)(void *, struct request *)) 316 - { 317 - int i, j, ret = 0; 318 - 319 - if (WARN_ON_ONCE(!fn)) 320 - goto out; 321 - 322 - for (i = 0; i < set->nr_hw_queues; i++) { 323 - struct blk_mq_tags *tags = set->tags[i]; 324 - 325 - if (!tags) 326 - continue; 327 - 328 - for (j = 0; j < tags->nr_tags; j++) { 329 - if (!tags->static_rqs[j]) 330 - continue; 331 - 332 - ret = fn(data, tags->static_rqs[j]); 333 - if (ret) 334 - goto out; 335 - } 336 - } 337 - 338 - out: 339 - return ret; 340 - } 341 - EXPORT_SYMBOL_GPL(blk_mq_tagset_iter); 342 - 343 314 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, 344 315 void *priv) 345 316 {
+6 -2
block/blk-mq.c
··· 671 671 672 672 if (blk_mq_request_started(rq)) { 673 673 WRITE_ONCE(rq->state, MQ_RQ_IDLE); 674 + rq->rq_flags &= ~RQF_TIMED_OUT; 674 675 if (q->dma_drain_size && blk_rq_bytes(rq)) 675 676 rq->nr_phys_segments--; 676 677 } ··· 771 770 772 771 static void blk_mq_rq_timed_out(struct request *req, bool reserved) 773 772 { 773 + req->rq_flags |= RQF_TIMED_OUT; 774 774 if (req->q->mq_ops->timeout) { 775 775 enum blk_eh_timer_return ret; 776 776 ··· 781 779 WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); 782 780 } 783 781 782 + req->rq_flags &= ~RQF_TIMED_OUT; 784 783 blk_add_timer(req); 785 784 } 786 785 ··· 790 787 unsigned long deadline; 791 788 792 789 if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT) 790 + return false; 791 + if (rq->rq_flags & RQF_TIMED_OUT) 793 792 return false; 794 793 795 794 deadline = blk_rq_deadline(rq); ··· 2354 2349 2355 2350 mutex_lock(&set->tag_list_lock); 2356 2351 list_del_rcu(&q->tag_set_list); 2357 - INIT_LIST_HEAD(&q->tag_set_list); 2358 2352 if (list_is_singular(&set->tag_list)) { 2359 2353 /* just transitioned to unshared */ 2360 2354 set->flags &= ~BLK_MQ_F_TAG_SHARED; ··· 2361 2357 blk_mq_update_tag_set_depth(set, false); 2362 2358 } 2363 2359 mutex_unlock(&set->tag_list_lock); 2364 - 2365 2360 synchronize_rcu(); 2361 + INIT_LIST_HEAD(&q->tag_set_list); 2366 2362 } 2367 2363 2368 2364 static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
-22
block/blk-tag.c
··· 188 188 */ 189 189 q->queue_tags = tags; 190 190 queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); 191 - INIT_LIST_HEAD(&q->tag_busy_list); 192 191 return 0; 193 192 } 194 193 EXPORT_SYMBOL(blk_queue_init_tags); ··· 373 374 rq->tag = tag; 374 375 bqt->tag_index[tag] = rq; 375 376 blk_start_request(rq); 376 - list_add(&rq->queuelist, &q->tag_busy_list); 377 377 return 0; 378 378 } 379 379 EXPORT_SYMBOL(blk_queue_start_tag); 380 - 381 - /** 382 - * blk_queue_invalidate_tags - invalidate all pending tags 383 - * @q: the request queue for the device 384 - * 385 - * Description: 386 - * Hardware conditions may dictate a need to stop all pending requests. 387 - * In this case, we will safely clear the block side of the tag queue and 388 - * readd all requests to the request queue in the right order. 389 - **/ 390 - void blk_queue_invalidate_tags(struct request_queue *q) 391 - { 392 - struct list_head *tmp, *n; 393 - 394 - lockdep_assert_held(q->queue_lock); 395 - 396 - list_for_each_safe(tmp, n, &q->tag_busy_list) 397 - blk_requeue_request(q, list_entry_rq(tmp)); 398 - } 399 - EXPORT_SYMBOL(blk_queue_invalidate_tags);
+11 -11
block/bsg.c
··· 693 693 struct bsg_device *bd; 694 694 unsigned char buf[32]; 695 695 696 + lockdep_assert_held(&bsg_mutex); 697 + 696 698 if (!blk_get_queue(rq)) 697 699 return ERR_PTR(-ENXIO); 698 700 ··· 709 707 bsg_set_block(bd, file); 710 708 711 709 atomic_set(&bd->ref_count, 1); 712 - mutex_lock(&bsg_mutex); 713 710 hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); 714 711 715 712 strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1); 716 713 bsg_dbg(bd, "bound to <%s>, max queue %d\n", 717 714 format_dev_t(buf, inode->i_rdev), bd->max_queue); 718 715 719 - mutex_unlock(&bsg_mutex); 720 716 return bd; 721 717 } 722 718 ··· 722 722 { 723 723 struct bsg_device *bd; 724 724 725 - mutex_lock(&bsg_mutex); 725 + lockdep_assert_held(&bsg_mutex); 726 726 727 727 hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { 728 728 if (bd->queue == q) { ··· 732 732 } 733 733 bd = NULL; 734 734 found: 735 - mutex_unlock(&bsg_mutex); 736 735 return bd; 737 736 } 738 737 ··· 745 746 */ 746 747 mutex_lock(&bsg_mutex); 747 748 bcd = idr_find(&bsg_minor_idr, iminor(inode)); 748 - mutex_unlock(&bsg_mutex); 749 749 750 - if (!bcd) 751 - return ERR_PTR(-ENODEV); 750 + if (!bcd) { 751 + bd = ERR_PTR(-ENODEV); 752 + goto out_unlock; 753 + } 752 754 753 755 bd = __bsg_get_device(iminor(inode), bcd->queue); 754 - if (bd) 755 - return bd; 756 + if (!bd) 757 + bd = bsg_add_device(inode, bcd->queue, file); 756 758 757 - bd = bsg_add_device(inode, bcd->queue, file); 758 - 759 + out_unlock: 760 + mutex_unlock(&bsg_mutex); 759 761 return bd; 760 762 } 761 763
+12 -36
drivers/nvme/host/core.c
··· 2208 2208 * Verify that the subsystem actually supports multiple 2209 2209 * controllers, else bail out. 2210 2210 */ 2211 - if (!ctrl->opts->discovery_nqn && 2211 + if (!(ctrl->opts && ctrl->opts->discovery_nqn) && 2212 2212 nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { 2213 2213 dev_err(ctrl->device, 2214 2214 "ignoring ctrl due to duplicate subnqn (%s).\n", ··· 3197 3197 nvme_remove_invalid_namespaces(ctrl, nn); 3198 3198 } 3199 3199 3200 - static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl) 3200 + static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl) 3201 3201 { 3202 3202 size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32); 3203 3203 __le32 *log; 3204 - int error, i; 3205 - bool ret = false; 3204 + int error; 3206 3205 3207 3206 log = kzalloc(log_size, GFP_KERNEL); 3208 3207 if (!log) 3209 - return false; 3208 + return; 3210 3209 3210 + /* 3211 + * We need to read the log to clear the AEN, but we don't want to rely 3212 + * on it for the changed namespace information as userspace could have 3213 + * raced with us in reading the log page, which could cause us to miss 3214 + * updates. 
3215 + */ 3211 3216 error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size); 3212 - if (error) { 3217 + if (error) 3213 3218 dev_warn(ctrl->device, 3214 3219 "reading changed ns log failed: %d\n", error); 3215 - goto out_free_log; 3216 - } 3217 3220 3218 - if (log[0] == cpu_to_le32(0xffffffff)) 3219 - goto out_free_log; 3220 - 3221 - for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) { 3222 - u32 nsid = le32_to_cpu(log[i]); 3223 - 3224 - if (nsid == 0) 3225 - break; 3226 - dev_info(ctrl->device, "rescanning namespace %d.\n", nsid); 3227 - nvme_validate_ns(ctrl, nsid); 3228 - } 3229 - ret = true; 3230 - 3231 - out_free_log: 3232 3221 kfree(log); 3233 - return ret; 3234 3222 } 3235 3223 3236 3224 static void nvme_scan_work(struct work_struct *work) ··· 3234 3246 WARN_ON_ONCE(!ctrl->tagset); 3235 3247 3236 3248 if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { 3237 - if (nvme_scan_changed_ns_log(ctrl)) 3238 - goto out_sort_namespaces; 3239 3249 dev_info(ctrl->device, "rescanning namespaces.\n"); 3250 + nvme_clear_changed_ns_log(ctrl); 3240 3251 } 3241 3252 3242 3253 if (nvme_identify_ctrl(ctrl, &id)) ··· 3250 3263 nvme_scan_ns_sequential(ctrl, nn); 3251 3264 out_free_id: 3252 3265 kfree(id); 3253 - out_sort_namespaces: 3254 3266 down_write(&ctrl->namespaces_rwsem); 3255 3267 list_sort(NULL, &ctrl->namespaces, ns_cmp); 3256 3268 up_write(&ctrl->namespaces_rwsem); ··· 3626 3640 up_read(&ctrl->namespaces_rwsem); 3627 3641 } 3628 3642 EXPORT_SYMBOL_GPL(nvme_start_queues); 3629 - 3630 - int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set) 3631 - { 3632 - if (!ctrl->ops->reinit_request) 3633 - return 0; 3634 - 3635 - return blk_mq_tagset_iter(set, set->driver_data, 3636 - ctrl->ops->reinit_request); 3637 - } 3638 - EXPORT_SYMBOL_GPL(nvme_reinit_tagset); 3639 3643 3640 3644 int __init nvme_core_init(void) 3641 3645 {
+43 -55
drivers/nvme/host/fabrics.c
··· 536 536 return NULL; 537 537 } 538 538 539 - blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, 540 - bool queue_live, bool is_connected) 539 + /* 540 + * For something we're not in a state to send to the device the default action 541 + * is to busy it and retry it after the controller state is recovered. However, 542 + * anything marked for failfast or nvme multipath is immediately failed. 543 + * 544 + * Note: commands used to initialize the controller will be marked for failfast. 545 + * Note: nvme cli/ioctl commands are marked for failfast. 546 + */ 547 + blk_status_t nvmf_fail_nonready_command(struct request *rq) 541 548 { 542 - struct nvme_command *cmd = nvme_req(rq)->cmd; 543 - 544 - if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected)) 545 - return BLK_STS_OK; 546 - 547 - switch (ctrl->state) { 548 - case NVME_CTRL_NEW: 549 - case NVME_CTRL_CONNECTING: 550 - case NVME_CTRL_DELETING: 551 - /* 552 - * This is the case of starting a new or deleting an association 553 - * but connectivity was lost before it was fully created or torn 554 - * down. We need to error the commands used to initialize the 555 - * controller so the reconnect can go into a retry attempt. The 556 - * commands should all be marked REQ_FAILFAST_DRIVER, which will 557 - * hit the reject path below. Anything else will be queued while 558 - * the state settles. 559 - */ 560 - if (!is_connected) 561 - break; 562 - 563 - /* 564 - * If queue is live, allow only commands that are internally 565 - * generated pass through. These are commands on the admin 566 - * queue to initialize the controller. This will reject any 567 - * ioctl admin cmds received while initializing. 568 - */ 569 - if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) 570 - return BLK_STS_OK; 571 - 572 - /* 573 - * If the queue is not live, allow only a connect command. 
This 574 - * will reject any ioctl admin cmd as well as initialization 575 - * commands if the controller reverted the queue to non-live. 576 - */ 577 - if (!queue_live && blk_rq_is_passthrough(rq) && 578 - cmd->common.opcode == nvme_fabrics_command && 579 - cmd->fabrics.fctype == nvme_fabrics_type_connect) 580 - return BLK_STS_OK; 581 - break; 582 - default: 583 - break; 584 - } 585 - 586 - /* 587 - * Any other new io is something we're not in a state to send to the 588 - * device. Default action is to busy it and retry it after the 589 - * controller state is recovered. However, anything marked for failfast 590 - * or nvme multipath is immediately failed. Note: commands used to 591 - * initialize the controller will be marked for failfast. 592 - * Note: nvme cli/ioctl commands are marked for failfast. 593 - */ 594 549 if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) 595 550 return BLK_STS_RESOURCE; 596 551 nvme_req(rq)->status = NVME_SC_ABORT_REQ; 597 552 return BLK_STS_IOERR; 598 553 } 599 - EXPORT_SYMBOL_GPL(nvmf_check_if_ready); 554 + EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command); 555 + 556 + bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 557 + bool queue_live) 558 + { 559 + struct nvme_request *req = nvme_req(rq); 560 + 561 + /* 562 + * If we are in some state of setup or teardown only allow 563 + * internally generated commands. 564 + */ 565 + if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD)) 566 + return false; 567 + 568 + /* 569 + * Only allow commands on a live queue, except for the connect command, 570 + * which is require to set the queue live in the appropinquate states. 
571 + */ 572 + switch (ctrl->state) { 573 + case NVME_CTRL_NEW: 574 + case NVME_CTRL_CONNECTING: 575 + if (req->cmd->common.opcode == nvme_fabrics_command && 576 + req->cmd->fabrics.fctype == nvme_fabrics_type_connect) 577 + return true; 578 + break; 579 + default: 580 + break; 581 + case NVME_CTRL_DEAD: 582 + return false; 583 + } 584 + 585 + return queue_live; 586 + } 587 + EXPORT_SYMBOL_GPL(__nvmf_check_ready); 600 588 601 589 static const match_table_t opt_tokens = { 602 590 { NVMF_OPT_TRANSPORT, "transport=%s" },
+12 -2
drivers/nvme/host/fabrics.h
··· 162 162 void nvmf_free_options(struct nvmf_ctrl_options *opts); 163 163 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); 164 164 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); 165 - blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, 166 - struct request *rq, bool queue_live, bool is_connected); 165 + blk_status_t nvmf_fail_nonready_command(struct request *rq); 166 + bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 167 + bool queue_live); 168 + 169 + static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 170 + bool queue_live) 171 + { 172 + if (likely(ctrl->state == NVME_CTRL_LIVE || 173 + ctrl->state == NVME_CTRL_ADMIN_ONLY)) 174 + return true; 175 + return __nvmf_check_ready(ctrl, rq, queue_live); 176 + } 167 177 168 178 #endif /* _NVME_FABRICS_H */
+58 -90
drivers/nvme/host/fc.c
··· 142 142 struct nvme_fc_rport *rport; 143 143 u32 cnum; 144 144 145 + bool ioq_live; 145 146 bool assoc_active; 146 147 u64 association_id; 147 148 ··· 1471 1470 1472 1471 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); 1473 1472 1474 - static int 1475 - nvme_fc_reinit_request(void *data, struct request *rq) 1476 - { 1477 - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1478 - struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1479 - 1480 - memset(cmdiu, 0, sizeof(*cmdiu)); 1481 - cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1482 - cmdiu->fc_id = NVME_CMD_FC_ID; 1483 - cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1484 - memset(&op->rsp_iu, 0, sizeof(op->rsp_iu)); 1485 - 1486 - return 0; 1487 - } 1488 - 1489 1473 static void 1490 1474 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, 1491 1475 struct nvme_fc_fcp_op *op) ··· 1879 1893 */ 1880 1894 1881 1895 queue->connection_id = 0; 1896 + atomic_set(&queue->csn, 1); 1882 1897 } 1883 1898 1884 1899 static void ··· 2266 2279 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2267 2280 struct nvme_command *sqe = &cmdiu->sqe; 2268 2281 enum nvmefc_fcp_datadir io_dir; 2282 + bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags); 2269 2283 u32 data_len; 2270 2284 blk_status_t ret; 2271 2285 2272 - ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, 2273 - test_bit(NVME_FC_Q_LIVE, &queue->flags), 2274 - ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE); 2275 - if (unlikely(ret)) 2276 - return ret; 2286 + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || 2287 + !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2288 + return nvmf_fail_nonready_command(rq); 2277 2289 2278 2290 ret = nvme_setup_cmd(ns, rq, sqe); 2279 2291 if (ret) ··· 2449 2463 if (ret) 2450 2464 goto out_delete_hw_queues; 2451 2465 2466 + ctrl->ioq_live = true; 2467 + 2452 2468 return 0; 2453 2469 2454 2470 out_delete_hw_queues: ··· 2468 2480 } 2469 2481 2470 2482 static int 2471 - 
nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) 2483 + nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) 2472 2484 { 2473 2485 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2474 2486 unsigned int nr_io_queues; ··· 2487 2499 /* check for io queues existing */ 2488 2500 if (ctrl->ctrl.queue_count == 1) 2489 2501 return 0; 2490 - 2491 - nvme_fc_init_io_queues(ctrl); 2492 - 2493 - ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 2494 - if (ret) 2495 - goto out_free_io_queues; 2496 2502 2497 2503 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2498 2504 if (ret) ··· 2585 2603 * Create the admin queue 2586 2604 */ 2587 2605 2588 - nvme_fc_init_queue(ctrl, 0); 2589 - 2590 2606 ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, 2591 2607 NVME_AQ_DEPTH); 2592 2608 if (ret) ··· 2595 2615 if (ret) 2596 2616 goto out_delete_hw_queue; 2597 2617 2598 - if (ctrl->ctrl.state != NVME_CTRL_NEW) 2599 - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2618 + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2600 2619 2601 2620 ret = nvmf_connect_admin_queue(&ctrl->ctrl); 2602 2621 if (ret) ··· 2668 2689 */ 2669 2690 2670 2691 if (ctrl->ctrl.queue_count > 1) { 2671 - if (ctrl->ctrl.state == NVME_CTRL_NEW) 2692 + if (!ctrl->ioq_live) 2672 2693 ret = nvme_fc_create_io_queues(ctrl); 2673 2694 else 2674 - ret = nvme_fc_reinit_io_queues(ctrl); 2695 + ret = nvme_fc_recreate_io_queues(ctrl); 2675 2696 if (ret) 2676 2697 goto out_term_aen_ops; 2677 2698 } ··· 2755 2776 * use blk_mq_tagset_busy_itr() and the transport routine to 2756 2777 * terminate the exchanges. 
2757 2778 */ 2758 - if (ctrl->ctrl.state != NVME_CTRL_NEW) 2759 - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 2779 + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 2760 2780 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 2761 2781 nvme_fc_terminate_exchange, &ctrl->ctrl); 2762 2782 ··· 2895 2917 .submit_async_event = nvme_fc_submit_async_event, 2896 2918 .delete_ctrl = nvme_fc_delete_ctrl, 2897 2919 .get_address = nvmf_get_address, 2898 - .reinit_request = nvme_fc_reinit_request, 2899 2920 }; 2900 2921 2901 2922 static void ··· 2911 2934 nvme_fc_reconnect_or_delete(ctrl, ret); 2912 2935 else 2913 2936 dev_info(ctrl->ctrl.device, 2914 - "NVME-FC{%d}: controller reconnect complete\n", 2937 + "NVME-FC{%d}: controller connect complete\n", 2915 2938 ctrl->cnum); 2916 2939 } 2917 2940 ··· 2959 2982 { 2960 2983 struct nvme_fc_ctrl *ctrl; 2961 2984 unsigned long flags; 2962 - int ret, idx, retry; 2985 + int ret, idx; 2963 2986 2964 2987 if (!(rport->remoteport.port_role & 2965 2988 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { ··· 2986 3009 } 2987 3010 2988 3011 ctrl->ctrl.opts = opts; 3012 + ctrl->ctrl.nr_reconnects = 0; 2989 3013 INIT_LIST_HEAD(&ctrl->ctrl_list); 2990 3014 ctrl->lport = lport; 2991 3015 ctrl->rport = rport; 2992 3016 ctrl->dev = lport->dev; 2993 3017 ctrl->cnum = idx; 3018 + ctrl->ioq_live = false; 2994 3019 ctrl->assoc_active = false; 2995 3020 init_waitqueue_head(&ctrl->ioabort_wait); 2996 3021 ··· 3011 3032 3012 3033 ctrl->ctrl.sqsize = opts->queue_size - 1; 3013 3034 ctrl->ctrl.kato = opts->kato; 3035 + ctrl->ctrl.cntlid = 0xffff; 3014 3036 3015 3037 ret = -ENOMEM; 3016 3038 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, 3017 3039 sizeof(struct nvme_fc_queue), GFP_KERNEL); 3018 3040 if (!ctrl->queues) 3019 3041 goto out_free_ida; 3042 + 3043 + nvme_fc_init_queue(ctrl, 0); 3020 3044 3021 3045 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 3022 3046 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; ··· 3063 3081 
list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); 3064 3082 spin_unlock_irqrestore(&rport->lock, flags); 3065 3083 3066 - /* 3067 - * It's possible that transactions used to create the association 3068 - * may fail. Examples: CreateAssociation LS or CreateIOConnection 3069 - * LS gets dropped/corrupted/fails; or a frame gets dropped or a 3070 - * command times out for one of the actions to init the controller 3071 - * (Connect, Get/Set_Property, Set_Features, etc). Many of these 3072 - * transport errors (frame drop, LS failure) inherently must kill 3073 - * the association. The transport is coded so that any command used 3074 - * to create the association (prior to a LIVE state transition 3075 - * while NEW or CONNECTING) will fail if it completes in error or 3076 - * times out. 3077 - * 3078 - * As such: as the connect request was mostly likely due to a 3079 - * udev event that discovered the remote port, meaning there is 3080 - * not an admin or script there to restart if the connect 3081 - * request fails, retry the initial connection creation up to 3082 - * three times before giving up and declaring failure. 3083 - */ 3084 - for (retry = 0; retry < 3; retry++) { 3085 - ret = nvme_fc_create_association(ctrl); 3086 - if (!ret) 3087 - break; 3088 - } 3089 - 3090 - if (ret) { 3091 - nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); 3092 - cancel_work_sync(&ctrl->ctrl.reset_work); 3093 - cancel_delayed_work_sync(&ctrl->connect_work); 3094 - 3095 - /* couldn't schedule retry - fail out */ 3084 + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || 3085 + !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 3096 3086 dev_err(ctrl->ctrl.device, 3097 - "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); 3098 - 3099 - ctrl->ctrl.opts = NULL; 3100 - 3101 - /* initiate nvme ctrl ref counting teardown */ 3102 - nvme_uninit_ctrl(&ctrl->ctrl); 3103 - 3104 - /* Remove core ctrl ref. 
*/ 3105 - nvme_put_ctrl(&ctrl->ctrl); 3106 - 3107 - /* as we're past the point where we transition to the ref 3108 - * counting teardown path, if we return a bad pointer here, 3109 - * the calling routine, thinking it's prior to the 3110 - * transition, will do an rport put. Since the teardown 3111 - * path also does a rport put, we do an extra get here to 3112 - * so proper order/teardown happens. 3113 - */ 3114 - nvme_fc_rport_get(rport); 3115 - 3116 - if (ret > 0) 3117 - ret = -EIO; 3118 - return ERR_PTR(ret); 3087 + "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); 3088 + goto fail_ctrl; 3119 3089 } 3120 3090 3121 3091 nvme_get_ctrl(&ctrl->ctrl); 3092 + 3093 + if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { 3094 + nvme_put_ctrl(&ctrl->ctrl); 3095 + dev_err(ctrl->ctrl.device, 3096 + "NVME-FC{%d}: failed to schedule initial connect\n", 3097 + ctrl->cnum); 3098 + goto fail_ctrl; 3099 + } 3100 + 3101 + flush_delayed_work(&ctrl->connect_work); 3122 3102 3123 3103 dev_info(ctrl->ctrl.device, 3124 3104 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", 3125 3105 ctrl->cnum, ctrl->ctrl.opts->subsysnqn); 3126 3106 3127 3107 return &ctrl->ctrl; 3108 + 3109 + fail_ctrl: 3110 + nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); 3111 + cancel_work_sync(&ctrl->ctrl.reset_work); 3112 + cancel_delayed_work_sync(&ctrl->connect_work); 3113 + 3114 + ctrl->ctrl.opts = NULL; 3115 + 3116 + /* initiate nvme ctrl ref counting teardown */ 3117 + nvme_uninit_ctrl(&ctrl->ctrl); 3118 + 3119 + /* Remove core ctrl ref. */ 3120 + nvme_put_ctrl(&ctrl->ctrl); 3121 + 3122 + /* as we're past the point where we transition to the ref 3123 + * counting teardown path, if we return a bad pointer here, 3124 + * the calling routine, thinking it's prior to the 3125 + * transition, will do an rport put. Since the teardown 3126 + * path also does a rport put, we do an extra get here to 3127 + * so proper order/teardown happens. 
3128 + */ 3129 + nvme_fc_rport_get(rport); 3130 + 3131 + return ERR_PTR(-EIO); 3128 3132 3129 3133 out_cleanup_admin_q: 3130 3134 blk_cleanup_queue(ctrl->ctrl.admin_q);
+4
drivers/nvme/host/multipath.c
··· 12 12 */ 13 13 14 14 #include <linux/moduleparam.h> 15 + #include <trace/events/block.h> 15 16 #include "nvme.h" 16 17 17 18 static bool multipath = true; ··· 112 111 if (likely(ns)) { 113 112 bio->bi_disk = ns->disk; 114 113 bio->bi_opf |= REQ_NVME_MPATH; 114 + trace_block_bio_remap(bio->bi_disk->queue, bio, 115 + disk_devt(ns->head->disk), 116 + bio->bi_iter.bi_sector); 115 117 ret = direct_make_request(bio); 116 118 } else if (!list_empty_careful(&head->list)) { 117 119 dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
-2
drivers/nvme/host/nvme.h
··· 321 321 void (*submit_async_event)(struct nvme_ctrl *ctrl); 322 322 void (*delete_ctrl)(struct nvme_ctrl *ctrl); 323 323 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); 324 - int (*reinit_request)(void *data, struct request *rq); 325 324 void (*stop_ctrl)(struct nvme_ctrl *ctrl); 326 325 }; 327 326 ··· 415 416 void nvme_wait_freeze(struct nvme_ctrl *ctrl); 416 417 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); 417 418 void nvme_start_freeze(struct nvme_ctrl *ctrl); 418 - int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set); 419 419 420 420 #define NVME_QID_ANY -1 421 421 struct request *nvme_alloc_request(struct request_queue *q,
+27 -11
drivers/nvme/host/rdma.c
··· 1189 1189 count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, 1190 1190 rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 1191 1191 if (unlikely(count <= 0)) { 1192 - sg_free_table_chained(&req->sg_table, true); 1193 - return -EIO; 1192 + ret = -EIO; 1193 + goto out_free_table; 1194 1194 } 1195 1195 1196 1196 if (count == 1) { 1197 1197 if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && 1198 1198 blk_rq_payload_bytes(rq) <= 1199 - nvme_rdma_inline_data_size(queue)) 1200 - return nvme_rdma_map_sg_inline(queue, req, c); 1199 + nvme_rdma_inline_data_size(queue)) { 1200 + ret = nvme_rdma_map_sg_inline(queue, req, c); 1201 + goto out; 1202 + } 1201 1203 1202 - if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) 1203 - return nvme_rdma_map_sg_single(queue, req, c); 1204 + if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) { 1205 + ret = nvme_rdma_map_sg_single(queue, req, c); 1206 + goto out; 1207 + } 1204 1208 } 1205 1209 1206 - return nvme_rdma_map_sg_fr(queue, req, c, count); 1210 + ret = nvme_rdma_map_sg_fr(queue, req, c, count); 1211 + out: 1212 + if (unlikely(ret)) 1213 + goto out_unmap_sg; 1214 + 1215 + return 0; 1216 + 1217 + out_unmap_sg: 1218 + ib_dma_unmap_sg(ibdev, req->sg_table.sgl, 1219 + req->nents, rq_data_dir(rq) == 1220 + WRITE ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); 1221 + out_free_table: 1222 + sg_free_table_chained(&req->sg_table, true); 1223 + return ret; 1207 1224 } 1208 1225 1209 1226 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) ··· 1630 1613 struct nvme_rdma_qe *sqe = &req->sqe; 1631 1614 struct nvme_command *c = sqe->data; 1632 1615 struct ib_device *dev; 1616 + bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags); 1633 1617 blk_status_t ret; 1634 1618 int err; 1635 1619 1636 1620 WARN_ON_ONCE(rq->tag < 0); 1637 1621 1638 - ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, 1639 - test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true); 1640 - if (unlikely(ret)) 1641 - return ret; 1622 + if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 1623 + return nvmf_fail_nonready_command(rq); 1642 1624 1643 1625 dev = queue->device->dev; 1644 1626 ib_dma_sync_single_for_cpu(dev, sqe->dma,
+3 -1
drivers/nvme/target/admin-cmd.c
··· 119 119 else 120 120 status = nvmet_get_smart_log_nsid(req, log); 121 121 if (status) 122 - goto out; 122 + goto out_free_log; 123 123 124 124 status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); 125 + out_free_log: 126 + kfree(log); 125 127 out: 126 128 nvmet_req_complete(req, status); 127 129 }
+3 -4
drivers/nvme/target/loop.c
··· 158 158 struct nvme_loop_queue *queue = hctx->driver_data; 159 159 struct request *req = bd->rq; 160 160 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); 161 + bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags); 161 162 blk_status_t ret; 162 163 163 - ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req, 164 - test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true); 165 - if (unlikely(ret)) 166 - return ret; 164 + if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) 165 + return nvmf_fail_nonready_command(req); 167 166 168 167 ret = nvme_setup_cmd(ns, req, &iod->cmd); 169 168 if (ret)
-2
include/linux/blk-mq.h
··· 281 281 void blk_mq_freeze_queue_wait(struct request_queue *q); 282 282 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, 283 283 unsigned long timeout); 284 - int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data, 285 - int (reinit_request)(void *, struct request *)); 286 284 287 285 int blk_mq_map_queues(struct blk_mq_tag_set *set); 288 286 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
+2 -2
include/linux/blkdev.h
··· 127 127 #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) 128 128 /* already slept for hybrid poll */ 129 129 #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) 130 + /* ->timeout has been called, don't expire again */ 131 + #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) 130 132 131 133 /* flags that prevent us from merging requests: */ 132 134 #define RQF_NOMERGE_FLAGS \ ··· 562 560 unsigned int dma_alignment; 563 561 564 562 struct blk_queue_tag *queue_tags; 565 - struct list_head tag_busy_list; 566 563 567 564 unsigned int nr_sorted; 568 565 unsigned int in_flight[2]; ··· 1374 1373 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int); 1375 1374 extern void blk_queue_free_tags(struct request_queue *); 1376 1375 extern int blk_queue_resize_tags(struct request_queue *, int); 1377 - extern void blk_queue_invalidate_tags(struct request_queue *); 1378 1376 extern struct blk_queue_tag *blk_init_tags(int, int); 1379 1377 extern void blk_free_tags(struct blk_queue_tag *); 1380 1378