Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-5.12-2021-03-19' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"Just an NVMe pull request this week:

- fix tag allocation for keep alive

- fix a unit mismatch for the Write Zeroes limits

- various TCP transport fixes (Sagi Grimberg, Elad Grupi)

- fix iosqes and iocqes validation for discovery controllers (Sagi Grimberg)"

* tag 'block-5.12-2021-03-19' of git://git.kernel.dk/linux-block:
nvmet-tcp: fix kmap leak when data digest in use
nvmet: don't check iosqes,iocqes for discovery controllers
nvme-rdma: fix possible hang when failing to set io queues
nvme-tcp: fix possible hang when failing to set io queues
nvme-tcp: fix misuse of __smp_processor_id with preemption enabled
nvme-tcp: fix a NULL deref when receiving a 0-length r2t PDU
nvme: fix Write Zeroes limitations
nvme: allocate the keep alive request using BLK_MQ_REQ_NOWAIT
nvme: merge nvme_keep_alive into nvme_keep_alive_work
nvme-fabrics: only reserve a single tag

+69 -60
+21 -43
drivers/nvme/host/core.c
··· 1226 1226 queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); 1227 1227 } 1228 1228 1229 - static int nvme_keep_alive(struct nvme_ctrl *ctrl) 1230 - { 1231 - struct request *rq; 1232 - 1233 - rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, 1234 - BLK_MQ_REQ_RESERVED); 1235 - if (IS_ERR(rq)) 1236 - return PTR_ERR(rq); 1237 - 1238 - rq->timeout = ctrl->kato * HZ; 1239 - rq->end_io_data = ctrl; 1240 - 1241 - blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io); 1242 - 1243 - return 0; 1244 - } 1245 - 1246 1229 static void nvme_keep_alive_work(struct work_struct *work) 1247 1230 { 1248 1231 struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), 1249 1232 struct nvme_ctrl, ka_work); 1250 1233 bool comp_seen = ctrl->comp_seen; 1234 + struct request *rq; 1251 1235 1252 1236 if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { 1253 1237 dev_dbg(ctrl->device, ··· 1241 1257 return; 1242 1258 } 1243 1259 1244 - if (nvme_keep_alive(ctrl)) { 1260 + rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, 1261 + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); 1262 + if (IS_ERR(rq)) { 1245 1263 /* allocation failure, reset the controller */ 1246 - dev_err(ctrl->device, "keep-alive failed\n"); 1264 + dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq)); 1247 1265 nvme_reset_ctrl(ctrl); 1248 1266 return; 1249 1267 } 1268 + 1269 + rq->timeout = ctrl->kato * HZ; 1270 + rq->end_io_data = ctrl; 1271 + blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io); 1250 1272 } 1251 1273 1252 1274 static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) ··· 1954 1964 blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); 1955 1965 } 1956 1966 1957 - static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) 1967 + /* 1968 + * Even though NVMe spec explicitly states that MDTS is not applicable to the 1969 + * write-zeroes, we are cautious and limit the size to the controllers 1970 + * max_hw_sectors value, which is based on the MDTS field and possibly other 1971 + * limiting factors. 1972 + */ 1973 + static void nvme_config_write_zeroes(struct request_queue *q, 1974 + struct nvme_ctrl *ctrl) 1958 1975 { 1959 - u64 max_blocks; 1960 - 1961 - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || 1962 - (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) 1963 - return; 1964 - /* 1965 - * Even though NVMe spec explicitly states that MDTS is not 1966 - * applicable to the write-zeroes:- "The restriction does not apply to 1967 - * commands that do not transfer data between the host and the 1968 - * controller (e.g., Write Uncorrectable ro Write Zeroes command).". 1969 - * In order to be more cautious use controller's max_hw_sectors value 1970 - * to configure the maximum sectors for the write-zeroes which is 1971 - * configured based on the controller's MDTS field in the 1972 - * nvme_init_identify() if available. 1973 - */ 1974 - if (ns->ctrl->max_hw_sectors == UINT_MAX) 1975 - max_blocks = (u64)USHRT_MAX + 1; 1976 - else 1977 - max_blocks = ns->ctrl->max_hw_sectors + 1; 1978 - 1979 - blk_queue_max_write_zeroes_sectors(disk->queue, 1980 - nvme_lba_to_sect(ns, max_blocks)); 1976 + if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) && 1977 + !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) 1978 + blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors); 1981 1979 } 1982 1980 1983 1981 static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) ··· 2137 2159 set_capacity_and_notify(disk, capacity); 2138 2160 2139 2161 nvme_config_discard(disk, ns); 2140 - nvme_config_write_zeroes(disk, ns); 2162 + nvme_config_write_zeroes(disk->queue, ns->ctrl); 2141 2163 2142 2164 set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || 2143 2165 test_bit(NVME_NS_FORCE_RO, &ns->flags));
+7
drivers/nvme/host/fabrics.h
··· 19 19 #define NVMF_DEF_FAIL_FAST_TMO -1 20 20 21 21 /* 22 + * Reserved one command for internal usage. This command is used for sending 23 + * the connect command, as well as for the keep alive command on the admin 24 + * queue once live. 25 + */ 26 + #define NVMF_RESERVED_TAGS 1 27 + 28 + /* 22 29 * Define a host as seen by the target. We allocate one at boot, but also 23 30 * allow the override it when creating controllers. This is both to provide 24 31 * persistence of the Host NQN over multiple boots, and to allow using
+2 -2
drivers/nvme/host/fc.c
··· 2863 2863 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); 2864 2864 ctrl->tag_set.ops = &nvme_fc_mq_ops; 2865 2865 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; 2866 - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ 2866 + ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; 2867 2867 ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; 2868 2868 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 2869 2869 ctrl->tag_set.cmd_size = ··· 3485 3485 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 3486 3486 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; 3487 3487 ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; 3488 - ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ 3488 + ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; 3489 3489 ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; 3490 3490 ctrl->admin_tag_set.cmd_size = 3491 3491 struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
+7 -4
drivers/nvme/host/rdma.c
··· 736 736 return ret; 737 737 738 738 ctrl->ctrl.queue_count = nr_io_queues + 1; 739 - if (ctrl->ctrl.queue_count < 2) 740 - return 0; 739 + if (ctrl->ctrl.queue_count < 2) { 740 + dev_err(ctrl->ctrl.device, 741 + "unable to set any I/O queues\n"); 742 + return -ENOMEM; 743 + } 741 744 742 745 dev_info(ctrl->ctrl.device, 743 746 "creating %d I/O queues.\n", nr_io_queues); ··· 801 798 memset(set, 0, sizeof(*set)); 802 799 set->ops = &nvme_rdma_admin_mq_ops; 803 800 set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; 804 - set->reserved_tags = 2; /* connect + keep-alive */ 801 + set->reserved_tags = NVMF_RESERVED_TAGS; 805 802 set->numa_node = nctrl->numa_node; 806 803 set->cmd_size = sizeof(struct nvme_rdma_request) + 807 804 NVME_RDMA_DATA_SGL_SIZE; ··· 814 811 memset(set, 0, sizeof(*set)); 815 812 set->ops = &nvme_rdma_mq_ops; 816 813 set->queue_depth = nctrl->sqsize + 1; 817 - set->reserved_tags = 1; /* fabric connect */ 814 + set->reserved_tags = NVMF_RESERVED_TAGS; 818 815 set->numa_node = nctrl->numa_node; 819 816 set->flags = BLK_MQ_F_SHOULD_MERGE; 820 817 set->cmd_size = sizeof(struct nvme_rdma_request) +
+15 -5
drivers/nvme/host/tcp.c
··· 287 287 * directly, otherwise queue io_work. Also, only do that if we 288 288 * are on the same cpu, so we don't introduce contention. 289 289 */ 290 - if (queue->io_cpu == __smp_processor_id() && 290 + if (queue->io_cpu == raw_smp_processor_id() && 291 291 sync && empty && mutex_trylock(&queue->send_mutex)) { 292 292 queue->more_requests = !last; 293 293 nvme_tcp_send_all(queue); ··· 567 567 568 568 req->pdu_len = le32_to_cpu(pdu->r2t_length); 569 569 req->pdu_sent = 0; 570 + 571 + if (unlikely(!req->pdu_len)) { 572 + dev_err(queue->ctrl->ctrl.device, 573 + "req %d r2t len is %u, probably a bug...\n", 574 + rq->tag, req->pdu_len); 575 + return -EPROTO; 576 + } 570 577 571 578 if (unlikely(req->data_sent + req->pdu_len > req->data_len)) { 572 579 dev_err(queue->ctrl->ctrl.device, ··· 1582 1575 memset(set, 0, sizeof(*set)); 1583 1576 set->ops = &nvme_tcp_admin_mq_ops; 1584 1577 set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; 1585 - set->reserved_tags = 2; /* connect + keep-alive */ 1578 + set->reserved_tags = NVMF_RESERVED_TAGS; 1586 1579 set->numa_node = nctrl->numa_node; 1587 1580 set->flags = BLK_MQ_F_BLOCKING; 1588 1581 set->cmd_size = sizeof(struct nvme_tcp_request); ··· 1594 1587 memset(set, 0, sizeof(*set)); 1595 1588 set->ops = &nvme_tcp_mq_ops; 1596 1589 set->queue_depth = nctrl->sqsize + 1; 1597 - set->reserved_tags = 1; /* fabric connect */ 1590 + set->reserved_tags = NVMF_RESERVED_TAGS; 1598 1591 set->numa_node = nctrl->numa_node; 1599 1592 set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; 1600 1593 set->cmd_size = sizeof(struct nvme_tcp_request); ··· 1752 1745 return ret; 1753 1746 1754 1747 ctrl->queue_count = nr_io_queues + 1; 1755 - if (ctrl->queue_count < 2) 1756 - return 0; 1748 + if (ctrl->queue_count < 2) { 1749 + dev_err(ctrl->device, 1750 + "unable to set any I/O queues\n"); 1751 + return -ENOMEM; 1752 + } 1757 1753 1758 1754 dev_info(ctrl->device, 1759 1755 "creating %d I/O queues.\n", nr_io_queues);
+14 -3
drivers/nvme/target/core.c
··· 1118 1118 { 1119 1119 lockdep_assert_held(&ctrl->lock); 1120 1120 1121 - if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || 1122 - nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES || 1123 - nvmet_cc_mps(ctrl->cc) != 0 || 1121 + /* 1122 + * Only I/O controllers should verify iosqes,iocqes. 1123 + * Strictly speaking, the spec says a discovery controller 1124 + * should verify iosqes,iocqes are zeroed, however that 1125 + * would break backwards compatibility, so don't enforce it. 1126 + */ 1127 + if (ctrl->subsys->type != NVME_NQN_DISC && 1128 + (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || 1129 + nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) { 1130 + ctrl->csts = NVME_CSTS_CFS; 1131 + return; 1132 + } 1133 + 1134 + if (nvmet_cc_mps(ctrl->cc) != 0 || 1124 1135 nvmet_cc_ams(ctrl->cc) != 0 || 1125 1136 nvmet_cc_css(ctrl->cc) != 0) { 1126 1137 ctrl->csts = NVME_CSTS_CFS;
+2 -2
drivers/nvme/target/loop.c
··· 349 349 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 350 350 ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops; 351 351 ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; 352 - ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ 352 + ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; 353 353 ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; 354 354 ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + 355 355 NVME_INLINE_SG_CNT * sizeof(struct scatterlist); ··· 520 520 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); 521 521 ctrl->tag_set.ops = &nvme_loop_mq_ops; 522 522 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; 523 - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ 523 + ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; 524 524 ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; 525 525 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 526 526 ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
+1 -1
drivers/nvme/target/tcp.c
··· 1098 1098 cmd->rbytes_done += ret; 1099 1099 } 1100 1100 1101 + nvmet_tcp_unmap_pdu_iovec(cmd); 1101 1102 if (queue->data_digest) { 1102 1103 nvmet_tcp_prep_recv_ddgst(cmd); 1103 1104 return 0; 1104 1105 } 1105 - nvmet_tcp_unmap_pdu_iovec(cmd); 1106 1106 1107 1107 if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && 1108 1108 cmd->rbytes_done == cmd->req.transfer_len) {