Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

nvme: remove virtual boundary for sgl capable devices

The nvme virtual boundary is only required for the PRP format. Devices
that can use SGL for DMA don't need it for IO queues, so drop reporting
it for such devices. RDMA fabrics controllers will continue to use the
limit, as they currently don't report any boundary requirements. TCP
and FC never needed it in the first place, so they now report no
virtual boundary.

Applications may continue to align to the same virtual boundaries for
optimization purposes if they want, and the driver will continue to
decide whether to use the PRP format the same as before if the IO allows
it.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Keith Busch and committed by Jens Axboe.
bc840b21 2f6b2565

+48 -8
+1
drivers/nvme/host/apple.c
··· 1283 1283 .reg_read64 = apple_nvme_reg_read64, 1284 1284 .free_ctrl = apple_nvme_free_ctrl, 1285 1285 .get_address = apple_nvme_get_address, 1286 + .get_virt_boundary = nvme_get_virt_boundary, 1286 1287 }; 1287 1288 1288 1289 static void apple_nvme_async_probe(void *data, async_cookie_t cookie)
+5 -5
drivers/nvme/host/core.c
··· 2069 2069 } 2070 2070 2071 2071 static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl, 2072 - struct queue_limits *lim) 2072 + struct queue_limits *lim, bool is_admin) 2073 2073 { 2074 2074 lim->max_hw_sectors = ctrl->max_hw_sectors; 2075 2075 lim->max_segments = min_t(u32, USHRT_MAX, 2076 2076 min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments)); 2077 2077 lim->max_integrity_segments = ctrl->max_integrity_segments; 2078 - lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1; 2078 + lim->virt_boundary_mask = ctrl->ops->get_virt_boundary(ctrl, is_admin); 2079 2079 lim->max_segment_size = UINT_MAX; 2080 2080 lim->dma_alignment = 3; 2081 2081 } ··· 2177 2177 int ret; 2178 2178 2179 2179 lim = queue_limits_start_update(ns->disk->queue); 2180 - nvme_set_ctrl_limits(ns->ctrl, &lim); 2180 + nvme_set_ctrl_limits(ns->ctrl, &lim, false); 2181 2181 2182 2182 memflags = blk_mq_freeze_queue(ns->disk->queue); 2183 2183 ret = queue_limits_commit_update(ns->disk->queue, &lim); ··· 2381 2381 ns->head->lba_shift = id->lbaf[lbaf].ds; 2382 2382 ns->head->nuse = le64_to_cpu(id->nuse); 2383 2383 capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); 2384 - nvme_set_ctrl_limits(ns->ctrl, &lim); 2384 + nvme_set_ctrl_limits(ns->ctrl, &lim, false); 2385 2385 nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info); 2386 2386 nvme_set_chunk_sectors(ns, id, &lim); 2387 2387 if (!nvme_update_disk_info(ns, id, &lim)) ··· 3588 3588 min_not_zero(ctrl->max_hw_sectors, max_hw_sectors); 3589 3589 3590 3590 lim = queue_limits_start_update(ctrl->admin_q); 3591 - nvme_set_ctrl_limits(ctrl, &lim); 3591 + nvme_set_ctrl_limits(ctrl, &lim, true); 3592 3592 ret = queue_limits_commit_update(ctrl->admin_q, &lim); 3593 3593 if (ret) 3594 3594 goto out_free;
+6
drivers/nvme/host/fabrics.h
··· 217 217 min(opts->nr_poll_queues, num_online_cpus()); 218 218 } 219 219 220 + static inline unsigned long nvmf_get_virt_boundary(struct nvme_ctrl *ctrl, 221 + bool is_admin) 222 + { 223 + return 0; 224 + } 225 + 220 226 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); 221 227 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); 222 228 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
+1
drivers/nvme/host/fc.c
··· 3360 3360 .submit_async_event = nvme_fc_submit_async_event, 3361 3361 .delete_ctrl = nvme_fc_delete_ctrl, 3362 3362 .get_address = nvmf_get_address, 3363 + .get_virt_boundary = nvmf_get_virt_boundary, 3363 3364 }; 3364 3365 3365 3366 static void
+7
drivers/nvme/host/nvme.h
··· 558 558 return head->pi_type && head->ms == head->pi_size; 559 559 } 560 560 561 + static inline unsigned long nvme_get_virt_boundary(struct nvme_ctrl *ctrl, 562 + bool is_admin) 563 + { 564 + return NVME_CTRL_PAGE_SIZE - 1; 565 + } 566 + 561 567 struct nvme_ctrl_ops { 562 568 const char *name; 563 569 struct module *module; ··· 584 578 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); 585 579 void (*print_device_info)(struct nvme_ctrl *ctrl); 586 580 bool (*supports_pci_p2pdma)(struct nvme_ctrl *ctrl); 581 + unsigned long (*get_virt_boundary)(struct nvme_ctrl *ctrl, bool is_admin); 587 582 }; 588 583 589 584 /*
+25 -3
drivers/nvme/host/pci.c
··· 613 613 struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 614 614 615 615 if (nvmeq->qid && nvme_ctrl_sgl_supported(&dev->ctrl)) { 616 - if (nvme_req(req)->flags & NVME_REQ_USERCMD) 617 - return SGL_FORCED; 618 - if (req->nr_integrity_segments > 1) 616 + /* 617 + * When the controller is capable of using SGL, there are 618 + * several conditions that we force to use it: 619 + * 620 + * 1. A request containing page gaps within the controller's 621 + * mask can not use the PRP format. 622 + * 623 + * 2. User commands use SGL because that lets the device 624 + * validate the requested transfer lengths. 625 + * 626 + * 3. Multiple integrity segments must use SGL as that's the 627 + * only way to describe such a command in NVMe. 628 + */ 629 + if (req_phys_gap_mask(req) & (NVME_CTRL_PAGE_SIZE - 1) || 630 + nvme_req(req)->flags & NVME_REQ_USERCMD || 631 + req->nr_integrity_segments > 1) 619 632 return SGL_FORCED; 620 633 return SGL_SUPPORTED; 621 634 } ··· 3256 3243 return dma_pci_p2pdma_supported(dev->dev); 3257 3244 } 3258 3245 3246 + static unsigned long nvme_pci_get_virt_boundary(struct nvme_ctrl *ctrl, 3247 + bool is_admin) 3248 + { 3249 + if (!nvme_ctrl_sgl_supported(ctrl) || is_admin) 3250 + return NVME_CTRL_PAGE_SIZE - 1; 3251 + return 0; 3252 + } 3253 + 3259 3254 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { 3260 3255 .name = "pcie", 3261 3256 .module = THIS_MODULE, ··· 3278 3257 .get_address = nvme_pci_get_address, 3279 3258 .print_device_info = nvme_pci_print_device_info, 3280 3259 .supports_pci_p2pdma = nvme_pci_supports_pci_p2pdma, 3260 + .get_virt_boundary = nvme_pci_get_virt_boundary, 3281 3261 }; 3282 3262 3283 3263 static int nvme_dev_map(struct nvme_dev *dev)
+1
drivers/nvme/host/rdma.c
··· 2202 2202 .delete_ctrl = nvme_rdma_delete_ctrl, 2203 2203 .get_address = nvmf_get_address, 2204 2204 .stop_ctrl = nvme_rdma_stop_ctrl, 2205 + .get_virt_boundary = nvme_get_virt_boundary, 2205 2206 }; 2206 2207 2207 2208 /*
+1
drivers/nvme/host/tcp.c
··· 2865 2865 .delete_ctrl = nvme_tcp_delete_ctrl, 2866 2866 .get_address = nvme_tcp_get_address, 2867 2867 .stop_ctrl = nvme_tcp_stop_ctrl, 2868 + .get_virt_boundary = nvmf_get_virt_boundary, 2868 2869 }; 2869 2870 2870 2871 static bool
+1
drivers/nvme/target/loop.c
··· 511 511 .submit_async_event = nvme_loop_submit_async_event, 512 512 .delete_ctrl = nvme_loop_delete_ctrl_host, 513 513 .get_address = nvmf_get_address, 514 + .get_virt_boundary = nvme_get_virt_boundary, 514 515 }; 515 516 516 517 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)