Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

nvme-pci: convert the data mapping to blk_rq_dma_map

Use the blk_rq_dma_map API to DMA map requests instead of scatterlists.
This removes the need to allocate a scatterlist covering every segment,
and thus the overall transfer length limit based on the scatterlist
allocation.

Instead the DMA mapping is done by iterating the bio_vec chain in the
request directly. The unmap is handled differently depending on how
we mapped:

- when using an IOMMU only a single IOVA is used, and it is stored in
iova_state
- for direct mappings that don't use swiotlb and are cache coherent,
unmap is not needed at all
- for direct mappings that are not cache coherent or use swiotlb, the
physical addresses are rebuilt from the PRPs or SGL segments

The latter unfortunately adds a fair amount of code to the driver, but
it is code not used in the fast path.

The conversion only covers the data mapping path, and still uses a
scatterlist for the multi-segment metadata case. I plan to convert that
as soon as we have good test coverage for the multi-segment metadata
path.

Thanks to Chaitanya Kulkarni for an initial attempt at a new DMA API
conversion for nvme-pci, Kanchan Joshi for bringing back the single
segment optimization, Leon Romanovsky for shepherding this through a
gazillion rebases and Nitesh Shetty for various improvements.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20250625113531.522027-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Christoph Hellwig and committed by
Jens Axboe
7ce3c1dd deecd1c4

+246 -151
+246 -151
drivers/nvme/host/pci.c
··· 7 7 #include <linux/acpi.h> 8 8 #include <linux/async.h> 9 9 #include <linux/blkdev.h> 10 - #include <linux/blk-mq.h> 10 + #include <linux/blk-mq-dma.h> 11 11 #include <linux/blk-integrity.h> 12 12 #include <linux/dmi.h> 13 13 #include <linux/init.h> ··· 27 27 #include <linux/io-64-nonatomic-lo-hi.h> 28 28 #include <linux/io-64-nonatomic-hi-lo.h> 29 29 #include <linux/sed-opal.h> 30 - #include <linux/pci-p2pdma.h> 31 30 32 31 #include "trace.h" 33 32 #include "nvme.h" ··· 45 46 #define NVME_MAX_NR_DESCRIPTORS 5 46 47 47 48 /* 48 - * For data SGLs we support a single descriptors worth of SGL entries, but for 49 - * now we also limit it to avoid an allocation larger than PAGE_SIZE for the 50 - * scatterlist. 49 + * For data SGLs we support a single descriptors worth of SGL entries. 50 + * For PRPs, segments don't matter at all. 51 51 */ 52 52 #define NVME_MAX_SEGS \ 53 - min(NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc), \ 54 - (PAGE_SIZE / sizeof(struct scatterlist))) 53 + (NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc)) 55 54 56 55 /* 57 56 * For metadata SGLs, only the small descriptor is supported, and the first ··· 159 162 bool hmb; 160 163 struct sg_table *hmb_sgt; 161 164 162 - mempool_t *iod_mempool; 163 165 mempool_t *iod_meta_mempool; 164 166 165 167 /* shadow doorbell buffer support: */ ··· 242 246 IOD_ABORTED = 1U << 0, 243 247 244 248 /* uses the small descriptor pool */ 245 - IOD_SMALL_DESCRIPTOR = 1U << 1, 249 + IOD_SMALL_DESCRIPTOR = 1U << 1, 250 + 251 + /* single segment dma mapping */ 252 + IOD_SINGLE_SEGMENT = 1U << 2, 246 253 }; 247 254 248 255 /* ··· 256 257 struct nvme_command cmd; 257 258 u8 flags; 258 259 u8 nr_descriptors; 259 - unsigned int dma_len; /* length of single DMA segment mapping */ 260 - dma_addr_t first_dma; 260 + 261 + unsigned int total_len; 262 + struct dma_iova_state dma_state; 263 + void *descriptors[NVME_MAX_NR_DESCRIPTORS]; 264 + 261 265 dma_addr_t meta_dma; 262 - struct sg_table sgt; 263 266 struct sg_table 
meta_sgt; 264 267 struct nvme_sgl_desc *meta_descriptor; 265 - void *descriptors[NVME_MAX_NR_DESCRIPTORS]; 266 268 }; 267 269 268 270 static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) ··· 614 614 static unsigned int nvme_pci_avg_seg_size(struct request *req) 615 615 { 616 616 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 617 + unsigned int nseg; 617 618 618 - return DIV_ROUND_UP(blk_rq_payload_bytes(req), iod->sgt.nents); 619 + if (blk_rq_dma_map_coalesce(&iod->dma_state)) 620 + nseg = 1; 621 + else 622 + nseg = blk_rq_nr_phys_segments(req); 623 + return DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); 619 624 } 620 625 621 626 static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq, ··· 631 626 return nvmeq->descriptor_pools.large; 632 627 } 633 628 629 + static inline bool nvme_pci_cmd_use_sgl(struct nvme_command *cmd) 630 + { 631 + return cmd->common.flags & 632 + (NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG); 633 + } 634 + 635 + static inline dma_addr_t nvme_pci_first_desc_dma_addr(struct nvme_command *cmd) 636 + { 637 + if (nvme_pci_cmd_use_sgl(cmd)) 638 + return le64_to_cpu(cmd->common.dptr.sgl.addr); 639 + return le64_to_cpu(cmd->common.dptr.prp2); 640 + } 641 + 634 642 static void nvme_free_descriptors(struct request *req) 635 643 { 636 644 struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 637 645 const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1; 638 646 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 639 - dma_addr_t dma_addr = iod->first_dma; 647 + dma_addr_t dma_addr = nvme_pci_first_desc_dma_addr(&iod->cmd); 640 648 int i; 641 649 642 650 if (iod->nr_descriptors == 1) { ··· 668 650 } 669 651 } 670 652 653 + static void nvme_free_prps(struct request *req) 654 + { 655 + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 656 + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 657 + struct device *dma_dev = nvmeq->dev->dev; 658 + enum dma_data_direction dir = rq_dma_dir(req); 659 + int length = iod->total_len; 660 + 
dma_addr_t dma_addr; 661 + int i, desc; 662 + __le64 *prp_list; 663 + u32 dma_len; 664 + 665 + dma_addr = le64_to_cpu(iod->cmd.common.dptr.prp1); 666 + dma_len = min_t(u32, length, 667 + NVME_CTRL_PAGE_SIZE - (dma_addr & (NVME_CTRL_PAGE_SIZE - 1))); 668 + length -= dma_len; 669 + if (!length) { 670 + dma_unmap_page(dma_dev, dma_addr, dma_len, dir); 671 + return; 672 + } 673 + 674 + if (length <= NVME_CTRL_PAGE_SIZE) { 675 + dma_unmap_page(dma_dev, dma_addr, dma_len, dir); 676 + dma_addr = le64_to_cpu(iod->cmd.common.dptr.prp2); 677 + dma_unmap_page(dma_dev, dma_addr, length, dir); 678 + return; 679 + } 680 + 681 + i = 0; 682 + desc = 0; 683 + prp_list = iod->descriptors[desc]; 684 + do { 685 + dma_unmap_page(dma_dev, dma_addr, dma_len, dir); 686 + if (i == NVME_CTRL_PAGE_SIZE >> 3) { 687 + prp_list = iod->descriptors[++desc]; 688 + i = 0; 689 + } 690 + 691 + dma_addr = le64_to_cpu(prp_list[i++]); 692 + dma_len = min(length, NVME_CTRL_PAGE_SIZE); 693 + length -= dma_len; 694 + } while (length); 695 + } 696 + 697 + static void nvme_free_sgls(struct request *req) 698 + { 699 + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 700 + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 701 + struct device *dma_dev = nvmeq->dev->dev; 702 + dma_addr_t sqe_dma_addr = le64_to_cpu(iod->cmd.common.dptr.sgl.addr); 703 + unsigned int sqe_dma_len = le32_to_cpu(iod->cmd.common.dptr.sgl.length); 704 + struct nvme_sgl_desc *sg_list = iod->descriptors[0]; 705 + enum dma_data_direction dir = rq_dma_dir(req); 706 + 707 + if (iod->nr_descriptors) { 708 + unsigned int nr_entries = sqe_dma_len / sizeof(*sg_list), i; 709 + 710 + for (i = 0; i < nr_entries; i++) 711 + dma_unmap_page(dma_dev, le64_to_cpu(sg_list[i].addr), 712 + le32_to_cpu(sg_list[i].length), dir); 713 + } else { 714 + dma_unmap_page(dma_dev, sqe_dma_addr, sqe_dma_len, dir); 715 + } 716 + } 717 + 671 718 static void nvme_unmap_data(struct request *req) 672 719 { 673 720 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 674 721 
struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 722 + struct device *dma_dev = nvmeq->dev->dev; 675 723 676 - if (iod->dma_len) { 677 - dma_unmap_page(nvmeq->dev->dev, iod->first_dma, iod->dma_len, 678 - rq_dma_dir(req)); 724 + if (iod->flags & IOD_SINGLE_SEGMENT) { 725 + static_assert(offsetof(union nvme_data_ptr, prp1) == 726 + offsetof(union nvme_data_ptr, sgl.addr)); 727 + dma_unmap_page(dma_dev, le64_to_cpu(iod->cmd.common.dptr.prp1), 728 + iod->total_len, rq_dma_dir(req)); 679 729 return; 680 730 } 681 731 682 - WARN_ON_ONCE(!iod->sgt.nents); 683 - 684 - dma_unmap_sgtable(nvmeq->dev->dev, &iod->sgt, rq_dma_dir(req), 0); 685 - nvme_free_descriptors(req); 686 - mempool_free(iod->sgt.sgl, nvmeq->dev->iod_mempool); 687 - } 688 - 689 - static void nvme_print_sgl(struct scatterlist *sgl, int nents) 690 - { 691 - int i; 692 - struct scatterlist *sg; 693 - 694 - for_each_sg(sgl, sg, nents, i) { 695 - dma_addr_t phys = sg_phys(sg); 696 - pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d " 697 - "dma_address:%pad dma_length:%d\n", 698 - i, &phys, sg->offset, sg->length, &sg_dma_address(sg), 699 - sg_dma_len(sg)); 732 + if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) { 733 + if (nvme_pci_cmd_use_sgl(&iod->cmd)) 734 + nvme_free_sgls(req); 735 + else 736 + nvme_free_prps(req); 700 737 } 738 + 739 + if (iod->nr_descriptors) 740 + nvme_free_descriptors(req); 701 741 } 702 742 703 - static blk_status_t nvme_pci_setup_prps(struct request *req) 743 + static blk_status_t nvme_pci_setup_data_prp(struct request *req, 744 + struct blk_dma_iter *iter) 704 745 { 705 746 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 706 747 struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 707 - int length = blk_rq_payload_bytes(req); 708 - struct scatterlist *sg = iod->sgt.sgl; 709 - int dma_len = sg_dma_len(sg); 710 - u64 dma_addr = sg_dma_address(sg); 711 - int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1); 748 + unsigned int length = blk_rq_payload_bytes(req); 749 
+ dma_addr_t prp1_dma, prp2_dma = 0; 750 + unsigned int prp_len, i; 712 751 __le64 *prp_list; 713 - dma_addr_t prp_dma; 714 - int i; 715 752 716 - length -= (NVME_CTRL_PAGE_SIZE - offset); 717 - if (length <= 0) { 718 - iod->first_dma = 0; 753 + /* 754 + * PRP1 always points to the start of the DMA transfers. 755 + * 756 + * This is the only PRP (except for the list entries) that could be 757 + * non-aligned. 758 + */ 759 + prp1_dma = iter->addr; 760 + prp_len = min(length, NVME_CTRL_PAGE_SIZE - 761 + (iter->addr & (NVME_CTRL_PAGE_SIZE - 1))); 762 + iod->total_len += prp_len; 763 + iter->addr += prp_len; 764 + iter->len -= prp_len; 765 + length -= prp_len; 766 + if (!length) 719 767 goto done; 768 + 769 + if (!iter->len) { 770 + if (!blk_rq_dma_map_iter_next(req, nvmeq->dev->dev, 771 + &iod->dma_state, iter)) { 772 + if (WARN_ON_ONCE(!iter->status)) 773 + goto bad_sgl; 774 + goto done; 775 + } 720 776 } 721 777 722 - dma_len -= (NVME_CTRL_PAGE_SIZE - offset); 723 - if (dma_len) { 724 - dma_addr += (NVME_CTRL_PAGE_SIZE - offset); 725 - } else { 726 - sg = sg_next(sg); 727 - dma_addr = sg_dma_address(sg); 728 - dma_len = sg_dma_len(sg); 729 - } 730 - 778 + /* 779 + * PRP2 is usually a list, but can point to data if all data to be 780 + * transferred fits into PRP1 + PRP2: 781 + */ 731 782 if (length <= NVME_CTRL_PAGE_SIZE) { 732 - iod->first_dma = dma_addr; 783 + prp2_dma = iter->addr; 784 + iod->total_len += length; 733 785 goto done; 734 786 } 735 787 ··· 808 720 iod->flags |= IOD_SMALL_DESCRIPTOR; 809 721 810 722 prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC, 811 - &prp_dma); 812 - if (!prp_list) 813 - return BLK_STS_RESOURCE; 723 + &prp2_dma); 724 + if (!prp_list) { 725 + iter->status = BLK_STS_RESOURCE; 726 + goto done; 727 + } 814 728 iod->descriptors[iod->nr_descriptors++] = prp_list; 815 - iod->first_dma = prp_dma; 729 + 816 730 i = 0; 817 731 for (;;) { 732 + prp_list[i++] = cpu_to_le64(iter->addr); 733 + prp_len = min(length, 
NVME_CTRL_PAGE_SIZE); 734 + if (WARN_ON_ONCE(iter->len < prp_len)) 735 + goto bad_sgl; 736 + 737 + iod->total_len += prp_len; 738 + iter->addr += prp_len; 739 + iter->len -= prp_len; 740 + length -= prp_len; 741 + if (!length) 742 + break; 743 + 744 + if (iter->len == 0) { 745 + if (!blk_rq_dma_map_iter_next(req, nvmeq->dev->dev, 746 + &iod->dma_state, iter)) { 747 + if (WARN_ON_ONCE(!iter->status)) 748 + goto bad_sgl; 749 + goto done; 750 + } 751 + } 752 + 753 + /* 754 + * If we've filled the entire descriptor, allocate a new that is 755 + * pointed to be the last entry in the previous PRP list. To 756 + * accommodate for that move the last actual entry to the new 757 + * descriptor. 758 + */ 818 759 if (i == NVME_CTRL_PAGE_SIZE >> 3) { 819 760 __le64 *old_prp_list = prp_list; 761 + dma_addr_t prp_list_dma; 820 762 821 763 prp_list = dma_pool_alloc(nvmeq->descriptor_pools.large, 822 - GFP_ATOMIC, &prp_dma); 823 - if (!prp_list) 824 - goto free_prps; 764 + GFP_ATOMIC, &prp_list_dma); 765 + if (!prp_list) { 766 + iter->status = BLK_STS_RESOURCE; 767 + goto done; 768 + } 825 769 iod->descriptors[iod->nr_descriptors++] = prp_list; 770 + 826 771 prp_list[0] = old_prp_list[i - 1]; 827 - old_prp_list[i - 1] = cpu_to_le64(prp_dma); 772 + old_prp_list[i - 1] = cpu_to_le64(prp_list_dma); 828 773 i = 1; 829 774 } 830 - prp_list[i++] = cpu_to_le64(dma_addr); 831 - dma_len -= NVME_CTRL_PAGE_SIZE; 832 - dma_addr += NVME_CTRL_PAGE_SIZE; 833 - length -= NVME_CTRL_PAGE_SIZE; 834 - if (length <= 0) 835 - break; 836 - if (dma_len > 0) 837 - continue; 838 - if (unlikely(dma_len < 0)) 839 - goto bad_sgl; 840 - sg = sg_next(sg); 841 - dma_addr = sg_dma_address(sg); 842 - dma_len = sg_dma_len(sg); 843 775 } 776 + 844 777 done: 845 - iod->cmd.common.dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sgt.sgl)); 846 - iod->cmd.common.dptr.prp2 = cpu_to_le64(iod->first_dma); 847 - return BLK_STS_OK; 848 - free_prps: 849 - nvme_free_descriptors(req); 850 - return BLK_STS_RESOURCE; 778 + /* 779 + * 
nvme_unmap_data uses the DPT field in the SQE to tear down the 780 + * mapping, so initialize it even for failures. 781 + */ 782 + iod->cmd.common.dptr.prp1 = cpu_to_le64(prp1_dma); 783 + iod->cmd.common.dptr.prp2 = cpu_to_le64(prp2_dma); 784 + if (unlikely(iter->status)) 785 + nvme_unmap_data(req); 786 + return iter->status; 787 + 851 788 bad_sgl: 852 - WARN(DO_ONCE(nvme_print_sgl, iod->sgt.sgl, iod->sgt.nents), 853 - "Invalid SGL for payload:%d nents:%d\n", 854 - blk_rq_payload_bytes(req), iod->sgt.nents); 789 + dev_err_once(nvmeq->dev->dev, 790 + "Incorrectly formed request for payload:%d nents:%d\n", 791 + blk_rq_payload_bytes(req), blk_rq_nr_phys_segments(req)); 855 792 return BLK_STS_IOERR; 856 793 } 857 794 858 795 static void nvme_pci_sgl_set_data(struct nvme_sgl_desc *sge, 859 - struct scatterlist *sg) 796 + struct blk_dma_iter *iter) 860 797 { 861 - sge->addr = cpu_to_le64(sg_dma_address(sg)); 862 - sge->length = cpu_to_le32(sg_dma_len(sg)); 798 + sge->addr = cpu_to_le64(iter->addr); 799 + sge->length = cpu_to_le32(iter->len); 863 800 sge->type = NVME_SGL_FMT_DATA_DESC << 4; 864 801 } 865 802 ··· 896 783 sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; 897 784 } 898 785 899 - static blk_status_t nvme_pci_setup_sgls(struct request *req) 786 + static blk_status_t nvme_pci_setup_data_sgl(struct request *req, 787 + struct blk_dma_iter *iter) 900 788 { 901 789 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 902 790 struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 791 + unsigned int entries = blk_rq_nr_phys_segments(req); 903 792 struct nvme_sgl_desc *sg_list; 904 - struct scatterlist *sg = iod->sgt.sgl; 905 - unsigned int entries = iod->sgt.nents; 906 793 dma_addr_t sgl_dma; 907 - int i = 0; 794 + unsigned int mapped = 0; 908 795 909 - /* setting the transfer type as SGL */ 796 + /* set the transfer type as SGL */ 910 797 iod->cmd.common.flags = NVME_CMD_SGL_METABUF; 911 798 912 - if (entries == 1) { 913 - nvme_pci_sgl_set_data(&iod->cmd.common.dptr.sgl, sg); 
799 + if (entries == 1 || blk_rq_dma_map_coalesce(&iod->dma_state)) { 800 + nvme_pci_sgl_set_data(&iod->cmd.common.dptr.sgl, iter); 801 + iod->total_len += iter->len; 914 802 return BLK_STS_OK; 915 803 } 916 804 ··· 923 809 if (!sg_list) 924 810 return BLK_STS_RESOURCE; 925 811 iod->descriptors[iod->nr_descriptors++] = sg_list; 926 - iod->first_dma = sgl_dma; 927 812 928 - nvme_pci_sgl_set_seg(&iod->cmd.common.dptr.sgl, sgl_dma, entries); 929 813 do { 930 - nvme_pci_sgl_set_data(&sg_list[i++], sg); 931 - sg = sg_next(sg); 932 - } while (--entries > 0); 814 + if (WARN_ON_ONCE(mapped == entries)) { 815 + iter->status = BLK_STS_IOERR; 816 + break; 817 + } 818 + nvme_pci_sgl_set_data(&sg_list[mapped++], iter); 819 + iod->total_len += iter->len; 820 + } while (blk_rq_dma_map_iter_next(req, nvmeq->dev->dev, &iod->dma_state, 821 + iter)); 933 822 934 - return BLK_STS_OK; 823 + nvme_pci_sgl_set_seg(&iod->cmd.common.dptr.sgl, sgl_dma, mapped); 824 + if (unlikely(iter->status)) 825 + nvme_free_sgls(req); 826 + return iter->status; 935 827 } 936 828 937 829 static blk_status_t nvme_pci_setup_data_simple(struct request *req, ··· 958 838 dma_addr = dma_map_bvec(nvmeq->dev->dev, &bv, rq_dma_dir(req), 0); 959 839 if (dma_mapping_error(nvmeq->dev->dev, dma_addr)) 960 840 return BLK_STS_RESOURCE; 961 - iod->dma_len = bv.bv_len; 841 + iod->total_len = bv.bv_len; 842 + iod->flags |= IOD_SINGLE_SEGMENT; 962 843 963 844 if (use_sgl == SGL_FORCED || !prp_possible) { 964 845 iod->cmd.common.flags = NVME_CMD_SGL_METABUF; ··· 985 864 struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 986 865 struct nvme_dev *dev = nvmeq->dev; 987 866 enum nvme_use_sgl use_sgl = nvme_pci_use_sgls(dev, req); 988 - blk_status_t ret = BLK_STS_RESOURCE; 989 - int rc; 867 + struct blk_dma_iter iter; 868 + blk_status_t ret; 990 869 870 + /* 871 + * Try to skip the DMA iterator for single segment requests, as that 872 + * significantly improves performances for small I/O sizes. 
873 + */ 991 874 if (blk_rq_nr_phys_segments(req) == 1) { 992 875 ret = nvme_pci_setup_data_simple(req, use_sgl); 993 876 if (ret != BLK_STS_AGAIN) 994 877 return ret; 995 878 } 996 879 997 - iod->dma_len = 0; 998 - iod->sgt.sgl = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); 999 - if (!iod->sgt.sgl) 1000 - return BLK_STS_RESOURCE; 1001 - sg_init_table(iod->sgt.sgl, blk_rq_nr_phys_segments(req)); 1002 - iod->sgt.orig_nents = blk_rq_map_sg(req, iod->sgt.sgl); 1003 - if (!iod->sgt.orig_nents) 1004 - goto out_free_sg; 1005 - 1006 - rc = dma_map_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 1007 - DMA_ATTR_NO_WARN); 1008 - if (rc) { 1009 - if (rc == -EREMOTEIO) 1010 - ret = BLK_STS_TARGET; 1011 - goto out_free_sg; 1012 - } 880 + if (!blk_rq_dma_map_iter_start(req, dev->dev, &iod->dma_state, &iter)) 881 + return iter.status; 1013 882 1014 883 if (use_sgl == SGL_FORCED || 1015 884 (use_sgl == SGL_SUPPORTED && 1016 885 (sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold))) 1017 - ret = nvme_pci_setup_sgls(req); 1018 - else 1019 - ret = nvme_pci_setup_prps(req); 1020 - if (ret != BLK_STS_OK) 1021 - goto out_unmap_sg; 1022 - return BLK_STS_OK; 886 + return nvme_pci_setup_data_sgl(req, &iter); 887 + return nvme_pci_setup_data_prp(req, &iter); 888 + } 1023 889 1024 - out_unmap_sg: 1025 - dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); 1026 - out_free_sg: 1027 - mempool_free(iod->sgt.sgl, dev->iod_mempool); 1028 - return ret; 890 + static void nvme_pci_sgl_set_data_sg(struct nvme_sgl_desc *sge, 891 + struct scatterlist *sg) 892 + { 893 + sge->addr = cpu_to_le64(sg_dma_address(sg)); 894 + sge->length = cpu_to_le32(sg_dma_len(sg)); 895 + sge->type = NVME_SGL_FMT_DATA_DESC << 4; 1029 896 } 1030 897 1031 898 static blk_status_t nvme_pci_setup_meta_sgls(struct request *req) ··· 1056 947 1057 948 sgl = iod->meta_sgt.sgl; 1058 949 if (entries == 1) { 1059 - nvme_pci_sgl_set_data(sg_list, sgl); 950 + nvme_pci_sgl_set_data_sg(sg_list, sgl); 1060 951 return 
BLK_STS_OK; 1061 952 } 1062 953 1063 954 sgl_dma += sizeof(*sg_list); 1064 955 nvme_pci_sgl_set_seg(sg_list, sgl_dma, entries); 1065 956 for_each_sg(sgl, sg, entries, i) 1066 - nvme_pci_sgl_set_data(&sg_list[i + 1], sg); 957 + nvme_pci_sgl_set_data_sg(&sg_list[i + 1], sg); 1067 958 1068 959 return BLK_STS_OK; 1069 960 ··· 1104 995 1105 996 iod->flags = 0; 1106 997 iod->nr_descriptors = 0; 1107 - iod->sgt.nents = 0; 998 + iod->total_len = 0; 1108 999 iod->meta_sgt.nents = 0; 1109 1000 1110 1001 ret = nvme_setup_cmd(req->q->queuedata, req); ··· 3022 2913 static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) 3023 2914 { 3024 2915 size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1); 3025 - size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS; 3026 - 3027 - dev->iod_mempool = mempool_create_node(1, 3028 - mempool_kmalloc, mempool_kfree, 3029 - (void *)alloc_size, GFP_KERNEL, 3030 - dev_to_node(dev->dev)); 3031 - if (!dev->iod_mempool) 3032 - return -ENOMEM; 3033 2916 3034 2917 dev->iod_meta_mempool = mempool_create_node(1, 3035 2918 mempool_kmalloc, mempool_kfree, 3036 2919 (void *)meta_size, GFP_KERNEL, 3037 2920 dev_to_node(dev->dev)); 3038 2921 if (!dev->iod_meta_mempool) 3039 - goto free; 3040 - 2922 + return -ENOMEM; 3041 2923 return 0; 3042 - free: 3043 - mempool_destroy(dev->iod_mempool); 3044 - return -ENOMEM; 3045 2924 } 3046 2925 3047 2926 static void nvme_free_tagset(struct nvme_dev *dev) ··· 3473 3376 nvme_dbbuf_dma_free(dev); 3474 3377 nvme_free_queues(dev, 0); 3475 3378 out_release_iod_mempool: 3476 - mempool_destroy(dev->iod_mempool); 3477 3379 mempool_destroy(dev->iod_meta_mempool); 3478 3380 out_dev_unmap: 3479 3381 nvme_dev_unmap(dev); ··· 3536 3440 nvme_dev_remove_admin(dev); 3537 3441 nvme_dbbuf_dma_free(dev); 3538 3442 nvme_free_queues(dev, 0); 3539 - mempool_destroy(dev->iod_mempool); 3540 3443 mempool_destroy(dev->iod_meta_mempool); 3541 3444 nvme_release_descriptor_pools(dev); 3542 3445 nvme_dev_unmap(dev);