Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

block-dma: properly take MMIO path

In commit eadaa8b255f3 ("dma-mapping: introduce new DMA attribute to
indicate MMIO memory"), the DMA_ATTR_MMIO attribute was added to describe
MMIO addresses, for which any CPU cache flushing must be avoided, as an
outcome of the discussion referenced in the Link tag below.
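
For illustration, a minimal sketch of what the attribute means at a call
site (not part of this commit; the helper name and the DMA_BIDIRECTIONAL
direction are hypothetical, while dma_map_phys() and DMA_ATTR_MMIO are the
interfaces this commit builds on):

#include <linux/dma-mapping.h>

/* Hypothetical helper, for illustration only. */
static dma_addr_t map_peer_mmio(struct device *dev, phys_addr_t mmio_phys,
                                size_t len)
{
        /*
         * DMA_ATTR_MMIO marks the range as MMIO rather than struct-page
         * backed RAM: the DMA core skips CPU cache maintenance and, in
         * the DMA-IOMMU case, sets IOMMU_MMIO on the mapping.
         */
        return dma_map_phys(dev, mmio_phys, len, DMA_BIDIRECTIONAL,
                            DMA_ATTR_MMIO);
}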

In the case of a PCI_P2PDMA_MAP_THRU_HOST_BRIDGE transfer, the blk-mq-dma
logic treated such memory as a regular page and relied on the "struct page"
DMA flow. That flow performs CPU cache flushing, which shouldn't be done
here, and doesn't set the IOMMU_MMIO flag in the DMA-IOMMU case.

As a solution, let's encode the peer-to-peer transaction type in the NVMe
IOD flags variable and provide it to the blk-mq-dma API.
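
In outline, the round trip looks like this (condensed from the
drivers/nvme/host/pci.c hunks below, data path only, error handling
trimmed):

        /* At map time: record the P2P mapping type in the IOD flags. */
        switch (iter.p2pdma.map) {
        case PCI_P2PDMA_MAP_BUS_ADDR:
                iod->flags |= IOD_DATA_P2P;
                break;
        case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
                iod->flags |= IOD_DATA_MMIO;
                break;
        case PCI_P2PDMA_MAP_NONE:
                break;
        default:
                return BLK_STS_RESOURCE;
        }

        /* At unmap time: recover the type and derive the DMA attributes. */
        enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE;
        unsigned int attrs = 0;

        if (iod->flags & IOD_DATA_P2P)
                map = PCI_P2PDMA_MAP_BUS_ADDR;
        else if (iod->flags & IOD_DATA_MMIO) {
                map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
                attrs |= DMA_ATTR_MMIO; /* MMIO: no CPU cache flushing */
        }
        blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len, map);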

Link: https://lore.kernel.org/all/f912c446-1ae9-4390-9c11-00dce7bf0fd3@arm.com/
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Leon Romanovsky and committed by Jens Axboe · 37f0c7a8 61d43b17

+90 -46
+12 -6
block/blk-mq-dma.c
···
 static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
                struct blk_dma_iter *iter, struct phys_vec *vec)
 {
+       unsigned int attrs = 0;
+
+       if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
+               attrs |= DMA_ATTR_MMIO;
+
        iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
-                       rq_dma_dir(req), 0);
+                       rq_dma_dir(req), attrs);
        if (dma_mapping_error(dma_dev, iter->addr)) {
                iter->status = BLK_STS_RESOURCE;
                return false;
···
 {
        enum dma_data_direction dir = rq_dma_dir(req);
        unsigned int mapped = 0;
+       unsigned int attrs = 0;
        int error;

        iter->addr = state->addr;
        iter->len = dma_iova_size(state);

+       if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
+               attrs |= DMA_ATTR_MMIO;
+
        do {
                error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
-                               vec->len, dir, 0);
+                               vec->len, dir, attrs);
                if (error)
                        break;
                mapped += vec->len;
···
        memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
        iter->status = BLK_STS_OK;
+       iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;

        /*
         * Grab the first segment ASAP because we'll need it to check for P2P
···
        switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
                                 phys_to_page(vec.paddr))) {
        case PCI_P2PDMA_MAP_BUS_ADDR:
-               if (iter->iter.is_integrity)
-                       bio_integrity(req->bio)->bip_flags |= BIP_P2P_DMA;
-               else
-                       req->cmd_flags |= REQ_P2PDMA;
                return blk_dma_map_bus(iter, &vec);
        case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
                /*
+65 -8
drivers/nvme/host/pci.c
···
        /* single segment dma mapping */
        IOD_SINGLE_SEGMENT = 1U << 2,

+       /* Data payload contains p2p memory */
+       IOD_DATA_P2P = 1U << 3,
+
+       /* Metadata contains p2p memory */
+       IOD_META_P2P = 1U << 4,
+
+       /* Data payload contains MMIO memory */
+       IOD_DATA_MMIO = 1U << 5,
+
+       /* Metadata contains MMIO memory */
+       IOD_META_MMIO = 1U << 6,
+
        /* Metadata using non-coalesced MPTR */
-       IOD_SINGLE_META_SEGMENT = 1U << 5,
+       IOD_SINGLE_META_SEGMENT = 1U << 7,
 };

 struct nvme_dma_vec {
···
 static void nvme_unmap_metadata(struct request *req)
 {
        struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+       enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE;
        enum dma_data_direction dir = rq_dma_dir(req);
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct device *dma_dev = nvmeq->dev->dev;
        struct nvme_sgl_desc *sge = iod->meta_descriptor;
+       unsigned int attrs = 0;

        if (iod->flags & IOD_SINGLE_META_SEGMENT) {
                dma_unmap_page(dma_dev, iod->meta_dma,
···
                return;
        }

-       if (!blk_rq_integrity_dma_unmap(req, dma_dev, &iod->meta_dma_state,
-                       iod->meta_total_len)) {
+       if (iod->flags & IOD_META_P2P)
+               map = PCI_P2PDMA_MAP_BUS_ADDR;
+       else if (iod->flags & IOD_META_MMIO) {
+               map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
+               attrs |= DMA_ATTR_MMIO;
+       }
+
+       if (!blk_rq_dma_unmap(req, dma_dev, &iod->meta_dma_state,
+                       iod->meta_total_len, map)) {
                if (nvme_pci_cmd_use_meta_sgl(&iod->cmd))
-                       nvme_free_sgls(req, sge, &sge[1], 0);
+                       nvme_free_sgls(req, sge, &sge[1], attrs);
                else
                        dma_unmap_phys(dma_dev, iod->meta_dma,
-                                       iod->meta_total_len, dir, 0);
+                                       iod->meta_total_len, dir, attrs);
        }

        if (iod->meta_descriptor)
···
 static void nvme_unmap_data(struct request *req)
 {
+       enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE;
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
        struct device *dma_dev = nvmeq->dev->dev;
+       unsigned int attrs = 0;

        if (iod->flags & IOD_SINGLE_SEGMENT) {
                static_assert(offsetof(union nvme_data_ptr, prp1) ==
···
                return;
        }

-       if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) {
+       if (iod->flags & IOD_DATA_P2P)
+               map = PCI_P2PDMA_MAP_BUS_ADDR;
+       else if (iod->flags & IOD_DATA_MMIO) {
+               map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
+               attrs |= DMA_ATTR_MMIO;
+       }
+
+       if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len,
+                       map)) {
                if (nvme_pci_cmd_use_sgl(&iod->cmd))
                        nvme_free_sgls(req, iod->descriptors[0],
-                                       &iod->cmd.common.dptr.sgl, 0);
+                                       &iod->cmd.common.dptr.sgl, attrs);
                else
-                       nvme_free_prps(req, 0);
+                       nvme_free_prps(req, attrs);
        }

        if (iod->nr_descriptors)
···
        if (!blk_rq_dma_map_iter_start(req, dev->dev, &iod->dma_state, &iter))
                return iter.status;

+       switch (iter.p2pdma.map) {
+       case PCI_P2PDMA_MAP_BUS_ADDR:
+               iod->flags |= IOD_DATA_P2P;
+               break;
+       case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+               iod->flags |= IOD_DATA_MMIO;
+               break;
+       case PCI_P2PDMA_MAP_NONE:
+               break;
+       default:
+               return BLK_STS_RESOURCE;
+       }
+
        if (use_sgl == SGL_FORCED ||
            (use_sgl == SGL_SUPPORTED &&
             (sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold)))
···
        if (!blk_rq_integrity_dma_map_iter_start(req, dev->dev,
                        &iod->meta_dma_state, &iter))
                return iter.status;
+
+       switch (iter.p2pdma.map) {
+       case PCI_P2PDMA_MAP_BUS_ADDR:
+               iod->flags |= IOD_META_P2P;
+               break;
+       case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+               iod->flags |= IOD_META_MMIO;
+               break;
+       case PCI_P2PDMA_MAP_NONE:
+               break;
+       default:
+               return BLK_STS_RESOURCE;
+       }

        if (blk_rq_dma_map_coalesce(&iod->meta_dma_state))
                entries = 1;
-1
include/linux/bio-integrity.h
···
        BIP_CHECK_GUARD         = 1 << 5, /* guard check */
        BIP_CHECK_REFTAG        = 1 << 6, /* reftag check */
        BIP_CHECK_APPTAG        = 1 << 7, /* apptag check */
-       BIP_P2P_DMA             = 1 << 8, /* using P2P address */

        BIP_MEMPOOL             = 1 << 15, /* buffer backed by mempool */
 };
-14
include/linux/blk-integrity.h
···
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);

-static inline bool blk_rq_integrity_dma_unmap(struct request *req,
-               struct device *dma_dev, struct dma_iova_state *state,
-               size_t mapped_len)
-{
-       return blk_dma_unmap(req, dma_dev, state, mapped_len,
-                       bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA);
-}
-
 int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
 int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
                ssize_t bytes);
···
                struct scatterlist *s)
 {
        return 0;
 }
-static inline bool blk_rq_integrity_dma_unmap(struct request *req,
-               struct device *dma_dev, struct dma_iova_state *state,
-               size_t mapped_len)
-{
-       return false;
-}
 static inline int blk_rq_integrity_map_user(struct request *rq,
                void __user *ubuf,
+13 -15
include/linux/blk-mq-dma.h
···
        /* Output address range for this iteration */
        dma_addr_t addr;
        u32 len;
+       struct pci_p2pdma_map_state p2pdma;

        /* Status code. Only valid when blk_rq_dma_map_iter_* returned false */
        blk_status_t status;

        /* Internal to blk_rq_dma_map_iter_* */
        struct blk_map_iter iter;
-       struct pci_p2pdma_map_state p2pdma;
 };

 bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
···
 }

 /**
- * blk_dma_unmap - try to DMA unmap a request
+ * blk_rq_dma_unmap - try to DMA unmap a request
  * @req: request to unmap
  * @dma_dev: device to unmap from
  * @state: DMA IOVA state
  * @mapped_len: number of bytes to unmap
- * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR
+ * @map: peer-to-peer mapping type
  *
  * Returns %false if the callers need to manually unmap every DMA segment
  * mapped using @iter or %true if no work is left to be done.
  */
-static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev,
-               struct dma_iova_state *state, size_t mapped_len, bool is_p2p)
+static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
+               struct dma_iova_state *state, size_t mapped_len,
+               enum pci_p2pdma_map_type map)
 {
-       if (is_p2p)
+       if (map == PCI_P2PDMA_MAP_BUS_ADDR)
                return true;

        if (dma_use_iova(state)) {
+               unsigned int attrs = 0;
+
+               if (map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
+                       attrs |= DMA_ATTR_MMIO;
+
                dma_iova_destroy(dma_dev, state, mapped_len, rq_dma_dir(req),
-                               0);
+                               attrs);
                return true;
        }

        return !dma_need_unmap(dma_dev);
 }
-
-static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
-               struct dma_iova_state *state, size_t mapped_len)
-{
-       return blk_dma_unmap(req, dma_dev, state, mapped_len,
-                       req->cmd_flags & REQ_P2PDMA);
-}
-
 #endif /* BLK_MQ_DMA_H */
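
Taken together with the NVMe changes above, the consumer contract of the
reworked iterator is roughly the following (a placeholder sketch, not part
of this commit): iter.p2pdma.map is now part of the iterator's output, and
the driver itself must remember it until unmap time, since REQ_P2PDMA and
BIP_P2P_DMA are gone.

        /* Hypothetical consumer; my_map()/map_out are placeholder names. */
        static blk_status_t my_map(struct request *req, struct device *dma_dev,
                        struct dma_iova_state *state,
                        enum pci_p2pdma_map_type *map_out)
        {
                struct blk_dma_iter iter;

                if (!blk_rq_dma_map_iter_start(req, dma_dev, state, &iter))
                        return iter.status;
                *map_out = iter.p2pdma.map;     /* stash for unmap time */
                /* ... build the hardware SG list from iter.addr/iter.len ... */
                return BLK_STS_OK;
        }

At completion, the stored value goes straight into blk_rq_dma_unmap():
PCI_P2PDMA_MAP_BUS_ADDR short-circuits the unmap entirely, while
PCI_P2PDMA_MAP_THRU_HOST_BRIDGE adds DMA_ATTR_MMIO to the IOVA teardown.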
-2
include/linux/blk_types.h
···
        __REQ_DRV,              /* for driver use */
        __REQ_FS_PRIVATE,       /* for file system (submitter) use */
        __REQ_ATOMIC,           /* for atomic write operations */
-       __REQ_P2PDMA,           /* contains P2P DMA pages */
        /*
         * Command specific flags, keep last:
         */
···
 #define REQ_DRV                (__force blk_opf_t)(1ULL << __REQ_DRV)
 #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
 #define REQ_ATOMIC     (__force blk_opf_t)(1ULL << __REQ_ATOMIC)
-#define REQ_P2PDMA     (__force blk_opf_t)(1ULL << __REQ_P2PDMA)

 #define REQ_NOUNMAP    (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)