Merge tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux

+7

Documentation/userspace-api/dma-buf-heaps.rst

··· 16 16 17 17 - The ``system`` heap allocates virtually contiguous, cacheable, buffers. 18 18 19 + - The ``system_cc_shared`` heap allocates virtually contiguous, cacheable, 20 + buffers using shared (decrypted) memory. It is only present on 21 + confidential computing (CoCo) VMs where memory encryption is active 22 + (e.g., AMD SEV, Intel TDX). The allocated pages have the encryption 23 + bit cleared, making them accessible for device DMA without TDISP 24 + support. On non-CoCo VM configurations, this heap is not registered. 25 + 19 26 - The ``default_cma_region`` heap allocates physically contiguous, 20 27 cacheable, buffers. Only present if a CMA region is present. Such a 21 28 region is usually created either through the kernel commandline

+1

arch/arm64/Kconfig

··· 54 54 select ARCH_HAS_STRICT_MODULE_RWX 55 55 select ARCH_HAS_SYNC_DMA_FOR_DEVICE 56 56 select ARCH_HAS_SYNC_DMA_FOR_CPU 57 + select ARCH_HAS_BATCHED_DMA_SYNC 57 58 select ARCH_HAS_SYSCALL_WRAPPER 58 59 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST 59 60 select ARCH_HAS_ZONE_DMA_SET if EXPERT

+19 -6

arch/arm64/include/asm/assembler.h

··· 371 371 * [start, end) with dcache line size explicitly provided. 372 372 * 373 373 * op: operation passed to dc instruction 374 - * domain: domain used in dsb instruction 375 374 * start: starting virtual address of the region 376 375 * end: end virtual address of the region 377 376 * linesz: dcache line size 378 377 * fixup: optional label to branch to on user fault 379 378 * Corrupts: start, end, tmp 380 379 */ 381 - .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup 380 + .macro dcache_by_myline_op_nosync op, start, end, linesz, tmp, fixup 382 381 sub \tmp, \linesz, #1 383 382 bic \start, \start, \tmp 384 383 alternative_if ARM64_WORKAROUND_4311569 ··· 411 412 cbnz \start, .Ldcache_op\@ 412 413 .endif 413 414 alternative_else_nop_endif 414 - dsb \domain 415 415 416 416 _cond_uaccess_extable .Ldcache_op\@, \fixup 417 417 .endm 418 418 419 419 /* 420 420 * Macro to perform a data cache maintenance for the interval 421 - * [start, end) 421 + * [start, end) without waiting for completion 422 + * 423 + * op: operation passed to dc instruction 424 + * start: starting virtual address of the region 425 + * end: end virtual address of the region 426 + * fixup: optional label to branch to on user fault 427 + * Corrupts: start, end, tmp1, tmp2 428 + */ 429 + .macro dcache_by_line_op_nosync op, start, end, tmp1, tmp2, fixup 430 + dcache_line_size \tmp1, \tmp2 431 + dcache_by_myline_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup 432 + .endm 433 + 434 + /* 435 + * Macro to perform a data cache maintenance for the interval 436 + * [start, end) and wait for completion 422 437 * 423 438 * op: operation passed to dc instruction 424 439 * domain: domain used in dsb instruction ··· 442 429 * Corrupts: start, end, tmp1, tmp2 443 430 */ 444 431 .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup 445 - dcache_line_size \tmp1, \tmp2 446 - dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup 432 + dcache_by_line_op_nosync \op, 
\start, \end, \tmp1, \tmp2, \fixup 433 + dsb \domain 447 434 .endm 448 435 449 436 /*

+5

arch/arm64/include/asm/cache.h

··· 87 87 88 88 #define dma_get_cache_alignment cache_line_size 89 89 90 + static inline void arch_sync_dma_flush(void) 91 + { 92 + dsb(sy); 93 + } 94 + 90 95 /* Compress a u64 MPIDR value into 32 bits. */ 91 96 static inline u64 arch_compact_of_hwid(u64 id) 92 97 {

+2

arch/arm64/include/asm/cacheflush.h

··· 74 74 extern void dcache_clean_inval_poc(unsigned long start, unsigned long end); 75 75 extern void dcache_inval_poc(unsigned long start, unsigned long end); 76 76 extern void dcache_clean_poc(unsigned long start, unsigned long end); 77 + extern void dcache_inval_poc_nosync(unsigned long start, unsigned long end); 78 + extern void dcache_clean_poc_nosync(unsigned long start, unsigned long end); 77 79 extern void dcache_clean_pop(unsigned long start, unsigned long end); 78 80 extern void dcache_clean_pou(unsigned long start, unsigned long end); 79 81 extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);

+2 -1

arch/arm64/kernel/relocate_kernel.S

··· 64 64 mov x19, x13 65 65 copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 66 66 add x1, x19, #PAGE_SIZE 67 - dcache_by_myline_op civac, sy, x19, x1, x15, x20 67 + dcache_by_myline_op_nosync civac, x19, x1, x15, x20 68 + dsb sy 68 69 b .Lnext 69 70 .Ltest_indirection: 70 71 tbz x16, IND_INDIRECTION_BIT, .Ltest_destination

+46 -11

arch/arm64/mm/cache.S

··· 132 132 ret 133 133 SYM_FUNC_END(dcache_clean_pou) 134 134 135 - /* 136 - * dcache_inval_poc(start, end) 137 - * 138 - * Ensure that any D-cache lines for the interval [start, end) 139 - * are invalidated. Any partial lines at the ends of the interval are 140 - * also cleaned to PoC to prevent data loss. 141 - * 142 - * - start - kernel start address of region 143 - * - end - kernel end address of region 144 - */ 145 - SYM_FUNC_START(__pi_dcache_inval_poc) 135 + .macro __dcache_inval_poc_nosync 146 136 dcache_line_size x2, x3 147 137 sub x3, x2, #1 148 138 tst x1, x3 // end cache line aligned? ··· 148 158 3: add x0, x0, x2 149 159 cmp x0, x1 150 160 b.lo 2b 161 + .endm 162 + 163 + /* 164 + * dcache_inval_poc(start, end) 165 + * 166 + * Ensure that any D-cache lines for the interval [start, end) 167 + * are invalidated. Any partial lines at the ends of the interval are 168 + * also cleaned to PoC to prevent data loss. 169 + * 170 + * - start - kernel start address of region 171 + * - end - kernel end address of region 172 + */ 173 + SYM_FUNC_START(__pi_dcache_inval_poc) 174 + __dcache_inval_poc_nosync 151 175 dsb sy 152 176 ret 153 177 SYM_FUNC_END(__pi_dcache_inval_poc) 154 178 SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) 179 + 180 + /* 181 + * dcache_inval_poc_nosync(start, end) 182 + * 183 + * Issue the instructions of D-cache lines for the interval [start, end) 184 + * for invalidation. 
Completion is not guaranteed until an explicit dsb 185 + * sy is issued later 186 + * 187 + * - start - kernel start address of region 188 + * - end - kernel end address of region 189 + */ 190 + SYM_FUNC_START(__pi_dcache_inval_poc_nosync) 191 + __dcache_inval_poc_nosync 192 + ret 193 + SYM_FUNC_END(__pi_dcache_inval_poc_nosync) 194 + SYM_FUNC_ALIAS(dcache_inval_poc_nosync, __pi_dcache_inval_poc_nosync) 155 195 156 196 /* 157 197 * dcache_clean_poc(start, end) ··· 197 177 ret 198 178 SYM_FUNC_END(__pi_dcache_clean_poc) 199 179 SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc) 180 + 181 + /* 182 + * dcache_clean_poc_nosync(start, end) 183 + * 184 + * Issue the D-cache clean instructions for the interval [start, end). 185 + * Lines are not guaranteed to be cleaned to the PoC until an explicit dsb sy. 186 + * 187 + * - start - virtual start address of region 188 + * - end - virtual end address of region 189 + */ 190 + SYM_FUNC_START(__pi_dcache_clean_poc_nosync) 191 + dcache_by_line_op_nosync cvac, x0, x1, x2, x3 192 + ret 193 + SYM_FUNC_END(__pi_dcache_clean_poc_nosync) 194 + SYM_FUNC_ALIAS(dcache_clean_poc_nosync, __pi_dcache_clean_poc_nosync) 195 201 196 /* 202 197 * dcache_clean_pop(start, end)

+2 -2

arch/arm64/mm/dma-mapping.c

··· 17 17 { 18 18 unsigned long start = (unsigned long)phys_to_virt(paddr); 19 19 20 - dcache_clean_poc(start, start + size); 20 + dcache_clean_poc_nosync(start, start + size); 21 21 } 22 22 23 23 void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, ··· 28 28 if (dir == DMA_TO_DEVICE) 29 29 return; 30 30 31 - dcache_inval_poc(start, start + size); 31 + dcache_inval_poc_nosync(start, start + size); 32 32 } 33 33 34 34 void arch_dma_prep_coherent(struct page *page, size_t size)

+2 -17

drivers/dma-buf/heaps/cma_heap.c

··· 14 14 15 15 #include <linux/cma.h> 16 16 #include <linux/dma-buf.h> 17 - #include <linux/dma-buf/heaps/cma.h> 18 17 #include <linux/dma-heap.h> 19 18 #include <linux/dma-map-ops.h> 20 19 #include <linux/err.h> ··· 28 29 #include <linux/vmalloc.h> 29 30 30 31 #define DEFAULT_CMA_NAME "default_cma_region" 31 - 32 - static struct cma *dma_areas[MAX_CMA_AREAS] __initdata; 33 - static unsigned int dma_areas_num __initdata; 34 - 35 - int __init dma_heap_cma_register_heap(struct cma *cma) 36 - { 37 - if (dma_areas_num >= ARRAY_SIZE(dma_areas)) 38 - return -EINVAL; 39 - 40 - dma_areas[dma_areas_num++] = cma; 41 - 42 - return 0; 43 - } 44 32 45 33 struct cma_heap { 46 34 struct dma_heap *heap; ··· 397 411 static int __init add_cma_heaps(void) 398 412 { 399 413 struct cma *default_cma = dev_get_cma_area(NULL); 414 + struct cma *cma; 400 415 unsigned int i; 401 416 int ret; 402 417 ··· 407 420 return ret; 408 421 } 409 422 410 - for (i = 0; i < dma_areas_num; i++) { 411 - struct cma *cma = dma_areas[i]; 412 - 423 + for (i = 0; (cma = dma_contiguous_get_area_by_idx(i)) != NULL; i++) { 413 424 ret = __add_cma_heap(cma, cma_get_name(cma)); 414 425 if (ret) { 415 426 pr_warn("Failed to add CMA heap %s", cma_get_name(cma));

+98 -5

drivers/dma-buf/heaps/system_heap.c

··· 10 10 * Andrew F. Davis <afd@ti.com> 11 11 */ 12 12 13 + #include <linux/cc_platform.h> 13 14 #include <linux/dma-buf.h> 14 15 #include <linux/dma-mapping.h> 15 16 #include <linux/dma-heap.h> 16 17 #include <linux/err.h> 17 18 #include <linux/highmem.h> 19 + #include <linux/mem_encrypt.h> 18 20 #include <linux/mm.h> 21 + #include <linux/set_memory.h> 19 22 #include <linux/module.h> 23 + #include <linux/pgtable.h> 20 24 #include <linux/scatterlist.h> 21 25 #include <linux/slab.h> 22 26 #include <linux/vmalloc.h> 27 + 28 + struct system_heap_priv { 29 + bool cc_shared; 30 + }; 23 31 24 32 struct system_heap_buffer { 25 33 struct dma_heap *heap; ··· 37 29 struct sg_table sg_table; 38 30 int vmap_cnt; 39 31 void *vaddr; 32 + bool cc_shared; 40 33 }; 41 34 42 35 struct dma_heap_attachment { ··· 45 36 struct sg_table table; 46 37 struct list_head list; 47 38 bool mapped; 39 + bool cc_shared; 48 40 }; 49 41 50 42 #define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) ··· 61 51 */ 62 52 static const unsigned int orders[] = {8, 4, 0}; 63 53 #define NUM_ORDERS ARRAY_SIZE(orders) 54 + 55 + static int system_heap_set_page_decrypted(struct page *page) 56 + { 57 + unsigned long addr = (unsigned long)page_address(page); 58 + unsigned int nr_pages = 1 << compound_order(page); 59 + int ret; 60 + 61 + ret = set_memory_decrypted(addr, nr_pages); 62 + if (ret) 63 + pr_warn_ratelimited("dma-buf system heap: failed to decrypt page at %p\n", 64 + page_address(page)); 65 + 66 + return ret; 67 + } 68 + 69 + static int system_heap_set_page_encrypted(struct page *page) 70 + { 71 + unsigned long addr = (unsigned long)page_address(page); 72 + unsigned int nr_pages = 1 << compound_order(page); 73 + int ret; 74 + 75 + ret = set_memory_encrypted(addr, nr_pages); 76 + if (ret) 77 + pr_warn_ratelimited("dma-buf system heap: failed to re-encrypt page at %p, leaking memory\n", 78 + page_address(page)); 79 + 80 + return ret; 81 + } 64 82 65 83 static int dup_sg_table(struct sg_table *from, struct 
sg_table *to) 66 84 { ··· 128 90 a->dev = attachment->dev; 129 91 INIT_LIST_HEAD(&a->list); 130 92 a->mapped = false; 93 + a->cc_shared = buffer->cc_shared; 131 94 132 95 attachment->priv = a; 133 96 ··· 158 119 { 159 120 struct dma_heap_attachment *a = attachment->priv; 160 121 struct sg_table *table = &a->table; 122 + unsigned long attrs; 161 123 int ret; 162 124 163 - ret = dma_map_sgtable(attachment->dev, table, direction, 0); 125 + attrs = a->cc_shared ? DMA_ATTR_CC_SHARED : 0; 126 + ret = dma_map_sgtable(attachment->dev, table, direction, attrs); 164 127 if (ret) 165 128 return ERR_PTR(ret); 166 129 ··· 229 188 unsigned long addr = vma->vm_start; 230 189 unsigned long pgoff = vma->vm_pgoff; 231 190 struct scatterlist *sg; 191 + pgprot_t prot; 232 192 int i, ret; 193 + 194 + prot = vma->vm_page_prot; 195 + if (buffer->cc_shared) 196 + prot = pgprot_decrypted(prot); 233 197 234 198 for_each_sgtable_sg(table, sg, i) { 235 199 unsigned long n = sg->length >> PAGE_SHIFT; ··· 252 206 if (addr + size > vma->vm_end) 253 207 size = vma->vm_end - addr; 254 208 255 - ret = remap_pfn_range(vma, addr, page_to_pfn(page), 256 - size, vma->vm_page_prot); 209 + ret = remap_pfn_range(vma, addr, page_to_pfn(page), size, prot); 257 210 if (ret) 258 211 return ret; 259 212 ··· 270 225 struct page **pages = vmalloc(sizeof(struct page *) * npages); 271 226 struct page **tmp = pages; 272 227 struct sg_page_iter piter; 228 + pgprot_t prot; 273 229 void *vaddr; 274 230 275 231 if (!pages) ··· 281 235 *tmp++ = sg_page_iter_page(&piter); 282 236 } 283 237 284 - vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL); 238 + prot = PAGE_KERNEL; 239 + if (buffer->cc_shared) 240 + prot = pgprot_decrypted(prot); 241 + vaddr = vmap(pages, npages, VM_MAP, prot); 285 242 vfree(pages); 286 243 287 244 if (!vaddr) ··· 345 296 for_each_sgtable_sg(table, sg, i) { 346 297 struct page *page = sg_page(sg); 347 298 299 + /* 300 + * Intentionally leak pages that cannot be re-encrypted 301 + * to prevent shared 
memory from being reused. 302 + */ 303 + if (buffer->cc_shared && 304 + system_heap_set_page_encrypted(page)) 305 + continue; 306 + 348 307 __free_pages(page, compound_order(page)); 349 308 } 350 309 sg_free_table(table); ··· 404 347 DEFINE_DMA_BUF_EXPORT_INFO(exp_info); 405 348 unsigned long size_remaining = len; 406 349 unsigned int max_order = orders[0]; 350 + struct system_heap_priv *priv = dma_heap_get_drvdata(heap); 351 + bool cc_shared = priv->cc_shared; 407 352 struct dma_buf *dmabuf; 408 353 struct sg_table *table; 409 354 struct scatterlist *sg; ··· 421 362 mutex_init(&buffer->lock); 422 363 buffer->heap = heap; 423 364 buffer->len = len; 365 + buffer->cc_shared = cc_shared; 424 366 425 367 INIT_LIST_HEAD(&pages); 426 368 i = 0; ··· 456 396 list_del(&page->lru); 457 397 } 458 398 399 + if (cc_shared) { 400 + for_each_sgtable_sg(table, sg, i) { 401 + ret = system_heap_set_page_decrypted(sg_page(sg)); 402 + if (ret) 403 + goto free_pages; 404 + } 405 + } 406 + 459 407 /* create the dmabuf */ 460 408 exp_info.exp_name = dma_heap_get_name(heap); 461 409 exp_info.ops = &system_heap_buf_ops; ··· 481 413 for_each_sgtable_sg(table, sg, i) { 482 414 struct page *p = sg_page(sg); 483 415 416 + /* 417 + * Intentionally leak pages that cannot be re-encrypted 418 + * to prevent shared memory from being reused. 
419 + */ 420 + if (buffer->cc_shared && 421 + system_heap_set_page_encrypted(p)) 422 + continue; 484 423 __free_pages(p, compound_order(p)); 485 424 } 486 425 sg_free_table(table); ··· 503 428 .allocate = system_heap_allocate, 504 429 }; 505 430 431 + static struct system_heap_priv system_heap_priv = { 432 + .cc_shared = false, 433 + }; 434 + 435 + static struct system_heap_priv system_heap_cc_shared_priv = { 436 + .cc_shared = true, 437 + }; 438 + 506 439 static int __init system_heap_create(void) 507 440 { 508 441 struct dma_heap_export_info exp_info; ··· 518 435 519 436 exp_info.name = "system"; 520 437 exp_info.ops = &system_heap_ops; 521 - exp_info.priv = NULL; 438 + exp_info.priv = &system_heap_priv; 522 439 440 + sys_heap = dma_heap_add(&exp_info); 441 + if (IS_ERR(sys_heap)) 442 + return PTR_ERR(sys_heap); 443 + 444 + if (IS_ENABLED(CONFIG_HIGHMEM) || 445 + !cc_platform_has(CC_ATTR_MEM_ENCRYPT)) 446 + return 0; 447 + 448 + exp_info.name = "system_cc_shared"; 449 + exp_info.priv = &system_heap_cc_shared_priv; 523 450 sys_heap = dma_heap_add(&exp_info); 524 451 if (IS_ERR(sys_heap)) 525 452 return PTR_ERR(sys_heap);

+27 -8

drivers/iommu/dma-iommu.c

··· 1106 1106 return; 1107 1107 1108 1108 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); 1109 - if (!dev_is_dma_coherent(dev)) 1109 + if (!dev_is_dma_coherent(dev)) { 1110 1110 arch_sync_dma_for_cpu(phys, size, dir); 1111 + arch_sync_dma_flush(); 1112 + } 1111 1113 1112 1114 swiotlb_sync_single_for_cpu(dev, phys, size, dir); 1113 1115 } ··· 1125 1123 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); 1126 1124 swiotlb_sync_single_for_device(dev, phys, size, dir); 1127 1125 1128 - if (!dev_is_dma_coherent(dev)) 1126 + if (!dev_is_dma_coherent(dev)) { 1129 1127 arch_sync_dma_for_device(phys, size, dir); 1128 + arch_sync_dma_flush(); 1129 + } 1130 1130 } 1131 1131 1132 1132 void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, ··· 1137 1133 struct scatterlist *sg; 1138 1134 int i; 1139 1135 1140 - if (sg_dma_is_swiotlb(sgl)) 1136 + if (sg_dma_is_swiotlb(sgl)) { 1141 1137 for_each_sg(sgl, sg, nelems, i) 1142 1138 iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg), 1143 1139 sg->length, dir); 1144 - else if (!dev_is_dma_coherent(dev)) 1140 + } else if (!dev_is_dma_coherent(dev)) { 1145 1141 for_each_sg(sgl, sg, nelems, i) 1146 1142 arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); 1143 + arch_sync_dma_flush(); 1144 + } 1147 1145 } 1148 1146 1149 1147 void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, ··· 1154 1148 struct scatterlist *sg; 1155 1149 int i; 1156 1150 1157 - if (sg_dma_is_swiotlb(sgl)) 1151 + if (sg_dma_is_swiotlb(sgl)) { 1158 1152 for_each_sg(sgl, sg, nelems, i) 1159 1153 iommu_dma_sync_single_for_device(dev, 1160 1154 sg_dma_address(sg), 1161 1155 sg->length, dir); 1162 - else if (!dev_is_dma_coherent(dev)) 1156 + } else if (!dev_is_dma_coherent(dev)) { 1163 1157 for_each_sg(sgl, sg, nelems, i) 1164 1158 arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); 1159 + arch_sync_dma_flush(); 1160 + } 1165 1161 } 1166 1162 1167 1163 static phys_addr_t 
iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys, ··· 1238 1230 return DMA_MAPPING_ERROR; 1239 1231 } 1240 1232 1241 - if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) 1233 + if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) { 1242 1234 arch_sync_dma_for_device(phys, size, dir); 1235 + arch_sync_dma_flush(); 1236 + } 1243 1237 1244 1238 iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); 1245 1239 if (iova == DMA_MAPPING_ERROR && ··· 1264 1254 if (WARN_ON(!phys)) 1265 1255 return; 1266 1256 1267 - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) 1257 + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) { 1268 1258 arch_sync_dma_for_cpu(phys, size, dir); 1259 + arch_sync_dma_flush(); 1260 + } 1269 1261 1270 1262 __iommu_dma_unmap(dev, dma_handle, size); 1271 1263 ··· 2016 2004 dma_addr_t addr = state->addr + offset; 2017 2005 size_t iova_start_pad = iova_offset(iovad, addr); 2018 2006 2007 + if (!dev_is_dma_coherent(dev)) 2008 + arch_sync_dma_flush(); 2019 2009 return iommu_sync_map(domain, addr - iova_start_pad, 2020 2010 iova_align(iovad, size + iova_start_pad)); 2021 2011 } ··· 2031 2017 struct iommu_dma_cookie *cookie = domain->iova_cookie; 2032 2018 struct iova_domain *iovad = &cookie->iovad; 2033 2019 size_t iova_start_pad = iova_offset(iovad, addr); 2020 + bool need_sync_dma = !dev_is_dma_coherent(dev) && 2021 + !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)); 2034 2022 dma_addr_t end = addr + size; 2035 2023 2036 2024 do { ··· 2056 2040 addr += len; 2057 2041 iova_start_pad = 0; 2058 2042 } while (addr < end); 2043 + 2044 + if (need_sync_dma) 2045 + arch_sync_dma_flush(); 2059 2046 } 2060 2047 2061 2048 static void __iommu_dma_iova_unlink(struct device *dev,

+10 -9

drivers/memory/tegra/tegra210-emc-table.c

··· 70 70 memunmap(timings); 71 71 } 72 72 73 - static const struct reserved_mem_ops tegra210_emc_table_ops = { 74 - .device_init = tegra210_emc_table_device_init, 75 - .device_release = tegra210_emc_table_device_release, 76 - }; 77 - 78 - static int tegra210_emc_table_init(struct reserved_mem *rmem) 73 + static int tegra210_emc_table_init(unsigned long node, 74 + struct reserved_mem *rmem) 79 75 { 80 76 pr_debug("Tegra210 EMC table at %pa, size %lu bytes\n", &rmem->base, 81 77 (unsigned long)rmem->size); 82 78 83 - rmem->ops = &tegra210_emc_table_ops; 84 - 85 79 return 0; 86 80 } 81 + 82 + static const struct reserved_mem_ops tegra210_emc_table_ops = { 83 + .node_init = tegra210_emc_table_init, 84 + .device_init = tegra210_emc_table_device_init, 85 + .device_release = tegra210_emc_table_device_release, 86 + }; 87 + 87 88 RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table", 88 - tegra210_emc_table_init); 89 + &tegra210_emc_table_ops);

+1 -1

drivers/of/fdt.c

··· 1295 1295 void *fdt = initial_boot_params; 1296 1296 1297 1297 /* Save the statically-placed regions in the reserved_mem array */ 1298 - fdt_scan_reserved_mem_reg_nodes(); 1298 + fdt_scan_reserved_mem_late(); 1299 1299 1300 1300 /* Populate an empty root node when bootloader doesn't provide one */ 1301 1301 if (!fdt) {

+1 -1

drivers/of/of_private.h

··· 186 186 #endif 187 187 188 188 int fdt_scan_reserved_mem(void); 189 - void __init fdt_scan_reserved_mem_reg_nodes(void); 189 + void __init fdt_scan_reserved_mem_late(void); 190 190 191 191 bool of_fdt_device_is_available(const void *blob, unsigned long node); 192 192

+197 -143

drivers/of/of_reserved_mem.c

··· 24 24 #include <linux/slab.h> 25 25 #include <linux/memblock.h> 26 26 #include <linux/kmemleak.h> 27 - #include <linux/cma.h> 28 - #include <linux/dma-map-ops.h> 29 27 30 28 #include "of_private.h" 31 29 ··· 102 104 reserved_mem = new_array; 103 105 } 104 106 105 - static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem); 106 - /* 107 - * fdt_reserved_mem_save_node() - save fdt node for second pass initialization 108 - */ 109 - static void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname, 110 - phys_addr_t base, phys_addr_t size) 111 - { 112 - struct reserved_mem *rmem = &reserved_mem[reserved_mem_count]; 113 - 114 - if (reserved_mem_count == total_reserved_mem_cnt) { 115 - pr_err("not enough space for all defined regions.\n"); 116 - return; 117 - } 118 - 119 - rmem->fdt_node = node; 120 - rmem->name = uname; 121 - rmem->base = base; 122 - rmem->size = size; 123 - 124 - /* Call the region specific initialization function */ 125 - fdt_init_reserved_mem_node(rmem); 126 - 127 - reserved_mem_count++; 128 - } 107 + static void fdt_init_reserved_mem_node(unsigned long node, const char *uname, 108 + phys_addr_t base, phys_addr_t size); 109 + static int fdt_validate_reserved_mem_node(unsigned long node, 110 + phys_addr_t *align); 111 + static int fdt_fixup_reserved_mem_node(unsigned long node, 112 + phys_addr_t base, phys_addr_t size); 129 113 130 114 static int __init early_init_dt_reserve_memory(phys_addr_t base, 131 115 phys_addr_t size, bool nomap) ··· 134 154 const char *uname) 135 155 { 136 156 phys_addr_t base, size; 137 - int i, len; 157 + int i, len, err; 138 158 const __be32 *prop; 139 - bool nomap, default_cma; 159 + bool nomap; 140 160 141 161 prop = of_flat_dt_get_addr_size_prop(node, "reg", &len); 142 162 if (!prop) 143 163 return -ENOENT; 144 164 145 165 nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; 146 - default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); 147 166 148 - if (default_cma 
&& cma_skip_dt_default_reserved_mem()) { 149 - pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n"); 150 - return -EINVAL; 151 - } 167 + err = fdt_validate_reserved_mem_node(node, NULL); 168 + if (err && err != -ENODEV) 169 + return err; 152 170 153 171 for (i = 0; i < len; i++) { 154 172 u64 b, s; ··· 157 179 size = s; 158 180 159 181 if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) { 160 - /* Architecture specific contiguous memory fixup. */ 161 - if (of_flat_dt_is_compatible(node, "shared-dma-pool") && 162 - of_get_flat_dt_prop(node, "reusable", NULL)) 163 - dma_contiguous_early_fixup(base, size); 182 + fdt_fixup_reserved_mem_node(node, base, size); 164 183 pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", 165 184 uname, &base, (unsigned long)(size / SZ_1M)); 166 185 } else { ··· 191 216 return 0; 192 217 } 193 218 194 - static void __init __rmem_check_for_overlap(void); 219 + static int __init __rmem_cmp(const void *a, const void *b) 220 + { 221 + const struct reserved_mem *ra = a, *rb = b; 222 + 223 + if (ra->base < rb->base) 224 + return -1; 225 + 226 + if (ra->base > rb->base) 227 + return 1; 228 + 229 + /* 230 + * Put the dynamic allocations (address == 0, size == 0) before static 231 + * allocations at address 0x0 so that overlap detection works 232 + * correctly. 
233 + */ 234 + if (ra->size < rb->size) 235 + return -1; 236 + if (ra->size > rb->size) 237 + return 1; 238 + 239 + return 0; 240 + } 241 + 242 + static void __init __rmem_check_for_overlap(void) 243 + { 244 + int i; 245 + 246 + if (reserved_mem_count < 2) 247 + return; 248 + 249 + sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]), 250 + __rmem_cmp, NULL); 251 + for (i = 0; i < reserved_mem_count - 1; i++) { 252 + struct reserved_mem *this, *next; 253 + 254 + this = &reserved_mem[i]; 255 + next = &reserved_mem[i + 1]; 256 + 257 + if (this->base + this->size > next->base) { 258 + phys_addr_t this_end, next_end; 259 + 260 + this_end = this->base + this->size; 261 + next_end = next->base + next->size; 262 + pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n", 263 + this->name, &this->base, &this_end, 264 + next->name, &next->base, &next_end); 265 + } 266 + } 267 + } 195 268 196 269 /** 197 - * fdt_scan_reserved_mem_reg_nodes() - Store info for the "reg" defined 198 - * reserved memory regions. 270 + * fdt_scan_reserved_mem_late() - Scan FDT and initialize remaining reserved 271 + * memory regions. 199 272 * 200 - * This function is used to scan through the DT and store the 201 - * information for the reserved memory regions that are defined using 202 - * the "reg" property. The region node number, name, base address, and 203 - * size are all stored in the reserved_mem array by calling the 204 - * fdt_reserved_mem_save_node() function. 273 + * This function is used to scan again through the DT and initialize the 274 + * "static" reserved memory regions, that are defined using the "reg" 275 + * property. Each such region is then initialized with its specific init 276 + * function and stored in the global reserved_mem array. 
205 277 */ 206 - void __init fdt_scan_reserved_mem_reg_nodes(void) 278 + void __init fdt_scan_reserved_mem_late(void) 207 279 { 208 280 const void *fdt = initial_boot_params; 209 281 phys_addr_t base, size; ··· 275 253 276 254 fdt_for_each_subnode(child, fdt, node) { 277 255 const char *uname; 278 - bool default_cma = of_get_flat_dt_prop(child, "linux,cma-default", NULL); 279 256 u64 b, s; 257 + int ret; 280 258 281 259 if (!of_fdt_device_is_available(fdt, child)) 282 260 continue; 283 - if (default_cma && cma_skip_dt_default_reserved_mem()) 284 - continue; 285 261 286 262 if (!of_flat_dt_get_addr_size(child, "reg", &b, &s)) 263 + continue; 264 + 265 + ret = fdt_validate_reserved_mem_node(child, NULL); 266 + if (ret && ret != -ENODEV) 287 267 continue; 288 268 289 269 base = b; ··· 293 269 294 270 if (size) { 295 271 uname = fdt_get_name(fdt, child, NULL); 296 - fdt_reserved_mem_save_node(child, uname, base, size); 272 + fdt_init_reserved_mem_node(child, uname, base, size); 297 273 } 298 274 } 299 275 ··· 304 280 static int __init __reserved_mem_alloc_size(unsigned long node, const char *uname); 305 281 306 282 /* 307 - * fdt_scan_reserved_mem() - scan a single FDT node for reserved memory 283 + * fdt_scan_reserved_mem() - reserve and allocate memory occupied by 284 + * reserved memory regions. 285 + * 286 + * This function is used to scan through the FDT and mark memory occupied 287 + * by all static (defined by the "reg" property) reserved memory regions. 288 + * Then memory for all dynamic regions (defined by size & alignment) is 289 + * allocated, a region specific init function is called and region information 290 + * is stored in the reserved_mem array. 
308 291 */ 309 292 int __init fdt_scan_reserved_mem(void) 310 293 { ··· 428 397 phys_addr_t base = 0, align = 0, size; 429 398 int i, len; 430 399 const __be32 *prop; 431 - bool nomap, default_cma; 400 + bool nomap; 432 401 int ret; 433 402 434 403 prop = of_get_flat_dt_prop(node, "size", &len); ··· 452 421 } 453 422 454 423 nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; 455 - default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); 456 424 457 - if (default_cma && cma_skip_dt_default_reserved_mem()) { 458 - pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n"); 459 - return -EINVAL; 460 - } 461 - 462 - /* Need adjust the alignment to satisfy the CMA requirement */ 463 - if (IS_ENABLED(CONFIG_CMA) 464 - && of_flat_dt_is_compatible(node, "shared-dma-pool") 465 - && of_get_flat_dt_prop(node, "reusable", NULL) 466 - && !nomap) 467 - align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES); 425 + ret = fdt_validate_reserved_mem_node(node, &align); 426 + if (ret && ret != -ENODEV) 427 + return ret; 468 428 469 429 prop = of_flat_dt_get_addr_size_prop(node, "alloc-ranges", &len); 470 430 if (prop) { ··· 490 468 uname, (unsigned long)(size / SZ_1M)); 491 469 return -ENOMEM; 492 470 } 493 - /* Architecture specific contiguous memory fixup. 
*/ 494 - if (of_flat_dt_is_compatible(node, "shared-dma-pool") && 495 - of_get_flat_dt_prop(node, "reusable", NULL)) 496 - dma_contiguous_early_fixup(base, size); 497 - /* Save region in the reserved_mem array */ 498 - fdt_reserved_mem_save_node(node, uname, base, size); 471 + 472 + fdt_fixup_reserved_mem_node(node, base, size); 473 + fdt_init_reserved_mem_node(node, uname, base, size); 474 + 499 475 return 0; 500 476 } 501 477 478 + extern const struct of_device_id __reservedmem_of_table[]; 502 479 static const struct of_device_id __rmem_of_table_sentinel 503 480 __used __section("__reservedmem_of_table_end"); 504 481 505 - /* 506 - * __reserved_mem_init_node() - call region specific reserved memory init code 482 + /** 483 + * fdt_fixup_reserved_mem_node() - call fixup function for a reserved memory node 484 + * @node: FDT node to fixup 485 + * @base: base address of the reserved memory region 486 + * @size: size of the reserved memory region 487 + * 488 + * This function iterates through the reserved memory drivers and calls 489 + * the node_fixup callback for the compatible entry matching the node. 
490 + * 491 + * Return: 0 on success, -ENODEV if no compatible match found 507 492 */ 508 - static int __init __reserved_mem_init_node(struct reserved_mem *rmem) 493 + static int __init fdt_fixup_reserved_mem_node(unsigned long node, 494 + phys_addr_t base, phys_addr_t size) 509 495 { 510 - extern const struct of_device_id __reservedmem_of_table[]; 511 496 const struct of_device_id *i; 512 - int ret = -ENOENT; 497 + int ret = -ENODEV; 513 498 514 - for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) { 515 - reservedmem_of_init_fn initfn = i->data; 516 - const char *compat = i->compatible; 499 + for (i = __reservedmem_of_table; ret == -ENODEV && 500 + i < &__rmem_of_table_sentinel; i++) { 501 + const struct reserved_mem_ops *ops = i->data; 517 502 518 - if (!of_flat_dt_is_compatible(rmem->fdt_node, compat)) 503 + if (!of_flat_dt_is_compatible(node, i->compatible)) 519 504 continue; 520 505 521 - ret = initfn(rmem); 506 + if (ops->node_fixup) 507 + ret = ops->node_fixup(node, base, size); 508 + } 509 + return ret; 510 + } 511 + 512 + /** 513 + * fdt_validate_reserved_mem_node() - validate a reserved memory node 514 + * @node: FDT node to validate 515 + * @align: pointer to store the validated alignment (may be modified by callback) 516 + * 517 + * This function iterates through the reserved memory drivers and calls 518 + * the node_validate callback for the compatible entry matching the node. 
519 + * 520 + * Return: 0 on success, -ENODEV if no compatible match found 521 + */ 522 + static int __init fdt_validate_reserved_mem_node(unsigned long node, phys_addr_t *align) 523 + { 524 + const struct of_device_id *i; 525 + int ret = -ENODEV; 526 + 527 + for (i = __reservedmem_of_table; ret == -ENODEV && 528 + i < &__rmem_of_table_sentinel; i++) { 529 + const struct reserved_mem_ops *ops = i->data; 530 + 531 + if (!of_flat_dt_is_compatible(node, i->compatible)) 532 + continue; 533 + 534 + if (ops->node_validate) 535 + ret = ops->node_validate(node, align); 536 + } 537 + return ret; 538 + } 539 + 540 + /** 541 + * __reserved_mem_init_node() - initialize a reserved memory region 542 + * @rmem: reserved_mem structure to initialize 543 + * @node: FDT node describing the reserved memory region 544 + * 545 + * This function iterates through the reserved memory drivers and calls the 546 + * node_init callback for the compatible entry matching the node. On success, 547 + * the operations pointer is stored in the reserved_mem structure. 
548 + * 549 + * Return: 0 on success, -ENODEV if no compatible match found 550 + */ 551 + static int __init __reserved_mem_init_node(struct reserved_mem *rmem, 552 + unsigned long node) 553 + { 554 + const struct of_device_id *i; 555 + int ret = -ENODEV; 556 + 557 + for (i = __reservedmem_of_table; ret == -ENODEV && 558 + i < &__rmem_of_table_sentinel; i++) { 559 + const struct reserved_mem_ops *ops = i->data; 560 + const char *compat = i->compatible; 561 + 562 + if (!of_flat_dt_is_compatible(node, compat)) 563 + continue; 564 + 565 + ret = ops->node_init(node, rmem); 522 566 if (ret == 0) { 567 + rmem->ops = ops; 523 568 pr_info("initialized node %s, compatible id %s\n", 524 569 rmem->name, compat); 525 - break; 570 + return ret; 526 571 } 527 572 } 528 573 return ret; 529 574 } 530 575 531 - static int __init __rmem_cmp(const void *a, const void *b) 532 - { 533 - const struct reserved_mem *ra = a, *rb = b; 534 - 535 - if (ra->base < rb->base) 536 - return -1; 537 - 538 - if (ra->base > rb->base) 539 - return 1; 540 - 541 - /* 542 - * Put the dynamic allocations (address == 0, size == 0) before static 543 - * allocations at address 0x0 so that overlap detection works 544 - * correctly. 
545 - */ 546 - if (ra->size < rb->size) 547 - return -1; 548 - if (ra->size > rb->size) 549 - return 1; 550 - 551 - if (ra->fdt_node < rb->fdt_node) 552 - return -1; 553 - if (ra->fdt_node > rb->fdt_node) 554 - return 1; 555 - 556 - return 0; 557 - } 558 - 559 - static void __init __rmem_check_for_overlap(void) 560 - { 561 - int i; 562 - 563 - if (reserved_mem_count < 2) 564 - return; 565 - 566 - sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]), 567 - __rmem_cmp, NULL); 568 - for (i = 0; i < reserved_mem_count - 1; i++) { 569 - struct reserved_mem *this, *next; 570 - 571 - this = &reserved_mem[i]; 572 - next = &reserved_mem[i + 1]; 573 - 574 - if (this->base + this->size > next->base) { 575 - phys_addr_t this_end, next_end; 576 - 577 - this_end = this->base + this->size; 578 - next_end = next->base + next->size; 579 - pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n", 580 - this->name, &this->base, &this_end, 581 - next->name, &next->base, &next_end); 582 - } 583 - } 584 - } 585 - 586 576 /** 587 577 * fdt_init_reserved_mem_node() - Initialize a reserved memory region 588 - * @rmem: reserved_mem struct of the memory region to be initialized. 578 + * @node: fdt node of the initialized region 579 + * @uname: name of the reserved memory node 580 + * @base: base address of the reserved memory region 581 + * @size: size of the reserved memory region 589 582 * 590 - * This function is used to call the region specific initialization 591 - * function for a reserved memory region. 583 + * This function calls the region-specific initialization function for a 584 + * reserved memory region and saves all region-specific data to the 585 + * reserved_mem array to allow of_reserved_mem_lookup() to find it. 
592 586 */ 593 - static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem) 587 + static void __init fdt_init_reserved_mem_node(unsigned long node, const char *uname, 588 + phys_addr_t base, phys_addr_t size) 594 589 { 595 - unsigned long node = rmem->fdt_node; 596 590 int err = 0; 597 591 bool nomap; 598 592 593 + struct reserved_mem *rmem = &reserved_mem[reserved_mem_count]; 594 + 595 + if (reserved_mem_count == total_reserved_mem_cnt) { 596 + pr_err("not enough space for all defined regions.\n"); 597 + return; 598 + } 599 + 600 + rmem->name = uname; 601 + rmem->base = base; 602 + rmem->size = size; 603 + 599 604 nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; 600 605 601 - err = __reserved_mem_init_node(rmem); 602 - if (err != 0 && err != -ENOENT) { 606 + err = __reserved_mem_init_node(rmem, node); 607 + if (err != 0 && err != -ENODEV) { 603 608 pr_info("node %s compatible matching fail\n", rmem->name); 609 + rmem->name = NULL; 610 + 604 611 if (nomap) 605 612 memblock_clear_nomap(rmem->base, rmem->size); 606 613 else 607 614 memblock_phys_free(rmem->base, rmem->size); 615 + return; 608 616 } else { 609 617 phys_addr_t end = rmem->base + rmem->size - 1; 610 618 bool reusable = ··· 646 594 reusable ? "reusable" : "non-reusable", 647 595 rmem->name ? rmem->name : "unknown"); 648 596 } 597 + 598 + reserved_mem_count++; 649 599 } 650 600 651 601 struct rmem_assigned_device {

+16 -8

drivers/xen/swiotlb-xen.c

··· 262 262 263 263 done: 264 264 if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { 265 - if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) 265 + if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) { 266 266 arch_sync_dma_for_device(phys, size, dir); 267 - else 267 + arch_sync_dma_flush(); 268 + } else { 268 269 xen_dma_sync_for_device(dev, dev_addr, size, dir); 270 + } 269 271 } 270 272 return dev_addr; 271 273 } ··· 289 287 BUG_ON(dir == DMA_NONE); 290 288 291 289 if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { 292 - if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) 290 + if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) { 293 291 arch_sync_dma_for_cpu(paddr, size, dir); 294 - else 292 + arch_sync_dma_flush(); 293 + } else { 295 294 xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir); 295 + } 296 296 } 297 297 298 298 /* NOTE: We use dev_addr here, not paddr! */ ··· 312 308 struct io_tlb_pool *pool; 313 309 314 310 if (!dev_is_dma_coherent(dev)) { 315 - if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) 311 + if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) { 316 312 arch_sync_dma_for_cpu(paddr, size, dir); 317 - else 313 + arch_sync_dma_flush(); 314 + } else { 318 315 xen_dma_sync_for_cpu(dev, dma_addr, size, dir); 316 + } 319 317 } 320 318 321 319 pool = xen_swiotlb_find_pool(dev, dma_addr); ··· 337 331 __swiotlb_sync_single_for_device(dev, paddr, size, dir, pool); 338 332 339 333 if (!dev_is_dma_coherent(dev)) { 340 - if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) 334 + if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) { 341 335 arch_sync_dma_for_device(paddr, size, dir); 342 - else 336 + arch_sync_dma_flush(); 337 + } else { 343 338 xen_dma_sync_for_device(dev, dma_addr, size, dir); 339 + } 344 340 } 345 341 } 346 342

-10

include/linux/cma.h

··· 61 61 extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end); 62 62 63 63 extern void cma_reserve_pages_on_error(struct cma *cma); 64 - 65 - #ifdef CONFIG_DMA_CMA 66 - extern bool cma_skip_dt_default_reserved_mem(void); 67 - #else 68 - static inline bool cma_skip_dt_default_reserved_mem(void) 69 - { 70 - return false; 71 - } 72 - #endif 73 - 74 64 #endif

-16

include/linux/dma-buf/heaps/cma.h

··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef DMA_BUF_HEAP_CMA_H_ 3 - #define DMA_BUF_HEAP_CMA_H_ 4 - 5 - struct cma; 6 - 7 - #ifdef CONFIG_DMABUF_HEAPS_CMA 8 - int dma_heap_cma_register_heap(struct cma *cma); 9 - #else 10 - static inline int dma_heap_cma_register_heap(struct cma *cma) 11 - { 12 - return 0; 13 - } 14 - #endif // CONFIG_DMABUF_HEAPS_CMA 15 - 16 - #endif // DMA_BUF_HEAP_CMA_H_

+12 -11

include/linux/dma-map-ops.h

··· 91 91 #endif /* CONFIG_ARCH_HAS_DMA_OPS */ 92 92 93 93 #ifdef CONFIG_DMA_CMA 94 - extern struct cma *dma_contiguous_default_area; 95 - 96 - static inline struct cma *dev_get_cma_area(struct device *dev) 97 - { 98 - if (dev && dev->cma_area) 99 - return dev->cma_area; 100 - return dma_contiguous_default_area; 101 - } 94 + struct cma *dev_get_cma_area(struct device *dev); 95 + struct cma *dma_contiguous_get_area_by_idx(unsigned int idx); 102 96 103 97 void dma_contiguous_reserve(phys_addr_t addr_limit); 104 98 int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, ··· 108 114 void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); 109 115 #else /* CONFIG_DMA_CMA */ 110 116 static inline struct cma *dev_get_cma_area(struct device *dev) 117 + { 118 + return NULL; 119 + } 120 + static inline struct cma *dma_contiguous_get_area_by_idx(unsigned int idx) 111 121 { 112 122 return NULL; 113 123 } ··· 144 146 size_t size) 145 147 { 146 148 __free_pages(page, get_order(size)); 147 - } 148 - static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) 149 - { 150 149 } 151 150 #endif /* CONFIG_DMA_CMA*/ 152 151 ··· 355 360 { 356 361 } 357 362 #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ 363 + 364 + #ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC 365 + static inline void arch_sync_dma_flush(void) 366 + { 367 + } 368 + #endif 358 369 359 370 #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL 360 371 void arch_sync_dma_for_cpu_all(void);

+11 -1

include/linux/dma-mapping.h

··· 9 9 #include <linux/bug.h> 10 10 #include <linux/cache.h> 11 11 12 - /** 12 + /* 13 13 * List of possible attributes associated with a DMA mapping. The semantics 14 14 * of each attribute should be defined in Documentation/core-api/dma-attributes.rst. 15 15 */ ··· 92 92 * flushing. 93 93 */ 94 94 #define DMA_ATTR_REQUIRE_COHERENT (1UL << 12) 95 + /* 96 + * DMA_ATTR_CC_SHARED: Indicates the DMA mapping is shared (decrypted) for 97 + * confidential computing guests. For normal system memory the caller must have 98 + * called set_memory_decrypted(), and pgprot_decrypted must be used when 99 + * creating CPU PTEs for the mapping. The same shared semantic may be passed 100 + * to the vIOMMU when it sets up the IOPTE. For MMIO use together with 101 + * DMA_ATTR_MMIO to indicate shared MMIO. Unless DMA_ATTR_MMIO is provided 102 + * a struct page is required. 103 + */ 104 + #define DMA_ATTR_CC_SHARED (1UL << 13) 95 105 96 106 /* 97 107 * A dma_addr_t can hold any valid DMA or bus address for the platform. It can

+9 -7

include/linux/of_reserved_mem.h

··· 11 11 12 12 struct reserved_mem { 13 13 const char *name; 14 - unsigned long fdt_node; 15 14 const struct reserved_mem_ops *ops; 16 15 phys_addr_t base; 17 16 phys_addr_t size; ··· 18 19 }; 19 20 20 21 struct reserved_mem_ops { 22 + int (*node_validate)(unsigned long fdt_node, phys_addr_t *align); 23 + int (*node_fixup)(unsigned long fdt_node, phys_addr_t base, 24 + phys_addr_t size); 25 + int (*node_init)(unsigned long fdt_node, struct reserved_mem *rmem); 21 26 int (*device_init)(struct reserved_mem *rmem, 22 27 struct device *dev); 23 28 void (*device_release)(struct reserved_mem *rmem, 24 29 struct device *dev); 25 30 }; 26 31 27 - typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); 28 - 29 32 #ifdef CONFIG_OF_RESERVED_MEM 30 33 31 - #define RESERVEDMEM_OF_DECLARE(name, compat, init) \ 32 - _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) 34 + #define RESERVEDMEM_OF_DECLARE(name, compat, ops) \ 35 + _OF_DECLARE(reservedmem, name, compat, ops, struct reserved_mem_ops *) 33 36 34 37 int of_reserved_mem_device_init_by_idx(struct device *dev, 35 38 struct device_node *np, int idx); ··· 49 48 50 49 #else 51 50 52 - #define RESERVEDMEM_OF_DECLARE(name, compat, init) \ 53 - _OF_DECLARE_STUB(reservedmem, name, compat, init, reservedmem_of_init_fn) 51 + #define RESERVEDMEM_OF_DECLARE(name, compat, ops) \ 52 + _OF_DECLARE_STUB(reservedmem, name, compat, ops, \ 53 + struct reserved_mem_ops *) 54 54 55 55 static inline int of_reserved_mem_device_init_by_idx(struct device *dev, 56 56 struct device_node *np, int idx)

+2 -1

include/trace/events/dma.h

··· 34 34 { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \ 35 35 { DMA_ATTR_MMIO, "MMIO" }, \ 36 36 { DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \ 37 - { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }) 37 + { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }, \ 38 + { DMA_ATTR_CC_SHARED, "CC_SHARED" }) 38 39 39 40 DECLARE_EVENT_CLASS(dma_map, 40 41 TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,

+11 -2

include/uapi/linux/map_benchmark.h

··· 17 17 #define DMA_MAP_TO_DEVICE 1 18 18 #define DMA_MAP_FROM_DEVICE 2 19 19 20 + enum { 21 + DMA_MAP_BENCH_SINGLE_MODE, 22 + DMA_MAP_BENCH_SG_MODE, 23 + DMA_MAP_BENCH_MODE_MAX 24 + }; 25 + 20 26 struct map_benchmark { 21 27 __u64 avg_map_100ns; /* average map latency in 100ns */ 22 28 __u64 map_stddev; /* standard deviation of map latency */ ··· 34 28 __u32 dma_bits; /* DMA addressing capability */ 35 29 __u32 dma_dir; /* DMA data direction */ 36 30 __u32 dma_trans_ns; /* time for DMA transmission in ns */ 37 - __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ 38 - __u8 expansion[76]; /* For future use */ 31 + __u32 granule; /* - SINGLE_MODE: number of pages mapped/unmapped per operation 32 + * - SG_MODE: number of scatterlist entries (each maps one page) 33 + */ 34 + __u8 map_mode; /* the mode of dma map */ 35 + __u8 expansion[75]; /* For future use */ 39 36 }; 40 37 41 38 #endif /* _UAPI_DMA_BENCHMARK_H */

+3

kernel/dma/Kconfig

··· 72 72 config ARCH_HAS_FORCE_DMA_UNENCRYPTED 73 73 bool 74 74 75 + config ARCH_HAS_BATCHED_DMA_SYNC 76 + bool 77 + 75 78 # 76 79 # Select this option if the architecture assumes DMA devices are coherent 77 80 # by default.

+9 -10

kernel/dma/coherent.c

··· 362 362 dev->dma_mem = NULL; 363 363 } 364 364 365 - static const struct reserved_mem_ops rmem_dma_ops = { 366 - .device_init = rmem_dma_device_init, 367 - .device_release = rmem_dma_device_release, 368 - }; 369 365 370 - static int __init rmem_dma_setup(struct reserved_mem *rmem) 366 + static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem) 371 367 { 372 - unsigned long node = rmem->fdt_node; 373 - 374 368 if (of_get_flat_dt_prop(node, "reusable", NULL)) 375 - return -EINVAL; 369 + return -ENODEV; 376 370 377 371 #ifdef CONFIG_ARM 378 372 if (!of_get_flat_dt_prop(node, "no-map", NULL)) { ··· 384 390 } 385 391 #endif 386 392 387 - rmem->ops = &rmem_dma_ops; 388 393 pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n", 389 394 &rmem->base, (unsigned long)rmem->size / SZ_1M); 390 395 return 0; ··· 400 407 core_initcall(dma_init_reserved_memory); 401 408 #endif /* CONFIG_DMA_GLOBAL_POOL */ 402 409 403 - RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup); 410 + static const struct reserved_mem_ops rmem_dma_ops = { 411 + .node_init = rmem_dma_setup, 412 + .device_init = rmem_dma_device_init, 413 + .device_release = rmem_dma_device_release, 414 + }; 415 + 416 + RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", &rmem_dma_ops); 404 417 #endif

+117 -31

kernel/dma/contiguous.c

··· 42 42 #include <linux/memblock.h> 43 43 #include <linux/err.h> 44 44 #include <linux/sizes.h> 45 - #include <linux/dma-buf/heaps/cma.h> 46 45 #include <linux/dma-map-ops.h> 47 46 #include <linux/cma.h> 48 47 #include <linux/nospec.h> ··· 52 53 #define CMA_SIZE_MBYTES 0 53 54 #endif 54 55 55 - struct cma *dma_contiguous_default_area; 56 + static struct cma *dma_contiguous_areas[MAX_CMA_AREAS]; 57 + static unsigned int dma_contiguous_areas_num; 58 + 59 + static int dma_contiguous_insert_area(struct cma *cma) 60 + { 61 + if (dma_contiguous_areas_num >= ARRAY_SIZE(dma_contiguous_areas)) 62 + return -EINVAL; 63 + 64 + dma_contiguous_areas[dma_contiguous_areas_num++] = cma; 65 + 66 + return 0; 67 + } 68 + 69 + /** 70 + * dma_contiguous_get_area_by_idx() - Get contiguous area at given index 71 + * @idx: index of the area we query 72 + * 73 + * Queries for the contiguous area located at index @idx. 74 + * 75 + * Returns: 76 + * A pointer to the requested contiguous area, or NULL otherwise. 77 + */ 78 + struct cma *dma_contiguous_get_area_by_idx(unsigned int idx) 79 + { 80 + if (idx >= dma_contiguous_areas_num) 81 + return NULL; 82 + 83 + return dma_contiguous_areas[idx]; 84 + } 85 + EXPORT_SYMBOL_GPL(dma_contiguous_get_area_by_idx); 86 + 87 + static struct cma *dma_contiguous_default_area; 56 88 57 89 /* 58 90 * Default global CMA area size can be defined in kernel's .config. ··· 121 91 } 122 92 early_param("cma", early_cma); 123 93 124 - /* 125 - * cma_skip_dt_default_reserved_mem - This is called from the 126 - * reserved_mem framework to detect if the default cma region is being 127 - * set by the "cma=" kernel parameter. 
128 - */ 129 - bool __init cma_skip_dt_default_reserved_mem(void) 94 + struct cma *dev_get_cma_area(struct device *dev) 130 95 { 131 - return size_cmdline != -1; 96 + if (dev && dev->cma_area) 97 + return dev->cma_area; 98 + 99 + return dma_contiguous_default_area; 132 100 } 101 + EXPORT_SYMBOL_GPL(dev_get_cma_area); 133 102 134 103 #ifdef CONFIG_DMA_NUMA_CMA 135 104 ··· 293 264 if (ret) 294 265 return; 295 266 296 - ret = dma_heap_cma_register_heap(dma_contiguous_default_area); 267 + /* 268 + * We need to insert the new area in our list to avoid 269 + * any inconsistencies between having the default area 270 + * listed in the DT or not. 271 + * 272 + * The DT case is handled by rmem_cma_setup() and will 273 + * always insert all its areas in our list. However, if 274 + * it didn't run (because OF_RESERVED_MEM isn't set, or 275 + * there's no DT region specified), then we don't have a 276 + * default area yet, and no area in our list. 277 + * 278 + * This block creates the default area in such a case, 279 + * but we also need to insert it in our list to avoid 280 + * having a default area but an empty list. 
281 + */ 282 + ret = dma_contiguous_insert_area(dma_contiguous_default_area); 297 283 if (ret) 298 - pr_warn("Couldn't register default CMA heap."); 284 + pr_warn("Couldn't queue default CMA region for heap creation."); 299 285 } 300 286 } 301 287 ··· 514 470 dev->cma_area = NULL; 515 471 } 516 472 517 - static const struct reserved_mem_ops rmem_cma_ops = { 518 - .device_init = rmem_cma_device_init, 519 - .device_release = rmem_cma_device_release, 520 - }; 521 - 522 - static int __init rmem_cma_setup(struct reserved_mem *rmem) 473 + static int __init __rmem_cma_verify_node(unsigned long node) 523 474 { 524 - unsigned long node = rmem->fdt_node; 525 - bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); 526 - struct cma *cma; 527 - int err; 528 - 529 475 if (!of_get_flat_dt_prop(node, "reusable", NULL) || 530 476 of_get_flat_dt_prop(node, "no-map", NULL)) 531 - return -EINVAL; 477 + return -ENODEV; 478 + 479 + if (size_cmdline != -1 && 480 + of_get_flat_dt_prop(node, "linux,cma-default", NULL)) { 481 + pr_err("Skipping dt linux,cma-default node in favor for \"cma=\" kernel param.\n"); 482 + return -EBUSY; 483 + } 484 + return 0; 485 + } 486 + 487 + static int __init rmem_cma_validate(unsigned long node, phys_addr_t *align) 488 + { 489 + int ret = __rmem_cma_verify_node(node); 490 + 491 + if (ret) 492 + return ret; 493 + 494 + if (align) 495 + *align = max_t(phys_addr_t, *align, CMA_MIN_ALIGNMENT_BYTES); 496 + 497 + return 0; 498 + } 499 + 500 + static int __init rmem_cma_fixup(unsigned long node, phys_addr_t base, 501 + phys_addr_t size) 502 + { 503 + int ret = __rmem_cma_verify_node(node); 504 + 505 + if (ret) 506 + return ret; 507 + 508 + /* Architecture specific contiguous memory fixup. 
*/ 509 + dma_contiguous_early_fixup(base, size); 510 + return 0; 511 + } 512 + 513 + static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem) 514 + { 515 + bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); 516 + struct cma *cma; 517 + int ret; 518 + 519 + ret = __rmem_cma_verify_node(node); 520 + if (ret) 521 + return ret; 532 522 533 523 if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) { 534 524 pr_err("Reserved memory: incorrect alignment of CMA region\n"); 535 525 return -EINVAL; 536 526 } 537 527 538 - err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma); 539 - if (err) { 528 + ret = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma); 529 + if (ret) { 540 530 pr_err("Reserved memory: unable to setup CMA region\n"); 541 - return err; 531 + return ret; 542 532 } 543 533 544 534 if (default_cma) 545 535 dma_contiguous_default_area = cma; 546 536 547 - rmem->ops = &rmem_cma_ops; 548 537 rmem->priv = cma; 549 538 550 539 pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n", 551 540 &rmem->base, (unsigned long)rmem->size / SZ_1M); 552 541 553 - err = dma_heap_cma_register_heap(cma); 554 - if (err) 555 - pr_warn("Couldn't register CMA heap."); 542 + ret = dma_contiguous_insert_area(cma); 543 + if (ret) 544 + pr_warn("Couldn't store CMA reserved area."); 556 545 557 546 return 0; 558 547 } 559 - RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup); 548 + 549 + static const struct reserved_mem_ops rmem_cma_ops = { 550 + .node_validate = rmem_cma_validate, 551 + .node_fixup = rmem_cma_fixup, 552 + .node_init = rmem_cma_setup, 553 + .device_init = rmem_cma_device_init, 554 + .device_release = rmem_cma_device_release, 555 + }; 556 + 557 + RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", &rmem_cma_ops); 560 558 #endif

+18 -5

kernel/dma/direct.c

··· 406 406 arch_sync_dma_for_device(paddr, sg->length, 407 407 dir); 408 408 } 409 + if (!dev_is_dma_coherent(dev)) 410 + arch_sync_dma_flush(); 409 411 } 410 412 #endif 411 413 ··· 429 427 swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); 430 428 } 431 429 432 - if (!dev_is_dma_coherent(dev)) 430 + if (!dev_is_dma_coherent(dev)) { 431 + arch_sync_dma_flush(); 433 432 arch_sync_dma_for_cpu_all(); 433 + } 434 434 } 435 435 436 436 /* ··· 444 440 { 445 441 struct scatterlist *sg; 446 442 int i; 443 + bool need_sync = false; 447 444 448 445 for_each_sg(sgl, sg, nents, i) { 449 - if (sg_dma_is_bus_address(sg)) 446 + if (sg_dma_is_bus_address(sg)) { 450 447 sg_dma_unmark_bus_address(sg); 451 - else 448 + } else { 449 + need_sync = true; 452 450 dma_direct_unmap_phys(dev, sg->dma_address, 453 - sg_dma_len(sg), dir, attrs); 451 + sg_dma_len(sg), dir, attrs, false); 452 + } 454 453 } 454 + if (need_sync && !dev_is_dma_coherent(dev)) 455 + arch_sync_dma_flush(); 455 456 } 456 457 #endif 457 458 ··· 466 457 struct pci_p2pdma_map_state p2pdma_state = {}; 467 458 struct scatterlist *sg; 468 459 int i, ret; 460 + bool need_sync = false; 469 461 470 462 for_each_sg(sgl, sg, nents, i) { 471 463 switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) { ··· 478 468 */ 479 469 break; 480 470 case PCI_P2PDMA_MAP_NONE: 471 + need_sync = true; 481 472 sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg), 482 - sg->length, dir, attrs); 473 + sg->length, dir, attrs, false); 483 474 if (sg->dma_address == DMA_MAPPING_ERROR) { 484 475 ret = -EIO; 485 476 goto out_unmap; ··· 499 488 sg_dma_len(sg) = sg->length; 500 489 } 501 490 491 + if (need_sync && !dev_is_dma_coherent(dev)) 492 + arch_sync_dma_flush(); 502 493 return nents; 503 494 504 495 out_unmap:

+26 -9

kernel/dma/direct.h

··· 60 60 61 61 swiotlb_sync_single_for_device(dev, paddr, size, dir); 62 62 63 - if (!dev_is_dma_coherent(dev)) 63 + if (!dev_is_dma_coherent(dev)) { 64 64 arch_sync_dma_for_device(paddr, size, dir); 65 + arch_sync_dma_flush(); 66 + } 65 67 } 66 68 67 69 static inline void dma_direct_sync_single_for_cpu(struct device *dev, 68 - dma_addr_t addr, size_t size, enum dma_data_direction dir) 70 + dma_addr_t addr, size_t size, enum dma_data_direction dir, 71 + bool flush) 69 72 { 70 73 phys_addr_t paddr = dma_to_phys(dev, addr); 71 74 72 75 if (!dev_is_dma_coherent(dev)) { 73 76 arch_sync_dma_for_cpu(paddr, size, dir); 77 + if (flush) 78 + arch_sync_dma_flush(); 74 79 arch_sync_dma_for_cpu_all(); 75 80 } 76 81 ··· 84 79 85 80 static inline dma_addr_t dma_direct_map_phys(struct device *dev, 86 81 phys_addr_t phys, size_t size, enum dma_data_direction dir, 87 - unsigned long attrs) 82 + unsigned long attrs, bool flush) 88 83 { 89 84 dma_addr_t dma_addr; 90 85 91 86 if (is_swiotlb_force_bounce(dev)) { 92 - if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) 93 - return DMA_MAPPING_ERROR; 87 + if (!(attrs & DMA_ATTR_CC_SHARED)) { 88 + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) 89 + return DMA_MAPPING_ERROR; 94 90 95 - return swiotlb_map(dev, phys, size, dir, attrs); 91 + return swiotlb_map(dev, phys, size, dir, attrs); 92 + } 93 + } else if (attrs & DMA_ATTR_CC_SHARED) { 94 + return DMA_MAPPING_ERROR; 96 95 } 97 96 98 97 if (attrs & DMA_ATTR_MMIO) { 99 98 dma_addr = phys; 99 + if (unlikely(!dma_capable(dev, dma_addr, size, false))) 100 + goto err_overflow; 101 + } else if (attrs & DMA_ATTR_CC_SHARED) { 102 + dma_addr = phys_to_dma_unencrypted(dev, phys); 100 103 if (unlikely(!dma_capable(dev, dma_addr, size, false))) 101 104 goto err_overflow; 102 105 } else { ··· 120 107 } 121 108 122 109 if (!dev_is_dma_coherent(dev) && 123 - !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) 110 + !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) { 124 111 
arch_sync_dma_for_device(phys, size, dir); 112 + if (flush) 113 + arch_sync_dma_flush(); 114 + } 125 115 return dma_addr; 126 116 127 117 err_overflow: ··· 136 120 } 137 121 138 122 static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, 139 - size_t size, enum dma_data_direction dir, unsigned long attrs) 123 + size_t size, enum dma_data_direction dir, unsigned long attrs, 124 + bool flush) 140 125 { 141 126 phys_addr_t phys; 142 127 ··· 147 130 148 131 phys = dma_to_phys(dev, addr); 149 132 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 150 - dma_direct_sync_single_for_cpu(dev, addr, size, dir); 133 + dma_direct_sync_single_for_cpu(dev, addr, size, dir, flush); 151 134 152 135 swiotlb_tbl_unmap_single(dev, phys, size, dir, 153 136 attrs | DMA_ATTR_SKIP_CPU_SYNC);

+225 -25

kernel/dma/map_benchmark.c

··· 5 5 6 6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 7 8 + #include <linux/cleanup.h> 8 9 #include <linux/debugfs.h> 9 10 #include <linux/delay.h> 10 11 #include <linux/device.h> ··· 16 15 #include <linux/module.h> 17 16 #include <linux/pci.h> 18 17 #include <linux/platform_device.h> 18 + #include <linux/scatterlist.h> 19 19 #include <linux/slab.h> 20 20 #include <linux/timekeeping.h> 21 21 #include <uapi/linux/map_benchmark.h> ··· 33 31 atomic64_t loops; 34 32 }; 35 33 36 - static int map_benchmark_thread(void *data) 34 + struct map_benchmark_ops { 35 + void *(*prepare)(struct map_benchmark_data *map); 36 + void (*unprepare)(void *mparam); 37 + void (*initialize_data)(void *mparam); 38 + int (*do_map)(void *mparam); 39 + void (*do_unmap)(void *mparam); 40 + }; 41 + 42 + struct dma_single_map_param { 43 + struct device *dev; 44 + dma_addr_t addr; 45 + void *xbuf; 46 + u32 npages; 47 + u32 dma_dir; 48 + }; 49 + 50 + static void *dma_single_map_benchmark_prepare(struct map_benchmark_data *map) 37 51 { 38 - void *buf; 39 - dma_addr_t dma_addr; 40 - struct map_benchmark_data *map = data; 41 - int npages = map->bparam.granule; 42 - u64 size = npages * PAGE_SIZE; 52 + struct dma_single_map_param *params __free(kfree) = kzalloc(sizeof(*params), 53 + GFP_KERNEL); 54 + if (!params) 55 + return NULL; 56 + 57 + params->npages = map->bparam.granule; 58 + params->dma_dir = map->bparam.dma_dir; 59 + params->dev = map->dev; 60 + params->xbuf = alloc_pages_exact(params->npages * PAGE_SIZE, GFP_KERNEL); 61 + if (!params->xbuf) 62 + return NULL; 63 + 64 + return_ptr(params); 65 + } 66 + 67 + static void dma_single_map_benchmark_unprepare(void *mparam) 68 + { 69 + struct dma_single_map_param *params = mparam; 70 + 71 + free_pages_exact(params->xbuf, params->npages * PAGE_SIZE); 72 + kfree(params); 73 + } 74 + 75 + static void dma_single_map_benchmark_initialize_data(void *mparam) 76 + { 77 + struct dma_single_map_param *params = mparam; 78 + 79 + /* 80 + * for a non-coherent 
device, if we don't stain them in the 81 + * cache, this will give an underestimate of the real-world 82 + * overhead of BIDIRECTIONAL or TO_DEVICE mappings; 83 + * 66 means everything goes well! 66 is lucky. 84 + */ 85 + if (params->dma_dir != DMA_FROM_DEVICE) 86 + memset(params->xbuf, 0x66, params->npages * PAGE_SIZE); 87 + } 88 + 89 + static int dma_single_map_benchmark_do_map(void *mparam) 90 + { 91 + struct dma_single_map_param *params = mparam; 92 + 93 + params->addr = dma_map_single(params->dev, params->xbuf, 94 + params->npages * PAGE_SIZE, params->dma_dir); 95 + if (unlikely(dma_mapping_error(params->dev, params->addr))) { 96 + pr_err("dma_map_single failed on %s\n", dev_name(params->dev)); 97 + return -ENOMEM; 98 + } 99 + 100 + return 0; 101 + } 102 + 103 + static void dma_single_map_benchmark_do_unmap(void *mparam) 104 + { 105 + struct dma_single_map_param *params = mparam; 106 + 107 + dma_unmap_single(params->dev, params->addr, 108 + params->npages * PAGE_SIZE, params->dma_dir); 109 + } 110 + 111 + static struct map_benchmark_ops dma_single_map_benchmark_ops = { 112 + .prepare = dma_single_map_benchmark_prepare, 113 + .unprepare = dma_single_map_benchmark_unprepare, 114 + .initialize_data = dma_single_map_benchmark_initialize_data, 115 + .do_map = dma_single_map_benchmark_do_map, 116 + .do_unmap = dma_single_map_benchmark_do_unmap, 117 + }; 118 + 119 + struct dma_sg_map_param { 120 + struct sg_table sgt; 121 + struct device *dev; 122 + void **buf; 123 + u32 npages; 124 + u32 dma_dir; 125 + }; 126 + 127 + static void *dma_sg_map_benchmark_prepare(struct map_benchmark_data *map) 128 + { 129 + struct scatterlist *sg; 130 + int i; 131 + 132 + struct dma_sg_map_param *params = kzalloc(sizeof(*params), GFP_KERNEL); 133 + 134 + if (!params) 135 + return NULL; 136 + /* 137 + * Set the number of scatterlist entries based on the granule. 138 + * In SG mode, 'granule' represents the number of scatterlist entries. 
139 + * Each scatterlist entry corresponds to a single page. 140 + */ 141 + params->npages = map->bparam.granule; 142 + params->dma_dir = map->bparam.dma_dir; 143 + params->dev = map->dev; 144 + params->buf = kmalloc_array(params->npages, sizeof(*params->buf), 145 + GFP_KERNEL); 146 + if (!params->buf) 147 + goto out; 148 + 149 + if (sg_alloc_table(&params->sgt, params->npages, GFP_KERNEL)) 150 + goto free_buf; 151 + 152 + for_each_sgtable_sg(&params->sgt, sg, i) { 153 + params->buf[i] = (void *)__get_free_page(GFP_KERNEL); 154 + if (!params->buf[i]) 155 + goto free_page; 156 + 157 + sg_set_buf(sg, params->buf[i], PAGE_SIZE); 158 + } 159 + 160 + return params; 161 + 162 + free_page: 163 + while (i-- > 0) 164 + free_page((unsigned long)params->buf[i]); 165 + 166 + sg_free_table(&params->sgt); 167 + free_buf: 168 + kfree(params->buf); 169 + out: 170 + kfree(params); 171 + return NULL; 172 + } 173 + 174 + static void dma_sg_map_benchmark_unprepare(void *mparam) 175 + { 176 + struct dma_sg_map_param *params = mparam; 177 + int i; 178 + 179 + for (i = 0; i < params->npages; i++) 180 + free_page((unsigned long)params->buf[i]); 181 + 182 + sg_free_table(&params->sgt); 183 + 184 + kfree(params->buf); 185 + kfree(params); 186 + } 187 + 188 + static void dma_sg_map_benchmark_initialize_data(void *mparam) 189 + { 190 + struct dma_sg_map_param *params = mparam; 191 + struct scatterlist *sg; 192 + int i = 0; 193 + 194 + if (params->dma_dir == DMA_FROM_DEVICE) 195 + return; 196 + 197 + for_each_sgtable_sg(&params->sgt, sg, i) 198 + memset(params->buf[i], 0x66, PAGE_SIZE); 199 + } 200 + 201 + static int dma_sg_map_benchmark_do_map(void *mparam) 202 + { 203 + struct dma_sg_map_param *params = mparam; 43 204 int ret = 0; 44 205 45 - buf = alloc_pages_exact(size, GFP_KERNEL); 46 - if (!buf) 206 + int sg_mapped = dma_map_sg(params->dev, params->sgt.sgl, 207 + params->npages, params->dma_dir); 208 + if (!sg_mapped) { 209 + pr_err("dma_map_sg failed on %s\n", dev_name(params->dev)); 
210 + ret = -ENOMEM; 211 + } 212 + 213 + return ret; 214 + } 215 + 216 + static void dma_sg_map_benchmark_do_unmap(void *mparam) 217 + { 218 + struct dma_sg_map_param *params = mparam; 219 + 220 + dma_unmap_sg(params->dev, params->sgt.sgl, params->npages, 221 + params->dma_dir); 222 + } 223 + 224 + static struct map_benchmark_ops dma_sg_map_benchmark_ops = { 225 + .prepare = dma_sg_map_benchmark_prepare, 226 + .unprepare = dma_sg_map_benchmark_unprepare, 227 + .initialize_data = dma_sg_map_benchmark_initialize_data, 228 + .do_map = dma_sg_map_benchmark_do_map, 229 + .do_unmap = dma_sg_map_benchmark_do_unmap, 230 + }; 231 + 232 + static struct map_benchmark_ops *dma_map_benchmark_ops[DMA_MAP_BENCH_MODE_MAX] = { 233 + [DMA_MAP_BENCH_SINGLE_MODE] = &dma_single_map_benchmark_ops, 234 + [DMA_MAP_BENCH_SG_MODE] = &dma_sg_map_benchmark_ops, 235 + }; 236 + 237 + static int map_benchmark_thread(void *data) 238 + { 239 + struct map_benchmark_data *map = data; 240 + __u8 map_mode = map->bparam.map_mode; 241 + int ret = 0; 242 + 243 + struct map_benchmark_ops *mb_ops = dma_map_benchmark_ops[map_mode]; 244 + void *mparam = mb_ops->prepare(map); 245 + 246 + if (!mparam) 47 247 return -ENOMEM; 48 248 49 249 while (!kthread_should_stop()) { ··· 253 49 ktime_t map_stime, map_etime, unmap_stime, unmap_etime; 254 50 ktime_t map_delta, unmap_delta; 255 51 256 - /* 257 - * for a non-coherent device, if we don't stain them in the 258 - * cache, this will give an underestimate of the real-world 259 - * overhead of BIDIRECTIONAL or TO_DEVICE mappings; 260 - * 66 means evertything goes well! 66 is lucky. 
261 - */ 262 - if (map->dir != DMA_FROM_DEVICE) 263 - memset(buf, 0x66, size); 264 - 52 + mb_ops->initialize_data(mparam); 265 53 map_stime = ktime_get(); 266 - dma_addr = dma_map_single(map->dev, buf, size, map->dir); 267 - if (unlikely(dma_mapping_error(map->dev, dma_addr))) { 268 - pr_err("dma_map_single failed on %s\n", 269 - dev_name(map->dev)); 270 - ret = -ENOMEM; 54 + ret = mb_ops->do_map(mparam); 55 + if (ret) 271 56 goto out; 272 - } 57 + 273 58 map_etime = ktime_get(); 274 59 map_delta = ktime_sub(map_etime, map_stime); 275 60 ··· 266 73 ndelay(map->bparam.dma_trans_ns); 267 74 268 75 unmap_stime = ktime_get(); 269 - dma_unmap_single(map->dev, dma_addr, size, map->dir); 76 + mb_ops->do_unmap(mparam); 77 + 270 78 unmap_etime = ktime_get(); 271 79 unmap_delta = ktime_sub(unmap_etime, unmap_stime); 272 80 ··· 302 108 } 303 109 304 110 out: 305 - free_pages_exact(buf, size); 111 + mb_ops->unprepare(mparam); 306 112 return ret; 307 113 } 308 114 ··· 403 209 404 210 switch (cmd) { 405 211 case DMA_MAP_BENCHMARK: 212 + if (map->bparam.map_mode < 0 || 213 + map->bparam.map_mode >= DMA_MAP_BENCH_MODE_MAX) { 214 + pr_err("invalid map mode\n"); 215 + return -EINVAL; 216 + } 217 + 406 218 if (map->bparam.threads == 0 || 407 219 map->bparam.threads > DMA_MAP_MAX_THREADS) { 408 220 pr_err("invalid thread number\n");

+14 -5

kernel/dma/mapping.c

··· 157 157 { 158 158 const struct dma_map_ops *ops = get_dma_ops(dev); 159 159 bool is_mmio = attrs & DMA_ATTR_MMIO; 160 + bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED; 160 161 dma_addr_t addr = DMA_MAPPING_ERROR; 161 162 162 163 BUG_ON(!valid_dma_direction(dir)); ··· 169 168 return DMA_MAPPING_ERROR; 170 169 171 170 if (dma_map_direct(dev, ops) || 172 - (!is_mmio && arch_dma_map_phys_direct(dev, phys + size))) 173 - addr = dma_direct_map_phys(dev, phys, size, dir, attrs); 171 + (!is_mmio && !is_cc_shared && 172 + arch_dma_map_phys_direct(dev, phys + size))) 173 + addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true); 174 + else if (is_cc_shared) 175 + return DMA_MAPPING_ERROR; 174 176 else if (use_dma_iommu(dev)) 175 177 addr = iommu_dma_map_phys(dev, phys, size, dir, attrs); 176 178 else if (ops->map_phys) ··· 210 206 { 211 207 const struct dma_map_ops *ops = get_dma_ops(dev); 212 208 bool is_mmio = attrs & DMA_ATTR_MMIO; 209 + bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED; 213 210 214 211 BUG_ON(!valid_dma_direction(dir)); 212 + 215 213 if (dma_map_direct(dev, ops) || 216 - (!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size))) 217 - dma_direct_unmap_phys(dev, addr, size, dir, attrs); 214 + (!is_mmio && !is_cc_shared && 215 + arch_dma_unmap_phys_direct(dev, addr + size))) 216 + dma_direct_unmap_phys(dev, addr, size, dir, attrs, true); 217 + else if (is_cc_shared) 218 + return; 218 219 else if (use_dma_iommu(dev)) 219 220 iommu_dma_unmap_phys(dev, addr, size, dir, attrs); 220 221 else if (ops->unmap_phys) ··· 388 379 389 380 BUG_ON(!valid_dma_direction(dir)); 390 381 if (dma_map_direct(dev, ops)) 391 - dma_direct_sync_single_for_cpu(dev, addr, size, dir); 382 + dma_direct_sync_single_for_cpu(dev, addr, size, dir, true); 392 383 else if (use_dma_iommu(dev)) 393 384 iommu_dma_sync_single_for_cpu(dev, addr, size, dir); 394 385 else if (ops->sync_single_for_cpu)

+15 -11

kernel/dma/swiotlb.c

··· 868 868 if (orig_addr == INVALID_PHYS_ADDR) 869 869 return; 870 870 871 + if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev)) 872 + arch_sync_dma_flush(); 873 + 871 874 /* 872 875 * It's valid for tlb_offset to be negative. This can happen when the 873 876 * "offset" returned by swiotlb_align_offset() is non-zero, and the ··· 1615 1612 return DMA_MAPPING_ERROR; 1616 1613 } 1617 1614 1618 - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1615 + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { 1619 1616 arch_sync_dma_for_device(swiotlb_addr, size, dir); 1617 + arch_sync_dma_flush(); 1618 + } 1620 1619 return dma_addr; 1621 1620 } 1622 1621 ··· 1877 1872 dev->dma_io_tlb_mem = &io_tlb_default_mem; 1878 1873 } 1879 1874 1880 - static const struct reserved_mem_ops rmem_swiotlb_ops = { 1881 - .device_init = rmem_swiotlb_device_init, 1882 - .device_release = rmem_swiotlb_device_release, 1883 - }; 1884 - 1885 - static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) 1875 + static int __init rmem_swiotlb_setup(unsigned long node, 1876 + struct reserved_mem *rmem) 1886 1877 { 1887 - unsigned long node = rmem->fdt_node; 1888 - 1889 1878 if (of_get_flat_dt_prop(node, "reusable", NULL) || 1890 1879 of_get_flat_dt_prop(node, "linux,cma-default", NULL) || 1891 1880 of_get_flat_dt_prop(node, "linux,dma-default", NULL) || 1892 1881 of_get_flat_dt_prop(node, "no-map", NULL)) 1893 1882 return -EINVAL; 1894 1883 1895 - rmem->ops = &rmem_swiotlb_ops; 1896 1884 pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n", 1897 1885 &rmem->base, (unsigned long)rmem->size / SZ_1M); 1898 1886 return 0; 1899 1887 } 1900 1888 1901 - RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup); 1889 + static const struct reserved_mem_ops rmem_swiotlb_ops = { 1890 + .node_init = rmem_swiotlb_setup, 1891 + .device_init = rmem_swiotlb_device_init, 1892 + .device_release = rmem_swiotlb_device_release, 1893 + }; 
1894 + 1895 + RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", &rmem_swiotlb_ops); 1902 1896 #endif /* CONFIG_DMA_RESTRICTED_POOL */

+3

mm/cma.c

··· 52 52 { 53 53 return cma->name; 54 54 } 55 + EXPORT_SYMBOL_GPL(cma_get_name); 55 56 56 57 static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, 57 58 unsigned int align_order) ··· 952 951 953 952 return page; 954 953 } 954 + EXPORT_SYMBOL_GPL(cma_alloc); 955 955 956 956 static struct cma_memrange *find_cma_memrange(struct cma *cma, 957 957 const struct page *pages, unsigned long count) ··· 1032 1030 1033 1031 return true; 1034 1032 } 1033 + EXPORT_SYMBOL_GPL(cma_release); 1035 1034 1036 1035 bool cma_release_frozen(struct cma *cma, const struct page *pages, 1037 1036 unsigned long count)

+20 -3

tools/dma/dma_map_benchmark.c

··· 20 20 "FROM_DEVICE", 21 21 }; 22 22 23 + static char *mode[] = { 24 + "SINGLE_MODE", 25 + "SG_MODE", 26 + }; 27 + 23 28 int main(int argc, char **argv) 24 29 { 25 30 struct map_benchmark map; 26 31 int fd, opt; 27 32 /* default single thread, run 20 seconds on NUMA_NO_NODE */ 28 33 int threads = 1, seconds = 20, node = -1; 34 + /* default single map mode */ 35 + int map_mode = DMA_MAP_BENCH_SINGLE_MODE; 29 36 /* default dma mask 32bit, bidirectional DMA */ 30 37 int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; 31 38 /* default granule 1 PAGESIZE */ ··· 40 33 41 34 int cmd = DMA_MAP_BENCHMARK; 42 35 43 - while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { 36 + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:m:")) != -1) { 44 37 switch (opt) { 45 38 case 't': 46 39 threads = atoi(optarg); ··· 63 56 case 'g': 64 57 granule = atoi(optarg); 65 58 break; 59 + case 'm': 60 + map_mode = atoi(optarg); 61 + break; 66 62 default: 67 63 return -1; 68 64 } 65 + } 66 + 67 + if (map_mode < 0 || map_mode >= DMA_MAP_BENCH_MODE_MAX) { 68 + fprintf(stderr, "invalid map mode, SINGLE_MODE:%d, SG_MODE: %d\n", 69 + DMA_MAP_BENCH_SINGLE_MODE, DMA_MAP_BENCH_SG_MODE); 70 + exit(1); 69 71 } 70 72 71 73 if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) { ··· 126 110 map.dma_dir = dir; 127 111 map.dma_trans_ns = xdelay; 128 112 map.granule = granule; 113 + map.map_mode = map_mode; 129 114 130 115 if (ioctl(fd, cmd, &map)) { 131 116 perror("ioctl"); 132 117 exit(1); 133 118 } 134 119 135 - printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n", 136 - threads, seconds, node, directions[dir], granule); 120 + printf("dma mapping benchmark(%s): threads:%d seconds:%d node:%d dir:%s granule:%d\n", 121 + mode[map_mode], threads, seconds, node, directions[dir], granule); 137 122 printf("average map latency(us):%.1f standard deviation:%.1f\n", 138 123 map.avg_map_100ns/10.0, map.map_stddev/10.0); 139 124 printf("average unmap latency(us):%.1f standard 
deviation:%.1f\n",

Configure Feed

Configure Feed