Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'dma-maping-6.9-2024-04-14' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping fixes from Christoph Hellwig:

- fix up swiotlb buffer padding even more (Petr Tesarik)

- fix for partial dma_sync on swiotlb (Michael Kelley)

- swiotlb debugfs fix (Dexuan Cui)

* tag 'dma-maping-6.9-2024-04-14' of git://git.infradead.org/users/hch/dma-mapping:
swiotlb: do not set total_used to 0 in swiotlb_create_debugfs_files()
swiotlb: fix swiotlb_bounce() to do partial sync's correctly
swiotlb: extend buffer pre-padding to alloc_align_mask if necessary

+58 -33
+58 -33
kernel/dma/swiotlb.c
··· 69 69 * @alloc_size: Size of the allocated buffer. 70 70 * @list: The free list describing the number of free entries available 71 71 * from each index. 72 + * @pad_slots: Number of preceding padding slots. Valid only in the first 73 + * allocated non-padding slot. 72 74 */ 73 75 struct io_tlb_slot { 74 76 phys_addr_t orig_addr; 75 77 size_t alloc_size; 76 - unsigned int list; 78 + unsigned short list; 79 + unsigned short pad_slots; 77 80 }; 78 81 79 82 static bool swiotlb_force_bounce; ··· 290 287 mem->nslabs - i); 291 288 mem->slots[i].orig_addr = INVALID_PHYS_ADDR; 292 289 mem->slots[i].alloc_size = 0; 290 + mem->slots[i].pad_slots = 0; 293 291 } 294 292 295 293 memset(vaddr, 0, bytes); ··· 825 821 #endif 826 822 } 827 823 828 - /* 829 - * Return the offset into a iotlb slot required to keep the device happy. 824 + /** 825 + * swiotlb_align_offset() - Get required offset into an IO TLB allocation. 826 + * @dev: Owning device. 827 + * @align_mask: Allocation alignment mask. 828 + * @addr: DMA address. 829 + * 830 + * Return the minimum offset from the start of an IO TLB allocation which is 831 + * required for a given buffer address and allocation alignment to keep the 832 + * device happy. 833 + * 834 + * First, the address bits covered by min_align_mask must be identical in the 835 + * original address and the bounce buffer address. High bits are preserved by 836 + * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra 837 + * padding bytes before the bounce buffer. 838 + * 839 + * Second, @align_mask specifies which bits of the first allocated slot must 840 + * be zero. This may require allocating additional padding slots, and then the 841 + * offset (in bytes) from the first such padding slot is returned. 830 842 */ 831 - static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) 843 + static unsigned int swiotlb_align_offset(struct device *dev, 844 + unsigned int align_mask, u64 addr) 832 845 { 833 - return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); 846 + return addr & dma_get_min_align_mask(dev) & 847 + (align_mask | (IO_TLB_SIZE - 1)); 834 848 } 835 849 836 850 /* ··· 863 841 size_t alloc_size = mem->slots[index].alloc_size; 864 842 unsigned long pfn = PFN_DOWN(orig_addr); 865 843 unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; 866 - unsigned int tlb_offset, orig_addr_offset; 844 + int tlb_offset; 867 845 868 846 if (orig_addr == INVALID_PHYS_ADDR) 869 847 return; 870 848 871 - tlb_offset = tlb_addr & (IO_TLB_SIZE - 1); 872 - orig_addr_offset = swiotlb_align_offset(dev, orig_addr); 873 - if (tlb_offset < orig_addr_offset) { 874 - dev_WARN_ONCE(dev, 1, 875 - "Access before mapping start detected. orig offset %u, requested offset %u.\n", 876 - orig_addr_offset, tlb_offset); 877 - return; 878 - } 879 - 880 - tlb_offset -= orig_addr_offset; 881 - if (tlb_offset > alloc_size) { 882 - dev_WARN_ONCE(dev, 1, 883 - "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n", 884 - alloc_size, size, tlb_offset); 885 - return; 886 - } 849 + /* 850 + * It's valid for tlb_offset to be negative. This can happen when the 851 + * "offset" returned by swiotlb_align_offset() is non-zero, and the 852 + * tlb_addr is pointing within the first "offset" bytes of the second 853 + * or subsequent slots of the allocated swiotlb area. While it's not 854 + * valid for tlb_addr to be pointing within the first "offset" bytes 855 + * of the first slot, there's no way to check for such an error since 856 + * this function can't distinguish the first slot from the second and 857 + * subsequent slots. 858 + */ 859 + tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - 860 + swiotlb_align_offset(dev, 0, orig_addr); 887 861 888 862 orig_addr += tlb_offset; 889 863 alloc_size -= tlb_offset; ··· 1023 1005 unsigned long max_slots = get_max_slots(boundary_mask); 1024 1006 unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); 1025 1007 unsigned int nslots = nr_slots(alloc_size), stride; 1026 - unsigned int offset = swiotlb_align_offset(dev, orig_addr); 1008 + unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); 1027 1009 unsigned int index, slots_checked, count = 0, i; 1028 1010 unsigned long flags; 1029 1011 unsigned int slot_base; ··· 1346 1328 unsigned long attrs) 1347 1329 { 1348 1330 struct io_tlb_mem *mem = dev->dma_io_tlb_mem; 1349 - unsigned int offset = swiotlb_align_offset(dev, orig_addr); 1331 + unsigned int offset; 1350 1332 struct io_tlb_pool *pool; 1351 1333 unsigned int i; 1352 1334 int index; 1353 1335 phys_addr_t tlb_addr; 1336 + unsigned short pad_slots; 1354 1337 1355 1338 if (!mem || !mem->nslabs) { 1356 1339 dev_warn_ratelimited(dev, ··· 1368 1349 return (phys_addr_t)DMA_MAPPING_ERROR; 1369 1350 } 1370 1351 1352 + offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); 1371 1353 index = swiotlb_find_slots(dev, orig_addr, 1372 1354 alloc_size + offset, alloc_align_mask, &pool); 1373 1355 if (index == -1) { ··· 1384 1364 * This is needed when we sync the memory. Then we sync the buffer if 1385 1365 * needed. 1386 1366 */ 1367 + pad_slots = offset >> IO_TLB_SHIFT; 1368 + offset &= (IO_TLB_SIZE - 1); 1369 + index += pad_slots; 1370 + pool->slots[index].pad_slots = pad_slots; 1387 1371 for (i = 0; i < nr_slots(alloc_size + offset); i++) 1388 1372 pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); 1389 1373 tlb_addr = slot_addr(pool->start, index) + offset; ··· 1408 1384 { 1409 1385 struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); 1410 1386 unsigned long flags; 1411 - unsigned int offset = swiotlb_align_offset(dev, tlb_addr); 1412 - int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; 1413 - int nslots = nr_slots(mem->slots[index].alloc_size + offset); 1414 - int aindex = index / mem->area_nslabs; 1415 - struct io_tlb_area *area = &mem->areas[aindex]; 1387 + unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); 1388 + int index, nslots, aindex; 1389 + struct io_tlb_area *area; 1416 1390 int count, i; 1391 + 1392 + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; 1393 + index -= mem->slots[index].pad_slots; 1394 + nslots = nr_slots(mem->slots[index].alloc_size + offset); 1395 + aindex = index / mem->area_nslabs; 1396 + area = &mem->areas[aindex]; 1417 1397 1418 1398 /* 1419 1399 * Return the buffer to the free list by setting the corresponding ··· 1441 1413 mem->slots[i].list = ++count; 1442 1414 mem->slots[i].orig_addr = INVALID_PHYS_ADDR; 1443 1415 mem->slots[i].alloc_size = 0; 1416 + mem->slots[i].pad_slots = 0; 1444 1417 } 1445 1418 1446 1419 /* ··· 1676 1647 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, 1677 1648 const char *dirname) 1678 1649 { 1679 - atomic_long_set(&mem->total_used, 0); 1680 - atomic_long_set(&mem->used_hiwater, 0); 1681 - 1682 1650 mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); 1683 1651 if (!mem->nslabs) 1684 1652 return; ··· 1686 1660 debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, 1687 1661 &fops_io_tlb_hiwater); 1688 1662 #ifdef CONFIG_SWIOTLB_DYNAMIC 1689 - atomic_long_set(&mem->transient_nslabs, 0); 1690 1663 debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, 1691 1664 mem, &fops_io_tlb_transient_used); 1692 1665 #endif